grammar Rst;

// Parser configuration: builds an AST, targets Java, and relies on automatic
// backtracking (with memoization) to resolve the many ambiguous decisions.
options {
  output=AST;
  language=Java;
  backtrack=true;
  memoize=true;
}

// Imaginary tokens used as AST root nodes by the rewrite rules below.
tokens {
  File;
  Section;
  Title;
  Paragraph;
  Indent;
  Dedent;
  Line;
  Word;
  Bold;
  Italic;
  InterpretedText;
  InlineLitteral;
  Reference;
  Class;
  SubstitutionReference;
}

@lexer::header {
  package org.diabeteman.rst4eclipse;
}

@parser::header {
  package org.diabeteman.rst4eclipse;
}

/****************
 * PARSER RULES *
 ****************/

// Entry point: one or more sections up to end of input, rooted at a File node.
file
  : section+ EOF -> ^(File section+)
  ;

// A section is a title followed by at least one content element.
section
  : section_title section_content+ -> ^(Section section_title section_content+)
//  | empty_line* -> /* omit line-breaks between paragraphs from AST */
  ;

// A title is a text line followed by an underline that must be at least as
// long as the title text (the line's length is passed as the minimum length).
// Only the title line is kept in the AST; the underline is dropped.
// NOTE(review): the println is a debugging trace written to stdout on every
// matched title -- confirm whether it should remain in production builds.
section_title
  : line underline[$line.length]
    {System.out.println("Matched Title: '" + $line.content + "' length: " + $line.length);}
    -> ^(Title line)
  ;

// Content of a section: a standalone underline-like line of at least four
// characters (presumably an RST transition -- TODO confirm), a paragraph,
// or a nested section. Alternative order matters because of backtracking.
section_content
  : underline[4]
  | paragraph
  | section
//  | empty_line* -> /* omit line-breaks between paragraphs from AST */
  ;

// A run of two or more identical punctuation characters ending in LINE_BREAK.
//
//   minLength  minimum number of characters the run must contain
//   symbol     (returned) the repeated character
//
// The bookkeeping actions are forced actions ({{...}}): ANTLR 3 skips ordinary
// {...} actions while backtracking but still evaluates {...}? predicates, so
// with ordinary actions the predicates would see length == 0 and symbol == 0
// during speculation (underline[4] could never be predicted, and mixed-symbol
// runs would pass the equality check).
//
// Memoization is disabled for this rule because memo entries are keyed on rule
// and input position only: a cached result obtained with one minLength would
// otherwise be reused for a call with a different minLength.
underline[int minLength] returns [char symbol]
options { memoize=false; }
@init {
  $symbol = (char) 0;
  int length = 0;
}
  : first=underline_atom {{ $symbol=$first.symbol; length++; }}
    ( next=underline_atom {$symbol == $next.symbol}? {{ length++; }} )+
    {length >= minLength}?
    LINE_BREAK
  ;

// A single character usable in an underline; returns that character.
// Forced action + no memoization so that the returned symbol is also valid
// while the parser is backtracking (a memo hit would return the @init default).
underline_atom returns [char symbol]
options { memoize=false; }
@init {
  $symbol = (char) 0;
}
  : (UNDERSCORE | STAR | PIPE | BACKTICK | COLUMN | SPECIAL_CHAR)
    {{ $symbol = $text.charAt(0); }}
  ;

// One or more lines terminated by an extra LINE_BREAK (i.e. a blank line).
paragraph
  : line+ LINE_BREAK -> ^(Paragraph line+)
  ;

// A single line of inline content terminated by LINE_BREAK.
//
//   length   (returned) number of characters matched before the LINE_BREAK
//   content  (returned) the matched text, without the LINE_BREAK
//
// The action is forced and memoization is disabled so that section_title can
// rely on $line.length while backtracking; otherwise the underline length
// check would be made against 0 during speculation.
line returns [int length, String content]
options { memoize=false; }
@init {
  $length = 0;
  $content = null;
}
  : any_text+
    {{
      $length = $text.length();
      $content = $text;
    }}
    LINE_BREAK
  ;

// Anything that may appear inside a line.
// NOTE(review): the ANY token is not defined in the visible part of the
// grammar -- confirm it is declared further down in the lexer section.
any_text
  : styled_text
  | reference
  | WS
  | ESCAPE_SEQUENCE
  | SIMPLE_CHAR
  | ANY
  ;

// Inline markup alternatives.
styled_text
  : bold
  | italic
  | inline_litteral
  | interpreted_text
  ;

// **text** -- the surrounding stars are dropped from the AST.
bold
  : STAR STAR no_star_atom+ STAR STAR -> ^(Bold no_star_atom+)
  ;

// *text* -- the surrounding stars are dropped from the AST.
italic
  : STAR no_star_atom+ STAR -> ^(Italic no_star_atom+)
  ;

// Any token except a star or a line break.
no_star_atom
  : ~(STAR | LINE_BREAK)
  ;

// `text`, optionally with a :class: prefix or suffix. When a class is present
// it is always placed first in the resulting InterpretedText node.
interpreted_text
  : interpreted_text_class BACKTICK interpreted_text_atoms BACKTICK
      -> ^(InterpretedText interpreted_text_class interpreted_text_atoms)
  | BACKTICK interpreted_text_atoms BACKTICK interpreted_text_class
      -> ^(InterpretedText interpreted_text_class interpreted_text_atoms)
  | BACKTICK interpreted_text_atoms BACKTICK
      -> ^(InterpretedText interpreted_text_atoms)
  ;

// One or more tokens other than a backtick (line breaks are not excluded).
interpreted_text_atoms
  : ~BACKTICK+
  ;

// :name: -- the colons (COLUMN tokens) are dropped from the AST.
interpreted_text_class
  : COLUMN class_atom COLUMN -> ^(Class class_atom)
  ;

// Class name: one or more SIMPLE_CHAR tokens.
class_atom
  : SIMPLE_CHAR+
  ;

// ``text`` -- the surrounding double backticks are dropped from the AST.
inline_litteral
  : BACKTICK BACKTICK inline_litteral_atoms BACKTICK BACKTICK
      -> ^(InlineLitteral inline_litteral_atoms)
  ;

inline_litteral_atoms
  : inline_litteral_atom+
  ;

// Either a non-backtick token, or a single backtick followed by a
// non-backtick token (so a lone backtick may appear inside the literal).
inline_litteral_atom
  : ~BACKTICK
  | BACKTICK ~BACKTICK
  ;

// The three supported reference forms.
reference
  : simple_reference
  | long_reference
  | substitution_reference
  ;

// name_ -- the trailing underscore is dropped from the AST.
simple_reference
  : simple_reference_atoms UNDERSCORE -> ^(Reference simple_reference_atoms)
  ;

simple_reference_atoms
  : (SIMPLE_CHAR|ANY)+
  ;

// `some text`_ -- backticks and trailing underscore are dropped from the AST.
long_reference
  : BACKTICK interpreted_text_atoms BACKTICK UNDERSCORE
      -> ^(Reference interpreted_text_atoms)
  ;

// |name| -- the pipes are dropped from the AST.
substitution_reference
  : PIPE substitution_reference_atom+ PIPE
      -> ^(SubstitutionReference substitution_reference_atom+)
  ;

// Any token except a pipe (line breaks are not excluded).
substitution_reference_atom
  : ~PIPE
  ;

//empty_line
//  :  LEADING_WS?
//     LINE_BREAK
//  ;

/***************
 * LEXER RULES *
 ***************/

// Single-character markup tokens.
UNDERSCORE
  : '_'
  ;

BACKTICK
  : '`'
  ;

STAR
  : '*'
  ;

PIPE
  : '|'
  ;

// The colon character (token name kept as-is; parser rules refer to it).
COLUMN
  : ':'
  ;

// Remaining ASCII punctuation, written as ranges. The characters that have
// their own token ('*', '_', '`', '|', ':') and '-' (part of SIMPLE_CHAR)
// fall in the gaps between the ranges and are therefore not matched here.
SPECIAL_CHAR
  : '!'..')'     // ! " # $ % & ' ( )
  | '+'..','     // + ,
  | '.'..'/'     // . /
  | ';'..'@'     // ; < = > ? @
  | '['..'^'     // [ \ ] ^
  | '{'
  | '}'..'~'     // } ~
  ;

// A backslash followed by one of: backslash, star, backtick, pipe.
ESCAPE_SEQUENCE
  : '\\' ('\\' | '*' | '`' | '|')
  ;

// One ASCII letter, digit or hyphen.
SIMPLE_CHAR
  : 'a'..'z'
  | 'A'..'Z'
  | '0'..'9'
  | '-'
  ;

// End of line: LF, optionally preceded by CR.
LINE_BREAK
  : ('\r')? '\n'
  ;

// A run of spaces, tabs or form feeds. The position-in-line gate is disabled.
WS
  : /*{getCharPositionInLine()>0}?=>*/ ('\t' | '\u000C' | ' ')+
  ;

//LEADING_WS
//@init {
//int spaces = 0;
//}
//  : {getCharPositionInLine() == 0}?=>
//  (
//  (' ' {spaces++;}|'\u000C' {spaces++;}|'\t' {spaces += 8; spaces -= (spaces \% 8);})+
//  {
//  // make a string of n spaces where n is column number - 1
//  char[] indentation = new char[spaces];
//  for (int i=0; i