diff --git a/_tests/trconvert/antlr3/Gold/ASN.g4 b/_tests/trconvert/antlr3/Gold/ASN.g4 index b0d166a4f..c1ced464f 100644 --- a/_tests/trconvert/antlr3/Gold/ASN.g4 +++ b/_tests/trconvert/antlr3/Gold/ASN.g4 @@ -956,7 +956,7 @@ NUMBER : DIGIT+; //WORD : UPPER+; -WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} +WS : (' '|'\r'|'\t'|'\u000C'|'\n') ; @@ -964,7 +964,7 @@ fragment Exponent : ('e'|'E') ('+'|'-')? NUMBER ; LINE_COMMENT - : '--' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} + : '--' ~('\n'|'\r')* '\r'? '\n' ; diff --git a/_tests/trconvert/antlr3/Gold/Antlr3.g4 b/_tests/trconvert/antlr3/Gold/Antlr3.g4 index e4d71a7e5..1c4d781a7 100644 --- a/_tests/trconvert/antlr3/Gold/Antlr3.g4 +++ b/_tests/trconvert/antlr3/Gold/Antlr3.g4 @@ -107,380 +107,45 @@ tokens REWRITES } -@lexer::header { -package org.antlr.grammar.v3; -import org.antlr.tool.ErrorManager; -import org.antlr.tool.Grammar; -} - -@parser::header { -package org.antlr.grammar.v3; -import org.antlr.tool.ErrorManager; -import org.antlr.tool.Grammar; -import org.antlr.tool.GrammarAST; -import org.antlr.misc.IntSet; -import org.antlr.tool.Rule; -} - -@lexer::members { -public boolean hasASTOperator = false; -private String fileName; - -public String getFileName() { - return fileName; -} - -public void setFileName(String value) { - fileName = value; -} - -@Override -public Token nextToken() { - Token token = super.nextToken(); - while (token.getType() == STRAY_BRACKET) { - ErrorManager.syntaxError( - ErrorManager.MSG_SYNTAX_ERROR, - null, - token, - "antlr: dangling ']'? make sure to escape with \\]", - null); - - // skip this token - token = super.nextToken(); - } - - return token; -} -} - -@parser::members { -protected String currentRuleName = null; -protected GrammarAST currentBlockAST = null; -protected boolean atTreeRoot; // are we matching a tree root in tree grammar? - -public static ANTLRParser createParser(TokenStream input) { - ANTLRParser parser = new ANTLRParser(input); - parser.adaptor = new grammar_Adaptor(parser); - return parser; -} - -private static class GrammarASTErrorNode extends GrammarAST { - public IntStream input; - public Token start; - public Token stop; - public RecognitionException trappedException; - - public GrammarASTErrorNode(TokenStream input, Token start, Token stop, RecognitionException e) { - super(stop); - //Console.Out.WriteLine( "start: " + start + ", stop: " + stop ); - if ( stop == null || - ( stop.getTokenIndex() < start.getTokenIndex() && - stop.getType() != Token.EOF) ) { - // sometimes resync does not consume a token (when LT(1) is - // in follow set. So, stop will be 1 to left to start. adjust. - // Also handle case where start is the first token and no token - // is consumed during recovery; LT(-1) will return null. - stop = start; - } - this.input = input; - this.start = start; - this.stop = stop; - this.trappedException = e; - } - - @Override - public boolean isNil() { return false; } - - @Override - public String getText() { - String badText = null; - if (start != null) { - int i = start.getTokenIndex(); - int j = stop.getTokenIndex(); - if (stop.getType() == Token.EOF) { - j = input.size(); - } - badText = ((TokenStream)input).toString(i, j); - } else { - // people should subclass if they alter the tree type so this - // next one is for sure correct. - badText = ""; - } - return badText; - } - - @Override - public void setText(String value) { } - - @Override - public int getType() { return Token.INVALID_TOKEN_TYPE; } - - @Override - public void setType(int value) { } - - @Override - public String toString() - { - if (trappedException instanceof MissingTokenException) - { - return ""; - } else if (trappedException instanceof UnwantedTokenException) { - return ""; - } else if (trappedException instanceof MismatchedTokenException) { - return ""; - } else if (trappedException instanceof NoViableAltException) { - return ""; - } - return ""; - } -} - -static class grammar_Adaptor extends CommonTreeAdaptor { - ANTLRParser _outer; - - public grammar_Adaptor(ANTLRParser outer) { - _outer = outer; - } - - @Override - public Object create(Token payload) { - GrammarAST t = new GrammarAST( payload ); - if (_outer != null) - t.enclosingRuleName = _outer.currentRuleName; - return t; - } - - @Override - public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { - GrammarAST t = new GrammarASTErrorNode(input, start, stop, e); - if (_outer != null) - t.enclosingRuleName = _outer.currentRuleName; - return t; - } -} - -private Grammar grammar; -private int grammarType; -private String fileName; - -public Grammar getGrammar() { - return grammar; -} - -public void setGrammar(Grammar value) { - grammar = value; -} - -public int getGrammarType() { - return grammarType; -} - -public void setGrammarType(int value) { - grammarType = value; -} +// Token string literals converted to explicit lexer rules. +// Reorder these rules accordingly. -public String getFileName() { - return fileName; -} - -public void setFileName(String value) { - fileName = value; -} - -private final int LA(int i) { return input.LA( i ); } - -private final Token LT(int k) { return input.LT( k ); } - -/*partial void createTreeAdaptor(ref ITreeAdaptor adaptor) -{ - adaptor = new grammar_Adaptor(this); -}*/ - -protected GrammarAST setToBlockWithSet(GrammarAST b) { - /* - * alt = ^(ALT["ALT"] {b} EOA["EOA"]) - * prefixWithSynpred( alt ) - * return ^(BLOCK["BLOCK"] {alt} EOB[""]) - */ - GrammarAST alt = (GrammarAST)adaptor.create(ALT, "ALT"); - adaptor.addChild(alt, b); - adaptor.addChild(alt, adaptor.create(EOA, "")); - - prefixWithSynPred(alt); - - GrammarAST block = (GrammarAST)adaptor.create(BLOCK, b.getToken(), "BLOCK"); - adaptor.addChild(block, alt); - adaptor.addChild(alt, adaptor.create(EOB, "")); - - return block; -} +LEXER: 'lexer'; +PARSER: 'parser'; +CATCH: 'catch'; +FINALLY: 'finally'; +GRAMMAR: 'grammar'; +PRIVATE: 'private'; +PROTECTED: 'protected'; +PUBLIC: 'public'; +RETURNS: 'returns'; +THROWS: 'throws'; +TREE: 'tree'; +SCOPE: 'scope'; +IMPORT: 'import'; +FRAGMENT: 'fragment'; +// -/** Create a copy of the alt and make it into a BLOCK; all actions, - * labels, tree operators, rewrites are removed. - */ -protected GrammarAST createBlockFromDupAlt(GrammarAST alt) { - /* - * ^(BLOCK["BLOCK"] {GrammarAST.dupTreeNoActions(alt)} EOB[""]) - */ - GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null); - GrammarAST block = (GrammarAST)adaptor.create(BLOCK, alt.getToken(), "BLOCK"); - adaptor.addChild( block, nalt ); - adaptor.addChild( block, adaptor.create( EOB, "" ) ); - return block; -} -/** Rewrite alt to have a synpred as first element; - * (xxx)=>xxx - * but only if they didn't specify one manually. - */ -protected void prefixWithSynPred( GrammarAST alt ) { - // if they want backtracking and it's not a lexer rule in combined grammar - String autoBacktrack = (String)grammar.getBlockOption( currentBlockAST, "backtrack" ); - if ( autoBacktrack == null ) - { - autoBacktrack = (String)grammar.getOption( "backtrack" ); - } - if ( autoBacktrack != null && autoBacktrack.equals( "true" ) && - !( grammarType == Grammar.COMBINED && - Rule.getRuleType(currentRuleName) == Grammar.LEXER) && - alt.getChild( 0 ).getType() != SYN_SEMPRED ) - { - // duplicate alt and make a synpred block around that dup'd alt - GrammarAST synpredBlockAST = createBlockFromDupAlt( alt ); - - // Create a BACKTRACK_SEMPRED node as if user had typed this in - // Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx - GrammarAST synpredAST = createSynSemPredFromBlock( synpredBlockAST, - BACKTRACK_SEMPRED ); - - // insert BACKTRACK_SEMPRED as first element of alt - //synpredAST.getLastSibling().setNextSibling( alt.getFirstChild() ); - //synpredAST.addChild( alt.getFirstChild() ); - //alt.setFirstChild( synpredAST ); - GrammarAST[] children = alt.getChildrenAsArray(); - adaptor.setChild( alt, 0, synpredAST ); - for ( int i = 0; i < children.length; i++ ) - { - if ( i < children.length - 1 ) - adaptor.setChild( alt, i + 1, children[i] ); - else - adaptor.addChild( alt, children[i] ); - } - } -} - -protected GrammarAST createSynSemPredFromBlock( GrammarAST synpredBlockAST, int synpredTokenType ) { - // add grammar fragment to a list so we can make fake rules for them later. - String predName = grammar.defineSyntacticPredicate( synpredBlockAST, currentRuleName ); - // convert (alpha)=> into {synpredN}? where N is some pred count - // during code gen we convert to function call with templates - String synpredinvoke = predName; - GrammarAST p = (GrammarAST)adaptor.create( synpredTokenType, synpredinvoke ); - // track how many decisions have synpreds - grammar.blocksWithSynPreds.add( currentBlockAST ); - return p; -} - -public static GrammarAST createSimpleRuleAST( String name, GrammarAST block, boolean fragment ) { - TreeAdaptor adaptor = new grammar_Adaptor(null); - - GrammarAST modifier = null; - if ( fragment ) - { - modifier = (GrammarAST)adaptor.create( FRAGMENT, "fragment" ); - } - - /* - * EOBAST = block.getLastChild() - * ^(RULE[block,"rule"] ID["name"] {modifier} ARG["ARG"] RET["RET"] SCOPE["scope"] {block} EOR[EOBAST,""]) - */ - GrammarAST rule = (GrammarAST)adaptor.create( RULE, block.getToken(), "rule" ); - - adaptor.addChild( rule, adaptor.create( ID, name ) ); - if ( modifier != null ) - adaptor.addChild( rule, modifier ); - adaptor.addChild( rule, adaptor.create( ARG, "ARG" ) ); - adaptor.addChild( rule, adaptor.create( RET, "RET" ) ); - adaptor.addChild( rule, adaptor.create( SCOPE, "scope" ) ); - adaptor.addChild( rule, block ); - adaptor.addChild( rule, adaptor.create( EOR, block.getLastChild().getToken(), "" ) ); - - return rule; -} - -@Override -public void reportError(RecognitionException ex) -{ - //Token token = null; - //try - //{ - // token = LT( 1 ); - //} - //catch ( TokenStreamException tse ) - //{ - // ErrorManager.internalError( "can't get token???", tse ); - //} - Token token = ex.token; - ErrorManager.syntaxError( - ErrorManager.MSG_SYNTAX_ERROR, - grammar, - token, - "antlr: " + ex.toString(), - ex ); -} - -public void cleanup( GrammarAST root ) -{ - if ( grammarType == Grammar.LEXER ) - { - String filter = (String)grammar.getOption( "filter" ); - GrammarAST tokensRuleAST = - grammar.addArtificialMatchTokensRule( - root, - grammar.lexerRuleNamesInCombined, - grammar.getDelegateNames(), - filter != null && filter.equals( "true" ) ); - } -} -} - -public -grammar_[Grammar g] -@init -{ - this.grammar = g; - Map opts; -} -@after -{ - cleanup( $tree ); -} +grammar_ : //hdr:headerSpec ( ACTION )? - ( cmt=DOC_COMMENT )? - gr=grammarType gid=id {grammar.setName($gid.text);} SEMI - ( optionsSpec {opts = $optionsSpec.opts; grammar.setOptions(opts, $optionsSpec.start);} + (DOC_COMMENT )?grammarTypeid SEMI + ( optionsSpec )? - (ig=delegateGrammars)? - (ts=tokensSpec)? - scopes=attrScopes - (a=actions)? - r=rules + (delegateGrammars)? + (tokensSpec)?attrScopes + (actions)?rules EOF ; grammarType - : ( 'lexer' gr='grammar' {grammarType=Grammar.LEXER; grammar.type = Grammar.LEXER;} - | 'parser' gr='grammar' {grammarType=Grammar.PARSER; grammar.type = Grammar.PARSER;} - | 'tree' gr='grammar' {grammarType=Grammar.TREE_PARSER; grammar.type = Grammar.TREE_PARSER;} - | gr='grammar' {grammarType=Grammar.COMBINED; grammar.type = Grammar.COMBINED;} + : ( 'lexer''grammar' + | 'parser''grammar' + | 'tree''grammar' + |'grammar' ) ; @@ -498,31 +163,24 @@ action */ actionScopeName : id - | l='lexer' - | p='parser' + |'lexer' + |'parser' ; -optionsSpec returns [Map opts=new HashMap()] - : OPTIONS (option[$opts] SEMI)+ RCURLY +optionsSpec returns + : OPTIONS (option SEMI)+ RCURLY ; -option[Map opts] +option : id ASSIGN optionValue - { - $opts.put($id.text, $optionValue.value); - } - ; - -optionValue returns [Object value = null] - : x=id {$value = $x.text;} - | s=STRING_LITERAL {String vs = $s.text; - // remove the quotes: - $value=vs.substring(1,vs.length()-1);} - | c=CHAR_LITERAL {String vs = $c.text; - // remove the quotes: - $value=vs.substring(1,vs.length()-1);} - | i=INT {$value = Integer.parseInt($i.text);} - | ss=STAR {$value = "*";} + ; + +optionValue returns + :id + |STRING_LITERAL + |CHAR_LITERAL + |INT + |STAR // | cs:charSet {value = #cs;} // return set AST in this case ; @@ -531,8 +189,8 @@ delegateGrammars ; delegateGrammar - : lab=id ASSIGN g=id {grammar.importGrammar($g.tree, $lab.text);} - | g2=id {grammar.importGrammar($g2.tree,null);} + :id ASSIGNid + |id ; tokensSpec @@ -558,44 +216,27 @@ rules )+ ; -public + rule -@init -{ - GrammarAST eob=null; - CommonToken start = (CommonToken)LT(1); - int startLine = LT(1).getLine(); -} : - ( ( d=DOC_COMMENT - )? - ( p1='protected' //{modifier=$p1.tree;} - | p2='public' //{modifier=$p2.tree;} - | p3='private' //{modifier=$p3.tree;} - | p4='fragment' //{modifier=$p4.tree;} + ( (DOC_COMMENT )? - ruleName=id - { - currentRuleName=$ruleName.text; - if ( grammarType==Grammar.LEXER && $p4==null ) - grammar.lexerRuleNamesInCombined.add(currentRuleName); - } + ('protected' //{modifier=$p1.tree;} + |'public' //{modifier=$p2.tree;} + |'private' //{modifier=$p3.tree;} + |'fragment' //{modifier=$p4.tree;} + )?id ( BANG )? - ( aa=ARG_ACTION )? - ( 'returns' rt=ARG_ACTION )? + (ARG_ACTION )? + ( 'returns'ARG_ACTION )? ( throwsSpec )? - ( optionsSpec )? - scopes=ruleScopeSpec + ( optionsSpec )?ruleScopeSpec (ruleActions)? COLON - ruleAltList[$optionsSpec.opts] + ruleAltList SEMI - ( ex=exceptionGroup )? + (exceptionGroup )? ) - { - $tree.setTreeEnclosingRuleNameDeeply(currentRuleName); - ((GrammarAST)$tree.getChild(0)).setBlockOptions($optionsSpec.opts); - } ; ruleActions @@ -616,65 +257,34 @@ ruleScopeSpec ( 'scope' idList SEMI )* ; -ruleAltList[Map opts] -@init -{ - GrammarAST blkRoot = null; - GrammarAST save = currentBlockAST; -} - : ( ) - { - blkRoot = (GrammarAST)$tree.getChild(0); - blkRoot.setBlockOptions($opts); - currentBlockAST = blkRoot; - } - ( a1=alternative r1=rewrite - {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a1.tree);} +ruleAltList + : + (alternativerewrite ) - ( ( OR a2=alternative r2=rewrite - {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a2.tree);} + ( ( ORalternativerewrite )+ | ) ; -finally { currentBlockAST = save; } +finally /** Build #(BLOCK ( #(ALT ...) EOB )+ ) */ block -@init -{ - GrammarAST save = currentBlockAST; -} - : ( lp=LPAREN + : (LPAREN ) - {currentBlockAST = (GrammarAST)$tree.getChild(0);} ( // 2nd alt and optional branch ambig due to // linear approx LL(2) issue. COLON ACTION // matched correctly in 2nd alt. - (optionsSpec {((GrammarAST)$tree.getChild(0)).setOptions(grammar,$optionsSpec.opts);})? + (optionsSpec)? ( ruleActions )? COLON | ACTION COLON - )? - - a=alternative r=rewrite - { - stream_alternative.add( $r.tree ); - if ( LA(1)==OR || (LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR) ) - prefixWithSynPred($a.tree); - } - ( OR a=alternative r=rewrite - { - stream_alternative.add( $r.tree ); - if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) - prefixWithSynPred($a.tree); - } - )* - - rp=RPAREN + )?alternativerewrite + ( ORalternativerewrite + )*RPAREN ; -finally { currentBlockAST = save; } +finally // ALT and EOA have indexes tracking start/stop of entire alt alternative @@ -700,32 +310,25 @@ element ; elementNoOptionSpec -@init -{ - IntSet elements=null; -} : ( id (ASSIGN|PLUS_ASSIGN) - ( atom (sub=ebnfSuffix[root_0,false] {root_0 = $sub.tree;})? + ( atom (ebnfSuffix)? | ebnf ) - | a=atom - ( sub2=ebnfSuffix[$a.tree,false] {root_0=$sub2.tree;} + |atom + (ebnfSuffix )? | ebnf | FORCED_ACTION | ACTION - | p=SEMPRED ( IMPLIES {$p.setType(GATED_SEMPRED);} )? - { - grammar.blocksWithSemPreds.add(currentBlockAST); - } - | t3=tree_ + |SEMPRED ( IMPLIES )? + |tree_ ) ; atom : range (ROOT|BANG)? | ( - id w=WILDCARD (terminal|ruleref) {$w.setType(DOT);} + idWILDCARD (terminal|ruleref) | terminal | ruleref ) @@ -744,8 +347,6 @@ notSet ; treeRoot -@init{atTreeRoot=true;} -@after{atTreeRoot=false;} : id (ASSIGN|PLUS_ASSIGN) (atom|block) | atom | block @@ -771,85 +372,49 @@ ebnf ; range - : {Rule.getRuleType(currentRuleName) == Grammar.LEXER}? - c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL + : CHAR_LITERAL RANGECHAR_LITERAL | // range elsewhere is an error - ( t=TOKEN_REF r=RANGE TOKEN_REF - | t=STRING_LITERAL r=RANGE STRING_LITERAL - | t=CHAR_LITERAL r=RANGE CHAR_LITERAL - ) - { - ErrorManager.syntaxError( - ErrorManager.MSG_RANGE_OP_ILLEGAL,grammar,$r,null,null); - } // have to generate something for surrounding code, just return first token + (TOKEN_REFRANGE TOKEN_REF + |STRING_LITERALRANGE STRING_LITERAL + |CHAR_LITERALRANGE CHAR_LITERAL + ) // have to generate something for surrounding code, just return first token ; terminal - : cl=CHAR_LITERAL ( elementOptions[$cl.tree] )? (ROOT|BANG)? + :CHAR_LITERAL ( elementOptions )? (ROOT|BANG)? - | tr=TOKEN_REF - ( elementOptions[$tr.tree] )? + |TOKEN_REF + ( elementOptions )? ( ARG_ACTION )? // Args are only valid for lexer rules (ROOT|BANG)? - | sl=STRING_LITERAL ( elementOptions[$sl.tree] )? (ROOT|BANG)? + |STRING_LITERAL ( elementOptions )? (ROOT|BANG)? - | wi=WILDCARD (ROOT|BANG)? - { - if ( atTreeRoot ) - { - ErrorManager.syntaxError( - ErrorManager.MSG_WILDCARD_AS_ROOT,grammar,$wi,null,null); - } - } + |WILDCARD (ROOT|BANG)? ; -elementOptions[GrammarAST terminalAST] - : OPEN_ELEMENT_OPTION defaultNodeOption[terminalAST] CLOSE_ELEMENT_OPTION - | OPEN_ELEMENT_OPTION elementOption[terminalAST] (SEMI elementOption[terminalAST])* CLOSE_ELEMENT_OPTION +elementOptions + : OPEN_ELEMENT_OPTION defaultNodeOption CLOSE_ELEMENT_OPTION + | OPEN_ELEMENT_OPTION elementOption (SEMI elementOption)* CLOSE_ELEMENT_OPTION ; -defaultNodeOption[GrammarAST terminalAST] +defaultNodeOption : elementOptionId - {terminalAST.setTerminalOption(grammar,Grammar.defaultTokenOption,$elementOptionId.qid);} ; -elementOption[GrammarAST terminalAST] +elementOption : id ASSIGN ( elementOptionId - {terminalAST.setTerminalOption(grammar,$id.text,$elementOptionId.qid);} - | (t=STRING_LITERAL|t=DOUBLE_QUOTE_STRING_LITERAL|t=DOUBLE_ANGLE_STRING_LITERAL) - {terminalAST.setTerminalOption(grammar,$id.text,$t.text);} + | (STRING_LITERAL|DOUBLE_QUOTE_STRING_LITERAL|DOUBLE_ANGLE_STRING_LITERAL) ) ; -elementOptionId returns [String qid] -@init{StringBuffer buf = new StringBuffer();} - : i=id {buf.append($i.text);} ('.' i=id {buf.append("." + $i.text);})* - {$qid = buf.toString();} +elementOptionId returns + :id ('.'id)* ; -ebnfSuffix[GrammarAST elemAST, boolean inRewrite] -@init -{ -GrammarAST blkRoot=null; -GrammarAST alt=null; -GrammarAST save = currentBlockAST; -} -@after -{ -currentBlockAST = save; -} - : ( - ) - { blkRoot = (GrammarAST)$tree.getChild(0); currentBlockAST = blkRoot; } - ( - ) - { - alt = (GrammarAST)$tree.getChild(0); - if ( !inRewrite ) - prefixWithSynPred(alt); - } +ebnfSuffix + : ( QUESTION | STAR | PLUS @@ -890,31 +455,32 @@ rewrite_block ; rewrite_alternative - : {grammar.buildTemplate()}? rewrite_template +options{k=1;} + : rewrite_template - | {grammar.buildAST()}? ( rewrite_element )+ + | ( rewrite_element )+ | - | {grammar.buildAST()}? ETC + | ETC ; rewrite_element - : ( t=rewrite_atom + : (rewrite_atom ) - ( subrule=ebnfSuffix[$t.tree,true] + (ebnfSuffix )? | rewrite_ebnf - | ( tr=rewrite_tree + | (rewrite_tree ) - ( subrule=ebnfSuffix[$tr.tree,true] + (ebnfSuffix )? ; rewrite_atom - : tr=TOKEN_REF elementOptions[$tr.tree]? ARG_ACTION? // for imaginary nodes + :TOKEN_REF elementOptions? ARG_ACTION? // for imaginary nodes | RULE_REF - | cl=CHAR_LITERAL elementOptions[$cl.tree]? - | sl=STRING_LITERAL elementOptions[$sl.tree]? + |CHAR_LITERAL elementOptions? + |STRING_LITERAL elementOptions? | DOLLAR label // reference to a label in a rewrite rule | ACTION ; @@ -925,7 +491,7 @@ label ; rewrite_ebnf - : b=rewrite_block + :rewrite_block ( QUESTION | STAR | PLUS @@ -949,14 +515,13 @@ rewrite_tree -> {%{$ID.text}} // create literal template from string (done in ActionTranslator) -> {st-expr} // st-expr evaluates to ST */ -public + rewrite_template - : // -> template(a={...},...) "..." - {LT(1).getText().equals("template")}? // inline +options{k=1;} + : // inline ( rewrite_template_head ) - ( st=DOUBLE_QUOTE_STRING_LITERAL | st=DOUBLE_ANGLE_STRING_LITERAL ) - { adaptor.addChild( $tree.getChild(0), adaptor.create($st) ); } + (DOUBLE_QUOTE_STRING_LITERAL |DOUBLE_ANGLE_STRING_LITERAL ) | // -> foo(a={...}, ...) rewrite_template_head @@ -970,14 +535,14 @@ rewrite_template /** -> foo(a={...}, ...) */ rewrite_template_head - : id lp=LPAREN + : idLPAREN rewrite_template_args RPAREN ; /** -> ({expr})(a={...}, ...) */ rewrite_indirect_template_head - : lp=LPAREN + :LPAREN ACTION RPAREN LPAREN rewrite_template_args RPAREN @@ -989,7 +554,7 @@ rewrite_template_args ; rewrite_template_arg - : id a=ASSIGN ACTION + : idASSIGN ACTION ; ////////////////////////////////////////////////////////////////////////////// @@ -1008,16 +573,10 @@ WS | '\t' | ('\r')? '\n' ) - { $channel = HIDDEN; } ; COMMENT -@init{List type = new ArrayList() {{ add(0); }};} - : ( SL_COMMENT | ML_COMMENT[type] {$type = type.get(0);} ) - { - if ( $type != DOC_COMMENT ) - $channel = HIDDEN; - } + : ( SL_COMMENT | ML_COMMENT ) ; fragment @@ -1029,7 +588,7 @@ SL_COMMENT ; fragment -ML_COMMENT[List type] +ML_COMMENT : '/*' .* '*/' @@ -1071,9 +630,9 @@ REWRITE : '->' ; SEMI: ';' ; -ROOT : '^' {hasASTOperator=true;} ; +ROOT : '^' ; -BANG : '!' {hasASTOperator=true;} ; +BANG : '!' ; OR : '|' ; @@ -1099,29 +658,15 @@ CHAR_LITERAL | ~('\\'|'\'') )* '\'' - { - StringBuffer s = Grammar.getUnescapedStringFromGrammarStringLiteral($text); - if ( s.length() > 1 ) - { - $type = STRING_LITERAL; - } - } ; DOUBLE_QUOTE_STRING_LITERAL -@init -{ - StringBuilder builder = new StringBuilder(); -} - : '"' {builder.append('"');} - ( '\\' '"' {builder.append('"');} - | '\\'~'"' {builder.append("\\" + (char)$c);} - |~('\\'|'"') {builder.append((char)$c);} + : '"' + ( '\\' '"' + | '\\'~'"' + |~('\\'|'"') )* - '"' {builder.append('"');} - { - setText(builder.toString()); - } + '"' ; DOUBLE_ANGLE_STRING_LITERAL @@ -1163,21 +708,13 @@ INT ; ARG_ACTION -@init { - List text = new ArrayList() {{ add(null); }}; -} : '[' - NESTED_ARG_ACTION[text] + NESTED_ARG_ACTION ']' - {setText(text.get(0));} ; fragment -NESTED_ARG_ACTION[List text] -@init { - $text.set(0, ""); - StringBuilder builder = new StringBuilder(); -} +NESTED_ARG_ACTION : ( '\\' ']' | '\\'~(']') | ACTION_STRING_LITERAL @@ -1187,24 +724,8 @@ NESTED_ARG_ACTION[List text] ; ACTION -@init -{ - int actionLine = getLine(); - int actionColumn = getCharPositionInLine(); -} : NESTED_ACTION - ('?' {$type = SEMPRED;})? - { - String action = $text; - int n = 1; // num delimiter chars - if ( action.startsWith("{{") && action.endsWith("}}") ) - { - $type = FORCED_ACTION; - n = 2; - } - action = action.substring(n,action.length()-n - ($type==SEMPRED ? 1 : 0)); - setText(action); - } + ('?')? ; fragment @@ -1261,10 +782,6 @@ OPTIONS // we get a warning here when looking for options '{', but it works right RULE_REF -@init -{ - int t=0; -} : 'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* ; @@ -1287,32 +804,4 @@ WS_OPT fragment SRC : 'src' ' 'ACTION_STRING_LITERAL ' 'INT - ; - -LEXER : 'lexer' ; - -PARSER : 'parser' ; - -CATCH : 'catch' ; - -FINALLY : 'finally' ; - -GRAMMAR : 'grammar' ; - -PRIVATE : 'private' ; - -PROTECTED : 'protected' ; - -PUBLIC : 'public' ; - -RETURNS : 'returns' ; - -THROWS : 'throws' ; - -TREE : 'tree' ; - -SCOPE : 'scope' ; - -IMPORT : 'import' ; - -FRAGMENT : 'fragment' ; \ No newline at end of file + ; \ No newline at end of file diff --git a/_tests/trconvert/antlr3/Gold/C.g4 b/_tests/trconvert/antlr3/Gold/C.g4 index 030697fac..c9dfea56e 100644 --- a/_tests/trconvert/antlr3/Gold/C.g4 +++ b/_tests/trconvert/antlr3/Gold/C.g4 @@ -31,32 +31,13 @@ Terence Parr July 2006 */ grammar C; - -scope Symbols { - Set types; // only track types in order to get parser working +options { + k=2; } -@header { -import java.util.Set; -import java.util.HashSet; -} +scope Symbols -@members { - boolean isTypeName(String name) { - for (int i = Symbols_stack.size()-1; i>=0; i--) { - Symbols_scope scope = (Symbols_scope)Symbols_stack.get(i); - if ( scope.types.contains(name) ) { - return true; - } - } - return false; - } -} - -translation_unit // entire file is a scope -@init { - $Symbols::types = new HashSet(); -} +translation_unit : external_declaration+ ; @@ -75,14 +56,12 @@ translation_unit // entire file is a scope * I'll have to optimize that in the future. */ external_declaration +options {k=1;} : function_definition | declaration ; -function_definition // put parameters and locals into same scope for now -@init { - $Symbols::types = new HashSet(); -} +function_definition : declaration_specifiers? declarator ( declaration+ compound_statement // K&R style | compound_statement // ANSI style @@ -90,10 +69,7 @@ function_definition // put parameters and locals into same scope for now ; declaration -@init { - $declaration::isTypedef = false; -} - : 'typedef' declaration_specifiers? {$declaration::isTypedef=true;} + : 'typedef' declaration_specifiers? init_declarator_list ';' // special case, looking for typedef | declaration_specifiers init_declarator_list? ';' ; @@ -136,14 +112,12 @@ type_specifier ; type_id - : {isTypeName(input.LT(1).getText())}? IDENTIFIER + : IDENTIFIER // {System.out.println($IDENTIFIER.text+" is a type");} ; -struct_or_union_specifier // structs are scopes -@init { - $Symbols::types = new HashSet(); -} +struct_or_union_specifier +options {k=3;} : struct_or_union IDENTIFIER? '{' struct_declaration_list '}' | struct_or_union IDENTIFIER ; @@ -175,6 +149,7 @@ struct_declarator ; enum_specifier +options {k=3;} : 'enum' '{' enumerator_list '}' | 'enum' IDENTIFIER '{' enumerator_list '}' | 'enum' IDENTIFIER @@ -200,12 +175,6 @@ declarator direct_declarator : ( IDENTIFIER - { - if ($declaration.size()>0&&$declaration::isTypedef) { - $Symbols::types.add($IDENTIFIER.text); - System.out.println("define type "+$IDENTIFIER.text); - } - } | '(' declarator ')' ) declarator_suffix* @@ -419,10 +388,7 @@ labeled_statement | 'default' ':' statement ; -compound_statement // blocks have a scope of symbols -@init { - $Symbols::types = new HashSet(); -} +compound_statement : '{' declaration* statement_list? '}' ; @@ -520,18 +486,18 @@ UnicodeEscape : '\\' 'u' HexDigit HexDigit HexDigit HexDigit ; -WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} +WS : (' '|'\r'|'\t'|'\u000C'|'\n') ; COMMENT - : '/*' ( . ) * ? '*/' {$channel=HIDDEN;} + : '/*' ( . )*? '*/' ; LINE_COMMENT - : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} + : '//' ~('\n'|'\r')* '\r'? '\n' ; // ignore #line info for now LINE_COMMAND - : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} + : '#' ~('\n'|'\r')* '\r'? '\n' ; diff --git a/_tests/trconvert/antlr3/Gold/DCM_2_0_grammar.g4 b/_tests/trconvert/antlr3/Gold/DCM_2_0_grammar.g4 index babf3865d..37751346d 100644 --- a/_tests/trconvert/antlr3/Gold/DCM_2_0_grammar.g4 +++ b/_tests/trconvert/antlr3/Gold/DCM_2_0_grammar.g4 @@ -253,8 +253,8 @@ fragment Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; -WS : (' '|'\r'|'\t'|'\u000C') {skip();} +WS : (' '|'\r'|'\t'|'\u000C') ; -COMMENT : ('*'|'!'|'.') ( . ) * ? '\n' {skip();} +COMMENT : ('*'|'!'|'.') ( . )*? '\n' ; diff --git a/_tests/trconvert/antlr3/Gold/FreeMPS.g4 b/_tests/trconvert/antlr3/Gold/FreeMPS.g4 index b7e6c56b2..a4fb863b4 100644 --- a/_tests/trconvert/antlr3/Gold/FreeMPS.g4 +++ b/_tests/trconvert/antlr3/Gold/FreeMPS.g4 @@ -6,19 +6,6 @@ options { } -@lexer::members { - override - public void EmitErrorMessage(String s) { - throw new Exception(s); - } -} - -@rulecatch{ - catch(RecognitionException e){ - throw e; - } -} - /*------------------------------------------------------------------ * PARSER RULES *------------------------------------------------------------------*/ @@ -116,8 +103,8 @@ NUMERICALVALUE : DIGIT DIGITS*; * Diese Zeichen allein sind noch keine Token *------------------------------------------------------------------*/ -WS : (' ' | '\t' | '\n' | '\r' | '\f')+ {$channel=HIDDEN;}; -LINE_COMMENT : ('*' | '$') ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}; +WS : (' ' | '\t' | '\n' | '\r' | '\f')+; +LINE_COMMENT : ('*' | '$') ~('\n'|'\r')* '\r'? '\n'; fragment ZEICHEN: (LETTER | DIGIT) ; //Ein Zeichen ist noch kein Token, besteht aber aus ein einem Buchstaben oder einer Zahl fragment LETTER : ('a'..'z' | 'A'..'Z' | '_' | '/' | '#' | '@' | '(' | ')');//Ein Buchstabe ist noch kein Token fragment DIGIT : '0'..'9' | '-' | '+' | '.' | ',' ; //Eine Ziffer ist noch kein Token diff --git a/_tests/trconvert/antlr3/Gold/Java.g4 b/_tests/trconvert/antlr3/Gold/Java.g4 index 5ad09100c..e449d0a66 100644 --- a/_tests/trconvert/antlr3/Gold/Java.g4 +++ b/_tests/trconvert/antlr3/Gold/Java.g4 @@ -1318,7 +1318,7 @@ NonIntegerNumber | ('0' .. '9')+ | HexPrefix (HexDigit )* - ( () + ( | ('.' (HexDigit )* ) ) ( 'p' | 'P' ) @@ -1392,42 +1392,18 @@ WS | '\t' | '\u000C' | '\n' - ) - { - skip(); - } + ) ; COMMENT - @init{ - boolean isJavaDoc = false; - } : '/*' - { - if((char)input.LA(1) == '*'){ - isJavaDoc = true; - } - } - ( . ) * ? + ( . )*? '*/' - { - if(isJavaDoc==true){ - $channel=HIDDEN; - }else{ - skip(); - } - } ; LINE_COMMENT - : '//' ~('\n'|'\r')* ('\r\n' | '\r' | '\n') - { - skip(); - } - | '//' ~('\n'|'\r')* // a line comment could appear at the end of the file without CR/LF - { - skip(); - } + : '//' ~('\n'|'\r')* ('\r\n' | '\r' | '\n') + | '//' ~('\n'|'\r')* ; ABSTRACT diff --git a/_tests/trconvert/antlr3/Gold/Java6Lex.g4 b/_tests/trconvert/antlr3/Gold/Java6Lex.g4 index f6634fe37..1dc80aeef 100644 --- a/_tests/trconvert/antlr3/Gold/Java6Lex.g4 +++ b/_tests/trconvert/antlr3/Gold/Java6Lex.g4 @@ -85,7 +85,7 @@ NonIntegerNumber | ('0' .. '9')+ | HexPrefix (HexDigit )* - ( () + ( | ('.' (HexDigit )* ) ) ( 'p' | 'P' ) @@ -160,41 +160,17 @@ WS | '\u000C' | '\n' ) - { - skip(); - } ; COMMENT - @init{ - boolean isJavaDoc = false; - } : '/*' - { - if((char)input.LA(1) == '*'){ - isJavaDoc = true; - } - } - ( . ) * ? + ( . )*? '*/' - { - if(isJavaDoc==true){ - $channel=HIDDEN; - }else{ - skip(); - } - } ; LINE_COMMENT : '//' ~('\n'|'\r')* ('\r\n' | '\r' | '\n') - { - skip(); - } - | '//' ~('\n'|'\r')* // a line comment could appear at the end of the file without CR/LF - { - skip(); - } + | '//' ~('\n'|'\r')* ; ABSTRACT diff --git a/_tests/trconvert/antlr3/Gold/Lua.g4 b/_tests/trconvert/antlr3/Gold/Lua.g4 index 8b9375442..7d87bba98 100644 --- a/_tests/trconvert/antlr3/Gold/Lua.g4 +++ b/_tests/trconvert/antlr3/Gold/Lua.g4 @@ -141,16 +141,16 @@ HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; COMMENT - : '--[[' ( . ) * ? ']]' {skip();} + : '--[[' ( . )*? ']]' ; LINE_COMMENT - : '--' ~('\n'|'\r')* '\r'? '\n' {skip();} + : '--' ~('\n'|'\r')* '\r'? '\n' ; -WS : (' '|'\t'|'\u000C') {skip();} +WS : (' '|'\t'|'\u000C') ; -NEWLINE : ('\r')? '\n' {skip();} +NEWLINE : ('\r')? '\n' ; diff --git a/_tests/trconvert/antlr3/Gold/ObjectiveC2ansi.g4 b/_tests/trconvert/antlr3/Gold/ObjectiveC2ansi.g4 index 7d46e4c4e..a243e305e 100644 --- a/_tests/trconvert/antlr3/Gold/ObjectiveC2ansi.g4 +++ b/_tests/trconvert/antlr3/Gold/ObjectiveC2ansi.g4 @@ -458,11 +458,11 @@ UnicodeEscape : '\\' 'u' HexDigit HexDigit HexDigit HexDigit ; -WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} +WS : (' '|'\r'|'\t'|'\u000C'|'\n') ; COMMENT - : '/*' ( . ) * ? '*/' + : '/*' ( . )*? '*/' ; LINE_COMMENT diff --git a/_tests/trconvert/antlr3/Gold/Sexpr.g4 b/_tests/trconvert/antlr3/Gold/Sexpr.g4 index 69b03c701..f3388da76 100644 --- a/_tests/trconvert/antlr3/Gold/Sexpr.g4 +++ b/_tests/trconvert/antlr3/Gold/Sexpr.g4 @@ -35,8 +35,14 @@ RPAREN, DOT } -@package {com.arcanearcade.antlr} -@lexer::package {com.arcanearcade.antlr} +// Token string literals converted to explicit lexer rules. +// Reorder these rules accordingly. + +LPAREN: '('; +RPAREN: ')'; +// + + sexpr @@ -58,14 +64,13 @@ STRING :'"' ( '\\' . | ~('\\'|'"') )* '"' ; WHITESPACE - : (' ' | '\n' | '\t' | '\r')+ - {skip();} + : (' ' | '\n' | '\t' | '\r')+ ; NUMBER : ('+' | '-')? (DIGIT)+ ('.' (DIGIT)+)? ; SYMBOL - : SYMBOL_START (SYMBOL_START | DIGIT)* {if ($text == '.') $type = DOT;} + : SYMBOL_START (SYMBOL_START | DIGIT)* ; fragment @@ -78,7 +83,3 @@ fragment DIGIT : ('0'..'9') ; - -LPAREN : '(' ; - -RPAREN : ')' ; diff --git a/_tests/trconvert/antlr3/Gold/StackTraceText.g4 b/_tests/trconvert/antlr3/Gold/StackTraceText.g4 index 5a9690cad..5d9e141ba 100644 --- a/_tests/trconvert/antlr3/Gold/StackTraceText.g4 +++ b/_tests/trconvert/antlr3/Gold/StackTraceText.g4 @@ -19,7 +19,21 @@ UNKNOWN_SOURCE, INIT } -@rulecatch { } /** Message will catch anything */ +// Token string literals converted to explicit lexer rules. +// Reorder these rules accordingly. + +DOT: '.'; +AT: 'at'; +CAUSED_BY: 'Caused by:'; +MORE: 'more'; +ELLIPSIS: '...'; +COLON: ':'; +NATIVE_METHOD: 'Native Method'; +UNKNOWN_SOURCE: 'Unknown Source'; +INIT: ''; +// + + /** Message will catch anything */ startRule : stackTrace EOF; @@ -76,7 +90,7 @@ className : JavaWord; identifier : JavaWord; -message : COLON ( .) * ?; +message : COLON ( .)*?; Number : Digit+ ; @@ -101,23 +115,5 @@ fragment Symbol : '_' fragment Digit : '0'..'9'; -WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} +WS : (' '|'\r'|'\t'|'\u000C'|'\n') ; - -DOT : '.' ; - -AT : 'at' ; - -CAUSED_BY : 'Caused by:' ; - -MORE : 'more' ; - -ELLIPSIS : '...' ; - -COLON : ':' ; - -NATIVE_METHOD : 'Native Method' ; - -UNKNOWN_SOURCE : 'Unknown Source' ; - -INIT : '' ; diff --git a/_tests/trconvert/antlr3/Gold/Verilog3.g4 b/_tests/trconvert/antlr3/Gold/Verilog3.g4 index a5f7caa77..9848a4fa0 100644 --- a/_tests/trconvert/antlr3/Gold/Verilog3.g4 +++ b/_tests/trconvert/antlr3/Gold/Verilog3.g4 @@ -2,7 +2,10 @@ grammar Verilog3; options { - language= C; // 3 characters of lookahead + language= C; + //tokenVocab = Verilog; //call the vocabulary 'Verilog' + //testLiterals = false; don't automatically test for literals + k = 3; // 3 characters of lookahead } @@ -112,9 +115,7 @@ udp_initial_statement : init_val : '1\'b0' | '1\'b1' | - '1\'bx' | - n=NUMBER - { $n.text=="0" || $n.text=="1"}? + '1\'bx' |NUMBER ; table_definition : @@ -532,7 +533,7 @@ non_blocking_assignment : // so suppress warning. conditional_statement : 'if' LPAREN expression RPAREN statement_or_null - ( : 'else' statement_or_null)? + ( 'else' statement_or_null)? ; case_statement : @@ -732,12 +733,8 @@ edge_control_specifier : // thus avoiding a lexical conflict. edge_descriptor : '0x' | '1x' - | - n=NUMBER - { $n.text=="01" || $n.text=="10"}? - | - i=IDENTIFIER - { $i.text=="x1" || $i.text=="x0"}? + |NUMBER + |IDENTIFIER ; timing_check_condition : @@ -1132,13 +1129,12 @@ fragment SPACE_OR_TAB WS : SPACE_OR_TAB+ - {$channel=HIDDEN;} ; ML_COMMENT - : '/*' ( . ) * ? '*/' {$channel=HIDDEN;} + : '/*' ( . )*? '*/' ; SL_COMMENT - : '//' ( . ) * ? '\r'? '\n' {$channel=HIDDEN;} + : '//' ( . )*? '\r'? '\n' ; \ No newline at end of file diff --git a/_tests/trconvert/antlr3/Gold/WavefrontOBJ.g4 b/_tests/trconvert/antlr3/Gold/WavefrontOBJ.g4 index f6c567a58..7f88a636b 100644 --- a/_tests/trconvert/antlr3/Gold/WavefrontOBJ.g4 +++ b/_tests/trconvert/antlr3/Gold/WavefrontOBJ.g4 @@ -15,21 +15,17 @@ comment : COMMENT face : FACE (INTEGER)+ ; -vertex : GEOMETRIC_VERTEX x=DECIMAL y=DECIMAL z=DECIMAL +vertex : GEOMETRIC_VERTEXDECIMALDECIMALDECIMAL ; -mtllib : MATERIAL_LIBRARY a=NAME '.mtl' - { - System.out.println($a.text); - System.out.println($NAME.text); - } +mtllib : MATERIAL_LIBRARYNAME '.mtl' ; -object : OBJECT_NAME a=NAME +object : OBJECT_NAMENAME ; use_material - : MATERIAL_NAME a=NAME + : MATERIAL_NAMENAME ; group : SMOOTHING_GROUP ('on' | 'off' ) @@ -169,11 +165,11 @@ INTEGER : '-'? (DIGIT)+; DECIMAL: INTEGER ('.' DIGIT*)?; -COMMENT : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} +COMMENT : '#' ~('\n'|'\r')* '\r'? '\n' ; NAME : ( 'A'..'Z' | 'a'..'z' | '0'..'9' | '-' | '_' | '~'| '(' | ')' )+ ; -WS: (' ' | '\t')+ {$channel = HIDDEN;} +WS: (' ' | '\t')+ ; diff --git a/_tests/trconvert/antlr3/Gold/XPath2.g4 b/_tests/trconvert/antlr3/Gold/XPath2.g4 index 03a81c9c4..3bc11b262 100644 --- a/_tests/trconvert/antlr3/Gold/XPath2.g4 +++ b/_tests/trconvert/antlr3/Gold/XPath2.g4 @@ -215,9 +215,6 @@ This lexer rule for comments handles multiline, nested comments */ COMMENT_CONTENTS : '(:' - { - $channel=98; - } ( ~('('|':') | '(' | ':' @@ -228,7 +225,7 @@ COMMENT_CONTENTS WS - : (' '|'\r'|'\t'|'\u000C'|'\n')+ {$channel = HIDDEN;} + : (' '|'\r'|'\t'|'\u000C'|'\n')+ ; diff --git a/_tests/trconvert/antlr3/Gold/css21.g4 b/_tests/trconvert/antlr3/Gold/css21.g4 index aa1b5b7a2..04961c8c0 100644 --- a/_tests/trconvert/antlr3/Gold/css21.g4 +++ b/_tests/trconvert/antlr3/Gold/css21.g4 @@ -486,11 +486,7 @@ fragment Z : ('z'|'Z') ('\r'|'\n'|'\t'|'\f'|' ')* // COMMENTS are hidden from the parser which simplifies the parser // grammar a lot. // -COMMENT : '/*' ( .*) * ? '*/' - - { - $channel = 2; // Comments on channel 2 in case we want to find them - } +COMMENT : '/*' ( .*) '*/' ; // --------------------- @@ -500,10 +496,6 @@ COMMENT : '/*' ( .*) * ? '*/' // it from the ANLTR parser. // CDO : '' - - { - $channel = 4; // CDC on channel 4 in case we want it later - } ; INCLUDES : '~=' ; @@ -546,13 +534,13 @@ fragment INVALID :; STRING : '\'' ( ~('\n'|'\r'|'\f'|'\'') )* ( '\'' - | { $type = INVALID; } + | ) | '"' ( ~('\n'|'\r'|'\f'|'"') )* ( '"' - | { $type = INVALID; } + | ) ; @@ -600,8 +588,8 @@ NUMBER ( E ( - M { $type = EMS; } - | X { $type = EXS; } + M + | X ) | P @@ -609,33 +597,32 @@ NUMBER X | T | C - ) - { $type = LENGTH; } + ) | - C M { $type = LENGTH; } + C M | M ( - M { $type = LENGTH; } + M - | S { $type = TIME; } + | S ) | - I N { $type = LENGTH; } + I N | - D E G { $type = ANGLE; } + D E G | - R A D { $type = ANGLE; } + R A D - |S { $type = TIME; } + |S | - K? H Z { $type = FREQ; } + K? H Z - | IDENT { $type = DIMENSION; } + | IDENT - | '%' { $type = PERCENTAGE; } + | '%' | // Just a number ) @@ -655,8 +642,8 @@ URI : U R L // that process the whitespace within the parser, ANTLR does not // need to deal with the whitespace directly in the parser. // -WS : (' '|'\t')+ { $channel = HIDDEN; } ; -NL : ('\r' '\n'? | '\n') { $channel = HIDDEN; } ; +WS : (' '|'\t')+ ; +NL : ('\r' '\n'? | '\n') ; // ------------- diff --git a/_tests/trconvert/antlr3/Gold/krl.g4 b/_tests/trconvert/antlr3/Gold/krl.g4 index 77e8fdec7..66154e3e5 100644 --- a/_tests/trconvert/antlr3/Gold/krl.g4 +++ b/_tests/trconvert/antlr3/Gold/krl.g4 @@ -203,7 +203,7 @@ parameterWithType ; parameterCallType - : ':' { input.LT(1).getText().toLowerCase().matches("in|out") }? IDENTIFIER + : ':' IDENTIFIER ; @@ -248,7 +248,7 @@ statementList statement : CONTINUE NEWLINE | EXIT NEWLINE - | FOR IDENTIFIER '=' expression TO expression ({ input.LT(1).getText().equalsIgnoreCase("step") }? IDENTIFIER expression)? NEWLINE + | FOR IDENTIFIER '=' expression TO expression ( IDENTIFIER expression)? NEWLINE statementList ENDFOR | GOTO IDENTIFIER NEWLINE @@ -273,32 +273,32 @@ statement statementList ENDWHILE NEWLINE | RETURN (assignmentExpression )? NEWLINE - | BRAKE ({ input.LT(1).getText().equalsIgnoreCase("f") }? IDENTIFIER)? NEWLINE + | BRAKE ( IDENTIFIER)? NEWLINE | assignmentExpression NEWLINE | IDENTIFIER ':' NEWLINE | NEWLINE | GLOBAL? INTERRUPT DECL primary WHEN expression DO assignmentExpression NEWLINE - | INTERRUPT { input.LT(1).getText().toLowerCase().matches("on|off|disable|enable") }? IDENTIFIER primary? NEWLINE + | INTERRUPT IDENTIFIER primary? NEWLINE | (PTP|PTP_REL) geometricExpression ( C_PTP ( C_DIS | C_ORI | C_VEL )? )? NEWLINE | LIN geometricExpression ( C_DIS | C_ORI | C_VEL )? NEWLINE | LIN_REL geometricExpression ( C_DIS | C_ORI | C_VEL )? enumElement? NEWLINE - | (CIRC|CIRC_REL) geometricExpression ',' geometricExpression (',' { input.LT(1).getText().equalsIgnoreCase("ca") }? IDENTIFIER primary)? ( C_DIS | C_ORI | C_VEL )? NEWLINE - | TRIGGER WHEN ({ input.LT(1).getText().equalsIgnoreCase("distance") }? IDENTIFIER) '=' expression DELAY '=' expression DO assignmentExpression ( PRIO '=' expression )? NEWLINE + | (CIRC|CIRC_REL) geometricExpression ',' geometricExpression (',' IDENTIFIER primary)? ( C_DIS | C_ORI | C_VEL )? NEWLINE + | TRIGGER WHEN ( IDENTIFIER) '=' expression DELAY '=' expression DO assignmentExpression ( PRIO '=' expression )? NEWLINE | analogInputStatement NEWLINE | analogOutputStatement NEWLINE ; analogOutputStatement : ANOUT - ( { input.LT(1).getText().equalsIgnoreCase("on") }? IDENTIFIER assignmentExpression ({ input.LT(1).getText().toLowerCase().matches("delay|minimum|maximum") }? IDENTIFIER '=' literal)* - | { input.LT(1).getText().equalsIgnoreCase("off") }? IDENTIFIER IDENTIFIER + ( IDENTIFIER assignmentExpression ( IDENTIFIER '=' literal)* + | IDENTIFIER IDENTIFIER ) ; analogInputStatement : ANIN - ( { input.LT(1).getText().equalsIgnoreCase("on") }? IDENTIFIER assignmentExpression - | { input.LT(1).getText().equalsIgnoreCase("off") }? IDENTIFIER IDENTIFIER + ( IDENTIFIER assignmentExpression + | IDENTIFIER IDENTIFIER ) ; @@ -558,10 +558,7 @@ fragment Y:('y'|'Y'); fragment Z:('z'|'Z'); HEADERLINE - : '&' ~('\n'|'\r')* ('\r\n' | '\r' | '\n' | EOF) - { - skip(); - } + : '&' ~('\n'|'\r')* ('\r\n' | '\r' | '\n' | EOF) ; WS @@ -569,19 +566,13 @@ WS ' ' | '\t' | '\u000C' - ) - { - skip(); - } + ) ; NEWLINE : '\r'? '\n' ; LINE_COMMENT : ';' ~('\n' | '\r')* - { - skip(); - } ; CHARLITERAL diff --git a/_tests/trconvert/antlr3/Gold/m2pim4_LL1.g4 b/_tests/trconvert/antlr3/Gold/m2pim4_LL1.g4 index 8636ef3da..e2f3cf466 100644 --- a/_tests/trconvert/antlr3/Gold/m2pim4_LL1.g4 +++ b/_tests/trconvert/antlr3/Gold/m2pim4_LL1.g4 @@ -8,54 +8,102 @@ grammar m2pim4_LL1; // Modula-2 PIM 4 standard // strict LL(1) options { + k = 1; buildAST=true; } // Reserved Words tokens { - AND, - ARRAY, - BEGIN, - BY, - CASE, - CONST, - DEFINITION, - DIV, - DO, - ELSE, - ELSIF, - END, - EXIT, - EXPORT, - FOR, - FROM, - IF, - IMPLEMENTATION, - IMPORT, - IN, - LOOP, - MOD, - MODULE, - NOT, - OF, - OR, - POINTER, - PROCEDURE, - QUALIFIED, - RECORD, - REPEAT, - RETURN, - SET, - THEN, - TO, - TYPE, - UNTIL, - VAR, - WHILE, - WITH + AND , + ARRAY , + BEGIN , + BY , + CASE , + CONST , + DEFINITION , + DIV , + DO , + ELSE , + ELSIF , + END , + EXIT , + EXPORT , + FOR , + FROM , + IF , + IMPLEMENTATION , + IMPORT , + IN , + LOOP , + MOD , + MODULE , + NOT , + OF , + OR , + POINTER , + PROCEDURE , + QUALIFIED , + RECORD , + REPEAT , + RETURN , + SET , + THEN , + TO , + TYPE , + UNTIL , + VAR , + WHILE , + WITH } +// Token string literals converted to explicit lexer rules. +// Reorder these rules accordingly. + +AND: 'AND'; +ARRAY: 'ARRAY'; +BEGIN: 'BEGIN'; +BY: 'BY'; +CASE: 'CASE'; +CONST: 'CONST'; +DEFINITION: 'DEFINITION'; +DIV: 'DIV'; +DO: 'DO'; +ELSE: 'ELSE'; +ELSIF: 'ELSIF'; +END: 'END'; +EXIT: 'EXIT'; +EXPORT: 'EXPORT'; +FOR: 'FOR'; +FROM: 'FROM'; +IF: 'IF'; +IMPLEMENTATION: 'IMPLEMENTATION'; +IMPORT: 'IMPORT'; +IN: 'IN'; +LOOP: 'LOOP'; +MOD: 'MOD'; +MODULE: 'MODULE'; +NOT: 'NOT'; +OF: 'OF'; +OR: 'OR'; +POINTER: 'POINTER'; +PROCEDURE: 'PROCEDURE'; +QUALIFIED: 'QUALIFIED'; +RECORD: 'RECORD'; +REPEAT: 'REPEAT'; +RETURN: 'RETURN'; +SET: 'SET'; +THEN: 'THEN'; +TO: 'TO'; +TYPE: 'TYPE'; +UNTIL: 'UNTIL'; +VAR: 'VAR'; +WHILE: 'WHILE'; +WITH: 'WITH'; +// + + + // --------------------------------------------------------------------------- // L E X E R G R A M M A R // --------------------------------------------------------------------------- @@ -70,7 +118,7 @@ IDENT : INTEGER : DIGIT+ | - OCTAL_DIGIT+ ( 'B' | 'C' {}) | + OCTAL_DIGIT+ ( 'B' | 'C') | DIGIT ( HEX_DIGIT )* 'H' ; @@ -194,20 +242,19 @@ constExpression : // ***** PIM 4 Appendix 1 line 14 ***** relation : - '=' | '#' | '<>' | '<' | '<=' | '>' | '>=' | 'IN' {} + '=' | '#' | '<>' | '<' | '<=' | '>' | '>=' | 'IN' ; // ***** PIM 4 Appendix 1 line 15 ***** simpleConstExpr : - ( '+' | '-' {})? constTerm ( addOperator constTerm )* + ( '+' | '-')? constTerm ( addOperator constTerm )* ; // ***** PIM 4 Appendix 1 line 16 ***** addOperator : - '+' | '-' | OR - {} // make ANTLRworks display separate branches + '+' | '-' | OR // make ANTLRworks display separate branches ; // ***** PIM 4 Appendix 1 line 17 ***** @@ -219,8 +266,7 @@ constTerm : // ***** PIM 4 Appendix 1 line 18 ***** mulOperator : - '*' | '/' | DIV | MOD | AND | '&' - {} // make ANTLRworks display separate branches + '*' | '/' | DIV | MOD | AND | '&' // make ANTLRworks display separate branches ; // ***** PIM 4 Appendix 1 lines 19-20 ***** @@ -231,7 +277,7 @@ mulOperator : // but the grammar does not actually show it constFactor : number | string | setOrQualident | - '(' constExpression ')' | ( NOT | '~' {}) constFactor + '(' constExpression ')' | ( NOT | '~') constFactor ; // new for LL(1) @@ -312,7 +358,7 @@ fieldListSequence : // refactored for LL(1) fieldList : ( identList ':' type | - CASE ident ( ( ':' | '.' {}) qualident )? OF variant ( '|' variant )* + CASE ident ( ( ':' | '.') qualident )? OF variant ( '|' variant )* ( ELSE fieldListSequence )? END )? ; @@ -393,7 +439,7 @@ expression : // ***** PIM 4 Appendix 1 line 48 ***** simpleExpression : - ( '+' | '-' {})? term ( addOperator term )* + ( '+' | '-')? term ( addOperator term )* ; // ***** PIM 4 Appendix 1 line 49 ***** @@ -412,7 +458,7 @@ factor : number | string | setOrDesignatorOrProcCall | - '(' expression ')' | ( NOT | '~' {}) factor + '(' expression ')' | ( NOT | '~') factor ; // new for LL(1) @@ -612,84 +658,4 @@ programModule : compilationUnit : definitionModule | IMPLEMENTATION? programModule - ; - -AND : 'AND' ; - -ARRAY : 'ARRAY' ; - -BEGIN : 'BEGIN' ; - -BY : 'BY' ; - -CASE : 'CASE' ; - -CONST : 'CONST' ; - -DEFINITION : 'DEFINITION' ; - -DIV : 'DIV' ; - -DO : 'DO' ; - -ELSE : 'ELSE' ; - -ELSIF : 'ELSIF' ; - -END : 'END' ; - -EXIT : 'EXIT' ; - -EXPORT : 'EXPORT' ; - -FOR : 'FOR' ; - -FROM : 'FROM' ; - -IF : 'IF' ; - -IMPLEMENTATION : 'IMPLEMENTATION' ; - -IMPORT : 'IMPORT' ; - -IN : 'IN' ; - -LOOP : 'LOOP' ; - -MOD : 'MOD' ; - -MODULE : 'MODULE' ; - -NOT : 'NOT' ; - -OF : 'OF' ; - -OR : 'OR' ; - -POINTER : 'POINTER' ; - -PROCEDURE : 'PROCEDURE' ; - -QUALIFIED : 'QUALIFIED' ; - -RECORD : 'RECORD' ; - -REPEAT : 'REPEAT' ; - -RETURN : 'RETURN' ; - -SET : 'SET' ; - -THEN : 'THEN' ; - -TO : 'TO' ; - -TYPE : 'TYPE' ; - -UNTIL : 'UNTIL' ; - -VAR : 'VAR' ; - -WHILE : 'WHILE' ; - -WITH : 'WITH' ; \ No newline at end of file + ; \ No newline at end of file diff --git a/_tests/trconvert/antlr3/Gold/memcached_protocol.g4 b/_tests/trconvert/antlr3/Gold/memcached_protocol.g4 index 552869283..ab8a741ed 100644 --- a/_tests/trconvert/antlr3/Gold/memcached_protocol.g4 +++ b/_tests/trconvert/antlr3/Gold/memcached_protocol.g4 @@ -170,5 +170,5 @@ fragment PRINTABLE_CHAR : '!'..'~'; WHITESPACE - : (' ' | '\t' | '\r' | '\n' | '\u000C')+ {$channel = HIDDEN;} + : (' ' | '\t' | '\r' | '\n' | '\u000C')+ ; diff --git a/_tests/trconvert/antlr3/Gold/simplecalc.g4 b/_tests/trconvert/antlr3/Gold/simplecalc.g4 index 6fa64ffcc..525e8a2b6 100644 --- a/_tests/trconvert/antlr3/Gold/simplecalc.g4 +++ b/_tests/trconvert/antlr3/Gold/simplecalc.g4 @@ -1,26 +1,22 @@ grammar SimpleCalc; tokens { - PLUS , - MINUS , - MULT , - DIV -} - -@members { - public static void main(String[] args) throws Exception { - SimpleCalcLexer lex = new SimpleCalcLexer(new ANTLRFileStream(args[0])); - CommonTokenStream tokens = new CommonTokenStream(lex); - - SimpleCalcParser parser = new SimpleCalcParser(tokens); - - try { - parser.expr(); - } catch (RecognitionException e) { - e.printStackTrace(); - } - } + PLUS , + MINUS , + MULT , + DIV } + +// Token string literals converted to explicit lexer rules. +// Reorder these rules accordingly. + +PLUS: '+'; +MINUS: '-'; +MULT: '*'; +DIV: '/'; +// + + /*------------------------------------------------------------------ * PARSER RULES @@ -39,15 +35,7 @@ factor : NUMBER ; NUMBER : (DIGIT)+ ; -WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ; +WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ ; fragment DIGIT : '0'..'9' ; -PLUS : '+' ; - -MINUS : '-' ; - -MULT : '*' ; - -DIV : '/' ; - diff --git a/_tests/trconvert/antlr3/o b/_tests/trconvert/antlr3/o deleted file mode 100644 index c72d8ea3e..000000000 --- a/_tests/trconvert/antlr3/o +++ /dev/null @@ -1,45 +0,0 @@ -grammar SimpleCalc; - -tokens { - PLUS = '+' ; - MINUS = '-' ; - MULT = '*' ; - DIV = '/' ; -} - -@members { - public static void main(String[] args) throws Exception { - SimpleCalcLexer lex = new SimpleCalcLexer(new ANTLRFileStream(args[0])); - CommonTokenStream tokens = new CommonTokenStream(lex); - - SimpleCalcParser parser = new SimpleCalcParser(tokens); - - try { - parser.expr(); - } catch (RecognitionException e) { - e.printStackTrace(); - } - } -} - -/*------------------------------------------------------------------ - * PARSER RULES - *------------------------------------------------------------------*/ - -expr : term ( ( PLUS | MINUS ) term )* ; - -term : factor ( ( MULT | DIV ) factor )* ; - -factor : NUMBER ; - - -/*------------------------------------------------------------------ - * LEXER RULES - *------------------------------------------------------------------*/ - -NUMBER : (DIGIT)+ ; - -WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ; - -fragment DIGIT : '0'..'9' ; - diff --git a/_tests/trconvert/antlr3/save.test.sh b/_tests/trconvert/antlr3/test.sh similarity index 82% rename from _tests/trconvert/antlr3/save.test.sh rename to _tests/trconvert/antlr3/test.sh index b925cc5c2..79208a000 100644 --- a/_tests/trconvert/antlr3/save.test.sh +++ b/_tests/trconvert/antlr3/test.sh @@ -10,9 +10,8 @@ do extension="${i##*.}" filename="${i%.*}" echo "converting $i" - trparse $i -t antlr3 | trconvert | trsponge -c -o "$where/Generated" + trparse $i -t ANTLRv3 | trconvert | trsponge -c -o "$where/Generated" done -exit 0 rm -f "$where"/Generated/*.txt3 diff -r "$where/Gold" "$where/Generated" if [ "$?" != "0" ] diff --git a/src/grammars/bison/BisonLexer.g4 b/src/grammars/bison/BisonLexer.g4 index f7d68cc51..43d1a8247 100644 --- a/src/grammars/bison/BisonLexer.g4 +++ b/src/grammars/bison/BisonLexer.g4 @@ -1,14 +1,12 @@ // Author -- Ken Domino -// Copyright 2020 +// Copyright 2020-2022 // MIT License lexer grammar BisonLexer; options { superClass = BisonLexerBase; } -channels { - OFF_CHANNEL // non-default channel for whitespace and comments -} +// Insert here @header for C++ lexer. tokens { SC_EPILOGUE @@ -17,109 +15,109 @@ tokens { // ======================= Common fragments ========================= fragment Underscore - : '_' - ; + : '_' + ; fragment NameStartChar - : 'A'..'Z' - | 'a'..'z' + : 'A'..'Z' + | 'a'..'z' | '_' - | '\u00C0'..'\u00D6' - | '\u00D8'..'\u00F6' - | '\u00F8'..'\u02FF' - | '\u0370'..'\u037D' - | '\u037F'..'\u1FFF' - | '\u200C'..'\u200D' - | '\u2070'..'\u218F' - | '\u2C00'..'\u2FEF' - | '\u3001'..'\uD7FF' - | '\uF900'..'\uFDCF' - | '\uFDF0'..'\uFFFD' - | '$' // For PHP - ; // ignores | ['\u10000-'\uEFFFF] ; + | '\u00C0'..'\u00D6' + | '\u00D8'..'\u00F6' + | '\u00F8'..'\u02FF' + | '\u0370'..'\u037D' + | '\u037F'..'\u1FFF' + | '\u200C'..'\u200D' + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + | '$' // For PHP + ; // ignores | ['\u10000-'\uEFFFF] ; fragment DQuoteLiteral - : DQuote ( EscSeq | ~["\r\n\\] | ( '\\' [\n\r]*) )* DQuote - ; + : DQuote ( EscSeq | ~["\r\n\\] | '\\' [\n\r]* )* DQuote + ; fragment DQuote - : '"' - ; + : '"' + ; fragment SQuote - : '\'' - ; + : '\'' + ; fragment CharLiteral - : SQuote ( EscSeq | ~['\r\n\\] ) SQuote - ; + : SQuote ( EscSeq | ~['\r\n\\] ) SQuote + ; fragment SQuoteLiteral - : SQuote ( EscSeq | ~['\r\n\\] )* SQuote - ; + : SQuote ( EscSeq | ~['\r\n\\] )* SQuote + ; fragment Esc - : '\\' - ; + : '\\' + ; fragment EscSeq - : Esc - ([abefnrtv?"'\\] // The standard escaped character set such as tab, newline, etc. - | [xuU]?[0-9]+) // C-style - ; + : Esc + ([abefnrtv?"'\\] // The standard escaped character set such as tab, newline, etc. + | [xuU]?[0-9]+) // C-style + ; fragment EscAny - : Esc . - ; + : Esc . + ; fragment Id - : NameStartChar NameChar* - ; + : NameStartChar NameChar* + ; fragment Type - : ([\t\r\n\f a-zA-Z0-9] | '[' | ']' | '{' | '}' | '.' | '_' | '(' | ')' | ',')+ - ; + : ([\t\r\n\f a-zA-Z0-9] | '[' | ']' | '{' | '}' | '.' | '_' | '(' | ')' | ',')+ + ; fragment NameChar - : NameStartChar - | '0'..'9' - | Underscore - | '\u00B7' - | '\u0300'..'\u036F' - | '\u203F'..'\u2040' - | '.' - | '-' - ; + : NameStartChar + | '0'..'9' + | Underscore + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + | '.' + | '-' + ; fragment BlockComment - : '/*' + : '/*' ( - ('/' ~'*') - | ~'/' - )* - '*/' - ; + '/' ~'*' + | ~'/' + )* + '*/' + ; fragment LineComment - : '//' ~[\r\n]* - ; + : '//' ~[\r\n]* + ; fragment LineCommentExt - : '//' ~'\n'* ( '\n' Hws* '//' ~'\n'* )* - ; + : '//' ~'\n'* ( '\n' Hws* '//' ~'\n'* )* + ; fragment Ws - : Hws - | Vws - ; + : Hws + | Vws + ; fragment Hws - : [ \t] - ; + : [ \t] + ; fragment Vws - : [\r\n\f] - ; + : [\r\n\f] + ; /* Four types of user code: - prologue (code between '%{' '%}' in the first section, before %%); @@ -131,61 +129,61 @@ fragment Vws // Actions fragment LBrace - : '{' - ; + : '{' + ; fragment RBrace - : '}' - ; + : '}' + ; fragment PercentLBrace - : '%{' - ; + : '%{' + ; fragment PercentRBrace - : '%}' - ; + : '%}' + ; fragment PercentQuestion - : '%?{' - ; + : '%?{' + ; fragment ActionCode : Stuff* - ; + ; fragment Stuff - : EscAny - | DQuoteLiteral - | SQuoteLiteral - | BlockComment - | LineComment - | NestedAction - | ~('{' | '}' | '\'' | '"') - ; + : EscAny + | DQuoteLiteral + | SQuoteLiteral + | BlockComment + | LineComment + | NestedAction + | ~('{' | '}' | '\'' | '"') + ; fragment NestedPrologue - : PercentLBrace ActionCode PercentRBrace - ; + : PercentLBrace ActionCode PercentRBrace + ; fragment NestedAction - : LBrace ActionCode RBrace - ; + : LBrace ActionCode RBrace + ; fragment NestedPredicate - : PercentQuestion ActionCode RBrace - ; + : PercentQuestion ActionCode RBrace + ; fragment Sp - : Ws* - ; + : Ws* + ; fragment Eqopt - : (Sp [=])? - ; + : (Sp [=])? + ; PercentPercent: '%%' { this.NextMode(); } - ; + ; /*----------------------------. | Scanning Bison directives. | @@ -198,213 +196,213 @@ PercentPercent: '%%' { this.NextMode(); } to ../build-aux/cross-options.pl. */ NONASSOC - : '%binary' - ; + : '%binary' + ; CODE - : '%code' - ; + : '%code' + ; PERCENT_DEBUG - : '%debug' - ; + : '%debug' + ; DEFAULT_PREC - : '%default-prec' - ; + : '%default-prec' + ; DEFINE - : '%define' - ; + : '%define' + ; DEFINES - : '%defines' - ; + : '%defines' + ; DESTRUCTOR - : '%destructor' - ; + : '%destructor' + ; DPREC - : '%dprec' - ; + : '%dprec' + ; EMPTY_RULE - : '%empty' - ; + : '%empty' + ; EXPECT - : '%expect' - ; + : '%expect' + ; EXPECT_RR - : '%expect-rr' - ; + : '%expect-rr' + ; PERCENT_FILE_PREFIX - : '%file-prefix' - ; + : '%file-prefix' + ; INITIAL_ACTION - : '%initial-action' - ; + : '%initial-action' + ; GLR_PARSER - : '%glr-parser' - ; + : '%glr-parser' + ; LANGUAGE - : '%language' - ; + : '%language' + ; PERCENT_LEFT - : '%left' - ; + : '%left' + ; LEX - : '%lex-param' - ; + : '%lex-param' + ; LOCATIONS - : '%locations' - ; + : '%locations' + ; MERGE - : '%merge' - ; + : '%merge' + ; NO_DEFAULT_PREC - : '%no-default-prec' - ; + : '%no-default-prec' + ; NO_LINES - : '%no-lines' - ; + : '%no-lines' + ; PERCENT_NONASSOC - : '%nonassoc' - ; + : '%nonassoc' + ; NONDETERMINISTIC_PARSER - : '%nondeterministic-parser' - ; + : '%nondeterministic-parser' + ; NTERM - : '%nterm' - ; + : '%nterm' + ; PARAM - : '%param' - ; + : '%param' + ; PARSE - : '%parse-param' - ; + : '%parse-param' + ; PERCENT_PREC - : '%prec' - ; + : '%prec' + ; PRECEDENCE - : '%precedence' - ; + : '%precedence' + ; PRINTER - : '%printer' - ; + : '%printer' + ; REQUIRE - : '%require' - ; + : '%require' + ; PERCENT_RIGHT - : '%right' - ; + : '%right' + ; SKELETON - : '%skeleton' - ; + : '%skeleton' + ; PERCENT_START - : '%start' - ; + : '%start' + ; TOKEN - : '%term' - ; + : '%term' + ; PERCENT_TOKEN - : '%token' - ; + : '%token' + ; TOKEN_TABLE - : '%token'[-_]'table' - ; + : '%token' [-_] 'table' + ; PERCENT_TYPE - : '%type' - ; + : '%type' + ; PERCENT_UNION - : '%union' - ; + : '%union' + ; VERBOSE - : '%verbose' - ; + : '%verbose' + ; PERCENT_YACC - : '%yacc' - ; + : '%yacc' + ; /* Deprecated since Bison 2.3b (2008-05-27), but the warning is issued only since Bison 3.4. */ PERCENT_PURE_PARSER - : '%pure'[-_]'parser' - ; + : '%pure' [-_] 'parser' + ; /* Deprecated since Bison 2.6 (2012-07-19), but the warning is issued only since Bison 3.3. */ PERCENT_NAME_PREFIX - : '%name'[-_]'prefix'(Eqopt)?(Sp) - ; + : '%name' [-_] 'prefix' Eqopt? Sp + ; /* Deprecated since Bison 2.7.90, 2012. */ OBS_DEFAULT_PREC - : '%default'[-_]'prec' - ; + : '%default' [-_] 'prec' + ; OBS_PERCENT_ERROR_VERBOSE - : '%error'[-_]'verbose' - ; + : '%error' [-_] 'verbose' + ; OBS_EXPECT_RR - : '%expect'[-_]'rr' - ; + : '%expect' [-_] 'rr' + ; OBS_PERCENT_FILE_PREFIX - : '%file-prefix'(Eqopt) - ; + : '%file-prefix' Eqopt + ; OBS_FIXED_OUTPUT - : '%fixed'[-_]'output'[-_]'files' - ; + : '%fixed' [-_] 'output' [-_] 'files' + ; OBS_NO_DEFAULT_PREC - : '%no'[-_]'default'[-_]'prec' - ; + : '%no' [-_] 'default' [-_] 'prec' + ; OBS_NO_LINES - : '%no'[-_]'lines' - ; + : '%no' [-_] 'lines' + ; OBS_OUTPUT - : '%output' Eqopt - ; + : '%output' Eqopt + ; OBS_TOKEN_TABLE - : '%token'[-_]'table' - ; + : '%token' [-_] 'table' + ; BRACED_CODE: NestedAction; @@ -416,9 +414,6 @@ COLON: ':'; EQUAL: '='; //ID_COLON: Id ':'; ID: Id; -PERCENT_PERCENT - : PercentPercent - ; PIPE: '|'; SEMICOLON: ';'; TAG: '<' Type '>'; @@ -430,21 +425,21 @@ LPAREN: '('; RPAREN: ')'; BLOCK_COMMENT - : BlockComment -> channel(OFF_CHANNEL) - ; + : BlockComment -> channel(HIDDEN) + ; LINE_COMMENT - : LineComment -> channel(OFF_CHANNEL) - ; + : LineComment -> channel(HIDDEN) + ; WS - : ( Hws | Vws )+ -> channel(OFF_CHANNEL) + : ( Hws | Vws )+ -> channel(HIDDEN) ; PROLOGUE - : NestedPrologue - ; + : NestedPrologue + ; // ============================================================== // Note, all prologue rules can be used in grammar declarations. @@ -452,7 +447,7 @@ PROLOGUE //mode RuleMode; mode EpilogueMode; -// Expected: Warning AC0131 greedy block ()+ contains wildcard; the non-greedy syntax ()+? may be preferred LanguageServer +// Expected: Warning AC0131 greedy block ()+ contains wildcard; the non-greedy syntax ()+? may be preferred LanguageServer // Changing from .* to .*? to avoid the warning. It may or may not work. - EPILOGUE: .+ ; + EPILOGUE: .+ ; diff --git a/src/grammars/bison/BisonParser.g4 b/src/grammars/bison/BisonParser.g4 index 67476a44d..2c3ab4e7d 100644 --- a/src/grammars/bison/BisonParser.g4 +++ b/src/grammars/bison/BisonParser.g4 @@ -25,12 +25,9 @@ parser grammar BisonParser; -options { - tokenVocab=BisonLexer; -// contextSuperClass=ParseTreeEditing.AntlrDOM.ObserverParserRuleContext; -} +options { tokenVocab=BisonLexer; } -input +input_ : prologue_declarations '%%' bison_grammar epilogue_opt EOF ; @@ -128,8 +125,7 @@ tag_opt ; generic_symlist - : generic_symlist_item - | generic_symlist generic_symlist_item + : generic_symlist_item+ ; generic_symlist_item @@ -163,12 +159,8 @@ nterm_decls // A non empty list of possibly tagged symbols for %token or %nterm. -token_decls : ( | TAG ) token_decl_1 ( TAG token_decl_1 )* ; +token_decls : ( | TAG ) token_decl+ ( TAG token_decl+ )* ; -// One or more symbol declarations for %token or %nterm. - -token_decl_1 : token_decl token_decl* ; - // One symbol declaration for %token or %nterm. token_decl @@ -198,16 +190,9 @@ alias // FOO and 'foo' as two different symbols instead of aliasing them. token_decls_for_prec - : token_decl_for_prec_1 - | TAG token_decl_for_prec_1 - | token_decls_for_prec TAG token_decl_for_prec_1 - ; - -// One or more token declarations for precedence declaration. - -token_decl_for_prec_1 - : token_decl_for_prec - | token_decl_for_prec_1 token_decl_for_prec + : token_decl_for_prec+ + | TAG token_decl_for_prec+ + | token_decls_for_prec TAG token_decl_for_prec+ ; // One token declaration for precedence declaration. @@ -225,16 +210,9 @@ token_decl_for_prec // A non empty list of typed symbols (for %type). symbol_decls - : symbol_decl_1 - | TAG symbol_decl_1 - | symbol_decls TAG symbol_decl_1 - ; - -// One or more token declarations (for %type). - -symbol_decl_1 - : symbol - | symbol_decl_1 symbol + : symbol+ + | TAG symbol+ + | symbol_decls TAG symbol+ ; /*------------------------------------------. @@ -326,4 +304,4 @@ epilogue_opt actionBlock : BRACED_CODE - ; \ No newline at end of file + ; diff --git a/src/grammars/bison/CSharp/BisonLexerBase.cs b/src/grammars/bison/CSharp/BisonLexerBase.cs index e827fbacc..5d9d8d8cc 100644 --- a/src/grammars/bison/CSharp/BisonLexerBase.cs +++ b/src/grammars/bison/CSharp/BisonLexerBase.cs @@ -17,15 +17,24 @@ public BisonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutp public void NextMode() { ++percent_percent_count; - if (percent_percent_count == 1) { - //this.PushMode(BisonLexer.RuleMode); + if (percent_percent_count == 1) + { return; - } else if (percent_percent_count == 2) { + } else if (percent_percent_count == 2) + { this.PushMode(BisonLexer.EpilogueMode); return; - } else { - this.Type = BisonLexer.PERCENT_PERCENT; + } else + { + this.Type = BisonLexer.PercentPercent; return; } } + + public override void Reset() + { + percent_percent_count = 0; + base.Reset(); + } } + diff --git a/src/grammars/bison/Cpp/BisonLexerBase.cpp b/src/grammars/bison/Cpp/BisonLexerBase.cpp new file mode 100644 index 000000000..eda0ea7ee --- /dev/null +++ b/src/grammars/bison/Cpp/BisonLexerBase.cpp @@ -0,0 +1,32 @@ +#include "antlr4-runtime.h" +#include "BisonLexerBase.h" +#include "BisonLexer.h" + +BisonLexerBase::BisonLexerBase(antlr4::CharStream * input) : antlr4::Lexer(input) +{ + percent_percent_count = 0; + _input = input; +} + +void BisonLexerBase::NextMode() +{ + ++percent_percent_count; + if (percent_percent_count == 1) + { + return; + } else if (percent_percent_count == 2) + { + this->pushMode(BisonLexer::EpilogueMode); + return; + } else + { + this->setType(BisonLexer::PercentPercent); + return; + } +} + +void BisonLexerBase::reset() +{ + percent_percent_count = 0; + Lexer::reset(); +} diff --git a/src/grammars/bison/Cpp/BisonLexerBase.h b/src/grammars/bison/Cpp/BisonLexerBase.h new file mode 100644 index 000000000..9ad78314a --- /dev/null +++ b/src/grammars/bison/Cpp/BisonLexerBase.h @@ -0,0 +1,14 @@ +#pragma once +#include "antlr4-runtime.h" + +class BisonLexerBase : public antlr4::Lexer +{ + private: + antlr4::CharStream * _input; + int percent_percent_count; + + public: + BisonLexerBase(antlr4::CharStream * input); + void NextMode(); + virtual void reset() override; +}; diff --git a/src/grammars/bison/Cpp/transformGrammar.py b/src/grammars/bison/Cpp/transformGrammar.py new file mode 100644 index 000000000..a2468920b --- /dev/null +++ b/src/grammars/bison/Cpp/transformGrammar.py @@ -0,0 +1,32 @@ +import sys, os, re, shutil +from glob import glob +from pathlib import Path + +def main(argv): + for file in glob("./*.g4"): + fix(file) + +def fix(file_path): + print("Altering " + file_path) + if not os.path.exists(file_path): + print(f"Could not find file: {file_path}") + sys.exit(1) + parts = os.path.split(file_path) + file_name = parts[-1] + shutil.move(file_path, file_path + ".bak") + input_file = open(file_path + ".bak",'r') + output_file = open(file_path, 'w') + for x in input_file: + if '// Insert here @header for C++ lexer.' in x: + x = x.replace('// Insert here @header for C++ lexer.', '@header {#include "BisonLexerBase.h"}') + if 'this.' in x: + x = x.replace('this.', 'this->') + output_file.write(x) + output_file.flush() + + print("Writing ...") + input_file.close() + output_file.close() + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/grammars/bison/Dart/BisonLexerBase.dart b/src/grammars/bison/Dart/BisonLexerBase.dart new file mode 100644 index 000000000..96fff925d --- /dev/null +++ b/src/grammars/bison/Dart/BisonLexerBase.dart @@ -0,0 +1,38 @@ +import 'package:antlr4/antlr4.dart'; +import 'dart:io'; +import 'dart:core'; +import 'dart:convert'; +import 'dart:collection'; +import 'BisonLexer.dart'; + +abstract class BisonLexerBase extends Lexer +{ + BisonLexerBase(CharStream input) : super(input) + { + } + + int percent_percent_count = 0; + + void NextMode() + { + ++percent_percent_count; + if (percent_percent_count == 1) + { + return; + } else if (percent_percent_count == 2) + { + this.pushMode(BisonLexer.EpilogueMode); + return; + } else + { + this.type = BisonLexer.TOKEN_PercentPercent; + return; + } + } + + @override void reset([bool resetInput = false]) + { + percent_percent_count = 0; + super.reset(resetInput); + } +} diff --git a/src/grammars/bison/Generated-CSharp/BisonLexer.g4 b/src/grammars/bison/Generated-CSharp/BisonLexer.g4 index f7d68cc51..43d1a8247 100644 --- a/src/grammars/bison/Generated-CSharp/BisonLexer.g4 +++ b/src/grammars/bison/Generated-CSharp/BisonLexer.g4 @@ -1,14 +1,12 @@ // Author -- Ken Domino -// Copyright 2020 +// Copyright 2020-2022 // MIT License lexer grammar BisonLexer; options { superClass = BisonLexerBase; } -channels { - OFF_CHANNEL // non-default channel for whitespace and comments -} +// Insert here @header for C++ lexer. tokens { SC_EPILOGUE @@ -17,109 +15,109 @@ tokens { // ======================= Common fragments ========================= fragment Underscore - : '_' - ; + : '_' + ; fragment NameStartChar - : 'A'..'Z' - | 'a'..'z' + : 'A'..'Z' + | 'a'..'z' | '_' - | '\u00C0'..'\u00D6' - | '\u00D8'..'\u00F6' - | '\u00F8'..'\u02FF' - | '\u0370'..'\u037D' - | '\u037F'..'\u1FFF' - | '\u200C'..'\u200D' - | '\u2070'..'\u218F' - | '\u2C00'..'\u2FEF' - | '\u3001'..'\uD7FF' - | '\uF900'..'\uFDCF' - | '\uFDF0'..'\uFFFD' - | '$' // For PHP - ; // ignores | ['\u10000-'\uEFFFF] ; + | '\u00C0'..'\u00D6' + | '\u00D8'..'\u00F6' + | '\u00F8'..'\u02FF' + | '\u0370'..'\u037D' + | '\u037F'..'\u1FFF' + | '\u200C'..'\u200D' + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + | '$' // For PHP + ; // ignores | ['\u10000-'\uEFFFF] ; fragment DQuoteLiteral - : DQuote ( EscSeq | ~["\r\n\\] | ( '\\' [\n\r]*) )* DQuote - ; + : DQuote ( EscSeq | ~["\r\n\\] | '\\' [\n\r]* )* DQuote + ; fragment DQuote - : '"' - ; + : '"' + ; fragment SQuote - : '\'' - ; + : '\'' + ; fragment CharLiteral - : SQuote ( EscSeq | ~['\r\n\\] ) SQuote - ; + : SQuote ( EscSeq | ~['\r\n\\] ) SQuote + ; fragment SQuoteLiteral - : SQuote ( EscSeq | ~['\r\n\\] )* SQuote - ; + : SQuote ( EscSeq | ~['\r\n\\] )* SQuote + ; fragment Esc - : '\\' - ; + : '\\' + ; fragment EscSeq - : Esc - ([abefnrtv?"'\\] // The standard escaped character set such as tab, newline, etc. - | [xuU]?[0-9]+) // C-style - ; + : Esc + ([abefnrtv?"'\\] // The standard escaped character set such as tab, newline, etc. + | [xuU]?[0-9]+) // C-style + ; fragment EscAny - : Esc . - ; + : Esc . + ; fragment Id - : NameStartChar NameChar* - ; + : NameStartChar NameChar* + ; fragment Type - : ([\t\r\n\f a-zA-Z0-9] | '[' | ']' | '{' | '}' | '.' | '_' | '(' | ')' | ',')+ - ; + : ([\t\r\n\f a-zA-Z0-9] | '[' | ']' | '{' | '}' | '.' | '_' | '(' | ')' | ',')+ + ; fragment NameChar - : NameStartChar - | '0'..'9' - | Underscore - | '\u00B7' - | '\u0300'..'\u036F' - | '\u203F'..'\u2040' - | '.' - | '-' - ; + : NameStartChar + | '0'..'9' + | Underscore + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + | '.' + | '-' + ; fragment BlockComment - : '/*' + : '/*' ( - ('/' ~'*') - | ~'/' - )* - '*/' - ; + '/' ~'*' + | ~'/' + )* + '*/' + ; fragment LineComment - : '//' ~[\r\n]* - ; + : '//' ~[\r\n]* + ; fragment LineCommentExt - : '//' ~'\n'* ( '\n' Hws* '//' ~'\n'* )* - ; + : '//' ~'\n'* ( '\n' Hws* '//' ~'\n'* )* + ; fragment Ws - : Hws - | Vws - ; + : Hws + | Vws + ; fragment Hws - : [ \t] - ; + : [ \t] + ; fragment Vws - : [\r\n\f] - ; + : [\r\n\f] + ; /* Four types of user code: - prologue (code between '%{' '%}' in the first section, before %%); @@ -131,61 +129,61 @@ fragment Vws // Actions fragment LBrace - : '{' - ; + : '{' + ; fragment RBrace - : '}' - ; + : '}' + ; fragment PercentLBrace - : '%{' - ; + : '%{' + ; fragment PercentRBrace - : '%}' - ; + : '%}' + ; fragment PercentQuestion - : '%?{' - ; + : '%?{' + ; fragment ActionCode : Stuff* - ; + ; fragment Stuff - : EscAny - | DQuoteLiteral - | SQuoteLiteral - | BlockComment - | LineComment - | NestedAction - | ~('{' | '}' | '\'' | '"') - ; + : EscAny + | DQuoteLiteral + | SQuoteLiteral + | BlockComment + | LineComment + | NestedAction + | ~('{' | '}' | '\'' | '"') + ; fragment NestedPrologue - : PercentLBrace ActionCode PercentRBrace - ; + : PercentLBrace ActionCode PercentRBrace + ; fragment NestedAction - : LBrace ActionCode RBrace - ; + : LBrace ActionCode RBrace + ; fragment NestedPredicate - : PercentQuestion ActionCode RBrace - ; + : PercentQuestion ActionCode RBrace + ; fragment Sp - : Ws* - ; + : Ws* + ; fragment Eqopt - : (Sp [=])? - ; + : (Sp [=])? + ; PercentPercent: '%%' { this.NextMode(); } - ; + ; /*----------------------------. | Scanning Bison directives. | @@ -198,213 +196,213 @@ PercentPercent: '%%' { this.NextMode(); } to ../build-aux/cross-options.pl. */ NONASSOC - : '%binary' - ; + : '%binary' + ; CODE - : '%code' - ; + : '%code' + ; PERCENT_DEBUG - : '%debug' - ; + : '%debug' + ; DEFAULT_PREC - : '%default-prec' - ; + : '%default-prec' + ; DEFINE - : '%define' - ; + : '%define' + ; DEFINES - : '%defines' - ; + : '%defines' + ; DESTRUCTOR - : '%destructor' - ; + : '%destructor' + ; DPREC - : '%dprec' - ; + : '%dprec' + ; EMPTY_RULE - : '%empty' - ; + : '%empty' + ; EXPECT - : '%expect' - ; + : '%expect' + ; EXPECT_RR - : '%expect-rr' - ; + : '%expect-rr' + ; PERCENT_FILE_PREFIX - : '%file-prefix' - ; + : '%file-prefix' + ; INITIAL_ACTION - : '%initial-action' - ; + : '%initial-action' + ; GLR_PARSER - : '%glr-parser' - ; + : '%glr-parser' + ; LANGUAGE - : '%language' - ; + : '%language' + ; PERCENT_LEFT - : '%left' - ; + : '%left' + ; LEX - : '%lex-param' - ; + : '%lex-param' + ; LOCATIONS - : '%locations' - ; + : '%locations' + ; MERGE - : '%merge' - ; + : '%merge' + ; NO_DEFAULT_PREC - : '%no-default-prec' - ; + : '%no-default-prec' + ; NO_LINES - : '%no-lines' - ; + : '%no-lines' + ; PERCENT_NONASSOC - : '%nonassoc' - ; + : '%nonassoc' + ; NONDETERMINISTIC_PARSER - : '%nondeterministic-parser' - ; + : '%nondeterministic-parser' + ; NTERM - : '%nterm' - ; + : '%nterm' + ; PARAM - : '%param' - ; + : '%param' + ; PARSE - : '%parse-param' - ; + : '%parse-param' + ; PERCENT_PREC - : '%prec' - ; + : '%prec' + ; PRECEDENCE - : '%precedence' - ; + : '%precedence' + ; PRINTER - : '%printer' - ; + : '%printer' + ; REQUIRE - : '%require' - ; + : '%require' + ; PERCENT_RIGHT - : '%right' - ; + : '%right' + ; SKELETON - : '%skeleton' - ; + : '%skeleton' + ; PERCENT_START - : '%start' - ; + : '%start' + ; TOKEN - : '%term' - ; + : '%term' + ; PERCENT_TOKEN - : '%token' - ; + : '%token' + ; TOKEN_TABLE - : '%token'[-_]'table' - ; + : '%token' [-_] 'table' + ; PERCENT_TYPE - : '%type' - ; + : '%type' + ; PERCENT_UNION - : '%union' - ; + : '%union' + ; VERBOSE - : '%verbose' - ; + : '%verbose' + ; PERCENT_YACC - : '%yacc' - ; + : '%yacc' + ; /* Deprecated since Bison 2.3b (2008-05-27), but the warning is issued only since Bison 3.4. */ PERCENT_PURE_PARSER - : '%pure'[-_]'parser' - ; + : '%pure' [-_] 'parser' + ; /* Deprecated since Bison 2.6 (2012-07-19), but the warning is issued only since Bison 3.3. */ PERCENT_NAME_PREFIX - : '%name'[-_]'prefix'(Eqopt)?(Sp) - ; + : '%name' [-_] 'prefix' Eqopt? Sp + ; /* Deprecated since Bison 2.7.90, 2012. */ OBS_DEFAULT_PREC - : '%default'[-_]'prec' - ; + : '%default' [-_] 'prec' + ; OBS_PERCENT_ERROR_VERBOSE - : '%error'[-_]'verbose' - ; + : '%error' [-_] 'verbose' + ; OBS_EXPECT_RR - : '%expect'[-_]'rr' - ; + : '%expect' [-_] 'rr' + ; OBS_PERCENT_FILE_PREFIX - : '%file-prefix'(Eqopt) - ; + : '%file-prefix' Eqopt + ; OBS_FIXED_OUTPUT - : '%fixed'[-_]'output'[-_]'files' - ; + : '%fixed' [-_] 'output' [-_] 'files' + ; OBS_NO_DEFAULT_PREC - : '%no'[-_]'default'[-_]'prec' - ; + : '%no' [-_] 'default' [-_] 'prec' + ; OBS_NO_LINES - : '%no'[-_]'lines' - ; + : '%no' [-_] 'lines' + ; OBS_OUTPUT - : '%output' Eqopt - ; + : '%output' Eqopt + ; OBS_TOKEN_TABLE - : '%token'[-_]'table' - ; + : '%token' [-_] 'table' + ; BRACED_CODE: NestedAction; @@ -416,9 +414,6 @@ COLON: ':'; EQUAL: '='; //ID_COLON: Id ':'; ID: Id; -PERCENT_PERCENT - : PercentPercent - ; PIPE: '|'; SEMICOLON: ';'; TAG: '<' Type '>'; @@ -430,21 +425,21 @@ LPAREN: '('; RPAREN: ')'; BLOCK_COMMENT - : BlockComment -> channel(OFF_CHANNEL) - ; + : BlockComment -> channel(HIDDEN) + ; LINE_COMMENT - : LineComment -> channel(OFF_CHANNEL) - ; + : LineComment -> channel(HIDDEN) + ; WS - : ( Hws | Vws )+ -> channel(OFF_CHANNEL) + : ( Hws | Vws )+ -> channel(HIDDEN) ; PROLOGUE - : NestedPrologue - ; + : NestedPrologue + ; // ============================================================== // Note, all prologue rules can be used in grammar declarations. @@ -452,7 +447,7 @@ PROLOGUE //mode RuleMode; mode EpilogueMode; -// Expected: Warning AC0131 greedy block ()+ contains wildcard; the non-greedy syntax ()+? may be preferred LanguageServer +// Expected: Warning AC0131 greedy block ()+ contains wildcard; the non-greedy syntax ()+? may be preferred LanguageServer // Changing from .* to .*? to avoid the warning. It may or may not work. - EPILOGUE: .+ ; + EPILOGUE: .+ ; diff --git a/src/grammars/bison/Generated-CSharp/BisonLexerBase.cs b/src/grammars/bison/Generated-CSharp/BisonLexerBase.cs index 9496a29da..5d9d8d8cc 100644 --- a/src/grammars/bison/Generated-CSharp/BisonLexerBase.cs +++ b/src/grammars/bison/Generated-CSharp/BisonLexerBase.cs @@ -16,20 +16,25 @@ public BisonLexerBase(ICharStream input, TextWriter output, TextWriter errorOutp public void NextMode() { - ++percent_percent_count; - if (percent_percent_count == 1) - { - //this.PushMode(BisonLexer.RuleMode); - return; - } else if (percent_percent_count == 2) - { - this.PushMode(BisonLexer.EpilogueMode); - return; - } else - { - this.Type = BisonLexer.PERCENT_PERCENT; - return; - } + ++percent_percent_count; + if (percent_percent_count == 1) + { + return; + } else if (percent_percent_count == 2) + { + this.PushMode(BisonLexer.EpilogueMode); + return; + } else + { + this.Type = BisonLexer.PercentPercent; + return; + } + } + + public override void Reset() + { + percent_percent_count = 0; + base.Reset(); } } diff --git a/src/grammars/bison/Generated-CSharp/BisonParser.g4 b/src/grammars/bison/Generated-CSharp/BisonParser.g4 index 67476a44d..2c3ab4e7d 100644 --- a/src/grammars/bison/Generated-CSharp/BisonParser.g4 +++ b/src/grammars/bison/Generated-CSharp/BisonParser.g4 @@ -25,12 +25,9 @@ parser grammar BisonParser; -options { - tokenVocab=BisonLexer; -// contextSuperClass=ParseTreeEditing.AntlrDOM.ObserverParserRuleContext; -} +options { tokenVocab=BisonLexer; } -input +input_ : prologue_declarations '%%' bison_grammar epilogue_opt EOF ; @@ -128,8 +125,7 @@ tag_opt ; generic_symlist - : generic_symlist_item - | generic_symlist generic_symlist_item + : generic_symlist_item+ ; generic_symlist_item @@ -163,12 +159,8 @@ nterm_decls // A non empty list of possibly tagged symbols for %token or %nterm. -token_decls : ( | TAG ) token_decl_1 ( TAG token_decl_1 )* ; +token_decls : ( | TAG ) token_decl+ ( TAG token_decl+ )* ; -// One or more symbol declarations for %token or %nterm. - -token_decl_1 : token_decl token_decl* ; - // One symbol declaration for %token or %nterm. token_decl @@ -198,16 +190,9 @@ alias // FOO and 'foo' as two different symbols instead of aliasing them. token_decls_for_prec - : token_decl_for_prec_1 - | TAG token_decl_for_prec_1 - | token_decls_for_prec TAG token_decl_for_prec_1 - ; - -// One or more token declarations for precedence declaration. - -token_decl_for_prec_1 - : token_decl_for_prec - | token_decl_for_prec_1 token_decl_for_prec + : token_decl_for_prec+ + | TAG token_decl_for_prec+ + | token_decls_for_prec TAG token_decl_for_prec+ ; // One token declaration for precedence declaration. @@ -225,16 +210,9 @@ token_decl_for_prec // A non empty list of typed symbols (for %type). symbol_decls - : symbol_decl_1 - | TAG symbol_decl_1 - | symbol_decls TAG symbol_decl_1 - ; - -// One or more token declarations (for %type). - -symbol_decl_1 - : symbol - | symbol_decl_1 symbol + : symbol+ + | TAG symbol+ + | symbol_decls TAG symbol+ ; /*------------------------------------------. @@ -326,4 +304,4 @@ epilogue_opt actionBlock : BRACED_CODE - ; \ No newline at end of file + ; diff --git a/src/grammars/bison/Generated-CSharp/Test.cs b/src/grammars/bison/Generated-CSharp/Test.cs index d41eb17d5..b499ee3a6 100644 --- a/src/grammars/bison/Generated-CSharp/Test.cs +++ b/src/grammars/bison/Generated-CSharp/Test.cs @@ -19,7 +19,7 @@ public class Program public static ITokenStream TokenStream { get; set; } public static ICharStream CharStream { get; set; } public static IParseTree Tree { get; set; } - public static string StartSymbol { get; set; } = "input"; + public static string StartSymbol { get; set; } = "input_"; public static string Input { get; set; } public static void SetupParse2(string input, bool quiet = false) { @@ -43,7 +43,7 @@ public static void SetupParse2(string input, bool quiet = false) public static IParseTree Parse2() { - var tree = Parser.input(); + var tree = Parser.input_(); Input = Lexer.InputStream.ToString(); TokenStream = Parser.TokenStream; Tree = tree; @@ -71,7 +71,7 @@ public static IParseTree Parse(string input) parser.RemoveErrorListeners(); lexer.AddErrorListener(listener_lexer); parser.AddErrorListener(listener_parser); - var tree = parser.input(); + var tree = parser.input_(); Input = lexer.InputStream.ToString(); TokenStream = parser.TokenStream; Tree = tree; @@ -256,7 +256,7 @@ static void DoParse(ICharStream str, string input_name, int row_number) // ParserATNSimulator.trace_atn_sim = true; } DateTime before = DateTime.Now; - var tree = parser.input(); + var tree = parser.input_(); DateTime after = DateTime.Now; var result = ""; if (listener_lexer.had_error || listener_parser.had_error) diff --git a/src/grammars/bison/Generated-CSharp/build.sh b/src/grammars/bison/Generated-CSharp/build.sh index 0b4f00cf0..25613818c 100644 --- a/src/grammars/bison/Generated-CSharp/build.sh +++ b/src/grammars/bison/Generated-CSharp/build.sh @@ -1,6 +1,6 @@ # Generated from trgen 0.21.0 set -e if [ -f transformGrammar.py ]; then python3 transformGrammar.py ; fi -dotnet restore bison.csproj -dotnet build bison.csproj +dotnet restore Test.csproj +dotnet build Test.csproj exit 0 diff --git a/src/grammars/bison/Generated-CSharp/desc.xml b/src/grammars/bison/Generated-CSharp/desc.xml index 26bde4cec..eb46af1b7 100644 --- a/src/grammars/bison/Generated-CSharp/desc.xml +++ b/src/grammars/bison/Generated-CSharp/desc.xml @@ -1,4 +1,4 @@ - CSharp;Cpp;Dart;Go;Java;JavaScript;PHP;Python3;TypeScript + Cpp;CSharp;Dart;Go;Java;JavaScript;Python3;TypeScript diff --git a/src/grammars/bison/Generated-CSharp/examples/irc.y b/src/grammars/bison/Generated-CSharp/examples/irc.y new file mode 100644 index 000000000..1bc90850a --- /dev/null +++ b/src/grammars/bison/Generated-CSharp/examples/irc.y @@ -0,0 +1,189 @@ +// This code from https://begriffs.com/posts/2021-11-28-practical-parsing.html +/* irc.y (Bison only) + + Using Bison mostly for the %code positions, making + it easier to use libderp between flex and bison. + + - WARNING - + There is absolutely no memory hygiene in this example. + We don't check for allocation failure, and we don't free + things when done. See the earlier lisp.y/.l examples + for guidance about that. +*/ + +/* output more descriptive messages than "syntax error" */ +%define parse.error verbose + +%code top { + #define _XOPEN_SOURCE 600 + #include + #include +} + +%code requires { + #include + #include + + struct prefix + { + char *host; + char *nick; + char *user; + }; + + /* building an irc_message is the overall + goal for this parser */ + struct irc_message + { + treemap *tags; + struct prefix *prefix; + char *command; + list *params; + }; +} + +%code provides { + int yyerror(char const *msg); + int yylex(void); + void message_print(struct irc_message *m); +} + +%union +{ + char *str; + struct prefix *prefix; + treemap *map; + struct map_pair *pair; + list *list; + struct irc_message *msg; +} + +%token SPACE +%token COMMAND MIDDLE TRAILING +%token TAG +%token PREFIX + +%type message tagged_message prefixed_message +%type tags +%type params + +%% + + /* Like in the CSV example, we start with a dummy + rule just to add side-effects */ + +final : + tagged_message { message_print($1); } +; + + /* Messages begin with two optional components, + a set of tags and a prefix. + + ::= ['@' ] [':' ] [params] + + Rather than making a single message rule with + tons of variations (and duplicated code), I chose + to build the message in stages. + + tagged_message <- prefixed_message <- message + + A prefixed_message adds prefix information, or + passes the message along verbatim if there is none. + Similarly for tagged_message. */ + +tagged_message : + + /* When there are more than one matched token, + it's helpful to add Bison "named references" + in brackets. Thus, below, the rule can refer to + $ts rather than $2, or $msg rather than $4. + Makes it way easier to rearrange tokens while + you're experimenting. */ + + '@' tags[ts] SPACE prefixed_message[msg] { + $msg->tags = $ts; + $$ = $msg; + } + + /* here's the pass-through case when there are + no tags on the message */ + +| prefixed_message +; + +prefixed_message : + ':' PREFIX[pfx] SPACE message[msg] { + $msg->prefix = $pfx; + $$ = $msg; + } +| message +; + +message : + COMMAND[cmd] params[ps] { + struct irc_message *m = malloc(sizeof *m); + *m = (struct irc_message) { + .command=$cmd, .params=$ps + }; + $$ = m; + } +; + +tags : + TAG { + treemap *t = tm_new(derp_strcmp, NULL); + tm_insert(t, $1->k, $1->v); + $$ = t; + } +| tags[ts] ';' TAG[t] { + tm_insert($ts, $t->k, $t->v); + $$ = $ts; + } +; + +params : + SPACE TRAILING { + $$ = l_new(); + l_prepend($$, $2); + } +| SPACE MIDDLE[mid] params[ps] { + l_prepend($ps, $mid); + $$ = $ps; + } +| %empty { + $$ = l_new(); + } +; + +%% + +int yyerror(char const *msg) +{ + return fprintf(stderr, "%s\n", msg); +} + +void message_print(struct irc_message *m) +{ + if (m->tags) + { + struct tm_iter *it = tm_iter_begin(m->tags); + struct map_pair *p; + + puts("Tags:"); + while ((p = tm_iter_next(it)) != NULL) + printf("\t'%s'='%s'\n", (char*)p->k, (char*)p->v); + tm_iter_free(it); + } + if (m->prefix) + printf("Prefix: Nick %s, User %s, Host %s\n", + m->prefix->nick, m->prefix->user, + m->prefix->host); + if (m->command) + printf("Command: %s\n", m->command); + if (!l_is_empty(m->params)) + { + puts("Params:"); + for (list_item *li = l_first(m->params); li; li = li->next) + printf("\t%s\n", (char*)li->data); + } +} \ No newline at end of file diff --git a/src/grammars/bison/Generated-CSharp/parse.txt b/src/grammars/bison/Generated-CSharp/parse.txt new file mode 100644 index 000000000..e69de29bb diff --git a/src/grammars/bison/Generated-CSharp/test.sh b/src/grammars/bison/Generated-CSharp/test.sh index 6b2bdb231..6369bb4b9 100644 --- a/src/grammars/bison/Generated-CSharp/test.sh +++ b/src/grammars/bison/Generated-CSharp/test.sh @@ -33,7 +33,7 @@ done # Parse all input files. # Group parsing. -echo "${files[*]}" | dotnet trwdog -- ./bin/Debug/net7.0/bison.exe -q -x -tee -tree > parse.txt 2>&1 +echo "${files[*]}" | dotnet trwdog -- ./bin/Debug/net7.0/Test.exe -q -x -tee -tree > parse.txt 2>&1 status=$? # trwdog returns 255 if it cannot spawn the process. This could happen diff --git a/src/grammars/bison/Go/bison_lexer_base.go b/src/grammars/bison/Go/bison_lexer_base.go new file mode 100644 index 000000000..80020903b --- /dev/null +++ b/src/grammars/bison/Go/bison_lexer_base.go @@ -0,0 +1,28 @@ +package parser + +import ( + "github.com/antlr4-go/antlr/v4" +) + +type BisonLexerBase struct { + *antlr.BaseLexer + percent_percent_count int +} + +func (l *BisonLexerBase) NextMode() { + l.percent_percent_count = l.percent_percent_count + 1; + if (l.percent_percent_count == 1) { + return; + } else if (l.percent_percent_count == 2) { + l.PushMode(BisonLexerEpilogueMode); + return; + } else { + l.SetType(BisonLexerPercentPercent); + return; + } +} + +func (l *BisonLexerBase) Reset() { + l.percent_percent_count = 0 + l.BaseLexer.Reset() +} diff --git a/src/grammars/bison/Go/transformGrammar.py b/src/grammars/bison/Go/transformGrammar.py new file mode 100644 index 000000000..e33021d1f --- /dev/null +++ b/src/grammars/bison/Go/transformGrammar.py @@ -0,0 +1,56 @@ +import sys, os, re, shutil +from glob import glob +from pathlib import Path + +def main(argv): + for file in glob("./parser/*Lexer.g4"): + fix_lexer(file) + for file in glob("./parser/*Parser.g4"): + fix_parser(file) + +def fix_lexer(file_path): + print("Altering " + file_path) + if not os.path.exists(file_path): + print(f"Could not find file: {file_path}") + sys.exit(1) + parts = os.path.split(file_path) + file_name = parts[-1] + + shutil.move(file_path, file_path + ".bak") + input_file = open(file_path + ".bak",'r') + output_file = open(file_path, 'w') + for x in input_file: + if 'this.' in x and '}?' in x: + x = x.replace('this.', 'p.') + elif 'this.' in x: + x = x.replace('this.', 'l.') + output_file.write(x) + output_file.flush() + + print("Writing ...") + input_file.close() + output_file.close() + +def fix_parser(file_path): + print("Altering " + file_path) + if not os.path.exists(file_path): + print(f"Could not find file: {file_path}") + sys.exit(1) + parts = os.path.split(file_path) + file_name = parts[-1] + + shutil.move(file_path, file_path + ".bak") + input_file = open(file_path + ".bak",'r') + output_file = open(file_path, 'w') + for x in input_file: + if 'this.' in x: + x = x.replace('this.', 'p.') + output_file.write(x) + output_file.flush() + + print("Writing ...") + input_file.close() + output_file.close() + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/grammars/bison/Java/BisonLexerBase.java b/src/grammars/bison/Java/BisonLexerBase.java index 7494e5d82..fbf29334e 100644 --- a/src/grammars/bison/Java/BisonLexerBase.java +++ b/src/grammars/bison/Java/BisonLexerBase.java @@ -13,13 +13,12 @@ public void NextMode() { ++percent_percent_count; if (percent_percent_count == 1) { - //this.PushMode(BisonLexer.RuleMode); return; } else if (percent_percent_count == 2) { this.pushMode(BisonLexer.EpilogueMode); return; } else { - this.setType(BisonLexer.PERCENT_PERCENT); + this.setType(BisonLexer.PercentPercent); return; } } diff --git a/src/grammars/bison/JavaScript/BisonLexerBase.js b/src/grammars/bison/JavaScript/BisonLexerBase.js new file mode 100644 index 000000000..097b599da --- /dev/null +++ b/src/grammars/bison/JavaScript/BisonLexerBase.js @@ -0,0 +1,32 @@ +import antlr4 from 'antlr4'; +import BisonLexer from './BisonLexer.js'; + +export default class BisonLexerBase extends antlr4.Lexer { + constructor(input) { + super(input); + this.percent_percent_count = 0; + } + + NextMode() + { + ++this.percent_percent_count; + if (this.percent_percent_count == 1) + { + return; + } else if (this.percent_percent_count == 2) + { + this.pushMode(BisonLexer.EpilogueMode); + return; + } else + { + this.type = BisonLexer.PercentPercent; + return; + } + } + + reset() { + this.percent_percent_count = 0; + super.reset(); + } +} + diff --git a/src/grammars/bison/Python3/BisonLexerBase.py b/src/grammars/bison/Python3/BisonLexerBase.py new file mode 100644 index 000000000..2cbf192f4 --- /dev/null +++ b/src/grammars/bison/Python3/BisonLexerBase.py @@ -0,0 +1,30 @@ +from typing import TextIO +from antlr4 import * +from antlr4.Token import CommonToken +import sys +from typing import TextIO + +class BisonLexerBase(Lexer): + + def __init__(self, input: InputStream, output: TextIO = sys.stdout): + super().__init__(input, output) + self.percent_percent_count = 0 + + def reset(self): + self.percent_percent_count = 0 + super().reset() + + def NextMode(self): + if "." in __name__: + from .BisonLexer import BisonLexer + else: + from BisonLexer import BisonLexer + self.percent_percent_count = self.percent_percent_count + 1; + if self.percent_percent_count == 1: + return + elif self.percent_percent_count == 2: + self.pushMode(BisonLexer.EpilogueMode) + return + else: + self.type = BisonLexer.PercentPercent; + return; diff --git a/src/grammars/bison/Python3/transformGrammar.py b/src/grammars/bison/Python3/transformGrammar.py new file mode 100644 index 000000000..80fa1b7bf --- /dev/null +++ b/src/grammars/bison/Python3/transformGrammar.py @@ -0,0 +1,28 @@ +import sys, os, re, shutil + +def main(argv): + fix("BisonLexer.g4") + +def fix(file_path): + print("Altering " + file_path) + if not os.path.exists(file_path): + print(f"Could not find file: {file_path}") + sys.exit(1) + parts = os.path.split(file_path) + file_name = parts[-1] + shutil.move(file_path, file_path + ".bak") + input_file = open(file_path + ".bak",'r') + output_file = open(file_path, 'w') + for x in input_file: + if '!this.' in x: + x = x.replace('!this.', 'not self.') + if 'this.' in x: + x = x.replace('this.', 'self.') + output_file.write(x) + output_file.flush() + print("Writing ...") + input_file.close() + output_file.close() + +if __name__ == '__main__': + main(sys.argv) diff --git a/src/grammars/bison/TypeScript/BisonLexerBase.ts b/src/grammars/bison/TypeScript/BisonLexerBase.ts new file mode 100644 index 000000000..57169ee5c --- /dev/null +++ b/src/grammars/bison/TypeScript/BisonLexerBase.ts @@ -0,0 +1,34 @@ +import { CommonToken, Lexer, CharStream, Token } from "antlr4"; +import BisonLexer from './BisonLexer'; + +export default abstract class BisonLexerBase extends Lexer { + percent_percent_count: number; + + constructor(input: CharStream) { + super(input); + this.percent_percent_count = 0; + } + + reset() { + this.percent_percent_count = 0; + super.reset(); + } + + NextMode() + { + ++this.percent_percent_count; + if (this.percent_percent_count == 1) + { + return; + } else if (this.percent_percent_count == 2) + { + this.pushMode(BisonLexer.EpilogueMode); + return; + } else + { + this._type = BisonLexer.PercentPercent; + return; + } + } +} + diff --git a/src/grammars/bison/desc.xml b/src/grammars/bison/desc.xml index 26bde4cec..eb46af1b7 100644 --- a/src/grammars/bison/desc.xml +++ b/src/grammars/bison/desc.xml @@ -1,4 +1,4 @@ - CSharp;Cpp;Dart;Go;Java;JavaScript;PHP;Python3;TypeScript + Cpp;CSharp;Dart;Go;Java;JavaScript;Python3;TypeScript diff --git a/src/grammars/bison/readme.md b/src/grammars/bison/readme.md new file mode 100644 index 000000000..8f606eed5 --- /dev/null +++ b/src/grammars/bison/readme.md @@ -0,0 +1,14 @@ +# Bison Grammar + +## Author + +Ken Domino, FSF + +## Source + +https://git.savannah.gnu.org/cgit/bison.git/tree/src/parse-gram.y + +## Links + +[wikipedia](https://en.wikipedia.org/wiki/GNU_Bison) +[pldb](https://pldb.pub/concepts/bison.html) diff --git a/src/trconvert/ConvertAntlr3.cs b/src/trconvert/ConvertAntlr3.cs index a8b73805f..d2fbbcd19 100644 --- a/src/trconvert/ConvertAntlr3.cs +++ b/src/trconvert/ConvertAntlr3.cs @@ -401,7 +401,7 @@ or text() = 'rewrite' new StaticContextBuilder()).evaluate( dynamicContext, new object[] { os }) .Select(x => (x.NativeValue as ParseTreeEditing.UnvParseTreeDOM.UnvParseTreeElement)).FirstOrDefault(); - TreeEdits.Replace(star_plus, star_plus.GetText() + "?"); + if (star_plus != null) TreeEdits.Replace(star_plus, star_plus.GetText() + "?"); } // if (greedyOptionSpec.Contains(os) && os.ParentNode.LocalName == "block") diff --git a/src/trconvert/Properties/launchSettings.json b/src/trconvert/Properties/launchSettings.json index 7cf6da97b..3e6c04f2d 100644 --- a/src/trconvert/Properties/launchSettings.json +++ b/src/trconvert/Properties/launchSettings.json @@ -3,7 +3,7 @@ "trconvert": { "commandName": "Project", "commandLineArgs": "-f o.pt", - "workingDirectory": "c:\\Users\\Kenne\\Documents\\GitHub\\Domemtech.Trash\\src\\grammars\\bison\\examples" + "workingDirectory": "c:\\Users\\Kenne\\Documents\\GitHub\\Domemtech.Trash\\_tests\\trconvert\\antlr3" } } } \ No newline at end of file diff --git a/src/trperf/Properties/launchSettings.json b/src/trperf/Properties/launchSettings.json index ca982ef4c..56e3be10b 100644 --- a/src/trperf/Properties/launchSettings.json +++ b/src/trperf/Properties/launchSettings.json @@ -2,8 +2,8 @@ "profiles": { "trperf": { "commandName": "Project", - "commandLineArgs": "test2.in", - "workingDirectory": "c:\\msys64\\home\\Kenne\\so\\so74357697\\cs" + "commandLineArgs": "examples/Test.kt", + "workingDirectory": "c:\\msys64\\home\\Kenne\\issues\\g4-3539\\kotlin\\kotlin\\Generated-CSharp" } } } \ No newline at end of file