1 2 // Copyright Tim Schendekehl 2023. 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 module dparsergen.generator.parsercodegen; 8 import dparsergen.core.utils; 9 import dparsergen.generator.codewriter; 10 import dparsergen.generator.globaloptions; 11 import dparsergen.generator.grammar; 12 import dparsergen.generator.ids; 13 import dparsergen.generator.parser; 14 import dparsergen.generator.parsercodegencommon; 15 import dparsergen.generator.production; 16 import dparsergen.generator.regexlookahead; 17 import std.algorithm; 18 import std.conv; 19 import std.exception; 20 import std.range; 21 import std.stdio; 22 import std.typecons; 23 24 void generateReduceFunction(ref CodeWriter code, LRGraph graph, string funcName, 25 immutable ProductionID[] productionIDs, 26 NonterminalID[immutable(NonterminalID[])] combinedReduceNonterminals) 27 { 28 auto grammar = graph.grammar; 29 NonterminalID[] combinedReduceNonterminalIDs; 30 NonterminalID combinedReduceNonterminal; 31 if (productionIDs.length > 1) 32 { 33 foreach (productionID; productionIDs) 34 { 35 enforce(!grammar.hasNonTrivialRewriteRule(grammar.productionsData[productionID])); 36 combinedReduceNonterminalIDs.addOnce( 37 grammar.productionsData[productionID].nonterminalID); 38 code.writeln("// ", grammar.productionString(grammar.productionsData[productionID])); 39 } 40 combinedReduceNonterminalIDs.sort(); 41 combinedReduceNonterminal = combinedReduceNonterminals[combinedReduceNonterminalIDs]; 42 } 43 const e = LRElement(grammar.productionsData[productionIDs[0]], 44 grammar.productionsData[productionIDs[0]].symbols.length); 45 if (e.isFinished(grammar)) 46 { 47 const n = e.stackSymbols.length; 48 49 mixin(genCode("code", q{ 50 $$CommaGen comma; 51 auto $(funcName)( _ 52 $$if (n > 0) { 53 $(comma())Location parseStart _ 54 $$} 55 $$foreach (k, symbol; e.stackSymbols) { 56 $$if (symbol.dropNode) code.write("/*"); 57 $(comma())ParseStackElem!(Location, _ 58 $$if (symbol.isToken) 59 Token _ 60 $$else 61 NonterminalType!$(symbol.id + grammar.startNonterminalID) _ 62 ) stack$(e.stackSymbols.length - k) _ 63 $$if (symbol.dropNode) code.write("*/"); 64 $$} 65 ) 66 })); 67 68 code.writeln("{").incIndent; 69 70 if (n == 0) 71 code.writeln("Location parseStart = lastTokenEnd;"); 72 73 const RewriteRule[] rewriteRules = grammar.getRewriteRules(e.production); 74 assert(rewriteRules.length > 0); 75 76 string[] rewriteParameters; 77 foreach (k; iota(e.production.symbols.length, 0, -1)) 78 { 79 if (e.production.symbols[$ - k].dropNode) 80 rewriteParameters ~= "undefinedIdentifier/*error*/"; 81 else 82 rewriteParameters ~= text("stack", k); 83 } 84 85 void genSetParseTree() 86 { 87 string funcName = "createParseTree"; 88 if (productionIDs.length > 1) 89 funcName ~= "Combined"; 90 size_t stackPos = n; 91 string stackStartExpr = "parseStart"; 92 mixin(genCode("code", q{ 93 Location end = lastTokenEnd; 94 if (end < parseStart) 95 end = parseStart; 96 97 pt = creator.$(funcName)!( _ 98 $$if (productionIDs.length > 1) { 99 $(combinedReduceNonterminal.id + grammar.startNonterminalID)/*Combined:$(combinedReduceNonterminalIDs.map!(n=>grammar.getSymbolName(n)).join("|"))*/ _ 100 $$foreach (productionID; productionIDs) { 101 , $(grammar.getRewriteRules(grammar.productionsData[productionID])[$ - 1].applyProduction.productionID + grammar.startProductionID) _ 102 $$} 103 $$} else { 104 $(rewriteRules[$ - 1].applyProduction.productionID + grammar.startProductionID) _ 105 $$} 106 )( _ 107 parseStart, end _ 108 $$size_t iParam; 109 $$foreach (k; 0 .. rewriteRules[$ - 1].applyProduction.symbols.length) { 110 $$if (!rewriteRules[$ - 1].applyProduction.symbols[k].dropNode) { 111 $$if (rewriteRules[$ - 1].parameters[iParam] == size_t.max) { 112 , ParseStackElem!(Location, NonterminalType!$(rewriteRules[$ - 1].applyProduction.symbols[k].id + grammar.startNonterminalID))($(stackStartExpr), NonterminalType!$(rewriteRules[$ - 1].applyProduction.symbols[k].id + grammar.startNonterminalID).init/*null*/) _ 113 $$iParam++; 114 $$} else { 115 , $(rewriteParameters[rewriteRules[$ - 1].parameters[iParam]]) _ 116 $$stackStartExpr = text("stack", stackPos, ".end"); 117 $$stackPos--; 118 $$iParam++; 119 $$} 120 $$} else { 121 $$stackPos--; 122 $$} 123 $$} 124 ); 125 })); 126 } 127 128 code.writeln("NonterminalType!(", grammar.nonterminalIDCode( 129 nonterminalOutForProduction(rewriteRules[$ - 1].applyProduction)), ") pt;"); 130 131 if (grammar.isSimpleProduction(*e.production)) 132 { 133 size_t stackPos = n; 134 size_t nrNotDropped = size_t.max; 135 foreach (k, s; e.production.symbols) 136 { 137 if (!s.dropNode) 138 { 139 nrNotDropped = stackPos; 140 stackPos--; 141 } 142 else 143 stackPos--; 144 } 145 146 if (nrNotDropped != size_t.max) 147 { 148 code.writeln("pt = ", rewriteParameters[$ - nrNotDropped], ".val;"); 149 code.writeln("parseStart = ", rewriteParameters[$ - nrNotDropped], ".start;"); 150 } 151 else 152 code.writeln("pt = typeof(pt).init;"); 153 } 154 else 155 { 156 if (n > 0 && !grammar.nonterminals[e.production.nonterminalID].annotations.contains!"array"() 157 && !grammar.nonterminals[e.production.nonterminalID].annotations.contains!"string"()) 158 { 159 mixin(genCode("code", q{ 160 })); 161 } 162 mixin(genCode("code", q{ 163 { 164 $$genSetParseTree(); 165 } 166 })); 167 } 168 169 mixin(genCode("code", q{ 170 return ParseStackElem!(Location, NonterminalType!($(grammar.nonterminalIDCode(rewriteRules[$ - 1].applyProduction.nonterminalID))))(parseStart, pt); 171 })); 172 173 code.decIndent.writeln("}"); 174 } 175 } 176 177 string resultType(LRGraph graph, const LRGraphNode node, 178 NonterminalID[immutable(NonterminalID[])] combinedReduceNonterminals, 179 bool* useUnion = null, NonterminalID* singleNonterminal = null) 180 { 181 auto grammar = graph.grammar; 182 immutable inFinalParseState = node.isFinalParseState; 183 if (inFinalParseState) 184 { 185 if (useUnion) 186 *useUnion = false; 187 if (singleNonterminal) 188 *singleNonterminal = node.elements[0].production.symbols[0].toNonterminalID; 189 return text("NonterminalType!(", 190 node.elements[0].production.symbols[0].id + grammar.startNonterminalID, ")"); 191 } 192 193 const(NonterminalID)[] nonterminalsOut = node.allNonterminalsOut(grammar); 194 if (graph.globalOptions.delayedReduce == DelayedReduce.combined) 195 { 196 foreach (nonterminals; node.delayedReduceOutCombinations) 197 { 198 nonterminalsOut ~= combinedReduceNonterminals[nonterminals]; 199 } 200 } 201 if (nonterminalsOut.length == 1) 202 { 203 if (useUnion) 204 *useUnion = false; 205 if (singleNonterminal) 206 *singleNonterminal = nonterminalsOut[0]; 207 return text("NonterminalType!(", nonterminalsOut[0].id + grammar.startNonterminalID, ")"); 208 } 209 if (useUnion) 210 *useUnion = true; 211 return text("CreatorInstance.NonterminalUnion!(", "[", 212 nonterminalsOut.map!(x => (x.id + grammar.startNonterminalID).text).joiner(", "), "])"); 213 } 214 215 string parserStackType(LRGraph graph, size_t stateNr, size_t pos) 216 { 217 auto grammar = graph.grammar; 218 const LRGraphNode node = graph.states[stateNr]; 219 auto k = node.stackSymbols.length - pos; 220 auto symbol = node.stackSymbols[k]; 221 string r = "ParseStackElem!(Location, "; 222 if (symbol.isToken) 223 r ~= "Token"; 224 else if (node.stackDelayedReduce[k]) 225 r ~= text("NonterminalType!", 226 grammar.nonterminalIDCode(node.stackDelayedReduce[k][0].toNonterminalID)); 227 else if (symbol.id == SymbolID.max && graph.globalOptions.directUnwrap) 228 { 229 if (node.isArrayOnStack(grammar, k)) 230 r ~= "CreatorInstance.NonterminalArray"; 231 else 232 r ~= "CreatorInstance.Type"; 233 } 234 else 235 r ~= text("NonterminalType!", grammar.nonterminalIDCode(symbol.toNonterminalID)); 236 r ~= ")"; 237 return r; 238 } 239 240 void createParseFunction(ref CodeWriter code, LRGraph graph, size_t stateNr, const LRGraphNode node, bool useRegexlookahead, 241 ref EBNFGrammar lookaheadGrammar, 242 NonterminalID[immutable(NonterminalID[])] combinedReduceNonterminals, 243 ref RegexLookahead regexLookahead) 244 { 245 auto grammar = graph.grammar; 246 immutable endTok = grammar.tokens.getID("$end"); 247 248 createParseStateComments(code, graph, stateNr, node); 249 250 if (node.elements.length == 0) 251 { 252 code.writeln("// skipped ", parseFunctionName(graph, stateNr)); 253 code.writeln(); 254 return; 255 } 256 257 ActionTable actionTable = genActionTable(graph, node); 258 bool hasDelayedReduceOutput; 259 foreach (t, x; actionTable.actions) 260 foreach (s, a; x) 261 { 262 if (a.type == ActionType.delayedReduce) 263 { 264 if (a.reusedDelayedNonterminals.length > 0) 265 hasDelayedReduceOutput = true; 266 foreach (e; a.delayedElements) 267 if (node.elements[e].dotPos > 0) 268 hasDelayedReduceOutput = true; 269 } 270 } 271 272 if (graph.globalOptions.delayedReduce) 273 hasDelayedReduceOutput = true; 274 275 size_t[2] minMaxGotoParent = node.minMaxGotoParent; 276 immutable inFinalParseState = node.isFinalParseState; 277 278 const Symbol[] stackSymbols = node.stackSymbols; 279 bool[] stackSymbolsNotDropped = node.stackSymbolsNotDropped; 280 bool[] stackSymbolsStartOfProduction = node.stackSymbolsStartOfProduction; 281 282 bool thisHasTags = node.hasTags(graph); 283 284 bool thisResultUsesUnion; 285 NonterminalID thisResultSingleNonterminal; 286 287 mixin(genCode("code", q{ 288 $(inFinalParseState ? "" : "private ") _ 289 int $(parseFunctionName(graph, stateNr))( _ 290 ref $(resultType(graph, node, combinedReduceNonterminals, &thisResultUsesUnion, &thisResultSingleNonterminal)) result, _ 291 ref Location resultLocation _ 292 $$if (thisHasTags) { 293 , scope Tag *outTags$(inFinalParseState ? " = null" : "") _ 294 $$} 295 $$foreach (k, symbol; node.stackSymbols) { 296 $$if (stackSymbolsStartOfProduction[k]) { 297 , Location parseStart$(stackSymbols.length - k) _ 298 $$} 299 $$if (!stackSymbolsNotDropped[k]) code.write("/+"); 300 , $(parserStackType(graph, stateNr, stackSymbols.length - k)) stack$(stackSymbols.length - k) _ 301 $$if (!stackSymbolsNotDropped[k]) code.write("+/"); 302 $$if (node.needsTagsOnStack(grammar, k)) { 303 , Tag stackTags$(stackSymbols.length - k) _ 304 $$} 305 $$} 306 ) 307 })); 308 309 code.writeln("{"); 310 code.incIndent; 311 code.writeln("alias ThisParseResult = typeof(result);"); 312 313 bool[Symbol] usedNegLookahead; 314 foreach (e; node.elements) 315 { 316 if (e.isNextValid(grammar)) 317 foreach (n; e.next(grammar).negLookaheads) 318 if (n !in usedNegLookahead) 319 { 320 code.writeln("bool disallow", symbolNameCode(grammar, n), ";"); 321 usedNegLookahead[n] = true; 322 } 323 } 324 325 size_t allowTokenCount; 326 foreach (e; node.elements) 327 { 328 if (!e.isStartElement) 329 continue; 330 331 foreach (a; graph.grammar.nonterminals[e.production.nonterminalID] 332 .annotations.otherAnnotations) 333 if (a.startsWith("enableToken(")) 334 { 335 enforce(a.endsWith(")")); 336 string name = a[12 .. $ - 1]; 337 auto id = graph.grammar.tokens.getID(name); 338 code.writeln("const bool allowToken", allowTokenCount, 339 "Orig = lexer.allowToken!", graph.grammar.tokens[id].tokenDCode, ";"); 340 code.writeln("lexer.allowToken!", graph.grammar.tokens[id].tokenDCode, " = true;"); 341 code.writeln("scope(exit)"); 342 code.writeln(" lexer.allowToken!", graph.grammar.tokens[id].tokenDCode, 343 " = allowToken", allowTokenCount, "Orig;"); 344 allowTokenCount++; 345 } 346 } 347 348 if (actionTable.hasShift) 349 { 350 code.writeln("alias ParseResultIn = CreatorInstance.NonterminalUnion!([", 351 node.allNonterminals.map!(x => (x.id + grammar.startNonterminalID) 352 .text).joiner(", "), "]);"); 353 code.writeln("ParseResultIn currentResult;"); 354 code.writeln("Location currentResultLocation;"); 355 if (actionTable.hasTags) 356 code.writeln("Tag currentTags;"); 357 code.writeln("int gotoParent = -1;"); 358 } 359 code.writeln("Location currentStart = lexer.front.currentLocation;"); 360 361 bool stillReachable = true; 362 void shiftCode(size_t newState, string var, bool tokenShift, bool delayedReduceShift) 363 { 364 if (newState == size_t.max) 365 { 366 code.writeln(q{lastError = new SingleParseException!Location("shift to empty state ", lexer.front.currentLocation, lexer.front.currentLocation);}); 367 code.writeln(q{return -1;}); 368 return; 369 } 370 371 const LRGraphNode nextNode = graph.states[newState]; 372 const Symbol[] nextStackSymbols = nextNode.stackSymbols; 373 assert(nextStackSymbols.length <= stackSymbols.length + 1, text(stateNr, " ", newState)); 374 375 const(Symbol)[] nextTopStackSymbols; 376 foreach (e; nextNode.elements) 377 { 378 auto s = e.stackSymbols; 379 if (s.length == 0) 380 continue; 381 nextTopStackSymbols.addOnce(s[$ - 1]); 382 } 383 384 bool nextHasTags = graph.states[newState].hasTags(graph); 385 386 mixin(genCode("code", q{ 387 auto next = $(var); 388 $$bool nextUsesUnion; 389 $$NonterminalID nextSingleNonterminal; 390 $(resultType(graph, graph.states[newState], combinedReduceNonterminals, &nextUsesUnion, &nextSingleNonterminal)) r; 391 Location rl; 392 gotoParent = $(parseFunctionName(graph, newState))(r, rl _ 393 $$bool[] stackSymbolsNotDropped2 = nextNode.stackSymbolsNotDropped; 394 $$bool[] stackSymbolsStartOfProduction2 = nextNode.stackSymbolsStartOfProduction; 395 $$if (nextHasTags) { 396 , ¤tTags _ 397 $$} 398 $$foreach (k; iota(nextStackSymbols.length - 1, 0, -1)) { 399 $$if (stackSymbolsStartOfProduction2[$ - 1-k]) { 400 , parseStart$(k) _ 401 $$} 402 $$if (stackSymbolsNotDropped2[$ - 1-k]) { 403 , stack$(k) _ 404 $$} else { 405 /*, stack$(k)*/ _ 406 $$} 407 $$if (nextNode.needsTagsOnStack(grammar, nextStackSymbols.length - k - 1)) { 408 , stackTags$(k) _ 409 $$} 410 $$} 411 $$if (stackSymbolsStartOfProduction2.length && stackSymbolsStartOfProduction2[$ - 1]) { 412 , currentStart _ 413 $$} 414 $$if (stackSymbolsNotDropped2.length && stackSymbolsNotDropped2[$ - 1]) { 415 , next _ 416 $$} else { 417 /*, next*/ _ 418 $$} 419 $$if (nextNode.needsTagsOnStack(grammar, nextStackSymbols.length - 1)) { 420 , currentTags _ 421 $$} 422 ); 423 $$if (actionTable.hasTags && !nextHasTags) { 424 currentTags = Tag.none; 425 $$} 426 if (gotoParent < 0) 427 return gotoParent; 428 $$if (nextNode.minMaxGotoParent[0] > 0) { 429 assert(gotoParent > 0); 430 $$if (inFinalParseState) { 431 $$if (nextUsesUnion) { 432 auto tree = r.get!($(node.elements[0].production.symbols[0].id + grammar.startNonterminalID)); 433 $$} else { 434 auto tree = r; 435 $$} 436 result = tree; 437 resultLocation = rl; 438 $$if (thisHasTags) { 439 if (outTags) 440 *outTags = $(actionTable.hasTags ? "currentTags" : "Tag.none"); 441 $$} 442 return 0; 443 $$} else { 444 $$if (nextUsesUnion == thisResultUsesUnion) { 445 result = r; 446 $$} else { 447 result = ThisParseResult.create($(grammar.nonterminalIDCode(nextSingleNonterminal)), r); 448 $$} 449 resultLocation = rl; 450 $$if (thisHasTags) { 451 if (outTags) 452 *outTags = $(actionTable.hasTags ? "currentTags" : "Tag.none"); 453 $$} 454 return gotoParent - 1; 455 $$} 456 $$} else { 457 $$stillReachable = true; 458 $$if (nextUsesUnion) { 459 currentResult = r; 460 $$} else { 461 currentResult = ParseResultIn.create($(grammar.nonterminalIDCode(nextSingleNonterminal)), r); 462 $$} 463 currentResultLocation = rl; 464 $$} 465 })); 466 } 467 468 bool hasConflict = false; 469 foreach (subTokenActions; actionTable.actions) 470 foreach (a; subTokenActions) 471 { 472 if (a.type == ActionType.conflict) 473 hasConflict = true; 474 } 475 476 bool shownConflictError; 477 478 void printConflictComment(Action bestAction, TokenID currentToken = TokenID.invalid) 479 { 480 assert(bestAction.type == ActionType.conflict); 481 code.writeln("/+"); 482 code.writeln(bestAction); 483 code.writeln(" currentToken: ", grammar.getSymbolName(currentToken)); 484 foreach (a; bestAction.conflictActions) 485 { 486 if (a.type == ActionType.reduce || a.type == ActionType.accept) 487 { 488 auto e = node.elements[a.elementNr]; 489 code.writeln(" reduce ", e.toString(graph)); 490 if (currentToken != TokenID.invalid) 491 { 492 bool[ElementID] done; 493 bool findReduceElement(size_t stateNr, const LRElement e) 494 { 495 auto node = graph.states[stateNr]; 496 if (e.dotPos == 0) 497 { 498 foreach (elementNr2, e2; node.elements) 499 { 500 if (e2.dotPos >= e2.production.symbols.length) 501 continue; 502 if (e2.production.symbols[e2.dotPos].isToken) 503 continue; 504 bool found; 505 foreach (n; e2.nextNonterminals(graph.grammar, 506 graph.globalOptions.directUnwrap)) 507 { 508 if (n.nonterminalID == e.production.nonterminalID) 509 { 510 found = true; 511 break; 512 } 513 } 514 if (!found) 515 continue; 516 517 if (e2.dotPos + 1 < e2.production.symbols.length 518 && grammar.firstSetContains(e2.production.symbols[e2.dotPos + 1 .. $], 519 currentToken)) 520 { 521 code.writeln(" source: ", e2.toString(graph)); 522 return true; 523 } 524 if (e2.production.symbols[e2.dotPos + 1 .. $].all!( 525 s => grammar.canBeEmpty(s))) 526 { 527 if (ElementID(stateNr, elementNr2) !in done) 528 { 529 done[ElementID(stateNr, elementNr2)] = true; 530 if (findReduceElement(stateNr, e2)) 531 { 532 code.writeln(" state self: ", e2.toString(graph)); 533 return true; 534 } 535 } 536 } 537 } 538 } 539 foreach (prev; e.prevElements) 540 { 541 size_t prevState = prev.state; 542 if (prevState == size_t.max) 543 prevState = stateNr; 544 auto e2 = graph.states[prevState].elements[prev.elementNr]; 545 if (!e2.lookahead[currentToken]) 546 continue; 547 if (ElementID(prevState, prev.elementNr) in done) 548 { 549 continue; 550 } 551 done[ElementID(prevState, prev.elementNr)] = true; 552 if (findReduceElement(prevState, e2)) 553 { 554 if (prev.state == size_t.max) 555 { 556 code.writeln(" state self: ", e2.toString(graph)); 557 } 558 else 559 { 560 code.writeln(" state ", prev.state, 561 ": ", e2.toString(graph)); 562 } 563 return true; 564 } 565 } 566 return false; 567 } 568 569 findReduceElement(stateNr, e); 570 } 571 } 572 else if (a.type == ActionType.shift) 573 { 574 code.writeln(" ", a); 575 if (currentToken != TokenID.invalid) 576 { 577 foreach (e; node.elements) 578 { 579 if (grammar.firstSet(e.afterDot(grammar))[currentToken]) 580 { 581 code.writeln(" ", e.toString(graph)); 582 } 583 } 584 } 585 } 586 else if (a.type == ActionType.descent) 587 { 588 code.writeln(" descent ", grammar.getSymbolName(a.nonterminalID)); 589 if (currentToken != TokenID.invalid) 590 { 591 NonterminalID nonterminalID = a.nonterminalID; 592 bool[NonterminalID] done; 593 outer: while (nonterminalID !in done) 594 { 595 done[nonterminalID] = true; 596 foreach (p; grammar.getProductions(nonterminalID)) 597 { 598 foreach (i; 0 .. p.symbols.length) 599 { 600 if (p.symbols[i].isToken) 601 { 602 if (p.symbols[i] == currentToken) 603 { 604 code.writeln(" source ", i, " ", 605 grammar.productionString(p)); 606 break outer; 607 } 608 break; 609 } 610 else 611 { 612 BitSet!TokenID firstSet = grammar.firstSet(p.symbols[i .. i + 1]); 613 if (!firstSet[grammar.tokens.getID("$end")]) 614 break; 615 } 616 } 617 } 618 foreach (p; grammar.getProductions(nonterminalID)) 619 { 620 foreach (i; 0 .. p.symbols.length) 621 { 622 BitSet!TokenID firstSet = grammar.firstSet(p.symbols[i .. i + 1]); 623 if (firstSet[currentToken]) 624 { 625 if (!p.symbols[i].isToken) 626 { 627 foreach (p2; grammar.getProductions( 628 p.symbols[i].toNonterminalID)) 629 { 630 if (grammar.firstSet(p2.symbols)[currentToken]) 631 { 632 code.writeln(" middle ", i, " ", 633 grammar.productionString(p)); 634 nonterminalID = p.symbols[i].toNonterminalID; 635 continue outer; 636 } 637 } 638 } 639 } 640 if (!firstSet[grammar.tokens.getID("$end")]) 641 break; 642 } 643 } 644 } 645 } 646 } 647 else 648 code.writeln(" ", a); 649 if (currentToken != TokenID.invalid) 650 code.writeln(" next lookahead: ", tokenSetToString(calcNextLookahead(graph, 651 stateNr, a, currentToken), grammar)); 652 } 653 code.writeln("+/"); 654 } 655 656 void actionCode(Action bestAction, TokenID currentToken = TokenID.invalid, 657 bool disallowNextLookahead = false) 658 { 659 assert(bestAction.type != ActionType.none); 660 661 void generateReduceParameters(ref CodeWriter code, const LRElement e) 662 { 663 bool needsComma, needsCommentClose; 664 if (e.production.symbols.length > 0) 665 { 666 code.write("parseStart", e.production.symbols.length); 667 needsComma = true; 668 } 669 foreach (k; iota(e.production.symbols.length, 0, -1)) 670 { 671 if (needsComma) 672 code.write(", "); 673 needsComma = true; 674 if (e.production.symbols[$ - k].dropNode) 675 { 676 if (!needsCommentClose) 677 code.write("/*"); 678 code.write("dropped"); 679 needsCommentClose = true; 680 } 681 else 682 { 683 if (needsCommentClose) 684 { 685 code.write("*/"); 686 needsCommentClose = false; 687 } 688 code.write("stack", k); 689 } 690 } 691 if (needsCommentClose) 692 code.write("*/"); 693 } 694 695 if (bestAction.type == ActionType.shift) 696 { 697 shiftCode(bestAction.newState, "popToken()", true, false); 698 } 699 else if (bestAction.type == ActionType.reduce || bestAction.type == ActionType.accept) 700 { 701 auto e = node.elements[bestAction.elementNr]; 702 size_t n = e.stackSymbols.length; 703 704 if (checkTagsCode!((k) => text("stackTags", k))(code, grammar, e.production)) 705 { 706 mixin(genCode("code", q{ 707 { 708 lastError = new SingleParseException!Location(text("Wrong tags"), lexer.front.currentLocation, lexer.front.currentTokenEnd); 709 return -1; 710 } 711 })); 712 } 713 714 if (e.isStartElement) 715 { 716 if (thisHasTags) 717 { 718 code.writeln("if (outTags)"); 719 code.writeln(" *outTags = ", reduceTagsCode!((k) => text("stackTags", 720 k))(grammar, e.production), ";"); 721 } 722 if (n > 0) 723 { 724 if (thisResultUsesUnion) 725 code.writeln("result = ThisParseResult.create(", 726 grammar.nonterminalIDCode(e.production.nonterminalID), 727 ", stack1.val);"); 728 else 729 code.writeln("result = stack1.val;"); 730 code.writeln("resultLocation = stack1.start;"); 731 code.writeln("return 1;"); 732 } 733 else 734 { 735 code.writeln("result = ThisParseResult.init;"); 736 code.writeln("resultLocation = Location.init;"); 737 code.writeln("return 0;"); 738 } 739 } 740 else if (n == 0) 741 { 742 if (thisHasTags) 743 { 744 code.writeln("currentTags = ", reduceTagsCode!((k) => text("stackTags", 745 k))(grammar, e.production), ";"); 746 } 747 code.write("auto tmp = ", reduceFunctionName(graph, e.production, "reduce"), "("); 748 generateReduceParameters(code, e); 749 code.writeln(");"); 750 code.writeln("currentResult = ParseResultIn.create(", 751 grammar.nonterminalIDCode(e.production.nonterminalID), ", tmp.val);"); 752 code.writeln("currentResultLocation = tmp.start;"); 753 code.writeln("gotoParent = 0;"); 754 stillReachable = true; 755 } 756 else 757 { 758 if (thisHasTags) 759 { 760 code.writeln("if (outTags)"); 761 code.writeln(" *outTags = ", reduceTagsCode!((k) => text("stackTags", 762 k))(grammar, e.production), ";"); 763 } 764 code.write("auto tmp = ", reduceFunctionName(graph, e.production, "reduce"), "("); 765 generateReduceParameters(code, e); 766 code.writeln(");"); 767 if (thisResultUsesUnion) 768 code.writeln("result = ThisParseResult.create(", 769 grammar.nonterminalIDCode(e.production.nonterminalID), ", tmp.val);"); 770 else 771 code.writeln("result = tmp.val;"); 772 code.writeln("resultLocation = tmp.start;"); 773 code.writeln("return ", n - 1, ";"); 774 } 775 } 776 else if (bestAction.type == ActionType.delayedReduce) 777 { 778 bool tagsCompatible = true; 779 foreach (i, ei; bestAction.delayedElements) 780 { 781 if (i == 0) 782 continue; 783 if ( 784 node.elements[ei].production.tags 785 != node.elements[bestAction.delayedElements[i - 1]].production.tags) 786 tagsCompatible = false; 787 auto symbolsLast = node 788 .elements[bestAction.delayedElements[i - 1]].production.symbols; 789 foreach (j, s; node.elements[ei].production.symbols) 790 if (symbolsLast[j].tags != s.tags) 791 tagsCompatible = false; 792 } 793 794 enforce(tagsCompatible, () { 795 string r = "Tags are not compatible for delayed reduce state:\n"; 796 foreach (ei; bestAction.delayedElements) 797 { 798 auto e = node.elements[ei]; 799 r ~= e.toString(graph); 800 r ~= "\n"; 801 r ~= text(e.production.tags); 802 foreach (s; e.production.symbols) 803 r ~= text(" ", s.tags); 804 r ~= "\n"; 805 } 806 return r; 807 }()); 808 809 if (checkTagsCode!((k) => text("stackTags", k))(code, grammar, 810 node.elements[bestAction.delayedElements[0]].production)) 811 { 812 mixin(genCode("code", q{ 813 { 814 lastError = new SingleParseException!Location(text("Wrong tags"), lexer.front.currentLocation, lexer.front.currentTokenEnd); 815 return -1; 816 } 817 })); 818 } 819 if (graph.globalOptions.delayedReduce == DelayedReduce.combined) 820 { 821 auto e = node.elements[bestAction.delayedElements[0]]; 822 size_t n = e.stackSymbols.length; 823 NonterminalID[] nonterminals; 824 ProductionID[] productionIDs; 825 foreach (x; bestAction.delayedElements) 826 { 827 nonterminals.addOnce(node.elements[x].production.nonterminalID); 828 productionIDs.addOnce(node.elements[x].production.productionID); 829 } 830 nonterminals.sort(); 831 productionIDs.sort(); 832 string name = "Combined:" ~ nonterminals.map!(n => grammar.getSymbolName(n)) 833 .join("|"); 834 835 if (thisHasTags) 836 { 837 code.writeln("if (outTags)"); 838 code.writeln(" *outTags = ", reduceTagsCode!((k) => text("stackTags", 839 k))(grammar, e.production), ";"); 840 } 841 code.writeln("auto tmp = ", 842 "combinedReduce" ~ productionIDs.map!(i => text(i)).join("_"), "("); 843 generateReduceParameters(code, e); 844 code.writeln(");"); 845 code.writeln("result = ThisParseResult.create(", 846 combinedReduceNonterminals[nonterminals].id, "/*", name, "*/, tmp.val);"); 847 code.writeln("resultLocation = tmp.start;"); 848 code.writeln("return ", n - 1, ";"); 849 } 850 else 851 assert(false); 852 } 853 else if (bestAction.type == ActionType.descent) 854 { 855 assert(bestAction.nonterminalID.id != SymbolID.max); 856 857 auto nextNode = graph.states[graph.nonterminalToState[bestAction.nonterminalID.id]]; 858 bool nextHasTags = nextNode.hasTags(graph); 859 860 mixin(genCode("code", q{ 861 { 862 $$if (thisHasTags && !nextHasTags) { 863 currentTags = Tag.none; 864 $$} 865 $(resultType(graph, nextNode, combinedReduceNonterminals)) pt; 866 Location rl; 867 gotoParent = $(parseFunctionName(graph, graph.nonterminalToState[bestAction.nonterminalID.id]))(pt, rl$(nextHasTags ? ", ¤tTags" : "")); 868 if (gotoParent < 0) 869 return gotoParent; 870 assert(gotoParent == 0); 871 currentResult = ParseResultIn.create($(grammar.nonterminalIDCode(bestAction.nonterminalID)), pt); 872 currentResultLocation = rl; 873 } 874 })); 875 stillReachable = true; 876 } 877 else if (bestAction.type == ActionType.conflict && () { 878 foreach (a; bestAction.conflictActions) 879 if (a.allowRegexLookahead) 880 return true; 881 return false; 882 }()) 883 { 884 code.writeln("/+"); 885 code.writeln(" regexLookahead:"); 886 foreach (a; bestAction.conflictActions) 887 code.writeln(" ", a); 888 code.writeln("+/"); 889 890 if (regexLookahead is null) 891 { 892 regexLookahead = new RegexLookahead(grammar, 893 actionTable.reduceConflictProductions, useRegexlookahead); 894 } 895 896 RegexLookaheadGraph regexLookaheadGraph = new RegexLookaheadGraph; 897 regexLookaheadGraph.id = regexLookahead.graphs.length; 898 regexLookahead.graphs ~= regexLookaheadGraph; 899 900 size_t[NonterminalID] descentActionByNonterminal; 901 size_t shiftAction = size_t.max; 902 foreach (i, action; bestAction.conflictActions) 903 { 904 if (action.type == ActionType.descent) 905 descentActionByNonterminal[action.nonterminalID] = i; 906 else if (action.type == ActionType.shift) 907 { 908 enforce(shiftAction == size_t.max); 909 shiftAction = i; 910 } 911 else if (action.type == ActionType.reduce) 912 { 913 auto e = node.elements[action.elementNr]; 914 NonterminalID ml = regexLookahead.getLookaheadNonterminal(graph, 915 stateNr, action.elementNr); 916 immutable(SymbolInstance)[] symbols = [SymbolInstance(ml)]; 917 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], 918 size_t)(symbols, i)); 919 } 920 else 921 enforce(false); 922 } 923 foreach (i, e; node.elements) 924 { 925 if (e.isNextNonterminal(grammar)) 926 { 927 foreach (nextNonterminal; e.nextNonterminals(grammar, 928 graph.globalOptions.directUnwrap)) 929 { 930 if (nextNonterminal.nonterminalID in descentActionByNonterminal) 931 { 932 size_t lookaheadUntil = e.dotPos + 1; 933 while (lookaheadUntil < e.production.symbols.length /* && e.production.symbols[lookaheadUntil].annotations.contains!"lookahead"*/ ) 934 lookaheadUntil++; 935 immutable(SymbolInstance)[] symbols = SymbolInstance( 936 nextNonterminal.nonterminalID) 937 ~ e.production.symbols[e.dotPos + 1 .. lookaheadUntil]; 938 if (e.production.annotations.contains!"lookahead") 939 { 940 NonterminalID ml = regexLookahead.getLookaheadNonterminal(graph, 941 stateNr, i); 942 symbols ~= SymbolInstance(ml); 943 } 944 else 945 symbols ~= [ 946 SymbolInstance(regexLookahead.getLookaheadNonterminalSimple(graph, stateNr, i)), 947 SymbolInstance(regexLookahead.grammar2.tokens.id("$anything")) 948 ]; 949 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], size_t)(symbols, 950 descentActionByNonterminal[nextNonterminal.nonterminalID])); 951 } 952 } 953 } 954 else if (e.isNextToken(grammar) && e.next(grammar).toTokenID == currentToken) 955 { 956 size_t lookaheadUntil = e.dotPos + 1; 957 while (lookaheadUntil < e.production.symbols.length) 958 lookaheadUntil++; 959 immutable(SymbolInstance)[] symbols = e.production 960 .symbols[e.dotPos .. lookaheadUntil]; 961 if (e.production.annotations.contains!"lookahead") 962 { 963 NonterminalID ml = regexLookahead.getLookaheadNonterminal(graph, 964 stateNr, i); 965 symbols ~= SymbolInstance(ml); 966 } 967 else 968 symbols ~= [ 969 SymbolInstance(regexLookahead.getLookaheadNonterminalSimple(graph, stateNr, i)), 970 SymbolInstance(regexLookahead.grammar2.tokens.id("$anything")) 971 ]; 972 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], 973 size_t)(symbols, shiftAction)); 974 } 975 } 976 977 code.writeln("switch (checkRegexLookahead", regexLookaheadGraph.id, "())"); 978 code.writeln("{").incIndent; 979 bool anyStillReachable = false; 980 foreach (i, action; bestAction.conflictActions) 981 { 982 mixin(genCode("code", q{ 983 case $(i): 984 { 985 $$stillReachable = false; 986 $$actionCode(action, currentToken); 987 } 988 $$if (stillReachable) { 989 $$anyStillReachable = true; 990 break; 991 $$} else { 992 assert(false); 993 $$} 994 })); 995 } 996 code.writeln("case SymbolID.max:"); 997 code.writeln(" return -1;"); 998 code.writeln("default:"); 999 code.writeln(" assert(false);"); 1000 code.decIndent.writeln("}"); 1001 1002 stillReachable = anyStillReachable; 1003 } 1004 else if (bestAction.type == ActionType.conflict 1005 && currentToken != TokenID.invalid && !disallowNextLookahead) 1006 { 1007 BitSet!TokenID[] nextLookaheads; 1008 nextLookaheads.length = bestAction.conflictActions.length; 1009 foreach (i, a; bestAction.conflictActions) 1010 { 1011 nextLookaheads[i] = calcNextLookahead(graph, stateNr, a, currentToken); 1012 } 1013 BitSet!TokenID[] exclusiveNextLookaheads; 1014 exclusiveNextLookaheads.length = bestAction.conflictActions.length; 1015 foreach (i, a; bestAction.conflictActions) 1016 { 1017 exclusiveNextLookaheads[i] = nextLookaheads[i].dup; 1018 foreach (j, a2; bestAction.conflictActions) 1019 { 1020 if (i != j) 1021 { 1022 foreach (t; nextLookaheads[j].bitsSet) 1023 exclusiveNextLookaheads[i][t] = false; 1024 } 1025 } 1026 } 1027 bool hasStillConflict; 1028 foreach (i, a; bestAction.conflictActions) 1029 { 1030 foreach (t; nextLookaheads[i].bitsSet) 1031 if (!exclusiveNextLookaheads[i][t]) 1032 hasStillConflict = true; 1033 } 1034 bool[] hasExclusive; 1035 hasExclusive.length = bestAction.conflictActions.length; 1036 bool hasAnyExclusive; 1037 foreach (i, a; bestAction.conflictActions) 1038 { 1039 foreach (t; exclusiveNextLookaheads[i].bitsSet) 1040 { 1041 hasExclusive[i] = true; 1042 hasAnyExclusive = true; 1043 break; 1044 } 1045 } 1046 if (!hasAnyExclusive) 1047 { 1048 actionCode(bestAction, currentToken, true); 1049 } 1050 else 1051 { 1052 if (hasStillConflict) 1053 printConflictComment(bestAction, currentToken); 1054 code.writeln("Lexer tmpLexer = *lexer;"); 1055 code.writeln("tmpLexer.popFront();"); 1056 bool needsElse; 1057 foreach (i, a; bestAction.conflictActions) 1058 { 1059 if (!hasExclusive[i]) 1060 continue; 1061 if (needsElse) 1062 code.write("else "); 1063 needsElse = true; 1064 bool needsOr; 1065 code.writeln("if (", tokenSetCode(grammar, 1066 exclusiveNextLookaheads[i], "tmpLexer"), ")"); 1067 code.writeln("{").incIndent; 1068 actionCode(a, currentToken, true); 1069 code.decIndent.writeln("}"); 1070 } 1071 code.writeln("else"); 1072 code.writeln("{").incIndent; 1073 if (hasStillConflict) 1074 { 1075 actionCode(bestAction, currentToken, true); 1076 } 1077 else 1078 { 1079 mixin(genCode("code", q{ 1080 lastError = new SingleParseException!Location(text("unexpected Token \"", tmpLexer.front.content, "\" \"", lexer.tokenName(tmpLexer.front.symbol), "\""), 1081 tmpLexer.front.currentLocation, tmpLexer.front.currentTokenEnd); 1082 return -1; 1083 })); 1084 } 1085 code.decIndent.writeln("}"); 1086 } 1087 } 1088 else if (bestAction.type == ActionType.conflict) 1089 { 1090 printConflictComment(bestAction, currentToken); 1091 mixin(genCode("code", q{ 1092 lastError = new SingleParseException!Location(text("conflict \"$(bestAction.errorMessage.escapeD)\" \"", lexer.front.content, "\" \"", lexer.tokenName(lexer.front.symbol), "\""), lexer.front.currentLocation, lexer.front.currentTokenEnd); 1093 return -1; 1094 })); 1095 if (!shownConflictError) 1096 { 1097 stderr.writeln("Warning: Parse conflict in state ", stateNr); 1098 shownConflictError = true; 1099 } 1100 } 1101 else 1102 assert(false); 1103 } 1104 1105 if (node.backtrackStates.length) 1106 { 1107 assert(node.type == LRGraphNodeType.backtrack || node.type == LRGraphNodeType.lookahead); 1108 } 1109 1110 if (node.elements.simpleLLState) 1111 { 1112 NonterminalID onlyNonterminal = getOnlyNonterminal(graph, node.elements); 1113 assert(onlyNonterminal.id != SymbolID.max); 1114 1115 auto nextNode = graph.states[graph.nonterminalToState[onlyNonterminal.id]]; 1116 bool nextHasTags = nextNode.hasTags(graph); 1117 1118 mixin(genCode("code", q{ 1119 { 1120 $$if (thisHasTags && !nextHasTags) { 1121 currentTags = Tag.none; 1122 $$} 1123 $(resultType(graph, nextNode, combinedReduceNonterminals)) pt; 1124 Location rl; 1125 gotoParent = $(parseFunctionName(graph, graph.nonterminalToState[onlyNonterminal.id]))(pt, rl$(nextHasTags ? ", ¤tTags" : "")); 1126 if (gotoParent < 0) 1127 return gotoParent; 1128 currentResult = ParseResultIn.create($(grammar.nonterminalIDCode(onlyNonterminal)), pt); 1129 currentResultLocation = rl; 1130 } 1131 })); 1132 } 1133 else if (node.type == LRGraphNodeType.backtrack) 1134 { 1135 assert(node.backtrackStates.length); 1136 code.writeln("Lexer savedLexer = *lexer;"); 1137 code.writeln("Location savedLastTokenEnd = lastTokenEnd;"); 1138 code.writeln("ParseException[", node.backtrackStates.length, "] exceptions;"); 1139 code.writeln("int gotoParent = -1;"); 1140 1141 NonterminalID startNonterminal = node.elements[0].production.nonterminalID; 1142 const Production*[] productions = grammar.getProductions(startNonterminal); 1143 assert(productions.length == node.backtrackStates.length); 1144 1145 string[] prods; 1146 foreach (i, s; node.backtrackStates) 1147 { 1148 prods ~= grammar.productionString(productions[i]); 1149 mixin(genCode("code", q{ 1150 // try $(s) $(grammar.productionString(productions[i])) 1151 try 1152 { 1153 $(resultType(graph, graph.states[s], combinedReduceNonterminals)) r; 1154 Location rl; 1155 gotoParent = $(parseFunctionName(graph, s))(r, rl); 1156 if (gotoParent < 0) 1157 { 1158 throw lastError; 1159 } 1160 if ($(tokenSetCode(grammar, node.elements[0].lookahead, "lexer", true))) 1161 { 1162 throw new SingleParseException!Location(text("unexpected \"", lexer.front.content, "\" \"", lexer.tokenName(lexer.front.symbol), "\""), lexer.front.currentLocation, lexer.front.currentTokenEnd); 1163 } 1164 result = r; 1165 resultLocation = rl; 1166 return gotoParent; 1167 } 1168 catch(ParseException e) 1169 { 1170 if (!e.allowBacktrack()) 1171 throw e; 1172 lastError = null; 1173 *lexer = savedLexer; 1174 lastTokenEnd = savedLastTokenEnd; 1175 exceptions[$(i)] = e; 1176 } 1177 })); 1178 } 1179 mixin(genCode("code", q{ 1180 lastError = new BacktrackParseException("no try worked", $(prods), exceptions.dup); 1181 return -1; 1182 })); 1183 stillReachable = false; 1184 } 1185 else if (node.type == LRGraphNodeType.lookahead) 1186 { 1187 assert(node.backtrackStates.length); 1188 code.writeln("//lookahead;"); 1189 1190 if (regexLookahead is null) 1191 { 1192 regexLookahead = new RegexLookahead(grammar, 1193 actionTable.reduceConflictProductions, useRegexlookahead); 1194 } 1195 1196 RegexLookaheadGraph regexLookaheadGraph = new RegexLookaheadGraph; 1197 regexLookaheadGraph.id = regexLookahead.graphs.length; 1198 regexLookahead.graphs ~= regexLookaheadGraph; 1199 1200 NonterminalID startNonterminal = node.elements[0].production.nonterminalID; 1201 const Production*[] productions = grammar.getProductions(startNonterminal); 1202 assert(productions.length == node.backtrackStates.length); 1203 1204 foreach (i, s; node.backtrackStates) 1205 { 1206 auto p = productions[i]; 1207 1208 immutable(SymbolInstance)[] symbols = p.symbols ~ SymbolInstance( 1209 regexLookahead.grammar2.tokens.id("$end")); 1210 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], 1211 size_t)(symbols, i)); 1212 } 1213 1214 code.writeln("switch (checkRegexLookahead", regexLookaheadGraph.id, "())"); 1215 code.writeln("{").incIndent; 1216 foreach (i, s; node.backtrackStates) 1217 { 1218 mixin(genCode("code", q{ 1219 case $(i): 1220 { 1221 return $(parseFunctionName(graph, node.backtrackStates[i]))(result, resultLocation); 1222 } 1223 assert(false); 1224 })); 1225 } 1226 code.writeln("case SymbolID.max:"); 1227 code.writeln(" return -1;"); 1228 code.writeln("default:"); 1229 code.writeln(" assert(false);"); 1230 code.decIndent.writeln("}"); 1231 1232 stillReachable = false; 1233 } 1234 else 1235 { 1236 if (hasConflict && !useRegexlookahead) 1237 { 1238 foreach (e; node.elements) 1239 { 1240 if (e.production.annotations.contains!"regexLookahead") 1241 { 1242 useRegexlookahead = true; 1243 } 1244 if (e.dotPos < e.production.symbols.length 1245 && e.production.symbols[e.dotPos].annotations.contains!"regexLookahead") 1246 { 1247 useRegexlookahead = true; 1248 } 1249 } 1250 } 1251 1252 if (hasConflict && useRegexlookahead) 1253 { 1254 if (regexLookahead is null) 1255 { 1256 regexLookahead = new RegexLookahead(grammar, 1257 actionTable.reduceConflictProductions, useRegexlookahead); 1258 } 1259 1260 RegexLookaheadGraph regexLookaheadGraph = new RegexLookaheadGraph; 1261 regexLookaheadGraph.id = regexLookahead.graphs.length; 1262 regexLookahead.graphs ~= regexLookaheadGraph; 1263 1264 Action[] actions; 1265 size_t[Action] actionIds; 1266 foreach (k, element; node.elements) 1267 { 1268 NonterminalID l1 = regexLookahead.getLookaheadNonterminal(graph, stateNr, k); 1269 if (element.isFinished(regexLookahead.grammar2)) 1270 { 1271 Action action = Action(ActionType.reduce, 0, element.production, 1272 k, NonterminalID.invalid, element.ignoreInConflict); 1273 size_t id; 1274 if (action in actionIds) 1275 id = actionIds[action]; 1276 else 1277 { 1278 id = actions.length; 1279 actions ~= action; 1280 actionIds[action] = id; 1281 } 1282 immutable(SymbolInstance)[] symbols = [SymbolInstance(l1)]; 1283 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], 1284 size_t)(symbols, id)); 1285 } 1286 else if (element.next(grammar).isToken) 1287 { 1288 NonterminalID lshift = regexLookahead.grammar2.nonterminals.id(text("$lshift_", 1289 stateNr, "_", k)); 1290 regexLookahead.grammar2.addProduction(new Production(lshift, 1291 element.production.symbols[element.dotPos .. $] ~ [ 1292 immutable(SymbolInstance)(l1) 1293 ])); 1294 size_t nextState = size_t.max; 1295 foreach (edge; node.edges) 1296 if (edge.symbol == element.next(grammar)) 1297 nextState = edge.next; 1298 Action action = Action(ActionType.shift, nextState); 1299 size_t id; 1300 if (action in actionIds) 1301 id = actionIds[action]; 1302 else 1303 { 1304 id = actions.length; 1305 actions ~= action; 1306 actionIds[action] = id; 1307 } 1308 immutable(SymbolInstance)[] symbols = [ 1309 SymbolInstance(lshift) 1310 ]; 1311 regexLookaheadGraph.sequences.addOnce(tuple!(immutable(SymbolInstance)[], 1312 size_t)(symbols, id)); 1313 } 1314 } 1315 1316 code.writeln("switch (checkRegexLookahead", regexLookaheadGraph.id, "())"); 1317 code.writeln("{").incIndent; 1318 bool anyStillReachable = false; 1319 foreach (i, action; actions) 1320 { 1321 mixin(genCode("code", q{ 1322 case $(i): 1323 { 1324 $$stillReachable = false; 1325 $$actionCode(action); 1326 } 1327 $$if (stillReachable) { 1328 $$anyStillReachable = true; 1329 break; 1330 $$} else { 1331 assert(false); 1332 $$} 1333 })); 1334 } 1335 code.writeln("case SymbolID.max:"); 1336 code.writeln(" return -1;"); 1337 code.writeln("default:"); 1338 code.writeln(" assert(false);"); 1339 code.decIndent.writeln("}"); 1340 1341 stillReachable = anyStillReachable; 1342 } 1343 else 1344 { 1345 stillReachable = false; 1346 bool anyNonEndTokens = false; 1347 foreach (t; actionTable.actions.sortedKeys) 1348 if (t != endTok) 1349 anyNonEndTokens = true; 1350 bool skipIf = !anyNonEndTokens && ((actionTable.defaultReduceAction.type != ActionType.none 1351 && (endTok !in actionTable.actions 1352 || actionTable.defaultReduceAction == actionTable.actions[endTok][""])) 1353 || (actionTable.defaultReduceAction.type == ActionType.none 1354 && endTok in actionTable.actions)); 1355 1356 static struct CaseInfo 1357 { 1358 TokenID[] tokens; 1359 string subToken; 1360 Action action; 1361 bool allowCombined; 1362 } 1363 1364 CaseInfo[] cases; 1365 1366 bool checkAllowCombined(TokenID t) 1367 { 1368 if (t !in actionTable.actions) 1369 return false; 1370 auto a = actionTable.actions[t]; 1371 string[] subTokens = a.sortedKeys; 1372 return t !in usedNegLookahead && subTokens.length == 1 && subTokens[0] == "" 1373 && a[""].type.among(ActionType.reduce, ActionType.accept, ActionType.descent); 1374 } 1375 1376 if (!skipIf) 1377 { 1378 if (checkAllowCombined(endTok)) 1379 { 1380 cases ~= CaseInfo([endTok], "", actionTable.actions[endTok][""], true); 1381 } 1382 else if (endTok in actionTable.actions) 1383 { 1384 code.writeln("if (lexer.empty)").writeln("{").incIndent; 1385 actionCode(actionTable.actions[endTok][""]); 1386 code.decIndent.writeln("}"); 1387 } 1388 else 1389 { 1390 mixin(genCode("code", q{ 1391 if (lexer.empty) 1392 { 1393 lastError = new SingleParseException!Location("EOF", lexer.front.currentLocation, lexer.front.currentTokenEnd); 1394 return -1; 1395 } 1396 })); 1397 } 1398 } 1399 1400 foreach (t; actionTable.actions.sortedKeys) 1401 { 1402 auto a = actionTable.actions[t]; 1403 if (t == endTok) 1404 continue; 1405 1406 bool allowCombined = checkAllowCombined(t); 1407 1408 if (allowCombined) 1409 { 1410 bool found; 1411 foreach (i, ref caseInfo; cases) 1412 { 1413 if (caseInfo.allowCombined && caseInfo.action == a[""]) 1414 { 1415 caseInfo.tokens ~= t; 1416 found = true; 1417 break; 1418 } 1419 } 1420 if (found) 1421 continue; 1422 } 1423 1424 foreach (subToken; a.sortedKeys) 1425 { 1426 auto a2 = a[subToken]; 1427 1428 if (a2.type != ActionType.none) 1429 { 1430 cases ~= CaseInfo([t], subToken, a2, allowCombined); 1431 } 1432 } 1433 } 1434 1435 foreach (caseInfo; cases) 1436 { 1437 assert(caseInfo.tokens.length == 1 || caseInfo.subToken.length == 0); 1438 string ifText = caseInfo.tokens[0] == endTok ? "if (" : "else if ("; 1439 foreach (i, t; caseInfo.tokens) 1440 { 1441 if (i) 1442 ifText ~= " || "; 1443 if (t == endTok) 1444 ifText ~= "lexer.empty"; 1445 else 1446 ifText ~= text("lexer.front.symbol == Lexer.tokenID!", 1447 grammar.tokens[t].tokenDCode); 1448 } 1449 if (caseInfo.subToken.length) 1450 ifText ~= text(" && lexer.front.content == ", caseInfo.subToken); 1451 ifText ~= ")"; 1452 1453 code.writeln(ifText).writeln("{").incIndent; 1454 foreach (t; caseInfo.tokens) 1455 { 1456 if (t in usedNegLookahead) 1457 { 1458 assert(caseInfo.tokens.length == 1); 1459 code.writeln("disallow", symbolNameCode(grammar, t), " = true;"); 1460 } 1461 } 1462 1463 actionCode(caseInfo.action, caseInfo.tokens.length == 1 1464 ? caseInfo.tokens[0] : TokenID.invalid); 1465 code.decIndent.writeln("}"); 1466 } 1467 1468 if (actionTable.defaultReduceAction.type != ActionType.none) 1469 { 1470 if (!skipIf) 1471 code.writeln("else"); 1472 code.writeln("{").incIndent; 1473 actionCode(actionTable.defaultReduceAction); 1474 code.decIndent.writeln("}"); 1475 } 1476 else if (endTok in actionTable.actions) 1477 { 1478 if (!skipIf) 1479 code.writeln("else"); 1480 code.writeln("{").incIndent; 1481 actionCode(actionTable.actions[endTok][""]); 1482 code.decIndent.writeln("}"); 1483 } 1484 else 1485 { 1486 mixin(genCode("code", q{ 1487 else 1488 { 1489 lastError = new SingleParseException!Location(text("unexpected Token \"", lexer.front.content, "\" \"", lexer.tokenName(lexer.front.symbol), "\""), 1490 lexer.front.currentLocation, lexer.front.currentTokenEnd); 1491 return -1; 1492 } 1493 })); 1494 } 1495 } 1496 if (actionTable.hasShift && stillReachable && actionTable.jumps.length) 1497 code.writeln(""); 1498 } 1499 1500 void genJumpCode(NonterminalID nonterminalID, size_t newState) 1501 { 1502 mixin(genCode("code", q{ 1503 $$if (nonterminalID in usedNegLookahead) { 1504 disallow$(symbolNameCode(grammar, nonterminalID)) = true; 1505 $$} 1506 $$if (newState != size_t.max && trivialState(graph.states[newState])) { 1507 currentResult = ParseResultIn.create($(grammar.nonterminalIDCode(graph.states[newState].elements[0].production.nonterminalID)), currentResult.get!($(grammar.nonterminalIDCode(nonterminalID)))); 1508 currentResultLocation = currentResultLocation; 1509 $$} else { 1510 $$shiftCode(newState, text(parserStackType(graph, newState, 1), "(currentResultLocation, currentResult.get!(", (nonterminalID.id >= grammar.nonterminals.vals.length)?text(nonterminalID.id):grammar.nonterminalIDCode(nonterminalID), ")())"), false, nonterminalID.id == SymbolID.max); 1511 $$} 1512 })); 1513 } 1514 1515 mixin(genCode("code", q{ 1516 $$if (actionTable.hasShift && stillReachable) { 1517 $$if (actionTable.jumps.length || actionTable.jumps2.length) { 1518 while (gotoParent == 0) 1519 { 1520 $$CommaGen elseCodeOuter = CommaGen("else "); 1521 $$foreach (nonterminalID; actionTable.jumps.sortedKeys) { 1522 $$auto jumps2 = actionTable.jumps[nonterminalID]; 1523 $$if (!node.allNonterminals.canFind(nonterminalID)) continue; 1524 $(elseCodeOuter())if (currentResult.nonterminalID == $(grammar.nonterminalIDCode(nonterminalID))) 1525 { 1526 $$CommaGen elseCode = CommaGen("else "); 1527 $$bool hasDisallowCheck; 1528 $$foreach (checkDisallowedSymbols; jumps2.sortedKeys) { 1529 $$auto newState = jumps2[checkDisallowedSymbols]; 1530 $$if (checkDisallowedSymbols.length == 0) { 1531 $$genJumpCode(nonterminalID, newState); 1532 $$} else { 1533 $$CommaGen and = CommaGen(" && "); 1534 $$hasDisallowCheck = true; 1535 $(elseCode())if ( _ 1536 $$foreach (x; checkDisallowedSymbols) { 1537 $(and())disallow$(symbolNameCode(grammar, x[0])) == $(x[1]) _ 1538 $$} 1539 ) 1540 { 1541 $$genJumpCode(nonterminalID, newState); 1542 } 1543 $$} 1544 $$} 1545 $$if (hasDisallowCheck) { 1546 else 1547 assert(false); 1548 $$} 1549 } 1550 $$} 1551 $$if (graph.globalOptions.delayedReduce == DelayedReduce.combined && actionTable.jumps2.length > 0) { 1552 $$foreach (nonterminalIDs; actionTable.jumps2.sortedKeys) { 1553 $$auto jumps2 = actionTable.jumps2[nonterminalIDs]; 1554 $(elseCodeOuter())if (currentResult.nonterminalID == $(combinedReduceNonterminals[nonterminalIDs].id)/*Combined:$(nonterminalIDs.map!(n=>grammar.getSymbolName(n)).join("|"))*/) 1555 { 1556 $$CommaGen elseCode = CommaGen("else "); 1557 $$bool hasDisallowCheck; 1558 $$foreach (checkDisallowedSymbols, newState; jumps2) { 1559 $$if (checkDisallowedSymbols.length == 0) { 1560 $$genJumpCode(combinedReduceNonterminals[nonterminalIDs], newState); 1561 $$} else { 1562 $$CommaGen and = CommaGen(" && "); 1563 $$hasDisallowCheck = true; 1564 $(elseCode())if ( _ 1565 $$foreach (x; checkDisallowedSymbols) { 1566 $(and())disallow$(symbolNameCode(grammar, x[0])) == $(x[1]) _ 1567 $$} 1568 ) 1569 { 1570 $$genJumpCode(combinedReduceNonterminals[nonterminalIDs], newState); 1571 } 1572 $$} 1573 $$} 1574 $$if (hasDisallowCheck) { 1575 else 1576 assert(false); 1577 $$} 1578 } 1579 $$} 1580 $$} 1581 else 1582 assert(0, text("no jump ", currentResult.nonterminalID, " ", allNonterminals[currentResult.nonterminalID].name)); 1583 } 1584 1585 $$} else { 1586 assert(gotoParent != 0); 1587 $$} 1588 $$if (inFinalParseState) { 1589 auto tree = currentResult.get!($(node.elements[0].production.symbols[0].id + grammar.startNonterminalID)); 1590 result = tree; 1591 resultLocation = currentResultLocation; 1592 return 0; 1593 $$} else if (minMaxGotoParent[0] == SymbolID.max) { 1594 assert(false); 1595 $$} else { 1596 $$if (thisResultUsesUnion) { 1597 result = currentResult; 1598 $$} else { 1599 result = currentResult.get!($(grammar.nonterminalIDCode(thisResultSingleNonterminal))); 1600 $$} 1601 resultLocation = currentResultLocation; 1602 return gotoParent - 1; 1603 $$} 1604 $$} 1605 })); 1606 1607 code.decIndent.writeln("}"); 1608 } 1609 1610 const(char)[] createParserModule(LRGraph graph, string modulename, bool useRegexlookahead) 1611 { 1612 EBNFGrammar grammar = graph.grammar; 1613 1614 if (graph.states.length == 0) 1615 return "// no states\n"; 1616 1617 CodeWriter code; 1618 code.indentStr = " "; 1619 1620 string nonterminalNameCode(const Nonterminal n) 1621 { 1622 return n.name.escapeD; 1623 } 1624 1625 string allTokensCode = createAllTokensCode(grammar); 1626 string allNonterminalsCode = createAllNonterminalsCode(grammar); 1627 1628 immutable(NonterminalID)[][] delayedReduceCombinations; 1629 NonterminalID[immutable(NonterminalID[])] combinedReduceNonterminals; 1630 if (graph.globalOptions.delayedReduce == DelayedReduce.combined) 1631 { 1632 foreach (stateNr; 0 .. graph.states.length) 1633 { 1634 foreach (x; graph.states[stateNr].delayedReduceCombinations) 1635 delayedReduceCombinations.addOnce(x); 1636 } 1637 delayedReduceCombinations.sort(); 1638 foreach (i, nonterminals; delayedReduceCombinations) 1639 { 1640 combinedReduceNonterminals[nonterminals] = NonterminalID( 1641 cast(SymbolID)(i + grammar.nonterminals.vals.length)); 1642 string name = "Combined:" ~ nonterminals.map!(n => grammar.getSymbolName(n)).join("|"); 1643 allNonterminalsCode ~= text(" /* ", 1644 i + grammar.nonterminals.vals.length + grammar.startNonterminalID, ": */ immutable(Nonterminal)(", "\"", 1645 name.escapeD, "\", ", 1646 grammar.nonterminals.vals[nonterminals[0].id].flags.nonterminalFlagsToCode, ", [", 1647 grammar.nonterminals.vals[nonterminals[0].id].annotations.contains!"array" 1648 ? "\"Array\"" : "", "], ", 1649 " []", ")", ",\n"); 1650 } 1651 } 1652 1653 EBNFGrammar lookaheadGrammar; 1654 1655 RegexLookahead regexLookahead; 1656 1657 mixin(genCode("code", q{ 1658 // Generated with DParserGen. 1659 module $(modulename); 1660 import dparsergen.core.grammarinfo; 1661 import dparsergen.core.parseexception; 1662 import dparsergen.core.parsestackelem; 1663 import dparsergen.core.utils; 1664 import std.algorithm; 1665 import std.conv; 1666 import std.meta; 1667 import std.stdio; 1668 import std.traits; 1669 1670 enum SymbolID startTokenID = $(grammar.startTokenID); 1671 static assert(allTokens.length < SymbolID.max - startTokenID); 1672 enum SymbolID endTokenID = startTokenID + allTokens.length; 1673 1674 enum SymbolID startNonterminalID = $(grammar.startNonterminalID); 1675 static assert(allNonterminals.length < SymbolID.max - startNonterminalID); 1676 enum SymbolID endNonterminalID = startNonterminalID + allNonterminals.length; 1677 1678 enum ProductionID startProductionID = $(grammar.startProductionID); 1679 static assert(allProductions.length < ProductionID.max - startProductionID); 1680 enum ProductionID endProductionID = startProductionID + allProductions.length; 1681 1682 enum nonterminalIDFor(string name) = startNonterminalID + staticIndexOf!(name, 1683 $$foreach (i, n; grammar.nonterminals.vals) { 1684 "$(nonterminalNameCode(n))", 1685 $$} 1686 ); 1687 $$if (grammar.tags.vals.length) { 1688 enum Tag 1689 { 1690 none = 0, 1691 $$foreach (i, t; grammar.tags.vals) { 1692 $(t.name) = 1 << $(i), 1693 $$} 1694 } 1695 $$} 1696 1697 struct Parser(CreatorInstance, alias L _ 1698 ) 1699 { 1700 alias Lexer = L; 1701 alias Location = typeof(Lexer.init.front.currentLocation); 1702 alias LocationDiff = typeof(Location.init - Location.init); 1703 1704 CreatorInstance creator; 1705 Lexer* lexer; 1706 ParseException lastError; 1707 1708 template NonterminalType(size_t nonterminalID) 1709 if (nonterminalID >= startNonterminalID && nonterminalID < endNonterminalID) 1710 { 1711 alias NonterminalType = CreatorInstance.NonterminalType!nonterminalID; 1712 } 1713 1714 alias Token = typeof(lexer.front.content); 1715 1716 Location lastTokenEnd; 1717 1718 ParseStackElem!(Location, Token) popToken() 1719 { 1720 if (lexer.front.currentLocation - lastTokenEnd != LocationDiff.invalid) 1721 assert(lexer.front.currentLocation >= lastTokenEnd, 1722 text(lastTokenEnd, " ", lexer.front.currentLocation)); 1723 1724 lastTokenEnd = lexer.front.currentTokenEnd; 1725 auto tok = lexer.front.content; 1726 auto pos = lexer.front.currentLocation; 1727 lexer.popFront; 1728 1729 assert(lastTokenEnd >= pos); 1730 1731 if (!lexer.empty) 1732 { 1733 assert(lexer.front.currentLocation >= lastTokenEnd, 1734 text(lastTokenEnd, " ", lexer.front.currentLocation)); 1735 } 1736 1737 return ParseStackElem!(Location, Token)(pos, tok); 1738 } 1739 $$foreach (production; graph.grammar.productions) { 1740 $$generateReduceFunction(code, graph, reduceFunctionName(graph, production, "reduce"), [production.productionID], combinedReduceNonterminals); 1741 1742 $$} 1743 $$foreach (productionIDs; graph.delayedReduceProductionCombinations) { 1744 $$generateReduceFunction(code, graph, "combinedReduce" ~ productionIDs.map!(i=>text(i)).join("_"), productionIDs, combinedReduceNonterminals); 1745 1746 $$} 1747 $$foreach (i, n; graph.states) { 1748 $$if (!trivialState(n)) { 1749 $$createParseFunction(code, graph, i, n, useRegexlookahead, lookaheadGrammar, combinedReduceNonterminals, regexLookahead); 1750 $$} 1751 $$} 1752 $$if (regexLookahead !is null) { 1753 $$regexLookahead.genGraphs(); 1754 $$regexLookahead.genMatchFuncs(code); 1755 $$foreach (g; regexLookahead.graphs) { 1756 $$regexLookahead.genGraph(code, g); 1757 $$} 1758 $$} 1759 } 1760 1761 immutable allTokens = [ 1762 $(allTokensCode)]; 1763 1764 immutable allNonterminals = [ 1765 $(allNonterminalsCode)]; 1766 1767 immutable allProductions = [ 1768 $(createAllProductionsCode(grammar))]; 1769 1770 immutable GrammarInfo grammarInfo = immutable(GrammarInfo)( 1771 startTokenID, startNonterminalID, startProductionID, 1772 allTokens, allNonterminals, allProductions); 1773 1774 Creator.Type parse(Creator, alias Lexer, string startNonterminal = "$(graph.grammar.getSymbolName(graph.grammar.startNonterminals[0].nonterminal).escapeD)")(ref Lexer lexer, Creator creator) 1775 { 1776 alias Location = typeof(Lexer.init.front.currentLocation); 1777 auto parser = Parser!(Creator, Lexer)( 1778 creator, 1779 &lexer); 1780 ParameterTypeTuple!(__traits(getMember, parser, "parse" ~ startNonterminal))[0] parseResult; 1781 Location parseResultLocation; 1782 int gotoParent = __traits(getMember, parser, "parse" ~ startNonterminal)(parseResult, parseResultLocation); 1783 if (gotoParent < 0) 1784 { 1785 assert(parser.lastError !is null); 1786 throw parser.lastError; 1787 } 1788 else 1789 assert(parser.lastError is null); 1790 auto result = parseResult; 1791 creator.adjustStart(result, parseResultLocation); 1792 return result; 1793 } 1794 1795 Creator.Type parse(Creator, alias Lexer, string startNonterminal = "$(graph.grammar.getSymbolName(graph.grammar.startNonterminals[0].nonterminal).escapeD)")(string input, Creator creator, typeof(Lexer.init.front.currentLocation) startLocation = typeof(Lexer.init.front.currentLocation).init) 1796 { 1797 alias Location = typeof(Lexer.init.front.currentLocation); 1798 Lexer lexer = Lexer(input, startLocation); 1799 auto result = parse!(Creator, Lexer, startNonterminal)(lexer, creator); 1800 if (!lexer.empty) 1801 { 1802 throw new SingleParseException!Location("input left after parse", 1803 lexer.front.currentLocation, lexer.front.currentTokenEnd); 1804 } 1805 return result; 1806 } 1807 })); 1808 1809 return code.data; 1810 }