1 2 // Copyright Tim Schendekehl 2023. 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 module dparsergen.generator.parser; 8 import dparsergen.core.utils; 9 import dparsergen.generator.globaloptions; 10 import dparsergen.generator.grammar; 11 import dparsergen.generator.ids; 12 import dparsergen.generator.production; 13 import std.algorithm; 14 import std.bitmanip; 15 import std.conv; 16 import std.exception; 17 import std.range; 18 import std.typecons; 19 20 void tokenSetToString(ref Appender!string app, const BitSet!TokenID tokens, EBNFGrammar grammar) 21 { 22 app.put(" {"); 23 bool first = true; 24 foreach (x; tokens.bitsSet) 25 { 26 if (!first) 27 app.put(", "); 28 app.put(grammar.getSymbolName(x)); 29 first = false; 30 } 31 app.put("}"); 32 } 33 34 string tokenSetToString(const BitSet!TokenID tokens, EBNFGrammar grammar) 35 { 36 Appender!string app; 37 tokenSetToString(app, tokens, grammar); 38 return app.data; 39 } 40 41 struct PrevElement 42 { 43 size_t state; 44 size_t elementNr; 45 } 46 47 struct ElementID 48 { 49 size_t state; 50 size_t elementNr; 51 } 52 53 struct LRElement 54 { 55 const(Production)* production; 56 size_t dotPos; 57 BitSet!TokenID lookahead; 58 bool ignoreInConflict; 59 size_t[] results; 60 bool isStartElement; 61 62 immutable(PrevElement)[] prevElements; 63 bool[immutable(PrevElement)] prevElementsMap; 64 immutable(PrevElement)[] nextElements; 65 66 void addPrevElement(immutable(PrevElement) prev) 67 { 68 if (prev in prevElementsMap) 69 return; 70 prevElementsMap[prev] = true; 71 prevElements ~= prev; 72 } 73 74 void addPrevElement(const(PrevElement)[] prevs) 75 { 76 foreach (prev; prevs) 77 addPrevElement(prev); 78 } 79 80 Constraint extraConstraint; 81 82 BitSet!TokenID realLookahead() const 83 { 84 BitSet!TokenID r = lookahead.dup; 85 foreach (l; production.negLookaheads) 86 r[l.toTokenID] = false; 87 if (production.negLookaheadsAnytoken) 88 { 89 bool hasEOF = r[TokenID(0)]; 90 r = BitSet!TokenID(r.length); 91 r[TokenID(0)] = hasEOF; 92 } 93 return r; 94 } 95 96 string toStringOnlyProductionBeforeDot(LRGraph graph) const 97 { 98 Appender!string app; 99 toStringOnlyProductionBeforeDot(graph, app); 100 return app.data; 101 } 102 103 void toStringOnlyProductionBeforeDot(LRGraph graph, ref Appender!string app) const 104 { 105 const grammar = graph.grammar; 106 foreach (i, s; production.symbols[0 .. dotPos]) 107 { 108 if (i == dotPos) 109 { 110 app.put("."); 111 assert(false); 112 } 113 else 114 app.put(" "); 115 116 grammar.symbolInstanceToString(app, s, false); 117 } 118 } 119 120 string toStringOnlyProductionAfterDot(LRGraph graph) const 121 { 122 Appender!string app; 123 toStringOnlyProductionAfterDot(graph, app); 124 return app.data; 125 } 126 127 void toStringOnlyProductionAfterDot(LRGraph graph, ref Appender!string app) const 128 { 129 const grammar = graph.grammar; 130 foreach (idiff, s; production.symbols[dotPos .. $]) 131 { 132 auto i = dotPos + idiff; 133 if (i > dotPos) 134 app.put(" "); 135 136 grammar.symbolInstanceToString(app, s, false); 137 } 138 139 foreach (l; production.negLookaheads) 140 { 141 app.put(" !"); 142 app.put(graph.grammar.getSymbolName(l)); 143 } 144 if (production.negLookaheadsAnytoken) 145 app.put(" !anytoken"); 146 } 147 148 string toStringOnlyProduction(LRGraph graph) const 149 { 150 const grammar = graph.grammar; 151 Appender!string app; 152 toStringOnlyProductionBeforeDot(graph, app); 153 app.put("."); 154 toStringOnlyProductionAfterDot(graph, app); 155 return app.data; 156 } 157 158 string toStringOnlyLookahead(LRGraph graph) const 159 { 160 Appender!string app; 161 toStringOnlyLookahead(graph, app); 162 return app.data; 163 } 164 165 void toStringOnlyLookahead(LRGraph graph, ref Appender!string app) const 166 { 167 auto grammar = graph.grammar; 168 169 tokenSetToString(app, realLookahead, grammar); 170 } 171 172 string toStringOnlyEnd(LRGraph graph) const 173 { 174 Appender!string app; 175 toStringOnlyEnd(graph, app); 176 return app.data; 177 } 178 179 void toStringOnlyEnd(LRGraph graph, ref Appender!string app) const 180 { 181 auto grammar = graph.grammar; 182 string r; 183 184 if (ignoreInConflict) 185 app.put(" ignoreInConflict"); 186 187 if (results.length) 188 { 189 app.put("results: "); 190 app.put(results.map!text.join(", ")); 191 } 192 if (isStartElement) 193 app.put(" startElement"); 194 } 195 196 string toString(LRGraph graph) const 197 { 198 auto grammar = graph.grammar; 199 Appender!string app; 200 app.put(grammar.getSymbolName(production.nonterminalID)); 201 app.put(" -> "); 202 app.put(toStringOnlyProduction(graph)); 203 toStringOnlyLookahead(graph, app); 204 toStringOnlyEnd(graph, app); 205 return app.data; 206 } 207 208 string toStringNoLookahead(LRGraph graph) const 209 { 210 auto grammar = graph.grammar; 211 string r = grammar.getSymbolName(production.nonterminalID); 212 r ~= " -> "; 213 r ~= toStringOnlyProduction(graph); 214 return r; 215 } 216 217 bool isNextValid(EBNFGrammar grammar) const 218 { 219 return dotPos < production.symbols.length && grammar.isValid(production.symbols[dotPos]); 220 } 221 222 bool isNextNonterminal(EBNFGrammar grammar) const 223 { 224 return dotPos < production.symbols.length && !production.symbols[dotPos].isToken; 225 } 226 227 bool isNextToken(EBNFGrammar grammar) const 228 { 229 return dotPos < production.symbols.length && production.symbols[dotPos].isToken; 230 } 231 232 bool isFinished(EBNFGrammar grammar) const 233 { 234 return dotPos == production.symbols.length; 235 } 236 237 immutable(SymbolInstance) next(EBNFGrammar grammar) const 238 { 239 assert(isNextValid(grammar)); 240 return production.symbols[dotPos]; 241 } 242 243 immutable(SymbolInstance) prev(EBNFGrammar grammar) const 244 { 245 assert(dotPos > 0); 246 return production.symbols[dotPos - 1]; 247 } 248 249 immutable(SymbolInstance)[] afterNext(EBNFGrammar grammar) const 250 { 251 return production.symbols[dotPos + 1 .. $]; 252 } 253 254 immutable(SymbolInstance)[] afterDot(EBNFGrammar grammar) const 255 { 256 return production.symbols[dotPos .. $]; 257 } 258 259 immutable(SymbolInstance)[] stackSymbols() const 260 { 261 return production.symbols[0 .. dotPos]; 262 } 263 264 LRElement dup() const 265 { 266 LRElement r = LRElement(production, dotPos, lookahead.dup, ignoreInConflict, 267 results.dup, isStartElement, prevElements, null, nextElements, extraConstraint); 268 foreach (prev; prevElements) 269 r.prevElementsMap[prev] = true; 270 return r; 271 } 272 273 LRElement advance() const 274 { 275 assert(dotPos < production.symbols.length); 276 LRElement r = LRElement(production, dotPos + 1, lookahead.dup, 277 ignoreInConflict, results.dup, isStartElement); 278 r.extraConstraint.tags = extraConstraint.tags; 279 return r; 280 } 281 282 size_t gotoParent() const 283 { 284 size_t r = stackSymbols.length; 285 if (production.productionID == ProductionID.max) 286 r++; 287 if (r == 0) 288 return size_t.max; 289 return r - 1; 290 } 291 292 immutable(NonterminalWithConstraint)[] nextNonterminals(EBNFGrammar grammar, bool directUnwrap) const 293 { 294 if (isNextNonterminal(grammar)) 295 { 296 auto n = next(grammar); 297 298 auto m2 = grammar.nextNonterminalWithConstraint(extraConstraint, n, dotPos == 0); 299 300 if (directUnwrap && !next(grammar).annotations.contains!"excludeDirectUnwrap") 301 { 302 return grammar.directUnwrapClosure(n.toNonterminalID, 303 m2.constraint.negLookaheads, m2.constraint.tags); 304 } 305 else 306 return [ 307 NonterminalWithConstraint(n.toNonterminalID, m2.constraint) 308 ]; 309 } 310 else 311 return []; 312 } 313 } 314 315 struct LRElementSet 316 { 317 LRElement[] elements; 318 bool simpleLLState; 319 NonterminalID onlyNonterminal = NonterminalID(SymbolID.max); 320 NonterminalID[] descentNonterminals; 321 alias elements this; 322 size_t stackSize() const 323 { 324 size_t r; 325 foreach (e; elements) 326 { 327 if (e.dotPos > r) 328 r = e.dotPos; 329 } 330 return r; 331 } 332 } 333 334 void getNextLookahead(EBNFGrammar grammar, immutable(SymbolInstance)[] l, 335 BitSet!TokenID realLookahead, ref bool lookaheadNeedsAfterEnd, 336 ref BitSet!TokenID newLookahead) 337 { 338 if (l.length > 0) 339 { 340 BitSet!TokenID nextLookahead = grammar.firstSet(l); 341 newLookahead.length = nextLookahead.length; 342 newLookahead |= nextLookahead; 343 if (newLookahead[grammar.tokens.getID("$end")]) 344 { 345 lookaheadNeedsAfterEnd = true; 346 newLookahead[grammar.tokens.getID("$end")] = false; 347 newLookahead |= realLookahead; 348 } 349 } 350 else 351 { 352 lookaheadNeedsAfterEnd = true; 353 newLookahead = realLookahead; 354 } 355 } 356 357 LRElementSet elementSetClosure(LRGraph graph, const LRElement[] startElements, 358 bool transitive = true) 359 in 360 { 361 foreach (ref e; startElements) 362 { 363 assert(e.extraConstraint == Constraint.init); 364 } 365 } 366 do 367 { 368 auto grammar = graph.grammar; 369 Appender!(LRElement[]) result; 370 NonterminalID[] descentNonterminals; 371 const(Symbol)[][] negLookaheads; 372 size_t[ProductionID] newElementsForProductions; 373 foreach (e; startElements) 374 { 375 if (e.dotPos == 0) 376 newElementsForProductions[e.production.productionID] = result.data.length; 377 result.put(e.dup); 378 } 379 380 size_t findAlreadyUsed(const(Production*) production) 381 { 382 if (production.productionID in newElementsForProductions) 383 return newElementsForProductions[production.productionID]; 384 385 return size_t.max; 386 } 387 388 bool changed = true; 389 390 for (size_t i; i < (transitive ? result.data.length : startElements.length); i++) 391 { 392 foreach (n; result.data[i].nextNonterminals(graph.grammar, 393 graph.globalOptions.directUnwrap)) 394 { 395 bool hasEnableToken; 396 foreach (a; graph.grammar.nonterminals[n.nonterminalID] 397 .annotations.otherAnnotations) 398 if (a.startsWith("enableToken(")) 399 hasEnableToken = true; 400 401 if (!graph.globalOptions.glrParser 402 && (graph.grammar.nonterminals[n.nonterminalID].annotations.contains!"backtrack"() 403 || graph.grammar.nonterminals[n.nonterminalID].annotations.contains!"lookahead"() 404 || hasEnableToken 405 || result.data[i].next(grammar) 406 .annotations.contains!"lookahead"() || n.hasLookaheadAnnotation) 407 && (!result.data[i].isStartElement 408 || n.nonterminalID != result.data[i].production.nonterminalID)) 409 { 410 descentNonterminals.addOnce(n.nonterminalID); 411 412 if (!graph.startNonterminalsSet[n.nonterminalID.id]) 413 { 414 graph.startNonterminalsSet[n.nonterminalID.id] = true; 415 graph.startNonterminals ~= StartNonterminal(n.nonterminalID); 416 } 417 } 418 else 419 { 420 foreach (prodNr, p; graph.grammar.getProductions(n.nonterminalID)) 421 { 422 if (graph.globalOptions.directUnwrap 423 && grammar.isDirectUnwrapProduction(*p)) 424 continue; 425 if (!graph.grammar.isProductionAllowed(n, p)) 426 continue; 427 428 size_t elementNr = findAlreadyUsed(p); 429 if (elementNr == size_t.max) 430 { 431 elementNr = result.data.length; 432 auto newElem = LRElement(p, 0); 433 newElementsForProductions[p.productionID] = result.data.length; 434 result.put(newElem); 435 } 436 } 437 } 438 } 439 } 440 441 size_t[] mapping; 442 mapping.length = result.data.length; 443 foreach (i; 0 .. result.data.length) 444 mapping[i] = i; 445 446 static int specialElementOrder(const LRElement* e) 447 { 448 if (e.isStartElement && e.production.symbols.length == 1) 449 return 1; 450 if (e.isStartElement) 451 return 2; 452 return 100; 453 } 454 455 mapping.sort!((ia, ib) { 456 auto a = &result.data[ia]; 457 auto b = &result.data[ib]; 458 459 int soA = specialElementOrder(a); 460 int soB = specialElementOrder(b); 461 462 if (soA < soB) 463 return true; 464 if (soA > soB) 465 return false; 466 467 if (a.dotPos > b.dotPos) 468 return true; 469 if (a.dotPos < b.dotPos) 470 return false; 471 472 auto pA = a.production; 473 auto pB = b.production; 474 475 if (pA.nonterminalID.id < pB.nonterminalID.id) 476 return true; 477 if (pA.nonterminalID.id > pB.nonterminalID.id) 478 return false; 479 480 foreach (i; 0 .. min(pA.symbols.length, pB.symbols.length)) 481 { 482 if (pA.symbols[i].isToken < pB.symbols[i].isToken) 483 return true; 484 if (pA.symbols[i].isToken > pB.symbols[i].isToken) 485 return false; 486 487 if (pA.symbols[i].id < pB.symbols[i].id) 488 return true; 489 if (pA.symbols[i].id > pB.symbols[i].id) 490 return false; 491 } 492 493 if (pA.symbols.length < pB.symbols.length) 494 return true; 495 if (pA.symbols.length > pB.symbols.length) 496 return false; 497 498 return false; 499 }); 500 501 size_t[] mappingReverse; 502 mappingReverse.length = result.data.length; 503 foreach (i; 0 .. result.data.length) 504 mappingReverse[mapping[i]] = i; 505 506 Appender!(LRElement[]) result2; 507 result2.reserve(result.data.length); 508 foreach (i; 0 .. result.data.length) 509 { 510 result2.put(result.data[mapping[i]]); 511 immutable(PrevElement)[] prevElements; 512 foreach (PrevElement x; result2.data[i].prevElements) 513 { 514 if (x.state == size_t.max) 515 x.elementNr = mappingReverse[x.elementNr]; 516 prevElements ~= x; 517 } 518 result2.data[i].prevElements = prevElements; 519 result2.data[i].prevElementsMap = null; 520 foreach (prev; prevElements) 521 result2.data[i].prevElementsMap[prev] = true; 522 } 523 return LRElementSet(result2.data, false, NonterminalID(SymbolID.max), descentNonterminals); 524 } 525 526 NonterminalID getOnlyNonterminal(LRGraph graph, const LRElement[] elements) 527 { 528 auto grammar = graph.grammar; 529 NonterminalID r = NonterminalID(SymbolID.max); 530 foreach (e; elements) 531 { 532 if (!e.isNextValid(grammar)) 533 return NonterminalID(SymbolID.max); 534 535 if (e.isStartElement) 536 continue; 537 538 const SymbolInstance next = e.next(grammar); 539 540 if (next.isToken) 541 return NonterminalID(SymbolID.max); 542 543 if (next.negLookaheads.length > 0) 544 return NonterminalID(SymbolID.max); 545 546 if (r.id != SymbolID.max && next.id != r.id) 547 return NonterminalID(SymbolID.max); 548 549 if (grammar.canBeEmpty(next)) 550 return NonterminalID(SymbolID.max); 551 552 if (graph.globalOptions.directUnwrap) 553 { 554 auto c = grammar.directUnwrapClosure(grammar.nextNonterminalWithConstraint(e.extraConstraint, 555 next, e.dotPos == 0)); 556 if (c.length != 1 || c[0].nonterminalID != next.toNonterminalID) 557 return NonterminalID(SymbolID.max); 558 } 559 560 r = next.toNonterminalID; 561 } 562 return r; 563 } 564 565 LRElement[] elementSetGoto(LRGraph graph, size_t prevState, 566 const LRElement[] elements, Symbol next, string subToken) 567 { 568 assert(graph.grammar.isValid(next)); 569 if (!next.isToken) 570 assert(subToken == ""); 571 Appender!(LRElement[]) result; 572 573 foreach (i, e; elements) 574 { 575 if (!e.isNextValid(graph.grammar)) 576 continue; 577 578 bool isGood; 579 580 SymbolInstance nextP = e.next(graph.grammar); 581 582 if (nextP.isToken != next.isToken) 583 continue; 584 585 if (next.isToken) 586 { 587 if (nextP == next && nextP.subToken == subToken) 588 isGood = true; 589 } 590 else 591 { 592 if (graph.globalOptions.directUnwrap) 593 { 594 if (nextP == next && graph.grammar.directUnwrapClosureHasSelf( 595 graph.grammar.nextNonterminalWithConstraint(e.extraConstraint, 596 nextP, e.dotPos == 0))) 597 isGood = true; 598 if (!nextP.annotations.contains!"excludeDirectUnwrap") 599 { 600 if (next.toNonterminalID in graph.grammar.directUnwrapClosureMap( 601 graph.grammar.nextNonterminalWithConstraint(e.extraConstraint, 602 nextP, e.dotPos == 0))) 603 { 604 isGood = true; 605 } 606 } 607 } 608 else 609 { 610 if (nextP == next) 611 isGood = true; 612 } 613 } 614 615 if (isGood) 616 { 617 auto newElem = LRElement(e.production, e.dotPos + 1, e.lookahead.dup); 618 newElem.isStartElement = e.isStartElement; 619 newElem.addPrevElement(PrevElement(prevState, i)); 620 result.put(newElem); //const(LR0Element)(e.productionID, e.dotPos + 1); 621 } 622 } 623 //assert(result.data.length); 624 625 return result.data; 626 } 627 628 LRElementSet gotoSet(LRGraph graph, size_t prevState, const LRElement[] elements, 629 const NonterminalID[] next) 630 { 631 Appender!(LRElement[]) result; 632 633 foreach (i, e; elements) 634 { 635 if (!e.isNextValid(graph.grammar)) 636 continue; 637 638 SymbolInstance nextP = e.next(graph.grammar); 639 640 if (nextP.isToken) 641 continue; 642 643 bool isGood; 644 645 if (next.canFind(nextP)) 646 isGood = true; 647 648 if (graph.globalOptions.directUnwrap && !nextP.annotations.contains!"excludeDirectUnwrap") 649 { 650 foreach (n; next) 651 if (n in graph.grammar.directUnwrapClosureMap( 652 graph.grammar.nextNonterminalWithConstraint(e.extraConstraint, 653 nextP, e.dotPos == 0))) 654 { 655 isGood = true; 656 } 657 } 658 659 if (isGood) 660 { 661 auto newElem = LRElement(e.production, e.dotPos + 1, e.lookahead.dup); 662 newElem.isStartElement = e.isStartElement; 663 newElem.addPrevElement(PrevElement(prevState, i)); 664 result.put(newElem); //const(LR0Element)(e.productionID, e.dotPos + 1); 665 } 666 } 667 668 return completeLRElementSet(graph, LRElementSet(result.data)); 669 } 670 671 LRElementSet completeLRElementSet(LRGraph graph, LRElementSet elementSet) 672 { 673 NonterminalID onlyNonterminal = getOnlyNonterminal(graph, elementSet.elements); 674 LRElementSet r = elementSet; 675 if (graph.globalOptions.optimizationDescent && onlyNonterminal.id != SymbolID.max) 676 { 677 if (!graph.startNonterminalsSet[onlyNonterminal.id]) 678 { 679 graph.startNonterminalsSet[onlyNonterminal.id] = true; 680 graph.startNonterminals ~= StartNonterminal(onlyNonterminal); 681 } 682 LRElement[] c; 683 foreach (e; r.elements) 684 { 685 LRElement e2 = e.dup; 686 c ~= e2; 687 } 688 r.descentNonterminals.addOnce(onlyNonterminal); 689 r.elements = c; 690 r.simpleLLState = true; 691 r.onlyNonterminal = onlyNonterminal; 692 } 693 else 694 { 695 r = elementSetClosure(graph, r.elements, true); 696 } 697 return r; 698 } 699 700 LRElement[] firstElement(LRGraph graph, SymbolID nonterminalID, bool needsEmptyProduction) 701 { 702 auto grammar = graph.grammar; 703 Production* production = new Production(); 704 705 production.isVirtual = true; 706 production.nonterminalID = NonterminalID(nonterminalID); 707 production.symbols = [SymbolInstance(Symbol(false, nonterminalID))]; 708 production.productionID = ProductionID.max; 709 710 LRElement[] result = [LRElement(production, 0)]; 711 result[0].isStartElement = true; 712 713 if (needsEmptyProduction) 714 { 715 production = new Production(); 716 717 production.isVirtual = true; 718 production.nonterminalID = NonterminalID(nonterminalID); 719 production.symbols = []; 720 production.productionID = ProductionID.max; 721 722 result ~= [LRElement(production, 0)]; 723 result[$ - 1].isStartElement = true; 724 } 725 726 return result; 727 } 728 729 enum LRGraphNodeType 730 { 731 unknown, 732 lr, 733 descent, 734 backtrack, 735 lookahead 736 } 737 738 struct LRGraphEdge 739 { 740 Symbol symbol; 741 string subToken; 742 size_t next; 743 Tuple!(Symbol, bool)[] checkDisallowedSymbols; 744 immutable(NonterminalID)[] delayedNonterminals; 745 } 746 747 class LRGraphNode 748 { 749 LRElementSet elements; 750 LRGraphEdge[] edges; 751 LRGraphEdge[] reverseEdges; 752 const(Symbol)[] shortestSymbolPath; 753 immutable(size_t)[] backtrackStates; 754 LRGraphNodeType type; 755 bool isStartNode; 756 immutable(NonterminalID[])[] stackDelayedReduce; 757 immutable(NonterminalID[])[] delayedReduceCombinations; 758 immutable(NonterminalID[])[] delayedReduceOutCombinations; 759 size_t delayedReduceCombinationsDone; 760 761 this() 762 { 763 } 764 765 this(LRElementSet elements) 766 { 767 this.elements = elements; 768 } 769 770 size_t[2] minMaxGotoParent() const 771 { 772 size_t min = size_t.max; 773 size_t max = 0; 774 foreach (e; elements) 775 { 776 size_t x = e.gotoParent; 777 if (x != size_t.max && x < min) 778 min = x; 779 if (x != size_t.max && x > max) 780 max = x; 781 } 782 return [min, max]; 783 } 784 785 bool[] stackSymbolsNotDropped() const 786 { 787 bool[] r; 788 r.length = stackSymbols.length; 789 foreach (e; elements) 790 { 791 foreach (i, s; e.stackSymbols) 792 { 793 if (!s.dropNode) 794 r[$ - e.stackSymbols.length + i] = true; 795 } 796 } 797 return r; 798 } 799 800 bool[] stackSymbolsStartOfProduction() const 801 { 802 bool[] r; 803 r.length = stackSymbols.length; 804 foreach (e; elements) 805 { 806 if (e.stackSymbols.length) 807 r[$ - e.stackSymbols.length] = true; 808 } 809 return r; 810 } 811 812 size_t stackSize() const 813 { 814 size_t r; 815 foreach (e; elements) 816 { 817 if (e.dotPos > r) 818 r = e.dotPos; 819 } 820 return r; 821 } 822 823 immutable(Symbol)[] stackSymbols() const 824 { 825 Symbol[] r; 826 bool[] hasSymbol; 827 r.length = stackSize(); 828 hasSymbol.length = stackSize(); 829 foreach (e; elements) 830 { 831 auto s = e.stackSymbols; 832 foreach (i; 0 .. s.length) 833 { 834 size_t p = i + r.length - s.length; 835 if (hasSymbol[p]) 836 { 837 if (s[i] != r[p]) 838 r[p] = Symbol(false, SymbolID.max); 839 } 840 else 841 r[p] = s[i]; 842 hasSymbol[p] = true; 843 } 844 } 845 return r.idup; 846 } 847 848 bool hasSetStackSymbols() const 849 { 850 immutable(Symbol)[] r; 851 foreach (e; elements) 852 { 853 auto s = e.stackSymbols; 854 foreach (i; 0 .. min(s.length, r.length)) 855 if (r[$ - 1 - i] != s[$ - 1 - i].symbol) 856 return true; 857 858 if (s.length > r.length) 859 { 860 r = []; 861 foreach (x; s) 862 r ~= x.symbol; 863 } 864 } 865 return false; 866 } 867 868 bool isFinalParseState() const 869 { 870 return elements.length >= 1 && elements[0].isStartElement && elements[0].dotPos == 0; 871 } 872 873 bool isArrayOnStack(EBNFGrammar grammar, size_t i) const 874 { 875 size_t stackSize = this.stackSize(); 876 foreach (e; elements) 877 { 878 auto s = e.stackSymbols; 879 if (i < stackSize - s.length) 880 continue; 881 882 size_t k = i + s.length - stackSize; 883 if (!s[k].isToken 884 && (grammar.nonterminals[s[k].toNonterminalID].flags & NonterminalFlags.array) != 0) 885 return true; 886 } 887 return false; 888 } 889 890 bool needsTagsOnStack(EBNFGrammar grammar, size_t i) const 891 { 892 size_t stackSize = this.stackSize(); 893 foreach (e; elements) 894 { 895 auto s = e.stackSymbols; 896 if (i < stackSize - s.length) 897 continue; 898 899 size_t k = i + s.length - stackSize; 900 if (s[k].isToken) 901 continue; 902 auto possibleTags = grammar.nonterminals[s[k].toNonterminalID].possibleTags; 903 if (s[k].annotations.contains!"inheritAnyTag" && possibleTags.length) 904 return true; 905 if (s[k].unwrapProduction && possibleTags.length) 906 return true; 907 if (e.isStartElement && possibleTags.length) 908 return true; 909 foreach (tagUsage; s[k].tags) 910 { 911 if (possibleTags.canFind(tagUsage.tag)) 912 return true; 913 } 914 } 915 return false; 916 } 917 918 NonterminalID[] allNonterminals() const 919 { 920 BitSet!NonterminalID n; 921 foreach (e; elements) 922 { 923 n[e.production.nonterminalID] = true; 924 } 925 foreach (x; elements.descentNonterminals) 926 n[x] = true; 927 return n.bitsSet.array; 928 } 929 930 NonterminalID[] allNonterminalsOut(EBNFGrammar grammar) const 931 { 932 BitSet!NonterminalID n; 933 foreach (e; elements) 934 { 935 if (e.dotPos > 0 || e.isStartElement) 936 { 937 NonterminalID nonterminalID = nonterminalOutForProduction(e.production); 938 if (n.length <= nonterminalID.id) 939 n.length = nonterminalID.id + 1; 940 n[nonterminalID] = true; 941 } 942 } 943 return n.bitsSet.array; 944 } 945 946 NonterminalID[] directUnwrapNonterminalsOnStack(EBNFGrammar grammar, size_t pos) const 947 { 948 BitSet!NonterminalID n; 949 foreach (e; elements) 950 { 951 auto s = e.stackSymbols; 952 if (s.length < pos) 953 continue; 954 if (s[$ - pos].isToken) 955 continue; 956 BitSet!NonterminalID tmp; 957 tmp.length = grammar.nonterminals.vals.length; 958 if (s[$ - pos].annotations.contains!"excludeDirectUnwrap") 959 tmp[s[$ - pos].toNonterminalID] = true; 960 else 961 { 962 foreach (m2; grammar.directUnwrapClosure(grammar.nextNonterminalWithConstraint(e.extraConstraint, 963 s[$ - pos], e.dotPos == 0))) 964 tmp[m2.nonterminalID] = true; 965 } 966 if (n.length == 0) 967 n = tmp; 968 else 969 n &= tmp; 970 } 971 return n.bitsSet.array; 972 } 973 974 bool hasTags(LRGraph graph) const 975 { 976 foreach (e; elements) 977 if (graph.grammar.nonterminals[e.production.nonterminalID].possibleTags.length) 978 return true; 979 foreach (s; backtrackStates) 980 if (graph.states[s].hasTags(graph)) 981 return true; 982 return false; 983 } 984 } 985 986 struct NextLookaheadCacheKey 987 { 988 size_t stateNr; 989 size_t elementNr; 990 TokenID currentToken; 991 } 992 993 class LRGraph 994 { 995 EBNFGrammar grammar; 996 this(EBNFGrammar grammar) 997 { 998 this.grammar = grammar; 999 } 1000 1001 LRGraphNode[] states; 1002 const(StartNonterminal)[] startNonterminals; 1003 BitArray startNonterminalsSet; 1004 size_t[] nonterminalToState; 1005 1006 size_t[const(LRGraphNodeKey)] statesByKey2; 1007 size_t[const(LRGraphNodeKey)] closureCache; 1008 1009 GlobalOptions globalOptions; 1010 1011 size_t[][Tuple!(size_t, size_t)] statesWithProduction; 1012 1013 immutable(ProductionID)[][] delayedReduceProductionCombinations; 1014 1015 BitSet!TokenID[NextLookaheadCacheKey] nextLookaheadCache; 1016 } 1017 1018 struct LRGraphNodeKey 1019 { 1020 LRGraph graph; 1021 const LRElement[] elements; 1022 1023 size_t toHash() const pure nothrow 1024 { 1025 size_t r = 0; 1026 enum prime = 17; 1027 1028 r = (r * prime) ^ elements.length; 1029 1030 foreach (k; 0 .. elements.length) 1031 { 1032 auto e = &elements[k]; 1033 r = (r * prime) ^ e.dotPos; 1034 r = (r * prime) ^ e.production.nonterminalID.id; 1035 r = (r * prime) ^ e.production.symbols.length; 1036 } 1037 return r; 1038 } 1039 1040 bool opEquals(ref const LRGraphNodeKey s) const pure nothrow 1041 { 1042 if (s.elements.length != elements.length) 1043 return false; 1044 foreach (k; 0 .. elements.length) 1045 { 1046 auto e1 = &elements[k]; 1047 auto e2 = &s.elements[k]; 1048 if (e1.dotPos != e2.dotPos) 1049 return false; 1050 if (e1.production !is e2.production) 1051 { 1052 if (e1.production.nonterminalID != e2.production.nonterminalID) 1053 return false; 1054 if (e1.production.symbols.length != e2.production.symbols.length) 1055 return false; 1056 foreach (i; 0 .. e1.production.symbols.length) 1057 { 1058 auto s1 = e1.production.symbols[i]; 1059 auto s2 = e2.production.symbols[i]; 1060 if (s1.isToken != s2.isToken) 1061 return false; 1062 if (s1.tags != s2.tags) 1063 return false; 1064 if (!(graph.globalOptions.mergeSimilarStates && s1.isToken && i < e1.dotPos)) 1065 { 1066 if (s1.id != s2.id) 1067 return false; 1068 if (s1.subToken != s2.subToken) 1069 return false; 1070 } 1071 } 1072 } 1073 } 1074 1075 return true; 1076 } 1077 } 1078 1079 NonterminalID nonterminalOutForProduction(const(Production)* p) 1080 { 1081 return p.nonterminalID; 1082 } 1083 1084 bool similarElements(const LRElement e1, const LRElement e2) 1085 { 1086 if (e1.dotPos != e2.dotPos) 1087 return false; 1088 if (e1.production.nonterminalID != e2.production.nonterminalID) 1089 return false; 1090 if (e1.production.symbols.length != e2.production.symbols.length) 1091 return false; 1092 foreach (i; 0 .. e1.production.symbols.length) 1093 { 1094 if (e1.production.symbols[i].isToken != e2.production.symbols[i].isToken) 1095 return false; 1096 if (e1.production.symbols[i].isToken) 1097 { 1098 if (i >= e1.dotPos && e1.production.symbols[i] != e2.production.symbols[i]) 1099 return false; 1100 } 1101 else 1102 { 1103 if (e1.production.symbols[i] != e2.production.symbols[i]) 1104 return false; 1105 } 1106 } 1107 return true; 1108 } 1109 1110 sizediff_t countUntilGraph(bool checkLookahead)(LRGraph graph, const LRElement[] s) 1111 { 1112 sizediff_t r = -1; 1113 auto x = LRGraphNodeKey(graph, s) in graph.statesByKey2; 1114 if (x) 1115 r = *x; 1116 return r; 1117 } 1118 1119 private size_t makeLRGraphRec(LRGraph graph, LRElementSet s, const(Symbol)[] path, 1120 LRGraph origGraph, size_t depth = 0) 1121 { 1122 if (s.length == 0) 1123 return size_t.max; 1124 auto r = countUntilGraph!false(graph, s); 1125 1126 if (r < 0) 1127 { 1128 if (origGraph !is null) 1129 { 1130 r = countUntilGraph!false(origGraph, s); 1131 if (r >= 0) 1132 { 1133 if (r >= graph.states.length) 1134 graph.states.length = r + 1; 1135 graph.states[r] = new LRGraphNode(s); 1136 graph.statesByKey2[LRGraphNodeKey(graph, s.elements)] = r; 1137 } 1138 } 1139 if (r < 0) 1140 { 1141 r = graph.states.length; 1142 graph.states = graph.states ~ new LRGraphNode(s); 1143 graph.statesByKey2[LRGraphNodeKey(graph, s.elements)] = r; 1144 } 1145 graph.states[r].shortestSymbolPath = path; 1146 1147 bool[Tuple!(Symbol, string)] done; 1148 Tuple!(Symbol, string)[] todoList; 1149 foreach (e; s) 1150 { 1151 if (!e.isNextValid(graph.grammar)) 1152 continue; 1153 auto symbol = e.next(graph.grammar); 1154 auto symbolWithSubToken = tuple!(Symbol, string)(symbol.symbol, symbol.subToken); 1155 if (symbolWithSubToken !in done 1156 && (e.production.nonterminalID != s.onlyNonterminal || e.dotPos > 0)) 1157 { 1158 if (symbol.isToken) 1159 { 1160 done[symbolWithSubToken] = true; 1161 todoList ~= symbolWithSubToken; 1162 } 1163 else 1164 { 1165 foreach (n; e.nextNonterminals(graph.grammar, graph.globalOptions.directUnwrap)) 1166 { 1167 symbolWithSubToken = tuple!(Symbol, string)(n.nonterminalID, ""); 1168 if (symbolWithSubToken in done) 1169 continue; 1170 done[symbolWithSubToken] = true; 1171 todoList ~= symbolWithSubToken; 1172 } 1173 } 1174 } 1175 } 1176 sort!((a, b) => ((a[0].id == b[0].id) ? (a[1] < b[1]) : (a[0].id < b[0].id)))(todoList); 1177 foreach (x; todoList) 1178 { 1179 auto newElemAll = elementSetGoto(graph, r, s, x[0], x[1]); 1180 1181 immutable(Symbol)[] disallowableSymbols; 1182 foreach (i, e; newElemAll) 1183 { 1184 disallowableSymbols.addOnce(e.prev(graph.grammar).negLookaheads); 1185 } 1186 1187 assert(disallowableSymbols.length < 30); 1188 foreach (uint bits; 0 .. (1 << disallowableSymbols.length)) 1189 { 1190 Tuple!(Symbol, bool)[] checkDisallowedSymbols; 1191 foreach (j, symbol; disallowableSymbols) 1192 { 1193 if ((bits & (1 << j))) 1194 { 1195 checkDisallowedSymbols ~= tuple!(Symbol, bool)(symbol, true); 1196 } 1197 else 1198 { 1199 checkDisallowedSymbols ~= tuple!(Symbol, bool)(symbol, false); 1200 } 1201 } 1202 LRElement[] newElemFiltered; 1203 foreach (i, e; newElemAll) 1204 { 1205 bool filtered; 1206 foreach (j, symbol; disallowableSymbols) 1207 { 1208 if ((bits & (1 << j)) && e.prev(graph.grammar) 1209 .negLookaheads.canFind(symbol)) 1210 filtered = true; 1211 } 1212 1213 if (!filtered) 1214 { 1215 newElemFiltered ~= newElemAll[i]; 1216 } 1217 } 1218 1219 if (newElemFiltered.length == 0) 1220 continue; 1221 1222 auto newElem = completeLRElementSet(graph, LRElementSet(newElemFiltered)); 1223 size_t nid; 1224 auto nidp = LRGraphNodeKey(graph, newElem.elements) in graph.closureCache; 1225 if (nidp) 1226 { 1227 nid = *nidp; 1228 size_t i; 1229 assert(nid < graph.states.length, text(nid, " ", graph.states.length)); 1230 auto node = graph.states[nid]; 1231 foreach (ref e; newElem.elements) 1232 { 1233 while (true) 1234 { 1235 if ((node.elements[i].production is e.production 1236 && node.elements[i].dotPos == e.dotPos) 1237 || (graph.globalOptions.mergeSimilarStates 1238 && similarElements(node.elements[i], e))) 1239 { 1240 break; 1241 } 1242 i++; 1243 } 1244 node.elements[i].addPrevElement(e.prevElements); 1245 i++; 1246 } 1247 } 1248 else 1249 { 1250 nid = makeLRGraphRec(graph, newElem, path ~ x[0], origGraph, depth + 1); 1251 if (nid != size_t.max) 1252 graph.closureCache[LRGraphNodeKey(graph, newElem.elements)] = nid; 1253 } 1254 graph.states[r].edges ~= LRGraphEdge(x[0], x[1], nid, checkDisallowedSymbols); 1255 if (nid != size_t.max) 1256 graph.states[nid].reverseEdges ~= LRGraphEdge(x[0], x[1], r, 1257 checkDisallowedSymbols); 1258 } 1259 } 1260 } 1261 else 1262 { 1263 if (path.length < graph.states[r].shortestSymbolPath.length) 1264 graph.states[r].shortestSymbolPath = path; 1265 1266 size_t[] mapping; 1267 mapping.length = s.elements.length; 1268 foreach (i; 0 .. s.elements.length) 1269 { 1270 mapping[i] = size_t.max; 1271 foreach (k; 0 .. graph.states[r].elements.length) 1272 { 1273 if ((s.elements[i].production is graph.states[r].elements[k].production 1274 && s.elements[i].dotPos == graph.states[r].elements[k].dotPos) 1275 || (graph.globalOptions.mergeSimilarStates 1276 && similarElements(s.elements[i], graph.states[r].elements[k]))) 1277 { 1278 assert(mapping[i] == size_t.max); 1279 mapping[i] = k; 1280 } 1281 } 1282 assert(mapping[i] != size_t.max); 1283 } 1284 foreach (i; 0 .. s.elements.length) 1285 { 1286 foreach (PrevElement x; s.elements[i].prevElements) 1287 { 1288 if (x.state == size_t.max) 1289 x.elementNr = mapping[x.elementNr]; 1290 graph.states[r].elements[mapping[i]].addPrevElement(x); 1291 } 1292 } 1293 } 1294 1295 return r; 1296 } 1297 1298 void calculateLookahead(LRGraph graph) 1299 { 1300 auto grammar = graph.grammar; 1301 1302 foreach (j, node; graph.states) 1303 { 1304 foreach (k, ref e; node.elements) 1305 { 1306 e.lookahead = BitSet!TokenID(grammar.tokens.vals.length); 1307 if (e.isStartElement) 1308 e.lookahead[grammar.tokens.getID("$end")] = true; 1309 } 1310 } 1311 1312 static struct NonterminalLookaheadInfo 1313 { 1314 BitSet!TokenID lookahead; 1315 immutable(PrevElement)[] prevElements; 1316 bool[immutable(PrevElement)] prevElementsMap; 1317 1318 void addPrevElement(immutable(PrevElement) prev) 1319 { 1320 if (prev in prevElementsMap) 1321 return; 1322 prevElementsMap[prev] = true; 1323 prevElements ~= prev; 1324 } 1325 } 1326 1327 foreach (j, node; graph.states) 1328 { 1329 NonterminalLookaheadInfo[ProductionID] nonterminalLookaheadInfos; 1330 foreach (k, ref e; node.elements) 1331 { 1332 foreach (nextNonterminal; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 1333 { 1334 bool lookaheadNeedsAfterEnd; 1335 BitSet!TokenID newLookahead; 1336 1337 auto l = e.afterNext(grammar); 1338 getNextLookahead(grammar, l, e.realLookahead, 1339 lookaheadNeedsAfterEnd, newLookahead); 1340 1341 if (e.next(graph.grammar).annotations.contains!"eager") 1342 { 1343 newLookahead = BitSet!TokenID(newLookahead.length); 1344 newLookahead[graph.grammar.tokens.getID("$end")] = true; 1345 } 1346 1347 if (node.elements.descentNonterminals.canFind(nextNonterminal.nonterminalID) 1348 && !e.next(grammar).annotations.contains!"eager") 1349 { 1350 auto state2 = graph.nonterminalToState[nextNonterminal.nonterminalID.id]; 1351 foreach (k2, ref e2; graph.states[state2].elements) 1352 { 1353 if (e2.dotPos == 0 && e2.isStartElement) 1354 { 1355 e2.lookahead |= newLookahead; 1356 if (lookaheadNeedsAfterEnd) 1357 e.nextElements.addOnce(PrevElement(state2, k2)); 1358 } 1359 } 1360 } 1361 1362 foreach (p; graph.grammar.getProductions(nextNonterminal.nonterminalID)) 1363 { 1364 if (graph.globalOptions.directUnwrap && grammar.isDirectUnwrapProduction(*p)) 1365 continue; 1366 if (!graph.grammar.isProductionAllowed(nextNonterminal, p)) 1367 continue; 1368 1369 NonterminalLookaheadInfo* nonterminalLookaheadInfo = p 1370 .productionID in nonterminalLookaheadInfos; 1371 1372 if (nonterminalLookaheadInfo !is null) 1373 { 1374 nonterminalLookaheadInfo.lookahead |= newLookahead; 1375 if (lookaheadNeedsAfterEnd && !e.next(graph.grammar) 1376 .annotations.contains!"eager") 1377 nonterminalLookaheadInfo.addPrevElement(PrevElement(size_t.max, k)); 1378 } 1379 else 1380 { 1381 nonterminalLookaheadInfos[p.productionID] = NonterminalLookaheadInfo( 1382 newLookahead.dup); 1383 nonterminalLookaheadInfo = p.productionID in nonterminalLookaheadInfos; 1384 if (lookaheadNeedsAfterEnd && !e.next(graph.grammar) 1385 .annotations.contains!"eager") 1386 nonterminalLookaheadInfo.addPrevElement(PrevElement(size_t.max, k)); 1387 } 1388 } 1389 } 1390 } 1391 1392 foreach (k, ref e; node.elements) 1393 { 1394 if (e.dotPos > 0) 1395 continue; 1396 if (e.isStartElement) 1397 continue; 1398 if (e.production.productionID !in nonterminalLookaheadInfos) 1399 continue; 1400 e.lookahead |= nonterminalLookaheadInfos[e.production.productionID].lookahead; 1401 foreach (prev; nonterminalLookaheadInfos[e.production.productionID].prevElements) 1402 { 1403 node.elements[prev.elementNr].nextElements ~= PrevElement(j, k); 1404 } 1405 } 1406 } 1407 1408 bool changed = true; 1409 while (changed) 1410 { 1411 changed = false; 1412 1413 foreach (j, node; graph.states) 1414 { 1415 foreach (k, ref e; node.elements) 1416 { 1417 foreach (x; e.nextElements) 1418 { 1419 if (graph.states[x.state].elements[x.elementNr].lookahead.addOnce( 1420 e.realLookahead)) 1421 changed = true; 1422 } 1423 } 1424 } 1425 } 1426 } 1427 1428 void calculateExtraConstraints(LRGraph graph) 1429 { 1430 auto grammar = graph.grammar; 1431 1432 bool changed = true; 1433 while (changed) 1434 { 1435 changed = false; 1436 1437 foreach (j, node; graph.states) 1438 { 1439 Constraint[ProductionID] constraintByProduction; 1440 foreach (k, ref e; node.elements) 1441 { 1442 if (e.extraConstraint.disabled) 1443 continue; 1444 1445 foreach (nextNonterminal; e.nextNonterminals(grammar, 1446 graph.globalOptions.directUnwrap)) 1447 { 1448 foreach (p; graph.grammar.getProductions(nextNonterminal.nonterminalID)) 1449 { 1450 if (graph.globalOptions.directUnwrap 1451 && grammar.isDirectUnwrapProduction(*p)) 1452 continue; 1453 if (!graph.grammar.isProductionAllowed(nextNonterminal, p)) 1454 continue; 1455 1456 if (p.productionID in constraintByProduction) 1457 constraintByProduction[p.productionID] = grammar.orConstraint( 1458 constraintByProduction[p.productionID], 1459 nextNonterminal.constraint); 1460 else 1461 constraintByProduction[p.productionID] = nextNonterminal.constraint; 1462 } 1463 } 1464 1465 foreach (x; e.nextElements) 1466 { 1467 auto newConstraint = grammar.orConstraint( 1468 graph.states[x.state].elements[x.elementNr].extraConstraint, 1469 e.extraConstraint); 1470 if (graph.states[x.state].elements[x.elementNr].extraConstraint != newConstraint) 1471 { 1472 graph.states[x.state].elements[x.elementNr].extraConstraint = newConstraint; 1473 changed = true; 1474 } 1475 } 1476 } 1477 foreach (k, ref e; node.elements) 1478 { 1479 if (e.dotPos > 0) 1480 continue; 1481 if (e.isStartElement) 1482 continue; 1483 if (e.production.productionID !in constraintByProduction) 1484 continue; 1485 auto newConstraint = grammar.orConstraint(e.extraConstraint, 1486 constraintByProduction[e.production.productionID]); 1487 if (e.extraConstraint != newConstraint) 1488 { 1489 e.extraConstraint = newConstraint; 1490 changed = true; 1491 } 1492 } 1493 } 1494 } 1495 } 1496 1497 void addDelayedReduceStates(LRGraph graph, LRGraph origGraph = null) 1498 { 1499 auto grammar = graph.grammar; 1500 1501 foreach (k; 0 .. graph.states.length) 1502 { 1503 ActionTable actionTable = genActionTable(graph, graph.states[k]); 1504 foreach (t, actions; actionTable.actions) 1505 foreach (subToken, action; actions) 1506 { 1507 if (action.type == ActionType.delayedReduce) 1508 { 1509 NonterminalID[] nonterminals; 1510 ProductionID[] productionIDs; 1511 size_t stackSize; 1512 foreach (e; action.delayedElements) 1513 { 1514 nonterminals.addOnce(graph.states[k].elements[e].production.nonterminalID); 1515 productionIDs.addOnce(graph.states[k].elements[e].production.productionID); 1516 stackSize = graph.states[k].elements[e].production.symbols.length; 1517 } 1518 nonterminals.sort(); 1519 productionIDs.sort(); 1520 immutable NonterminalID[] nonterminalsI = nonterminals.idup; 1521 if (!graph.delayedReduceProductionCombinations.canFind(productionIDs)) 1522 graph.delayedReduceProductionCombinations ~= productionIDs.idup; 1523 1524 void addDelayedReduce(size_t i, size_t k) 1525 { 1526 if (i == 0) 1527 { 1528 graph.states[k].delayedReduceCombinations.addOnce(nonterminalsI); 1529 return; 1530 } 1531 else 1532 { 1533 graph.states[k].delayedReduceOutCombinations.addOnce(nonterminalsI); 1534 } 1535 foreach (e; graph.states[k].reverseEdges) 1536 { 1537 addDelayedReduce(i - 1, e.next); 1538 } 1539 } 1540 1541 addDelayedReduce(stackSize, k); 1542 } 1543 } 1544 } 1545 1546 foreach (k; 0 .. graph.states.length) 1547 { 1548 for (; graph.states[k].delayedReduceCombinationsDone 1549 < graph.states[k].delayedReduceCombinations.length; graph 1550 .states[k].delayedReduceCombinationsDone++) 1551 { 1552 auto generatedSet = graph.states[k] 1553 .delayedReduceCombinations[graph.states[k].delayedReduceCombinationsDone]; 1554 auto newElem = gotoSet(graph, k, graph.states[k].elements, generatedSet); 1555 assert(newElem.stackSize <= graph.states[k].elements.stackSize + 1); 1556 auto r2 = countUntilGraph!false(graph, newElem); 1557 if (r2 == k) 1558 continue; 1559 auto nid = makeLRGraphRec(graph, newElem, 1560 graph.states[k].shortestSymbolPath ~ generatedSet[0], origGraph, 0); 1561 graph.states[k].edges ~= LRGraphEdge(Symbol(false, SymbolID.max), 1562 "", nid, [], generatedSet); 1563 if (nid != size_t.max) 1564 graph.states[nid].reverseEdges ~= LRGraphEdge(Symbol(false, 1565 SymbolID.max), "", k, [], generatedSet); 1566 } 1567 } 1568 } 1569 1570 LRGraph makeLRGraph(EBNFGrammar grammar, GlobalOptions globalOptions, LRGraph origGraph = null) 1571 { 1572 LRGraph graph = new LRGraph(grammar); 1573 graph.globalOptions = globalOptions; 1574 1575 if (origGraph !is null) 1576 graph.states.length = origGraph.states.length; 1577 1578 graph.nonterminalToState.length = grammar.nonterminals.vals.length; 1579 graph.startNonterminalsSet.length = grammar.nonterminals.vals.length; 1580 1581 graph.startNonterminals = grammar.startNonterminals; 1582 foreach (startNonterminals; grammar.startNonterminals) 1583 graph.startNonterminalsSet[startNonterminals.nonterminal.id] = true; 1584 size_t i = 0; 1585 while (i < graph.startNonterminals.length) 1586 { 1587 auto start = firstElement(graph, graph.startNonterminals[i].nonterminal.id, 1588 graph.startNonterminals[i].needsEmptyProduction); 1589 1590 bool isBacktrack = graph.grammar.nonterminals[graph.startNonterminals[i].nonterminal] 1591 .annotations.contains!"backtrack"(); 1592 bool isLookahead = graph.grammar.nonterminals[graph.startNonterminals[i].nonterminal] 1593 .annotations.contains!"lookahead"(); 1594 if (!graph.globalOptions.glrParser && (isBacktrack || isLookahead)) 1595 { 1596 LRElementSet set = LRElementSet(start, false); 1597 1598 size_t backtrackState = graph.states.length; 1599 graph.states = graph.states ~ new LRGraphNode(set); 1600 graph.statesByKey2[LRGraphNodeKey(graph, set.elements)] = backtrackState; 1601 graph.states[backtrackState].isStartNode = true; 1602 graph.states[backtrackState].shortestSymbolPath 1603 = [graph.startNonterminals[i].nonterminal]; 1604 if (isBacktrack) 1605 graph.states[backtrackState].type = LRGraphNodeType.backtrack; 1606 else 1607 graph.states[backtrackState].type = LRGraphNodeType.lookahead; 1608 graph.nonterminalToState[graph.startNonterminals[i].nonterminal.id] = backtrackState; 1609 1610 foreach (prodNr, p; graph.grammar.getProductions( 1611 graph.startNonterminals[i].nonterminal)) 1612 { 1613 LRElementSet set2 = elementSetClosure(graph, [ 1614 LRElement(p, 0, set[0].lookahead) 1615 ]); 1616 set2.elements = start ~ set2.elements; 1617 foreach (ref e; set2.elements[1 .. $]) 1618 { 1619 auto prevElements = e.prevElements; 1620 e.prevElements = []; 1621 e.prevElementsMap = null; 1622 foreach (PrevElement x; prevElements) 1623 { 1624 if (x.state == size_t.max) 1625 x.elementNr++; // account for start element 1626 e.addPrevElement(x); 1627 } 1628 if (e.production is p && e.dotPos == 0) 1629 { 1630 e.addPrevElement(PrevElement(size_t.max, 0)); 1631 } 1632 } 1633 set2.elements[0].addPrevElement(PrevElement(backtrackState, 0)); 1634 1635 size_t state2 = makeLRGraphRec(graph, set2, 1636 [graph.startNonterminals[i].nonterminal], origGraph); 1637 1638 graph.states[backtrackState].backtrackStates ~= state2; 1639 } 1640 } 1641 else 1642 { 1643 LRElementSet set = completeLRElementSet(graph, elementSetClosure(graph, start, false)); 1644 size_t id = makeLRGraphRec(graph, set, 1645 [graph.startNonterminals[i].nonterminal], origGraph); 1646 graph.nonterminalToState[graph.startNonterminals[i].nonterminal.id] = id; 1647 graph.states[id].isStartNode = true; 1648 } 1649 i++; 1650 } 1651 1652 do 1653 { 1654 foreach (ref node; graph.states) 1655 { 1656 foreach (ref e; node.elements) 1657 { 1658 e.nextElements = []; 1659 e.extraConstraint = Constraint.init; 1660 e.extraConstraint.disabled = !e.isStartElement; 1661 } 1662 } 1663 foreach (j, node; graph.states) 1664 { 1665 foreach (k, e; node.elements) 1666 { 1667 foreach (x; e.prevElements) 1668 { 1669 if (x.state == size_t.max) 1670 graph.states[j].elements[x.elementNr].nextElements ~= PrevElement(j, k); 1671 else 1672 graph.states[x.state].elements[x.elementNr].nextElements ~= PrevElement(j, k); 1673 } 1674 } 1675 } 1676 1677 calculateExtraConstraints(graph); 1678 1679 calculateLookahead(graph); 1680 1681 size_t prevNumStates = graph.states.length; 1682 1683 if (graph.globalOptions.delayedReduce) 1684 { 1685 addDelayedReduceStates(graph, origGraph); 1686 } 1687 1688 if (prevNumStates == graph.states.length) 1689 break; 1690 } 1691 while (true); 1692 1693 foreach (k; 0 .. graph.states.length) 1694 { 1695 const stackSize = graph.states[k].stackSize; 1696 NonterminalID[][] stackDelayedReduce; 1697 stackDelayedReduce.length = stackSize; 1698 bool nonterminalPossible(size_t i, NonterminalID n) 1699 { 1700 foreach (e; graph.states[k].elements) 1701 { 1702 if (e.dotPos < i + 1) 1703 continue; 1704 if (n in grammar.directUnwrapClosureMap(e.production.symbols[e.dotPos - i - 1].toNonterminalID, 1705 e.production.symbols[e.dotPos - i - 1].negLookaheads, 1706 e.production.symbols[e.dotPos - i - 1].tags)) 1707 return true; 1708 } 1709 return false; 1710 } 1711 1712 void buildStackDelayedReduce(size_t i, size_t k) 1713 { 1714 if (i >= stackSize) 1715 return; 1716 foreach (e; graph.states[k].reverseEdges) 1717 { 1718 bool allNonterminalsPossible = true; 1719 foreach (x; e.delayedNonterminals) 1720 { 1721 if (!nonterminalPossible(i, x)) 1722 allNonterminalsPossible = false; 1723 } 1724 if (allNonterminalsPossible) 1725 foreach (x; e.delayedNonterminals) 1726 { 1727 stackDelayedReduce[$ - 1 - i].addOnce(x); 1728 } 1729 buildStackDelayedReduce(i + 1, e.next); 1730 } 1731 } 1732 1733 buildStackDelayedReduce(0, k); 1734 graph.states[k].stackDelayedReduce.length = 0; 1735 foreach (ref x; stackDelayedReduce) 1736 { 1737 if (x.length <= 1) 1738 x = []; 1739 x.sort(); 1740 graph.states[k].stackDelayedReduce ~= x.idup; 1741 } 1742 } 1743 1744 foreach (j, node; graph.states) 1745 { 1746 foreach (e; node.elements) 1747 graph.statesWithProduction[tuple!(size_t, 1748 size_t)(e.production.productionID, e.dotPos)] ~= j; 1749 } 1750 1751 return graph; 1752 } 1753 1754 BitSet!TokenID elementFirstSet(const LRElement e, LRGraph graph) 1755 { 1756 auto l = e.afterDot(graph.grammar); 1757 if (l.length > 0) 1758 { 1759 BitSet!TokenID lookahead; 1760 lookahead = graph.grammar.firstSet(l, e.extraConstraint, e.dotPos == 0); 1761 if (lookahead[graph.grammar.tokens.getID("$end")]) 1762 { 1763 lookahead[graph.grammar.tokens.getID("$end")] = false; 1764 lookahead |= e.lookahead; 1765 } 1766 return lookahead; 1767 } 1768 else 1769 { 1770 return e.lookahead.dup; 1771 } 1772 } 1773 1774 BitSet!TokenID nodeFirstSet(const LRGraphNode node, LRGraph graph) 1775 { 1776 BitSet!TokenID r = BitSet!TokenID(graph.grammar.tokens.vals.length); 1777 foreach (e; node.elements) 1778 { 1779 r |= elementFirstSet(e, graph); 1780 } 1781 return r; 1782 } 1783 1784 enum ActionType 1785 { 1786 none, 1787 shift, 1788 reduce, 1789 accept, 1790 descent, 1791 conflict, 1792 delayedReduce 1793 } 1794 1795 struct Action 1796 { 1797 ActionType type; 1798 size_t newState = size_t.max; // for shift and nonterminal 1799 const(Production)* production; // for reduce 1800 size_t elementNr = size_t.max; // for reduce 1801 NonterminalID nonterminalID = NonterminalID.invalid; // for descent 1802 bool ignoreInConflict; 1803 string errorMessage; // for conflict 1804 const(Action)[] conflictActions; // for conflict 1805 immutable(size_t)[] delayedElements; 1806 immutable(NonterminalID)[] reusedDelayedNonterminals; 1807 bool allowRegexLookahead; 1808 string toString() const 1809 { 1810 string r; 1811 if (type == ActionType.shift) 1812 r = text("shift ", newState); 1813 else if (type == ActionType.reduce) 1814 r = text("reduce ", elementNr); 1815 else if (type == ActionType.accept) 1816 r = text("accept ", elementNr); 1817 else if (type == ActionType.descent) 1818 r = text("descent ", nonterminalID); 1819 else if (type == ActionType.conflict) 1820 r = text("conflict"); 1821 else if (type == ActionType.delayedReduce) 1822 r = text("delayedReduce ", delayedElements, reusedDelayedNonterminals); 1823 else 1824 r = text(type); 1825 if (ignoreInConflict) 1826 r ~= " ignoreInConflict"; 1827 return r; 1828 } 1829 1830 int opCmp(ref const Action other) const 1831 { 1832 if (type != other.type) 1833 return type < other.type ? -1 : 1; 1834 if (newState != other.newState) 1835 return newState < other.newState ? -1 : 1; 1836 if (elementNr != other.elementNr) 1837 return elementNr < other.elementNr ? -1 : 1; 1838 if (nonterminalID != other.nonterminalID) 1839 return nonterminalID < other.nonterminalID ? -1 : 1; 1840 if (nonterminalID != other.nonterminalID) 1841 return nonterminalID < other.nonterminalID ? -1 : 1; 1842 return 0; 1843 } 1844 } 1845 1846 struct ActionTable 1847 { 1848 size_t[Tuple!(Symbol, bool)[]][NonterminalID] jumps; 1849 size_t[Tuple!(Symbol, bool)[]][immutable(NonterminalID)[]] jumps2; 1850 Action[string][TokenID] actions; 1851 bool hasShift; 1852 Action defaultReduceAction; 1853 bool hasTags; 1854 bool[ProductionID] reduceConflictProductions; 1855 } 1856 1857 bool actionIgnored(LRGraph graph, const LRGraphNode node, const Action a, const Action[] actions) 1858 { 1859 auto grammar = graph.grammar; 1860 if (a.ignoreInConflict) 1861 return true; 1862 if (a.type == ActionType.reduce) 1863 { 1864 auto p = node.elements[a.elementNr].production; 1865 if ((p.symbols.length && p.symbols[$ - 1].annotations.contains!"eager" 1866 || p.annotations.contains!"eagerEnd"()) && actions.length == 2 1867 && ((actions[0] == a && actions[1].type.among(ActionType.shift, 1868 ActionType.descent)) || (actions[1] == a 1869 && actions[0].type.among(ActionType.shift, ActionType.descent)))) 1870 return true; 1871 return p.annotations.contains!"ignoreInConflict"() 1872 || grammar.nonterminals[p.nonterminalID].annotations.contains!"ignoreInConflict"(); 1873 } 1874 return false; 1875 } 1876 1877 ActionTable genActionTable(LRGraph graph, const LRGraphNode node) 1878 { 1879 ActionTable r; 1880 auto grammar = graph.grammar; 1881 immutable endTok = graph.grammar.tokens.getID("$end"); 1882 Action[][string][TokenID] actions; 1883 1884 bool[NonterminalID] regexLookaheadNonterminals; 1885 bool[TokenID] regexLookaheadTokens; 1886 size_t numRegexLookaheads; 1887 if (!graph.globalOptions.glrParser) 1888 { 1889 foreach (e; node.elements) 1890 { 1891 if (e.extraConstraint.disabled) 1892 continue; 1893 if (!e.isNextValid(grammar)) 1894 { 1895 if (e.production.annotations.contains!"lookahead") 1896 numRegexLookaheads++; 1897 } 1898 else if (e.isNextNonterminal(grammar)) 1899 { 1900 foreach (n; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 1901 { 1902 if (n.hasLookaheadAnnotation || e.next(grammar) 1903 .annotations.contains!"lookahead") 1904 { 1905 regexLookaheadNonterminals[n.nonterminalID] = true; 1906 } 1907 } 1908 } 1909 else if (e.next(grammar).annotations.contains!"lookahead") 1910 { 1911 regexLookaheadTokens[e.next(grammar).toTokenID] = true; 1912 } 1913 } 1914 numRegexLookaheads += regexLookaheadNonterminals.length; 1915 numRegexLookaheads += regexLookaheadTokens.length; 1916 } 1917 1918 bool[Symbol] validShifts; 1919 foreach (e; node.elements) 1920 { 1921 if (e.extraConstraint.disabled) 1922 continue; 1923 if (!e.isNextValid(grammar)) 1924 continue; 1925 1926 if (e.isNextNonterminal(grammar)) 1927 { 1928 foreach (n; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 1929 { 1930 validShifts[n.nonterminalID] = true; 1931 } 1932 } 1933 else 1934 { 1935 validShifts[e.next(grammar).toTokenID] = true; 1936 } 1937 } 1938 foreach (edge; node.edges) 1939 { 1940 if (edge.delayedNonterminals.length) 1941 { 1942 bool allValidShifts = true; 1943 foreach (n; edge.delayedNonterminals) 1944 if (n !in validShifts) 1945 allValidShifts = false; 1946 if (!allValidShifts) 1947 continue; 1948 } 1949 else 1950 { 1951 if (edge.symbol !in validShifts) 1952 continue; 1953 } 1954 r.hasShift = true; 1955 if (graph.states[edge.next].hasTags(graph)) 1956 r.hasTags = true; 1957 if (edge.symbol.isToken) 1958 { 1959 Action a; 1960 a = Action(ActionType.shift, edge.next); 1961 a.allowRegexLookahead = !!(edge.symbol.toTokenID in regexLookaheadTokens); 1962 actions[edge.symbol.toTokenID][edge.subToken] = [a]; 1963 } 1964 else if (edge.delayedNonterminals.length > 0) 1965 { 1966 r.jumps2[edge.delayedNonterminals][edge.checkDisallowedSymbols] = edge.next; 1967 } 1968 else 1969 { 1970 r.jumps[edge.symbol.toNonterminalID][edge.checkDisallowedSymbols] = edge.next; 1971 } 1972 } 1973 1974 foreach (descentNonterminal; node.elements.descentNonterminals) 1975 { 1976 if (descentNonterminal !in validShifts) 1977 continue; 1978 if (grammar.nonterminals[descentNonterminal].possibleTags.length) 1979 r.hasTags = true; 1980 BitSet!TokenID firstTokens = grammar.firstSet([ 1981 SymbolInstance(descentNonterminal) 1982 ]); 1983 foreach (e; node.elements) 1984 { 1985 if (e.extraConstraint.disabled) 1986 continue; 1987 if (e.isNextNonterminal(grammar)) 1988 { 1989 BitSet!TokenID allAllowedTokens; 1990 foreach (n; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 1991 { 1992 if (n.nonterminalID == descentNonterminal) 1993 { 1994 BitSet!TokenID allowedTokensHere; 1995 allowedTokensHere.length = firstTokens.length; 1996 allowedTokensHere.arr[] = true; 1997 foreach (nl; n.constraint.negLookaheads) 1998 { 1999 if (nl.isToken) 2000 { 2001 allowedTokensHere[nl.toTokenID] = false; 2002 } 2003 } 2004 if (allAllowedTokens.length == 0) 2005 { 2006 allAllowedTokens = allowedTokensHere; 2007 } 2008 else 2009 { 2010 allAllowedTokens |= allowedTokensHere; 2011 } 2012 } 2013 } 2014 if (allAllowedTokens.length) 2015 { 2016 firstTokens &= allAllowedTokens; 2017 } 2018 } 2019 } 2020 2021 foreach (t; firstTokens.bitsSet) 2022 { 2023 Action a; 2024 a = Action(ActionType.descent, size_t.max, null, size_t.max, descentNonterminal); 2025 a.allowRegexLookahead = !!(descentNonterminal in regexLookaheadNonterminals); 2026 if (t !in actions || "" !in actions[t] || !actions[t][""].canFind(a)) 2027 actions[t][""] ~= a; 2028 } 2029 } 2030 2031 size_t numberOfFinished; 2032 size_t numberOfDescent; 2033 bool[NonterminalID] descentNonterminals; 2034 foreach (elementNr, e; node.elements) 2035 { 2036 if (e.extraConstraint.disabled) 2037 continue; 2038 if (e.isFinished(grammar)) 2039 { 2040 numberOfFinished++; 2041 } 2042 } 2043 foreach (descentNonterminal; node.elements.descentNonterminals) 2044 { 2045 if (descentNonterminal !in descentNonterminals) 2046 { 2047 descentNonterminals[descentNonterminal] = true; 2048 numberOfDescent++; 2049 } 2050 } 2051 2052 BitSet!TokenID allEndNegLookaheadTokens; 2053 foreach (elementNr, e; node.elements) 2054 { 2055 if (e.extraConstraint.disabled) 2056 continue; 2057 if (!e.isFinished(graph.grammar)) 2058 continue; 2059 foreach (l; e.production.negLookaheads) 2060 { 2061 if (l.toTokenID in actions) 2062 continue; 2063 allEndNegLookaheadTokens[l.toTokenID] = true; 2064 } 2065 } 2066 2067 foreach (elementNr, e; node.elements) 2068 { 2069 if (e.extraConstraint.disabled) 2070 continue; 2071 if (!e.isFinished(graph.grammar)) 2072 continue; 2073 2074 const BitSet!TokenID usedTokens = e.realLookahead; 2075 2076 foreach (j; usedTokens.bitsSet) 2077 { 2078 Action a; 2079 a = Action(e.isStartElement ? ActionType.accept : ActionType.reduce, 0, 2080 e.production, elementNr, NonterminalID.invalid, e.ignoreInConflict); 2081 a.allowRegexLookahead = e.production.annotations.contains!"lookahead"; 2082 actions[j][""] ~= []; // ensure it exists 2083 if (!actions[j][""].canFind(a)) 2084 actions[j][""] ~= a; 2085 } 2086 2087 if (usedTokens[endTok]) 2088 { 2089 foreach (j; allEndNegLookaheadTokens.bitsSet) 2090 { 2091 if (e.production.negLookaheadsAnytoken) 2092 continue; 2093 if (e.production.negLookaheads.canFind(j)) 2094 continue; 2095 Action a; 2096 a = Action(e.isStartElement ? ActionType.accept : ActionType.reduce, 0, 2097 e.production, elementNr, NonterminalID.invalid, e.ignoreInConflict); 2098 a.allowRegexLookahead = e.production.annotations.contains!"lookahead"; 2099 actions[j][""] ~= []; // ensure it exists 2100 if (!actions[j][""].canFind(a)) 2101 actions[j][""] ~= a; 2102 } 2103 } 2104 } 2105 2106 Action[] applyDelayedReduce(Action[] actions, TokenID token, string subToken) 2107 { 2108 if (!graph.globalOptions.delayedReduce) 2109 return actions; 2110 if (actions.length <= 1) 2111 return actions; 2112 2113 Action lastReduceAction; 2114 2115 immutable(size_t)[] delayedElements; 2116 foreach (ax; actions) 2117 { 2118 if (ax.type == ActionType.shift) 2119 return actions; 2120 if (ax.type == ActionType.reduce && node.elements[ax.elementNr].dotPos == 0) 2121 return actions; 2122 2123 if (ax.type != ActionType.reduce) 2124 return actions; 2125 if (grammar.hasNonTrivialRewriteRule(node.elements[ax.elementNr].production)) 2126 return actions; 2127 if (lastReduceAction.type == ActionType.reduce 2128 && ax.production.symbols.length != lastReduceAction.production.symbols.length) 2129 return actions; 2130 if (lastReduceAction.type == ActionType.reduce 2131 && grammar.nonterminals[ax.production.nonterminalID].annotations.contains!"array" 2132 != grammar.nonterminals[lastReduceAction.production.nonterminalID] 2133 .annotations.contains!"array") 2134 return actions; 2135 lastReduceAction = ax; 2136 delayedElements ~= ax.elementNr; 2137 } 2138 2139 immutable(NonterminalID)[] reusedDelayedNonterminals; 2140 if (delayedElements.all!(e => node.elements[e].production.symbols.length == 1)) 2141 foreach (i, e; node.elements) 2142 { 2143 if (e.dotPos == 0) 2144 continue; 2145 if (e.production.symbols[e.dotPos - 1].isToken) 2146 continue; 2147 if (delayedElements.canFind(i)) 2148 continue; 2149 auto f = elementFirstSet(e, graph); 2150 if (f[token]) 2151 reusedDelayedNonterminals ~= e.production.symbols[e.dotPos - 1].toNonterminalID; 2152 } 2153 2154 if (delayedElements.length == 0) 2155 return actions; 2156 if (delayedElements.length == 1 && reusedDelayedNonterminals.length == 0) 2157 return actions; 2158 2159 Action bestAction = Action(ActionType.delayedReduce); 2160 bestAction.delayedElements = delayedElements; 2161 bestAction.reusedDelayedNonterminals = reusedDelayedNonterminals; 2162 2163 return [bestAction]; 2164 } 2165 2166 Action findBestAction(Action[] actions) 2167 { 2168 if (actions.length == 0) 2169 return Action(ActionType.none); 2170 Action bestAction = actions[0]; 2171 size_t numNotIgnored; 2172 if (!actionIgnored(graph, node, bestAction, actions)) 2173 numNotIgnored++; 2174 2175 foreach (ax; actions[1 .. $]) 2176 { 2177 if (!actionIgnored(graph, node, ax, actions)) 2178 { 2179 numNotIgnored++; 2180 bestAction = ax; 2181 } 2182 } 2183 if (numNotIgnored > 1) 2184 { 2185 string message; 2186 foreach (a; actions) 2187 message ~= text("\n", a); 2188 bestAction = Action(ActionType.conflict); 2189 bestAction.errorMessage = message; 2190 actions.sort(); 2191 bestAction.conflictActions = actions; 2192 foreach (ax; actions) 2193 { 2194 if (ax.type.among(ActionType.reduce, ActionType.accept)) 2195 { 2196 r.reduceConflictProductions[node.elements[ax.elementNr].production.productionID] 2197 = true; 2198 } 2199 } 2200 } 2201 return bestAction; 2202 } 2203 2204 foreach (t, a1; actions) 2205 foreach (s, a2; a1) 2206 { 2207 Action bestAction = findBestAction(applyDelayedReduce(a2, t, s)); 2208 2209 if (bestAction.type == ActionType.reduce && numberOfFinished == 1 && t != endTok) 2210 { 2211 if (r.defaultReduceAction.type != ActionType.none) 2212 assert(r.defaultReduceAction == bestAction); 2213 r.defaultReduceAction = bestAction; 2214 } 2215 else if (bestAction.type == ActionType.descent 2216 && (numberOfFinished != 1 && numberOfDescent == 1) && t != endTok) 2217 { 2218 if (r.defaultReduceAction.type != ActionType.none) 2219 assert(r.defaultReduceAction == bestAction); 2220 r.defaultReduceAction = bestAction; 2221 } 2222 else 2223 r.actions[t][s] = bestAction; 2224 } 2225 2226 return r; 2227 } 2228 2229 bool trivialState(LRGraphNode s) 2230 { 2231 if (s.elements.length != 1) 2232 return false; 2233 auto e = s.elements[0]; 2234 if (e.production.symbols.length != 1) 2235 return false; 2236 if (e.dotPos != 1) 2237 return false; 2238 if (e.production.symbols[0].isToken) 2239 return false; 2240 if (s.stackDelayedReduce[0].length) 2241 return false; 2242 return e.production.symbols[0].unwrapProduction; 2243 } 2244 2245 bool needsJumps(LRGraph graph, size_t stateNr, const LRGraphNode node, ActionTable actionTable) 2246 { 2247 if (node.type == LRGraphNodeType.lookahead || node.type == LRGraphNodeType.backtrack) 2248 return false; 2249 2250 bool hasJumps; 2251 foreach (nonterminalID, _; actionTable.jumps) 2252 hasJumps = true; 2253 if (!hasJumps) 2254 return false; 2255 2256 return true; 2257 } 2258 2259 void calcNextLookahead(ref BitSet!TokenID result, LRGraph graph, SymbolWithConstraint symbol, 2260 TokenID currentToken, ref bool[NonterminalWithConstraint] done, size_t indent) 2261 { 2262 auto grammar = graph.grammar; 2263 if (symbol.symbol.isToken) 2264 { 2265 } 2266 else 2267 { 2268 if (NonterminalWithConstraint(symbol.symbol.toNonterminalID, symbol.constraint) in done) 2269 return; 2270 done[NonterminalWithConstraint(symbol.symbol.toNonterminalID, symbol.constraint)] = true; 2271 foreach (p; grammar.getProductions(symbol.symbol.toNonterminalID)) 2272 { 2273 if (!grammar.isProductionAllowed(NonterminalWithConstraint(symbol.symbol.toNonterminalID, 2274 symbol.constraint), p)) 2275 continue; 2276 2277 foreach (pos; 0 .. p.symbols.length) 2278 { 2279 auto s = grammar.nextSymbolWithConstraint(symbol.constraint, 2280 p.symbols[pos], pos == 0); 2281 calcNextLookahead(result, graph, s, currentToken, done, indent + 2); 2282 if (grammar.hasExactToken(p.symbols[pos], currentToken)) 2283 { 2284 auto tmpElement = LRElement(p, pos + 1); 2285 tmpElement.extraConstraint = symbol.constraint; 2286 auto set = elementFirstSet(tmpElement, graph); 2287 result |= set; 2288 } 2289 if (!grammar.canBeEmpty(p.symbols[pos])) 2290 break; 2291 } 2292 } 2293 } 2294 } 2295 2296 bool calcNextLookaheadAfter(ref BitSet!TokenID result, LRGraph graph, size_t stateNr, 2297 size_t elementNr, size_t dotPos, TokenID currentToken, size_t indent) 2298 { 2299 auto grammar = graph.grammar; 2300 auto element2 = graph.states[stateNr].elements[elementNr]; 2301 foreach (pos; dotPos .. element2.production.symbols.length) 2302 { 2303 if (grammar.firstSet(element2.production.symbols[pos .. $])[currentToken]) 2304 { 2305 bool[NonterminalWithConstraint] done2; 2306 auto s = grammar.nextSymbolWithConstraint(element2.extraConstraint, 2307 element2.production.symbols[pos], pos == 0); 2308 calcNextLookahead(result, graph, s, currentToken, done2, indent + 2); 2309 if (grammar.hasExactToken(element2.production.symbols[pos], currentToken)) 2310 { 2311 auto tmpElement = LRElement(element2.production, pos + 1, element2.lookahead.dup); 2312 tmpElement.extraConstraint = element2.extraConstraint; 2313 auto set = elementFirstSet(tmpElement, graph); 2314 result |= set; 2315 } 2316 } 2317 if (!grammar.canBeEmpty(element2.production.symbols[pos])) 2318 return false; 2319 } 2320 return true; 2321 } 2322 2323 BitSet!TokenID calcNextLookahead(LRGraph graph, size_t stateNr, size_t elementNr, 2324 TokenID currentToken, size_t indent) 2325 { 2326 auto grammar = graph.grammar; 2327 2328 auto cacheEntry = NextLookaheadCacheKey(stateNr, elementNr, currentToken) in graph.nextLookaheadCache; 2329 if (cacheEntry) 2330 return *cacheEntry; 2331 BitSet!TokenID result; 2332 result.length = grammar.tokens.vals.length; 2333 graph.nextLookaheadCache[NextLookaheadCacheKey(stateNr, elementNr, currentToken)] = result; 2334 2335 auto element = graph.states[stateNr].elements[elementNr]; 2336 2337 if (element.production.annotations.contains!"eagerEnd") 2338 { 2339 result[grammar.tokens.getID("$end")] = true; 2340 graph.nextLookaheadCache[NextLookaheadCacheKey(stateNr, elementNr, currentToken)] = result; 2341 return result; 2342 } 2343 2344 foreach (prev; element.prevElements) 2345 result |= calcNextLookahead(graph, prev.state == size_t.max ? stateNr 2346 : prev.state, prev.elementNr, currentToken, indent + 1); 2347 2348 if (element.dotPos == 0 && element.isStartElement) 2349 { 2350 foreach (state2, node2; graph.states) 2351 { 2352 if (!node2.elements.descentNonterminals.canFind(element.production.nonterminalID)) 2353 continue; 2354 foreach (elementNr2, element2; graph.states[state2].elements) 2355 { 2356 bool elementPossible; 2357 foreach (n; element2.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 2358 { 2359 if (n.nonterminalID == element.production.nonterminalID) 2360 { 2361 elementPossible = true; 2362 } 2363 } 2364 if (elementPossible) 2365 { 2366 if (calcNextLookaheadAfter(result, graph, state2, 2367 elementNr2, element2.dotPos + 1, currentToken, indent)) 2368 { 2369 result |= calcNextLookahead(graph, state2, elementNr2, 2370 currentToken, indent + 1); 2371 } 2372 } 2373 } 2374 } 2375 } 2376 if (element.dotPos == 0 && !element.isStartElement) 2377 { 2378 foreach (elementNr2, element2; graph.states[stateNr].elements) 2379 { 2380 bool elementPossible; 2381 foreach (n; element2.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 2382 { 2383 if (n.nonterminalID == element.production.nonterminalID) 2384 { 2385 elementPossible = true; 2386 } 2387 } 2388 if (elementPossible) 2389 { 2390 if (calcNextLookaheadAfter(result, graph, stateNr, elementNr2, 2391 element2.dotPos + 1, currentToken, indent)) 2392 { 2393 result |= calcNextLookahead(graph, stateNr, elementNr2, 2394 currentToken, indent + 1); 2395 } 2396 } 2397 } 2398 } 2399 2400 graph.nextLookaheadCache[NextLookaheadCacheKey(stateNr, elementNr, currentToken)] = result; 2401 return result; 2402 } 2403 2404 BitSet!TokenID calcNextLookahead(LRGraph graph, size_t stateNr, Action action, TokenID currentToken) 2405 { 2406 auto grammar = graph.grammar; 2407 BitSet!TokenID r; 2408 r.length = grammar.tokens.vals.length; 2409 if (action.type == ActionType.shift) 2410 { 2411 foreach (element; graph.states[stateNr].elements) 2412 { 2413 if (element.isNextToken(grammar) && element.next(grammar).toTokenID == currentToken) 2414 { 2415 auto set = elementFirstSet(element.advance, graph); 2416 r |= set; 2417 } 2418 } 2419 } 2420 else if (action.type.among(ActionType.reduce, ActionType.accept)) 2421 { 2422 r |= calcNextLookahead(graph, stateNr, action.elementNr, currentToken, 1); 2423 } 2424 else if (action.type == ActionType.descent) 2425 { 2426 { 2427 bool[NonterminalWithConstraint] done2; 2428 calcNextLookahead(r, graph, 2429 SymbolWithConstraint(action.nonterminalID), currentToken, done2, 1); 2430 } 2431 foreach (element2; graph.states[stateNr].elements) 2432 { 2433 bool elementPossible; 2434 foreach (n; element2.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 2435 { 2436 if (n.nonterminalID == action.nonterminalID) 2437 { 2438 elementPossible = true; 2439 } 2440 } 2441 if (elementPossible) 2442 { 2443 if (grammar.hasExactToken(action.nonterminalID, currentToken)) 2444 { 2445 auto set = elementFirstSet(element2.advance, graph); 2446 r |= set; 2447 } 2448 if (grammar.canBeEmpty(action.nonterminalID)) 2449 { 2450 if (calcNextLookaheadAfter(r, graph, stateNr, action.elementNr, 2451 graph.states[stateNr].elements[action.elementNr].dotPos + 1, 2452 currentToken, 1)) 2453 { 2454 //result |= calcNextLookahead(graph, stateNr, elementNr2, currentToken, indent + 1); 2455 } 2456 } 2457 } 2458 } 2459 } 2460 else 2461 { 2462 enforce(false); 2463 foreach (t; grammar.tokens.allIDs) 2464 r[t] = true; 2465 } 2466 return r; 2467 }