1 2 // Copyright Tim Schendekehl 2023. 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 module dparsergen.generator.grammar; 8 import dparsergen.core.nodetype; 9 import dparsergen.core.utils; 10 import dparsergen.generator.ebnf; 11 import dparsergen.generator.grammarebnf; 12 import dparsergen.generator.ids; 13 import dparsergen.generator.nfa; 14 import dparsergen.generator.production; 15 import std.algorithm; 16 import std.array; 17 import std.conv; 18 import std.exception; 19 import std.stdio; 20 import std.string; 21 import std.typecons; 22 23 class SymbolInfo 24 { 25 string name; 26 const(Declaration)[] declarations; 27 28 bool isToken; 29 bool isIgnoreToken; 30 31 bool reachableFromStart; 32 bool reachableFromToken; 33 bool reachableFromStartNoToken; 34 } 35 36 struct StartNonterminal 37 { 38 NonterminalID nonterminal; 39 bool needsEmptyProduction; 40 } 41 42 string escapeName(string s) 43 { 44 if (s.endsWith("?")) 45 { 46 return escapeName(s[0 .. $ - 1]) ~ "?"; 47 } 48 if (s.startsWith("\"") && s.endsWith("\"")) 49 return s; 50 51 string r; 52 foreach (char c; s) 53 { 54 if (c.inCharSet!"a-zA-Z0-9") 55 r ~= c; 56 else 57 r ~= format("_%02X", c); 58 } 59 return r; 60 } 61 62 bool tagsSorted(immutable(TagUsage)[] tags) 63 { 64 foreach (i, t; tags) 65 { 66 if (i) 67 if (t.tag <= tags[i - 1].tag) 68 return false; 69 } 70 return true; 71 } 72 73 struct Constraint 74 { 75 immutable(Symbol)[] negLookaheads; 76 immutable(TagUsage)[] tags; 77 bool disabled; 78 } 79 80 struct NonterminalWithConstraint 81 { 82 NonterminalID nonterminalID; 83 Constraint constraint; 84 bool hasLookaheadAnnotation; 85 } 86 87 struct SymbolWithConstraint 88 { 89 Symbol symbol; 90 Constraint constraint; 91 enum invalid = SymbolWithConstraint(Symbol.invalid, Constraint.init); 92 } 93 94 class EBNFGrammar 95 { 96 const SymbolInfo[string] symbolInfos; 97 bool isLexerGrammar; 98 99 IDMap!(TokenID, Token) tokens; 100 IDMap!(NonterminalID, Nonterminal) nonterminals; 101 IDMap!(TagID, Tag) tags; 102 const(Production*)[] productionsData; 103 immutable(TokenID)[] inContextOnlyTokens; 104 105 StartNonterminal[] startNonterminals; 106 TokenID[2][] matchingTokens; 107 108 bool allowTokenNonterminals; 109 110 EBNFGrammar origGrammar_; 111 112 bool[Symbol] symbolsInMultiplePaths; 113 114 size_t startTokenID; 115 size_t startNonterminalID; 116 size_t startProductionID; 117 118 this(const SymbolInfo[string] symbolInfos, EBNFGrammar origGrammar_ = null) 119 { 120 this.symbolInfos = symbolInfos; 121 this.origGrammar_ = origGrammar_; 122 } 123 124 string getSymbolName(const Symbol s) const 125 { 126 if (s.isToken) 127 { 128 if (s.id >= tokens.vals.length) 129 return "??????"; 130 return tokens[s.toTokenID].name; 131 } 132 else 133 { 134 if (s.id >= nonterminals.vals.length) 135 return "??????"; 136 return nonterminals[s.toNonterminalID].name; 137 } 138 } 139 140 bool isValid(const Symbol s) const 141 { 142 if (s.isToken) 143 return s.id < tokens.vals.length; 144 else 145 return s.id < nonterminals.vals.length; 146 } 147 148 EBNFGrammar origGrammar() 149 { 150 if (origGrammar_ is null) 151 return this; 152 else 153 return origGrammar_.origGrammar(); 154 } 155 156 string symbolInstanceToString(const SymbolInstance s, bool includeAll = true, bool escape = false) const 157 { 158 Appender!string app; 159 symbolInstanceToString(app, s, includeAll, escape); 160 return app.data; 161 } 162 163 string symbolInstanceToString(const SymbolInstance[] symbols, 164 bool includeAll = true, bool escape = false) const 165 { 166 Appender!string app; 167 foreach (i, s; symbols) 168 { 169 if (i) 170 app.put(" "); 171 symbolInstanceToString(app, s, includeAll, escape); 172 } 173 return app.data; 174 } 175 176 void symbolInstanceToString(ref Appender!string app, const SymbolInstance s, 177 bool includeAll = true, bool escape = false) const 178 { 179 if (includeAll) 180 s.annotations.toStringCode(app, true); 181 foreach (l; s.negLookaheads) 182 { 183 app.put("!"); 184 app.put(getSymbolName(l)); 185 app.put(" "); 186 } 187 if (includeAll) 188 { 189 if (s.unwrapProduction) 190 app.put("<"); 191 if (s.dropNode) 192 app.put("^"); 193 if (s.symbolInstanceName.length) 194 { 195 app.put(s.symbolInstanceName); 196 app.put(":"); 197 } 198 } 199 if (escape && !s.isToken) 200 app.put(getSymbolName(s).escapeName); 201 else 202 app.put(getSymbolName(s)); 203 if (s.subToken.length) 204 { 205 app.put(">>"); 206 app.put(s.subToken); 207 } 208 } 209 210 string productionStringRhs(const Production* p) const 211 { 212 Appender!string app; 213 foreach (s; p.symbols) 214 { 215 app.put(" "); 216 symbolInstanceToString(app, s); 217 } 218 foreach (l; p.negLookaheads) 219 { 220 app.put(" !"); 221 app.put(getSymbolName(l)); 222 } 223 if (p.negLookaheadsAnytoken) 224 app.put(" !anytoken"); 225 p.annotations.toStringCode(app); 226 return app.data; 227 } 228 229 string productionString(const Production* p) const 230 { 231 Appender!string app; 232 app.put(nonterminals[p.nonterminalID].name); 233 nonterminals[p.nonterminalID].annotations.toStringCode(app); 234 app.put(" ="); 235 app.put(productionStringRhs(p)); 236 if (p.isVirtual) 237 app.put(" [virtual]"); 238 return app.data; 239 } 240 241 override string toString() const 242 { 243 Appender!string app; 244 app.put("EBNFGrammar\n"); 245 foreach (p; productions) 246 { 247 app.put(" "); 248 app.put(productionString(p)); 249 app.put("\n"); 250 } 251 return app.data; 252 } 253 254 auto productions() const 255 { 256 return productionsData.filter!(p => p !is null); 257 } 258 259 void addProduction(Production* p) 260 { 261 assert(productionsData.length < ProductionID.max); 262 p.productionID = cast(ProductionID) productionsData.length; 263 productionsData ~= p; 264 } 265 266 private const(Production*)[][] productionsForNonterminal; 267 void fillProductionsForNonterminal() 268 { 269 assert(productionsForNonterminal.length == 0); 270 productionsForNonterminal.length = nonterminals.vals.length; 271 foreach (p; productions) 272 { 273 productionsForNonterminal[p.nonterminalID.id] ~= p; 274 } 275 } 276 277 const(Production*)[] getProductions(const NonterminalID symbol) const 278 in 279 { 280 assert(!symbol.isToken); 281 } 282 do 283 { 284 assert(productionsForNonterminal.length == nonterminals.vals.length); 285 return productionsForNonterminal[symbol.id]; 286 } 287 288 private BitSet!NonterminalID nonterminalCanBeEmpty; 289 private BitSet!NonterminalID nonterminalCanBeNonEmpty; 290 291 bool canBeEmpty(const Symbol symbol) const 292 { 293 if (symbol.isToken) 294 return false; 295 return nonterminalCanBeEmpty[symbol.toNonterminalID]; 296 } 297 298 bool canBeNonEmpty(const Symbol symbol) const 299 { 300 if (symbol.isToken) 301 return true; 302 return nonterminalCanBeNonEmpty[symbol.toNonterminalID]; 303 } 304 305 void calcNonterminalCanBeEmpty() 306 { 307 calcNonterminalCanBeEmpty(nonterminalCanBeEmpty, nonterminalCanBeNonEmpty, false); 308 } 309 310 void calcNonterminalCanBeEmpty(ref BitSet!NonterminalID nonterminalCanBeEmpty, 311 ref BitSet!NonterminalID nonterminalCanBeNonEmpty, bool checkNoOptEmpty) const 312 { 313 bool changed; 314 nonterminalCanBeEmpty.length = nonterminals.vals.length; 315 nonterminalCanBeNonEmpty.length = nonterminals.vals.length; 316 do 317 { 318 changed = false; 319 foreach (p; productions) 320 { 321 if (nonterminalCanBeEmpty[p.nonterminalID] == true) 322 continue; 323 if (nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty") 324 continue; 325 326 bool allSymbolsCanBeEmpty = true; 327 foreach (s; p.symbols) 328 if (s.isToken || !nonterminalCanBeEmpty[s.toNonterminalID]) 329 allSymbolsCanBeEmpty = false; 330 331 if (allSymbolsCanBeEmpty) 332 { 333 nonterminalCanBeEmpty[p.nonterminalID] = true; 334 changed = true; 335 } 336 } 337 } 338 while (changed); 339 do 340 { 341 changed = false; 342 foreach (p; productions) 343 { 344 if (nonterminalCanBeNonEmpty[p.nonterminalID] == true) 345 continue; 346 347 bool anySymbolCanBeNonEmpty = false; 348 foreach (s; p.symbols) 349 if (s.isToken || nonterminalCanBeNonEmpty[s.toNonterminalID]) 350 anySymbolCanBeNonEmpty = true; 351 352 if (anySymbolCanBeNonEmpty 353 || nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty") 354 { 355 nonterminalCanBeNonEmpty[p.nonterminalID] = true; 356 changed = true; 357 } 358 } 359 } 360 while (changed); 361 } 362 363 struct FirstSetsKey 364 { 365 NonterminalID nonterminalID; 366 immutable(Symbol)[] negLookaheads; 367 immutable(TagUsage)[] tags; 368 } 369 370 BitSet!TokenID[FirstSetsKey] firstSetsCache; 371 BitSet!TokenID firstSetImpl(const(Symbol) x, 372 immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags) 373 { 374 if (x.isToken) 375 { 376 BitSet!TokenID result; 377 result.length = tokens.vals.length; 378 if (!negLookaheads.canFind(x)) 379 { 380 result[x.toTokenID] = true; 381 } 382 return result; 383 } 384 385 if (FirstSetsKey(x.toNonterminalID, negLookaheads, tags) in firstSetsCache) 386 return firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)]; 387 388 BitSet!TokenID result; 389 result.length = tokens.vals.length; 390 391 firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)] = result; 392 393 foreach (p; getProductions(x.toNonterminalID)) 394 { 395 if (!isProductionAllowed(NonterminalWithConstraint(x.toNonterminalID, 396 Constraint(negLookaheads, tags)), p)) 397 continue; 398 if (p.symbols.length) 399 { 400 const(SymbolInstance)[] symbols = p.symbols[]; 401 BitSet!TokenID f; 402 f.length = tokens.vals.length; 403 bool lastEmpty = true; 404 bool first = true; 405 do 406 { 407 f[tokens.getID("$end")] = false; 408 409 auto next = nextSymbolWithConstraint(Constraint(negLookaheads, 410 tags), symbols[0], first); 411 412 auto inner = firstSetImpl(symbols[0], 413 next.constraint.negLookaheads, next.constraint.tags); 414 if (lastEmpty) 415 f |= inner; 416 lastEmpty = lastEmpty && inner[tokens.getID("$end")]; 417 symbols = symbols[1 .. $]; 418 first = false; 419 } 420 while (symbols.length && lastEmpty); 421 result |= f; 422 } 423 else 424 { 425 result[tokens.getID("$end")] = true; 426 } 427 } 428 429 firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)] = result; 430 return result; 431 } 432 433 BitSet!TokenID firstSet(const SymbolInstance[] symbols, 434 Constraint extraConstraint = Constraint.init, bool isFirst = false) 435 { 436 BitSet!TokenID r; 437 r.length = tokens.vals.length; 438 r[tokens.getID("$end")] = true; 439 const(SymbolInstance)[] x = symbols; 440 441 bool lastEmpty = true; 442 while (x.length && lastEmpty) 443 { 444 BitSet!NonterminalID done; 445 done.length = nonterminals.vals.length; 446 447 r[tokens.getID("$end")] = false; 448 449 if (x[0].isToken) 450 { 451 if (lastEmpty) 452 { 453 r[x[0].toTokenID] = true; 454 } 455 456 lastEmpty = false; 457 } 458 else 459 { 460 auto next = nextNonterminalWithConstraint(extraConstraint, x[0], isFirst); 461 const BitSet!TokenID nextFirstSet = firstSetImpl(next.nonterminalID, 462 next.constraint.negLookaheads, next.constraint.tags); 463 if (lastEmpty) 464 { 465 foreach (t; nextFirstSet.bitsSet) 466 r[t] = true; 467 lastEmpty = nextFirstSet[tokens.getID("$end")]; 468 } 469 } 470 471 x = x[1 .. $]; 472 isFirst = false; 473 } 474 475 return r; 476 } 477 478 bool firstSetContains(const SymbolInstance[] symbols, TokenID t) 479 { 480 const(SymbolInstance)[] x = symbols; 481 482 while (x.length) 483 { 484 if (x[0].isToken) 485 { 486 return t == x[0].toTokenID; 487 } 488 else 489 { 490 const BitSet!TokenID nextFirstSet = firstSetImpl(x[0].toNonterminalID, 491 x[0].negLookaheads, x[0].tags); 492 if (nextFirstSet[t]) 493 return true; 494 if (!nextFirstSet[tokens.getID("$end")]) 495 return false; 496 } 497 498 x = x[1 .. $]; 499 } 500 501 return false; 502 } 503 504 BitSet!NonterminalID[FirstSetsKey] firstSetsNonterminalCache; 505 private BitSet!NonterminalID firstSetNonterminal(const(NonterminalID) x, 506 immutable(Symbol)[] negLookaheads) 507 { 508 if (FirstSetsKey(x.toNonterminalID, negLookaheads) in firstSetsNonterminalCache) 509 return firstSetsNonterminalCache[FirstSetsKey(x.toNonterminalID, negLookaheads)]; 510 511 BitSet!NonterminalID result; 512 result.length = nonterminals.vals.length; 513 514 firstSetsNonterminalCache[FirstSetsKey(x, negLookaheads)] = result; 515 516 foreach (p; getProductions(x.toNonterminalID)) 517 { 518 if (p.symbols.length) 519 { 520 immutable(Symbol)[] nextNegLookaheads = negLookaheads; 521 const(SymbolInstance)[] symbols = p.symbols[]; 522 if (negLookaheads.canFind(symbols[0].symbol)) 523 continue; 524 if (symbols[0].isToken) 525 continue; 526 527 nextNegLookaheads.addOnce(symbols[0].negLookaheads); 528 result |= firstSetNonterminal(symbols[0].toNonterminalID, nextNegLookaheads); 529 } 530 } 531 532 firstSetsNonterminalCache[FirstSetsKey(x, negLookaheads)] = result; 533 return result; 534 } 535 536 bool[NonterminalID][TokenID] hasExactTokenCache; 537 bool hasExactToken(Symbol symbol, TokenID currentToken) 538 { 539 if (symbol.isToken) 540 return symbol.toTokenID == currentToken; 541 542 bool[NonterminalID]* hasExactTokenCacheHere = currentToken in hasExactTokenCache; 543 if (hasExactTokenCacheHere is null) 544 { 545 hasExactTokenCache[currentToken] = null; 546 hasExactTokenCacheHere = currentToken in hasExactTokenCache; 547 } 548 if (symbol.toNonterminalID in *hasExactTokenCacheHere) 549 return (*hasExactTokenCacheHere)[symbol.toNonterminalID]; 550 551 (*hasExactTokenCacheHere)[symbol.toNonterminalID] = false; 552 553 bool r; 554 555 foreach (p; getProductions(symbol.toNonterminalID)) 556 { 557 bool nextExactMatch = false; 558 bool beforeToken = true; 559 foreach (pos; 0 .. p.symbols.length) 560 { 561 bool exactMatch2; 562 if (beforeToken) 563 { 564 exactMatch2 = hasExactToken(p.symbols[pos], currentToken); 565 } 566 if (!canBeEmpty(p.symbols[pos])) 567 { 568 nextExactMatch = false; 569 beforeToken = false; 570 } 571 if (exactMatch2) 572 nextExactMatch = true; 573 } 574 if (nextExactMatch) 575 r = true; 576 } 577 578 (*hasExactTokenCacheHere)[symbol.toNonterminalID] = r; 579 return r; 580 } 581 582 void calcNonterminalTypes() 583 { 584 BitSet!NonterminalID done; 585 done.length = nonterminals.vals.length; 586 Tuple!(NonterminalFlags, SymbolID[]) handleNonterminal(NonterminalID n, 587 ref TagID[] possibleTags) 588 { 589 if (done[n]) 590 return tuple!(NonterminalFlags, SymbolID[])(NonterminalFlags.none, null); 591 done[n] = true; 592 scope (success) 593 done[n] = false; 594 NonterminalFlags r; 595 SymbolID[] buildNonterminals; 596 foreach (p; getProductions(n)) 597 { 598 foreach (tag; p.tags) 599 possibleTags.addOnce(tag); 600 foreach (s; p.symbols) 601 foreach (tagUsage; s.tags) 602 if (tagUsage.inherit && !tagUsage.reject) 603 possibleTags.addOnce(tagUsage.tag); 604 if (nonterminals[n].annotations.contains!"array"()) 605 { 606 r |= NonterminalFlags.array; 607 if (p.symbols.length == 0) 608 r |= NonterminalFlags.empty; 609 else 610 { 611 foreach (s; p.symbols) 612 { 613 if (s.dropNode) 614 continue; 615 if (s.isToken) 616 r |= NonterminalFlags.arrayOfString; 617 else 618 { 619 TagID[] unusedPossibleTags; 620 auto x = handleNonterminal(s.toNonterminalID, unusedPossibleTags); 621 r |= (x[0] & NonterminalFlags.anyArray); 622 if (x[0] & NonterminalFlags.nonterminal) 623 r |= NonterminalFlags.arrayOfNonterminal; 624 if (x[0] & NonterminalFlags..string) 625 r |= NonterminalFlags.arrayOfString; 626 buildNonterminals.addOnce(x[1]); 627 } 628 } 629 } 630 } 631 else 632 { 633 if (nonterminals[n].annotations.contains!"string"()) 634 r |= NonterminalFlags..string; 635 636 if (p.symbols.length == 0) 637 r |= NonterminalFlags.empty; 638 else if (isSimpleProduction(*p)) 639 { 640 foreach (s; p.symbols) 641 { 642 if (s.dropNode) 643 continue; 644 if (s.isToken) 645 r |= NonterminalFlags..string; 646 else 647 { 648 auto x = handleNonterminal(s.toNonterminalID, possibleTags); 649 r |= x[0]; 650 buildNonterminals.addOnce(x[1]); 651 } 652 } 653 } 654 else if (nonterminals[n].annotations.contains!"string"()) 655 r |= NonterminalFlags..string; 656 else 657 { 658 r |= NonterminalFlags.nonterminal; 659 buildNonterminals.addOnce(n.id); 660 } 661 } 662 } 663 return tuple!(NonterminalFlags, SymbolID[])(r, buildNonterminals); 664 } 665 666 foreach (n; nonterminals.allIDs) 667 { 668 TagID[] possibleTags; 669 auto r = handleNonterminal(n, possibleTags); 670 if (r[0] & NonterminalFlags.anyArray) 671 enforce(!(r[0] & NonterminalFlags.anySingle)); 672 nonterminals[n].flags = r[0]; 673 r[1].sort(); 674 nonterminals[n].buildNonterminals = r[1].idup; 675 possibleTags.sort(); 676 nonterminals[n].possibleTags = possibleTags.idup; 677 } 678 } 679 680 const(RewriteRule)[] getRewriteRules(const Production* p) const 681 { 682 if (p.rewriteRules.length > 0) 683 return p.rewriteRules; 684 RewriteRule r = RewriteRule(p); 685 foreach (i, s; p.symbols) 686 { 687 if (!s.dropNode) 688 { 689 r.parameters ~= i; 690 } 691 } 692 return [r]; 693 } 694 695 bool hasNonTrivialRewriteRule(const Production* p) const 696 { 697 if (p.rewriteRules.length == 0) 698 { 699 foreach (i, s; p.symbols) 700 { 701 if (s.dropNode) 702 return true; 703 } 704 return false; 705 } 706 if (p.rewriteRules.length > 1) 707 return true; 708 if (p.symbols.length == p.rewriteRules[0].parameters.length) 709 { 710 foreach (i; 0 .. p.symbols.length) 711 if (p.rewriteRules[0].parameters[i] != i) 712 return true; 713 return false; 714 } 715 return true; 716 } 717 718 bool canForward(const Production* p, size_t symbolNr) 719 { 720 foreach (i2; 0 .. p.symbols.length) 721 { 722 if (i2 == symbolNr) 723 continue; 724 if (p.symbols[i2].isToken) 725 return false; 726 if (!canBeEmpty(p.symbols[i2])) 727 return false; 728 } 729 return true; 730 } 731 732 bool canForwardPrefix(const Production* p, size_t symbolNr) 733 { 734 foreach (i2; 0 .. symbolNr) 735 { 736 if (i2 == symbolNr) 737 continue; 738 if (p.symbols[i2].isToken) 739 return false; 740 if (!canBeEmpty(p.symbols[i2])) 741 return false; 742 } 743 return true; 744 } 745 746 bool isSimpleProduction(const Production production) const 747 { 748 static Appender!(size_t[]) notDropped; 749 notDropped.clear(); 750 foreach (i, s; production.symbols) 751 if (!s.dropNode) 752 notDropped.put(i); 753 754 return (notDropped.data.length == 1 755 && production.symbols[notDropped.data[0]].unwrapProduction) 756 || (production.isVirtual && production.symbols.length == 1 757 && !production.symbols[0].isToken 758 && !nonterminals[production.nonterminalID].annotations.contains!"array") 759 || (production.isVirtual && production.symbols.length == 0); 760 } 761 762 bool isDirectUnwrapProduction(const Production production) const 763 { 764 if (!nonterminals[production.nonterminalID].annotations.contains!"directUnwrap"() 765 && !nonterminals[production.nonterminalID].name.endsWith("?")) 766 return false; 767 return (!production.isVirtual && production.symbols.length == 1 768 && production.symbols[0].unwrapProduction && !production.symbols[0].isToken) 769 || (production.isVirtual && production.symbols.length == 1 770 && !production.symbols[0].isToken 771 && !nonterminals[production.nonterminalID].annotations.contains!"array"); 772 } 773 774 immutable(NonterminalWithConstraint)[][NonterminalWithConstraint] directUnwrapClosureCache; 775 immutable(NonterminalWithConstraint)[] directUnwrapClosure(NonterminalID s, 776 immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags) 777 { 778 if (NonterminalWithConstraint(s, Constraint(negLookaheads, tags)) in directUnwrapClosureCache) 779 { 780 auto r = directUnwrapClosureCache[NonterminalWithConstraint(s, 781 Constraint(negLookaheads, tags))]; 782 enforce(r.length, getSymbolName(s)); 783 return r; 784 } 785 786 if (getProductions(s).length == 0) 787 { 788 return []; 789 } 790 791 if (negLookaheads.canFind(s)) 792 { 793 return []; 794 } 795 796 foreach (t; tags) 797 { 798 if (t.needed && !nonterminals[s].possibleTags.canFind(t.tag)) 799 return []; 800 } 801 802 directUnwrapClosureCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = [ 803 ]; 804 805 NonterminalWithConstraint[] r; 806 bool needsNonterminal; 807 foreach (p; getProductions(s)) 808 { 809 if (!isDirectUnwrapProduction(*p)) 810 needsNonterminal = true; 811 } 812 void addNonterminal(NonterminalWithConstraint n) 813 { 814 foreach (t; n.constraint.tags) 815 { 816 if (t.needed && !nonterminals[n.nonterminalID].possibleTags.canFind(t.tag)) 817 return; 818 } 819 820 foreach (i, ref m2; r) 821 { 822 if (m2.nonterminalID == n.nonterminalID) 823 { 824 m2.constraint = orConstraint(m2.constraint, n.constraint); 825 return; 826 } 827 } 828 r ~= n; 829 } 830 831 if (needsNonterminal) 832 addNonterminal(NonterminalWithConstraint(s, Constraint(negLookaheads, tags))); 833 834 foreach (p; getProductions(s)) 835 { 836 if (isDirectUnwrapProduction(*p)) 837 { 838 if (negLookaheads.canFind(p.symbols[0])) 839 continue; 840 immutable(Symbol)[] nextNegLookaheads = negLookaheads; 841 nextNegLookaheads.addOnce(p.symbols[0].negLookaheads); 842 auto nextTags = tags; 843 if (p.symbols[0].annotations.contains!"excludeDirectUnwrap") 844 { 845 if (directUnwrapClosureHasSelf(p.symbols[0].symbol.toNonterminalID, 846 nextNegLookaheads, nextTags)) 847 addNonterminal(NonterminalWithConstraint(p.symbols[0].symbol.toNonterminalID, 848 Constraint(nextNegLookaheads, nextTags), 849 p.symbols[0].annotations.contains!"lookahead")); 850 } 851 else 852 { 853 foreach (n; directUnwrapClosure(p.symbols[0].symbol.toNonterminalID, 854 nextNegLookaheads, nextTags)) 855 addNonterminal(NonterminalWithConstraint(n.nonterminalID, n.constraint, 856 n.hasLookaheadAnnotation 857 || p.symbols[0].annotations.contains!"lookahead")); 858 } 859 } 860 } 861 862 enforce(r.length, getSymbolName(s)); 863 864 auto r2 = r.idup; 865 866 directUnwrapClosureCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = r2; 867 868 return r2; 869 } 870 871 immutable(NonterminalWithConstraint)[] directUnwrapClosure(const SymbolInstance s) 872 { 873 return directUnwrapClosure(s.toNonterminalID, s.negLookaheads, s.tags); 874 } 875 876 immutable(NonterminalWithConstraint)[] directUnwrapClosure(NonterminalWithConstraint n) 877 { 878 return directUnwrapClosure(n.nonterminalID, n.constraint.negLookaheads, n.constraint.tags); 879 } 880 881 immutable(Symbol)[][NonterminalID][NonterminalWithConstraint] directUnwrapClosureMapCache; 882 immutable(Symbol)[][NonterminalID] directUnwrapClosureMap(NonterminalID s, 883 immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags) 884 { 885 if (NonterminalWithConstraint(s, Constraint(negLookaheads, 886 tags)) in directUnwrapClosureMapCache) 887 { 888 return directUnwrapClosureMapCache[NonterminalWithConstraint(s, 889 Constraint(negLookaheads, tags))]; 890 } 891 892 auto r = directUnwrapClosure(s, negLookaheads, tags); 893 if (r.length == 0) 894 return null; 895 896 immutable(Symbol)[][NonterminalID] n; 897 foreach (x; r) 898 n[x.nonterminalID] = x.constraint.negLookaheads; 899 900 directUnwrapClosureMapCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = n; 901 902 return n; 903 } 904 905 immutable(Symbol)[][NonterminalID] directUnwrapClosureMap(NonterminalWithConstraint n) 906 { 907 return directUnwrapClosureMap(n.nonterminalID, 908 n.constraint.negLookaheads, n.constraint.tags); 909 } 910 911 bool directUnwrapClosureHasSelf(NonterminalID s, 912 immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags) 913 { 914 if (negLookaheads.canFind(s)) 915 return false; 916 917 bool needsNonterminal; 918 foreach (p; getProductions(s)) 919 { 920 if (!isProductionAllowed(NonterminalWithConstraint(s, 921 Constraint(negLookaheads, tags)), p)) 922 continue; 923 if (!isDirectUnwrapProduction(*p)) 924 needsNonterminal = true; 925 } 926 return needsNonterminal; 927 } 928 929 bool directUnwrapClosureHasSelf(NonterminalWithConstraint n) 930 { 931 return directUnwrapClosureHasSelf(n.nonterminalID, 932 n.constraint.negLookaheads, n.constraint.tags); 933 } 934 935 bool isProductionAllowed(NonterminalWithConstraint n, const Production* p) 936 { 937 assert(p.nonterminalID == n.nonterminalID); 938 if (p.symbols.length && n.constraint.negLookaheads.canFind(p.symbols[0])) 939 return false; 940 foreach (t; n.constraint.tags) 941 { 942 if (t.reject && p.tags.canFind(t.tag)) 943 return false; 944 if (t.needed && !nonterminals[n.nonterminalID].possibleTags.canFind(t.tag)) 945 return false; 946 } 947 return true; 948 } 949 950 SymbolWithConstraint nextSymbolWithConstraint(Constraint c, SymbolInstance s, bool first) 951 { 952 immutable(Symbol)[] nextNegLookaheads; 953 if (first) 954 nextNegLookaheads = c.negLookaheads; 955 nextNegLookaheads.addOnce(s.negLookaheads); 956 957 TagUsage[] nextTags; 958 ref TagUsage findTag(TagID tag) 959 { 960 foreach (ref t; nextTags) 961 if (t.tag == tag) 962 return t; 963 nextTags ~= TagUsage(tag); 964 return nextTags[$ - 1]; 965 } 966 967 if (s.unwrapProduction || s.annotations.contains!"inheritAnyTag") 968 { 969 foreach (t; c.tags) 970 { 971 if (t.reject) 972 findTag(t.tag).reject = true; 973 if (t.needed) 974 findTag(t.tag).needed = true; 975 } 976 } 977 foreach (t; s.tags) 978 { 979 if (t.reject) 980 findTag(t.tag).reject = true; 981 if (t.needed) 982 findTag(t.tag).needed = true; 983 if (t.inherit) 984 { 985 foreach (t2; c.tags) 986 { 987 if (t2.tag == t.tag) 988 { 989 if (t2.reject) 990 findTag(t.tag).reject = true; 991 if (t2.needed) 992 findTag(t.tag).needed = true; 993 } 994 } 995 } 996 } 997 nextTags.sort(); 998 999 return SymbolWithConstraint(s, Constraint(nextNegLookaheads, nextTags.idup, c.disabled)); 1000 } 1001 1002 NonterminalWithConstraint nextNonterminalWithConstraint(Constraint c, 1003 SymbolInstance s, bool first) 1004 in (!s.isToken) 1005 { 1006 auto r = nextSymbolWithConstraint(c, s, first); 1007 return NonterminalWithConstraint(r.symbol.toNonterminalID, r.constraint); 1008 } 1009 1010 Constraint orConstraint(Constraint a, Constraint b) 1011 { 1012 assert(tagsSorted(a.tags)); 1013 assert(tagsSorted(b.tags)); 1014 1015 if (a.disabled) 1016 return b; 1017 if (b.disabled) 1018 return a; 1019 1020 immutable(Symbol)[] newNegLookahead; 1021 foreach (l; a.negLookaheads) 1022 if (b.negLookaheads.canFind(l)) 1023 newNegLookahead ~= l; 1024 1025 immutable(TagUsage)[] nextTags; 1026 immutable(TagUsage)[] aTags = a.tags; 1027 immutable(TagUsage)[] bTags = b.tags; 1028 while (aTags.length && bTags.length) 1029 { 1030 if (aTags.length >= 2) 1031 assert(aTags[0].tag < aTags[1].tag); 1032 if (bTags.length >= 2) 1033 assert(bTags[0].tag < bTags[1].tag); 1034 1035 if (aTags[0].tag < bTags[0].tag) 1036 aTags = aTags[1 .. $]; 1037 else if (bTags[0].tag < aTags[0].tag) 1038 bTags = bTags[1 .. $]; 1039 else 1040 { 1041 TagUsage t; 1042 t.tag = aTags[0].tag; 1043 if (aTags[0].reject && bTags[0].reject) 1044 t.reject = true; 1045 if (aTags[0].needed && bTags[0].needed) 1046 t.needed = true; 1047 if (t.reject || t.needed) 1048 nextTags ~= t; 1049 1050 aTags = aTags[1 .. $]; 1051 bTags = bTags[1 .. $]; 1052 } 1053 } 1054 1055 return Constraint(newNegLookahead, nextTags, a.disabled && b.disabled); 1056 } 1057 } 1058 1059 SymbolInfo[string] generateSymbolInfos(EBNF ebnf) 1060 { 1061 SymbolInfo[string] symbolInfos; 1062 1063 SymbolInfo info(string name) 1064 { 1065 auto s = name in symbolInfos; 1066 if (s is null) 1067 { 1068 symbolInfos[name] = new SymbolInfo(); 1069 s = name in symbolInfos; 1070 s.name = name; 1071 } 1072 assert(s !is null); 1073 assert(s.name == name); 1074 return *s; 1075 } 1076 1077 void visitExprs(SymbolInfo parent, const Tree expr, const string[] excludeNames, 1078 bool delegate(SymbolInfo parent, const Tree childExpr, SymbolInfo child) dg) 1079 { 1080 if (expr is null || expr.isToken) 1081 return; 1082 1083 if (expr.nonterminalID.among(nonterminalIDFor!"Token", 1084 nonterminalIDFor!"Name", nonterminalIDFor!"MacroInstance")) 1085 { 1086 foreach (excludeName; excludeNames) 1087 { 1088 if (expr.childs[0].content == excludeName) 1089 return; 1090 } 1091 1092 auto s = info(expr.childs[0].content); 1093 bool changed = dg(parent, expr, s); 1094 1095 if (changed) 1096 { 1097 foreach (d; s.declarations) 1098 visitExprs(s, d.exprTree, d.parameters, dg); 1099 } 1100 } 1101 if (expr.nonterminalID == nonterminalIDFor!"AnnotatedExpression") 1102 { 1103 visitExprs(parent, expr.childs[$ - 1], excludeNames, dg); 1104 } 1105 else if (expr.nonterminalID == nonterminalIDFor!"Concatenation") 1106 { 1107 foreach (c; expr.childs[0 .. $ - 1]) 1108 { 1109 visitExprs(parent, c, excludeNames, dg); 1110 } 1111 } 1112 else 1113 { 1114 foreach (c; expr.childs) 1115 { 1116 visitExprs(parent, c, excludeNames, dg); 1117 } 1118 } 1119 } 1120 1121 foreach (i, d; ebnf.symbols) 1122 { 1123 auto s = info(d.name); 1124 s.declarations ~= d; 1125 } 1126 1127 foreach (i, d; ebnf.symbols) 1128 { 1129 auto s = info(d.name); 1130 visitExprs(s, d.exprTree, d.parameters, (parent, childExpr, child) { 1131 if (childExpr.nonterminalID.among(nonterminalIDFor!"Name", 1132 nonterminalIDFor!"MacroInstance") && child.declarations.length == 0) 1133 { 1134 stderr.writeln("Warning: Using undefined symbol ", child.name); 1135 } 1136 return false; 1137 }); 1138 } 1139 1140 foreach (i, d; ebnf.symbols) 1141 { 1142 auto s = info(d.name); 1143 if (d.type == DeclarationType.token) 1144 { 1145 s.isToken = true; 1146 s.reachableFromToken = true; 1147 } 1148 if (d.annotations.canFind("ignoreToken")) 1149 { 1150 enforce(d.type == DeclarationType.token); 1151 s.isToken = true; 1152 s.isIgnoreToken = true; 1153 s.reachableFromToken = true; 1154 } 1155 visitExprs(s, d.exprTree, d.parameters, (parent, childExpr, child) { 1156 if (parent.reachableFromToken && !child.reachableFromToken) 1157 { 1158 child.reachableFromToken = true; 1159 return true; 1160 } 1161 return false; 1162 }); 1163 } 1164 1165 foreach (i, d; ebnf.symbols) 1166 { 1167 if (i != 0 && !d.annotations.canFind("start") && !d.annotations.canFind("ignoreToken")) 1168 continue; 1169 auto s = info(d.name); 1170 s.reachableFromStart = true; 1171 foreach (d2; s.declarations) 1172 visitExprs(s, d2.exprTree, d2.parameters, (parent, childExpr, child) { 1173 if (parent.reachableFromStart && !child.reachableFromStart) 1174 { 1175 child.reachableFromStart = true; 1176 return true; 1177 } 1178 return false; 1179 }); 1180 } 1181 1182 foreach (i, d; ebnf.symbols) 1183 { 1184 if (i != 0 && !d.annotations.canFind("start")) 1185 continue; 1186 auto s = info(d.name); 1187 if (!s.isToken) 1188 s.reachableFromStartNoToken = true; 1189 foreach (d2; s.declarations) 1190 visitExprs(s, d2.exprTree, d2.parameters, (parent, childExpr, child) { 1191 if (parent.reachableFromStartNoToken 1192 && !child.reachableFromStartNoToken && !child.isToken) 1193 { 1194 child.reachableFromStartNoToken = true; 1195 return true; 1196 } 1197 return false; 1198 }); 1199 } 1200 1201 foreach (i, d; ebnf.symbols) 1202 { 1203 auto s = info(d.name); 1204 if (!s.reachableFromStart) 1205 { 1206 stderr.writeln("Warning: Unused symbol ", d.name); 1207 } 1208 else if (d.type == DeclarationType.fragment && d.parameters.length == 0 1209 && s.reachableFromStartNoToken) 1210 { 1211 stderr.writeln("Warning: Fragment ", d.name, " used in nonterminal"); 1212 } 1213 else if (d.type == DeclarationType.auto_ && d.parameters.length == 0 && s.reachableFromToken) 1214 { 1215 stderr.writeln("Warning: Nonterminal ", d.name, " used in token"); 1216 } 1217 } 1218 1219 return symbolInfos; 1220 } 1221 1222 Symbol getSymbolByName(EBNFGrammar grammar, string name) 1223 { 1224 Symbol ls; 1225 if (name.startsWith("\"") || (grammar.allowTokenNonterminals 1226 && grammar.symbolInfos[name].isToken)) 1227 { 1228 if (name !in grammar.symbolInfos || !grammar.symbolInfos[name].isIgnoreToken) 1229 ls = grammar.tokens.id(name); 1230 else 1231 throw new Exception("IgnoreTokens cannot be used"); 1232 } 1233 else 1234 ls = grammar.nonterminals.id(name); 1235 return ls; 1236 } 1237 1238 void addAnnotation(ref Annotations annotations, Tree tree) 1239 in (tree.nonterminalID == nonterminalIDFor!"Annotation") 1240 { 1241 assert(tree.childs.length == 3); 1242 assert(tree.childs[0].content == "@"); 1243 string name = tree.childs[1].content; 1244 if (tree.childs[2] !is null) 1245 { 1246 name ~= "(" ~ concatTree(tree.childs[2].childs[1]).strip() ~ ")"; 1247 } 1248 annotations.add(name); 1249 } 1250 1251 SymbolInstance createSymbol(EBNFGrammar grammar, Tree tree, EBNF ebnf) 1252 { 1253 SymbolInstance symbol; 1254 1255 if (tree.nonterminalID == nonterminalIDFor!"AnnotatedExpression") 1256 { 1257 if (tree.childs[1] !is null) 1258 symbol.symbolInstanceName = tree.childs[1].childs[0].content; 1259 foreach (c; tree.childs[2].memberOrDefault!"childs") 1260 { 1261 if (c.childs[0].content == "<") 1262 symbol.unwrapProduction = true; 1263 else if (c.childs[0].content == "^") 1264 symbol.dropNode = true; 1265 else 1266 assert(false, c.childs[0].content); 1267 } 1268 } 1269 Tree realTree = tree; 1270 while (realTree.nonterminalID == nonterminalIDFor!"AnnotatedExpression") 1271 { 1272 foreach (c; realTree.childs[0].memberOrDefault!"childs") 1273 { 1274 if (c.nonterminalID == nonterminalIDFor!"Annotation") 1275 addAnnotation(symbol.annotations, c); 1276 else if (c.nonterminalID == nonterminalIDFor!"NegativeLookahead") 1277 { 1278 assert(c.childs.length == 2); 1279 assert(c.childs[0].content == "!"); 1280 if (c.childs[1].nodeType == NodeType.nonterminal 1281 && c.childs[1].nonterminalID.among(nonterminalIDFor!"Token", 1282 nonterminalIDFor!"Name")) 1283 { 1284 assert(c.childs[1].childs.length == 1); 1285 symbol.negLookaheads ~= getSymbolByName(grammar, c.childs[1].childs[0].content); 1286 } 1287 else if (c.childs[1].nodeType == NodeType.token && c.childs[1].content == "anytoken") 1288 { 1289 throw new Exception("anytoken not implemented here"); 1290 } 1291 else 1292 assert(false, c.childs[1].name); 1293 } 1294 else 1295 assert(false, c.name); 1296 } 1297 realTree = realTree.childs[$ - 1]; 1298 } 1299 1300 TagUsage[] tags; 1301 foreach (a; symbol.annotations.otherAnnotations) 1302 { 1303 ref TagUsage findTag(TagID tag) 1304 { 1305 foreach (ref t; tags) 1306 if (t.tag == tag) 1307 return t; 1308 tags ~= TagUsage(tag); 1309 return tags[$ - 1]; 1310 } 1311 1312 if (a.startsWith("inheritTag(")) 1313 { 1314 assert(a.endsWith(")")); 1315 foreach (t; a[11 .. $ - 1].split(", ")) 1316 { 1317 t = t.strip(); 1318 findTag(grammar.tags.id(t)).inherit = true; 1319 } 1320 } 1321 else if (a.startsWith("needTag(")) 1322 { 1323 assert(a.endsWith(")")); 1324 foreach (t; a[8 .. $ - 1].split(", ")) 1325 { 1326 t = t.strip(); 1327 findTag(grammar.tags.id(t)).needed = true; 1328 } 1329 } 1330 else if (a.startsWith("rejectTag(")) 1331 { 1332 assert(a.endsWith(")")); 1333 foreach (t; a[10 .. $ - 1].split(", ")) 1334 { 1335 t = t.strip(); 1336 findTag(grammar.tags.id(t)).reject = true; 1337 } 1338 } 1339 } 1340 tags.sort(); 1341 symbol.tags = tags.idup; 1342 1343 string name = ebnfTreeToString(realTree); 1344 1345 if (symbol.annotations.contains!"regArray" && realTree.nonterminalID.among( 1346 nonterminalIDFor!"Repetition", nonterminalIDFor!"RepetitionPlus")) 1347 name = "@regArray " ~ name; 1348 1349 if (realTree.nonterminalID != nonterminalIDFor!"MacroInstance") 1350 name = name.replace(" ", "_"); 1351 1352 bool alreadyDone = (name in grammar.nonterminals.ids) !is null; 1353 if (alreadyDone && realTree.nonterminalID != nonterminalIDFor!"Token" 1354 && realTree.nonterminalID != nonterminalIDFor!"SubToken") 1355 { 1356 symbol.symbol = grammar.nonterminals.id(name); 1357 return symbol; 1358 } 1359 1360 if (realTree.nonterminalID == nonterminalIDFor!"Token") 1361 { 1362 symbol.symbol = grammar.tokens.id(realTree.childs[0].content); 1363 grammar.tokens[symbol.toTokenID].annotations.add(symbol.annotations); 1364 } 1365 else if (realTree.nonterminalID == nonterminalIDFor!"Name") 1366 { 1367 if (grammar.allowTokenNonterminals 1368 && grammar.symbolInfos[realTree.childs[0].content].isToken) 1369 { 1370 if (!grammar.symbolInfos[realTree.childs[0].content].isIgnoreToken) 1371 symbol.symbol = grammar.tokens.id(realTree.childs[0].content); 1372 else 1373 throw new Exception("IgnoreTokens cannot be used"); 1374 } 1375 else 1376 symbol.symbol = grammar.nonterminals.id(realTree.childs[0].content); 1377 } 1378 else if (realTree.nonterminalID == nonterminalIDFor!"Optional") 1379 { 1380 assert(realTree.childs.length == 2); 1381 auto innerSymbol = createSymbol(grammar, realTree.childs[0], ebnf); 1382 1383 Production* production = new Production; 1384 production.nonterminalID = grammar.nonterminals.id(name); 1385 production.isVirtual = true; 1386 innerSymbol.unwrapProduction = true; 1387 production.symbols = [innerSymbol]; 1388 grammar.addProduction(production); 1389 1390 Production* production2 = new Production; 1391 production2.nonterminalID = grammar.nonterminals.id(name); 1392 production2.isVirtual = true; 1393 production2.symbols = []; 1394 grammar.addProduction(production2); 1395 1396 symbol.symbol = grammar.nonterminals.id(name); 1397 } 1398 else if (realTree.nonterminalID == nonterminalIDFor!"Repetition") 1399 { 1400 assert(realTree.childs.length == 2); 1401 /* Repetition reuses RepetitionPlus, so parser conflicts are avoided. */ 1402 Tree tree2 = realTree; 1403 tree2 = new TreeNonterminal(nonterminalIDFor!"RepetitionPlus", ProductionID.max, 1404 [tree2.childs[0], new TreeToken("+", &grammarInfo)], &grammarInfo); 1405 auto innerSymbol = createSymbol(grammar, tree2, ebnf); 1406 1407 Production* production = new Production; 1408 production.nonterminalID = grammar.nonterminals.id(name); 1409 production.isVirtual = true; 1410 grammar.addProduction(production); 1411 1412 Production* production2 = new Production; 1413 production2.nonterminalID = grammar.nonterminals.id(name); 1414 production2.isVirtual = true; 1415 production2.symbols = [innerSymbol]; 1416 grammar.addProduction(production2); 1417 1418 grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("array"); 1419 if (symbol.annotations.contains!"regArray") 1420 grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("regArray"); 1421 symbol.symbol = grammar.nonterminals.id(name); 1422 } 1423 else if (realTree.nonterminalID == nonterminalIDFor!"RepetitionPlus") 1424 { 1425 assert(realTree.childs.length == 2); 1426 auto innerSymbol = createSymbol(grammar, realTree.childs[0], ebnf); 1427 1428 Production* production = new Production; 1429 production.nonterminalID = grammar.nonterminals.id(name); 1430 production.isVirtual = true; 1431 production.symbols = [innerSymbol]; 1432 grammar.addProduction(production); 1433 1434 Production* production2 = new Production; 1435 production2.nonterminalID = grammar.nonterminals.id(name); 1436 production2.isVirtual = true; 1437 production2.symbols = [ 1438 immutable(SymbolInstance)(grammar.nonterminals.id(name)), innerSymbol 1439 ]; 1440 grammar.addProduction(production2); 1441 1442 grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("array"); 1443 if (symbol.annotations.contains!"regArray") 1444 grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("regArray"); 1445 symbol.symbol = grammar.nonterminals.id(name); 1446 } 1447 else if (realTree.nonterminalID == nonterminalIDFor!"MacroInstance") 1448 { 1449 if (grammar.allowTokenNonterminals 1450 && grammar.symbolInfos[realTree.childs[0].content].isToken) 1451 { 1452 if (!grammar.symbolInfos[realTree.childs[0].content].isIgnoreToken) 1453 symbol.symbol = grammar.tokens.id(name); 1454 else 1455 throw new Exception("IgnoreTokens cannot be used"); 1456 } 1457 else 1458 { 1459 symbol.symbol = grammar.nonterminals.id(name); 1460 1461 Tree[] macroParameters; 1462 foreach (c; realTree.childs[2].memberOrDefault!"childs") 1463 if (!c.isToken) 1464 macroParameters ~= c; 1465 foreach (d; ebnf.symbols) 1466 { 1467 if (d.name == realTree.childs[0].content 1468 && (d.parameters.length == macroParameters.length || (d.variadicParameterIndex != size_t.max 1469 && macroParameters.length >= d.parameters.length - 1))) 1470 { 1471 Tree[string] table; 1472 Tree[] parametersHere = macroParameters; 1473 if (d.variadicParameterIndex != size_t.max) 1474 { 1475 Tree[] parametersBeforeVariadic = macroParameters[0 1476 .. d.variadicParameterIndex]; 1477 size_t numAfterVariadic = d.parameters.length - 1 - d 1478 .variadicParameterIndex; 1479 Tree[] parametersVariadic = macroParameters[d.variadicParameterIndex 1480 .. $ - numAfterVariadic]; 1481 Tree[] parametersAfterVariadic = macroParameters[$ - numAfterVariadic .. $]; 1482 Tree variadicTuple = new TreeNonterminal(nonterminalIDFor!"Tuple", 1483 ProductionID.max, [ 1484 new TreeToken("t(", &grammarInfo), 1485 new TreeArray(parametersVariadic, &grammarInfo), 1486 new TreeToken(")", &grammarInfo) 1487 ], &grammarInfo); 1488 parametersHere = parametersBeforeVariadic 1489 ~ variadicTuple ~ parametersAfterVariadic; 1490 } 1491 foreach (i, c; parametersHere) 1492 { 1493 table[d.parameters[i]] = c; 1494 } 1495 NonterminalID id2 = createGrammar(grammar, name, 1496 replaceNames(table, d.exprTree), ebnf); 1497 1498 grammar.nonterminals[id2].annotations = Annotations(d.annotations); 1499 1500 assert(id2 == symbol, text(realTree.childs[0].content, " ", symbol, " ", id2)); 1501 } 1502 } 1503 } 1504 } 1505 else if (realTree.nonterminalID == nonterminalIDFor!"Concatenation") 1506 { 1507 symbol.symbol = createGrammar(grammar, name, realTree, ebnf); 1508 } 1509 else if (realTree.nonterminalID == nonterminalIDFor!"Alternation") 1510 { 1511 symbol.symbol = createGrammar(grammar, name, realTree, ebnf); 1512 } 1513 else if (realTree.nonterminalID == nonterminalIDFor!"SubToken") 1514 { 1515 symbol = createSymbol(grammar, realTree.childs[0], ebnf); 1516 if (realTree.childs[2].nonterminalID == nonterminalIDFor!"Token") 1517 { 1518 symbol.subToken = realTree.childs[2].childs[0].content; 1519 } 1520 else 1521 throw new Exception("subterminal only for strings implemented"); 1522 } 1523 else if (realTree.nonterminalID == nonterminalIDFor!"TokenMinus") 1524 { 1525 if (grammar.isLexerGrammar) 1526 { 1527 symbol.symbol = grammar.nonterminals.id("$tokenminus" ~ name); 1528 1529 auto innerSymbol1 = createSymbol(grammar, realTree.childs[0], ebnf); 1530 auto innerSymbol2 = createSymbol(grammar, realTree.childs[2], ebnf); 1531 1532 foreach (p; grammar.productions) 1533 { 1534 if (p.nonterminalID == symbol.toNonterminalID) 1535 { 1536 assert(p.symbols[0] == innerSymbol1); 1537 assert(p.symbols[1] == innerSymbol2); 1538 return symbol; 1539 } 1540 } 1541 1542 Production* production = new Production; 1543 production.nonterminalID = grammar.nonterminals.id("$tokenminus" ~ name); 1544 production.isVirtual = true; 1545 production.symbols = [innerSymbol1, innerSymbol2]; 1546 grammar.addProduction(production); 1547 } 1548 else 1549 { 1550 enforce(false, "Using token minus outside token or fragment: " ~ name); 1551 } 1552 } 1553 else if (realTree.nonterminalID == nonterminalIDFor!"ParenExpression") 1554 { 1555 symbol.symbol = createSymbol(grammar, realTree.childs[1], ebnf); 1556 } 1557 else 1558 enforce(0, text("Unexpected expression ", realTree.name)); 1559 1560 return symbol; 1561 } 1562 1563 NonterminalID createGrammar(EBNFGrammar grammar, string name, Tree tree, EBNF ebnf, 1564 bool isVirtual = false) 1565 { 1566 if (name.length == 0) 1567 { 1568 name = ebnfTreeToString(tree); 1569 } 1570 1571 Production* production = new Production; 1572 production.nonterminalID = grammar.nonterminals.id(name); 1573 production.isVirtual = isVirtual; 1574 1575 while (tree.nonterminalID == nonterminalIDFor!"AnnotatedExpression" 1576 && tree.childs[0].memberOrDefault!"childs".length == 0 1577 && tree.childs[1] is null && tree.childs[2].memberOrDefault!"childs".length == 0) 1578 tree = tree.childs[$ - 1]; 1579 1580 if (tree.name == "Concatenation") 1581 { 1582 assert(tree.nonterminalID == nonterminalIDFor!"Concatenation"); 1583 foreach (c; tree.childs[$ - 1].memberOrDefault!"childs") 1584 { 1585 if (c.nonterminalID == nonterminalIDFor!"Annotation") 1586 addAnnotation(production.annotations, c); 1587 else if (c.nonterminalID == nonterminalIDFor!"NegativeLookahead") 1588 { 1589 assert(c.childs.length == 2); 1590 assert(c.childs[0].content == "!"); 1591 if (c.childs[1].nodeType == NodeType.nonterminal 1592 && c.childs[1].nonterminalID.among(nonterminalIDFor!"Token", 1593 nonterminalIDFor!"Name")) 1594 { 1595 assert(c.childs[1].childs.length == 1); 1596 auto negLookaheadSymbol = getSymbolByName(grammar, 1597 c.childs[1].childs[0].content); 1598 if (!grammar.isLexerGrammar && !negLookaheadSymbol.isToken) 1599 throw new Exception(text("Error: Nonterminal ", c.childs[1].childs[0].content, 1600 " used as negative lookahead for production")); 1601 production.negLookaheads ~= negLookaheadSymbol; 1602 } 1603 else if (c.childs[1].nodeType == NodeType.token && c.childs[1].content == "anytoken") 1604 { 1605 production.negLookaheadsAnytoken = true; 1606 } 1607 else 1608 assert(false, c.childs[1].name); 1609 } 1610 } 1611 1612 TagID[] tags; 1613 foreach (a; production.annotations.otherAnnotations) 1614 { 1615 if (a.startsWith("tag(")) 1616 { 1617 assert(a.endsWith(")")); 1618 foreach (t; a[4 .. $ - 1].split(", ")) 1619 { 1620 t = t.strip(); 1621 tags.addOnce(grammar.tags.id(t)); 1622 } 1623 } 1624 } 1625 tags.sort(); 1626 production.tags = tags.idup; 1627 1628 if (tree.childs.length >= 2) 1629 production.symbols ~= createSymbol(grammar, tree.childs[0], ebnf); 1630 if (tree.childs.length == 3) 1631 foreach (c; tree.childs[1].memberOrDefault!"childs") 1632 { 1633 if (!c.isToken) 1634 production.symbols ~= createSymbol(grammar, c, ebnf); 1635 } 1636 1637 if (production.symbols.length == 0 && !production.annotations.canFind("empty")) 1638 throw new Exception("Empty concatenation missing @empty."); 1639 } 1640 else if (tree.nonterminalID == nonterminalIDFor!"Alternation") 1641 { 1642 foreach (c; tree.childs) 1643 { 1644 if (!c.isToken) 1645 createGrammar(grammar, name, c, ebnf); 1646 } 1647 return grammar.nonterminals.id(name); 1648 } 1649 else if (tree.nonterminalID == nonterminalIDFor!"VariadicList") 1650 { 1651 throw new Exception(text("can't use variadic parameter here ", tree)); 1652 } 1653 else 1654 production.symbols ~= createSymbol(grammar, tree, ebnf); 1655 1656 grammar.addProduction(production); 1657 return grammar.nonterminals.id(name); 1658 } 1659 1660 EBNFGrammar createGrammar(EBNF ebnf) 1661 { 1662 EBNFGrammar grammar = new EBNFGrammar(generateSymbolInfos(ebnf)); 1663 grammar.tokens.id("$end"); 1664 assert(grammar.tokens.getID("$end") == TokenID(0)); 1665 1666 grammar.startTokenID = ebnf.startTokenID; 1667 grammar.startNonterminalID = ebnf.startNonterminalID; 1668 grammar.startProductionID = ebnf.startProductionID; 1669 1670 grammar.allowTokenNonterminals = true; 1671 1672 foreach (i, d; ebnf.symbols) 1673 { 1674 if (d.parameters.length) 1675 continue; 1676 1677 if (!grammar.symbolInfos[d.name].reachableFromStartNoToken) 1678 continue; 1679 1680 NonterminalID id = grammar.nonterminals.id(d.name); 1681 if (i == 0 || d.annotations.canFind("start")) 1682 grammar.startNonterminals ~= StartNonterminal(id); 1683 1684 grammar.nonterminals[grammar.nonterminals.id(d.name)].annotations = Annotations( 1685 d.annotations); 1686 } 1687 foreach (i, d; ebnf.symbols) 1688 { 1689 if (!grammar.symbolInfos[d.name].isToken || grammar.symbolInfos[d.name].isIgnoreToken) 1690 continue; 1691 grammar.tokens.id(d.name); 1692 } 1693 1694 foreach (i, d; ebnf.symbols) 1695 { 1696 if (d.parameters.length) 1697 continue; 1698 1699 if (!grammar.symbolInfos[d.name].reachableFromStartNoToken) 1700 continue; 1701 1702 createGrammar(grammar, d.name, d.exprTree, ebnf); 1703 } 1704 1705 foreach (s; grammar.symbolInfos) 1706 { 1707 if (s.isToken && !s.isIgnoreToken && s.reachableFromStart) 1708 { 1709 auto id = grammar.tokens.id(s.name); 1710 foreach (d; s.declarations) 1711 grammar.tokens[id.toTokenID].annotations.add(d.annotations); 1712 } 1713 } 1714 1715 grammar.calcNonterminalCanBeEmpty(); 1716 1717 grammar.fillProductionsForNonterminal(); 1718 grammar.calcNonterminalTypes(); 1719 1720 foreach (i, m; ebnf.matchingTokens) 1721 { 1722 TokenID t1 = grammar.tokens.id(m[0]); 1723 TokenID t2 = grammar.tokens.id(m[1]); 1724 grammar.matchingTokens ~= [t1, t2]; 1725 } 1726 checkGrammar(grammar); 1727 1728 foreach (t; grammar.tokens.allIDs) 1729 { 1730 if (grammar.tokens[t].annotations.contains!"inContextOnly"()) 1731 grammar.inContextOnlyTokens.addOnce(t); 1732 } 1733 return grammar; 1734 } 1735 1736 void checkGrammar(EBNFGrammar grammar) 1737 { 1738 foreach (p; grammar.productions) 1739 { 1740 if (p is null) 1741 continue; 1742 } 1743 1744 Appender!(NonterminalID[]) path; 1745 bool[NonterminalID] inPath; 1746 void checkCycles2(NonterminalID nonterminalID) 1747 { 1748 if (nonterminalID in inPath) 1749 throw new Exception(text("cycle ", path.data.map!(n => grammar.getSymbolName(n)) 1750 .array, grammar.getSymbolName(nonterminalID))); 1751 foreach (p; grammar.getProductions(nonterminalID)) 1752 { 1753 foreach (i; 0 .. p.symbols.length) 1754 { 1755 if (p.symbols[i].isToken) 1756 continue; 1757 if (!grammar.canForward(p, i)) 1758 continue; 1759 path.put(nonterminalID); 1760 inPath[nonterminalID] = true; 1761 checkCycles2(p.symbols[i].toNonterminalID); 1762 inPath.remove(nonterminalID); 1763 path.shrinkTo(path.data.length - 1); 1764 } 1765 } 1766 } 1767 1768 foreach (i, ref nonterminal; grammar.nonterminals.vals) 1769 { 1770 checkCycles2(NonterminalID(i.to!SymbolID)); 1771 } 1772 1773 void checkCycles3(NonterminalID nonterminalID, bool hasDirectProd) 1774 { 1775 if (nonterminalID in inPath) 1776 { 1777 if (path.data[0] == nonterminalID && hasDirectProd) 1778 throw new Exception(text("cycle3 ", path.data.map!(n => grammar.getSymbolName(n)) 1779 .array, grammar.getSymbolName(nonterminalID))); 1780 else 1781 return; 1782 } 1783 foreach (p; grammar.getProductions(nonterminalID)) 1784 { 1785 foreach (i; 0 .. p.symbols.length) 1786 { 1787 if (p.symbols[i].isToken) 1788 continue; 1789 if (!grammar.canForwardPrefix(p, i)) 1790 continue; 1791 path.put(nonterminalID); 1792 inPath[nonterminalID] = true; 1793 checkCycles3(p.symbols[i].toNonterminalID, hasDirectProd || i > 0); 1794 inPath.remove(nonterminalID); 1795 path.shrinkTo(path.data.length - 1); 1796 } 1797 } 1798 } 1799 1800 foreach (i, ref nonterminal; grammar.nonterminals.vals) 1801 { 1802 checkCycles3(NonterminalID(i.to!SymbolID), false); 1803 } 1804 1805 struct SymbolKey 1806 { 1807 Symbol symbol; 1808 string subToken; 1809 immutable(TagUsage)[] tags; 1810 } 1811 1812 struct ProductionKey 1813 { 1814 NonterminalID nonterminalID; 1815 SymbolKey[] symbols; 1816 } 1817 1818 const(Production)*[ProductionKey] productionsDone; 1819 foreach (p; grammar.productions) 1820 { 1821 auto key = ProductionKey(p.nonterminalID, 1822 p.symbols.map!(s => SymbolKey(s.symbol, s.subToken, s.tags)).array); 1823 if (key in productionsDone) 1824 { 1825 throw new Exception(text("Error: Duplicate production:\n ", 1826 grammar.productionString(p), "\n ", 1827 grammar.productionString(productionsDone[key]))); 1828 } 1829 productionsDone[key] = p; 1830 } 1831 } 1832 1833 EBNFGrammar createLexerGrammar(EBNF ebnf, EBNFGrammar realGrammar) 1834 { 1835 EBNFGrammar lexerGrammar = new EBNFGrammar(realGrammar.symbolInfos); 1836 lexerGrammar.isLexerGrammar = true; 1837 lexerGrammar.tokens.id("$end"); 1838 assert(lexerGrammar.tokens.getID("$end") == TokenID(0)); 1839 1840 lexerGrammar.startTokenID = ebnf.startTokenID; 1841 lexerGrammar.startNonterminalID = ebnf.startNonterminalID; 1842 lexerGrammar.startProductionID = ebnf.startProductionID; 1843 1844 foreach (token; realGrammar.tokens.allIDs) 1845 { 1846 string name = realGrammar.tokens[token].name; 1847 if (name == "$end") 1848 name = "$null"; 1849 auto nonterminal = lexerGrammar.nonterminals.id(name); 1850 assert(nonterminal.id == token.id); 1851 if (name != "$null") 1852 lexerGrammar.startNonterminals ~= StartNonterminal(nonterminal); 1853 lexerGrammar.nonterminals[nonterminal].annotations.add( 1854 realGrammar.tokens[token].annotations); 1855 } 1856 1857 foreach (i, d; ebnf.symbols) 1858 { 1859 if (d.parameters.length) 1860 continue; 1861 1862 if (!lexerGrammar.symbolInfos[d.name].reachableFromToken) 1863 continue; 1864 1865 NonterminalID id = lexerGrammar.nonterminals.id(d.name); 1866 1867 lexerGrammar.nonterminals[id].annotations.add(d.annotations); 1868 } 1869 foreach (i, d; ebnf.symbols) 1870 { 1871 if (d.parameters.length) 1872 continue; 1873 1874 if (!lexerGrammar.symbolInfos[d.name].reachableFromToken) 1875 continue; 1876 1877 NonterminalID id = createGrammar(lexerGrammar, d.name, d.exprTree, ebnf); 1878 } 1879 1880 lexerGrammar.fillProductionsForNonterminal(); 1881 1882 return lexerGrammar; 1883 } 1884 1885 EBNFGrammar createOptEmptyGrammar(EBNF ebnf, EBNFGrammar realGrammar) 1886 { 1887 EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos, realGrammar); 1888 newGrammar.tokens = realGrammar.tokens; 1889 newGrammar.nonterminals = realGrammar.nonterminals; 1890 newGrammar.tags = realGrammar.tags; 1891 newGrammar.startNonterminals = realGrammar.startNonterminals; 1892 newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals; 1893 newGrammar.matchingTokens = realGrammar.matchingTokens; 1894 newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens; 1895 1896 newGrammar.startTokenID = ebnf.startTokenID; 1897 newGrammar.startNonterminalID = ebnf.startNonterminalID; 1898 newGrammar.startProductionID = ebnf.startProductionID; 1899 1900 BitSet!NonterminalID nonterminalCanBeEmpty; 1901 BitSet!NonterminalID nonterminalCanBeNonEmpty; 1902 realGrammar.calcNonterminalCanBeEmpty(nonterminalCanBeEmpty, nonterminalCanBeNonEmpty, true); 1903 1904 bool canBeEmpty(const Symbol symbol) 1905 { 1906 if (symbol.isToken) 1907 return false; 1908 return nonterminalCanBeEmpty[symbol.toNonterminalID]; 1909 } 1910 1911 bool canBeNonEmpty(const Symbol symbol) 1912 { 1913 if (symbol.isToken) 1914 return true; 1915 return nonterminalCanBeNonEmpty[symbol.toNonterminalID]; 1916 } 1917 1918 foreach (i, n; realGrammar.startNonterminals) 1919 if (canBeEmpty(n.nonterminal)) 1920 newGrammar.startNonterminals[i].needsEmptyProduction = true; 1921 1922 foreach (i, p; realGrammar.productionsData) 1923 { 1924 if (p is null) 1925 { 1926 newGrammar.productionsData ~= null; 1927 continue; 1928 } 1929 assert(p.productionID == i); 1930 assert(newGrammar.productionsData.length == i); 1931 1932 bool anyCanBeEmpty, anyCanBeNonEmpty; 1933 foreach (s; p.symbols) 1934 { 1935 if (canBeEmpty(s)) 1936 anyCanBeEmpty = true; 1937 if (canBeNonEmpty(s)) 1938 anyCanBeNonEmpty = true; 1939 } 1940 1941 if ((anyCanBeEmpty || !anyCanBeNonEmpty || p.symbols.length == 0) 1942 && !(p.symbols.length == 0 1943 && realGrammar.nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty")) 1944 newGrammar.productionsData ~= null; 1945 else 1946 newGrammar.productionsData ~= p; 1947 } 1948 1949 foreach (i, p; realGrammar.productionsData) 1950 { 1951 if (p is null) 1952 continue; 1953 if (p.symbols.length == 0) 1954 continue; 1955 if (newGrammar.productionsData[i]!is null) 1956 continue; // cant be empty 1957 1958 void buildGraph(immutable(SymbolInstance)[] symbolsAvailable, size_t[] symbolPositions, 1959 immutable(SymbolInstance)[] symbolsSelected, bool afterSkippedEager) 1960 { 1961 if (symbolsAvailable.length == 0) 1962 { 1963 if (symbolsSelected.length == 0) 1964 return; 1965 Production* p2 = new Production(); 1966 p2.nonterminalID = p.nonterminalID; 1967 p2.symbols = symbolsSelected; 1968 p2.annotations = p.annotations; 1969 p2.negLookaheads = p.negLookaheads.dup; 1970 p2.negLookaheadsAnytoken = p.negLookaheadsAnytoken; 1971 p2.rewriteRules = [RewriteRule(p, symbolPositions, 0)]; 1972 p2.tags = p.tags; 1973 if (afterSkippedEager) 1974 p2.annotations.add("eagerEnd"); 1975 newGrammar.addProduction(p2); 1976 } 1977 else 1978 { 1979 auto s = symbolsAvailable[0]; 1980 if (canBeEmpty(p.symbols[symbolPositions.length])) 1981 buildGraph(symbolsAvailable[1 .. $], s.dropNode ? symbolPositions : (symbolPositions ~ [ 1982 size_t.max 1983 ]), symbolsSelected, afterSkippedEager 1984 || symbolsAvailable[0].annotations.contains!"eager"); 1985 if (canBeNonEmpty(p.symbols[symbolPositions.length])) 1986 buildGraph(symbolsAvailable[1 .. $], s.dropNode ? symbolPositions 1987 : (symbolPositions ~ [symbolsSelected.length]), 1988 symbolsSelected ~ symbolsAvailable[0], false); 1989 } 1990 } 1991 1992 buildGraph(p.symbols, [], [], false); 1993 } 1994 1995 newGrammar.calcNonterminalCanBeEmpty(); 1996 newGrammar.fillProductionsForNonterminal(); 1997 newGrammar.calcNonterminalTypes(); 1998 1999 checkGrammar(newGrammar); 2000 2001 return newGrammar; 2002 } 2003 2004 EBNFGrammar createGrammarWithoutDeactivatedProductions(EBNFGrammar realGrammar) 2005 { 2006 EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos, realGrammar); 2007 newGrammar.tokens = realGrammar.tokens; 2008 newGrammar.nonterminals = realGrammar.nonterminals; 2009 newGrammar.tags = realGrammar.tags; 2010 newGrammar.startNonterminals = realGrammar.startNonterminals; 2011 newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals; 2012 newGrammar.matchingTokens = realGrammar.matchingTokens; 2013 newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens; 2014 2015 newGrammar.startTokenID = realGrammar.startTokenID; 2016 newGrammar.startNonterminalID = realGrammar.startNonterminalID; 2017 newGrammar.startProductionID = realGrammar.startProductionID; 2018 2019 foreach (i, p; realGrammar.productionsData) 2020 { 2021 if (p is null) 2022 { 2023 newGrammar.productionsData ~= null; 2024 continue; 2025 } 2026 2027 assert(p.productionID == i); 2028 assert(newGrammar.productionsData.length == i); 2029 2030 if (p.annotations.contains!"deactivated"()) 2031 newGrammar.productionsData ~= null; 2032 else 2033 newGrammar.productionsData ~= p; 2034 } 2035 2036 newGrammar.calcNonterminalCanBeEmpty(); 2037 newGrammar.fillProductionsForNonterminal(); 2038 newGrammar.calcNonterminalTypes(); 2039 2040 checkGrammar(newGrammar); 2041 2042 return newGrammar; 2043 } 2044 2045 Graph!(SymbolWithConstraint, NonterminalID) buildRegArrayGraph(EBNFGrammar grammar, 2046 ref NonterminalID[] regArrayNonterminals) 2047 { 2048 alias G = Graph!(SymbolWithConstraint, NonterminalID); 2049 alias NodeID = G.NodeID; 2050 2051 bool[NonterminalWithConstraint] done; 2052 regArrayNonterminals = []; 2053 2054 G g = new G(); 2055 g.start = g.addNode("start"); 2056 2057 void buildGraph(NonterminalWithConstraint n, string indent) 2058 { 2059 assert(!n.constraint.negLookaheads.canFind(n.nonterminalID)); 2060 if (n in done) 2061 return; 2062 done[n] = true; 2063 if (grammar.nonterminals[n.nonterminalID].annotations.contains!"array"() 2064 && grammar.nonterminals[n.nonterminalID].annotations.contains!"regArray"()) 2065 { 2066 regArrayNonterminals ~= n.nonterminalID; 2067 2068 void buildGraphPart(NonterminalWithConstraint n, NodeID start, NodeID end, string indent) 2069 { 2070 assert(!n.constraint.negLookaheads.canFind(n.nonterminalID)); 2071 if (!grammar.nonterminals[n.nonterminalID].annotations.contains!"array"() 2072 && !grammar.nonterminals[n.nonterminalID].name.endsWith("?")) 2073 { 2074 foreach (m2; grammar.directUnwrapClosure(n)) 2075 { 2076 SymbolWithConstraint s = SymbolWithConstraint(m2.nonterminalID); 2077 bool edgeNeeded; 2078 foreach (p; grammar.getProductions(m2.nonterminalID)) 2079 { 2080 if (grammar.isProductionAllowed(m2, p)) 2081 edgeNeeded = true; 2082 } 2083 if (!edgeNeeded) 2084 continue; 2085 foreach (x; m2.constraint.negLookaheads) 2086 { 2087 s.constraint.negLookaheads.addOnce(x); 2088 } 2089 g.addEdge(start, end, s, EdgeFlags.none); 2090 } 2091 buildGraph(n, indent ~ " "); 2092 return; 2093 } 2094 2095 bool needsNonterminalEdge; 2096 2097 bool hasLeftRec, hasRightRec, hasNonRec; 2098 2099 foreach (p; grammar.getProductions(n.nonterminalID)) 2100 { 2101 if (!grammar.isProductionAllowed(n, p)) 2102 continue; 2103 if (p.symbols.length == 0) 2104 { 2105 hasNonRec = true; 2106 continue; 2107 } 2108 if (p.symbols[0].symbol == n.nonterminalID) 2109 hasLeftRec = true; 2110 if (p.symbols[$ - 1].symbol == n.nonterminalID) 2111 hasRightRec = true; 2112 if (p.symbols[0].symbol != n.nonterminalID 2113 && p.symbols[$ - 1].symbol != n.nonterminalID) 2114 hasNonRec = true; 2115 } 2116 2117 NodeID loopStart; 2118 if (hasLeftRec) 2119 loopStart = g.addNode("loopStart"); 2120 else if (hasRightRec) 2121 loopStart = start; 2122 2123 foreach (p; grammar.getProductions(n.nonterminalID)) 2124 { 2125 if (!grammar.isProductionAllowed(n, p)) 2126 continue; 2127 2128 immutable(SymbolInstance)[] symbols = p.symbols; 2129 bool thisIsRightRec; 2130 bool thisIsLeftRec; 2131 bool first = true; 2132 // use only left recursion. 2133 // using both left and right recursion would turn S=@empty|S a| b S; into S=(a|b)*; 2134 if (symbols.length && symbols[0].symbol == n.nonterminalID) 2135 { 2136 thisIsLeftRec = true; 2137 symbols = symbols[1 .. $]; 2138 first = false; 2139 } 2140 if (symbols.length && symbols[$ - 1].symbol == n.nonterminalID) 2141 { 2142 thisIsRightRec = true; 2143 symbols = symbols[0 .. $ - 1]; 2144 } 2145 enforce(!thisIsLeftRec || !thisIsRightRec); 2146 2147 NodeID node; 2148 if (!thisIsLeftRec) 2149 { 2150 node = g.addNode("start p"); 2151 g.addEdge(start, node, SymbolWithConstraint.invalid); 2152 } 2153 else 2154 node = loopStart; 2155 2156 foreach (s; symbols) 2157 { 2158 NodeID node2 = g.addNode(""); 2159 if (s.isToken) 2160 { 2161 g.addEdge(node, node2, SymbolWithConstraint(s)); 2162 } 2163 else 2164 { 2165 buildGraphPart(grammar.nextNonterminalWithConstraint(n.constraint, 2166 s, first), node, node2, indent ~ " "); 2167 } 2168 node = node2; 2169 first = false; 2170 } 2171 2172 if (!thisIsRightRec) 2173 { 2174 g.addEdge(node, end, SymbolWithConstraint.invalid); 2175 } 2176 else 2177 { 2178 g.addEdge(node, start, SymbolWithConstraint.invalid); 2179 } 2180 if (hasLeftRec && !thisIsRightRec) 2181 { 2182 g.addEdge(node, loopStart, SymbolWithConstraint.invalid); 2183 } 2184 } 2185 } 2186 2187 string name = grammar.nonterminals[n.nonterminalID].name; 2188 NodeID startM = g.addNode("start " ~ name); 2189 g.addEdge(g.start, startM, SymbolWithConstraint.invalid); 2190 NodeID endM = g.addNode("end"); 2191 g.get(endM).results = [n.nonterminalID]; 2192 buildGraphPart(n, startM, endM, indent ~ " "); 2193 } 2194 else 2195 { 2196 foreach (p; grammar.getProductions(n.nonterminalID)) 2197 { 2198 if (!grammar.isProductionAllowed(n, p)) 2199 continue; 2200 bool first = true; 2201 foreach (s; p.symbols) 2202 { 2203 if (!s.isToken) 2204 buildGraph(NonterminalWithConstraint(s.toNonterminalID), indent); 2205 first = false; 2206 } 2207 } 2208 } 2209 } 2210 2211 foreach (n; grammar.startNonterminals) 2212 buildGraph(NonterminalWithConstraint(n.nonterminal), ""); 2213 2214 return g; 2215 } 2216 2217 EBNFGrammar createRegArrayGrammar(EBNF ebnf, EBNFGrammar realGrammar) 2218 { 2219 EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos /*, realGrammar*/ ); 2220 newGrammar.tokens = realGrammar.tokens; 2221 newGrammar.nonterminals = realGrammar.nonterminals; 2222 newGrammar.tags = realGrammar.tags; 2223 newGrammar.startNonterminals = realGrammar.startNonterminals; 2224 newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals; 2225 newGrammar.matchingTokens = realGrammar.matchingTokens; 2226 newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens; 2227 2228 newGrammar.startTokenID = ebnf.startTokenID; 2229 newGrammar.startNonterminalID = ebnf.startNonterminalID; 2230 newGrammar.startProductionID = ebnf.startProductionID; 2231 2232 NonterminalID[] regArrayNonterminals; 2233 Graph!(SymbolWithConstraint, NonterminalID) g = buildRegArrayGraph(realGrammar, 2234 regArrayNonterminals); 2235 g = g.makeDeterministic(g.start, false).minimizeDFA; 2236 2237 foreach (i, p; realGrammar.productionsData) 2238 { 2239 if (p is null) 2240 { 2241 newGrammar.productionsData ~= null; 2242 continue; 2243 } 2244 2245 assert(p.productionID == i); 2246 assert(newGrammar.productionsData.length == i); 2247 2248 if (regArrayNonterminals.canFind(p.nonterminalID)) 2249 newGrammar.productionsData ~= null; 2250 else 2251 newGrammar.productionsData ~= p; 2252 } 2253 2254 NonterminalID[size_t] regarrayNonterminals; 2255 SymbolWithConstraint[][Tuple!(size_t, size_t)] regarrayEdgeSymbols; 2256 NonterminalID[Tuple!(size_t, size_t)] regarrayEdgeNonterminals; 2257 2258 bool isFirstStateDest; 2259 foreach (id; g.nodeIDs) 2260 { 2261 auto n = g.get(id); 2262 foreach (i, e; g.getEdges(id)) 2263 { 2264 if (e.next.id == g.start.id) 2265 isFirstStateDest = true; 2266 auto fromTo = tuple!(size_t, size_t)(id.id, e.next.id); 2267 if (fromTo !in regarrayEdgeSymbols) 2268 regarrayEdgeSymbols[fromTo] = []; 2269 regarrayEdgeSymbols[fromTo].addOnce(e.symbol); 2270 } 2271 } 2272 bool needsFirstState = isFirstStateDest || g.get(g.start).results.length > 0; 2273 2274 foreach (id; g.nodeIDs) 2275 { 2276 if (id == g.start && !needsFirstState) 2277 continue; 2278 string name; 2279 auto results = g.get(id).results; 2280 if (results.length == 1) 2281 name = text("$regarray_", realGrammar.nonterminals[results[0]].name, "_", id.id); 2282 else 2283 name = text("$regarray_", id.id); 2284 NonterminalID n = newGrammar.nonterminals.id(name); 2285 regarrayNonterminals[id.id] = n; 2286 newGrammar.nonterminals[n].annotations.add("array"); 2287 newGrammar.nonterminals[n].annotations.add("directUnwrap"); 2288 } 2289 2290 foreach (fromTo; regarrayEdgeSymbols.sortedKeys) 2291 { 2292 auto symbols = regarrayEdgeSymbols[fromTo]; 2293 SymbolInstance s; 2294 if (symbols.length == 1) 2295 { 2296 s = SymbolInstance(symbols[0].symbol); 2297 s.annotations.add("excludeDirectUnwrap"); 2298 s.negLookaheads = symbols[0].constraint.negLookaheads; 2299 } 2300 else 2301 { 2302 string name; 2303 name = text("$regarrayedge_", fromTo[0], "_", fromTo[1]); 2304 NonterminalID n = newGrammar.nonterminals.id(name); 2305 newGrammar.nonterminals[n].annotations.add("directUnwrap"); 2306 s = SymbolInstance(n); 2307 regarrayEdgeNonterminals[fromTo] = n; 2308 } 2309 2310 Production* p2 = new Production(); 2311 p2.nonterminalID = regarrayNonterminals[fromTo[1]]; 2312 if (!isFirstStateDest && fromTo[0] == 0) 2313 continue; 2314 else 2315 { 2316 SymbolInstance s0 = SymbolInstance(regarrayNonterminals[fromTo[0]]); 2317 s0.annotations.add("inheritAnyTag"); 2318 p2.symbols = [s0, s]; 2319 } 2320 p2.annotations = Annotations(); 2321 newGrammar.addProduction(p2); 2322 } 2323 2324 foreach (fromTo; regarrayEdgeSymbols.sortedKeys) 2325 { 2326 auto symbols = regarrayEdgeSymbols[fromTo]; 2327 if (symbols.length == 1) 2328 continue; 2329 foreach (symbol; symbols) 2330 { 2331 Production* p2 = new Production(); 2332 p2.nonterminalID = regarrayEdgeNonterminals[fromTo]; 2333 SymbolInstance si = SymbolInstance(symbol.symbol, "", "", true); 2334 si.annotations.add("excludeDirectUnwrap"); 2335 si.negLookaheads = symbol.constraint.negLookaheads; 2336 p2.symbols = [si]; 2337 p2.annotations = Annotations(); 2338 newGrammar.addProduction(p2); 2339 } 2340 } 2341 2342 bool[size_t] startEdgeDone; 2343 foreach (e; g.getEdges(g.start)) 2344 { 2345 if (e.next.id in startEdgeDone) 2346 continue; 2347 startEdgeDone[e.next.id] = true; 2348 2349 auto fromTo = tuple!(size_t, size_t)(g.start.id, e.next.id); 2350 SymbolInstance s; 2351 if (regarrayEdgeSymbols[fromTo].length == 1) 2352 { 2353 s = SymbolInstance(regarrayEdgeSymbols[fromTo][0].symbol); 2354 s.annotations.add("excludeDirectUnwrap"); 2355 s.negLookaheads = regarrayEdgeSymbols[fromTo][0].constraint.negLookaheads; 2356 } 2357 else 2358 { 2359 s = SymbolInstance(regarrayEdgeNonterminals[fromTo]); 2360 s.annotations.add("inheritAnyTag"); 2361 } 2362 2363 Production* p2 = new Production(); 2364 p2.nonterminalID = regarrayNonterminals[e.next.id]; 2365 p2.symbols = [s]; 2366 p2.annotations = Annotations(); 2367 newGrammar.addProduction(p2); 2368 } 2369 2370 foreach (r; g.get(g.start).results) 2371 { 2372 Production* p2 = new Production(); 2373 p2.nonterminalID = r; 2374 p2.symbols = []; 2375 p2.annotations = Annotations(); 2376 newGrammar.addProduction(p2); 2377 } 2378 2379 foreach (id; g.nodeIDs) 2380 { 2381 if (id == g.start && !isFirstStateDest) 2382 continue; 2383 auto n = g.get(id); 2384 foreach (r; n.results) 2385 { 2386 Production* p2 = new Production(); 2387 p2.nonterminalID = r; 2388 SymbolInstance s0 = SymbolInstance(regarrayNonterminals[id.id], "", "", true); 2389 s0.annotations.add("inheritAnyTag"); 2390 p2.symbols = [s0]; 2391 p2.annotations = Annotations(); 2392 newGrammar.addProduction(p2); 2393 newGrammar.nonterminals[r].annotations.add("directUnwrap"); 2394 } 2395 } 2396 2397 newGrammar.calcNonterminalCanBeEmpty(); 2398 newGrammar.fillProductionsForNonterminal(); 2399 newGrammar.calcNonterminalTypes(); 2400 2401 checkGrammar(newGrammar); 2402 2403 return newGrammar; 2404 } 2405 2406 void writeFinalGrammarFile(string finalgrammarfilename, const EBNFGrammar grammar, 2407 const EBNFGrammar lexerGrammar) 2408 { 2409 if (finalgrammarfilename.length) 2410 { 2411 File f = File(finalgrammarfilename, "w"); 2412 2413 foreach (n; grammar.nonterminals.allIDs) 2414 { 2415 f.writeln(grammar.getSymbolName(n), grammar.nonterminals[n].annotations.toStringCode()); 2416 foreach (i, p; grammar.getProductions(n)) 2417 { 2418 f.write("\t", i ? "|" : "=", grammar.productionStringRhs(p)); 2419 if (p.symbols.length == 0) 2420 f.write(" @empty"); 2421 if (p.isVirtual) 2422 f.write(" [virtual]"); 2423 f.writeln(); 2424 } 2425 f.writeln("\t;"); 2426 } 2427 f.writeln("\n// Lexer grammar:\n"); 2428 bool[NonterminalID] startNonterminals; 2429 foreach (n; lexerGrammar.startNonterminals) 2430 startNonterminals[n.nonterminal] = true; 2431 foreach (n; lexerGrammar.nonterminals.allIDs) 2432 { 2433 if (lexerGrammar.getProductions(n).length == 0) 2434 continue; 2435 if (n in startNonterminals) 2436 f.write("token "); 2437 else 2438 f.write("fragment "); 2439 f.writeln(lexerGrammar.getSymbolName(n), 2440 lexerGrammar.nonterminals[n].annotations.toStringCode()); 2441 foreach (i, p; lexerGrammar.getProductions(n)) 2442 { 2443 f.write("\t", i ? "|" : "=", lexerGrammar.productionStringRhs(p)); 2444 if (p.symbols.length == 0) 2445 f.write(" @empty"); 2446 if (p.isVirtual) 2447 f.write(" [virtual]"); 2448 f.writeln(); 2449 } 2450 f.writeln("\t;"); 2451 } 2452 } 2453 }