1 
2 //          Copyright Tim Schendekehl 2023.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          https://www.boost.org/LICENSE_1_0.txt)
6 
7 module dparsergen.generator.grammar;
8 import dparsergen.core.nodetype;
9 import dparsergen.core.utils;
10 import dparsergen.generator.ebnf;
11 import dparsergen.generator.grammarebnf;
12 import dparsergen.generator.ids;
13 import dparsergen.generator.nfa;
14 import dparsergen.generator.production;
15 import std.algorithm;
16 import std.array;
17 import std.conv;
18 import std.exception;
19 import std.stdio;
20 import std.string;
21 import std.typecons;
22 
23 class SymbolInfo
24 {
25     string name;
26     const(Declaration)[] declarations;
27 
28     bool isToken;
29     bool isIgnoreToken;
30 
31     bool reachableFromStart;
32     bool reachableFromToken;
33     bool reachableFromStartNoToken;
34 }
35 
36 struct StartNonterminal
37 {
38     NonterminalID nonterminal;
39     bool needsEmptyProduction;
40 }
41 
42 string escapeName(string s)
43 {
44     if (s.endsWith("?"))
45     {
46         return escapeName(s[0 .. $ - 1]) ~ "?";
47     }
48     if (s.startsWith("\"") && s.endsWith("\""))
49         return s;
50 
51     string r;
52     foreach (char c; s)
53     {
54         if (c.inCharSet!"a-zA-Z0-9")
55             r ~= c;
56         else
57             r ~= format("_%02X", c);
58     }
59     return r;
60 }
61 
62 bool tagsSorted(immutable(TagUsage)[] tags)
63 {
64     foreach (i, t; tags)
65     {
66         if (i)
67             if (t.tag <= tags[i - 1].tag)
68                 return false;
69     }
70     return true;
71 }
72 
73 struct Constraint
74 {
75     immutable(Symbol)[] negLookaheads;
76     immutable(TagUsage)[] tags;
77     bool disabled;
78 }
79 
80 struct NonterminalWithConstraint
81 {
82     NonterminalID nonterminalID;
83     Constraint constraint;
84     bool hasLookaheadAnnotation;
85 }
86 
87 struct SymbolWithConstraint
88 {
89     Symbol symbol;
90     Constraint constraint;
91     enum invalid = SymbolWithConstraint(Symbol.invalid, Constraint.init);
92 }
93 
94 class EBNFGrammar
95 {
96     const SymbolInfo[string] symbolInfos;
97     bool isLexerGrammar;
98 
99     IDMap!(TokenID, Token) tokens;
100     IDMap!(NonterminalID, Nonterminal) nonterminals;
101     IDMap!(TagID, Tag) tags;
102     const(Production*)[] productionsData;
103     immutable(TokenID)[] inContextOnlyTokens;
104 
105     StartNonterminal[] startNonterminals;
106     TokenID[2][] matchingTokens;
107 
108     bool allowTokenNonterminals;
109 
110     EBNFGrammar origGrammar_;
111 
112     bool[Symbol] symbolsInMultiplePaths;
113 
114     size_t startTokenID;
115     size_t startNonterminalID;
116     size_t startProductionID;
117 
118     this(const SymbolInfo[string] symbolInfos, EBNFGrammar origGrammar_ = null)
119     {
120         this.symbolInfos = symbolInfos;
121         this.origGrammar_ = origGrammar_;
122     }
123 
124     string getSymbolName(const Symbol s) const
125     {
126         if (s.isToken)
127         {
128             if (s.id >= tokens.vals.length)
129                 return "??????";
130             return tokens[s.toTokenID].name;
131         }
132         else
133         {
134             if (s.id >= nonterminals.vals.length)
135                 return "??????";
136             return nonterminals[s.toNonterminalID].name;
137         }
138     }
139 
140     bool isValid(const Symbol s) const
141     {
142         if (s.isToken)
143             return s.id < tokens.vals.length;
144         else
145             return s.id < nonterminals.vals.length;
146     }
147 
148     EBNFGrammar origGrammar()
149     {
150         if (origGrammar_ is null)
151             return this;
152         else
153             return origGrammar_.origGrammar();
154     }
155 
156     string symbolInstanceToString(const SymbolInstance s, bool includeAll = true, bool escape = false) const
157     {
158         Appender!string app;
159         symbolInstanceToString(app, s, includeAll, escape);
160         return app.data;
161     }
162 
163     string symbolInstanceToString(const SymbolInstance[] symbols,
164             bool includeAll = true, bool escape = false) const
165     {
166         Appender!string app;
167         foreach (i, s; symbols)
168         {
169             if (i)
170                 app.put(" ");
171             symbolInstanceToString(app, s, includeAll, escape);
172         }
173         return app.data;
174     }
175 
176     void symbolInstanceToString(ref Appender!string app, const SymbolInstance s,
177             bool includeAll = true, bool escape = false) const
178     {
179         if (includeAll)
180             s.annotations.toStringCode(app, true);
181         foreach (l; s.negLookaheads)
182         {
183             app.put("!");
184             app.put(getSymbolName(l));
185             app.put(" ");
186         }
187         if (includeAll)
188         {
189             if (s.unwrapProduction)
190                 app.put("<");
191             if (s.dropNode)
192                 app.put("^");
193             if (s.symbolInstanceName.length)
194             {
195                 app.put(s.symbolInstanceName);
196                 app.put(":");
197             }
198         }
199         if (escape && !s.isToken)
200             app.put(getSymbolName(s).escapeName);
201         else
202             app.put(getSymbolName(s));
203         if (s.subToken.length)
204         {
205             app.put(">>");
206             app.put(s.subToken);
207         }
208     }
209 
210     string productionStringRhs(const Production* p) const
211     {
212         Appender!string app;
213         foreach (s; p.symbols)
214         {
215             app.put(" ");
216             symbolInstanceToString(app, s);
217         }
218         foreach (l; p.negLookaheads)
219         {
220             app.put(" !");
221             app.put(getSymbolName(l));
222         }
223         if (p.negLookaheadsAnytoken)
224             app.put(" !anytoken");
225         p.annotations.toStringCode(app);
226         return app.data;
227     }
228 
229     string productionString(const Production* p) const
230     {
231         Appender!string app;
232         app.put(nonterminals[p.nonterminalID].name);
233         nonterminals[p.nonterminalID].annotations.toStringCode(app);
234         app.put(" =");
235         app.put(productionStringRhs(p));
236         if (p.isVirtual)
237             app.put(" [virtual]");
238         return app.data;
239     }
240 
241     override string toString() const
242     {
243         Appender!string app;
244         app.put("EBNFGrammar\n");
245         foreach (p; productions)
246         {
247             app.put("   ");
248             app.put(productionString(p));
249             app.put("\n");
250         }
251         return app.data;
252     }
253 
254     auto productions() const
255     {
256         return productionsData.filter!(p => p !is null);
257     }
258 
259     void addProduction(Production* p)
260     {
261         assert(productionsData.length < ProductionID.max);
262         p.productionID = cast(ProductionID) productionsData.length;
263         productionsData ~= p;
264     }
265 
266     private const(Production*)[][] productionsForNonterminal;
267     void fillProductionsForNonterminal()
268     {
269         assert(productionsForNonterminal.length == 0);
270         productionsForNonterminal.length = nonterminals.vals.length;
271         foreach (p; productions)
272         {
273             productionsForNonterminal[p.nonterminalID.id] ~= p;
274         }
275     }
276 
277     const(Production*)[] getProductions(const NonterminalID symbol) const
278     in
279     {
280         assert(!symbol.isToken);
281     }
282     do
283     {
284         assert(productionsForNonterminal.length == nonterminals.vals.length);
285         return productionsForNonterminal[symbol.id];
286     }
287 
288     private BitSet!NonterminalID nonterminalCanBeEmpty;
289     private BitSet!NonterminalID nonterminalCanBeNonEmpty;
290 
291     bool canBeEmpty(const Symbol symbol) const
292     {
293         if (symbol.isToken)
294             return false;
295         return nonterminalCanBeEmpty[symbol.toNonterminalID];
296     }
297 
298     bool canBeNonEmpty(const Symbol symbol) const
299     {
300         if (symbol.isToken)
301             return true;
302         return nonterminalCanBeNonEmpty[symbol.toNonterminalID];
303     }
304 
305     void calcNonterminalCanBeEmpty()
306     {
307         calcNonterminalCanBeEmpty(nonterminalCanBeEmpty, nonterminalCanBeNonEmpty, false);
308     }
309 
310     void calcNonterminalCanBeEmpty(ref BitSet!NonterminalID nonterminalCanBeEmpty,
311             ref BitSet!NonterminalID nonterminalCanBeNonEmpty, bool checkNoOptEmpty) const
312     {
313         bool changed;
314         nonterminalCanBeEmpty.length = nonterminals.vals.length;
315         nonterminalCanBeNonEmpty.length = nonterminals.vals.length;
316         do
317         {
318             changed = false;
319             foreach (p; productions)
320             {
321                 if (nonterminalCanBeEmpty[p.nonterminalID] == true)
322                     continue;
323                 if (nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty")
324                     continue;
325 
326                 bool allSymbolsCanBeEmpty = true;
327                 foreach (s; p.symbols)
328                     if (s.isToken || !nonterminalCanBeEmpty[s.toNonterminalID])
329                         allSymbolsCanBeEmpty = false;
330 
331                 if (allSymbolsCanBeEmpty)
332                 {
333                     nonterminalCanBeEmpty[p.nonterminalID] = true;
334                     changed = true;
335                 }
336             }
337         }
338         while (changed);
339         do
340         {
341             changed = false;
342             foreach (p; productions)
343             {
344                 if (nonterminalCanBeNonEmpty[p.nonterminalID] == true)
345                     continue;
346 
347                 bool anySymbolCanBeNonEmpty = false;
348                 foreach (s; p.symbols)
349                     if (s.isToken || nonterminalCanBeNonEmpty[s.toNonterminalID])
350                         anySymbolCanBeNonEmpty = true;
351 
352                 if (anySymbolCanBeNonEmpty
353                         || nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty")
354                 {
355                     nonterminalCanBeNonEmpty[p.nonterminalID] = true;
356                     changed = true;
357                 }
358             }
359         }
360         while (changed);
361     }
362 
363     struct FirstSetsKey
364     {
365         NonterminalID nonterminalID;
366         immutable(Symbol)[] negLookaheads;
367         immutable(TagUsage)[] tags;
368     }
369 
370     BitSet!TokenID[FirstSetsKey] firstSetsCache;
371     BitSet!TokenID firstSetImpl(const(Symbol) x,
372             immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags)
373     {
374         if (x.isToken)
375         {
376             BitSet!TokenID result;
377             result.length = tokens.vals.length;
378             if (!negLookaheads.canFind(x))
379             {
380                 result[x.toTokenID] = true;
381             }
382             return result;
383         }
384 
385         if (FirstSetsKey(x.toNonterminalID, negLookaheads, tags) in firstSetsCache)
386             return firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)];
387 
388         BitSet!TokenID result;
389         result.length = tokens.vals.length;
390 
391         firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)] = result;
392 
393         foreach (p; getProductions(x.toNonterminalID))
394         {
395             if (!isProductionAllowed(NonterminalWithConstraint(x.toNonterminalID,
396                     Constraint(negLookaheads, tags)), p))
397                 continue;
398             if (p.symbols.length)
399             {
400                 const(SymbolInstance)[] symbols = p.symbols[];
401                 BitSet!TokenID f;
402                 f.length = tokens.vals.length;
403                 bool lastEmpty = true;
404                 bool first = true;
405                 do
406                 {
407                     f[tokens.getID("$end")] = false;
408 
409                     auto next = nextSymbolWithConstraint(Constraint(negLookaheads,
410                             tags), symbols[0], first);
411 
412                     auto inner = firstSetImpl(symbols[0],
413                             next.constraint.negLookaheads, next.constraint.tags);
414                     if (lastEmpty)
415                         f |= inner;
416                     lastEmpty = lastEmpty && inner[tokens.getID("$end")];
417                     symbols = symbols[1 .. $];
418                     first = false;
419                 }
420                 while (symbols.length && lastEmpty);
421                 result |= f;
422             }
423             else
424             {
425                 result[tokens.getID("$end")] = true;
426             }
427         }
428 
429         firstSetsCache[FirstSetsKey(x.toNonterminalID, negLookaheads, tags)] = result;
430         return result;
431     }
432 
433     BitSet!TokenID firstSet(const SymbolInstance[] symbols,
434             Constraint extraConstraint = Constraint.init, bool isFirst = false)
435     {
436         BitSet!TokenID r;
437         r.length = tokens.vals.length;
438         r[tokens.getID("$end")] = true;
439         const(SymbolInstance)[] x = symbols;
440 
441         bool lastEmpty = true;
442         while (x.length && lastEmpty)
443         {
444             BitSet!NonterminalID done;
445             done.length = nonterminals.vals.length;
446 
447             r[tokens.getID("$end")] = false;
448 
449             if (x[0].isToken)
450             {
451                 if (lastEmpty)
452                 {
453                     r[x[0].toTokenID] = true;
454                 }
455 
456                 lastEmpty = false;
457             }
458             else
459             {
460                 auto next = nextNonterminalWithConstraint(extraConstraint, x[0], isFirst);
461                 const BitSet!TokenID nextFirstSet = firstSetImpl(next.nonterminalID,
462                         next.constraint.negLookaheads, next.constraint.tags);
463                 if (lastEmpty)
464                 {
465                     foreach (t; nextFirstSet.bitsSet)
466                         r[t] = true;
467                     lastEmpty = nextFirstSet[tokens.getID("$end")];
468                 }
469             }
470 
471             x = x[1 .. $];
472             isFirst = false;
473         }
474 
475         return r;
476     }
477 
478     bool firstSetContains(const SymbolInstance[] symbols, TokenID t)
479     {
480         const(SymbolInstance)[] x = symbols;
481 
482         while (x.length)
483         {
484             if (x[0].isToken)
485             {
486                 return t == x[0].toTokenID;
487             }
488             else
489             {
490                 const BitSet!TokenID nextFirstSet = firstSetImpl(x[0].toNonterminalID,
491                         x[0].negLookaheads, x[0].tags);
492                 if (nextFirstSet[t])
493                     return true;
494                 if (!nextFirstSet[tokens.getID("$end")])
495                     return false;
496             }
497 
498             x = x[1 .. $];
499         }
500 
501         return false;
502     }
503 
504     BitSet!NonterminalID[FirstSetsKey] firstSetsNonterminalCache;
505     private BitSet!NonterminalID firstSetNonterminal(const(NonterminalID) x,
506             immutable(Symbol)[] negLookaheads)
507     {
508         if (FirstSetsKey(x.toNonterminalID, negLookaheads) in firstSetsNonterminalCache)
509             return firstSetsNonterminalCache[FirstSetsKey(x.toNonterminalID, negLookaheads)];
510 
511         BitSet!NonterminalID result;
512         result.length = nonterminals.vals.length;
513 
514         firstSetsNonterminalCache[FirstSetsKey(x, negLookaheads)] = result;
515 
516         foreach (p; getProductions(x.toNonterminalID))
517         {
518             if (p.symbols.length)
519             {
520                 immutable(Symbol)[] nextNegLookaheads = negLookaheads;
521                 const(SymbolInstance)[] symbols = p.symbols[];
522                 if (negLookaheads.canFind(symbols[0].symbol))
523                     continue;
524                 if (symbols[0].isToken)
525                     continue;
526 
527                 nextNegLookaheads.addOnce(symbols[0].negLookaheads);
528                 result |= firstSetNonterminal(symbols[0].toNonterminalID, nextNegLookaheads);
529             }
530         }
531 
532         firstSetsNonterminalCache[FirstSetsKey(x, negLookaheads)] = result;
533         return result;
534     }
535 
536     bool[NonterminalID][TokenID] hasExactTokenCache;
537     bool hasExactToken(Symbol symbol, TokenID currentToken)
538     {
539         if (symbol.isToken)
540             return symbol.toTokenID == currentToken;
541 
542         bool[NonterminalID]* hasExactTokenCacheHere = currentToken in hasExactTokenCache;
543         if (hasExactTokenCacheHere is null)
544         {
545             hasExactTokenCache[currentToken] = null;
546             hasExactTokenCacheHere = currentToken in hasExactTokenCache;
547         }
548         if (symbol.toNonterminalID in *hasExactTokenCacheHere)
549             return (*hasExactTokenCacheHere)[symbol.toNonterminalID];
550 
551         (*hasExactTokenCacheHere)[symbol.toNonterminalID] = false;
552 
553         bool r;
554 
555         foreach (p; getProductions(symbol.toNonterminalID))
556         {
557             bool nextExactMatch = false;
558             bool beforeToken = true;
559             foreach (pos; 0 .. p.symbols.length)
560             {
561                 bool exactMatch2;
562                 if (beforeToken)
563                 {
564                     exactMatch2 = hasExactToken(p.symbols[pos], currentToken);
565                 }
566                 if (!canBeEmpty(p.symbols[pos]))
567                 {
568                     nextExactMatch = false;
569                     beforeToken = false;
570                 }
571                 if (exactMatch2)
572                     nextExactMatch = true;
573             }
574             if (nextExactMatch)
575                 r = true;
576         }
577 
578         (*hasExactTokenCacheHere)[symbol.toNonterminalID] = r;
579         return r;
580     }
581 
582     void calcNonterminalTypes()
583     {
584         BitSet!NonterminalID done;
585         done.length = nonterminals.vals.length;
586         Tuple!(NonterminalFlags, SymbolID[]) handleNonterminal(NonterminalID n,
587                 ref TagID[] possibleTags)
588         {
589             if (done[n])
590                 return tuple!(NonterminalFlags, SymbolID[])(NonterminalFlags.none, null);
591             done[n] = true;
592             scope (success)
593                 done[n] = false;
594             NonterminalFlags r;
595             SymbolID[] buildNonterminals;
596             foreach (p; getProductions(n))
597             {
598                 foreach (tag; p.tags)
599                     possibleTags.addOnce(tag);
600                 foreach (s; p.symbols)
601                     foreach (tagUsage; s.tags)
602                         if (tagUsage.inherit && !tagUsage.reject)
603                             possibleTags.addOnce(tagUsage.tag);
604                 if (nonterminals[n].annotations.contains!"array"())
605                 {
606                     r |= NonterminalFlags.array;
607                     if (p.symbols.length == 0)
608                         r |= NonterminalFlags.empty;
609                     else
610                     {
611                         foreach (s; p.symbols)
612                         {
613                             if (s.dropNode)
614                                 continue;
615                             if (s.isToken)
616                                 r |= NonterminalFlags.arrayOfString;
617                             else
618                             {
619                                 TagID[] unusedPossibleTags;
620                                 auto x = handleNonterminal(s.toNonterminalID, unusedPossibleTags);
621                                 r |= (x[0] & NonterminalFlags.anyArray);
622                                 if (x[0] & NonterminalFlags.nonterminal)
623                                     r |= NonterminalFlags.arrayOfNonterminal;
624                                 if (x[0] & NonterminalFlags..string)
625                                     r |= NonterminalFlags.arrayOfString;
626                                 buildNonterminals.addOnce(x[1]);
627                             }
628                         }
629                     }
630                 }
631                 else
632                 {
633                     if (nonterminals[n].annotations.contains!"string"())
634                         r |= NonterminalFlags..string;
635 
636                     if (p.symbols.length == 0)
637                         r |= NonterminalFlags.empty;
638                     else if (isSimpleProduction(*p))
639                     {
640                         foreach (s; p.symbols)
641                         {
642                             if (s.dropNode)
643                                 continue;
644                             if (s.isToken)
645                                 r |= NonterminalFlags..string;
646                             else
647                             {
648                                 auto x = handleNonterminal(s.toNonterminalID, possibleTags);
649                                 r |= x[0];
650                                 buildNonterminals.addOnce(x[1]);
651                             }
652                         }
653                     }
654                     else if (nonterminals[n].annotations.contains!"string"())
655                         r |= NonterminalFlags..string;
656                     else
657                     {
658                         r |= NonterminalFlags.nonterminal;
659                         buildNonterminals.addOnce(n.id);
660                     }
661                 }
662             }
663             return tuple!(NonterminalFlags, SymbolID[])(r, buildNonterminals);
664         }
665 
666         foreach (n; nonterminals.allIDs)
667         {
668             TagID[] possibleTags;
669             auto r = handleNonterminal(n, possibleTags);
670             if (r[0] & NonterminalFlags.anyArray)
671                 enforce(!(r[0] & NonterminalFlags.anySingle));
672             nonterminals[n].flags = r[0];
673             r[1].sort();
674             nonterminals[n].buildNonterminals = r[1].idup;
675             possibleTags.sort();
676             nonterminals[n].possibleTags = possibleTags.idup;
677         }
678     }
679 
680     const(RewriteRule)[] getRewriteRules(const Production* p) const
681     {
682         if (p.rewriteRules.length > 0)
683             return p.rewriteRules;
684         RewriteRule r = RewriteRule(p);
685         foreach (i, s; p.symbols)
686         {
687             if (!s.dropNode)
688             {
689                 r.parameters ~= i;
690             }
691         }
692         return [r];
693     }
694 
695     bool hasNonTrivialRewriteRule(const Production* p) const
696     {
697         if (p.rewriteRules.length == 0)
698         {
699             foreach (i, s; p.symbols)
700             {
701                 if (s.dropNode)
702                     return true;
703             }
704             return false;
705         }
706         if (p.rewriteRules.length > 1)
707             return true;
708         if (p.symbols.length == p.rewriteRules[0].parameters.length)
709         {
710             foreach (i; 0 .. p.symbols.length)
711                 if (p.rewriteRules[0].parameters[i] != i)
712                     return true;
713             return false;
714         }
715         return true;
716     }
717 
718     bool canForward(const Production* p, size_t symbolNr)
719     {
720         foreach (i2; 0 .. p.symbols.length)
721         {
722             if (i2 == symbolNr)
723                 continue;
724             if (p.symbols[i2].isToken)
725                 return false;
726             if (!canBeEmpty(p.symbols[i2]))
727                 return false;
728         }
729         return true;
730     }
731 
732     bool canForwardPrefix(const Production* p, size_t symbolNr)
733     {
734         foreach (i2; 0 .. symbolNr)
735         {
736             if (i2 == symbolNr)
737                 continue;
738             if (p.symbols[i2].isToken)
739                 return false;
740             if (!canBeEmpty(p.symbols[i2]))
741                 return false;
742         }
743         return true;
744     }
745 
746     bool isSimpleProduction(const Production production) const
747     {
748         static Appender!(size_t[]) notDropped;
749         notDropped.clear();
750         foreach (i, s; production.symbols)
751             if (!s.dropNode)
752                 notDropped.put(i);
753 
754         return (notDropped.data.length == 1
755                 && production.symbols[notDropped.data[0]].unwrapProduction)
756             || (production.isVirtual && production.symbols.length == 1
757                     && !production.symbols[0].isToken
758                     && !nonterminals[production.nonterminalID].annotations.contains!"array")
759             || (production.isVirtual && production.symbols.length == 0);
760     }
761 
762     bool isDirectUnwrapProduction(const Production production) const
763     {
764         if (!nonterminals[production.nonterminalID].annotations.contains!"directUnwrap"()
765                 && !nonterminals[production.nonterminalID].name.endsWith("?"))
766             return false;
767         return (!production.isVirtual && production.symbols.length == 1
768                 && production.symbols[0].unwrapProduction && !production.symbols[0].isToken)
769             || (production.isVirtual && production.symbols.length == 1
770                     && !production.symbols[0].isToken
771                     && !nonterminals[production.nonterminalID].annotations.contains!"array");
772     }
773 
774     immutable(NonterminalWithConstraint)[][NonterminalWithConstraint] directUnwrapClosureCache;
775     immutable(NonterminalWithConstraint)[] directUnwrapClosure(NonterminalID s,
776             immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags)
777     {
778         if (NonterminalWithConstraint(s, Constraint(negLookaheads, tags)) in directUnwrapClosureCache)
779         {
780             auto r = directUnwrapClosureCache[NonterminalWithConstraint(s,
781                         Constraint(negLookaheads, tags))];
782             enforce(r.length, getSymbolName(s));
783             return r;
784         }
785 
786         if (getProductions(s).length == 0)
787         {
788             return [];
789         }
790 
791         if (negLookaheads.canFind(s))
792         {
793             return [];
794         }
795 
796         foreach (t; tags)
797         {
798             if (t.needed && !nonterminals[s].possibleTags.canFind(t.tag))
799                 return [];
800         }
801 
802         directUnwrapClosureCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = [
803         ];
804 
805         NonterminalWithConstraint[] r;
806         bool needsNonterminal;
807         foreach (p; getProductions(s))
808         {
809             if (!isDirectUnwrapProduction(*p))
810                 needsNonterminal = true;
811         }
812         void addNonterminal(NonterminalWithConstraint n)
813         {
814             foreach (t; n.constraint.tags)
815             {
816                 if (t.needed && !nonterminals[n.nonterminalID].possibleTags.canFind(t.tag))
817                     return;
818             }
819 
820             foreach (i, ref m2; r)
821             {
822                 if (m2.nonterminalID == n.nonterminalID)
823                 {
824                     m2.constraint = orConstraint(m2.constraint, n.constraint);
825                     return;
826                 }
827             }
828             r ~= n;
829         }
830 
831         if (needsNonterminal)
832             addNonterminal(NonterminalWithConstraint(s, Constraint(negLookaheads, tags)));
833 
834         foreach (p; getProductions(s))
835         {
836             if (isDirectUnwrapProduction(*p))
837             {
838                 if (negLookaheads.canFind(p.symbols[0]))
839                     continue;
840                 immutable(Symbol)[] nextNegLookaheads = negLookaheads;
841                 nextNegLookaheads.addOnce(p.symbols[0].negLookaheads);
842                 auto nextTags = tags;
843                 if (p.symbols[0].annotations.contains!"excludeDirectUnwrap")
844                 {
845                     if (directUnwrapClosureHasSelf(p.symbols[0].symbol.toNonterminalID,
846                             nextNegLookaheads, nextTags))
847                         addNonterminal(NonterminalWithConstraint(p.symbols[0].symbol.toNonterminalID,
848                                 Constraint(nextNegLookaheads, nextTags),
849                                 p.symbols[0].annotations.contains!"lookahead"));
850                 }
851                 else
852                 {
853                     foreach (n; directUnwrapClosure(p.symbols[0].symbol.toNonterminalID,
854                             nextNegLookaheads, nextTags))
855                         addNonterminal(NonterminalWithConstraint(n.nonterminalID, n.constraint,
856                                 n.hasLookaheadAnnotation
857                                 || p.symbols[0].annotations.contains!"lookahead"));
858                 }
859             }
860         }
861 
862         enforce(r.length, getSymbolName(s));
863 
864         auto r2 = r.idup;
865 
866         directUnwrapClosureCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = r2;
867 
868         return r2;
869     }
870 
871     immutable(NonterminalWithConstraint)[] directUnwrapClosure(const SymbolInstance s)
872     {
873         return directUnwrapClosure(s.toNonterminalID, s.negLookaheads, s.tags);
874     }
875 
876     immutable(NonterminalWithConstraint)[] directUnwrapClosure(NonterminalWithConstraint n)
877     {
878         return directUnwrapClosure(n.nonterminalID, n.constraint.negLookaheads, n.constraint.tags);
879     }
880 
881     immutable(Symbol)[][NonterminalID][NonterminalWithConstraint] directUnwrapClosureMapCache;
882     immutable(Symbol)[][NonterminalID] directUnwrapClosureMap(NonterminalID s,
883             immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags)
884     {
885         if (NonterminalWithConstraint(s, Constraint(negLookaheads,
886                 tags)) in directUnwrapClosureMapCache)
887         {
888             return directUnwrapClosureMapCache[NonterminalWithConstraint(s,
889                         Constraint(negLookaheads, tags))];
890         }
891 
892         auto r = directUnwrapClosure(s, negLookaheads, tags);
893         if (r.length == 0)
894             return null;
895 
896         immutable(Symbol)[][NonterminalID] n;
897         foreach (x; r)
898             n[x.nonterminalID] = x.constraint.negLookaheads;
899 
900         directUnwrapClosureMapCache[NonterminalWithConstraint(s, Constraint(negLookaheads, tags))] = n;
901 
902         return n;
903     }
904 
905     immutable(Symbol)[][NonterminalID] directUnwrapClosureMap(NonterminalWithConstraint n)
906     {
907         return directUnwrapClosureMap(n.nonterminalID,
908                 n.constraint.negLookaheads, n.constraint.tags);
909     }
910 
911     bool directUnwrapClosureHasSelf(NonterminalID s,
912             immutable(Symbol)[] negLookaheads, immutable(TagUsage)[] tags)
913     {
914         if (negLookaheads.canFind(s))
915             return false;
916 
917         bool needsNonterminal;
918         foreach (p; getProductions(s))
919         {
920             if (!isProductionAllowed(NonterminalWithConstraint(s,
921                     Constraint(negLookaheads, tags)), p))
922                 continue;
923             if (!isDirectUnwrapProduction(*p))
924                 needsNonterminal = true;
925         }
926         return needsNonterminal;
927     }
928 
929     bool directUnwrapClosureHasSelf(NonterminalWithConstraint n)
930     {
931         return directUnwrapClosureHasSelf(n.nonterminalID,
932                 n.constraint.negLookaheads, n.constraint.tags);
933     }
934 
935     bool isProductionAllowed(NonterminalWithConstraint n, const Production* p)
936     {
937         assert(p.nonterminalID == n.nonterminalID);
938         if (p.symbols.length && n.constraint.negLookaheads.canFind(p.symbols[0]))
939             return false;
940         foreach (t; n.constraint.tags)
941         {
942             if (t.reject && p.tags.canFind(t.tag))
943                 return false;
944             if (t.needed && !nonterminals[n.nonterminalID].possibleTags.canFind(t.tag))
945                 return false;
946         }
947         return true;
948     }
949 
950     SymbolWithConstraint nextSymbolWithConstraint(Constraint c, SymbolInstance s, bool first)
951     {
952         immutable(Symbol)[] nextNegLookaheads;
953         if (first)
954             nextNegLookaheads = c.negLookaheads;
955         nextNegLookaheads.addOnce(s.negLookaheads);
956 
957         TagUsage[] nextTags;
958         ref TagUsage findTag(TagID tag)
959         {
960             foreach (ref t; nextTags)
961                 if (t.tag == tag)
962                     return t;
963             nextTags ~= TagUsage(tag);
964             return nextTags[$ - 1];
965         }
966 
967         if (s.unwrapProduction || s.annotations.contains!"inheritAnyTag")
968         {
969             foreach (t; c.tags)
970             {
971                 if (t.reject)
972                     findTag(t.tag).reject = true;
973                 if (t.needed)
974                     findTag(t.tag).needed = true;
975             }
976         }
977         foreach (t; s.tags)
978         {
979             if (t.reject)
980                 findTag(t.tag).reject = true;
981             if (t.needed)
982                 findTag(t.tag).needed = true;
983             if (t.inherit)
984             {
985                 foreach (t2; c.tags)
986                 {
987                     if (t2.tag == t.tag)
988                     {
989                         if (t2.reject)
990                             findTag(t.tag).reject = true;
991                         if (t2.needed)
992                             findTag(t.tag).needed = true;
993                     }
994                 }
995             }
996         }
997         nextTags.sort();
998 
999         return SymbolWithConstraint(s, Constraint(nextNegLookaheads, nextTags.idup, c.disabled));
1000     }
1001 
1002     NonterminalWithConstraint nextNonterminalWithConstraint(Constraint c,
1003             SymbolInstance s, bool first)
1004     in (!s.isToken)
1005     {
1006         auto r = nextSymbolWithConstraint(c, s, first);
1007         return NonterminalWithConstraint(r.symbol.toNonterminalID, r.constraint);
1008     }
1009 
1010     Constraint orConstraint(Constraint a, Constraint b)
1011     {
1012         assert(tagsSorted(a.tags));
1013         assert(tagsSorted(b.tags));
1014 
1015         if (a.disabled)
1016             return b;
1017         if (b.disabled)
1018             return a;
1019 
1020         immutable(Symbol)[] newNegLookahead;
1021         foreach (l; a.negLookaheads)
1022             if (b.negLookaheads.canFind(l))
1023                 newNegLookahead ~= l;
1024 
1025         immutable(TagUsage)[] nextTags;
1026         immutable(TagUsage)[] aTags = a.tags;
1027         immutable(TagUsage)[] bTags = b.tags;
1028         while (aTags.length && bTags.length)
1029         {
1030             if (aTags.length >= 2)
1031                 assert(aTags[0].tag < aTags[1].tag);
1032             if (bTags.length >= 2)
1033                 assert(bTags[0].tag < bTags[1].tag);
1034 
1035             if (aTags[0].tag < bTags[0].tag)
1036                 aTags = aTags[1 .. $];
1037             else if (bTags[0].tag < aTags[0].tag)
1038                 bTags = bTags[1 .. $];
1039             else
1040             {
1041                 TagUsage t;
1042                 t.tag = aTags[0].tag;
1043                 if (aTags[0].reject && bTags[0].reject)
1044                     t.reject = true;
1045                 if (aTags[0].needed && bTags[0].needed)
1046                     t.needed = true;
1047                 if (t.reject || t.needed)
1048                     nextTags ~= t;
1049 
1050                 aTags = aTags[1 .. $];
1051                 bTags = bTags[1 .. $];
1052             }
1053         }
1054 
1055         return Constraint(newNegLookahead, nextTags, a.disabled && b.disabled);
1056     }
1057 }
1058 
1059 SymbolInfo[string] generateSymbolInfos(EBNF ebnf)
1060 {
1061     SymbolInfo[string] symbolInfos;
1062 
1063     SymbolInfo info(string name)
1064     {
1065         auto s = name in symbolInfos;
1066         if (s is null)
1067         {
1068             symbolInfos[name] = new SymbolInfo();
1069             s = name in symbolInfos;
1070             s.name = name;
1071         }
1072         assert(s !is null);
1073         assert(s.name == name);
1074         return *s;
1075     }
1076 
1077     void visitExprs(SymbolInfo parent, const Tree expr, const string[] excludeNames,
1078             bool delegate(SymbolInfo parent, const Tree childExpr, SymbolInfo child) dg)
1079     {
1080         if (expr is null || expr.isToken)
1081             return;
1082 
1083         if (expr.nonterminalID.among(nonterminalIDFor!"Token",
1084                 nonterminalIDFor!"Name", nonterminalIDFor!"MacroInstance"))
1085         {
1086             foreach (excludeName; excludeNames)
1087             {
1088                 if (expr.childs[0].content == excludeName)
1089                     return;
1090             }
1091 
1092             auto s = info(expr.childs[0].content);
1093             bool changed = dg(parent, expr, s);
1094 
1095             if (changed)
1096             {
1097                 foreach (d; s.declarations)
1098                     visitExprs(s, d.exprTree, d.parameters, dg);
1099             }
1100         }
1101         if (expr.nonterminalID == nonterminalIDFor!"AnnotatedExpression")
1102         {
1103             visitExprs(parent, expr.childs[$ - 1], excludeNames, dg);
1104         }
1105         else if (expr.nonterminalID == nonterminalIDFor!"Concatenation")
1106         {
1107             foreach (c; expr.childs[0 .. $ - 1])
1108             {
1109                 visitExprs(parent, c, excludeNames, dg);
1110             }
1111         }
1112         else
1113         {
1114             foreach (c; expr.childs)
1115             {
1116                 visitExprs(parent, c, excludeNames, dg);
1117             }
1118         }
1119     }
1120 
1121     foreach (i, d; ebnf.symbols)
1122     {
1123         auto s = info(d.name);
1124         s.declarations ~= d;
1125     }
1126 
1127     foreach (i, d; ebnf.symbols)
1128     {
1129         auto s = info(d.name);
1130         visitExprs(s, d.exprTree, d.parameters, (parent, childExpr, child) {
1131             if (childExpr.nonterminalID.among(nonterminalIDFor!"Name",
1132                 nonterminalIDFor!"MacroInstance") && child.declarations.length == 0)
1133             {
1134                 stderr.writeln("Warning: Using undefined symbol ", child.name);
1135             }
1136             return false;
1137         });
1138     }
1139 
1140     foreach (i, d; ebnf.symbols)
1141     {
1142         auto s = info(d.name);
1143         if (d.type == DeclarationType.token)
1144         {
1145             s.isToken = true;
1146             s.reachableFromToken = true;
1147         }
1148         if (d.annotations.canFind("ignoreToken"))
1149         {
1150             enforce(d.type == DeclarationType.token);
1151             s.isToken = true;
1152             s.isIgnoreToken = true;
1153             s.reachableFromToken = true;
1154         }
1155         visitExprs(s, d.exprTree, d.parameters, (parent, childExpr, child) {
1156             if (parent.reachableFromToken && !child.reachableFromToken)
1157             {
1158                 child.reachableFromToken = true;
1159                 return true;
1160             }
1161             return false;
1162         });
1163     }
1164 
1165     foreach (i, d; ebnf.symbols)
1166     {
1167         if (i != 0 && !d.annotations.canFind("start") && !d.annotations.canFind("ignoreToken"))
1168             continue;
1169         auto s = info(d.name);
1170         s.reachableFromStart = true;
1171         foreach (d2; s.declarations)
1172             visitExprs(s, d2.exprTree, d2.parameters, (parent, childExpr, child) {
1173                 if (parent.reachableFromStart && !child.reachableFromStart)
1174                 {
1175                     child.reachableFromStart = true;
1176                     return true;
1177                 }
1178                 return false;
1179             });
1180     }
1181 
1182     foreach (i, d; ebnf.symbols)
1183     {
1184         if (i != 0 && !d.annotations.canFind("start"))
1185             continue;
1186         auto s = info(d.name);
1187         if (!s.isToken)
1188             s.reachableFromStartNoToken = true;
1189         foreach (d2; s.declarations)
1190             visitExprs(s, d2.exprTree, d2.parameters, (parent, childExpr, child) {
1191                 if (parent.reachableFromStartNoToken
1192                     && !child.reachableFromStartNoToken && !child.isToken)
1193                 {
1194                     child.reachableFromStartNoToken = true;
1195                     return true;
1196                 }
1197                 return false;
1198             });
1199     }
1200 
1201     foreach (i, d; ebnf.symbols)
1202     {
1203         auto s = info(d.name);
1204         if (!s.reachableFromStart)
1205         {
1206             stderr.writeln("Warning: Unused symbol ", d.name);
1207         }
1208         else if (d.type == DeclarationType.fragment && d.parameters.length == 0
1209                 && s.reachableFromStartNoToken)
1210         {
1211             stderr.writeln("Warning: Fragment ", d.name, " used in nonterminal");
1212         }
1213         else if (d.type == DeclarationType.auto_ && d.parameters.length == 0 && s.reachableFromToken)
1214         {
1215             stderr.writeln("Warning: Nonterminal ", d.name, " used in token");
1216         }
1217     }
1218 
1219     return symbolInfos;
1220 }
1221 
1222 Symbol getSymbolByName(EBNFGrammar grammar, string name)
1223 {
1224     Symbol ls;
1225     if (name.startsWith("\"") || (grammar.allowTokenNonterminals
1226             && grammar.symbolInfos[name].isToken))
1227     {
1228         if (name !in grammar.symbolInfos || !grammar.symbolInfos[name].isIgnoreToken)
1229             ls = grammar.tokens.id(name);
1230         else
1231             throw new Exception("IgnoreTokens cannot be used");
1232     }
1233     else
1234         ls = grammar.nonterminals.id(name);
1235     return ls;
1236 }
1237 
1238 void addAnnotation(ref Annotations annotations, Tree tree)
1239 in (tree.nonterminalID == nonterminalIDFor!"Annotation")
1240 {
1241     assert(tree.childs.length == 3);
1242     assert(tree.childs[0].content == "@");
1243     string name = tree.childs[1].content;
1244     if (tree.childs[2] !is null)
1245     {
1246         name ~= "(" ~ concatTree(tree.childs[2].childs[1]).strip() ~ ")";
1247     }
1248     annotations.add(name);
1249 }
1250 
1251 SymbolInstance createSymbol(EBNFGrammar grammar, Tree tree, EBNF ebnf)
1252 {
1253     SymbolInstance symbol;
1254 
1255     if (tree.nonterminalID == nonterminalIDFor!"AnnotatedExpression")
1256     {
1257         if (tree.childs[1] !is null)
1258             symbol.symbolInstanceName = tree.childs[1].childs[0].content;
1259         foreach (c; tree.childs[2].memberOrDefault!"childs")
1260         {
1261             if (c.childs[0].content == "<")
1262                 symbol.unwrapProduction = true;
1263             else if (c.childs[0].content == "^")
1264                 symbol.dropNode = true;
1265             else
1266                 assert(false, c.childs[0].content);
1267         }
1268     }
1269     Tree realTree = tree;
1270     while (realTree.nonterminalID == nonterminalIDFor!"AnnotatedExpression")
1271     {
1272         foreach (c; realTree.childs[0].memberOrDefault!"childs")
1273         {
1274             if (c.nonterminalID == nonterminalIDFor!"Annotation")
1275                 addAnnotation(symbol.annotations, c);
1276             else if (c.nonterminalID == nonterminalIDFor!"NegativeLookahead")
1277             {
1278                 assert(c.childs.length == 2);
1279                 assert(c.childs[0].content == "!");
1280                 if (c.childs[1].nodeType == NodeType.nonterminal
1281                         && c.childs[1].nonterminalID.among(nonterminalIDFor!"Token",
1282                             nonterminalIDFor!"Name"))
1283                 {
1284                     assert(c.childs[1].childs.length == 1);
1285                     symbol.negLookaheads ~= getSymbolByName(grammar, c.childs[1].childs[0].content);
1286                 }
1287                 else if (c.childs[1].nodeType == NodeType.token && c.childs[1].content == "anytoken")
1288                 {
1289                     throw new Exception("anytoken not implemented here");
1290                 }
1291                 else
1292                     assert(false, c.childs[1].name);
1293             }
1294             else
1295                 assert(false, c.name);
1296         }
1297         realTree = realTree.childs[$ - 1];
1298     }
1299 
1300     TagUsage[] tags;
1301     foreach (a; symbol.annotations.otherAnnotations)
1302     {
1303         ref TagUsage findTag(TagID tag)
1304         {
1305             foreach (ref t; tags)
1306                 if (t.tag == tag)
1307                     return t;
1308             tags ~= TagUsage(tag);
1309             return tags[$ - 1];
1310         }
1311 
1312         if (a.startsWith("inheritTag("))
1313         {
1314             assert(a.endsWith(")"));
1315             foreach (t; a[11 .. $ - 1].split(", "))
1316             {
1317                 t = t.strip();
1318                 findTag(grammar.tags.id(t)).inherit = true;
1319             }
1320         }
1321         else if (a.startsWith("needTag("))
1322         {
1323             assert(a.endsWith(")"));
1324             foreach (t; a[8 .. $ - 1].split(", "))
1325             {
1326                 t = t.strip();
1327                 findTag(grammar.tags.id(t)).needed = true;
1328             }
1329         }
1330         else if (a.startsWith("rejectTag("))
1331         {
1332             assert(a.endsWith(")"));
1333             foreach (t; a[10 .. $ - 1].split(", "))
1334             {
1335                 t = t.strip();
1336                 findTag(grammar.tags.id(t)).reject = true;
1337             }
1338         }
1339     }
1340     tags.sort();
1341     symbol.tags = tags.idup;
1342 
1343     string name = ebnfTreeToString(realTree);
1344 
1345     if (symbol.annotations.contains!"regArray" && realTree.nonterminalID.among(
1346             nonterminalIDFor!"Repetition", nonterminalIDFor!"RepetitionPlus"))
1347         name = "@regArray " ~ name;
1348 
1349     if (realTree.nonterminalID != nonterminalIDFor!"MacroInstance")
1350         name = name.replace(" ", "_");
1351 
1352     bool alreadyDone = (name in grammar.nonterminals.ids) !is null;
1353     if (alreadyDone && realTree.nonterminalID != nonterminalIDFor!"Token"
1354             && realTree.nonterminalID != nonterminalIDFor!"SubToken")
1355     {
1356         symbol.symbol = grammar.nonterminals.id(name);
1357         return symbol;
1358     }
1359 
1360     if (realTree.nonterminalID == nonterminalIDFor!"Token")
1361     {
1362         symbol.symbol = grammar.tokens.id(realTree.childs[0].content);
1363         grammar.tokens[symbol.toTokenID].annotations.add(symbol.annotations);
1364     }
1365     else if (realTree.nonterminalID == nonterminalIDFor!"Name")
1366     {
1367         if (grammar.allowTokenNonterminals
1368                 && grammar.symbolInfos[realTree.childs[0].content].isToken)
1369         {
1370             if (!grammar.symbolInfos[realTree.childs[0].content].isIgnoreToken)
1371                 symbol.symbol = grammar.tokens.id(realTree.childs[0].content);
1372             else
1373                 throw new Exception("IgnoreTokens cannot be used");
1374         }
1375         else
1376             symbol.symbol = grammar.nonterminals.id(realTree.childs[0].content);
1377     }
1378     else if (realTree.nonterminalID == nonterminalIDFor!"Optional")
1379     {
1380         assert(realTree.childs.length == 2);
1381         auto innerSymbol = createSymbol(grammar, realTree.childs[0], ebnf);
1382 
1383         Production* production = new Production;
1384         production.nonterminalID = grammar.nonterminals.id(name);
1385         production.isVirtual = true;
1386         innerSymbol.unwrapProduction = true;
1387         production.symbols = [innerSymbol];
1388         grammar.addProduction(production);
1389 
1390         Production* production2 = new Production;
1391         production2.nonterminalID = grammar.nonterminals.id(name);
1392         production2.isVirtual = true;
1393         production2.symbols = [];
1394         grammar.addProduction(production2);
1395 
1396         symbol.symbol = grammar.nonterminals.id(name);
1397     }
1398     else if (realTree.nonterminalID == nonterminalIDFor!"Repetition")
1399     {
1400         assert(realTree.childs.length == 2);
1401         /* Repetition reuses RepetitionPlus, so parser conflicts are avoided. */
1402         Tree tree2 = realTree;
1403         tree2 = new TreeNonterminal(nonterminalIDFor!"RepetitionPlus", ProductionID.max,
1404                 [tree2.childs[0], new TreeToken("+", &grammarInfo)], &grammarInfo);
1405         auto innerSymbol = createSymbol(grammar, tree2, ebnf);
1406 
1407         Production* production = new Production;
1408         production.nonterminalID = grammar.nonterminals.id(name);
1409         production.isVirtual = true;
1410         grammar.addProduction(production);
1411 
1412         Production* production2 = new Production;
1413         production2.nonterminalID = grammar.nonterminals.id(name);
1414         production2.isVirtual = true;
1415         production2.symbols = [innerSymbol];
1416         grammar.addProduction(production2);
1417 
1418         grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("array");
1419         if (symbol.annotations.contains!"regArray")
1420             grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("regArray");
1421         symbol.symbol = grammar.nonterminals.id(name);
1422     }
1423     else if (realTree.nonterminalID == nonterminalIDFor!"RepetitionPlus")
1424     {
1425         assert(realTree.childs.length == 2);
1426         auto innerSymbol = createSymbol(grammar, realTree.childs[0], ebnf);
1427 
1428         Production* production = new Production;
1429         production.nonterminalID = grammar.nonterminals.id(name);
1430         production.isVirtual = true;
1431         production.symbols = [innerSymbol];
1432         grammar.addProduction(production);
1433 
1434         Production* production2 = new Production;
1435         production2.nonterminalID = grammar.nonterminals.id(name);
1436         production2.isVirtual = true;
1437         production2.symbols = [
1438             immutable(SymbolInstance)(grammar.nonterminals.id(name)), innerSymbol
1439         ];
1440         grammar.addProduction(production2);
1441 
1442         grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("array");
1443         if (symbol.annotations.contains!"regArray")
1444             grammar.nonterminals[grammar.nonterminals.id(name)].annotations.add("regArray");
1445         symbol.symbol = grammar.nonterminals.id(name);
1446     }
1447     else if (realTree.nonterminalID == nonterminalIDFor!"MacroInstance")
1448     {
1449         if (grammar.allowTokenNonterminals
1450                 && grammar.symbolInfos[realTree.childs[0].content].isToken)
1451         {
1452             if (!grammar.symbolInfos[realTree.childs[0].content].isIgnoreToken)
1453                 symbol.symbol = grammar.tokens.id(name);
1454             else
1455                 throw new Exception("IgnoreTokens cannot be used");
1456         }
1457         else
1458         {
1459             symbol.symbol = grammar.nonterminals.id(name);
1460 
1461             Tree[] macroParameters;
1462             foreach (c; realTree.childs[2].memberOrDefault!"childs")
1463                 if (!c.isToken)
1464                     macroParameters ~= c;
1465             foreach (d; ebnf.symbols)
1466             {
1467                 if (d.name == realTree.childs[0].content
1468                         && (d.parameters.length == macroParameters.length || (d.variadicParameterIndex != size_t.max
1469                             && macroParameters.length >= d.parameters.length - 1)))
1470                 {
1471                     Tree[string] table;
1472                     Tree[] parametersHere = macroParameters;
1473                     if (d.variadicParameterIndex != size_t.max)
1474                     {
1475                         Tree[] parametersBeforeVariadic = macroParameters[0
1476                             .. d.variadicParameterIndex];
1477                         size_t numAfterVariadic = d.parameters.length - 1 - d
1478                             .variadicParameterIndex;
1479                         Tree[] parametersVariadic = macroParameters[d.variadicParameterIndex
1480                             .. $ - numAfterVariadic];
1481                         Tree[] parametersAfterVariadic = macroParameters[$ - numAfterVariadic .. $];
1482                         Tree variadicTuple = new TreeNonterminal(nonterminalIDFor!"Tuple",
1483                                 ProductionID.max, [
1484                                     new TreeToken("t(", &grammarInfo),
1485                                     new TreeArray(parametersVariadic, &grammarInfo),
1486                                     new TreeToken(")", &grammarInfo)
1487                                 ], &grammarInfo);
1488                         parametersHere = parametersBeforeVariadic
1489                             ~ variadicTuple ~ parametersAfterVariadic;
1490                     }
1491                     foreach (i, c; parametersHere)
1492                     {
1493                         table[d.parameters[i]] = c;
1494                     }
1495                     NonterminalID id2 = createGrammar(grammar, name,
1496                             replaceNames(table, d.exprTree), ebnf);
1497 
1498                     grammar.nonterminals[id2].annotations = Annotations(d.annotations);
1499 
1500                     assert(id2 == symbol, text(realTree.childs[0].content, " ", symbol, " ", id2));
1501                 }
1502             }
1503         }
1504     }
1505     else if (realTree.nonterminalID == nonterminalIDFor!"Concatenation")
1506     {
1507         symbol.symbol = createGrammar(grammar, name, realTree, ebnf);
1508     }
1509     else if (realTree.nonterminalID == nonterminalIDFor!"Alternation")
1510     {
1511         symbol.symbol = createGrammar(grammar, name, realTree, ebnf);
1512     }
1513     else if (realTree.nonterminalID == nonterminalIDFor!"SubToken")
1514     {
1515         symbol = createSymbol(grammar, realTree.childs[0], ebnf);
1516         if (realTree.childs[2].nonterminalID == nonterminalIDFor!"Token")
1517         {
1518             symbol.subToken = realTree.childs[2].childs[0].content;
1519         }
1520         else
1521             throw new Exception("subterminal only for strings implemented");
1522     }
1523     else if (realTree.nonterminalID == nonterminalIDFor!"TokenMinus")
1524     {
1525         if (grammar.isLexerGrammar)
1526         {
1527             symbol.symbol = grammar.nonterminals.id("$tokenminus" ~ name);
1528 
1529             auto innerSymbol1 = createSymbol(grammar, realTree.childs[0], ebnf);
1530             auto innerSymbol2 = createSymbol(grammar, realTree.childs[2], ebnf);
1531 
1532             foreach (p; grammar.productions)
1533             {
1534                 if (p.nonterminalID == symbol.toNonterminalID)
1535                 {
1536                     assert(p.symbols[0] == innerSymbol1);
1537                     assert(p.symbols[1] == innerSymbol2);
1538                     return symbol;
1539                 }
1540             }
1541 
1542             Production* production = new Production;
1543             production.nonterminalID = grammar.nonterminals.id("$tokenminus" ~ name);
1544             production.isVirtual = true;
1545             production.symbols = [innerSymbol1, innerSymbol2];
1546             grammar.addProduction(production);
1547         }
1548         else
1549         {
1550             enforce(false, "Using token minus outside token or fragment: " ~ name);
1551         }
1552     }
1553     else if (realTree.nonterminalID == nonterminalIDFor!"ParenExpression")
1554     {
1555         symbol.symbol = createSymbol(grammar, realTree.childs[1], ebnf);
1556     }
1557     else
1558         enforce(0, text("Unexpected expression ", realTree.name));
1559 
1560     return symbol;
1561 }
1562 
1563 NonterminalID createGrammar(EBNFGrammar grammar, string name, Tree tree, EBNF ebnf,
1564         bool isVirtual = false)
1565 {
1566     if (name.length == 0)
1567     {
1568         name = ebnfTreeToString(tree);
1569     }
1570 
1571     Production* production = new Production;
1572     production.nonterminalID = grammar.nonterminals.id(name);
1573     production.isVirtual = isVirtual;
1574 
1575     while (tree.nonterminalID == nonterminalIDFor!"AnnotatedExpression"
1576             && tree.childs[0].memberOrDefault!"childs".length == 0
1577             && tree.childs[1] is null && tree.childs[2].memberOrDefault!"childs".length == 0)
1578         tree = tree.childs[$ - 1];
1579 
1580     if (tree.name == "Concatenation")
1581     {
1582         assert(tree.nonterminalID == nonterminalIDFor!"Concatenation");
1583         foreach (c; tree.childs[$ - 1].memberOrDefault!"childs")
1584         {
1585             if (c.nonterminalID == nonterminalIDFor!"Annotation")
1586                 addAnnotation(production.annotations, c);
1587             else if (c.nonterminalID == nonterminalIDFor!"NegativeLookahead")
1588             {
1589                 assert(c.childs.length == 2);
1590                 assert(c.childs[0].content == "!");
1591                 if (c.childs[1].nodeType == NodeType.nonterminal
1592                         && c.childs[1].nonterminalID.among(nonterminalIDFor!"Token",
1593                             nonterminalIDFor!"Name"))
1594                 {
1595                     assert(c.childs[1].childs.length == 1);
1596                     auto negLookaheadSymbol = getSymbolByName(grammar,
1597                             c.childs[1].childs[0].content);
1598                     if (!grammar.isLexerGrammar && !negLookaheadSymbol.isToken)
1599                         throw new Exception(text("Error: Nonterminal ", c.childs[1].childs[0].content,
1600                                 " used as negative lookahead for production"));
1601                     production.negLookaheads ~= negLookaheadSymbol;
1602                 }
1603                 else if (c.childs[1].nodeType == NodeType.token && c.childs[1].content == "anytoken")
1604                 {
1605                     production.negLookaheadsAnytoken = true;
1606                 }
1607                 else
1608                     assert(false, c.childs[1].name);
1609             }
1610         }
1611 
1612         TagID[] tags;
1613         foreach (a; production.annotations.otherAnnotations)
1614         {
1615             if (a.startsWith("tag("))
1616             {
1617                 assert(a.endsWith(")"));
1618                 foreach (t; a[4 .. $ - 1].split(", "))
1619                 {
1620                     t = t.strip();
1621                     tags.addOnce(grammar.tags.id(t));
1622                 }
1623             }
1624         }
1625         tags.sort();
1626         production.tags = tags.idup;
1627 
1628         if (tree.childs.length >= 2)
1629             production.symbols ~= createSymbol(grammar, tree.childs[0], ebnf);
1630         if (tree.childs.length == 3)
1631             foreach (c; tree.childs[1].memberOrDefault!"childs")
1632             {
1633                 if (!c.isToken)
1634                     production.symbols ~= createSymbol(grammar, c, ebnf);
1635             }
1636 
1637         if (production.symbols.length == 0 && !production.annotations.canFind("empty"))
1638             throw new Exception("Empty concatenation missing @empty.");
1639     }
1640     else if (tree.nonterminalID == nonterminalIDFor!"Alternation")
1641     {
1642         foreach (c; tree.childs)
1643         {
1644             if (!c.isToken)
1645                 createGrammar(grammar, name, c, ebnf);
1646         }
1647         return grammar.nonterminals.id(name);
1648     }
1649     else if (tree.nonterminalID == nonterminalIDFor!"VariadicList")
1650     {
1651         throw new Exception(text("can't use variadic parameter here ", tree));
1652     }
1653     else
1654         production.symbols ~= createSymbol(grammar, tree, ebnf);
1655 
1656     grammar.addProduction(production);
1657     return grammar.nonterminals.id(name);
1658 }
1659 
1660 EBNFGrammar createGrammar(EBNF ebnf)
1661 {
1662     EBNFGrammar grammar = new EBNFGrammar(generateSymbolInfos(ebnf));
1663     grammar.tokens.id("$end");
1664     assert(grammar.tokens.getID("$end") == TokenID(0));
1665 
1666     grammar.startTokenID = ebnf.startTokenID;
1667     grammar.startNonterminalID = ebnf.startNonterminalID;
1668     grammar.startProductionID = ebnf.startProductionID;
1669 
1670     grammar.allowTokenNonterminals = true;
1671 
1672     foreach (i, d; ebnf.symbols)
1673     {
1674         if (d.parameters.length)
1675             continue;
1676 
1677         if (!grammar.symbolInfos[d.name].reachableFromStartNoToken)
1678             continue;
1679 
1680         NonterminalID id = grammar.nonterminals.id(d.name);
1681         if (i == 0 || d.annotations.canFind("start"))
1682             grammar.startNonterminals ~= StartNonterminal(id);
1683 
1684         grammar.nonterminals[grammar.nonterminals.id(d.name)].annotations = Annotations(
1685                 d.annotations);
1686     }
1687     foreach (i, d; ebnf.symbols)
1688     {
1689         if (!grammar.symbolInfos[d.name].isToken || grammar.symbolInfos[d.name].isIgnoreToken)
1690             continue;
1691         grammar.tokens.id(d.name);
1692     }
1693 
1694     foreach (i, d; ebnf.symbols)
1695     {
1696         if (d.parameters.length)
1697             continue;
1698 
1699         if (!grammar.symbolInfos[d.name].reachableFromStartNoToken)
1700             continue;
1701 
1702         createGrammar(grammar, d.name, d.exprTree, ebnf);
1703     }
1704 
1705     foreach (s; grammar.symbolInfos)
1706     {
1707         if (s.isToken && !s.isIgnoreToken && s.reachableFromStart)
1708         {
1709             auto id = grammar.tokens.id(s.name);
1710             foreach (d; s.declarations)
1711                 grammar.tokens[id.toTokenID].annotations.add(d.annotations);
1712         }
1713     }
1714 
1715     grammar.calcNonterminalCanBeEmpty();
1716 
1717     grammar.fillProductionsForNonterminal();
1718     grammar.calcNonterminalTypes();
1719 
1720     foreach (i, m; ebnf.matchingTokens)
1721     {
1722         TokenID t1 = grammar.tokens.id(m[0]);
1723         TokenID t2 = grammar.tokens.id(m[1]);
1724         grammar.matchingTokens ~= [t1, t2];
1725     }
1726     checkGrammar(grammar);
1727 
1728     foreach (t; grammar.tokens.allIDs)
1729     {
1730         if (grammar.tokens[t].annotations.contains!"inContextOnly"())
1731             grammar.inContextOnlyTokens.addOnce(t);
1732     }
1733     return grammar;
1734 }
1735 
1736 void checkGrammar(EBNFGrammar grammar)
1737 {
1738     foreach (p; grammar.productions)
1739     {
1740         if (p is null)
1741             continue;
1742     }
1743 
1744     Appender!(NonterminalID[]) path;
1745     bool[NonterminalID] inPath;
1746     void checkCycles2(NonterminalID nonterminalID)
1747     {
1748         if (nonterminalID in inPath)
1749             throw new Exception(text("cycle ", path.data.map!(n => grammar.getSymbolName(n))
1750                     .array, grammar.getSymbolName(nonterminalID)));
1751         foreach (p; grammar.getProductions(nonterminalID))
1752         {
1753             foreach (i; 0 .. p.symbols.length)
1754             {
1755                 if (p.symbols[i].isToken)
1756                     continue;
1757                 if (!grammar.canForward(p, i))
1758                     continue;
1759                 path.put(nonterminalID);
1760                 inPath[nonterminalID] = true;
1761                 checkCycles2(p.symbols[i].toNonterminalID);
1762                 inPath.remove(nonterminalID);
1763                 path.shrinkTo(path.data.length - 1);
1764             }
1765         }
1766     }
1767 
1768     foreach (i, ref nonterminal; grammar.nonterminals.vals)
1769     {
1770         checkCycles2(NonterminalID(i.to!SymbolID));
1771     }
1772 
1773     void checkCycles3(NonterminalID nonterminalID, bool hasDirectProd)
1774     {
1775         if (nonterminalID in inPath)
1776         {
1777             if (path.data[0] == nonterminalID && hasDirectProd)
1778                 throw new Exception(text("cycle3 ", path.data.map!(n => grammar.getSymbolName(n))
1779                         .array, grammar.getSymbolName(nonterminalID)));
1780             else
1781                 return;
1782         }
1783         foreach (p; grammar.getProductions(nonterminalID))
1784         {
1785             foreach (i; 0 .. p.symbols.length)
1786             {
1787                 if (p.symbols[i].isToken)
1788                     continue;
1789                 if (!grammar.canForwardPrefix(p, i))
1790                     continue;
1791                 path.put(nonterminalID);
1792                 inPath[nonterminalID] = true;
1793                 checkCycles3(p.symbols[i].toNonterminalID, hasDirectProd || i > 0);
1794                 inPath.remove(nonterminalID);
1795                 path.shrinkTo(path.data.length - 1);
1796             }
1797         }
1798     }
1799 
1800     foreach (i, ref nonterminal; grammar.nonterminals.vals)
1801     {
1802         checkCycles3(NonterminalID(i.to!SymbolID), false);
1803     }
1804 
1805     struct SymbolKey
1806     {
1807         Symbol symbol;
1808         string subToken;
1809         immutable(TagUsage)[] tags;
1810     }
1811 
1812     struct ProductionKey
1813     {
1814         NonterminalID nonterminalID;
1815         SymbolKey[] symbols;
1816     }
1817 
1818     const(Production)*[ProductionKey] productionsDone;
1819     foreach (p; grammar.productions)
1820     {
1821         auto key = ProductionKey(p.nonterminalID,
1822                 p.symbols.map!(s => SymbolKey(s.symbol, s.subToken, s.tags)).array);
1823         if (key in productionsDone)
1824         {
1825             throw new Exception(text("Error: Duplicate production:\n  ",
1826                     grammar.productionString(p), "\n  ",
1827                     grammar.productionString(productionsDone[key])));
1828         }
1829         productionsDone[key] = p;
1830     }
1831 }
1832 
1833 EBNFGrammar createLexerGrammar(EBNF ebnf, EBNFGrammar realGrammar)
1834 {
1835     EBNFGrammar lexerGrammar = new EBNFGrammar(realGrammar.symbolInfos);
1836     lexerGrammar.isLexerGrammar = true;
1837     lexerGrammar.tokens.id("$end");
1838     assert(lexerGrammar.tokens.getID("$end") == TokenID(0));
1839 
1840     lexerGrammar.startTokenID = ebnf.startTokenID;
1841     lexerGrammar.startNonterminalID = ebnf.startNonterminalID;
1842     lexerGrammar.startProductionID = ebnf.startProductionID;
1843 
1844     foreach (token; realGrammar.tokens.allIDs)
1845     {
1846         string name = realGrammar.tokens[token].name;
1847         if (name == "$end")
1848             name = "$null";
1849         auto nonterminal = lexerGrammar.nonterminals.id(name);
1850         assert(nonterminal.id == token.id);
1851         if (name != "$null")
1852             lexerGrammar.startNonterminals ~= StartNonterminal(nonterminal);
1853         lexerGrammar.nonterminals[nonterminal].annotations.add(
1854                 realGrammar.tokens[token].annotations);
1855     }
1856 
1857     foreach (i, d; ebnf.symbols)
1858     {
1859         if (d.parameters.length)
1860             continue;
1861 
1862         if (!lexerGrammar.symbolInfos[d.name].reachableFromToken)
1863             continue;
1864 
1865         NonterminalID id = lexerGrammar.nonterminals.id(d.name);
1866 
1867         lexerGrammar.nonterminals[id].annotations.add(d.annotations);
1868     }
1869     foreach (i, d; ebnf.symbols)
1870     {
1871         if (d.parameters.length)
1872             continue;
1873 
1874         if (!lexerGrammar.symbolInfos[d.name].reachableFromToken)
1875             continue;
1876 
1877         NonterminalID id = createGrammar(lexerGrammar, d.name, d.exprTree, ebnf);
1878     }
1879 
1880     lexerGrammar.fillProductionsForNonterminal();
1881 
1882     return lexerGrammar;
1883 }
1884 
1885 EBNFGrammar createOptEmptyGrammar(EBNF ebnf, EBNFGrammar realGrammar)
1886 {
1887     EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos, realGrammar);
1888     newGrammar.tokens = realGrammar.tokens;
1889     newGrammar.nonterminals = realGrammar.nonterminals;
1890     newGrammar.tags = realGrammar.tags;
1891     newGrammar.startNonterminals = realGrammar.startNonterminals;
1892     newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals;
1893     newGrammar.matchingTokens = realGrammar.matchingTokens;
1894     newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens;
1895 
1896     newGrammar.startTokenID = ebnf.startTokenID;
1897     newGrammar.startNonterminalID = ebnf.startNonterminalID;
1898     newGrammar.startProductionID = ebnf.startProductionID;
1899 
1900     BitSet!NonterminalID nonterminalCanBeEmpty;
1901     BitSet!NonterminalID nonterminalCanBeNonEmpty;
1902     realGrammar.calcNonterminalCanBeEmpty(nonterminalCanBeEmpty, nonterminalCanBeNonEmpty, true);
1903 
1904     bool canBeEmpty(const Symbol symbol)
1905     {
1906         if (symbol.isToken)
1907             return false;
1908         return nonterminalCanBeEmpty[symbol.toNonterminalID];
1909     }
1910 
1911     bool canBeNonEmpty(const Symbol symbol)
1912     {
1913         if (symbol.isToken)
1914             return true;
1915         return nonterminalCanBeNonEmpty[symbol.toNonterminalID];
1916     }
1917 
1918     foreach (i, n; realGrammar.startNonterminals)
1919         if (canBeEmpty(n.nonterminal))
1920             newGrammar.startNonterminals[i].needsEmptyProduction = true;
1921 
1922     foreach (i, p; realGrammar.productionsData)
1923     {
1924         if (p is null)
1925         {
1926             newGrammar.productionsData ~= null;
1927             continue;
1928         }
1929         assert(p.productionID == i);
1930         assert(newGrammar.productionsData.length == i);
1931 
1932         bool anyCanBeEmpty, anyCanBeNonEmpty;
1933         foreach (s; p.symbols)
1934         {
1935             if (canBeEmpty(s))
1936                 anyCanBeEmpty = true;
1937             if (canBeNonEmpty(s))
1938                 anyCanBeNonEmpty = true;
1939         }
1940 
1941         if ((anyCanBeEmpty || !anyCanBeNonEmpty || p.symbols.length == 0)
1942                 && !(p.symbols.length == 0
1943                     && realGrammar.nonterminals[p.nonterminalID].annotations.contains!"noOptEmpty"))
1944             newGrammar.productionsData ~= null;
1945         else
1946             newGrammar.productionsData ~= p;
1947     }
1948 
1949     foreach (i, p; realGrammar.productionsData)
1950     {
1951         if (p is null)
1952             continue;
1953         if (p.symbols.length == 0)
1954             continue;
1955         if (newGrammar.productionsData[i]!is null)
1956             continue; // cant be empty
1957 
1958         void buildGraph(immutable(SymbolInstance)[] symbolsAvailable, size_t[] symbolPositions,
1959                 immutable(SymbolInstance)[] symbolsSelected, bool afterSkippedEager)
1960         {
1961             if (symbolsAvailable.length == 0)
1962             {
1963                 if (symbolsSelected.length == 0)
1964                     return;
1965                 Production* p2 = new Production();
1966                 p2.nonterminalID = p.nonterminalID;
1967                 p2.symbols = symbolsSelected;
1968                 p2.annotations = p.annotations;
1969                 p2.negLookaheads = p.negLookaheads.dup;
1970                 p2.negLookaheadsAnytoken = p.negLookaheadsAnytoken;
1971                 p2.rewriteRules = [RewriteRule(p, symbolPositions, 0)];
1972                 p2.tags = p.tags;
1973                 if (afterSkippedEager)
1974                     p2.annotations.add("eagerEnd");
1975                 newGrammar.addProduction(p2);
1976             }
1977             else
1978             {
1979                 auto s = symbolsAvailable[0];
1980                 if (canBeEmpty(p.symbols[symbolPositions.length]))
1981                     buildGraph(symbolsAvailable[1 .. $], s.dropNode ? symbolPositions : (symbolPositions ~ [
1982                                 size_t.max
1983                             ]), symbolsSelected, afterSkippedEager
1984                             || symbolsAvailable[0].annotations.contains!"eager");
1985                 if (canBeNonEmpty(p.symbols[symbolPositions.length]))
1986                     buildGraph(symbolsAvailable[1 .. $], s.dropNode ? symbolPositions
1987                             : (symbolPositions ~ [symbolsSelected.length]),
1988                             symbolsSelected ~ symbolsAvailable[0], false);
1989             }
1990         }
1991 
1992         buildGraph(p.symbols, [], [], false);
1993     }
1994 
1995     newGrammar.calcNonterminalCanBeEmpty();
1996     newGrammar.fillProductionsForNonterminal();
1997     newGrammar.calcNonterminalTypes();
1998 
1999     checkGrammar(newGrammar);
2000 
2001     return newGrammar;
2002 }
2003 
2004 EBNFGrammar createGrammarWithoutDeactivatedProductions(EBNFGrammar realGrammar)
2005 {
2006     EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos, realGrammar);
2007     newGrammar.tokens = realGrammar.tokens;
2008     newGrammar.nonterminals = realGrammar.nonterminals;
2009     newGrammar.tags = realGrammar.tags;
2010     newGrammar.startNonterminals = realGrammar.startNonterminals;
2011     newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals;
2012     newGrammar.matchingTokens = realGrammar.matchingTokens;
2013     newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens;
2014 
2015     newGrammar.startTokenID = realGrammar.startTokenID;
2016     newGrammar.startNonterminalID = realGrammar.startNonterminalID;
2017     newGrammar.startProductionID = realGrammar.startProductionID;
2018 
2019     foreach (i, p; realGrammar.productionsData)
2020     {
2021         if (p is null)
2022         {
2023             newGrammar.productionsData ~= null;
2024             continue;
2025         }
2026 
2027         assert(p.productionID == i);
2028         assert(newGrammar.productionsData.length == i);
2029 
2030         if (p.annotations.contains!"deactivated"())
2031             newGrammar.productionsData ~= null;
2032         else
2033             newGrammar.productionsData ~= p;
2034     }
2035 
2036     newGrammar.calcNonterminalCanBeEmpty();
2037     newGrammar.fillProductionsForNonterminal();
2038     newGrammar.calcNonterminalTypes();
2039 
2040     checkGrammar(newGrammar);
2041 
2042     return newGrammar;
2043 }
2044 
2045 Graph!(SymbolWithConstraint, NonterminalID) buildRegArrayGraph(EBNFGrammar grammar,
2046         ref NonterminalID[] regArrayNonterminals)
2047 {
2048     alias G = Graph!(SymbolWithConstraint, NonterminalID);
2049     alias NodeID = G.NodeID;
2050 
2051     bool[NonterminalWithConstraint] done;
2052     regArrayNonterminals = [];
2053 
2054     G g = new G();
2055     g.start = g.addNode("start");
2056 
2057     void buildGraph(NonterminalWithConstraint n, string indent)
2058     {
2059         assert(!n.constraint.negLookaheads.canFind(n.nonterminalID));
2060         if (n in done)
2061             return;
2062         done[n] = true;
2063         if (grammar.nonterminals[n.nonterminalID].annotations.contains!"array"()
2064                 && grammar.nonterminals[n.nonterminalID].annotations.contains!"regArray"())
2065         {
2066             regArrayNonterminals ~= n.nonterminalID;
2067 
2068             void buildGraphPart(NonterminalWithConstraint n, NodeID start, NodeID end, string indent)
2069             {
2070                 assert(!n.constraint.negLookaheads.canFind(n.nonterminalID));
2071                 if (!grammar.nonterminals[n.nonterminalID].annotations.contains!"array"()
2072                         && !grammar.nonterminals[n.nonterminalID].name.endsWith("?"))
2073                 {
2074                     foreach (m2; grammar.directUnwrapClosure(n))
2075                     {
2076                         SymbolWithConstraint s = SymbolWithConstraint(m2.nonterminalID);
2077                         bool edgeNeeded;
2078                         foreach (p; grammar.getProductions(m2.nonterminalID))
2079                         {
2080                             if (grammar.isProductionAllowed(m2, p))
2081                                 edgeNeeded = true;
2082                         }
2083                         if (!edgeNeeded)
2084                             continue;
2085                         foreach (x; m2.constraint.negLookaheads)
2086                         {
2087                             s.constraint.negLookaheads.addOnce(x);
2088                         }
2089                         g.addEdge(start, end, s, EdgeFlags.none);
2090                     }
2091                     buildGraph(n, indent ~ " ");
2092                     return;
2093                 }
2094 
2095                 bool needsNonterminalEdge;
2096 
2097                 bool hasLeftRec, hasRightRec, hasNonRec;
2098 
2099                 foreach (p; grammar.getProductions(n.nonterminalID))
2100                 {
2101                     if (!grammar.isProductionAllowed(n, p))
2102                         continue;
2103                     if (p.symbols.length == 0)
2104                     {
2105                         hasNonRec = true;
2106                         continue;
2107                     }
2108                     if (p.symbols[0].symbol == n.nonterminalID)
2109                         hasLeftRec = true;
2110                     if (p.symbols[$ - 1].symbol == n.nonterminalID)
2111                         hasRightRec = true;
2112                     if (p.symbols[0].symbol != n.nonterminalID
2113                             && p.symbols[$ - 1].symbol != n.nonterminalID)
2114                         hasNonRec = true;
2115                 }
2116 
2117                 NodeID loopStart;
2118                 if (hasLeftRec)
2119                     loopStart = g.addNode("loopStart");
2120                 else if (hasRightRec)
2121                     loopStart = start;
2122 
2123                 foreach (p; grammar.getProductions(n.nonterminalID))
2124                 {
2125                     if (!grammar.isProductionAllowed(n, p))
2126                         continue;
2127 
2128                     immutable(SymbolInstance)[] symbols = p.symbols;
2129                     bool thisIsRightRec;
2130                     bool thisIsLeftRec;
2131                     bool first = true;
2132                     // use only left recursion.
2133                     // using both left and right recursion would turn S=@empty|S a| b S; into S=(a|b)*;
2134                     if (symbols.length && symbols[0].symbol == n.nonterminalID)
2135                     {
2136                         thisIsLeftRec = true;
2137                         symbols = symbols[1 .. $];
2138                         first = false;
2139                     }
2140                     if (symbols.length && symbols[$ - 1].symbol == n.nonterminalID)
2141                     {
2142                         thisIsRightRec = true;
2143                         symbols = symbols[0 .. $ - 1];
2144                     }
2145                     enforce(!thisIsLeftRec || !thisIsRightRec);
2146 
2147                     NodeID node;
2148                     if (!thisIsLeftRec)
2149                     {
2150                         node = g.addNode("start p");
2151                         g.addEdge(start, node, SymbolWithConstraint.invalid);
2152                     }
2153                     else
2154                         node = loopStart;
2155 
2156                     foreach (s; symbols)
2157                     {
2158                         NodeID node2 = g.addNode("");
2159                         if (s.isToken)
2160                         {
2161                             g.addEdge(node, node2, SymbolWithConstraint(s));
2162                         }
2163                         else
2164                         {
2165                             buildGraphPart(grammar.nextNonterminalWithConstraint(n.constraint,
2166                                     s, first), node, node2, indent ~ "  ");
2167                         }
2168                         node = node2;
2169                         first = false;
2170                     }
2171 
2172                     if (!thisIsRightRec)
2173                     {
2174                         g.addEdge(node, end, SymbolWithConstraint.invalid);
2175                     }
2176                     else
2177                     {
2178                         g.addEdge(node, start, SymbolWithConstraint.invalid);
2179                     }
2180                     if (hasLeftRec && !thisIsRightRec)
2181                     {
2182                         g.addEdge(node, loopStart, SymbolWithConstraint.invalid);
2183                     }
2184                 }
2185             }
2186 
2187             string name = grammar.nonterminals[n.nonterminalID].name;
2188             NodeID startM = g.addNode("start " ~ name);
2189             g.addEdge(g.start, startM, SymbolWithConstraint.invalid);
2190             NodeID endM = g.addNode("end");
2191             g.get(endM).results = [n.nonterminalID];
2192             buildGraphPart(n, startM, endM, indent ~ "  ");
2193         }
2194         else
2195         {
2196             foreach (p; grammar.getProductions(n.nonterminalID))
2197             {
2198                 if (!grammar.isProductionAllowed(n, p))
2199                     continue;
2200                 bool first = true;
2201                 foreach (s; p.symbols)
2202                 {
2203                     if (!s.isToken)
2204                         buildGraph(NonterminalWithConstraint(s.toNonterminalID), indent);
2205                     first = false;
2206                 }
2207             }
2208         }
2209     }
2210 
2211     foreach (n; grammar.startNonterminals)
2212         buildGraph(NonterminalWithConstraint(n.nonterminal), "");
2213 
2214     return g;
2215 }
2216 
2217 EBNFGrammar createRegArrayGrammar(EBNF ebnf, EBNFGrammar realGrammar)
2218 {
2219     EBNFGrammar newGrammar = new EBNFGrammar(realGrammar.symbolInfos /*, realGrammar*/ );
2220     newGrammar.tokens = realGrammar.tokens;
2221     newGrammar.nonterminals = realGrammar.nonterminals;
2222     newGrammar.tags = realGrammar.tags;
2223     newGrammar.startNonterminals = realGrammar.startNonterminals;
2224     newGrammar.allowTokenNonterminals = realGrammar.allowTokenNonterminals;
2225     newGrammar.matchingTokens = realGrammar.matchingTokens;
2226     newGrammar.inContextOnlyTokens = realGrammar.inContextOnlyTokens;
2227 
2228     newGrammar.startTokenID = ebnf.startTokenID;
2229     newGrammar.startNonterminalID = ebnf.startNonterminalID;
2230     newGrammar.startProductionID = ebnf.startProductionID;
2231 
2232     NonterminalID[] regArrayNonterminals;
2233     Graph!(SymbolWithConstraint, NonterminalID) g = buildRegArrayGraph(realGrammar,
2234             regArrayNonterminals);
2235     g = g.makeDeterministic(g.start, false).minimizeDFA;
2236 
2237     foreach (i, p; realGrammar.productionsData)
2238     {
2239         if (p is null)
2240         {
2241             newGrammar.productionsData ~= null;
2242             continue;
2243         }
2244 
2245         assert(p.productionID == i);
2246         assert(newGrammar.productionsData.length == i);
2247 
2248         if (regArrayNonterminals.canFind(p.nonterminalID))
2249             newGrammar.productionsData ~= null;
2250         else
2251             newGrammar.productionsData ~= p;
2252     }
2253 
2254     NonterminalID[size_t] regarrayNonterminals;
2255     SymbolWithConstraint[][Tuple!(size_t, size_t)] regarrayEdgeSymbols;
2256     NonterminalID[Tuple!(size_t, size_t)] regarrayEdgeNonterminals;
2257 
2258     bool isFirstStateDest;
2259     foreach (id; g.nodeIDs)
2260     {
2261         auto n = g.get(id);
2262         foreach (i, e; g.getEdges(id))
2263         {
2264             if (e.next.id == g.start.id)
2265                 isFirstStateDest = true;
2266             auto fromTo = tuple!(size_t, size_t)(id.id, e.next.id);
2267             if (fromTo !in regarrayEdgeSymbols)
2268                 regarrayEdgeSymbols[fromTo] = [];
2269             regarrayEdgeSymbols[fromTo].addOnce(e.symbol);
2270         }
2271     }
2272     bool needsFirstState = isFirstStateDest || g.get(g.start).results.length > 0;
2273 
2274     foreach (id; g.nodeIDs)
2275     {
2276         if (id == g.start && !needsFirstState)
2277             continue;
2278         string name;
2279         auto results = g.get(id).results;
2280         if (results.length == 1)
2281             name = text("$regarray_", realGrammar.nonterminals[results[0]].name, "_", id.id);
2282         else
2283             name = text("$regarray_", id.id);
2284         NonterminalID n = newGrammar.nonterminals.id(name);
2285         regarrayNonterminals[id.id] = n;
2286         newGrammar.nonterminals[n].annotations.add("array");
2287         newGrammar.nonterminals[n].annotations.add("directUnwrap");
2288     }
2289 
2290     foreach (fromTo; regarrayEdgeSymbols.sortedKeys)
2291     {
2292         auto symbols = regarrayEdgeSymbols[fromTo];
2293         SymbolInstance s;
2294         if (symbols.length == 1)
2295         {
2296             s = SymbolInstance(symbols[0].symbol);
2297             s.annotations.add("excludeDirectUnwrap");
2298             s.negLookaheads = symbols[0].constraint.negLookaheads;
2299         }
2300         else
2301         {
2302             string name;
2303             name = text("$regarrayedge_", fromTo[0], "_", fromTo[1]);
2304             NonterminalID n = newGrammar.nonterminals.id(name);
2305             newGrammar.nonterminals[n].annotations.add("directUnwrap");
2306             s = SymbolInstance(n);
2307             regarrayEdgeNonterminals[fromTo] = n;
2308         }
2309 
2310         Production* p2 = new Production();
2311         p2.nonterminalID = regarrayNonterminals[fromTo[1]];
2312         if (!isFirstStateDest && fromTo[0] == 0)
2313             continue;
2314         else
2315         {
2316             SymbolInstance s0 = SymbolInstance(regarrayNonterminals[fromTo[0]]);
2317             s0.annotations.add("inheritAnyTag");
2318             p2.symbols = [s0, s];
2319         }
2320         p2.annotations = Annotations();
2321         newGrammar.addProduction(p2);
2322     }
2323 
2324     foreach (fromTo; regarrayEdgeSymbols.sortedKeys)
2325     {
2326         auto symbols = regarrayEdgeSymbols[fromTo];
2327         if (symbols.length == 1)
2328             continue;
2329         foreach (symbol; symbols)
2330         {
2331             Production* p2 = new Production();
2332             p2.nonterminalID = regarrayEdgeNonterminals[fromTo];
2333             SymbolInstance si = SymbolInstance(symbol.symbol, "", "", true);
2334             si.annotations.add("excludeDirectUnwrap");
2335             si.negLookaheads = symbol.constraint.negLookaheads;
2336             p2.symbols = [si];
2337             p2.annotations = Annotations();
2338             newGrammar.addProduction(p2);
2339         }
2340     }
2341 
2342     bool[size_t] startEdgeDone;
2343     foreach (e; g.getEdges(g.start))
2344     {
2345         if (e.next.id in startEdgeDone)
2346             continue;
2347         startEdgeDone[e.next.id] = true;
2348 
2349         auto fromTo = tuple!(size_t, size_t)(g.start.id, e.next.id);
2350         SymbolInstance s;
2351         if (regarrayEdgeSymbols[fromTo].length == 1)
2352         {
2353             s = SymbolInstance(regarrayEdgeSymbols[fromTo][0].symbol);
2354             s.annotations.add("excludeDirectUnwrap");
2355             s.negLookaheads = regarrayEdgeSymbols[fromTo][0].constraint.negLookaheads;
2356         }
2357         else
2358         {
2359             s = SymbolInstance(regarrayEdgeNonterminals[fromTo]);
2360             s.annotations.add("inheritAnyTag");
2361         }
2362 
2363         Production* p2 = new Production();
2364         p2.nonterminalID = regarrayNonterminals[e.next.id];
2365         p2.symbols = [s];
2366         p2.annotations = Annotations();
2367         newGrammar.addProduction(p2);
2368     }
2369 
2370     foreach (r; g.get(g.start).results)
2371     {
2372         Production* p2 = new Production();
2373         p2.nonterminalID = r;
2374         p2.symbols = [];
2375         p2.annotations = Annotations();
2376         newGrammar.addProduction(p2);
2377     }
2378 
2379     foreach (id; g.nodeIDs)
2380     {
2381         if (id == g.start && !isFirstStateDest)
2382             continue;
2383         auto n = g.get(id);
2384         foreach (r; n.results)
2385         {
2386             Production* p2 = new Production();
2387             p2.nonterminalID = r;
2388             SymbolInstance s0 = SymbolInstance(regarrayNonterminals[id.id], "", "", true);
2389             s0.annotations.add("inheritAnyTag");
2390             p2.symbols = [s0];
2391             p2.annotations = Annotations();
2392             newGrammar.addProduction(p2);
2393             newGrammar.nonterminals[r].annotations.add("directUnwrap");
2394         }
2395     }
2396 
2397     newGrammar.calcNonterminalCanBeEmpty();
2398     newGrammar.fillProductionsForNonterminal();
2399     newGrammar.calcNonterminalTypes();
2400 
2401     checkGrammar(newGrammar);
2402 
2403     return newGrammar;
2404 }
2405 
2406 void writeFinalGrammarFile(string finalgrammarfilename, const EBNFGrammar grammar,
2407         const EBNFGrammar lexerGrammar)
2408 {
2409     if (finalgrammarfilename.length)
2410     {
2411         File f = File(finalgrammarfilename, "w");
2412 
2413         foreach (n; grammar.nonterminals.allIDs)
2414         {
2415             f.writeln(grammar.getSymbolName(n), grammar.nonterminals[n].annotations.toStringCode());
2416             foreach (i, p; grammar.getProductions(n))
2417             {
2418                 f.write("\t", i ? "|" : "=", grammar.productionStringRhs(p));
2419                 if (p.symbols.length == 0)
2420                     f.write(" @empty");
2421                 if (p.isVirtual)
2422                     f.write(" [virtual]");
2423                 f.writeln();
2424             }
2425             f.writeln("\t;");
2426         }
2427         f.writeln("\n// Lexer grammar:\n");
2428         bool[NonterminalID] startNonterminals;
2429         foreach (n; lexerGrammar.startNonterminals)
2430             startNonterminals[n.nonterminal] = true;
2431         foreach (n; lexerGrammar.nonterminals.allIDs)
2432         {
2433             if (lexerGrammar.getProductions(n).length == 0)
2434                 continue;
2435             if (n in startNonterminals)
2436                 f.write("token ");
2437             else
2438                 f.write("fragment ");
2439             f.writeln(lexerGrammar.getSymbolName(n),
2440                     lexerGrammar.nonterminals[n].annotations.toStringCode());
2441             foreach (i, p; lexerGrammar.getProductions(n))
2442             {
2443                 f.write("\t", i ? "|" : "=", lexerGrammar.productionStringRhs(p));
2444                 if (p.symbols.length == 0)
2445                     f.write(" @empty");
2446                 if (p.isVirtual)
2447                     f.write(" [virtual]");
2448                 f.writeln();
2449             }
2450             f.writeln("\t;");
2451         }
2452     }
2453 }