1 
2 //          Copyright Tim Schendekehl 2023.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          https://www.boost.org/LICENSE_1_0.txt)
6 
7 module dparsergen.generator.parsercodegencommon;
8 import dparsergen.core.utils;
9 import dparsergen.generator.codewriter;
10 import dparsergen.generator.grammar;
11 import dparsergen.generator.ids;
12 import dparsergen.generator.parser;
13 import dparsergen.generator.production;
14 import std.algorithm;
15 import std.conv;
16 import std.range;
17 
18 string nonterminalIDCode(EBNFGrammar grammar, NonterminalID nonterminalID)
19 {
20     if (nonterminalID.id == SymbolID.max)
21         return "SymbolID.max";
22     if (nonterminalID.id >= grammar.nonterminals.vals.length)
23         return text(grammar.startNonterminalID + nonterminalID.id);
24     return text(grammar.startNonterminalID + nonterminalID.id, "/*",
25             grammar.nonterminals[nonterminalID].name, "*/");
26 }
27 
28 string tokenDCode(string tokName)
29 {
30     if (tokName.startsWith('\"') || tokName.startsWith('\''))
31     {
32         string r = "q{";
33         r ~= tokName;
34         r ~= "}";
35         return r;
36     }
37     else if (tokName.startsWith('['))
38     {
39         assert(tokName[$ - 1] == ']');
40         string r = "\"[";
41         r ~= escapeD(tokName[1 .. $ - 1]);
42         r ~= "]\"";
43         return r;
44     }
45     else if (tokName == "$end")
46         return "\"" ~ tokName ~ "\"";
47     else
48         return "\"" ~ escapeD(tokName) ~ "\"";
49 }
50 
51 string tokenDCode(const Token tok)
52 {
53     return tokenDCode(tok.name);
54 }
55 
56 bool isIdentifierStr(string s)
57 {
58     foreach (char c; s)
59     {
60         if (!c.inCharSet!"_0-9a-zA-Z")
61             return false;
62     }
63     return true;
64 }
65 
66 string parseFunctionName(LRGraph graph, size_t stateNr, string prefix = "parse")
67 in (stateNr < graph.states.length)
68 {
69     if (graph.states[stateNr].elements.length)
70     {
71         auto e = graph.states[stateNr].elements[0];
72         if (e.isStartElement && e.dotPos == 0)
73         {
74             string name = graph.grammar.getSymbolName(e.production.symbols[0]);
75             if (graph.states[stateNr].isStartNode && isIdentifierStr(name))
76                 return text(prefix, name, "/*", stateNr, "*/");
77             else
78                 return text(prefix, stateNr, "/*", name, "*/");
79         }
80     }
81     return text(prefix, stateNr);
82 }
83 
84 string reduceFunctionName(LRGraph graph, const Production* production, string prefix = "reduce")
85 {
86     string name = graph.grammar.getSymbolName(production.nonterminalID);
87     if (isIdentifierStr(name))
88         return text(prefix, production.productionID, "_", name, "/*",
89                 graph.grammar.productionString(production), "*/");
90     else
91         return text(prefix, production.productionID, "/*",
92                 graph.grammar.productionString(production), "*/");
93 }
94 
95 string nonterminalFlagsToCode(NonterminalFlags t)
96 {
97     if (t == NonterminalFlags.none)
98         return "NonterminalFlags.none";
99     string r;
100     static foreach (n; [
101             "empty", "nonterminal", "string", "array", "arrayOfNonterminal",
102             "arrayOfString"
103         ])
104     {
105         {
106             mixin("NonterminalFlags x = NonterminalFlags." ~ n ~ ";");
107             if (t & x)
108             {
109                 r ~= " | NonterminalFlags." ~ n;
110                 t = t & ~x;
111             }
112         }
113     }
114     assert(t == NonterminalFlags.none);
115     return r[3 .. $];
116 }
117 
118 string createAllTokensCode(EBNFGrammar grammar)
119 {
120     string allTokensCode;
121     foreach (i, t; grammar.tokens.vals)
122     {
123         allTokensCode ~= text("    /* ", i + grammar.startTokenID,
124                 ": */ immutable(Token)(", t.tokenDCode, ", ", t.annotations.toString, "),\n");
125     }
126     return allTokensCode;
127 }
128 
129 string createAllNonterminalsCode(EBNFGrammar grammar)
130 {
131     string allNonterminalsCode;
132     foreach (i, n; grammar.nonterminals.vals)
133     {
134         allNonterminalsCode ~= text("    /* ", i + grammar.startNonterminalID, ": */ immutable(Nonterminal)(", "\"",
135                 n.name.escapeD, "\", ", n.flags.nonterminalFlagsToCode, ", ",
136                 n.annotations.toString, ", [", n.buildNonterminals.map!(
137                     x => (x + grammar.startNonterminalID).text).joiner(", "), "]),\n");
138     }
139     return allNonterminalsCode;
140 }
141 
142 string createAllProductionsCode(EBNFGrammar grammar)
143 {
144     string code;
145     foreach (t; grammar.origGrammar.productionsData)
146     {
147         if (t is null)
148         {
149             code ~= "    immutable(Production)(),\n";
150         }
151         else
152         {
153             code ~= text("    // ", t.productionID + grammar.startProductionID,
154                     ": ", grammar.productionString(t), "\n");
155             code ~= text("    immutable(Production)(immutable(NonterminalID)(",
156                     t.nonterminalID.id + grammar.startNonterminalID, "), [");
157 
158             if (t.symbols.length)
159             {
160                 foreach (i, s; t.symbols)
161                 {
162                     code ~= text("\n                immutable(SymbolInstance)(",
163                         s.symbol, ", ",
164                         s.subToken.toDStringLiteral, ", ",
165                         s.symbolInstanceName.toDStringLiteral, ", ",
166                         s.unwrapProduction, ", ",
167                         s.dropNode, ", ",
168                         s.annotations, ", ",
169                         s.negLookaheads, ")",
170                         i + 1 < t.symbols.length ? "," : "");
171                 }
172                 code ~= "\n            ";
173             }
174 
175             code ~= text("], ",
176                     t.annotations.toString, ", ",
177                     t.negLookaheads, ", ",
178                     t.negLookaheadsAnytoken, ", ",
179                     t.isVirtual);
180 
181             code ~= text("),\n");
182         }
183     }
184     return code;
185 }
186 
187 void createParseStateComments(ref CodeWriter code, LRGraph graph,
188         const LRElementSet elements, const NonterminalID[][] stackDelayedReduce = [
189         ])
190 {
191     auto grammar = graph.grammar;
192 
193     if (elements.simpleLLState)
194         code.writeln("// simpleLLState: ", grammar.getSymbolName(elements.onlyNonterminal));
195 
196     assert(stackDelayedReduce.length == 0
197             || stackDelayedReduce.length == elements.stackSize, code.data);
198     foreach (i, nonterminals; stackDelayedReduce)
199     {
200         if (nonterminals.length == 0)
201             continue;
202         code.write("// delayed reduce at ", long(i) - long(elements.stackSize), ":");
203         foreach (n; nonterminals)
204             code.write(" ", grammar.getSymbolName(n));
205         code.writeln();
206     }
207 
208     {
209         Appender!(string) app;
210         enum numColumns = 4;
211         size_t[numColumns + 1][] positions;
212         positions.length = elements.length;
213 
214         foreach (k, e; elements)
215         {
216             size_t col = 0;
217             positions[k][col++] = app.data.length;
218 
219             app.put("//  ");
220             app.put(grammar.getSymbolName(e.production.nonterminalID));
221 
222             positions[k][col++] = app.data.length;
223 
224             app.put(" -> ");
225 
226             positions[k][col++] = app.data.length;
227 
228             if (e.extraConstraint.disabled)
229                 app.put("@@disabled@@ ");
230 
231             e.toStringOnlyProductionBeforeDot(graph, app);
232 
233             positions[k][col++] = app.data.length;
234 
235             app.put(".");
236             if (e.isNextValid(grammar))
237             {
238                 auto constraint = graph.grammar.nextSymbolWithConstraint(e.extraConstraint,
239                         e.next(grammar), e.dotPos == 0).constraint;
240                 if (e.dotPos == 0)
241                     foreach (l; e.extraConstraint.negLookaheads)
242                     {
243                         app.put("!");
244                         app.put(grammar.getSymbolName(l));
245                         app.put(" ");
246                     }
247                 foreach (t; constraint.tags)
248                 {
249                     if (t.reject)
250                         app.put(text("@rejectTag(", grammar.tags[t.tag].name, ") "));
251                     if (t.needed)
252                         app.put(text("@needTag(", grammar.tags[t.tag].name, ") "));
253                 }
254             }
255             e.toStringOnlyProductionAfterDot(graph, app);
256 
257             e.toStringOnlyLookahead(graph, app);
258 
259             e.toStringOnlyEnd(graph, app);
260 
261             positions[k][col++] = app.data.length;
262         }
263 
264         size_t[numColumns] columnSizes;
265         foreach (k, e; elements)
266         {
267             foreach (col; 0 .. numColumns)
268             {
269                 size_t size = positions[k][col + 1] - positions[k][col];
270                 if (size > columnSizes[col])
271                     columnSizes[col] = size;
272             }
273         }
274         foreach (k, e; elements)
275         {
276             foreach (col; 0 .. numColumns)
277             {
278                 size_t size = positions[k][col + 1] - positions[k][col];
279                 bool alignRight = col == 2;
280                 if (alignRight)
281                     foreach (_; size .. columnSizes[col])
282                         code.write(' ');
283                 code.write(app.data[positions[k][col] .. positions[k][col + 1]]);
284                 if (!alignRight && col + 1 < numColumns)
285                     foreach (_; size .. columnSizes[col])
286                         code.write(' ');
287             }
288             code.writeln();
289         }
290     }
291 
292     if (graph.globalOptions.directUnwrap)
293     {
294         bool[NonterminalWithConstraint] directUnwrapDone;
295         foreach (k, e; elements)
296         {
297             if (!e.isNextNonterminal(grammar))
298                 continue;
299             auto n = e.next(grammar);
300             if (elements.descentNonterminals.canFind(n.toNonterminalID))
301                 continue;
302             if (NonterminalWithConstraint(n.toNonterminalID,
303                     Constraint(n.negLookaheads, n.tags)) in directUnwrapDone)
304                 continue;
305             directUnwrapDone[NonterminalWithConstraint(n.toNonterminalID,
306                         Constraint(n.negLookaheads, n.tags))] = true;
307             foreach (m2; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap))
308             {
309                 if (n == m2.nonterminalID)
310                     continue;
311                 code.write("//  ", grammar.getSymbolName(n.toNonterminalID),
312                         " ---> ", grammar.getSymbolName(m2.nonterminalID));
313                 if (m2.constraint.disabled)
314                     code.write("     @@disabled@@");
315                 if (m2.constraint.negLookaheads.length)
316                 {
317                     code.write("     negLookahead:");
318                     foreach (l; m2.constraint.negLookaheads)
319                         code.write(" ", grammar.getSymbolName(l));
320                 }
321                 if (m2.constraint.tags.length)
322                 {
323                     code.write("     tags:");
324                     foreach (t; m2.constraint.tags)
325                     {
326                         if (t.reject)
327                             code.write(" @rejectTag(", grammar.tags[t.tag].name, ")");
328                         if (t.needed)
329                             code.write(" @needTag(", grammar.tags[t.tag].name, ")");
330                     }
331                 }
332                 code.writeln();
333             }
334         }
335     }
336     foreach (n; elements.descentNonterminals)
337     {
338         code.write("// descent ", grammar.getSymbolName(n));
339         code.writeln();
340     }
341 }
342 
343 void createParseStateComments(ref CodeWriter code, LRGraph graph, size_t stateNr,
344         const LRGraphNode node)
345 {
346     auto grammar = graph.grammar;
347 
348     code.writeln("// path: ",
349             node.shortestSymbolPath.map!(s => grammar.getSymbolName(s)).joiner(" "));
350     code.writeln("// type: ", node.type);
351 
352     createParseStateComments(code, graph, node.elements, node.stackDelayedReduce);
353 }
354 
355 struct CommaGen
356 {
357     string commaStr = ", ";
358     bool alreadyCalled;
359     this(string commaStr)
360     {
361         this.commaStr = commaStr;
362     }
363 
364     @disable this(this);
365     string opCall(bool setCalled = true)
366     {
367         if (alreadyCalled)
368             return commaStr;
369         if (setCalled)
370             alreadyCalled = true;
371         return "";
372     }
373 }
374 
375 string symbolNameCode(EBNFGrammar grammar, Symbol s)
376 {
377     string name = grammar.getSymbolName(s);
378     if (isIdentifierStr(name))
379     {
380         return "Symbol" ~ name;
381     }
382     else
383     {
384         if (s.isToken)
385             return text("Token", s.id, "/+", name, "+/");
386         else
387             return text("Nonterminal", s.id, "/+", name, "+/");
388     }
389 }
390 
391 string reduceTagsCode(alias stackTagVar)(EBNFGrammar grammar, const Production* production)
392 {
393     assert(grammar.tags.vals.length);
394 
395     string r;
396     bool isEmpty = true;
397     foreach (t; production.tags)
398     {
399         if (r.length)
400             r ~= " ";
401         if (!isEmpty)
402             r ~= "| ";
403         isEmpty = false;
404         r ~= "Tag." ~ grammar.tags[t].name;
405     }
406 
407     foreach (k; 1 .. production.symbols.length + 1)
408     {
409         if (production.symbols[$ - k].isToken)
410             continue;
411         auto possibleTags = grammar
412             .nonterminals[production.symbols[$ - k].toNonterminalID].possibleTags;
413         if (production.symbols[$ - k].unwrapProduction
414                 || production.symbols[$ - k].annotations.contains!"inheritAnyTag"
415                 || (production.symbols.length == 1
416                     && production.symbols[0].symbol == production.nonterminalID))
417         {
418             if (r.length)
419                 r ~= " ";
420             if (possibleTags.length == 0)
421                 r ~= "/+";
422             if (!isEmpty)
423                 r ~= "| ";
424             r ~= stackTagVar(k);
425             if (possibleTags.length == 0)
426                 r ~= "+/";
427             if (possibleTags.length)
428                 isEmpty = false;
429         }
430         else
431         {
432             foreach (t; production.symbols[$ - k].tags)
433             {
434                 if (t.inherit)
435                 {
436                     if (r.length)
437                         r ~= " ";
438                     bool possible = possibleTags.canFind(t.tag);
439                     if (!possible)
440                         r ~= "/+";
441                     if (!isEmpty)
442                         r ~= "| ";
443                     r ~= text("(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")");
444                     if (!possible)
445                         r ~= "+/";
446                     if (possible)
447                         isEmpty = false;
448                 }
449             }
450         }
451     }
452     if (isEmpty)
453     {
454         if (r.length)
455             r ~= " ";
456         r ~= "Tag.init";
457     }
458     return r;
459 }
460 
461 bool checkTagsCode(alias stackTagVar)(ref CodeWriter code, EBNFGrammar grammar,
462         const Production* production)
463 {
464     bool needsIf;
465     foreach (k; 1 .. production.symbols.length + 1)
466     {
467         if (production.symbols[$ - k].isToken)
468             continue;
469         auto possibleTags = grammar
470             .nonterminals[production.symbols[$ - k].toNonterminalID].possibleTags;
471         foreach (t; production.symbols[$ - k].tags)
472         {
473             if (!possibleTags.canFind(t.tag))
474                 continue;
475             if (t.reject || t.needed)
476             {
477                 if (!needsIf)
478                     code.write("if (");
479                 else
480                     code.write(" || ");
481                 needsIf = true;
482 
483                 if (t.reject && t.needed)
484                     code.write("true /* Tag needed and rejected: ", grammar.tags[t.tag].name, "*/");
485                 else if (t.reject)
486                     code.write("(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")");
487                 else if (t.needed)
488                     code.write("!(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")");
489             }
490         }
491     }
492     if (needsIf)
493         code.writeln(")");
494     return needsIf;
495 }
496 
497 string tokenSetCode(EBNFGrammar grammar, const BitSet!TokenID set, string var, bool inverted = false)
498 {
499     size_t count;
500     string inner;
501     foreach (t; set.bitsSet)
502     {
503         if (t == grammar.tokens.getID("$end"))
504             continue;
505         if (count)
506             inner ~= inverted ? " && " : " || ";
507         count++;
508         inner ~= var ~ ".front.symbol " ~ (inverted
509                 ? "!=" : "==") ~ " Lexer.tokenID!" ~ grammar.tokens[t].tokenDCode;
510     }
511     string r;
512     if (set[grammar.tokens.getID("$end")])
513     {
514         r = (inverted ? "!" : "") ~ var ~ ".empty";
515         if (count)
516             r ~= (inverted ? " && " : " || ") ~ inner;
517     }
518     else
519     {
520         r = (inverted ? "" : "!") ~ var ~ ".empty";
521         if (count > 1)
522             r ~= (inverted ? " || " : " && ") ~ "(" ~ inner ~ ")";
523         else if (count)
524             r ~= (inverted ? " || " : " && ") ~ inner;
525     }
526     return r;
527 }