1 2 // Copyright Tim Schendekehl 2023. 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 module dparsergen.generator.parsercodegencommon; 8 import dparsergen.core.utils; 9 import dparsergen.generator.codewriter; 10 import dparsergen.generator.grammar; 11 import dparsergen.generator.ids; 12 import dparsergen.generator.parser; 13 import dparsergen.generator.production; 14 import std.algorithm; 15 import std.conv; 16 import std.range; 17 18 string nonterminalIDCode(EBNFGrammar grammar, NonterminalID nonterminalID) 19 { 20 if (nonterminalID.id == SymbolID.max) 21 return "SymbolID.max"; 22 if (nonterminalID.id >= grammar.nonterminals.vals.length) 23 return text(grammar.startNonterminalID + nonterminalID.id); 24 return text(grammar.startNonterminalID + nonterminalID.id, "/*", 25 grammar.nonterminals[nonterminalID].name, "*/"); 26 } 27 28 string tokenDCode(string tokName) 29 { 30 if (tokName.startsWith('\"') || tokName.startsWith('\'')) 31 { 32 string r = "q{"; 33 r ~= tokName; 34 r ~= "}"; 35 return r; 36 } 37 else if (tokName.startsWith('[')) 38 { 39 assert(tokName[$ - 1] == ']'); 40 string r = "\"["; 41 r ~= escapeD(tokName[1 .. $ - 1]); 42 r ~= "]\""; 43 return r; 44 } 45 else if (tokName == "$end") 46 return "\"" ~ tokName ~ "\""; 47 else 48 return "\"" ~ escapeD(tokName) ~ "\""; 49 } 50 51 string tokenDCode(const Token tok) 52 { 53 return tokenDCode(tok.name); 54 } 55 56 bool isIdentifierStr(string s) 57 { 58 foreach (char c; s) 59 { 60 if (!c.inCharSet!"_0-9a-zA-Z") 61 return false; 62 } 63 return true; 64 } 65 66 string parseFunctionName(LRGraph graph, size_t stateNr, string prefix = "parse") 67 in (stateNr < graph.states.length) 68 { 69 if (graph.states[stateNr].elements.length) 70 { 71 auto e = graph.states[stateNr].elements[0]; 72 if (e.isStartElement && e.dotPos == 0) 73 { 74 string name = graph.grammar.getSymbolName(e.production.symbols[0]); 75 if (graph.states[stateNr].isStartNode && isIdentifierStr(name)) 76 return text(prefix, name, "/*", stateNr, "*/"); 77 else 78 return text(prefix, stateNr, "/*", name, "*/"); 79 } 80 } 81 return text(prefix, stateNr); 82 } 83 84 string reduceFunctionName(LRGraph graph, const Production* production, string prefix = "reduce") 85 { 86 string name = graph.grammar.getSymbolName(production.nonterminalID); 87 if (isIdentifierStr(name)) 88 return text(prefix, production.productionID, "_", name, "/*", 89 graph.grammar.productionString(production), "*/"); 90 else 91 return text(prefix, production.productionID, "/*", 92 graph.grammar.productionString(production), "*/"); 93 } 94 95 string nonterminalFlagsToCode(NonterminalFlags t) 96 { 97 if (t == NonterminalFlags.none) 98 return "NonterminalFlags.none"; 99 string r; 100 static foreach (n; [ 101 "empty", "nonterminal", "string", "array", "arrayOfNonterminal", 102 "arrayOfString" 103 ]) 104 { 105 { 106 mixin("NonterminalFlags x = NonterminalFlags." ~ n ~ ";"); 107 if (t & x) 108 { 109 r ~= " | NonterminalFlags." ~ n; 110 t = t & ~x; 111 } 112 } 113 } 114 assert(t == NonterminalFlags.none); 115 return r[3 .. $]; 116 } 117 118 string createAllTokensCode(EBNFGrammar grammar) 119 { 120 string allTokensCode; 121 foreach (i, t; grammar.tokens.vals) 122 { 123 allTokensCode ~= text(" /* ", i + grammar.startTokenID, 124 ": */ immutable(Token)(", t.tokenDCode, ", ", t.annotations.toString, "),\n"); 125 } 126 return allTokensCode; 127 } 128 129 string createAllNonterminalsCode(EBNFGrammar grammar) 130 { 131 string allNonterminalsCode; 132 foreach (i, n; grammar.nonterminals.vals) 133 { 134 allNonterminalsCode ~= text(" /* ", i + grammar.startNonterminalID, ": */ immutable(Nonterminal)(", "\"", 135 n.name.escapeD, "\", ", n.flags.nonterminalFlagsToCode, ", ", 136 n.annotations.toString, ", [", n.buildNonterminals.map!( 137 x => (x + grammar.startNonterminalID).text).joiner(", "), "]),\n"); 138 } 139 return allNonterminalsCode; 140 } 141 142 string createAllProductionsCode(EBNFGrammar grammar) 143 { 144 string code; 145 foreach (t; grammar.origGrammar.productionsData) 146 { 147 if (t is null) 148 { 149 code ~= " immutable(Production)(),\n"; 150 } 151 else 152 { 153 code ~= text(" // ", t.productionID + grammar.startProductionID, 154 ": ", grammar.productionString(t), "\n"); 155 code ~= text(" immutable(Production)(immutable(NonterminalID)(", 156 t.nonterminalID.id + grammar.startNonterminalID, "), ["); 157 158 if (t.symbols.length) 159 { 160 foreach (i, s; t.symbols) 161 { 162 code ~= text("\n immutable(SymbolInstance)(", 163 s.symbol, ", ", 164 s.subToken.toDStringLiteral, ", ", 165 s.symbolInstanceName.toDStringLiteral, ", ", 166 s.unwrapProduction, ", ", 167 s.dropNode, ", ", 168 s.annotations, ", ", 169 s.negLookaheads, ")", 170 i + 1 < t.symbols.length ? "," : ""); 171 } 172 code ~= "\n "; 173 } 174 175 code ~= text("], ", 176 t.annotations.toString, ", ", 177 t.negLookaheads, ", ", 178 t.negLookaheadsAnytoken, ", ", 179 t.isVirtual); 180 181 code ~= text("),\n"); 182 } 183 } 184 return code; 185 } 186 187 void createParseStateComments(ref CodeWriter code, LRGraph graph, 188 const LRElementSet elements, const NonterminalID[][] stackDelayedReduce = [ 189 ]) 190 { 191 auto grammar = graph.grammar; 192 193 if (elements.simpleLLState) 194 code.writeln("// simpleLLState: ", grammar.getSymbolName(elements.onlyNonterminal)); 195 196 assert(stackDelayedReduce.length == 0 197 || stackDelayedReduce.length == elements.stackSize, code.data); 198 foreach (i, nonterminals; stackDelayedReduce) 199 { 200 if (nonterminals.length == 0) 201 continue; 202 code.write("// delayed reduce at ", long(i) - long(elements.stackSize), ":"); 203 foreach (n; nonterminals) 204 code.write(" ", grammar.getSymbolName(n)); 205 code.writeln(); 206 } 207 208 { 209 Appender!(string) app; 210 enum numColumns = 4; 211 size_t[numColumns + 1][] positions; 212 positions.length = elements.length; 213 214 foreach (k, e; elements) 215 { 216 size_t col = 0; 217 positions[k][col++] = app.data.length; 218 219 app.put("// "); 220 app.put(grammar.getSymbolName(e.production.nonterminalID)); 221 222 positions[k][col++] = app.data.length; 223 224 app.put(" -> "); 225 226 positions[k][col++] = app.data.length; 227 228 if (e.extraConstraint.disabled) 229 app.put("@@disabled@@ "); 230 231 e.toStringOnlyProductionBeforeDot(graph, app); 232 233 positions[k][col++] = app.data.length; 234 235 app.put("."); 236 if (e.isNextValid(grammar)) 237 { 238 auto constraint = graph.grammar.nextSymbolWithConstraint(e.extraConstraint, 239 e.next(grammar), e.dotPos == 0).constraint; 240 if (e.dotPos == 0) 241 foreach (l; e.extraConstraint.negLookaheads) 242 { 243 app.put("!"); 244 app.put(grammar.getSymbolName(l)); 245 app.put(" "); 246 } 247 foreach (t; constraint.tags) 248 { 249 if (t.reject) 250 app.put(text("@rejectTag(", grammar.tags[t.tag].name, ") ")); 251 if (t.needed) 252 app.put(text("@needTag(", grammar.tags[t.tag].name, ") ")); 253 } 254 } 255 e.toStringOnlyProductionAfterDot(graph, app); 256 257 e.toStringOnlyLookahead(graph, app); 258 259 e.toStringOnlyEnd(graph, app); 260 261 positions[k][col++] = app.data.length; 262 } 263 264 size_t[numColumns] columnSizes; 265 foreach (k, e; elements) 266 { 267 foreach (col; 0 .. numColumns) 268 { 269 size_t size = positions[k][col + 1] - positions[k][col]; 270 if (size > columnSizes[col]) 271 columnSizes[col] = size; 272 } 273 } 274 foreach (k, e; elements) 275 { 276 foreach (col; 0 .. numColumns) 277 { 278 size_t size = positions[k][col + 1] - positions[k][col]; 279 bool alignRight = col == 2; 280 if (alignRight) 281 foreach (_; size .. columnSizes[col]) 282 code.write(' '); 283 code.write(app.data[positions[k][col] .. positions[k][col + 1]]); 284 if (!alignRight && col + 1 < numColumns) 285 foreach (_; size .. columnSizes[col]) 286 code.write(' '); 287 } 288 code.writeln(); 289 } 290 } 291 292 if (graph.globalOptions.directUnwrap) 293 { 294 bool[NonterminalWithConstraint] directUnwrapDone; 295 foreach (k, e; elements) 296 { 297 if (!e.isNextNonterminal(grammar)) 298 continue; 299 auto n = e.next(grammar); 300 if (elements.descentNonterminals.canFind(n.toNonterminalID)) 301 continue; 302 if (NonterminalWithConstraint(n.toNonterminalID, 303 Constraint(n.negLookaheads, n.tags)) in directUnwrapDone) 304 continue; 305 directUnwrapDone[NonterminalWithConstraint(n.toNonterminalID, 306 Constraint(n.negLookaheads, n.tags))] = true; 307 foreach (m2; e.nextNonterminals(grammar, graph.globalOptions.directUnwrap)) 308 { 309 if (n == m2.nonterminalID) 310 continue; 311 code.write("// ", grammar.getSymbolName(n.toNonterminalID), 312 " ---> ", grammar.getSymbolName(m2.nonterminalID)); 313 if (m2.constraint.disabled) 314 code.write(" @@disabled@@"); 315 if (m2.constraint.negLookaheads.length) 316 { 317 code.write(" negLookahead:"); 318 foreach (l; m2.constraint.negLookaheads) 319 code.write(" ", grammar.getSymbolName(l)); 320 } 321 if (m2.constraint.tags.length) 322 { 323 code.write(" tags:"); 324 foreach (t; m2.constraint.tags) 325 { 326 if (t.reject) 327 code.write(" @rejectTag(", grammar.tags[t.tag].name, ")"); 328 if (t.needed) 329 code.write(" @needTag(", grammar.tags[t.tag].name, ")"); 330 } 331 } 332 code.writeln(); 333 } 334 } 335 } 336 foreach (n; elements.descentNonterminals) 337 { 338 code.write("// descent ", grammar.getSymbolName(n)); 339 code.writeln(); 340 } 341 } 342 343 void createParseStateComments(ref CodeWriter code, LRGraph graph, size_t stateNr, 344 const LRGraphNode node) 345 { 346 auto grammar = graph.grammar; 347 348 code.writeln("// path: ", 349 node.shortestSymbolPath.map!(s => grammar.getSymbolName(s)).joiner(" ")); 350 code.writeln("// type: ", node.type); 351 352 createParseStateComments(code, graph, node.elements, node.stackDelayedReduce); 353 } 354 355 struct CommaGen 356 { 357 string commaStr = ", "; 358 bool alreadyCalled; 359 this(string commaStr) 360 { 361 this.commaStr = commaStr; 362 } 363 364 @disable this(this); 365 string opCall(bool setCalled = true) 366 { 367 if (alreadyCalled) 368 return commaStr; 369 if (setCalled) 370 alreadyCalled = true; 371 return ""; 372 } 373 } 374 375 string symbolNameCode(EBNFGrammar grammar, Symbol s) 376 { 377 string name = grammar.getSymbolName(s); 378 if (isIdentifierStr(name)) 379 { 380 return "Symbol" ~ name; 381 } 382 else 383 { 384 if (s.isToken) 385 return text("Token", s.id, "/+", name, "+/"); 386 else 387 return text("Nonterminal", s.id, "/+", name, "+/"); 388 } 389 } 390 391 string reduceTagsCode(alias stackTagVar)(EBNFGrammar grammar, const Production* production) 392 { 393 assert(grammar.tags.vals.length); 394 395 string r; 396 bool isEmpty = true; 397 foreach (t; production.tags) 398 { 399 if (r.length) 400 r ~= " "; 401 if (!isEmpty) 402 r ~= "| "; 403 isEmpty = false; 404 r ~= "Tag." ~ grammar.tags[t].name; 405 } 406 407 foreach (k; 1 .. production.symbols.length + 1) 408 { 409 if (production.symbols[$ - k].isToken) 410 continue; 411 auto possibleTags = grammar 412 .nonterminals[production.symbols[$ - k].toNonterminalID].possibleTags; 413 if (production.symbols[$ - k].unwrapProduction 414 || production.symbols[$ - k].annotations.contains!"inheritAnyTag" 415 || (production.symbols.length == 1 416 && production.symbols[0].symbol == production.nonterminalID)) 417 { 418 if (r.length) 419 r ~= " "; 420 if (possibleTags.length == 0) 421 r ~= "/+"; 422 if (!isEmpty) 423 r ~= "| "; 424 r ~= stackTagVar(k); 425 if (possibleTags.length == 0) 426 r ~= "+/"; 427 if (possibleTags.length) 428 isEmpty = false; 429 } 430 else 431 { 432 foreach (t; production.symbols[$ - k].tags) 433 { 434 if (t.inherit) 435 { 436 if (r.length) 437 r ~= " "; 438 bool possible = possibleTags.canFind(t.tag); 439 if (!possible) 440 r ~= "/+"; 441 if (!isEmpty) 442 r ~= "| "; 443 r ~= text("(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")"); 444 if (!possible) 445 r ~= "+/"; 446 if (possible) 447 isEmpty = false; 448 } 449 } 450 } 451 } 452 if (isEmpty) 453 { 454 if (r.length) 455 r ~= " "; 456 r ~= "Tag.init"; 457 } 458 return r; 459 } 460 461 bool checkTagsCode(alias stackTagVar)(ref CodeWriter code, EBNFGrammar grammar, 462 const Production* production) 463 { 464 bool needsIf; 465 foreach (k; 1 .. production.symbols.length + 1) 466 { 467 if (production.symbols[$ - k].isToken) 468 continue; 469 auto possibleTags = grammar 470 .nonterminals[production.symbols[$ - k].toNonterminalID].possibleTags; 471 foreach (t; production.symbols[$ - k].tags) 472 { 473 if (!possibleTags.canFind(t.tag)) 474 continue; 475 if (t.reject || t.needed) 476 { 477 if (!needsIf) 478 code.write("if ("); 479 else 480 code.write(" || "); 481 needsIf = true; 482 483 if (t.reject && t.needed) 484 code.write("true /* Tag needed and rejected: ", grammar.tags[t.tag].name, "*/"); 485 else if (t.reject) 486 code.write("(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")"); 487 else if (t.needed) 488 code.write("!(", stackTagVar(k), " & Tag.", grammar.tags[t.tag].name, ")"); 489 } 490 } 491 } 492 if (needsIf) 493 code.writeln(")"); 494 return needsIf; 495 } 496 497 string tokenSetCode(EBNFGrammar grammar, const BitSet!TokenID set, string var, bool inverted = false) 498 { 499 size_t count; 500 string inner; 501 foreach (t; set.bitsSet) 502 { 503 if (t == grammar.tokens.getID("$end")) 504 continue; 505 if (count) 506 inner ~= inverted ? " && " : " || "; 507 count++; 508 inner ~= var ~ ".front.symbol " ~ (inverted 509 ? "!=" : "==") ~ " Lexer.tokenID!" ~ grammar.tokens[t].tokenDCode; 510 } 511 string r; 512 if (set[grammar.tokens.getID("$end")]) 513 { 514 r = (inverted ? "!" : "") ~ var ~ ".empty"; 515 if (count) 516 r ~= (inverted ? " && " : " || ") ~ inner; 517 } 518 else 519 { 520 r = (inverted ? "" : "!") ~ var ~ ".empty"; 521 if (count > 1) 522 r ~= (inverted ? " || " : " && ") ~ "(" ~ inner ~ ")"; 523 else if (count) 524 r ~= (inverted ? " || " : " && ") ~ inner; 525 } 526 return r; 527 }