// Copyright Tim Schendekehl 2023.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// https://www.boost.org/LICENSE_1_0.txt)

module dparsergen.generator.generator;
import dparsergen.core.dynamictree;
import dparsergen.core.location;
import dparsergen.core.nodetype;
import dparsergen.core.parseexception;
import dparsergen.core.utils;
import dparsergen.generator.ebnf;
import dparsergen.generator.globaloptions;
import dparsergen.generator.glrparsercodegen;
import dparsergen.generator.grammar;
import dparsergen.generator.grammarebnf;
import dparsergen.generator.grammarebnf_lexer;
import dparsergen.generator.parser;
import dparsergen.generator.parsercodegen;
import dparsergen.generator.production;
import std.algorithm;
import std.conv;
import std.exception;
import std.file;
import std.getopt;
import std.path;
import std.stdio;
import std.string;

void toAnnotations(Tree)(Tree tree, ref Declaration d)
{
    if (tree !is null)
    {
        foreach (i, p; tree.childs)
        {
            assert(p.name == "Annotation");
            assert(p.childs.length == 3);
            if (p.childs[0].content == "@")
            {
                string name = p.childs[1].content;
                if (p.childs[2] !is null)
                {
                    name ~= "(" ~ concatTree(p.childs[2].childs[1]).strip() ~ ")";
                }
                d.annotations ~= name;
            }
            else
                assert(false);
        }
    }
}

Declaration toDeclaration(Tree)(Tree tree)
{
    if (tree.name == "SymbolDeclaration")
    {
        Declaration r;
        auto childs = tree.childs;
        assert(childs.length.among(5, 7));
        if (childs[0] !is null)
        {
            assert(childs[0].name == "DeclarationType");
            if (childs[0].childs[0].content == "token")
                r.type = DeclarationType.token;
            if (childs[0].childs[0].content == "fragment")
                r.type = DeclarationType.fragment;
        }
        r.name = childs[1].content;
        if (childs[2] !is null)
        {
            foreach (i, p; childs[2].childs[1].childs)
            {
                if (i % 2 == 0)
                {
                    assert(p.name == "MacroParameter");
                    if (p.childs.length >= 2)
                    {
                        if (r.variadicParameterIndex != size_t.max)
                            throw new Exception("too many variadic parameters");
                        r.variadicParameterIndex = r.parameters.length;
                    }
                    r.parameters ~= p.childs[0].content;
                }
                else
                    assert(p.content == ",");
            }
        }
        childs[3].toAnnotations(r);
        if (childs.length == 7)
            r.exprTree = childs[$ - 2];
        return r;
    }
    else
        assert(0, tree.toString);
}

typeof(next(Tree.init))[] toArray(alias next, Tree)(Tree tree, string listName)
{
    if (tree is null)
        return [];
    if (tree.name == listName || tree.nodeType == NodeType.array)
    {
        typeof(next(tree))[] r;
        foreach (c; tree.childs)
        {
            if (!c.isToken)
                r ~= toArray!next(c, listName);
        }
        return r;
    }
    if (tree.name != listName)
        return [next(tree)];
    assert(0);
}
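
/// Convert the parse tree of a grammar file into an `EBNF` description,
/// attaching the doc comments collected by the lexer to the grammar as a
/// whole and to the individual symbol declarations.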
EBNF toEBNF(Tree)(Tree tree, DocComment[] docComments)
{
    EBNF r;
    assert(tree.name == "EBNF");
    assert(tree.childs.length == 1);

    size_t numGlobalDocumentation;
    while (numGlobalDocumentation < docComments.length
        && docComments[numGlobalDocumentation].end.line <= tree.start.line)
        numGlobalDocumentation++;
    if (numGlobalDocumentation
        && docComments[numGlobalDocumentation - 1].end.line >= tree.start.line - 1)
    {
        numGlobalDocumentation--;
        while (numGlobalDocumentation
            && docComments[numGlobalDocumentation - 1].end.line
            >= docComments[numGlobalDocumentation].start.line - 1)
            numGlobalDocumentation--;
    }
    foreach (comment; docComments[0 .. numGlobalDocumentation])
    {
        if (r.globalDocumentation.length)
            r.globalDocumentation ~= "\n";
        r.globalDocumentation ~= comment.content;
    }
    docComments = docComments[numGlobalDocumentation .. $];

    void convertTree(Tree tree)
    {
        if (tree.nodeType == NodeType.array)
        {
            foreach (c; tree.childs)
            {
                convertTree(c);
            }
            return;
        }

        if (tree.name == "SymbolDeclaration")
        {
            string documentation;
            while (docComments.length && docComments[0].end.line <= tree.end.line)
            {
                if (documentation.length)
                    documentation ~= "\n";
                documentation ~= docComments[0].content;
                docComments = docComments[1 .. $];
            }

            r.symbols ~= toDeclaration(tree);
            r.symbols[$ - 1].documentation = documentation;
        }
        else if (tree.name == "MatchDeclaration")
        {
            assert(tree.childs.length == 4);
            r.matchingTokens ~= [
                tree.childs[1].childs[0].content, tree.childs[2].childs[0].content
            ];
        }
        else if (tree.name == "Import")
        {
            assert(tree.childs.length == 3);
            r.imports ~= tree.childs[1].content;
        }
        else if (tree.name == "OptionDeclaration")
        {
            assert(tree.childs.length == 2);

            bool found = false;
            static foreach (name; [
                "startTokenID", "startNonterminalID", "startProductionID"
            ])
            {
                if (tree.childs[0].content == name)
                {
                    assert(!found);
                    found = true;
                    mixin("r." ~ name ~ " = to!size_t(tree.childs[1].content);");
                }
            }
            assert(found);
        }
        else
            assert(false, tree.name);
    }

    convertTree(tree.childs[0]);
    return r;
}

struct DocComment
{
    string content;
    LocationAll start;
    LocationAll end;
}

struct LexerWrapper
{
    Lexer!(LocationAll, true) lexer;

    alias Location = LocationAll;
    alias LocationDiff = typeof(Location.init - Location.init);

    this(string input, Location startLocation = Location.init)
    {
        lexer = Lexer!(LocationAll, true)(input, startLocation);
        skipIgnoredTokens();
    }

    enum tokenID(string tok) = lexer.tokenID!(tok);

    string tokenName(size_t id)
    {
        return lexer.tokenName(id);
    }

    ref front()
    {
        return lexer.front;
    }

    bool empty()
    {
        return lexer.empty;
    }

    void popFront()
    {
        lexer.popFront();
        skipIgnoredTokens();
    }

    DocComment[] docComments;

    string cleanDocComment(string ignoredChars)(string content)
    {
        while (content.length && content[0].inCharSet!(ignoredChars))
            content = content[1 .. $];
        if (content.startsWith("\r\n"))
            content = content[2 .. $];
        else if (content.startsWith("\n"))
            content = content[1 .. $];
        while (content.length && content[$ - 1].inCharSet!ignoredChars)
            content = content[0 .. $ - 1];
        bool first = true;
        string commonPrefix;
        foreach (line; content.lineSplitter)
        {
            line = line.stripRight;
            size_t i;
            if (first)
            {
                while (i < line.length && line[i].inCharSet!ignoredChars)
                    i++;
            }
            else
            {
                while (i < line.length && i < commonPrefix.length && line[i] == commonPrefix[i])
                    i++;
                if (i >= line.length)
                    continue;
            }
            commonPrefix = line[0 .. i];
            first = false;
        }
        string content2;
        foreach (line; content.lineSplitter)
        {
            if (line.length > commonPrefix.length)
                content2 ~= line[commonPrefix.length .. $].stripRight ~ "\n";
            else
                content2 ~= "\n";
        }
        return content2;
    }
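
    /// Advance the lexer past ignored tokens, collecting `///`, `/** */` and
    /// `/++ +/` comments as documentation along the way.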
    void skipIgnoredTokens()
    {
        while (!lexer.empty && lexer.front.isIgnoreToken)
        {
            if (lexer.front.symbol == lexer.tokenID!"LineComment"
                && lexer.front.content.startsWith("///"))
            {
                docComments ~= DocComment(lexer.front.content[3 .. $].strip() ~ "\n",
                    lexer.front.currentLocation, lexer.front.currentTokenEnd);
            }
            if (lexer.front.symbol == lexer.tokenID!"BlockComment"
                && lexer.front.content.startsWith("/**"))
            {
                string content = cleanDocComment!"* \t"(lexer.front.content[3 .. $ - 2]);
                docComments ~= DocComment(content, lexer.front.currentLocation,
                    lexer.front.currentTokenEnd);
            }
            if (lexer.front.symbol == lexer.tokenID!"NestingBlockComment"
                && lexer.front.content.startsWith("/++"))
            {
                string content = cleanDocComment!"+ \t"(lexer.front.content[3 .. $ - 2]);
                docComments ~= DocComment(content, lexer.front.currentLocation,
                    lexer.front.currentTokenEnd);
            }
            lexer.popFront;
        }
    }
}

EBNF parseEBNF2(string str, string filename)
{
    alias Creator = DynamicParseTreeCreator!(dparsergen.generator.grammarebnf,
        LocationAll, LocationRangeStartEnd);
    auto creator = new Creator;
    EBNF ebnf;
    try
    {
        LexerWrapper lexer = LexerWrapper(str, LocationAll.init);
        Tree tree = parse!(Creator, LexerWrapper)(lexer, creator);
        ebnf = toEBNF(tree, lexer.docComments);
        if (!lexer.empty)
        {
            throw new SingleParseException!LocationAll("input left after parse",
                lexer.front.currentLocation, lexer.front.currentTokenEnd);
        }
    }
    catch (ParseException e)
    {
        string message;
        auto e2 = cast(SingleParseException!LocationAll) e.maxEndException();
        message ~= filename ~ ":";
        if (e2 !is null)
        {
            message ~= text(e2.markStart.line + 1, ":", e2.markStart.offset, ":");
        }
        message ~= " ";
        if (e2 !is null)
            e2.toString(str, (data) { message ~= data; },
                ExceptionStringFlags.noBacktrace | ExceptionStringFlags.noLocation);
        else
            message ~= e.msg;
        message = message.strip();
        stderr.writeln(message);
        throw e;
    }

    return ebnf;
}
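
/// Parse the grammar file `firstfilename` and, transitively, all grammar
/// files it imports. The EBNF for the first file is first in the result.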
EBNF[] readEBNFFiles(string firstfilename)
{
    string input = readText(firstfilename);

    EBNF[] ebnfs = [input.parseEBNF2(firstfilename)];

    auto imports = new TodoList!string;
    void addImports(string currentFilename, string[] importDecls)
    {
        foreach (filename; importDecls)
        {
            assert(filename[0] == '"');
            assert(filename[$ - 1] == '"');
            filename = filename[1 .. $ - 1];
            filename = absolutePath(filename, dirName(absolutePath(currentFilename)));
            imports.put(filename);
        }
    }

    addImports(firstfilename, ebnfs[0].imports);
    foreach (filename; imports.keys)
    {
        string input2 = readText(filename);
        EBNF ebnf2 = input2.parseEBNF2(filename);
        ebnfs ~= ebnf2;
        addImports(filename, ebnf2.imports);
    }
    return ebnfs;
}

int main(string[] args)
{
    GlobalOptions globalOptions = new GlobalOptions;
    string packagename;
    string parsermodule;
    string lexermodule;
    bool optimizationEmpty;
    bool regexlookahead;
    string outfilename;
    string dotfilename;
    string lexerfilename;
    string lexerdfafilename;
    string finalgrammarfilename;
    string docfilenamehtml;
    string docfilenamemd;

    auto helpInformation = getopt(
        args,
        "o|parser", "Output filename for parser", &outfilename,
        "module", "Set module name for parser", &parsermodule,
        "lexer-module", "Set module name for lexer", &lexermodule,
        "package", "Set package for parser and lexer", &packagename,
        "lexer", "Generate lexer in this file", &lexerfilename,
        "glr", "Generate parser with GLR instead of LALR", &globalOptions.glrParser,
        "combinedreduce", "Allow resolving some conflicts", () { globalOptions.delayedReduce = DelayedReduce.combined; },
        "mergesimilarstates", "Reduce number of parser states by combining some states", &globalOptions.mergeSimilarStates,
        "lexer-dfa", "Generate graph for lexer", &lexerdfafilename,
        "graph", "Generate graph for parser", &dotfilename,
        "finalgrammar", "Generate grammar file after all grammar rewritings", &finalgrammarfilename,
        "doc-html", "Generate documentation in HTML format", &docfilenamehtml,
        "doc-md", "Generate documentation in Markdown format", &docfilenamemd,
        "optdescent", "Try to make decisions in the parser earlier", &globalOptions.optimizationDescent,
        "optempty", "Rewrite grammar to remove empty productions", &optimizationEmpty,
        "regexlookahead", "Try to resolve conflicts with arbitrary lookahead", &regexlookahead,
        );

    if (helpInformation.helpWanted || args.length != 2)
    {
        if (helpInformation.options[$ - 1].optShort == "-h")
            helpInformation.options[$ - 1].help = "Print this help and exit";

        size_t ls, ll;
        foreach (it; helpInformation.options)
        {
            ls = max(ls, it.optShort.length);
            ll = max(ll, it.optLong.length);
        }

        writeln("dparsergen grammar.ebnf [OPTIONS]");
        foreach (it; helpInformation.options)
        {
            writefln("%*s %s%*s%s%s", ls, it.optShort,
                it.optLong, ll - it.optLong.length, "",
                it.help.length ? " " : "", it.help);
        }

        return 0;
    }

    string grammarfilename = args[1];

    if (parsermodule.length == 0)
    {
        parsermodule = baseName(outfilename, ".d");
    }

    if (lexermodule.length == 0)
    {
        lexermodule = baseName(lexerfilename, ".d");
    }

    if (packagename.length)
    {
        parsermodule = packagename ~ "." ~ parsermodule;
        lexermodule = packagename ~ "." ~ lexermodule;
    }

    if (globalOptions.glrParser)
    {
        globalOptions.delayedReduce = DelayedReduce.none;
    }

    EBNF[] ebnfs;
    try
    {
        ebnfs = readEBNFFiles(grammarfilename);
    }
    catch (ParseException e)
    {
        /* Already printed in parseEBNF2. */
        return 1;
    }
    catch (Exception e)
    {
        if (e.msg == "Enforcement failed")
            stderr.writeln(e);
        else
            stderr.writeln(e.msg);
        return 1;
    }
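
    // Merge the symbols and matching-token pairs of all imported grammar
    // files into the first grammar, so later stages see a single EBNF.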
    EBNF ebnf = ebnfs[0];
    foreach (ebnf2; ebnfs[1 .. $])
    {
        ebnf.symbols ~= ebnf2.symbols;
        ebnf.matchingTokens ~= ebnf2.matchingTokens;
    }

    if (docfilenamehtml.length)
    {
        import dparsergen.generator.doc;

        genDoc(ebnfs[0], docfilenamehtml, DocType.html);
    }
    if (docfilenamemd.length)
    {
        import dparsergen.generator.doc;

        genDoc(ebnfs[0], docfilenamemd, DocType.markdown);
    }

    EBNFGrammar grammar;
    try
    {
        grammar = ebnf.createGrammar();
    }
    catch (Exception e)
    {
        if (e.msg == "Enforcement failed")
            stderr.writeln(e);
        else
            stderr.writeln(e.msg);
        return 1;
    }
    if (globalOptions.glrParser)
        grammar.tokens.id("$flushreduces");

    bool hasRegArray;
    bool hasDeactivated;
    void checkAnnotations(Annotations annotations)
    {
        if (annotations.contains!"regArray")
            hasRegArray = true;
        if (annotations.contains!"deactivated")
            hasDeactivated = true;
        if (annotations.contains!"directUnwrap")
            globalOptions.directUnwrap = true;
    }

    foreach (i; grammar.nonterminals.allIDs)
        checkAnnotations(grammar.nonterminals[i].annotations);
    foreach (i; grammar.tokens.allIDs)
        checkAnnotations(grammar.tokens[i].annotations);
    foreach (p; grammar.productions)
    {
        checkAnnotations(p.annotations);
        foreach (s; p.symbols)
            checkAnnotations(s.annotations);
    }

    string symbolName(Symbol n)
    {
        if (n.id == size_t.max)
            return "";
        return grammar.getSymbolName(n);
    }

    enforce(!hasRegArray || !hasDeactivated);

    if (hasRegArray)
        grammar = createRegArrayGrammar(ebnf, grammar);

    if (optimizationEmpty)
        grammar = createOptEmptyGrammar(ebnf, grammar);

    if (hasDeactivated)
        grammar = createGrammarWithoutDeactivatedProductions(grammar);

    EBNFGrammar lexerGrammar;
    if (lexerfilename.length || finalgrammarfilename.length)
        lexerGrammar = createLexerGrammar(ebnf, grammar);

    if (finalgrammarfilename.length)
    {
        writeFinalGrammarFile(finalgrammarfilename, grammar, lexerGrammar);
    }

    if (outfilename.length)
    {
        LRGraph graphWithDeactivated = null;
        if (hasDeactivated)
            graphWithDeactivated = makeLRGraph(grammar.origGrammar, globalOptions);
        auto graph = makeLRGraph(grammar, globalOptions, graphWithDeactivated);
        foreach (i, ref s; graph.states)
        {
            bool deactivated;
            foreach (element; s.elements)
                if (element.production.annotations.contains!"deactivated"())
                    deactivated = true;
            if (deactivated)
                graph.states[i] = new LRGraphNode();
        }

        const(char)[] output;
        if (globalOptions.glrParser)
            output = dparsergen.generator.glrparsercodegen.createParserModule(graph, parsermodule);
        else
            output = dparsergen.generator.parsercodegen.createParserModule(graph,
                parsermodule, regexlookahead);
        std.file.write(outfilename, output);
    }

    if (lexerfilename.length)
    {
        import dparsergen.generator.lexergenerator;

        try
        {
            const(char)[] output = createLexerCode(lexerGrammar, lexermodule, lexerdfafilename);
            std.file.write(lexerfilename, output);
        }
        catch (Exception e)
        {
            if (e.msg == "Enforcement failed")
                stderr.writeln(e);
            else
                stderr.writeln(e.msg);
            return 1;
        }
    }

    return 0;
}
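
// Example invocation (file names are hypothetical):
//   dparsergen grammar.ebnf -o parser.d --module parser --lexer grammar_lexer.d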