1 2 // Copyright Tim Schendekehl 2023. 3 // Distributed under the Boost Software License, Version 1.0. 4 // (See accompanying file LICENSE_1_0.txt or copy at 5 // https://www.boost.org/LICENSE_1_0.txt) 6 7 module dparsergen.generator.nfa; 8 import dparsergen.core.utils; 9 import dparsergen.generator.ids; 10 import std.algorithm; 11 import std.conv; 12 import std.range; 13 import std.stdio; 14 import std.typecons; 15 16 struct HybridSet(ID) 17 { 18 ID[] data; 19 BitSet!ID alreadySetBits; 20 size_t graphPointer; 21 this(size_t l, size_t graphPointer) 22 { 23 this.graphPointer = graphPointer; 24 alreadySetBits = BitSet!ID(l); 25 } 26 27 private bool isSetImpl(ID i) const 28 { 29 assert(i.graphPointer == graphPointer); 30 return alreadySetBits[i]; 31 } 32 33 bool opIndex(ID i) const 34 { 35 return isSetImpl(i); 36 } 37 38 void opIndexAssign(bool value, ID i) 39 { 40 assert(i.graphPointer == graphPointer, text(i.graphPointer, " == ", graphPointer)); 41 assert(value); 42 if (!isSetImpl(i)) 43 data ~= i; 44 alreadySetBits[i] = value; 45 assert(alreadySetBits[i] == value); 46 } 47 48 inout(ID)[] bitsSet() inout 49 { 50 return data; 51 } 52 53 void reset() 54 { 55 if (data.length) 56 { 57 data = []; 58 alreadySetBits.arr[] = false; 59 } 60 } 61 } 62 63 unittest 64 { 65 struct ID 66 { 67 size_t id; 68 size_t graphPointer; 69 enum invalid = ID(size_t.max); 70 } 71 72 HybridSet!ID h = HybridSet!ID(10000, 0); 73 assert(h.bitsSet.array == []); 74 h[ID(42)] = true; 75 assert(h.bitsSet.array == [ID(42)]); 76 h[ID(100)] = true; 77 assert(h.bitsSet.array == [ID(42), ID(100)]); 78 } 79 80 alias Set = HybridSet; 81 82 enum EdgeFlags 83 { 84 none = 0, 85 recEdge = 1, 86 backwards = 2, 87 matchingTokenEdge = 4, 88 storeStart = 0x40, 89 storeEnd = 0x80, 90 compareStart = 0x100, 91 compareMiddle = 0x200, 92 compareEndTrue = 0x400, 93 compareEndFalse = 0x800, 94 storeFlags = storeStart | storeEnd, 95 compareFlags = compareStart | compareMiddle | compareEndTrue | compareEndFalse, 96 matchingFlags = storeFlags | compareFlags // Flags have to match, to treat edges the same 97 } 98 99 final class Graph(Symbol_, Result_, EdgeExtra_ = string) 100 { 101 alias Symbol = Symbol_; 102 alias Result = Result_; 103 alias EdgeExtra = EdgeExtra_; 104 struct NodeID 105 { 106 size_t id = size_t.max; 107 size_t graphPointer; 108 enum invalid = NodeID(size_t.max); 109 110 int opCmp(NodeID other) const 111 { 112 if (id == size_t.max && other.id == size_t.max) 113 return 0; 114 if (id == size_t.max) 115 return -1; 116 if (other.id == size_t.max) 117 return 1; 118 assert(graphPointer == other.graphPointer); 119 if (id < other.id) 120 return -1; 121 if (id > other.id) 122 return 1; 123 return 0; 124 } 125 } 126 127 struct Edge 128 { 129 Symbol symbol; 130 NodeID next; 131 EdgeExtra extra; 132 EdgeFlags flags; 133 } 134 135 static class Node 136 { 137 string name; // for debugging 138 Edge[] edges; 139 immutable(Result)[] results; 140 override string toString() const 141 { 142 return text("Node ", name); 143 } 144 } 145 146 size_t graphPointer() const 147 { 148 return cast(size_t) cast(void*) this; 149 } 150 151 Node[] nodes; 152 NodeID start, end; 153 NodeID[] otherStarts; 154 155 NodeID[2][] invisibleEdges; 156 157 auto nodeIDs() const 158 { 159 static struct R 160 { 161 const Graph graph; 162 size_t i; 163 bool empty() const 164 { 165 return i >= graph.nodes.length; 166 } 167 168 NodeID front() const 169 { 170 return NodeID(i, graph.graphPointer); 171 } 172 173 void popFront() 174 { 175 i++; 176 } 177 } 178 179 return R(this); 180 } 181 182 Node get(const NodeID i) 183 in (i.graphPointer == graphPointer) 184 do 185 { 186 return nodes[i.id]; 187 } 188 189 NodeID getNext(NodeID n, Symbol s, EdgeFlags flags = EdgeFlags.none) 190 { 191 foreach (e; getEdges(n)) 192 { 193 if (e.symbol == s 194 && (e.flags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags)) 195 return e.next; 196 } 197 return NodeID.invalid; 198 } 199 200 Edge* getNextEdge(NodeID n, Symbol s, EdgeFlags flags = EdgeFlags.none) 201 { 202 foreach (ref e; getEdges(n)) 203 { 204 if (e.symbol == s 205 && (e.flags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags)) 206 return &e; 207 } 208 return null; 209 } 210 211 NodeID addNode(string name, immutable(Result)[] results = []) 212 { 213 auto n = new Node(); 214 n.name = name; 215 n.results = results; 216 nodes ~= n; 217 return NodeID(nodes.length - 1, graphPointer); 218 } 219 220 void addEdge(NodeID from, NodeID to, Symbol symbol, EdgeFlags flags = EdgeFlags.none, 221 EdgeExtra extra = EdgeExtra.init, size_t line = __LINE__) 222 in 223 { 224 assert(from.graphPointer == graphPointer); 225 assert(to.graphPointer == graphPointer); 226 } 227 do 228 { 229 get(from).edges ~= Edge(symbol, to, extra, flags); 230 } 231 232 NodeID nodeID(size_t i) 233 { 234 assert(i < nodes.length); 235 return NodeID(i, cast(size_t) cast(void*) this); 236 } 237 238 NodeID[2] addCopy(Graph src) 239 { 240 immutable delta = nodes.length; 241 nodes.length += src.nodes.length; 242 foreach (i, n; src.nodes) 243 { 244 Node n2 = new Node(); 245 n2.name = n.name; 246 n2.results = n.results; 247 n2.edges.length = n.edges.length; 248 foreach (k, e; n.edges) 249 { 250 n2.edges[k] = Edge(e.symbol, NodeID(e.next.id + delta, 251 cast(size_t) cast(void*) this), EdgeExtra.init, e.flags); 252 } 253 nodes[i + delta] = n2; 254 } 255 return [ 256 NodeID(src.start.id + delta, cast(size_t) cast(void*) this), 257 NodeID(src.end.id + delta, cast(size_t) cast(void*) this) 258 ]; 259 } 260 261 Edge[] getEdges(NodeID nodeID) 262 { 263 return get(nodeID).edges; 264 } 265 266 Set!NodeID createSet() const 267 { 268 return Set!(NodeID)(nodes.length, graphPointer); 269 } 270 } 271 272 void simulate(G)(G graph, Set!(G.NodeID) current, G.Symbol symbol, 273 ref Set!(G.NodeID) next, EdgeFlags flags = EdgeFlags.none) 274 in 275 { 276 assert(current.graphPointer == graph.graphPointer); 277 assert(next.graphPointer == graph.graphPointer); 278 } 279 do 280 { 281 foreach (node; current.bitsSet) 282 { 283 foreach (e; graph.getEdges(node)) 284 { 285 auto eflags = e.flags; 286 if ((flags & EdgeFlags.matchingFlags) && !(e.flags & EdgeFlags.matchingFlags)) 287 eflags |= flags & EdgeFlags.compareFlags; 288 if (e.symbol == symbol 289 && (eflags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags)) 290 { 291 next[e.next] = true; 292 } 293 } 294 } 295 completeEmpty!G(graph, next); 296 } 297 298 Set!(G.NodeID) simulate(G)(G graph, Set!(G.NodeID) current, G.Symbol symbol) 299 in 300 { 301 assert(current.graphPointer == graph.graphPointer); 302 } 303 do 304 { 305 assert(current.graphPointer == graph.graphPointer); 306 Set!(G.NodeID) next = graph.createSet(); 307 simulate!G(graph, current, symbol, next); 308 return next; 309 } 310 311 void completeEmpty(G)(G graph, ref Set!(G.NodeID) current) 312 in 313 { 314 assert(current.graphPointer == graph.graphPointer); 315 } 316 do 317 { 318 void add(G.NodeID node) 319 { 320 foreach (e; graph.getEdges(node)) 321 { 322 if (e.symbol == G.Symbol.invalid) 323 { 324 if (current[e.next]) 325 continue; 326 current[e.next] = true; 327 add(e.next); 328 } 329 } 330 } 331 332 foreach (node; current.bitsSet) 333 { 334 add(node); 335 } 336 } 337 338 Set!(G.NodeID) simulate(G)(G graph, G.NodeID start, G.Symbol[] input) 339 in 340 { 341 assert(start.graphPointer == graph.graphPointer); 342 } 343 do 344 { 345 Set!(G.NodeID) current = graph.createSet(); 346 current[start] = true; 347 completeEmpty!G(graph, current); 348 foreach (s; input) 349 { 350 current = simulate!G(graph, current, s); 351 } 352 return current; 353 } 354 355 G.Result[] reachableResults(G)(G graph, G.NodeID start) 356 in 357 { 358 assert(start.graphPointer == graph.graphPointer); 359 } 360 do 361 { 362 bool[G.NodeID] done; 363 bool[G.Result] results; 364 void findReachable(G.NodeID node) 365 { 366 if (node in done) 367 return; 368 done[node] = true; 369 foreach (r; graph.get(node).results) 370 { 371 results[r] = true; 372 } 373 foreach (e; graph.get(node).edges) 374 { 375 findReachable(e.next); 376 } 377 } 378 379 findReachable(start); 380 G.Result[] resultsArr; 381 foreach (r, v; results) 382 { 383 resultsArr ~= r; 384 } 385 return resultsArr; 386 } 387 388 Graph!(G.Symbol, G.Result, G.EdgeExtra) makeDeterministic(G)(G inputGraph, G.NodeID start, 389 bool stopOnSingleResult = false, 390 immutable(G.Result)[]delegate(immutable(G.Result)[]) filterResult = null) 391 in 392 { 393 assert(start.graphPointer == inputGraph.graphPointer); 394 } 395 do 396 { 397 alias Symbol = G.Symbol; 398 alias Result = G.Result; 399 alias NodeID = G.NodeID; 400 alias EdgeExtra = G.EdgeExtra; 401 alias Node = G.Node; 402 Graph!(Symbol, Result, EdgeExtra) g = new Graph!(Symbol, Result, EdgeExtra)(); 403 Set!NodeID startset = inputGraph.createSet(); 404 startset[start] = true; 405 completeEmpty!G(inputGraph, startset); 406 407 NodeID[immutable(NodeID)[]] states; 408 size_t uniqueID; 409 NodeID add(Set!NodeID s, size_t depth) 410 { 411 NodeID[] atmp = s.bitsSet.array; 412 atmp.sort!"a.id < b.id"(); 413 immutable NodeID[] a = atmp.idup; 414 if (a in states) 415 return states[a]; 416 NodeID nodeID = g.addNode(text(uniqueID++ /*s.bitsSet.map!(n=>inputGraph.get(n).name[]).joiner("\\n")*/ )); 417 Node node = g.get(nodeID); 418 states[a] = nodeID; 419 static struct NodeEdgeID 420 { 421 NodeID nodeID; 422 size_t edgeID; 423 } 424 425 NodeEdgeID[Tuple!(Symbol, EdgeFlags)] done; 426 Set!NodeID next = inputGraph.createSet(); 427 foreach (n; s.bitsSet) 428 { 429 addOnce(node.results, inputGraph.get(n).results); 430 } 431 if (filterResult !is null) 432 node.results = filterResult(node.results); 433 if (stopOnSingleResult) 434 { 435 if (node.results.length <= 1) 436 return nodeID; 437 } 438 foreach (n; s.bitsSet) 439 { 440 foreach (e; inputGraph.getEdges(n)) 441 { 442 if (e.symbol == Symbol.invalid) 443 continue; 444 auto key = tuple!(Symbol, EdgeFlags)(e.symbol, e.flags & EdgeFlags.matchingFlags); 445 if (key in done) 446 { 447 g.getEdges(done[key].nodeID)[done[key].edgeID].extra ~= e.extra; 448 continue; 449 } 450 next.reset(); 451 simulate!G(inputGraph, s, e.symbol, next, e.flags); 452 node.edges ~= G.Edge(e.symbol, add(next, depth + 1), e.extra, e.flags); 453 done[key] = NodeEdgeID(nodeID, node.edges.length - 1); 454 } 455 } 456 return nodeID; 457 } 458 459 g.start = add(startset, 0); 460 461 return g; 462 } 463 464 string defaultNodeResultsText(G)(G graph, G.NodeID node) 465 { 466 return text(graph.get(node).results); 467 } 468 469 void toGraphViz(alias tokenName = text, alias resultsText = defaultNodeResultsText, G)(G g, 470 string filename) 471 { 472 toGraphViz2!((ts) => ts.map!(e => text(tokenName(e.symbol), " (", e.extra, 473 ")")).join("\n"), resultsText, G)(g, filename); 474 } 475 476 void toGraphViz2(alias tokensName = (ts) => ts.map!(t => text(t)).join("\n"), 477 alias resultsText = defaultNodeResultsText, G)(G g, string filename) 478 { 479 File f = File(filename, "w"); 480 f.writeln("digraph G {"); 481 f.writeln("\trankdir=LR;"); 482 f.writeln("\tgraph [fontname = \"arial\"];"); 483 f.writeln("\tnode [fontname = \"arial\"];"); 484 f.writeln("\tedge [fontname = \"arial\"];"); 485 f.writeln("\tnode[shape = circle];"); 486 toGraphViz2!(tokensName, resultsText, G)(g, f); 487 f.writeln("\t\"\" [shape = none];"); 488 if (g.start != G.NodeID.invalid) 489 f.writeln("\t\"\" -> \"node", g.start.id, "\";"); 490 foreach (s; g.otherStarts) 491 f.writeln("\t\"\" -> \"node", s.id, "\";"); 492 f.writeln("}"); 493 } 494 495 void toGraphViz(alias tokenName = text, alias resultsText = defaultNodeResultsText, G)(G g, 496 File f, size_t idoffset = 0) 497 { 498 toGraphViz2!((ts) => ts.map!(e => text(tokenName(e.symbol), " (", e.extra, 499 ")")).join("\n"), resultsText, G)(g, f, idoffset); 500 } 501 502 void toGraphViz2(alias tokensName, alias resultsText, G)(G g, File f, size_t idoffset = 0) 503 { 504 string nodeStr(G.NodeID nid) 505 { 506 return text("\"node", nid.id + idoffset, "\""); 507 } 508 509 foreach (id; g.nodeIDs) 510 { 511 auto n = g.get(id); 512 f.write("\t", nodeStr(id), " ["); 513 string label = n.name; 514 if (n.results.length > 0) 515 { 516 f.writeln("shape = doublecircle"); 517 } 518 label ~= "\n" ~ resultsText(g, id); 519 f.write(" label =\"", label.escapeD, "\""); 520 f.writeln("];"); 521 struct Key 522 { 523 G.NodeID next; 524 bool backwards; 525 EdgeFlags flags; 526 } 527 528 G.Edge[][Key] edgeBuckets; 529 foreach (e; g.getEdges(id)) 530 { 531 Key key = Key(e.next, (e.flags & EdgeFlags.backwards) != 0, 532 e.flags & EdgeFlags.matchingFlags); 533 if (key !in edgeBuckets) 534 edgeBuckets[key] = []; 535 edgeBuckets[key] ~= e; 536 } 537 foreach (key, e; edgeBuckets) 538 { 539 if (key.backwards) 540 f.writeln("\t", nodeStr(key.next), " -> ", nodeStr(id), 541 " [ label = \"", tokensName(e).escapeD, "\"", 542 (e.any!(x => (x.flags & EdgeFlags.matchingTokenEdge) != 0) 543 ? " arrowType=tee" : ""), " dir=back", "];"); 544 else 545 f.writeln("\t", nodeStr(id), " -> ", nodeStr(key.next), 546 " [ label = \"", tokensName(e).escapeD, "\"", 547 (e.any!(x => (x.flags & EdgeFlags.matchingTokenEdge) != 0) 548 ? " arrowType=tee" : ""), "];"); 549 } 550 } 551 foreach (e; g.invisibleEdges) 552 { 553 f.writeln("\t", nodeStr(e[0]), " -> ", nodeStr(e[1]), " [ label = \"\" style=dashed];"); 554 } 555 } 556 557 Graph!(Symbol, Result, EdgeExtra) minimizeDFA(Symbol, Result, EdgeExtra)( 558 Graph!(Symbol, Result, EdgeExtra) g, bool verbose = false, bool anyEdges = false) 559 { 560 alias NodeID = Graph!(Symbol, Result, EdgeExtra).NodeID; 561 562 NodeID[][] partitions; 563 foreach (node; g.nodeIDs) 564 { 565 bool found = false; 566 foreach (ref p; partitions) 567 { 568 bool[Result] resFound; 569 bool areEqual = true; 570 foreach (res; g.get(node).results) 571 { 572 resFound[res] = true; 573 } 574 foreach (res; g.get(p[0]).results) 575 { 576 if (res !in resFound) 577 areEqual = false; 578 resFound[res] = false; 579 } 580 foreach (res, x; resFound) 581 if (x) 582 areEqual = false; 583 if (areEqual) 584 { 585 p ~= node; 586 found = true; 587 } 588 } 589 if (!found) 590 { 591 partitions ~= [node]; 592 } 593 } 594 595 void printPartitions() 596 { 597 writeln("partitions:"); 598 foreach (i, p; partitions) 599 writeln(i, ": ", p.map!(n => g.get(n).name).join(", ")); 600 } 601 602 if (verbose) 603 printPartitions(); 604 605 bool changed; 606 size_t[NodeID] partitionIDs; 607 608 void updatePartitionIDs() 609 { 610 partitionIDs = null; 611 foreach (i, p; partitions) 612 { 613 foreach (n; p) 614 partitionIDs[n] = i; 615 } 616 } 617 618 do 619 { 620 updatePartitionIDs(); 621 622 NodeID[][] newPartitions; 623 foreach (p; partitions) 624 { 625 if (!anyEdges) 626 { 627 alias Key = Tuple!(Symbol, EdgeFlags); 628 NodeID[][size_t[Key]] next; 629 630 foreach (node; p) 631 { 632 size_t[Key] edgePartitions; 633 foreach (e; g.get(node).edges) 634 { 635 edgePartitions[Key(e.symbol, e.flags & EdgeFlags.matchingFlags)] = partitionIDs[e 636 .next]; 637 } 638 next[edgePartitions] ~= node; 639 } 640 foreach (p2; next) 641 newPartitions ~= p2; 642 } 643 else 644 { 645 NodeID[][immutable(size_t)[]] next; 646 foreach (node; p) 647 { 648 size_t[] nextNodes; 649 foreach (e; g.get(node).edges) 650 { 651 nextNodes.addOnce(partitionIDs[e.next]); 652 } 653 nextNodes.sort(); 654 next[nextNodes.idup] ~= node; 655 } 656 657 foreach (p2; next) 658 { 659 alias Key = Tuple!(Symbol, EdgeFlags); 660 NodeID[][] newPartitions2; 661 size_t[Key][] partitionEdges; 662 663 foreach (node; p2) 664 { 665 bool good; 666 foreach (i; 0 .. newPartitions2.length) 667 { 668 good = true; 669 foreach (e; g.get(node).edges) 670 { 671 auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags); 672 if (key in partitionEdges[i] 673 && partitionEdges[i][key] != partitionIDs[e.next]) 674 { 675 good = false; 676 break; 677 } 678 } 679 if (good) 680 { 681 foreach (e; g.get(node).edges) 682 { 683 auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags); 684 partitionEdges[i][key] = partitionIDs[e.next]; 685 } 686 newPartitions2[i] ~= node; 687 break; 688 } 689 } 690 if (!good) 691 { 692 newPartitions2 ~= [node]; 693 partitionEdges ~= null; 694 foreach (e; g.get(node).edges) 695 { 696 auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags); 697 partitionEdges[$ - 1][key] = partitionIDs[e.next]; 698 } 699 } 700 } 701 702 newPartitions ~= newPartitions2; 703 } 704 } 705 } 706 changed = newPartitions.length > partitions.length; 707 partitions = newPartitions; 708 709 if (verbose) 710 printPartitions(); 711 } 712 while (changed); 713 714 NodeID[NodeID] nodeEquivalentMap; 715 716 foreach (p; partitions) 717 { 718 foreach (n; p[1 .. $]) 719 nodeEquivalentMap[n] = p[0]; 720 } 721 updatePartitionIDs(); 722 723 Graph!(Symbol, Result, EdgeExtra) r = new Graph!(Symbol, Result, EdgeExtra)(); 724 NodeID[NodeID] nodeMap; 725 size_t uniqueNr; 726 foreach (n; g.nodeIDs) 727 { 728 NodeID rn = n; 729 while (rn in nodeEquivalentMap) 730 rn = nodeEquivalentMap[rn]; 731 732 if (rn !in nodeMap) 733 { 734 string name = partitions[partitionIDs[n]].map!(i => i.id.text).join(", "); 735 nodeMap[rn] = r.addNode(name, g.get(rn).results); 736 } 737 738 nodeMap[n] = nodeMap[rn]; 739 } 740 if (g.start != NodeID.invalid) 741 r.start = nodeMap[g.start]; 742 foreach (s; g.otherStarts) 743 r.otherStarts ~= nodeMap[s]; 744 745 foreach (n; g.nodeIDs) 746 { 747 foreach (e; g.get(n).edges) 748 { 749 auto e2 = r.getNextEdge(nodeMap[n], e.symbol, e.flags); 750 if (e2 is null) 751 r.addEdge(nodeMap[n], nodeMap[e.next], e.symbol, 752 e.flags & EdgeFlags.matchingFlags, e.extra); 753 else 754 e2.extra ~= e.extra; 755 } 756 } 757 758 return r; 759 } 760 761 Graph!(Symbol, Result, EdgeExtra) removeDeadStates(Symbol, Result, EdgeExtra)( 762 Graph!(Symbol, Result, EdgeExtra) g) 763 { 764 alias G = Graph!(Symbol, Result, EdgeExtra); 765 auto isAlive = g.createSet(); 766 foreach (n; g.nodeIDs) 767 { 768 if (g.get(n).results.length) 769 isAlive[n] = true; 770 } 771 bool changed; 772 do 773 { 774 changed = false; 775 foreach (n; g.nodeIDs) 776 { 777 if (isAlive[n]) 778 continue; 779 foreach (e; g.getEdges(n)) 780 { 781 if (isAlive[e.next]) 782 { 783 isAlive[n] = true; 784 changed = true; 785 } 786 } 787 } 788 } 789 while (changed); 790 791 G.NodeID[G.NodeID] nodeMap; 792 G r = new G(); 793 G.NodeID mapNode(G.NodeID n) 794 { 795 if (n !in nodeMap) 796 { 797 nodeMap[n] = r.addNode(g.get(n).name, g.get(n).results); 798 } 799 return nodeMap[n]; 800 } 801 802 r.start = mapNode(g.start); 803 foreach (n; g.nodeIDs) 804 { 805 if (!isAlive[n]) 806 continue; 807 foreach (e; g.getEdges(n)) 808 { 809 if (!isAlive[e.next]) 810 continue; 811 r.addEdge(mapNode(n), mapNode(e.next), e.symbol, e.flags, e.extra); 812 } 813 } 814 return r; 815 }