1 
2 //          Copyright Tim Schendekehl 2023.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          https://www.boost.org/LICENSE_1_0.txt)
6 
7 module dparsergen.generator.nfa;
8 import dparsergen.core.utils;
9 import dparsergen.generator.ids;
10 import std.algorithm;
11 import std.conv;
12 import std.range;
13 import std.stdio;
14 import std.typecons;
15 
16 struct HybridSet(ID)
17 {
18     ID[] data;
19     BitSet!ID alreadySetBits;
20     size_t graphPointer;
21     this(size_t l, size_t graphPointer)
22     {
23         this.graphPointer = graphPointer;
24         alreadySetBits = BitSet!ID(l);
25     }
26 
27     private bool isSetImpl(ID i) const
28     {
29         assert(i.graphPointer == graphPointer);
30         return alreadySetBits[i];
31     }
32 
33     bool opIndex(ID i) const
34     {
35         return isSetImpl(i);
36     }
37 
38     void opIndexAssign(bool value, ID i)
39     {
40         assert(i.graphPointer == graphPointer, text(i.graphPointer, " == ", graphPointer));
41         assert(value);
42         if (!isSetImpl(i))
43             data ~= i;
44         alreadySetBits[i] = value;
45         assert(alreadySetBits[i] == value);
46     }
47 
48     inout(ID)[] bitsSet() inout
49     {
50         return data;
51     }
52 
53     void reset()
54     {
55         if (data.length)
56         {
57             data = [];
58             alreadySetBits.arr[] = false;
59         }
60     }
61 }
62 
63 unittest
64 {
65     struct ID
66     {
67         size_t id;
68         size_t graphPointer;
69         enum invalid = ID(size_t.max);
70     }
71 
72     HybridSet!ID h = HybridSet!ID(10000, 0);
73     assert(h.bitsSet.array == []);
74     h[ID(42)] = true;
75     assert(h.bitsSet.array == [ID(42)]);
76     h[ID(100)] = true;
77     assert(h.bitsSet.array == [ID(42), ID(100)]);
78 }
79 
80 alias Set = HybridSet;
81 
82 enum EdgeFlags
83 {
84     none = 0,
85     recEdge = 1,
86     backwards = 2,
87     matchingTokenEdge = 4,
88     storeStart = 0x40,
89     storeEnd = 0x80,
90     compareStart = 0x100,
91     compareMiddle = 0x200,
92     compareEndTrue = 0x400,
93     compareEndFalse = 0x800,
94     storeFlags = storeStart | storeEnd,
95     compareFlags = compareStart | compareMiddle | compareEndTrue | compareEndFalse,
96     matchingFlags = storeFlags | compareFlags // Flags have to match, to treat edges the same
97 }
98 
99 final class Graph(Symbol_, Result_, EdgeExtra_ = string)
100 {
101     alias Symbol = Symbol_;
102     alias Result = Result_;
103     alias EdgeExtra = EdgeExtra_;
104     struct NodeID
105     {
106         size_t id = size_t.max;
107         size_t graphPointer;
108         enum invalid = NodeID(size_t.max);
109 
110         int opCmp(NodeID other) const
111         {
112             if (id == size_t.max && other.id == size_t.max)
113                 return 0;
114             if (id == size_t.max)
115                 return -1;
116             if (other.id == size_t.max)
117                 return 1;
118             assert(graphPointer == other.graphPointer);
119             if (id < other.id)
120                 return -1;
121             if (id > other.id)
122                 return 1;
123             return 0;
124         }
125     }
126 
127     struct Edge
128     {
129         Symbol symbol;
130         NodeID next;
131         EdgeExtra extra;
132         EdgeFlags flags;
133     }
134 
135     static class Node
136     {
137         string name; // for debugging
138         Edge[] edges;
139         immutable(Result)[] results;
140         override string toString() const
141         {
142             return text("Node ", name);
143         }
144     }
145 
146     size_t graphPointer() const
147     {
148         return cast(size_t) cast(void*) this;
149     }
150 
151     Node[] nodes;
152     NodeID start, end;
153     NodeID[] otherStarts;
154 
155     NodeID[2][] invisibleEdges;
156 
157     auto nodeIDs() const
158     {
159         static struct R
160         {
161             const Graph graph;
162             size_t i;
163             bool empty() const
164             {
165                 return i >= graph.nodes.length;
166             }
167 
168             NodeID front() const
169             {
170                 return NodeID(i, graph.graphPointer);
171             }
172 
173             void popFront()
174             {
175                 i++;
176             }
177         }
178 
179         return R(this);
180     }
181 
182     Node get(const NodeID i)
183     in (i.graphPointer == graphPointer)
184     do
185     {
186         return nodes[i.id];
187     }
188 
189     NodeID getNext(NodeID n, Symbol s, EdgeFlags flags = EdgeFlags.none)
190     {
191         foreach (e; getEdges(n))
192         {
193             if (e.symbol == s
194                     && (e.flags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags))
195                 return e.next;
196         }
197         return NodeID.invalid;
198     }
199 
200     Edge* getNextEdge(NodeID n, Symbol s, EdgeFlags flags = EdgeFlags.none)
201     {
202         foreach (ref e; getEdges(n))
203         {
204             if (e.symbol == s
205                     && (e.flags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags))
206                 return &e;
207         }
208         return null;
209     }
210 
211     NodeID addNode(string name, immutable(Result)[] results = [])
212     {
213         auto n = new Node();
214         n.name = name;
215         n.results = results;
216         nodes ~= n;
217         return NodeID(nodes.length - 1, graphPointer);
218     }
219 
220     void addEdge(NodeID from, NodeID to, Symbol symbol, EdgeFlags flags = EdgeFlags.none,
221             EdgeExtra extra = EdgeExtra.init, size_t line = __LINE__)
222     in
223     {
224         assert(from.graphPointer == graphPointer);
225         assert(to.graphPointer == graphPointer);
226     }
227     do
228     {
229         get(from).edges ~= Edge(symbol, to, extra, flags);
230     }
231 
232     NodeID nodeID(size_t i)
233     {
234         assert(i < nodes.length);
235         return NodeID(i, cast(size_t) cast(void*) this);
236     }
237 
238     NodeID[2] addCopy(Graph src)
239     {
240         immutable delta = nodes.length;
241         nodes.length += src.nodes.length;
242         foreach (i, n; src.nodes)
243         {
244             Node n2 = new Node();
245             n2.name = n.name;
246             n2.results = n.results;
247             n2.edges.length = n.edges.length;
248             foreach (k, e; n.edges)
249             {
250                 n2.edges[k] = Edge(e.symbol, NodeID(e.next.id + delta,
251                         cast(size_t) cast(void*) this), EdgeExtra.init, e.flags);
252             }
253             nodes[i + delta] = n2;
254         }
255         return [
256             NodeID(src.start.id + delta, cast(size_t) cast(void*) this),
257             NodeID(src.end.id + delta, cast(size_t) cast(void*) this)
258         ];
259     }
260 
261     Edge[] getEdges(NodeID nodeID)
262     {
263         return get(nodeID).edges;
264     }
265 
266     Set!NodeID createSet() const
267     {
268         return Set!(NodeID)(nodes.length, graphPointer);
269     }
270 }
271 
272 void simulate(G)(G graph, Set!(G.NodeID) current, G.Symbol symbol,
273         ref Set!(G.NodeID) next, EdgeFlags flags = EdgeFlags.none)
274 in
275 {
276     assert(current.graphPointer == graph.graphPointer);
277     assert(next.graphPointer == graph.graphPointer);
278 }
279 do
280 {
281     foreach (node; current.bitsSet)
282     {
283         foreach (e; graph.getEdges(node))
284         {
285             auto eflags = e.flags;
286             if ((flags & EdgeFlags.matchingFlags) && !(e.flags & EdgeFlags.matchingFlags))
287                 eflags |= flags & EdgeFlags.compareFlags;
288             if (e.symbol == symbol
289                     && (eflags & EdgeFlags.matchingFlags) == (flags & EdgeFlags.matchingFlags))
290             {
291                 next[e.next] = true;
292             }
293         }
294     }
295     completeEmpty!G(graph, next);
296 }
297 
298 Set!(G.NodeID) simulate(G)(G graph, Set!(G.NodeID) current, G.Symbol symbol)
299 in
300 {
301     assert(current.graphPointer == graph.graphPointer);
302 }
303 do
304 {
305     assert(current.graphPointer == graph.graphPointer);
306     Set!(G.NodeID) next = graph.createSet();
307     simulate!G(graph, current, symbol, next);
308     return next;
309 }
310 
311 void completeEmpty(G)(G graph, ref Set!(G.NodeID) current)
312 in
313 {
314     assert(current.graphPointer == graph.graphPointer);
315 }
316 do
317 {
318     void add(G.NodeID node)
319     {
320         foreach (e; graph.getEdges(node))
321         {
322             if (e.symbol == G.Symbol.invalid)
323             {
324                 if (current[e.next])
325                     continue;
326                 current[e.next] = true;
327                 add(e.next);
328             }
329         }
330     }
331 
332     foreach (node; current.bitsSet)
333     {
334         add(node);
335     }
336 }
337 
338 Set!(G.NodeID) simulate(G)(G graph, G.NodeID start, G.Symbol[] input)
339 in
340 {
341     assert(start.graphPointer == graph.graphPointer);
342 }
343 do
344 {
345     Set!(G.NodeID) current = graph.createSet();
346     current[start] = true;
347     completeEmpty!G(graph, current);
348     foreach (s; input)
349     {
350         current = simulate!G(graph, current, s);
351     }
352     return current;
353 }
354 
355 G.Result[] reachableResults(G)(G graph, G.NodeID start)
356 in
357 {
358     assert(start.graphPointer == graph.graphPointer);
359 }
360 do
361 {
362     bool[G.NodeID] done;
363     bool[G.Result] results;
364     void findReachable(G.NodeID node)
365     {
366         if (node in done)
367             return;
368         done[node] = true;
369         foreach (r; graph.get(node).results)
370         {
371             results[r] = true;
372         }
373         foreach (e; graph.get(node).edges)
374         {
375             findReachable(e.next);
376         }
377     }
378 
379     findReachable(start);
380     G.Result[] resultsArr;
381     foreach (r, v; results)
382     {
383         resultsArr ~= r;
384     }
385     return resultsArr;
386 }
387 
388 Graph!(G.Symbol, G.Result, G.EdgeExtra) makeDeterministic(G)(G inputGraph, G.NodeID start,
389         bool stopOnSingleResult = false,
390         immutable(G.Result)[]delegate(immutable(G.Result)[]) filterResult = null)
391 in
392 {
393     assert(start.graphPointer == inputGraph.graphPointer);
394 }
395 do
396 {
397     alias Symbol = G.Symbol;
398     alias Result = G.Result;
399     alias NodeID = G.NodeID;
400     alias EdgeExtra = G.EdgeExtra;
401     alias Node = G.Node;
402     Graph!(Symbol, Result, EdgeExtra) g = new Graph!(Symbol, Result, EdgeExtra)();
403     Set!NodeID startset = inputGraph.createSet();
404     startset[start] = true;
405     completeEmpty!G(inputGraph, startset);
406 
407     NodeID[immutable(NodeID)[]] states;
408     size_t uniqueID;
409     NodeID add(Set!NodeID s, size_t depth)
410     {
411         NodeID[] atmp = s.bitsSet.array;
412         atmp.sort!"a.id < b.id"();
413         immutable NodeID[] a = atmp.idup;
414         if (a in states)
415             return states[a];
416         NodeID nodeID = g.addNode(text(uniqueID++ /*s.bitsSet.map!(n=>inputGraph.get(n).name[]).joiner("\\n")*/ ));
417         Node node = g.get(nodeID);
418         states[a] = nodeID;
419         static struct NodeEdgeID
420         {
421             NodeID nodeID;
422             size_t edgeID;
423         }
424 
425         NodeEdgeID[Tuple!(Symbol, EdgeFlags)] done;
426         Set!NodeID next = inputGraph.createSet();
427         foreach (n; s.bitsSet)
428         {
429             addOnce(node.results, inputGraph.get(n).results);
430         }
431         if (filterResult !is null)
432             node.results = filterResult(node.results);
433         if (stopOnSingleResult)
434         {
435             if (node.results.length <= 1)
436                 return nodeID;
437         }
438         foreach (n; s.bitsSet)
439         {
440             foreach (e; inputGraph.getEdges(n))
441             {
442                 if (e.symbol == Symbol.invalid)
443                     continue;
444                 auto key = tuple!(Symbol, EdgeFlags)(e.symbol, e.flags & EdgeFlags.matchingFlags);
445                 if (key in done)
446                 {
447                     g.getEdges(done[key].nodeID)[done[key].edgeID].extra ~= e.extra;
448                     continue;
449                 }
450                 next.reset();
451                 simulate!G(inputGraph, s, e.symbol, next, e.flags);
452                 node.edges ~= G.Edge(e.symbol, add(next, depth + 1), e.extra, e.flags);
453                 done[key] = NodeEdgeID(nodeID, node.edges.length - 1);
454             }
455         }
456         return nodeID;
457     }
458 
459     g.start = add(startset, 0);
460 
461     return g;
462 }
463 
464 string defaultNodeResultsText(G)(G graph, G.NodeID node)
465 {
466     return text(graph.get(node).results);
467 }
468 
469 void toGraphViz(alias tokenName = text, alias resultsText = defaultNodeResultsText, G)(G g,
470         string filename)
471 {
472     toGraphViz2!((ts) => ts.map!(e => text(tokenName(e.symbol), " (", e.extra,
473             ")")).join("\n"), resultsText, G)(g, filename);
474 }
475 
476 void toGraphViz2(alias tokensName = (ts) => ts.map!(t => text(t)).join("\n"),
477         alias resultsText = defaultNodeResultsText, G)(G g, string filename)
478 {
479     File f = File(filename, "w");
480     f.writeln("digraph G {");
481     f.writeln("\trankdir=LR;");
482     f.writeln("\tgraph [fontname = \"arial\"];");
483     f.writeln("\tnode [fontname = \"arial\"];");
484     f.writeln("\tedge [fontname = \"arial\"];");
485     f.writeln("\tnode[shape = circle];");
486     toGraphViz2!(tokensName, resultsText, G)(g, f);
487     f.writeln("\t\"\" [shape = none];");
488     if (g.start != G.NodeID.invalid)
489         f.writeln("\t\"\" -> \"node", g.start.id, "\";");
490     foreach (s; g.otherStarts)
491         f.writeln("\t\"\" -> \"node", s.id, "\";");
492     f.writeln("}");
493 }
494 
495 void toGraphViz(alias tokenName = text, alias resultsText = defaultNodeResultsText, G)(G g,
496         File f, size_t idoffset = 0)
497 {
498     toGraphViz2!((ts) => ts.map!(e => text(tokenName(e.symbol), " (", e.extra,
499             ")")).join("\n"), resultsText, G)(g, f, idoffset);
500 }
501 
502 void toGraphViz2(alias tokensName, alias resultsText, G)(G g, File f, size_t idoffset = 0)
503 {
504     string nodeStr(G.NodeID nid)
505     {
506         return text("\"node", nid.id + idoffset, "\"");
507     }
508 
509     foreach (id; g.nodeIDs)
510     {
511         auto n = g.get(id);
512         f.write("\t", nodeStr(id), " [");
513         string label = n.name;
514         if (n.results.length > 0)
515         {
516             f.writeln("shape = doublecircle");
517         }
518         label ~= "\n" ~ resultsText(g, id);
519         f.write("  label =\"", label.escapeD, "\"");
520         f.writeln("];");
521         struct Key
522         {
523             G.NodeID next;
524             bool backwards;
525             EdgeFlags flags;
526         }
527 
528         G.Edge[][Key] edgeBuckets;
529         foreach (e; g.getEdges(id))
530         {
531             Key key = Key(e.next, (e.flags & EdgeFlags.backwards) != 0,
532                     e.flags & EdgeFlags.matchingFlags);
533             if (key !in edgeBuckets)
534                 edgeBuckets[key] = [];
535             edgeBuckets[key] ~= e;
536         }
537         foreach (key, e; edgeBuckets)
538         {
539             if (key.backwards)
540                 f.writeln("\t", nodeStr(key.next), " -> ", nodeStr(id),
541                         " [ label = \"", tokensName(e).escapeD, "\"",
542                         (e.any!(x => (x.flags & EdgeFlags.matchingTokenEdge) != 0)
543                             ? " arrowType=tee" : ""), " dir=back", "];");
544             else
545                 f.writeln("\t", nodeStr(id), " -> ", nodeStr(key.next),
546                         " [ label = \"", tokensName(e).escapeD, "\"",
547                         (e.any!(x => (x.flags & EdgeFlags.matchingTokenEdge) != 0)
548                             ? " arrowType=tee" : ""), "];");
549         }
550     }
551     foreach (e; g.invisibleEdges)
552     {
553         f.writeln("\t", nodeStr(e[0]), " -> ", nodeStr(e[1]), " [ label = \"\" style=dashed];");
554     }
555 }
556 
557 Graph!(Symbol, Result, EdgeExtra) minimizeDFA(Symbol, Result, EdgeExtra)(
558         Graph!(Symbol, Result, EdgeExtra) g, bool verbose = false, bool anyEdges = false)
559 {
560     alias NodeID = Graph!(Symbol, Result, EdgeExtra).NodeID;
561 
562     NodeID[][] partitions;
563     foreach (node; g.nodeIDs)
564     {
565         bool found = false;
566         foreach (ref p; partitions)
567         {
568             bool[Result] resFound;
569             bool areEqual = true;
570             foreach (res; g.get(node).results)
571             {
572                 resFound[res] = true;
573             }
574             foreach (res; g.get(p[0]).results)
575             {
576                 if (res !in resFound)
577                     areEqual = false;
578                 resFound[res] = false;
579             }
580             foreach (res, x; resFound)
581                 if (x)
582                     areEqual = false;
583             if (areEqual)
584             {
585                 p ~= node;
586                 found = true;
587             }
588         }
589         if (!found)
590         {
591             partitions ~= [node];
592         }
593     }
594 
595     void printPartitions()
596     {
597         writeln("partitions:");
598         foreach (i, p; partitions)
599             writeln(i, ": ", p.map!(n => g.get(n).name).join(", "));
600     }
601 
602     if (verbose)
603         printPartitions();
604 
605     bool changed;
606     size_t[NodeID] partitionIDs;
607 
608     void updatePartitionIDs()
609     {
610         partitionIDs = null;
611         foreach (i, p; partitions)
612         {
613             foreach (n; p)
614                 partitionIDs[n] = i;
615         }
616     }
617 
618     do
619     {
620         updatePartitionIDs();
621 
622         NodeID[][] newPartitions;
623         foreach (p; partitions)
624         {
625             if (!anyEdges)
626             {
627                 alias Key = Tuple!(Symbol, EdgeFlags);
628                 NodeID[][size_t[Key]] next;
629 
630                 foreach (node; p)
631                 {
632                     size_t[Key] edgePartitions;
633                     foreach (e; g.get(node).edges)
634                     {
635                         edgePartitions[Key(e.symbol, e.flags & EdgeFlags.matchingFlags)] = partitionIDs[e
636                             .next];
637                     }
638                     next[edgePartitions] ~= node;
639                 }
640                 foreach (p2; next)
641                     newPartitions ~= p2;
642             }
643             else
644             {
645                 NodeID[][immutable(size_t)[]] next;
646                 foreach (node; p)
647                 {
648                     size_t[] nextNodes;
649                     foreach (e; g.get(node).edges)
650                     {
651                         nextNodes.addOnce(partitionIDs[e.next]);
652                     }
653                     nextNodes.sort();
654                     next[nextNodes.idup] ~= node;
655                 }
656 
657                 foreach (p2; next)
658                 {
659                     alias Key = Tuple!(Symbol, EdgeFlags);
660                     NodeID[][] newPartitions2;
661                     size_t[Key][] partitionEdges;
662 
663                     foreach (node; p2)
664                     {
665                         bool good;
666                         foreach (i; 0 .. newPartitions2.length)
667                         {
668                             good = true;
669                             foreach (e; g.get(node).edges)
670                             {
671                                 auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags);
672                                 if (key in partitionEdges[i]
673                                         && partitionEdges[i][key] != partitionIDs[e.next])
674                                 {
675                                     good = false;
676                                     break;
677                                 }
678                             }
679                             if (good)
680                             {
681                                 foreach (e; g.get(node).edges)
682                                 {
683                                     auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags);
684                                     partitionEdges[i][key] = partitionIDs[e.next];
685                                 }
686                                 newPartitions2[i] ~= node;
687                                 break;
688                             }
689                         }
690                         if (!good)
691                         {
692                             newPartitions2 ~= [node];
693                             partitionEdges ~= null;
694                             foreach (e; g.get(node).edges)
695                             {
696                                 auto key = Key(e.symbol, e.flags & EdgeFlags.matchingFlags);
697                                 partitionEdges[$ - 1][key] = partitionIDs[e.next];
698                             }
699                         }
700                     }
701 
702                     newPartitions ~= newPartitions2;
703                 }
704             }
705         }
706         changed = newPartitions.length > partitions.length;
707         partitions = newPartitions;
708 
709         if (verbose)
710             printPartitions();
711     }
712     while (changed);
713 
714     NodeID[NodeID] nodeEquivalentMap;
715 
716     foreach (p; partitions)
717     {
718         foreach (n; p[1 .. $])
719             nodeEquivalentMap[n] = p[0];
720     }
721     updatePartitionIDs();
722 
723     Graph!(Symbol, Result, EdgeExtra) r = new Graph!(Symbol, Result, EdgeExtra)();
724     NodeID[NodeID] nodeMap;
725     size_t uniqueNr;
726     foreach (n; g.nodeIDs)
727     {
728         NodeID rn = n;
729         while (rn in nodeEquivalentMap)
730             rn = nodeEquivalentMap[rn];
731 
732         if (rn !in nodeMap)
733         {
734             string name = partitions[partitionIDs[n]].map!(i => i.id.text).join(", ");
735             nodeMap[rn] = r.addNode(name, g.get(rn).results);
736         }
737 
738         nodeMap[n] = nodeMap[rn];
739     }
740     if (g.start != NodeID.invalid)
741         r.start = nodeMap[g.start];
742     foreach (s; g.otherStarts)
743         r.otherStarts ~= nodeMap[s];
744 
745     foreach (n; g.nodeIDs)
746     {
747         foreach (e; g.get(n).edges)
748         {
749             auto e2 = r.getNextEdge(nodeMap[n], e.symbol, e.flags);
750             if (e2 is null)
751                 r.addEdge(nodeMap[n], nodeMap[e.next], e.symbol,
752                         e.flags & EdgeFlags.matchingFlags, e.extra);
753             else
754                 e2.extra ~= e.extra;
755         }
756     }
757 
758     return r;
759 }
760 
761 Graph!(Symbol, Result, EdgeExtra) removeDeadStates(Symbol, Result, EdgeExtra)(
762         Graph!(Symbol, Result, EdgeExtra) g)
763 {
764     alias G = Graph!(Symbol, Result, EdgeExtra);
765     auto isAlive = g.createSet();
766     foreach (n; g.nodeIDs)
767     {
768         if (g.get(n).results.length)
769             isAlive[n] = true;
770     }
771     bool changed;
772     do
773     {
774         changed = false;
775         foreach (n; g.nodeIDs)
776         {
777             if (isAlive[n])
778                 continue;
779             foreach (e; g.getEdges(n))
780             {
781                 if (isAlive[e.next])
782                 {
783                     isAlive[n] = true;
784                     changed = true;
785                 }
786             }
787         }
788     }
789     while (changed);
790 
791     G.NodeID[G.NodeID] nodeMap;
792     G r = new G();
793     G.NodeID mapNode(G.NodeID n)
794     {
795         if (n !in nodeMap)
796         {
797             nodeMap[n] = r.addNode(g.get(n).name, g.get(n).results);
798         }
799         return nodeMap[n];
800     }
801 
802     r.start = mapNode(g.start);
803     foreach (n; g.nodeIDs)
804     {
805         if (!isAlive[n])
806             continue;
807         foreach (e; g.getEdges(n))
808         {
809             if (!isAlive[e.next])
810                 continue;
811             r.addEdge(mapNode(n), mapNode(e.next), e.symbol, e.flags, e.extra);
812         }
813     }
814     return r;
815 }