1 
2 //          Copyright Tim Schendekehl 2023.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          https://www.boost.org/LICENSE_1_0.txt)
6 
7 module dparsergen.core.charlexer;
8 import dparsergen.core.grammarinfo;
9 import dparsergen.core.location;
10 import std.array;
11 import std.conv;
12 
/**
Minimal lexer that turns every single byte of the input into its own
token. This is only used for tests.
*/
struct CharLexer
{
    /// Input that has not been consumed yet; it begins at the current token.
    string input;

    /**
    Constructs the lexer and loads the first token.

    Params:
        input = Complete text to split into byte tokens.
        startLocation = Location assigned to the first byte of `input`.
    */
    this(string input, LocationBytes startLocation = LocationBytes.init)
    {
        this.input = input;
        front.currentLocation = startLocation;
        // front.content is still empty at this point, so this call loads
        // the first token without skipping any input.
        popFront();
    }

    /**
    Maps the token name `tok` to its internal ID at compile time.

    A three character name consisting of one character between double
    quotes yields that character, `$end` yields 256 and `$flushreduces`
    yields 257. Every other name is rejected with a compile error.
    */
    template tokenID(string tok)
    {
        static if (tok == "$end")
            enum tokenID = 256;
        else static if (tok == "$flushreduces")
            enum tokenID = 257;
        else static if (tok.length == 3 && tok[0] == '"' && tok[2] == '"')
            enum tokenID = tok[1];
        else
            static assert(false, "CharLexer does not support token " ~ tok);
    }

    /**
    Maps the token ID `id` back to a token name.

    The IDs 256 and 257 give `$end` and `$flushreduces`. Any other ID is
    cast to a `char` and returned between double quotes.
    */
    string tokenName(SymbolID id)
    {
        if (id == 256)
            return "$end";
        else if (id == 257)
            return "$flushreduces";
        else
        {
            char ch = cast(char) id;
            return text("\"", ch, "\"");
        }
    }

    /**
    Data describing the token the lexer is currently positioned on.
    */
    static struct Front
    {
        /**
        Slice of the input covered by this token.
        */
        string content;

        /**
        Symbol ID assigned to this token.
        */
        SymbolID symbol;

        /**
        Location at which this token begins.
        */
        LocationBytes currentLocation;

        /**
        Location directly behind this token, computed as the start
        position plus the length of `content`.
        */
        LocationBytes currentTokenEnd()
        {
            alias Pos = typeof(currentLocation.bytePos);
            auto endPos = currentLocation.bytePos + content.length;
            return LocationBytes(cast(Pos) endPos);
        }
    }

    /// ditto
    Front front;

    /**
    Set to true by `popFront` once no input is left.
    */
    bool empty;

    /**
    Drops the current token and loads the next one.

    The remaining input and the current location are moved forward by
    the length of the old token. If a byte is left, it becomes the new
    token. Otherwise `front` gets empty content with symbol 0 and
    `empty` is set.
    */
    void popFront()
    {
        immutable consumed = front.content.length;
        input = input[consumed .. $];
        front.currentLocation.bytePos += consumed;

        empty = input.length == 0;
        if (empty)
        {
            front.content = "";
            front.symbol = SymbolID(0);
        }
        else
        {
            // The next token is exactly one byte; its value is the symbol ID.
            front.content = input[0 .. 1];
            front.symbol = input[0];
        }
    }
}