1 
2 //          Copyright Tim Schendekehl 2023.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          https://www.boost.org/LICENSE_1_0.txt)
6 
7 module dparsergen.core.grammarinfo;
8 
/**
Integer type used for the IDs of nonterminals and tokens.

Nonterminal IDs and token IDs can overlap, so a bare `SymbolID` does not
say which of the two it refers to. Use [Symbol] when nonterminals and
tokens also have to be distinguished. `SymbolID.max` is used by [Symbol],
[NonterminalID] and [TokenID] as the value of an invalid ID.
*/
alias SymbolID = ushort;
14 
/**
Integer type used for the IDs of productions
(see [GrammarInfo.startProductionID] and [GrammarInfo.allProductions]).
*/
alias ProductionID = ushort;
19 
/**
ID for a nonterminal or a token.

Combines a [SymbolID] with a flag telling whether the ID refers to a token
or to a nonterminal. A default-initialized `Symbol` is equal to
[Symbol.invalid].
*/
struct Symbol
{
    /**
    True if this symbol is a token, false if it is a nonterminal. Use
    [NonterminalID] or [TokenID] if the kind is known at compile time.
    */
    bool isToken;

    /**
    ID of the nonterminal or token as integer. Can be SymbolID.max if invalid.
    */
    SymbolID id = SymbolID.max;

    /**
    Convert to [NonterminalID].

    The in-contract requires that this symbol is a nonterminal.

    Returns: A [NonterminalID] with the same `id`.
    */
    NonterminalID toNonterminalID() const pure nothrow
    in (!isToken)
    {
        return NonterminalID(id);
    }

    /**
    Convert to [TokenID].

    The in-contract requires that this symbol is a token.

    Returns: A [TokenID] with the same `id`.
    */
    TokenID toTokenID() const pure nothrow
    in (isToken)
    {
        return TokenID(id);
    }

    /**
    Constant for an invalid symbol (a nonterminal with ID SymbolID.max).
    */
    enum invalid = Symbol(false, SymbolID.max);

    /**
    Compare symbols.

    Nonterminals are ordered before tokens. Two symbols of the same kind
    are ordered by their `id`.

    Returns: -1, 0 or 1 if this symbol is less than, equal to or greater
    than `other`.
    */
    int opCmp(Symbol other) const pure nothrow
    {
        // The kind is the primary sort key: false (nonterminal) < true (token).
        if (isToken != other.isToken)
            return isToken ? 1 : -1;
        if (id < other.id)
            return -1;
        if (id > other.id)
            return 1;
        return 0;
    }
}
75 
/**
Strongly typed ID of a nonterminal. Converts implicitly to [Symbol].
*/
struct NonterminalID
{
    /// Compile-time marker: a NonterminalID never refers to a token.
    enum isToken = false;

    /**
    Integer value of the nonterminal ID. SymbolID.max marks an invalid ID.
    */
    SymbolID id = SymbolID.max;

    /**
    Build the equivalent [Symbol] for this nonterminal.
    */
    Symbol toSymbol() const pure nothrow
    {
        auto converted = Symbol(isToken, id);
        return converted;
    }

    alias toSymbol this;

    /**
    Constant for an invalid nonterminal.
    */
    enum invalid = NonterminalID(SymbolID.max);

    /**
    Order nonterminal IDs by their integer value.

    Returns: -1, 0 or 1 if this ID is less than, equal to or greater
    than `other`.
    */
    int opCmp(NonterminalID other) const pure nothrow
    {
        return id < other.id ? -1 : (id > other.id ? 1 : 0);
    }
}
115 
/**
Strongly typed ID of a token. Converts implicitly to [Symbol].
*/
struct TokenID
{
    /// Compile-time marker: a TokenID always refers to a token.
    enum isToken = true;

    /**
    Integer value of the token ID. SymbolID.max marks an invalid ID.
    */
    SymbolID id = SymbolID.max;

    /**
    Build the equivalent [Symbol] for this token.
    */
    Symbol toSymbol() const pure nothrow
    {
        auto converted = Symbol(isToken, id);
        return converted;
    }

    alias toSymbol this;

    /**
    Constant for an invalid token.
    */
    enum invalid = TokenID(SymbolID.max);

    /**
    Order token IDs by their integer value.

    Returns: -1, 0 or 1 if this ID is less than, equal to or greater
    than `other`.
    */
    int opCmp(TokenID other) const pure nothrow
    {
        return id < other.id ? -1 : (id > other.id ? 1 : 0);
    }
}
155 
/**
Bit flags describing properties of a nonterminal. The single flags can be
combined with bitwise or; `anySingle` and `anyArray` are such combinations.
*/
enum NonterminalFlags
{
    /// No flag is set.
    none = 0,

    /// The nonterminal can be empty.
    empty = 1 << 0,
    /// This is a normal nonterminal and not a string or array.
    nonterminal = 1 << 1,
    /// This nonterminal should be stored as string.
    string = 1 << 2,
    /// Either a normal nonterminal or a string.
    anySingle = nonterminal | string,

    /// This nonterminal is an array.
    array = 1 << 4,
    /// The array can contain normal nonterminals.
    arrayOfNonterminal = 1 << 5,
    /// The array can contain strings.
    arrayOfString = 1 << 6,
    /// Array which can contain normal nonterminals and strings.
    anyArray = array | arrayOfNonterminal | arrayOfString
}
182 
/**
Metadata about a token of the grammar.
*/
struct Token
{
    /**
    Name of the token.
    */
    string name;

    /**
    Annotations for the token from the grammar file.
    */
    string[] annotations;
}
198 
/**
Metadata about a nonterminal of the grammar.
*/
struct Nonterminal
{
    /**
    Name of the nonterminal.
    */
    string name;

    /**
    Flags with information about the nonterminal (see [NonterminalFlags]).
    */
    NonterminalFlags flags;

    /**
    Annotations for the nonterminal from the grammar file.
    */
    string[] annotations;

    /**
    Nonterminals reachable through unwrap productions, which can be created.
    Stored as plain [SymbolID] values.
    */
    immutable(SymbolID)[] buildNonterminals;
}
224 
/**
Metadata about one occurrence of a symbol inside a production.
*/
struct SymbolInstance
{
    /**
    ID of the symbol.
    */
    Symbol symbol;
    // Lets a SymbolInstance be used wherever a Symbol is expected.
    alias symbol this;

    /**
    Expected content for tokens with only one allowed value.
    */
    string subToken;

    /**
    Optional name for this symbol inside the production.
    */
    string symbolInstanceName;

    /**
    The production should be replaced with this symbol in the parse tree.
    */
    bool unwrapProduction;

    /**
    This symbol should not be represented as a node in the parse tree.
    */
    bool dropNode;

    /**
    Annotations for the symbol from the grammar file.
    */
    string[] annotations;

    /**
    Negative lookahead for this symbol.
    */
    immutable(Symbol)[] negLookaheads;
}
266 
/**
Metadata about a production of the grammar.
*/
struct Production
{
    /**
    Nonterminal produced by this production. Defaults to an invalid ID
    (SymbolID.max).
    */
    NonterminalID nonterminalID = NonterminalID(SymbolID.max);

    /**
    List of symbols needed for this production.
    */
    immutable(SymbolInstance)[] symbols;

    /**
    Annotations for the production from the grammar file.
    */
    string[] annotations;

    /**
    Negative lookahead at the end of this production.
    */
    Symbol[] negLookaheads;

    /**
    Only end of file allowed after this production.
    */
    bool negLookaheadsAnytoken;

    /**
    The production was automatically generated.
    */
    bool isVirtual;
}
302 
/**
Information about the grammar for use at runtime.

The three `start*ID` members are offsets for the IDs of the entries in
the matching `all*` arrays.
NOTE(review): presumably the entry for an ID is found at index
`id - start*ID` of the matching array; confirm against the users of
this struct.
*/
struct GrammarInfo
{
    /**
    Offset for IDs of all tokens in allTokens.
    */
    SymbolID startTokenID;

    /**
    Offset for IDs of all nonterminals in allNonterminals.
    */
    SymbolID startNonterminalID;

    /**
    Offset for IDs of all productions in allProductions.
    */
    ProductionID startProductionID;

    /**
    Information about all tokens from the grammar.
    */
    Token[] allTokens;

    /**
    Information about all nonterminals from the grammar.
    */
    Nonterminal[] allNonterminals;

    /**
    Information about all productions from the grammar.
    */
    Production[] allProductions;
}