//          Copyright Tim Schendekehl 2023.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          https://www.boost.org/LICENSE_1_0.txt)

module dparsergen.core.grammarinfo;

/**
Type used for IDs of nonterminals and tokens. The IDs can overlap. Use
[Symbol] for also distinguishing between nonterminals and tokens.
*/
alias SymbolID = ushort;

/**
Type used for IDs of productions.
*/
alias ProductionID = ushort;

/**
ID for nonterminal or token.
*/
struct Symbol
{
    /**
    Is this a token. It is a nonterminal otherwise. Use [NonterminalID]
    or [TokenID] if the type is known at compile time.
    */
    bool isToken;

    /**
    ID of the nonterminal or token as integer. Can be SymbolID.max if invalid.
    */
    SymbolID id = SymbolID.max;

    /**
    Convert to [NonterminalID] if it is a nonterminal.

    The `in` contract requires that this symbol is not a token.

    Returns: A [NonterminalID] with the same `id`.
    */
    NonterminalID toNonterminalID() const pure nothrow
    in (!isToken)
    {
        return NonterminalID(id);
    }

    /**
    Convert to [TokenID] if it is a token.

    The `in` contract requires that this symbol is a token.

    Returns: A [TokenID] with the same `id`.
    */
    TokenID toTokenID() const pure nothrow
    in (isToken)
    {
        return TokenID(id);
    }

    /**
    Constant for invalid symbol.
    */
    enum invalid = Symbol(false, SymbolID.max);

    /**
    Compare symbols.

    Symbols are ordered by `isToken` first, so every nonterminal sorts
    before every token, and by `id` second.

    Returns: -1, 0 or 1 if this symbol is smaller than, equal to or
        greater than `other`.
    */
    int opCmp(Symbol other) const pure nothrow
    {
        if (isToken < other.isToken)
            return -1;
        if (isToken > other.isToken)
            return 1;
        if (id < other.id)
            return -1;
        if (id > other.id)
            return 1;
        return 0;
    }
}

/**
ID for nonterminal.
*/
struct NonterminalID
{
    /**
    Always false, because this type only represents nonterminals.
    Mirrors the runtime field [Symbol.isToken] as a compile-time constant.
    */
    enum isToken = false;

    /**
    ID of the nonterminal as integer. Can be SymbolID.max if invalid.
    */
    SymbolID id = SymbolID.max;

    /**
    Convert to [Symbol].
    */
    Symbol toSymbol() const pure nothrow
    {
        return Symbol(isToken, id);
    }

    // Allows using a NonterminalID where a Symbol is expected.
    alias toSymbol this;

    /**
    Constant for invalid nonterminal.
    */
    enum invalid = NonterminalID(SymbolID.max);

    /**
    Compare nonterminal IDs by their integer `id`.

    Returns: -1, 0 or 1 if this ID is smaller than, equal to or
        greater than `other`.
    */
    int opCmp(NonterminalID other) const pure nothrow
    {
        if (id < other.id)
            return -1;
        if (id > other.id)
            return 1;
        return 0;
    }
}

/**
ID for token.
*/
struct TokenID
{
    /**
    Always true, because this type only represents tokens.
    Mirrors the runtime field [Symbol.isToken] as a compile-time constant.
    */
    enum isToken = true;

    /**
    ID of the token as integer. Can be SymbolID.max if invalid.
    */
    SymbolID id = SymbolID.max;

    /**
    Convert to [Symbol].
    */
    Symbol toSymbol() const pure nothrow
    {
        return Symbol(isToken, id);
    }

    // Allows using a TokenID where a Symbol is expected.
    alias toSymbol this;

    /**
    Constant for invalid token.
    */
    enum invalid = TokenID(SymbolID.max);

    /**
    Compare token IDs by their integer `id`.

    Returns: -1, 0 or 1 if this ID is smaller than, equal to or
        greater than `other`.
    */
    int opCmp(TokenID other) const pure nothrow
    {
        if (id < other.id)
            return -1;
        if (id > other.id)
            return 1;
        return 0;
    }
}

/**
Flags with information about nonterminals.
*/
enum NonterminalFlags
{
    /// No flags.
    none = 0,

    /// The nonterminal can be empty.
    empty = 0x01,
    /// This is a normal nonterminal and not a string or array.
    nonterminal = 0x02,
    /// This nonterminal should be stored as string.
    string = 0x04,
    /// This nonterminal can be a normal nonterminal or a string.
    anySingle = nonterminal | string,

    /// This nonterminal is an array.
    array = 0x10,
    /// The array can contain normal nonterminals.
    arrayOfNonterminal = 0x20,
    /// The array can contain strings.
    arrayOfString = 0x40,
    /// The array can contain normal nonterminals and strings.
    anyArray = array | arrayOfNonterminal | arrayOfString
}

/**
Metadata about a token.
*/
struct Token
{
    /**
    Name of the token.
    */
    string name;

    /**
    Annotations for the token from the grammar file.
    */
    string[] annotations;
}

/**
Metadata about a nonterminal.
*/
struct Nonterminal
{
    /**
    Name of the nonterminal.
    */
    string name;

    /**
    Flags with information about the nonterminal.
    */
    NonterminalFlags flags;

    /**
    Annotations for the nonterminal from the grammar file.
    */
    string[] annotations;

    /**
    Nonterminals reachable through unwrap productions, which can be created.
    */
    immutable(SymbolID)[] buildNonterminals;
}

/**
Metadata about a symbol inside a production.
*/
struct SymbolInstance
{
    /**
    ID of the symbol.
    */
    Symbol symbol;
    // Allows using a SymbolInstance where a Symbol is expected.
    alias symbol this;

    /**
    Expected content for tokens with only one allowed value.
    */
    string subToken;

    /**
    Optional name for this symbol inside the production.
    */
    string symbolInstanceName;

    /**
    The production should be replaced with this symbol in the parse tree.
    */
    bool unwrapProduction;

    /**
    This symbol should not be represented as a node in the parse tree.
    */
    bool dropNode;

    /**
    Annotations for the symbol from the grammar file.
    */
    string[] annotations;

    /**
    Negative lookahead for this symbol.
    */
    immutable(Symbol)[] negLookaheads;
}

/**
Metadata about production.
*/
struct Production
{
    /**
    Nonterminal produced by this production.
    */
    NonterminalID nonterminalID = NonterminalID.invalid;

    /**
    List of symbols needed for this production.
    */
    immutable(SymbolInstance)[] symbols;

    /**
    Annotations for the production from the grammar file.
    */
    string[] annotations;

    /**
    Negative lookahead at the end of this production.
    */
    Symbol[] negLookaheads;

    /**
    Only end of file allowed after this production.
    */
    bool negLookaheadsAnytoken;

    /**
    The production was automatically generated.
    */
    bool isVirtual;
}

/**
Information about the grammar for use at runtime.
*/
struct GrammarInfo
{
    /**
    Offset for IDs of all tokens in allTokens.
    */
    SymbolID startTokenID;

    /**
    Offset for IDs of all nonterminals in allNonterminals.
    */
    SymbolID startNonterminalID;

    /**
    Offset for IDs of all productions in allProductions.
    */
    ProductionID startProductionID;

    /**
    Information about all tokens from the grammar.
    */
    Token[] allTokens;

    /**
    Information about all nonterminals from the grammar.
    */
    Nonterminal[] allNonterminals;

    /**
    Information about all productions from the grammar.
    */
    Production[] allProductions;
}