// Copyright Tim Schendekehl 2023.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// https://www.boost.org/LICENSE_1_0.txt)

module dparsergen.core.charlexer;
import dparsergen.core.grammarinfo;
import dparsergen.core.location;
import std.array;
import std.conv;

/**
Minimal lexer that emits one token per input byte. It is only used
for tests.
*/
struct CharLexer
{
    /**
    Input that has not been consumed yet. It starts with the current token.
    */
    string input;

    /**
    Constructs a lexer over `input` and positions it on the first token.

    Params:
        input = Complete text to split into single-byte tokens.
        startLocation = Location assigned to the first byte of `input`.
    */
    this(string input, LocationBytes startLocation = LocationBytes.init)
    {
        this.input = input;
        front.currentLocation = startLocation;
        // front.content is still empty here, so this call consumes nothing
        // and only loads the first token.
        popFront();
    }

    /**
    Maps the token name `tok` to its ID at compile time.

    Accepted names are a single byte in double quotes (the ID is that
    byte), `$end` (256) and `$flushreduces` (257). Any other name is
    rejected with a static assert.
    */
    template tokenID(string tok)
    {
        static if (tok == "$end")
            enum tokenID = 256;
        else static if (tok == "$flushreduces")
            enum tokenID = 257;
        else static if (tok.length == 3 && tok[0] == '"' && tok[2] == '"')
            enum tokenID = tok[1];
        else
            static assert(false, "CharLexer does not support token " ~ tok);
    }

    /**
    Returns the token name that belongs to `id`.

    256 gives `$end` and 257 gives `$flushreduces`. Every other ID is
    cast to a `char` and wrapped in double quotes.
    */
    string tokenName(SymbolID id)
    {
        return id == 256 ? "$end"
            : id == 257 ? "$flushreduces"
            : text("\"", cast(char) id, "\"");
    }

    /**
    Describes the token the lexer is currently positioned on.
    */
    static struct Front
    {
        /**
        Slice of the input covered by this token.
        */
        string content;

        /**
        Symbol ID of this token.
        */
        SymbolID symbol;

        /**
        Location where this token starts.
        */
        LocationBytes currentLocation;

        /**
        Location directly behind this token, i.e. the start position
        advanced by the length of `content`.
        */
        LocationBytes currentTokenEnd()
        {
            alias BytePos = typeof(currentLocation.bytePos);
            return LocationBytes(cast(BytePos)(currentLocation.bytePos + content.length));
        }
    }

    /// ditto
    Front front;

    /**
    Set once the input is exhausted and no token is left.
    */
    bool empty;

    /**
    Drops the current token, then loads the next byte into `front` or
    marks the lexer as `empty` when no input remains.
    */
    void popFront()
    {
        // Skip the bytes of the token that was current until now.
        immutable consumed = front.content.length;
        input = input[consumed .. $];
        front.currentLocation.bytePos += consumed;

        empty = input.length == 0;
        if (empty)
        {
            front.content = "";
            front.symbol = SymbolID(0);
            return;
        }

        // The next token is exactly one byte; the byte value is its symbol.
        front.content = input[0 .. 1];
        front.symbol = input[0];
    }
}