Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 371 lines (313 sloc) 17.843 kB
f926899 Initial check-in.
uli authored
1 /*
2 * BCParser.h
3 * 1FACH
4 *
5 * Created by Uli Kusterer on Sun Jul 20 2003.
6 * Copyright (c) 2003 M. Uli Kusterer. All rights reserved.
7 *
8 */
9
10 /* -----------------------------------------------------------------------------
11 Headers:
12 -------------------------------------------------------------------------- */
13
#pragma once    // This header has no include guard; without this a second #include redefines every type below.

#include <cstddef>      // NULL, size_t
#include <cstdio>       // printf, snprintf
#include <deque>
#include <iosfwd>       // std::ostream (BCParser::PrintCode)
#include <map>
#include <stdexcept>
#include <string>
#include "BCInstruction.h"
19
20
// Master switch for the parser's debug output. Define BCPARSER_DEBUG to a
// non-zero value before including this header to turn the output on.
#ifndef BCPARSER_DEBUG
#define BCPARSER_DEBUG                  0
#endif

// Tokenizer debugging follows the master switch unless it is set separately.
#ifndef BCDEBUG_TOKENIZER
#define BCDEBUG_TOKENIZER               BCPARSER_DEBUG
#endif

// DEBUG_OUTPUT..DEBUG_OUTPUT4 forward to printf() when BCPARSER_DEBUG is
// non-zero. When it is zero they expand to a no-op expression that does not
// evaluate its arguments, so "if( x ) DEBUG_OUTPUT(...); else ..." stays a
// well-formed statement in both configurations.
#if BCPARSER_DEBUG
#define DEBUG_OUTPUT(n)                 printf("%s",n)
#define DEBUG_OUTPUT2(n,o)              printf(n,o)
#define DEBUG_OUTPUT3(n,o,p)            printf(n,o,p)
#define DEBUG_OUTPUT4(n,o,p,q)          printf(n,o,p,q)
#else
#define DEBUG_OUTPUT(n)                 ((void)0)
#define DEBUG_OUTPUT2(n,o)              ((void)0)
#define DEBUG_OUTPUT3(n,o,p)            ((void)0)
#define DEBUG_OUTPUT4(n,o,p,q)          ((void)0)
#endif
40
41
42 /* -----------------------------------------------------------------------------
43 BCToken:
44 Constants and struct definition for the data structure used for keeping
45 a list of tokens after tokenizing.
46 -------------------------------------------------------------------------- */
47
// Possible values for BCToken::mTokenType, also used as the scanner's notion
// of "what am I currently reading" while tokenizing:
enum BCTokenTypeEnum
{
    BCT_WHITESPACE = 0,     // We're currently scanning whitespace. There should never be a token that has this type in BCParser::mTokens. This may be used as an indicator for end-of-list.
    BCT_NEWLINE,            // A line break of any kind.
    BCT_IDENTIFIER,         // Any kind of identifier, operator etc.
    BCT_STRING,             // A double-quoted string constant.
    BCT_INTEGER,            // An identifier consisting only of numerical digits.
    BCT_DOUBLE,             // An identifier consisting only of numerical digits and a period.

    // More readable variant of BCT_WHITESPACE when used as end-of-list indicator.
    // Keep this alias last so new token types added above keep consecutive values.
    BCT_INVALID = BCT_WHITESPACE
};
62
63
// Possible values for BCToken::mTokenID.
// If you add something here, you must also register its spelling in
// BCParser::sIdentifiers, otherwise the tokenizer cannot map the identifier to the new ID.
// NOTE(review): that table is presumably filled in by BCParser::InitLookupTables() -- confirm in the implementation file.
enum BCTokenIDEnum
{
    BCTI_ARBITRARY = 0,             // Not a specially-recognized token.
    BCTI_ON,                        // "on" for message handlers.
    BCTI_END,                       // "end" for indicating end if, end handler etc.
    BCTI_IF,                        // "if" ... then ...
    BCTI_THEN,                      // if ... "then" ...
    BCTI_ELSE,                      // if ... then ... "else"
    BCTI_PUT,                       // "put" ... into ...
    BCTI_INTO,                      // put ... "into" ...
    BCTI_PASS,                      // "pass" through ...
    BCTI_THROUGH,                   // pass "through" ...
    BCTI_OPERATOR_PLUS,             // "+"
    BCTI_OPERATOR_MINUS,            // "-"
    BCTI_OPERATOR_MULTIPLY,         // "*"
    BCTI_OPERATOR_DIVIDE,           // "/"
    BCTI_OPERATOR_LIST_START,       // "["
    BCTI_OPERATOR_LIST_END,         // "]"
    BCTI_OPERATOR_IS,               // "is" or "="
    BCTI_OPERATOR_COMMA,            // ","
    BCTI_OPERATOR_AMPERSAND,        // "&"
    BCTI_WITH,                      // "with"
    BCTI_OPERATOR_APOSTROPHE,       // "'" for "object's property"
    BCTI_OPERATOR_OPEN_BRACKET,     // "("
    BCTI_OPERATOR_CLOSE_BRACKET,    // ")"
    BCTI_PROPERTY,                  // "property" or "properties"
    BCTI_OPERATOR_MOD,              // "mod" for remainder of division operator.
    BCTI_SYSTEM,                    // "system" identifier for system-native API calls.
    BCTI_S,                         // "s" for "object's property"
    BCTI_THE,                       // "the" for "the result" and such stuff
    BCTI_RESULT,                    // "result" for "the result"
    BCTI_NEW,                       // "new" as in "new function xy" [FFI]
    BCTI_FUNCTION,                  // "function" as in "new function xy" [FFI]
    BCTI_IN,                        // "in" as in "new function xy in <library>" [FFI]
    BCTI_RETURNS,                   // "returns" as in "new function xy returns type" [FFI]
    BCTI_NOTHING,                   // "nothing" as in "new function xy returns nothing" for void functions [FFI]
    BCTI_INTEGER,                   // "integer" as in "new function xy( integer )" for short integers [FFI]
    BCTI_CHARACTER,                 // "character" data type [FFI]
    BCTI_POINTER,                   // "pointer" data type [FFI]
    BCTI_PASCAL,                    // "pascal" data type qualifier [FFI]
    BCTI_STRING,                    // "string" data type [FFI]
    BCTI_FOUNDATION,                // "foundation" data type qualifier for Macs [FFI]
    BCTI_UNSIGNED,                  // "unsigned" data type qualifier [FFI]
    BCTI_DECIMAL,                   // "decimal" data type qualifier [FFI]
    BCTI_NUMBER,                    // "number" data type [FFI]
    BCTI_BIG,                       // "big" data type qualifier [FFI]
    BCTI_OPERATOR_NOT,              // binary "is not" operator or unary "not".
    BCTI_OPERATOR_IS_NOT,           // binary single-character "is not" operator ("=/="), and token type returned for "is not" by ParseBinaryOperator.
    BCTI_CONTAINER,                 // "container" qualifier for pass-by-reference of parameters.
    BCTI_AS,                        // "as" qualifier indicating name for associative array item.
    BCTI_REPEAT,                    // "repeat" identifier used in loops.
    BCTI_WHILE,                     // "while" qualifier used in repeat loops.
    BCTI_TRUE,                      // "true" constant (1).
    BCTI_FALSE,                     // "false" constant (0).
    BCTI_RETURN,                    // "return" constant (ASCII character 13) or "return" command.
    BCTI_LINEFEED,                  // "linefeed" constant (ASCII character 10).
    BCTI_NULL,                      // "null" constant (ASCII character 0).
    BCTI_TAB,                       // "tab" constant (ASCII character 9).
    BCTI_OPERATOR_GREATER_THAN,     // ">" operator
    BCTI_OPERATOR_LESS_THAN         // "<" operator
};
127
128
// Character constants (may be platform-specific):

// "Not equal to" sign: Unicode U+2260, which is the single byte 0xAD in Mac OS Roman.
// Written as a hex escape so the value does not change when this file is
// re-encoded (as UTF-8 the raw character becomes two bytes and the char
// constant turns into a multi-character constant).
// NOTE(review): assumes the scripts being tokenized are Mac OS Roman bytes -- confirm against the tokenizer.
#define BCC_IS_NOT_SIGN_STR             "\xAD"
#define BCC_IS_NOT_SIGN_CHAR            '\xAD'
134
// Possible return values for BCParser::ParseValue, telling the caller what
// kind of value (if any) was found:
enum
{
    BCV_NONE,       // Nothing eligible for a value found.
    BCV_STRING,     // String constant parsed.
    BCV_INTEGER,    // Integer constant parsed.
    BCV_DOUBLE,     // Decimal number constant parsed.
    BCV_LIST,       // Constant list of values parsed.
    BCV_OBJECT,     // Object descriptor parsed.
    BCV_DYNAMIC     // Any non-constant value, like a function result or list entry.
};
146
147
/* This is a data structure which we use as a shorthand notation for a token
   (a "word"). It may seem silly to keep info about a word of four characters
   in a structure of four unsigned ints, but since this one is fixed-size,
   scanning forward and backward is a lot faster, and comparisons usually only
   involve comparing one integer instead of an entire string. */

typedef unsigned int    BCTokenType;    // Holds a BCTokenTypeEnum value.
typedef unsigned int    BCTokenID;      // Holds a BCTokenIDEnum value.
typedef unsigned int    BCOffset;       // Character offset into the text being tokenized.

// Plain aggregate (no constructors), so it can be brace-initialized.
struct BCToken
{
    BCTokenType     mTokenType;     // String, number, identifier?
    BCTokenID       mTokenID;       // Number to identify some tokens we recognize right away.
    BCOffset        mStartOffs;     // Offset to first char of token in string.
    BCOffset        mEndOffs;       // Offset to last char of token in string.
};
165
166
// Masks for our tokenizer's state machine. Each state is its own bit so that
// several of them can be combined in one unsigned state word:
enum BCTokenState
{
    BC_TOKEN_STATE_WHITESPACE       = 0,
    BC_TOKEN_STATE_IDENTIFIER       = (1 << 0),
    BC_TOKEN_STATE_STRING           = (1 << 1),
    BC_TOKEN_STATE_ESCAPE_SEQ       = (1 << 2),     // Only when BC_TOKEN_STATE_STRING is set.
    BC_TOKEN_STATE_INTEGER          = (1 << 3),     // Check this *and* BC_TOKEN_STATE_NUMBER. If NUMBER is set and this isn't, it means we already found a decimal point.
    BC_TOKEN_STATE_NUMBER           = (1 << 4),
    BC_TOKEN_STATE_COMMENT          = (1 << 5),
    BC_TOKEN_STATE_BLOCK_COMMENT    = (1 << 6)
};
179
180
181 /* -----------------------------------------------------------------------------
182 Data Types:
183 -------------------------------------------------------------------------- */
184
// Signature and location of one system-native function.
class BCSysFcnEntry
{
public:
    std::string     mFunctionSignature;     // Function signature (param and return value types).
    std::string     mFunctionNameLib;       // Name of function and library.

public:
    // Creates an entry with both strings empty.
    BCSysFcnEntry() {}

    // Copies the entry sfe points to. A NULL pointer yields an empty entry
    // instead of being dereferenced.
    BCSysFcnEntry( const BCSysFcnEntry* sfe )
    {
        if( sfe )
            *this = *sfe;
    }

    BCSysFcnEntry( const std::string& sig, const std::string& nm )
        : mFunctionSignature(sig), mFunctionNameLib(nm) {}

    // C-string convenience variant. NULL is treated as an empty string because
    // constructing a std::string from a NULL pointer is undefined.
    BCSysFcnEntry( const char* sig, const char* nm )
        : mFunctionSignature( sig ? sig : "" ), mFunctionNameLib( nm ? nm : "" ) {}

    // Copy construction and assignment are the compiler-generated member-wise
    // versions, which is all the previous hand-written ones did.
};
201
// Type information the parser keeps about one class.
class BCClassEntry
{
public:
    std::string                         mClassName;         // Name of this class.
    BCClassEntry*                       mSuperclass;        // Superclass of this class, NULL if none. Not owned by this entry.
    unsigned int                        mSuperclassCount;   // Number of classes we inherit stuff from.
    std::map<std::string,unsigned int>  mInstanceVars;      // Variable name --> slot offset mappings.

public:
    // Creates a nameless root entry. The pointer and the count are set
    // explicitly; they used to be left uninitialized here.
    BCClassEntry() : mSuperclass(NULL), mSuperclassCount(0) {}

    // Creates an entry named nm that inherits from su (NULL for a root class).
    // The superclass count is taken from su as it is at this moment.
    BCClassEntry( const std::string& nm, BCClassEntry* su )
        : mClassName(nm), mSuperclass(su), mSuperclassCount(0)
    {
        CalcSuperclassCount();
    }

    // Copies the entry tm points to. A NULL pointer yields an empty root
    // entry instead of being dereferenced.
    BCClassEntry( const BCClassEntry* tm ) : mSuperclass(NULL), mSuperclassCount(0)
    {
        if( tm )
            *this = *tm;
    }

    // Copy construction and assignment are the compiler-generated member-wise
    // versions, which is all the previous hand-written copy constructor did.

protected:
    // One more than the superclass's count, or 0 if there is no superclass.
    void    CalcSuperclassCount()
    {
        mSuperclassCount = mSuperclass ? (mSuperclass->mSuperclassCount +1) : 0;
    }
};
219
220 typedef std::deque<BCToken> BCTokenList; // A list of BCToken objects.
221 typedef std::deque<BCInstruction> BCInstructionList; // A list of BCInstruction structs.
222 typedef std::map<std::string,BCTokenID> BCIdentifierIDMap; // Mapping from identifier names to their token IDs. Used by the tokenizer.
223 typedef std::map<BCTokenID,BCUInt32> BCOperatorInstrMap; // Mapping from a certain operator's token ID to the associated instruction ID. Note that some operators may actually consist of several tokens, in which case the token ID is a "virtual" ID that represents *both* tokens or an equivalent token's ID.
224 typedef std::map<BCUInt32,int> BCOpPrecedenceMap; // Mapping from a particular operator's instruction ID to an operator precedence value, to decide which operation in an expression will be executed first.
225 typedef std::map<std::string,BCSysFcnEntry> BCSystemFunctionMap; // Mapping from system function name to its signature.
226 typedef std::map<std::string,BCClassEntry> BCClassMap; // Mapping from class names to type information.
227 typedef std::map<std::string,int> BCVarNameToIndexMap; // Mapping from local var to its stack index.
228 typedef std::map<std::string,BCUInt32> BCConstantStringsMap; // Look-up table that allows re-using string constant instructions. AddStringInstruction() adds all strings it creates to this, and whenever a string is requested it returns an existing entry from this table, where possible.
229 typedef std::map<BCTokenID,BCUInt32> BCIntegerConstMap; // Register all integral constants with this.
230 typedef std::map<BCTokenID,std::string> BCStringConstMap; // Register all built-in string or character constants with this.
231
232
233 /* -----------------------------------------------------------------------------
234 Exceptions:
235 These are a couple of exception objects that are thrown when an error
236 occurs.
237 -------------------------------------------------------------------------- */
238
// Couldn't create a file.
// what() returns:  Can't create file "<name>".
// The message is composed once at construction and stored in the
// std::runtime_error base. The previous what() formatted into a function-static
// buffer, so every instance (and every thread) shared and overwrote the same
// text, and long names were cut off at 511 bytes.
class BCCreateFileError : public std::runtime_error
{
public:
    BCCreateFileError( const std::string& name )
        : std::runtime_error( "Can't create file \"" + name + "\"." ) {}
};
254
255
// Couldn't write to a file. what() returns the file name exactly as passed in.
class BCWriteFileError : public std::runtime_error
{
public:
    // Taken by const reference; the by-value parameter made an extra copy of the name on every throw.
    BCWriteFileError( const std::string& name ) : std::runtime_error(name) {}
};
262
263
// Syntax error or something like that in the parser.
// what() returns:  <name> (offset <offs>).
// The message is composed once at construction and stored in the
// std::runtime_error base. The previous what() formatted into a function-static
// buffer, so every instance (and every thread) shared and overwrote the same
// text, and long messages were cut off at 511 bytes.
class BCParserError : public std::runtime_error
{
    unsigned int    mOffset;    // Offset into the text where the error occurred.

    // Builds the text returned by what().
    static std::string  ComposeMessage( const std::string& name, unsigned int offs )
    {
        char    vOffsetStr[32];     // Ample room for the decimal digits of an unsigned int.

        snprintf( vOffsetStr, sizeof(vOffsetStr), "%u", offs );

        return name + " (offset " + vOffsetStr + ").";
    }

public:
    BCParserError( const std::string& name, unsigned int offs = 0 )
        : std::runtime_error( ComposeMessage( name, offs ) ), mOffset(offs) {}

    // Offset into the text where the error occurred, as passed to the constructor.
    unsigned int    GetOffset() const   { return mOffset; }
};
281
282
283 /* -----------------------------------------------------------------------------
284 BCParser:
285 The parser class. This is an object which can be used to turn text into
286 instructions by tokenizing and parsing the text. There are also some
287 methods that allow adding certain instructions to the code (e.g. if you
288 just want to evaluate an expression and print its result to the
289 console), and some methods that allow saving code to disk.
290 -------------------------------------------------------------------------- */
291
292 class BCParser
293 {
294 protected:
295 BCTokenList mTokens; // List of tokens (after Tokenize() has been called).
296 BCInstructionList mCode; // Code (after Parse() has been called).
297 char* mText; // Text being tokenized/parsed (only until parsing is complete)
298 bool mCompileForDebug; // Generate instructions that aid in source-level debugging?
299 std::string mCurrentFilename; // Name of current source file.
300 BCConstantStringsMap mConstantStrings; // String constant -> String instruction index mappings used by AddStringInstruction to allow re-use.
301 static BCIdentifierIDMap sIdentifiers; // List of identifiers (after construction).
302 static BCOperatorInstrMap sOperatorInstrs; // Mappings from operator token ID to instruction type.
303 static BCOpPrecedenceMap sOpPrecedences; // Mappings from operator instruction type to operator precedence (higher means gets the left argument).
304 static BCSystemFunctionMap sSystemFunctions; // Mappings from system function name to signature.
305 static BCClassMap sClasses; // Information about the different classes we've seen so far.
306 static BCUInt32 sObjectIDSeed; // Number for a new object's ID. If you use it, add one to it so the next user gets a fresh one.
307 static BCIntegerConstMap sIntegralConstants; // Mappings from token type for a constant to its integer value.
308 static BCStringConstMap sStringConstants; // Mappings from token type for a constant to its integral value.
309
310 public:
311 BCParser();
312
313 void SetCurrentFilename( const std::string& s) { mCurrentFilename.assign( s ); }; // File path where the debugger should look for this source file.
314
315 void Tokenize( const char* inText, size_t len ); // Call this first, and hand it your text to be parsed. The text needn't be zero-terminated, and length shouldn't include any terminating zero bytes if you add them. You mustn't dispose of the text until it's been parsed.
316 void ClearTokens() { mTokens.clear(); };
317
318 void Parse(); // Call this or ParseMethodBody second.
319 void ParseMethodBody( BCTokenList::iterator& itty, BCUInt32* numParams, BCVarNameToIndexMap& vNameToIndex, BCToken* stopTokens = NULL );
320 void ParseMethodBody( BCUInt32* numParams, BCVarNameToIndexMap& vNameToIndex );
321 void ParseHeaderFile();
322
323 void AddEndInstruction(); // Mark end of code block.
324 void AddPrintResultInstructions(); // Adds a "print" instruction that outputs the variable "the result". Useful if you want to generate code using ParseMethodBody() that outputs the result instead of just leaving it on the stack to rot.
325 void AddPrintInstruction();
326
327 void PrintAllTokens();
328 void PrintCode( std::ostream& outs ); // Prints the code in human-readable form to the specified stream.
329 void SaveToFile( const char* fpath ); // Saves raw code (without instances).
330
331 void SetCompileForDebug( bool n ) { mCompileForDebug = n; };
332
333 static void InitLookupTables(); // Called by the first BCParser's constructor automatically.
334
335 protected:
336 void EndToken( std::string& ioCurrTokenStr, BCToken* ioCurrToken, unsigned int inNewStart );
337 void ParseObject( BCTokenList::iterator& itty );
338 void ParseExpression( BCTokenList::iterator& itty, BCUInt32* numParams, BCVarNameToIndexMap& vNameToIndex );
339 unsigned int ParseBinaryOperator( BCTokenList::iterator& itty );
340 unsigned int ParseValue( BCTokenList::iterator& itty, BCUInt32* numParams, BCVarNameToIndexMap& vNameToIndex, bool needContainer = false );
341 bool ParseMethod( BCTokenList::iterator& itty );
342 BCUInt32* AddPushLocalVarsInstruction( int numVars );
343 BCUInt32 AddStringInstruction( std::string& str, bool addPushInstr = true );
344 void AddDebugInstructions( BCTokenList::iterator& itty );
345 void AppendInstruction( BCInstruction& instr );
346 char TokensToTypeChar( BCTokenList::iterator& itty );
347 void TokenizeOneWhitespaceChar( size_t x, std::string& vCurrTokenStr, unsigned *ioTokenState,
348 BCToken* vCurrToken );
349 bool IsOperatorChar( char c );
350 void TokenizeOneNewlineChar( size_t x, std::string& vCurrTokenStr,
351 unsigned *ioTokenState,
352 BCToken* vCurrToken );
353 void TokenizeOneOperatorChar( size_t x, std::string& vCurrTokenStr,
354 unsigned *ioTokenState,
355 BCToken* vCurrToken );
356 void TokenizeOneIdentifierChar( size_t x, std::string& vCurrTokenStr,
357 unsigned *ioTokenState,
358 BCToken* vCurrToken );
359 void TokenizeOneNumberChar( size_t x, std::string& vCurrTokenStr,
360 unsigned *ioTokenState,
361 BCToken* vCurrToken );
362 void TokenizeOneCommentChar( size_t x, std::string& vCurrTokenStr,
363 unsigned *ioTokenState,
364 BCToken* vCurrToken );
365 void TokenizeOneBlockCommentChar( size_t x, std::string& vCurrTokenStr,
366 unsigned *ioTokenState,
367 BCToken* vCurrToken );
368 void AppendEscapedCharsFor( char escapeSequence, std::string& vCurrTokenStr );
369 void TokenizeOneStringChar( size_t x, std::string& vCurrTokenStr, unsigned *ioTokenState,
370 BCToken* vCurrToken );
371 };
Something went wrong with that request. Please try again.