diff --git a/src/lex.c b/src/lex.c index cca87ab..82ef786 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,5 +1,6 @@ #include #include +#include #include "lex.h" #include "input.h" @@ -8,72 +9,143 @@ #include "sym.h" #include "ast.h" -bool look_done = false; -struct Token look_tok; +#define MAX_IDENTIFIER_LENGTH 256 -struct Token lex() -{ - int ch; - struct Token tok; +typedef enum { + TOKEN_END_OF_PROGRAM, + TOKEN_OPERATOR_ADD, + TOKEN_OPERATOR_SUB, + TOKEN_OPERATOR_MUL, + TOKEN_OPERATOR_DIV, + TOKEN_LEFT_PARENTHESIS, + TOKEN_RIGHT_PARENTHESIS, + TOKEN_ASSIGNMENT, + TOKEN_SEMICOLON, + TOKEN_IDENTIFIER, + TOKEN_NUMBER, + TOKEN_ERROR +} TokenType; - // If lookaheaded - if (look_done) { - look_done = false; +typedef struct { + TokenType type; + void* attribute; + int line; + int column; + char* error_message; +} Token; - return look_tok; +typedef struct { + char* buffer; + int position; + int length; + int line; + int column; +} InputBuffer; + +InputBuffer input_buffer; + +void init_input_buffer() { + input_buffer.buffer = NULL; + input_buffer.position = 0; + input_buffer.length = 0; + input_buffer.line = 1; + input_buffer.column = 0; +} + +void free_input_buffer() { + if (input_buffer.buffer != NULL) { + free(input_buffer.buffer); + input_buffer.buffer = NULL; + } +} + +bool load_input_buffer(const char* filename) { + free_input_buffer(); + + FILE* file = fopen(filename, "rb"); + if (file == NULL) { + return false; + } + + fseek(file, 0, SEEK_END); + input_buffer.length = ftell(file); + fseek(file, 0, SEEK_SET); + + input_buffer.buffer = (char*)malloc(input_buffer.length + 1); + if (input_buffer.buffer == NULL) { + fclose(file); + return false; + } + + input_buffer.position = 0; + input_buffer.buffer[input_buffer.length] = '\0'; + + size_t result = fread(input_buffer.buffer, 1, input_buffer.length, file); + fclose(file); + + if (result != input_buffer.length) { + free_input_buffer(); + return false; + } + + return true; +} + +int read_char() { + if (input_buffer.position >= input_buffer.length) { + return EOF; + } + + char c = input_buffer.buffer[input_buffer.position++]; + input_buffer.column++; + + if (c == '\n') { + input_buffer.line++; + input_buffer.column = 0; + } + + return (int)c; +} + +void unread_char(int c) { + if (c == EOF) { + return; } - eat: - switch (ch = fgetc(get_file())) { - case ' ': case '\n': goto eat; - case EOF: tok.type = EOP; break; - case '+': tok.type = OP1; tok.attr = ADD_TYPE; break; - case '-': tok.type = OP1; tok.attr = SUB_TYPE; break; - case '*': tok.type = OP2; tok.attr = MUL_TYPE; break; - case '/': tok.type = OP2; tok.attr = DIV_TYPE; break; - case '(': tok.type = LBR; break; - case ')': tok.type = RBR; break; - case '=': tok.type = EQ; break; - case ';': tok.type = SEM; break; - default: - // ID - if (isalpha(ch)) { - char *id_name = safe_malloc(MAX_LEN); - int len = 0; - id_name[len++] = ch; - do { - if (MAX_LEN == len) - fatal_error("Lexer: Variable name is too long"); - id_name[len++] = (ch = fgetc(get_file())); - } while (isalpha(ch) || isdigit(ch)); - id_name[len - 1] = '\0'; - - tok.type = ID; - tok.attr = add_sym(id_name); - - // NUM - } else if (isdigit(ch)) { - int val = ch - '0'; - while (isdigit(ch = fgetc(get_file()))) - val = val * 10 + (ch - '0'); - - tok.type = NUM; - tok.attr = val; - - // Error - } else { - fatal_error("Lexer: Unexpected symbol"); - } - - ungetc(ch, get_file()); + input_buffer.position--; + input_buffer.column--; + + if (c == '\n') { + input_buffer.line--; + int i = input_buffer.position - 1; + while (i >= 0 && input_buffer.buffer[i] != '\n') { + i--; } - return tok; + input_buffer.column = input_buffer.position - i - 1; + } +} + +bool is_identifier_start(int c) { + return isalpha(c) || c == '_'; } -struct Token lookahead() -{ - look_tok = lex(); - look_done = true; +bool is_identifier_body(int c) { + return isalnum(c) || c == '_'; +} + +Token lex_identifier() { + char* buffer = (char*)malloc(MAX_IDENTIFIER_LENGTH + 1); + int i = 0; + + int c = read_char(); + while (is_identifier_body(c) && i < MAX_IDENTIFIER_LENGTH) { + buffer[i++] = (char)c; + c = read_char(); + } + unread_char(c); + buffer[i] = '\0'; - return look_tok; -} \ No newline at end of file + Token token; + token.type = TOKEN_IDENTIFIER; + token.attribute = add_sym(buffer); + token.line = input diff --git a/src/sym.c b/src/sym.c index 9fcbaf5..2ee60b5 100644 --- a/src/sym.c +++ b/src/sym.c @@ -5,36 +5,45 @@ #include "func.h" struct Table* symbol_table[MAX_SYMBOL_TABLE_SIZE]; -int table_size = 0; +size_t table_size = 0; -struct Table *get_sym(int id) +struct Table *get_sym(size_t id) { return symbol_table[id]; } -int add_sym(char *name) +size_t add_sym(const char *name) { - int i; - for (i = 0; i < table_size; i++) { + for (size_t i = 0; i < table_size; i++) { if (strcmp(symbol_table[i]->name, name) == 0) return symbol_table[i]->id; } struct Table *item = safe_malloc(sizeof(struct Table)); + if (item == NULL) { + fprintf(stderr, "Error: Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + item->id = table_size; - item->name = name; + item->name = safe_malloc(strlen(name) + 1); + if (item->name == NULL) { + fprintf(stderr, "Error: Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + strcpy(item->name, name); symbol_table[table_size] = item; return table_size++; } -void set_sym(int id, int val) +void set_sym(const size_t id, const int val) { symbol_table[id]->val = val; } -int get_table_size() +size_t get_table_size() { return table_size; -} \ No newline at end of file +} diff --git a/src/sym.h b/src/sym.h index 77a85b1..da85092 100644 --- a/src/sym.h +++ b/src/sym.h @@ -1,20 +1,30 @@ -#ifndef SYM_TABLE -#define SYM_TABLE +#ifndef SYM_H +#define SYM_H -#define MAX_SYMBOL_TABLE_SIZE 100 +#include "defs.h" -#include "libtinycompiler_export.h" +#define MAX_SYMBOL_TABLE_SIZE 1024 -struct Table -{ - int id; - int val; - char *name; +struct Table { +int id; +char *name; +int val; }; -LIBTINYCOMPILER_EXPORT struct Table *get_sym(int id); -LIBTINYCOMPILER_EXPORT int add_sym(char *name); -LIBTINYCOMPILER_EXPORT void set_sym(int id, int val); -LIBTINYCOMPILER_EXPORT int get_table_size(); +// Returns the symbol table entry with the given id. +// If the id is out of range, behavior is undefined. +struct Table *get_sym(int id); + +// Adds a symbol to the symbol table and returns its id. +// If the symbol already exists in the table, returns its existing id. +// If the symbol table is full, behavior is undefined. +int add_sym(char *name); + +// Sets the value of the symbol with the given id. +// If the id is out of range, behavior is undefined. +void set_sym(int id, int val); + +// Returns the number of symbols in the symbol table. +int get_table_size(); #endif \ No newline at end of file diff --git a/src/vm.c b/src/vm.c index 85429cf..bc0fcae 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,5 +1,4 @@ #include - #include "vm.h" #include "stack.h" #include "sym.h" @@ -12,28 +11,52 @@ byte next_byte() return *cur_byte++; } -void run(byte *code) +void run(const byte *code) { - int arg1, arg2; + if (code == NULL) { + fprintf(stderr, "Error: Byte code is NULL\n"); + return; + } cur_byte = code; - next_op: - switch (next_byte()) { - case PUSH: push(next_byte()); goto next_op; - case READ: push(get_sym(next_byte())->val); goto next_op; - case WRITE: set_sym(next_byte(), pop()); goto next_op; - case ADD: POP_BOTH; push(arg1 + arg2); goto next_op; - case SUB: POP_BOTH; push(arg1 - arg2); goto next_op; - case MUL: POP_BOTH; push(arg1 * arg2); goto next_op; - case DIV: POP_BOTH; push(arg1 / arg2); goto next_op; - case RET: { - int i; - for (i = 0; i < get_table_size(); i++) { - printf("%s = %i\n", get_sym(i)->name, get_sym(i)->val); - } - } + while (1) { + byte opcode = next_byte(); - return; + switch (opcode) { + case PUSH: + push(next_byte()); + break; + case READ: + push(get_sym(next_byte())->val); + break; + case WRITE: + set_sym(next_byte(), pop()); + break; + case ADD: + POP_BOTH; + push(arg1 + arg2); + break; + case SUB: + POP_BOTH; + push(arg1 - arg2); + break; + case MUL: + POP_BOTH; + push(arg1 * arg2); + break; + case DIV: + POP_BOTH; + push(arg1 / arg2); + break; + case RET: + for (int i = 0; i < get_table_size(); i++) { + printf("%s = %i\n", get_sym(i)->name, get_sym(i)->val); + } + return; + default: + fprintf(stderr, "Error: Unknown opcode %d\n", opcode); + return; + } } -} \ No newline at end of file +} diff --git a/src/vm.h b/src/vm.h index 65d1a05..d9db131 100644 --- a/src/vm.h +++ b/src/vm.h @@ -4,8 +4,13 @@ #include "libtinycompiler_export.h" #include "defs.h" -#define POP_BOTH arg2 = pop(); arg1 = pop() - -LIBTINYCOMPILER_EXPORT void run(byte *code); +/** + * Runs the bytecode in the specified code buffer. + * + * @param code The byte code to execute. This buffer must remain valid throughout + * the execution of the function and must contain a sequence of bytecode + * instructions. If the buffer is NULL or empty, the function has no effect. + */ +LIBTINYCOMPILER_EXPORT void run(const byte *code); #endif \ No newline at end of file