From: Preston Pan Date: Sat, 12 Apr 2025 00:15:04 +0000 (-0700) Subject: transfer to toughbook X-Git-Url: https://ret2pop.net/gitweb/monorepo.git?a=commitdiff_plain;ds=sidebyside;p=stepone.git transfer to toughbook --- diff --git a/examples/main.sp b/examples/main.sp index 0b472da..86f18fb 100644 --- a/examples/main.sp +++ b/examples/main.sp @@ -15,6 +15,8 @@ type HigherHigherOrder = GetType; ;; Compose<[Maybe, dp, p], string> => p>> +hello: (Array) -> Array, 10>; + main: (int, p) -> int; ;; or an alternate definition @@ -32,11 +34,11 @@ XOR RBX, RBX PUSH RBX '''; -hello = (x) { +hello = fn (x) { return x + 1 } -main = (argc, argv) { +main = fn (argc, argv) { x: int; y: int -> int; diff --git a/include/ht.h b/include/ht.h index 3c0167a..992c3eb 100644 --- a/include/ht.h +++ b/include/ht.h @@ -1,5 +1,6 @@ #ifndef HT_H #define HT_H + #include #include "bstring.h" #include "array.h" @@ -17,10 +18,14 @@ typedef struct { pair_t *init_pair(string_t *key, void *value); -void ht_add(ht_t *ht, string_t *key, void *value); +ht_t *init_ht(size_t size); + +void *ht_add(ht_t *ht, string_t *key, void *value); void *ht_pop(ht_t *ht, string_t *key); +void *ht_get(ht_t *ht, string_t *key); + bool ht_exists(ht_t *ht, string_t *key); void ht_free(ht_t *ht, void (*freefunc)(void *)); diff --git a/include/lexer.h b/include/lexer.h index 341f765..f9a1a67 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -6,7 +6,7 @@ typedef struct { unsigned int row; unsigned int col; /* Filename */ - string_t *f; + char *f; /* value */ string_t *v; @@ -27,6 +27,7 @@ typedef struct { TT_STR, TT_INT, TT_FLOAT, + TT_DOT, /* Multiline string */ TT_MSTR, TT_CHAR, @@ -40,9 +41,16 @@ typedef struct { TT_MOD, TT_TIMES, TT_DIVIDE, + TT_NOT, TT_AND, TT_OR, + TT_SHR, + TT_SHL, + TT_BAND, + TT_BOR, + TT_BNOT, + TT_ARROW, } /* type */ @@ -65,6 +73,8 @@ enum { token_t *init_token(lexer_t *l, string_t *v); +void token_free(void *t); + lexer_t *init_lexer(char *source); void lexer_move(lexer_t *l); diff --git a/include/parser.h b/include/parser.h index 5a9c17c..441e1ee 100644 --- a/include/parser.h +++ b/include/parser.h @@ -2,22 +2,17 @@ #define PARSER_H #include "bstring.h" #include "lexer.h" +#include "array.h" -typedef struct { +typedef struct AST_STRUCT_ { enum { - AST_ROOT, AST_TYPE, + AST_TYPE_FUNCALL, AST_TYPE_FUNCTION, AST_TYPE_ATOM, AST_TYPE_LIST, - AST_TYPE_DICT, - - AST_KIND, - AST_KIND_ALL, - AST_KIND_ATOM, - AST_KIND_FUNC, - AST_KIND_ARRAY, - AST_KIND_DICT, + AST_TYPE_STRUCT, + AST_TYPEDEC, AST_STRUCT, AST_ENUM, @@ -26,9 +21,30 @@ typedef struct { AST_WHILE, AST_VARDEC, AST_VARDEF, + + AST_LIST, + AST_CHAR, + AST_STR, + AST_INT, + AST_FUNC, + AST_ID, + AST_BINOP, + AST_MONO_OP, + + AST_FUNCALL, + AST_PARAMS, }t; string_t *value; + int priority; + + union { + array_t *subnodes; + struct { + struct AST_STRUCT_ *subnode; + struct AST_STRUCT_ *s2; + }; + }; } ast_t; typedef struct { @@ -38,7 +54,25 @@ typedef struct { ast_t *init_ast(string_t *v); +void parser_move(parser_t *p); + +void parser_consume(parser_t *p, int type); + parser_t *init_parser(char *source); -ast_t *parse_all(parser_t *p); +ast_t *parse_type(parser_t *p); + +ast_t *parse_typedec(parser_t *p); + +ast_t *parse_expr(parser_t *p); + +ast_t *parse_block(parser_t *p); + +ast_t *parse_statement(parser_t *p); + +ast_t *parse_global(parser_t *p); + +array_t *parse_all(parser_t *p); + +void parser_error(parser_t *p); #endif diff --git a/include/validate.h b/include/validate.h index b47c8c6..ac29fb9 100644 --- a/include/validate.h +++ b/include/validate.h @@ -3,6 +3,7 @@ #include "parser.h" #include "ht.h" +#define HT_SIZE 500 typedef struct { ht_t *builtin_table; @@ -11,6 +12,8 @@ typedef struct { array_t *stack; } eval_t; +eval_t *init_eval(); + /* Intersection, union */ ast_t *eval_typeop(eval_t *e, ast_t *a); @@ -23,10 +26,10 @@ void eval_typedec(eval_t *e, ast_t *a); /* eval the type system */ void eval_all(eval_t *e, ast_t *root); +/*! eval error */ +void eval_error(eval_t *e); + /*! analyze things like undefined variables, unreachable code, etc... */ void analyze(ast_t *root); -/*! typechecking the actual program code */ -void validate_all(eval_t *e, ast_t *root); - #endif diff --git a/src/ht.c b/src/ht.c index da06216..e0b0d0d 100644 --- a/src/ht.c +++ b/src/ht.c @@ -1,8 +1,8 @@ #include "../include/ht.h" #include "../include/array.h" #include "../include/misc.h" -#include #include +#include pair_t *init_pair(string_t *key, void *value) { pair_t *pair = safe_calloc(1, sizeof(pair_t)); @@ -18,9 +18,9 @@ ht_t *init_ht(size_t size) { return ht; } -void ht_add(ht_t *ht, string_t *key, void *value) { +void *ht_add(ht_t *ht, string_t *key, void *value) { if (!ht || !key) - return; + return NULL; unsigned long bnum = hash(key->s); array_t *bucket = ht->buckets[bnum]; pair_t *p; @@ -28,6 +28,9 @@ void ht_add(ht_t *ht, string_t *key, void *value) { for (int i = 0; i < bucket->length; i++) { pair_t *exp = bucket->items[i]; if (strcmp(exp->key->s, key->s) == 0) { + void *x = exp->value; + exp->value = value; + return x; } } p = init_pair(key, value); @@ -37,8 +40,47 @@ void ht_add(ht_t *ht, string_t *key, void *value) { p = init_pair(key, value); array_push(ht->buckets[bnum], p); } + return NULL; +} + +void pair_free(void *x, void (*freefunc)(void *)) { + pair_t *p = x; + freefunc(p->value); + string_free(p->key); + free(p); } -unsigned long hash(char *key) { - return 0; +void empty_pair_free(void *x) { + pair_t *p = x; + string_free(p->key); + free(p); +} + +void ht_free(ht_t *ht, void (*freefunc)(void *)) { + pair_t *p; + array_t *a; + for (int i = 0; i < ht->size; i++) { + a = ht->buckets[i]; + for (int j = 0; j < a->length; j++) { + p = a->items[j]; + string_free(p->key); + freefunc(p->value); + free(p); + } + free(a->items); + free(a); + } + free(ht->buckets); + free(ht); +} + +/* DJB2 HASH FUNCTION */ +unsigned long hash(char *str) { + unsigned long hash = 5381; + int c; + + while ((c = *str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + + return hash; } diff --git a/src/lexer.c b/src/lexer.c index 5453a16..8e0e678 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -6,11 +6,11 @@ #include "../include/lexer.h" #include "../include/misc.h" -#define KEYWORD_LEN 5 +#define KEYWORD_LEN 6 char keywords[][KEYWORD_LEN] = { "if", "while", - "for", "type", "enum" + "for", "type", "enum", "fn" }; token_t *init_token(lexer_t *l, string_t *v) { @@ -21,6 +21,13 @@ token_t *init_token(lexer_t *l, string_t *v) { return tok; } +void token_free(void *t) { + token_t *x = t; + if (x->v) + string_free(x->v); + free(x); +} + token_t *init_token_with_linenum(int row, int col, string_t *v, int type) { token_t *tok = safe_calloc(1, sizeof(token_t)); tok->row = row; @@ -186,6 +193,8 @@ token_t *lexer_collect_next(lexer_t *l) { return lexer_move_with(l, TT_RBRACKET); case ',': return lexer_move_with(l, TT_COMMA); + case '.': + return lexer_move_with(l, TT_DOT); case '+': return lexer_move_with(l, TT_PLUS); case '*': @@ -194,10 +203,14 @@ token_t *lexer_collect_next(lexer_t *l) { return lexer_move_with(l, TT_DIVIDE); case '%': return lexer_move_with(l, TT_MOD); + case '&': return lexer_move_with(l, TT_AND); case '|': return lexer_move_with(l, TT_OR); + case '!': + return lexer_move_with(l, TT_NOT); + case '=': row = l->row; col = l->col; diff --git a/src/parser.c b/src/parser.c index 6bd9a74..a0b1a94 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,4 +1,371 @@ #include #include +#include + #include "../include/parser.h" +#include "../include/misc.h" +#include "../include/lexer.h" + +ast_t *init_ast(string_t *v) { + ast_t *a = safe_calloc(1, sizeof(ast_t)); + a->value = v; + return a; +} + +void parser_move(parser_t *p) { + if (p->t) { + token_free(p->t); + p->t = lexer_collect_next(p->l); + } +} + +void parser_lmove(parser_t *p) { + if (p->t) + p->t = lexer_collect_next(p->l); +} + +void parser_consume(parser_t *p, int type) { + if (!p->t) + parser_error(p); + if (p->t->t != type) + parser_error(p); + + parser_move(p); +} + +ast_t *parse_type_array(parser_t *p) { + ast_t *a = init_ast(NULL); + a->subnodes = init_array(); + parser_consume(p, TT_LBRACKET); + + while (p->t->t != TT_RBRACKET) { + array_push(a->subnodes, parse_type(p)); + + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RBRACKET: + break; + default: + parser_error(p); + } + } + + parser_consume(p, TT_RBRACKET); + return a; +} + +ast_t *parse_type_funcall_params(parser_t *p) { + ast_t *a = init_ast(NULL); + a->subnodes = init_array(); + + parser_consume(p, TT_LANGLE); + + while (p->t->t != TT_RANGLE) { + array_push(a->subnodes, parse_type(p)); + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RANGLE: + break; + default: + parser_error(p); + } + } + parser_consume(p, TT_RANGLE); + return a; +} + +ast_t *parse_id(parser_t *p) { + string_t *s = p->t->v; + if (p->t->t != TT_ID) + parser_error(p); + parser_lmove(p); + ast_t *a = init_ast(s); + a->t = AST_ID; + return a; +} + +ast_t *parse_type_struct(parser_t *p) { + parser_consume(p, TT_LBRACE); + ast_t *a = init_ast(NULL); + a->t = AST_TYPE_STRUCT; + a->subnodes = init_array(); + ast_t *pair; + while (p->t->t != TT_RBRACE) { + pair = init_ast(NULL); + pair->subnode = parse_id(p); + parser_consume(p, TT_COLON); + pair->s2 = parse_type(p); + array_push(a->subnodes, pair); + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RBRACE: + break; + default: + parser_error(p); + } + } + parser_consume(p, TT_RBRACE); + return a; +} + +ast_t *parse_type(parser_t *p) { + switch (p->t->t) { + case TT_LBRACKET: + return parse_type_array(p); + case TT_LBRACE: + return parse_type_struct(p); + default: + break; + } +} + +ast_t *parse_typedec(parser_t *p) { + ast_t *retval = init_ast(p->t->v); + retval->t = AST_TYPEDEC; + parser_lmove(p); + parser_consume(p, TT_ASSIGN); + retval->subnode = parse_type(p); + parser_consume(p, TT_SEMI); + return retval; +} + +ast_t *parse_list(parser_t *p) { + ast_t *a = init_ast(NULL); + a->subnodes = init_array(); + parser_consume(p, TT_LBRACKET); + while (p->t->t != TT_RBRACKET) { + array_push(a->subnodes, parse_expr(p)); + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RBRACKET: + break; + default: + parser_error(p); + } + } + parser_consume(p, TT_RBRACKET); + return a; +} + +static ast_t *parse_function(parser_t *p) { + ast_t *retval = init_ast(NULL); + retval->t = AST_FUNC; + ast_t *params = init_ast(NULL); + retval->t = AST_PARAMS; + ast_t *cur_param; + string_t *cur; + + parser_consume(p, TT_LPAREN); + while (p->t->t != TT_RPAREN) { + if (p->t->t != TT_ID) + parser_error(p); + + cur = p->t->v; + cur_param = init_ast(cur); + cur_param->t = AST_ID; + parser_lmove(p); + + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RPAREN: + break; + default: + parser_error(p); + } + } + + parser_consume(p, TT_RPAREN); + retval->subnode = params; + retval->s2 = parse_block(p); + return retval; +} + +static ast_t *parse_binop(parser_t *p) { + if (p->t->t == TT_LPAREN) { + } else { + } + return NULL; +} + +static ast_t *parse_mono_op(parser_t *p) { + string_t *val = p->t->v; + ast_t *mono_op; + if (!(strcmp(val->s, "!") == 0 || strcmp(val->s, "~") == 0)) { + parser_error(p); + return NULL; + } + parser_lmove(p); + mono_op = init_ast(val); + mono_op->t = AST_MONO_OP; + mono_op->subnode = parse_expr(p); + return mono_op; +} + +ast_t *parse_char(parser_t *p) { + ast_t *c = init_ast(p->t->v); + parser_lmove(p); + c->t = AST_CHAR; + return c; +} + +ast_t *parse_string(parser_t *p) { + ast_t *c = init_ast(p->t->v); + parser_lmove(p); + c->t = AST_STR; + return c; +} + +ast_t *parse_funcall_params(parser_t *p) { + ast_t *funcall_params = init_ast(NULL); + funcall_params->t = AST_PARAMS; + funcall_params->subnodes = init_array(); + parser_consume(p, TT_LPAREN); + while (p->t->t != TT_RPAREN) { + array_push(funcall_params->subnodes, parse_expr(p)); + switch (p->t->t) { + case TT_COMMA: + parser_consume(p, TT_COMMA); + break; + case TT_RPAREN: + break; + default: + parser_error(p); + } + } + return funcall_params; +} + +ast_t *parse_var_or_funcall(parser_t *p) { + ast_t *var_or_funcall = init_ast(p->t->v); + ast_t *r1; + parser_lmove(p); + switch (p->t->t) { + case TT_LPAREN: + r1 = parse_funcall_params(p); + var_or_funcall->subnode = r1; + return var_or_funcall; + case TT_PLUS: + case TT_MINUS: + case TT_TIMES: + case TT_DIVIDE: + case TT_MOD: + + case TT_SHR: + case TT_SHL: + case TT_BAND: + case TT_BOR: + + case TT_COLON: + r1 = parse_type(p); + var_or_funcall->t = AST_VARDEC; + var_or_funcall->subnodes = init_array(); + array_push(var_or_funcall->subnodes, var_or_funcall); + array_push(var_or_funcall->subnodes, r1); + return var_or_funcall; + default: + parser_error(p); + return NULL; + } +} + +ast_t *parse_expr(parser_t *p) { + ast_t *a; + if (strcmp(p->t->v->s, "fn") == 0) { + parser_consume(p, TT_KEYWORD); + return parse_function(p); + } + switch (p->t->t) { + case TT_LPAREN: + case TT_INT: + return parse_binop(p); + case TT_NOT: + case TT_BNOT: + return parse_mono_op(p); + case TT_STR: + return parse_string(p); + case TT_CHAR: + return parse_char(p); + case TT_ID: + return parse_var_or_funcall(p); + default: + parser_error(p); + return NULL; + } +} + +ast_t *parse_statement(parser_t *p) { + return NULL; +} + +ast_t *parse_block(parser_t *p) { + ast_t *block = init_ast(NULL); + block->subnodes = init_array(); + block->t = AST_BLOCK; + parser_consume(p, TT_LBRACE); + + while (p->t->t != TT_RBRACE) + array_push(block->subnodes, parse_statement(p)); + + parser_consume(p, TT_RBRACE); + return block; +} + +static ast_t *parse_code(parser_t *p) { + string_t *val = p->t->v; + ast_t *retval = init_ast(val); + parser_lmove(p); + int cmp = p->t->t; + switch (cmp) { + case TT_COLON: + parser_consume(p, TT_COLON); + retval->subnode = parse_type(p); + retval->t = AST_VARDEC; + parser_consume(p, TT_SEMI); + break; + case TT_ASSIGN: + parser_consume(p, TT_ASSIGN); + retval->subnode = parse_expr(p); + retval->t = AST_VARDEF; + break; + default: + parser_error(p); + } + + return retval; +} + +ast_t *parse_global(parser_t *p) { + char *cmp = p->t->v->s; + if (!cmp) + parser_error(p); + + if (strcmp(cmp, "type") == 0) { + parser_move(p); + return parse_typedec(p); + } + else + return parse_code(p); +} + +array_t *parse_all(parser_t *p) { + array_t *asts = init_array(); + while (p->t) + array_push(asts, parse_global(p)); + return asts; +} + +void parser_error(parser_t *p) { + printf("PARSER ERROR!!!!\n"); + exit(1); +} diff --git a/src/validate.c b/src/validate.c new file mode 100644 index 0000000..2adf473 --- /dev/null +++ b/src/validate.c @@ -0,0 +1,24 @@ +#include "../include/validate.h" +#include "../include/misc.h" +#include "../include/ht.h" +#include + +eval_t *init_eval() { + eval_t *e = safe_calloc(1, sizeof(eval_t)); + e->stack = init_array(); + e->builtin_table = init_ht(HT_SIZE); + e->type_table = init_ht(HT_SIZE); + return e; +} + +void eval_all(eval_t *e, ast_t *root) { + if (!root || !root->subnodes) + return; + + for (int i = 0; i < root->subnodes->length; i++) { + } +} + +void eval_error(eval_t *e) { + +}