From: Preston Pan Date: Sat, 5 Apr 2025 21:40:54 +0000 (-0700) Subject: first commit X-Git-Url: https://ret2pop.net/gitweb/monorepo.git?a=commitdiff_plain;h=1851e4d6e482b4ea166dda34b50c8dd4bbad50b4;p=stepone.git first commit --- 1851e4d6e482b4ea166dda34b50c8dd4bbad50b4 diff --git a/README.org b/README.org new file mode 100644 index 0000000..e69de29 diff --git a/examples/example.sp b/examples/example.sp new file mode 100644 index 0000000..02f2522 --- /dev/null +++ b/examples/example.sp @@ -0,0 +1,7 @@ +type map = match> +}> + +type MapType = typeof; + diff --git a/examples/main.sp b/examples/main.sp new file mode 100644 index 0000000..0b472da --- /dev/null +++ b/examples/main.sp @@ -0,0 +1,51 @@ +type string = p; +type dp = p>; +type Maybe = X | NULL; + +type ComposeHelper => atom], B: atom> = match(A) { + [] -> B + _ -> ComposeHelper, (car)> +} + +type Compose Type], B: Type> = ComposeHelper, B>; + +type HigherOrder = GetType; + +type HigherHigherOrder = GetType; + +;; Compose<[Maybe, dp, p], string> => p>> + +main: (int, p) -> int; + +;; or an alternate definition +main: (int, dp) -> int; + +hello: int -> int; + +;; This is how you define ASM. You just trust the runtime system +;; to not blow out the stack; the type system is not a guarantee here. 
+main = (argc, argv) asm '''
+;; ASM STUFF
+;; set RBX to 0
+XOR RBX, RBX
+;; return 0
+PUSH RBX
+''';
+
+hello = (x) {
+  return x + 1
+}
+
+main = (argc, argv) {
+  x: int;
+  y: int -> int;
+
+  x = 4;
+  y = hello;
+  return y(x + 2);
+}
+
+type Human = {
+  age: int,
+  name: string,
+}
\ No newline at end of file
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..89a0fb8
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,3 @@
+{
+  description = "";
+}
diff --git a/include/array.h b/include/array.h
new file mode 100644
index 0000000..9adfadf
--- /dev/null
+++ b/include/array.h
@@ -0,0 +1,19 @@
+#ifndef ARRAY_H
+#define ARRAY_H
+#include <stddef.h>
+
+/* Growable array of untyped pointers. */
+typedef struct {
+  void ** items;  /* element storage */
+  int length;     /* number of elements in use */
+  size_t size;    /* number of slots allocated in items */
+} array_t;
+
+array_t *init_array(void);
+
+void array_push(array_t *a, void *x);
+
+void *array_pop(array_t *a);
+
+void array_free(void *a, void (*freefunc)(void *));
+#endif
diff --git a/include/bstring.h b/include/bstring.h
new file mode 100644
index 0000000..8ded820
--- /dev/null
+++ b/include/bstring.h
@@ -0,0 +1,21 @@
+#ifndef BSTRING_H
+#define BSTRING_H
+#include <stddef.h>
+
+/* Growable character buffer. */
+typedef struct {
+  char *s;         /* character storage */
+  unsigned int l;  /* number of characters in use */
+  size_t size;     /* bytes allocated for s */
+} string_t;
+
+string_t *init_string(char *s);
+
+char string_pop(string_t *s);
+
+void string_push(string_t *s, char c);
+
+void string_concat(string_t *s1, const char *s2);
+
+void string_free(void *s);
+#endif
diff --git a/include/ht.h b/include/ht.h
new file mode 100644
index 0000000..3c0167a
--- /dev/null
+++ b/include/ht.h
@@ -0,0 +1,32 @@
+#ifndef HT_H
+#define HT_H
+#include <stdbool.h>
+#include "bstring.h"
+#include "array.h"
+
+/* A hash table is an array of arrays. */
+typedef struct {
+  string_t *key;
+  void *value;
+} pair_t;
+
+typedef struct {
+  array_t **buckets;  /* size bucket pointers; a bucket holds pair_t items */
+  size_t size;        /* number of buckets */
+} ht_t;
+
+pair_t *init_pair(string_t *key, void *value);
+
+ht_t *init_ht(size_t size);
+
+void ht_add(ht_t *ht, string_t *key, void *value);
+
+void *ht_pop(ht_t *ht, string_t *key);
+
+bool ht_exists(ht_t *ht, string_t *key);
+
+void ht_free(ht_t *ht, void (*freefunc)(void *));
+
+unsigned long hash(char *key);
+
+#endif
diff --git a/include/lexer.h b/include/lexer.h
new file mode 100644
index 0000000..341f765
--- /dev/null
+++ b/include/lexer.h
@@ -0,0 +1,77 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include "bstring.h"
+
+typedef struct {
+  unsigned int row;
+  unsigned int col;
+  /* Filename */
+  string_t *f;
+
+  /* value */
+  string_t *v;
+  enum {
+    TT_LPAREN,
+    TT_RPAREN,
+    TT_LBRACE,
+    TT_RBRACE,
+    TT_LBRACKET,
+    TT_RBRACKET,
+    TT_LANGLE,
+    TT_RANGLE,
+    TT_GTE,
+    TT_LTE,
+    TT_EQ,
+    TT_KEYWORD,
+    TT_ID,
+    TT_STR,
+    TT_INT,
+    TT_FLOAT,
+    /* Multiline string */
+    TT_MSTR,
+    TT_CHAR,
+    TT_COLON,
+    TT_SEMI,
+    TT_COMMA,
+    TT_ASSIGN,
+
+    TT_PLUS,
+    TT_MINUS,
+    TT_MOD,
+    TT_TIMES,
+    TT_DIVIDE,
+    TT_AND,
+    TT_OR,
+
+    TT_ARROW,
+  }
+  /* type */
+  t;
+} token_t;
+
+typedef struct {
+  char c;
+  unsigned int i;
+  char *source;
+  unsigned int row;
+  unsigned int col;
+} lexer_t;
+
+enum {
+  LERR_UNRECOGNIZED_CHAR,
+  LERR_NO_CLOSING_STR,
+  LERR
+};
+
+token_t *init_token(lexer_t *l, string_t *v);
+
+lexer_t *init_lexer(char *source);
+
+void lexer_move(lexer_t *l);
+
+void lexer_skip_whitespace(lexer_t *l);
+
+token_t *lexer_collect_next(lexer_t *l);
+
+void lexer_error(lexer_t *lexer, int err);
+#endif
diff --git a/include/misc.h b/include/misc.h
new file mode 100644
index 0000000..ebf3037
--- /dev/null
+++ b/include/misc.h
@@ -0,0 +1,9 @@
+#ifndef MISC_H
+#define MISC_H
+#include <stddef.h>
+
+void die(char *message);
+
+void *safe_calloc(size_t nmemb, size_t size);
+
+#endif
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..5a9c17c
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,44 @@
+#ifndef PARSER_H
+#define PARSER_H
+#include "bstring.h"
+#include "lexer.h"
+
+typedef struct {
+  enum {
+    AST_ROOT,
+    AST_TYPE,
+    AST_TYPE_FUNCTION,
+    AST_TYPE_ATOM,
+    AST_TYPE_LIST,
+    AST_TYPE_DICT,
+
+    AST_KIND,
+    AST_KIND_ALL,
+    AST_KIND_ATOM,
+    AST_KIND_FUNC,
+    AST_KIND_ARRAY,
+    AST_KIND_DICT,
+
+    AST_STRUCT,
+    AST_ENUM,
+    AST_BLOCK,
+    AST_IF_ELSE,
+    AST_WHILE,
+    AST_VARDEC,
+    AST_VARDEF,
+  }t;
+
+  string_t *value;
+} ast_t;
+
+typedef struct {
+  lexer_t *l;
+  token_t *t;
+} parser_t;
+
+ast_t *init_ast(string_t *v);
+
+parser_t *init_parser(char *source);
+
+ast_t *parse_all(parser_t *p);
+#endif
diff --git a/include/validate.h b/include/validate.h
new file mode 100644
index 0000000..b47c8c6
--- /dev/null
+++ b/include/validate.h
@@ -0,0 +1,32 @@
+#ifndef VALIDATE_H
+#define VALIDATE_H
+
+#include "parser.h"
+#include "ht.h"
+
+typedef struct {
+  ht_t *builtin_table;
+  ht_t *type_table;
+  /* stack frame */
+  array_t *stack;
+} eval_t;
+
+/* Intersection, union */
+ast_t *eval_typeop(eval_t *e, ast_t *a);
+
+/* Evaluates type function */
+ast_t *eval_typecall(eval_t *e, ast_t *a);
+
+/* Type declaration */
+void eval_typedec(eval_t *e, ast_t *a);
+
+/* eval the type system */
+void eval_all(eval_t *e, ast_t *root);
+
+/*! analyze things like undefined variables, unreachable code, etc... */
+void analyze(ast_t *root);
+
+/*!
typechecking the actual program code */
+void validate_all(eval_t *e, ast_t *root);
+
+#endif
diff --git a/src/array.c b/src/array.c
new file mode 100644
index 0000000..899d6d9
--- /dev/null
+++ b/src/array.c
@@ -0,0 +1,60 @@
+#include "../include/array.h"
+#include "../include/misc.h"
+
+#include <stdlib.h>
+
+/* Allocate an empty array with room for 10 items. */
+array_t *init_array(void) {
+  array_t *a = safe_calloc(1, sizeof(array_t));
+  a->size = 10;
+  a->items = safe_calloc(10, sizeof(void *));
+  a->length = 0;
+  return a;
+}
+
+/*
+ * Append v to a, doubling the storage when it is nearly full.
+ * Does nothing when a is NULL.
+ */
+void array_push(array_t *a, void *v) {
+  if (!a)
+    return;
+
+  if ((size_t)a->length + 2 >= a->size) {
+    /* realloc takes a byte count, so scale the slot count by the slot size. */
+    size_t new_size = a->size * 2;
+    void **items = realloc(a->items, new_size * sizeof(void *));
+    if (!items) {
+      die("die: realloc");
+      return;
+    }
+    a->items = items;
+    a->size = new_size;
+  }
+  a->items[a->length] = v;
+  a->length++;
+}
+
+/* Remove and return the last item; NULL when a is NULL or empty. */
+void *array_pop(array_t *a) {
+  if (!a || a->length <= 0)
+    return NULL;
+  a->length--;
+  return a->items[a->length];
+}
+
+/*
+ * Free the array x (an array_t *) and its storage. When freefunc is not
+ * NULL it is called on every stored item first. NULL x is a no-op.
+ */
+void array_free(void *x, void (*freefunc)(void *)) {
+  array_t *a = x;
+  if (!a)
+    return;
+  if (freefunc) {
+    for (int i = 0; i < a->length; i++)
+      freefunc(a->items[i]);
+  }
+  free(a->items);
+  free(a);
+}
diff --git a/src/bstring.c b/src/bstring.c
new file mode 100644
index 0000000..03a7376
--- /dev/null
+++ b/src/bstring.c
@@ -0,0 +1,72 @@
+#include "../include/bstring.h"
+#include "../include/misc.h"
+
+#include <stdlib.h>
+#include <string.h>
+#define STRING_SIZE 20
+
+/*
+ * Allocate a string. A NULL source yields an empty string; otherwise the
+ * characters of source are copied in.
+ */
+string_t *init_string(char *source) {
+  string_t *s = safe_calloc(1, sizeof(string_t));
+  /* The buffer is zero-filled, so a length of 0 is a valid empty string. */
+  s->l = 0;
+  s->size = STRING_SIZE;
+  s->s = safe_calloc(STRING_SIZE, sizeof(char));
+  if (source)
+    string_concat(s, source);
+  return s;
+}
+
+/*
+ * Append c to s, doubling the buffer when it is nearly full. The buffer is
+ * kept NUL-terminated. Does nothing when s is NULL.
+ */
+void string_push(string_t *s, char c) {
+  if (!s)
+    return;
+  if (s->l + 2 >= s->size) {
+    size_t new_size = s->size * 2;
+    char *buf = realloc(s->s, new_size * sizeof(char));
+    if (!buf) {
+      die("die: realloc");
+      return;
+    }
+    s->s = buf;
+    s->size = new_size;
+  }
+  s->s[s->l] = c;
+  s->l++;
+  /* realloc does not zero the new tail, so terminate explicitly. */
+  s->s[s->l] = '\0';
+}
+
+/* Append every character of the C string s2 to s1. NULL s2 is a no-op. */
+void string_concat(string_t *s1, const char *s2) {
+  if (!s2)
+    return;
+  size_t len = strlen(s2);
+  for (size_t i = 0; i < len; i++)
+    string_push(s1, s2[i]);
+}
+
+/* Remove and return the last character; '\0' when s is NULL or empty. */
+char string_pop(string_t *s) {
+  if (!s || s->l <= 0)
+    return '\0';
+  s->l --;
+  char c = s->s[s->l];
+  s->s[s->l] = '\0';
+  return c;
+}
+
+/* Free the string x (a string_t *) and its buffer. NULL x is a no-op. */
+void string_free(void *x) {
+  string_t *s = x;
+  if (!s)
+    return;
+  free(s->s);
+  free(s);
+}
diff --git a/src/ht.c
b/src/ht.c
new file mode 100644
index 0000000..da06216
--- /dev/null
+++ b/src/ht.c
@@ -0,0 +1,58 @@
+#include "../include/ht.h"
+#include "../include/array.h"
+#include "../include/misc.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate a key/value pair. The pair stores the pointers; nothing is copied. */
+pair_t *init_pair(string_t *key, void *value) {
+  pair_t *pair = safe_calloc(1, sizeof(pair_t));
+  pair->key = key;
+  pair->value = value;
+  return pair;
+}
+
+/* Allocate a table with size buckets, all initially empty. */
+ht_t *init_ht(size_t size) {
+  ht_t *ht = safe_calloc(1, sizeof(ht_t));
+  ht->size = size;
+  ht->buckets = safe_calloc(size, sizeof(array_t *));
+  return ht;
+}
+
+/*
+ * Map key to value. When key is already present its value is replaced;
+ * otherwise a new pair is appended to the key's bucket. Does nothing when
+ * ht or key is NULL or the table has no buckets.
+ */
+void ht_add(ht_t *ht, string_t *key, void *value) {
+  if (!ht || !key || ht->size == 0)
+    return;
+  /* Reduce the hash to a valid bucket index. */
+  size_t bnum = hash(key->s) % ht->size;
+  array_t *bucket = ht->buckets[bnum];
+  if (!bucket) {
+    bucket = init_array();
+    ht->buckets[bnum] = bucket;
+  }
+  for (int i = 0; i < bucket->length; i++) {
+    pair_t *exp = bucket->items[i];
+    if (strcmp(exp->key->s, key->s) == 0) {
+      /* NOTE(review): the old value and the caller's key are not freed
+       * here; confirm who owns them. */
+      exp->value = value;
+      return;
+    }
+  }
+  array_push(bucket, init_pair(key, value));
+}
+
+/* djb2 string hash of the NUL-terminated key; NULL hashes like "". */
+unsigned long hash(char *key) {
+  unsigned long h = 5381;
+  if (!key)
+    return h;
+  for (const unsigned char *p = (const unsigned char *)key; *p; p++)
+    h = h * 33 + *p;
+  return h;
+}
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..5453a16
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,297 @@
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../include/lexer.h"
+#include "../include/misc.h"
+
+/* Bytes per keyword slot: the longest keyword ("while") plus its NUL. */
+#define KEYWORD_LEN 6
+#define KEYWORD_COUNT (sizeof(keywords) / sizeof(keywords[0]))
+
+char keywords[][KEYWORD_LEN] = {
+  "if", "while",
+  "for", "type", "enum"
+};
+
+/* Allocate a token with value v at the lexer's current row and column. */
+token_t *init_token(lexer_t *l, string_t *v) {
+  token_t *tok = safe_calloc(1, sizeof(token_t));
+  tok->col = l->col;
+  tok->row = l->row;
+  tok->v = v;
+  return tok;
+}
+
+/* Allocate a token of the given type at an explicit row and column. */
+token_t *init_token_with_linenum(int row, int col, string_t *v, int type) {
+  token_t *tok = safe_calloc(1, sizeof(token_t));
+  tok->row = row;
+  tok->col = col;
+  tok->v = v;
+  tok->t = type;
+  return tok;
+}
+
+/*
+ * Create a lexer positioned on the first character of source. Returns NULL
+ * when source is NULL or empty. The pointer is stored, not copied.
+ */
+lexer_t *init_lexer(char *source) {
+  if (!source || source[0] == '\0')
+    return NULL;
+  lexer_t *lexer = safe_calloc(1, sizeof(lexer_t));
+  lexer->source = source;
+  lexer->i = 0;
+  lexer->c = lexer->source[lexer->i];
+  lexer->row = 1;
+  lexer->col = 1;
+  return lexer;
+}
+
+/*
+ * Advance one character and keep row/col in step: leaving a newline starts
+ * a new row at column 1, leaving any other character moves one column
+ * right. Does nothing at the end of the source.
+ */
+void lexer_move(lexer_t *l) {
+  if (l->c == '\0')
+    return;
+  if (l->c == '\n') {
+    l->row++;
+    l->col = 1;
+  } else {
+    l->col++;
+  }
+  l->i++;
+  l->c = l->source[l->i];
+}
+
+/* Skip over a run of whitespace characters. */
+void lexer_skip_whitespace(lexer_t *l) {
+  while (l->c != '\0' && isspace((unsigned char)l->c))
+    lexer_move(l);
+}
+
+/* Collect a run of alphanumerics as a TT_KEYWORD or TT_ID token. */
+static token_t *lexer_collect_id(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  while (isalnum((unsigned char)l->c)) {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+
+  for (size_t i = 0; i < KEYWORD_COUNT; i++) {
+    if (strcmp(v->s, keywords[i]) == 0) {
+      t->t = TT_KEYWORD;
+      return t;
+    }
+  }
+  t->t = TT_ID;
+  return t;
+}
+
+/*
+ * Collect the characters between two double quotes as a TT_STR token.
+ * Reports LERR_NO_CLOSING_STR when the source ends first.
+ */
+static token_t *lexer_collect_str(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  lexer_move(l);
+  while (l->c != '"' && l->c != '\0') {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+  if (l->c == '\0')
+    lexer_error(l, LERR_NO_CLOSING_STR);
+  lexer_move(l);
+  t->t = TT_STR;
+  return t;
+}
+
+/*
+ * Collect the characters between two backticks as a TT_MSTR token.
+ * Reports LERR_NO_CLOSING_STR when the source ends first.
+ */
+static token_t *lexer_collect_mstr(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  lexer_move(l);
+  /* Also stop at '\0': lexer_move never advances past the end. */
+  while (l->c != '`' && l->c != '\0') {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+  if (l->c == '\0')
+    lexer_error(l, LERR_NO_CLOSING_STR);
+  lexer_move(l);
+  t->t = TT_MSTR;
+  return t;
+}
+
+/*
+ * Collect digits with at most one '.' as a TT_INT or TT_FLOAT token. A
+ * second '.' ends the number and is left in the input.
+ */
+static token_t *lexer_collect_int(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  bool isf = false;
+  while (isdigit((unsigned char)l->c) || l->c == '.') {
+    if (!isf && l->c == '.')
+      isf = true;
+    else if (l->c == '.')
+      break;
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+  if (isf)
+    t->t = TT_FLOAT;
+  else
+    t->t = TT_INT;
+  return t;
+}
+
+/*
+ * Collect a single-quoted character as a TT_CHAR token; the backslash of an
+ * escape is kept in the value. Reports LERR_NO_CLOSING_STR when the
+ * character or the closing quote is missing.
+ */
+static token_t *lexer_collect_char(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  t->t = TT_CHAR;
+  lexer_move(l);
+  if (l->c == '\\') {
+    string_push(v, '\\');
+    lexer_move(l);
+  }
+
+  if (l->c == '\0') {
+    lexer_error(l, LERR_NO_CLOSING_STR);
+    return t;
+  }
+
+  string_push(v, l->c);
+  lexer_move(l);
+
+  if (l->c != '\'') {
+    lexer_error(l, LERR_NO_CLOSING_STR);
+    return t;
+  }
+  lexer_move(l);
+  return t;
+}
+
+/* Build a valueless token of the given type and step past one character. */
+static token_t *lexer_move_with(lexer_t *l, int type) {
+  token_t *t = init_token(l, NULL);
+  t->t = type;
+  lexer_move(l);
+  return t;
+}
+
+/*
+ * Build a valueless token for an operator with an optional second
+ * character: type two when the next character is second (which is then
+ * consumed), type one otherwise. The token keeps the operator's position.
+ */
+static token_t *lexer_collect_pair(lexer_t *l, char second, int two, int one) {
+  int row = l->row;
+  int col = l->col;
+
+  lexer_move(l);
+  if (l->c == second) {
+    lexer_move(l);
+    return init_token_with_linenum(row, col, NULL, two);
+  }
+  return init_token_with_linenum(row, col, NULL, one);
+}
+
+/*
+ * Skip whitespace and return the next token. Returns NULL at the end of the
+ * source; also returns NULL, after reporting LERR_UNRECOGNIZED_CHAR and
+ * skipping the character, when no token starts with it.
+ */
+token_t *lexer_collect_next(lexer_t *l) {
+  lexer_skip_whitespace(l);
+
+  if (isalpha((unsigned char)l->c))
+    return lexer_collect_id(l);
+  else if (isdigit((unsigned char)l->c))
+    return lexer_collect_int(l);
+
+  switch (l->c) {
+  case '\0':
+    return NULL;
+  case '\'':
+    return lexer_collect_char(l);
+  case '"':
+    return lexer_collect_str(l);
+  case '`':
+    return lexer_collect_mstr(l);
+  case ':':
+    return lexer_move_with(l, TT_COLON);
+  case ';':
+    return lexer_move_with(l, TT_SEMI);
+  case '[':
+    return lexer_move_with(l, TT_LBRACKET);
+  case ']':
+    return lexer_move_with(l, TT_RBRACKET);
+  case '{':
+    return lexer_move_with(l, TT_LBRACE);
+  case '}':
+    return lexer_move_with(l, TT_RBRACE);
+  case '(':
+    return lexer_move_with(l, TT_LPAREN);
+  case ')':
+    return lexer_move_with(l, TT_RPAREN);
+  case ',':
+    return lexer_move_with(l, TT_COMMA);
+  case '+':
+    return lexer_move_with(l, TT_PLUS);
+  case '*':
+    return lexer_move_with(l, TT_TIMES);
+  case '/':
+    return lexer_move_with(l, TT_DIVIDE);
+  case '%':
+    return lexer_move_with(l, TT_MOD);
+  case '&':
+    return lexer_move_with(l, TT_AND);
+  case '|':
+    return lexer_move_with(l, TT_OR);
+  case '=':
+    return lexer_collect_pair(l, '=', TT_EQ, TT_ASSIGN);
+  case '<':
+    return lexer_collect_pair(l, '=', TT_LTE, TT_LANGLE);
+  case '>':
+    return lexer_collect_pair(l, '=', TT_GTE, TT_RANGLE);
+  case '-':
+    return lexer_collect_pair(l, '>', TT_ARROW, TT_MINUS);
+  default:
+    lexer_error(l, LERR_UNRECOGNIZED_CHAR);
+    lexer_move(l);
+    return NULL;
+  }
+}
+
+/* Print a diagnostic for error code err at the lexer's current position. */
+void lexer_error(lexer_t *l, int err) {
+  const char *msg;
+
+  switch (err) {
+  case LERR_UNRECOGNIZED_CHAR:
+    msg = "unrecognized character";
+    break;
+  case LERR_NO_CLOSING_STR:
+    msg = "missing closing quote";
+    break;
+  default:
+    msg = "lexer error";
+    break;
+  }
+  fprintf(stderr, "%u:%u: %s\n", l->row, l->col, msg);
+}
diff --git a/src/misc.c b/src/misc.c
new file mode 100644
index 0000000..c26cee0
--- /dev/null
+++ b/src/misc.c
@@ -0,0 +1,21 @@
+#include "../include/misc.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Print message to stderr and exit with a failure status. */
+void die(char *message) {
+  fprintf(stderr, "%s\n", message);
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * calloc that calls die instead of returning NULL on failure. A zero-sized
+ * request may still return NULL, since calloc is allowed to.
+ */
+void *safe_calloc(size_t nmemb, size_t size) {
+  void *x = calloc(nmemb, size);
+  if (!x && nmemb != 0 && size != 0)
+    die("die: calloc");
+  return x;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..6bd9a74
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,6 @@
+/* NOTE(review): the two system header names below were lost from this copy
+ * of the patch; stdio.h and stdlib.h are placeholders - confirm. */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../include/parser.h"