From: Preston Pan <ret2pop@gmail.com>
Date: Sat, 5 Apr 2025 21:40:54 +0000 (-0700)
Subject: first commit
X-Git-Url: https://ret2pop.net/gitweb/monorepo.git?a=commitdiff_plain;h=1851e4d6e482b4ea166dda34b50c8dd4bbad50b4;p=stepone.git

first commit
---

1851e4d6e482b4ea166dda34b50c8dd4bbad50b4
diff --git a/README.org b/README.org
new file mode 100644
index 0000000..e69de29
diff --git a/examples/example.sp b/examples/example.sp
new file mode 100644
index 0000000..02f2522
--- /dev/null
+++ b/examples/example.sp
@@ -0,0 +1,7 @@
+type map<A: [Type]> = match<A, {
+  []: SomethingElse
+  _: map<cdr<A>>
+}>
+;; NOTE(review): presumably MapType is the kind of the type-level function `map`; typeof is not defined in this commit -- confirm.
+type MapType = typeof<map>;
+;; NOTE(review): main.sp writes the same construct as `match(A) { [] -> ... }`; confirm which of the two spellings is intended.
diff --git a/examples/main.sp b/examples/main.sp
new file mode 100644
index 0000000..0b472da
--- /dev/null
+++ b/examples/main.sp
@@ -0,0 +1,51 @@
+type string = p<char>;
+type dp<X: *> = p<p<X>>;
+type Maybe<X: *> = X | NULL;
+;; ComposeHelper walks the list A: an empty list yields B; otherwise it recurses on cdr<A> with car<A> applied to B.
+type ComposeHelper<A: [<atom> => atom], B: atom> = match(A) {
+  [] -> B
+  _ -> ComposeHelper<cdr<A>, (car<A>)<B>>
+}
+;; Compose hands Reverse<A> to ComposeHelper, so the last element of A is applied to B first. Reverse is not defined in this file.
+type Compose<A: [Type -> Type], B: Type> = ComposeHelper<Reverse<A>, B>;
+;; NOTE(review): GetType is not defined in this file; example.sp writes typeof<...> for what looks like the same operation -- confirm.
+type HigherOrder = GetType<Compose>;
+
+type HigherHigherOrder = GetType<HigherOrder>;
+
+;; Compose<[Maybe, dp, p], string> => Maybe<dp<p<string>>>  (p is applied first because of Reverse)
+
+main: (int, p<string>) -> int;
+
+;; or an equivalent declaration: string is p<char>, so p<string> and dp<char> both expand to p<p<char>>
+main: (int, dp<char>) -> int;
+
+hello: int -> int;
+
+;; This is how you define a function body in ASM. You just trust the runtime system
+;; to not blow out the stack; the type system is not a guarantee here.
+main = (argc, argv) asm '''
+;; ASM STUFF
+;; set RBX to 0
+XOR RBX, RBX
+;; return 0
+PUSH RBX
+''';
+;; NOTE(review): the return below has no trailing `;` while the one in main has -- confirm whether the terminator is optional.
+hello = (x) {
+  return x + 1
+}
+;; NOTE(review): main already has an asm definition above; presumably only one of the two is meant to be kept -- confirm.
+main = (argc, argv) {
+  x: int;
+  y: int -> int;
+
+  x = 4;
+  y = hello;
+  return y(x + 2);
+}
+
+type Human = {
+  age: int,
+  name: string,
+}
\ No newline at end of file
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..89a0fb8
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,3 @@
+{
+  description = ""; # NOTE(review): empty description, and no `outputs` attribute is defined, which a flake needs to be usable -- fill in.
+}
diff --git a/include/array.h b/include/array.h
new file mode 100644
index 0000000..9adfadf
--- /dev/null
+++ b/include/array.h
@@ -0,0 +1,32 @@
+#ifndef ARRAY_H
+#define ARRAY_H
+#include <stdlib.h>
+
+/* Growable array of untyped pointers. */
+typedef struct {
+  /* Backing storage for the stored pointers. */
+  void ** items;
+  /* Number of pointers currently stored. */
+  int length;
+  /* Number of pointer slots allocated in items. */
+  size_t size;
+} array_t;
+
+/*
+ * Allocates a new, empty array. The explicit (void) makes this a real
+ * prototype, so calls that pass arguments are rejected at compile time.
+ */
+array_t *init_array(void);
+
+/* Appends x to the end of a; does nothing when a is NULL. */
+void array_push(array_t *a, void *x);
+
+/* Removes and returns the last element, or NULL when a is NULL or empty. */
+void *array_pop(array_t *a);
+
+/*
+ * Calls freefunc on each stored element, then frees the array itself.
+ * a must point to an array_t.
+ */
+void array_free(void *a, void (*freefunc)(void *));
+#endif
diff --git a/include/bstring.h b/include/bstring.h
new file mode 100644
index 0000000..8ded820
--- /dev/null
+++ b/include/bstring.h
@@ -0,0 +1,20 @@
+#ifndef BSTRING_H
+#define BSTRING_H
+#include <stdlib.h>
+/* Heap-allocated, growable character buffer. */
+typedef struct {
+  char *s;          /* character storage */
+  unsigned int l;   /* index where string_push() writes the next character */
+  size_t size;      /* bytes allocated for s */
+} string_t;
+/* Allocates a new string_t; passing NULL requests a fresh buffer. */
+string_t *init_string(char *s);
+/* Removes and returns the last character, or '\0' when s is NULL or s->l is 0. */
+char string_pop(string_t *s);
+/* Appends c, growing the buffer when it is nearly full; does nothing when s is NULL. */
+void string_push(string_t *s, char c);
+/* Appends every character of the NUL-terminated s2 to s1. */
+void string_concat(string_t *s1, const char *s2);
+/* Releases the character buffer and the string_t itself; s must point to a string_t. */
+void string_free(void *s);
+#endif
diff --git a/include/ht.h b/include/ht.h
new file mode 100644
index 0000000..3c0167a
--- /dev/null
+++ b/include/ht.h
@@ -0,0 +1,43 @@
+#ifndef HT_H
+#define HT_H
+#include <stdbool.h>
+#include "bstring.h"
+#include "array.h"
+
+/* A hash table is an array of arrays. */
+
+/* One key/value entry stored in a bucket. */
+typedef struct {
+  string_t *key;
+  void *value;
+} pair_t;
+
+/* The table itself: size bucket slots, each either NULL or an array of pair_t *. */
+typedef struct {
+  array_t **buckets;
+  size_t size;
+} ht_t;
+
+/* Allocates a pair that points at key and value; neither is copied. */
+pair_t *init_pair(string_t *key, void *value);
+
+/*
+ * Allocates a table with size empty buckets. src/ht.c defines this function
+ * with external linkage, so callers need the prototype here.
+ */
+ht_t *init_ht(size_t size);
+
+/* Stores value under key; does nothing when ht or key is NULL. */
+void ht_add(ht_t *ht, string_t *key, void *value);
+
+/* NOTE(review): ht_pop, ht_exists and ht_free are declared here but src/ht.c does not define them yet. */
+void *ht_pop(ht_t *ht, string_t *key);
+
+bool ht_exists(ht_t *ht, string_t *key);
+
+void ht_free(ht_t *ht, void (*freefunc)(void *));
+
+/* Returns the hash value that ht_add() uses to choose a bucket for key. */
+unsigned long hash(char *key);
+
+#endif
diff --git a/include/lexer.h b/include/lexer.h
new file mode 100644
index 0000000..341f765
--- /dev/null
+++ b/include/lexer.h
@@ -0,0 +1,77 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include "bstring.h"
+/* A single lexical token together with the source position it was created at. */
+typedef struct {
+  unsigned int row;
+  unsigned int col;
+  /* Name of the file the token came from; init_token() does not assign it. */
+  string_t *f;
+
+  /* Text of the token; NULL for tokens created without a value, such as punctuation. */
+  string_t *v;
+  enum {
+    TT_LPAREN,
+    TT_RPAREN,
+    TT_LBRACE,
+    TT_RBRACE,
+    TT_LBRACKET,
+    TT_RBRACKET,
+    TT_LANGLE,
+    TT_RANGLE,
+    TT_GTE,
+    TT_LTE,
+    TT_EQ,
+    TT_KEYWORD,
+    TT_ID,
+    TT_STR,
+    TT_INT,
+    TT_FLOAT,
+    /* Multiline string */
+    TT_MSTR,
+    TT_CHAR,
+    TT_COLON,
+    TT_SEMI,
+    TT_COMMA,
+    TT_ASSIGN,
+
+    TT_PLUS,
+    TT_MINUS,
+    TT_MOD,
+    TT_TIMES,
+    TT_DIVIDE,
+    TT_AND,
+    TT_OR,
+
+    TT_ARROW,
+  }
+  /* Kind of token: one of the TT_* constants above. */
+    t;
+} token_t;
+/* Cursor over a NUL-terminated source buffer. */
+typedef struct {
+  char c;            /* character at source[i] */
+  unsigned int i;    /* index of the current character */
+  char *source;      /* text being lexed; init_lexer() stores the pointer without copying */
+  unsigned int row;  /* current line, starting at 1 */
+  unsigned int col;  /* current column, starting at 1 */
+} lexer_t;
+/* Error codes accepted by lexer_error(). */
+enum {
+  LERR_UNRECOGNIZED_CHAR,  /* no token starts with the current character */
+  LERR_NO_CLOSING_STR,     /* a quoted literal was not terminated */
+  LERR                     /* NOTE(review): not used in src/lexer.c; presumably a generic error or a count -- confirm */
+};
+/* Allocates a token positioned at l's current row/col with value v; the type is left zeroed. */
+token_t *init_token(lexer_t *l, string_t *v);
+/* Creates a lexer over source, or returns NULL when source is the empty string. */
+lexer_t *init_lexer(char *source);
+/* Advances to the next character; does nothing once the terminating NUL is reached. */
+void lexer_move(lexer_t *l);
+/* Advances past any run of whitespace characters. */
+void lexer_skip_whitespace(lexer_t *l);
+/* Skips whitespace and returns the next token, or NULL when no token can be formed. */
+token_t *lexer_collect_next(lexer_t *l);
+/* Reports a lexing error; err is one of the LERR_* codes. */
+void lexer_error(lexer_t *lexer, int err);
+#endif
diff --git a/include/misc.h b/include/misc.h
new file mode 100644
index 0000000..ebf3037
--- /dev/null
+++ b/include/misc.h
@@ -0,0 +1,9 @@
+#ifndef MISC_H
+#define MISC_H
+#include <stdlib.h>
+/* Fatal-error hook: called with a description of the failure (see safe_calloc()). */
+void die(char *message);
+/* calloc() wrapper that calls die() when the allocation fails. */
+void *safe_calloc(size_t nmemb, size_t size);
+
+#endif
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..5a9c17c
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,44 @@
+#ifndef PARSER_H
+#define PARSER_H
+#include "bstring.h"
+#include "lexer.h"
+/* Syntax-tree node: a kind tag plus one string; the struct has no child links yet. */
+typedef struct {
+  enum {
+    AST_ROOT,
+    AST_TYPE,
+    AST_TYPE_FUNCTION,
+    AST_TYPE_ATOM,
+    AST_TYPE_LIST,
+    AST_TYPE_DICT,
+    /* NOTE(review): presumably the kinds used in type parameters such as <X: *> -- confirm. */
+    AST_KIND,
+    AST_KIND_ALL,
+    AST_KIND_ATOM,
+    AST_KIND_FUNC,
+    AST_KIND_ARRAY,
+    AST_KIND_DICT,
+    /* NOTE(review): presumably declarations and statements of ordinary program code -- confirm. */
+    AST_STRUCT,
+    AST_ENUM,
+    AST_BLOCK,
+    AST_IF_ELSE,
+    AST_WHILE,
+    AST_VARDEC,
+    AST_VARDEF,
+  }t;
+  /* NOTE(review): presumably the text attached to the node (see init_ast) -- confirm. */
+  string_t *value;
+} ast_t;
+/* Parser state: the lexer being read and a token pointer. */
+typedef struct {
+  lexer_t *l;
+  token_t *t;  /* presumably the current lookahead token -- TODO confirm */
+} parser_t;
+/* NOTE(review): the three functions below are declared only; src/parser.c defines nothing in this commit. */
+ast_t *init_ast(string_t *v);
+/* Presumably builds a parser (and its lexer) over source -- TODO confirm once implemented. */
+parser_t *init_parser(char *source);
+/* Presumably parses the whole input and returns the AST_ROOT node -- TODO confirm once implemented. */
+ast_t *parse_all(parser_t *p);
+#endif
diff --git a/include/validate.h b/include/validate.h
new file mode 100644
index 0000000..b47c8c6
--- /dev/null
+++ b/include/validate.h
@@ -0,0 +1,32 @@
+#ifndef VALIDATE_H
+#define VALIDATE_H
+
+#include "parser.h"
+#include "ht.h"
+/* Evaluation state passed to every eval_* function and to validate_all(). */
+typedef struct {
+  ht_t *builtin_table;  /* presumably built-in definitions by name -- TODO confirm */
+  ht_t *type_table;     /* presumably declared types by name -- TODO confirm */
+  /* stack frame */
+  array_t *stack;
+} eval_t;
+
+/* Evaluates a type operator: intersection or union. */
+ast_t *eval_typeop(eval_t *e, ast_t *a);
+
+/* Evaluates a type function call. */
+ast_t *eval_typecall(eval_t *e, ast_t *a);
+
+/* Processes a type declaration. */
+void eval_typedec(eval_t *e, ast_t *a);
+
+/* Evaluates the type system for the tree rooted at root. */
+void eval_all(eval_t *e, ast_t *root);
+
+/*! Analyzes things like undefined variables and unreachable code. */
+void analyze(ast_t *root);
+
+/*! Type-checks the actual program code. */
+void validate_all(eval_t *e, ast_t *root);
+
+#endif
diff --git a/src/array.c b/src/array.c
new file mode 100644
index 0000000..899d6d9
--- /dev/null
+++ b/src/array.c
@@ -0,0 +1,72 @@
+#include "../include/array.h"
+#include "../include/misc.h"
+
+#include <stdlib.h>
+
+/* Number of pointer slots a freshly created array starts with. */
+#define ARRAY_INITIAL_SIZE 10
+
+/*
+ * Allocates an empty array with ARRAY_INITIAL_SIZE slots.
+ * Allocation goes through safe_calloc(), which calls die() on failure.
+ */
+array_t *init_array(void) {
+  array_t *a = safe_calloc(1, sizeof(*a));
+  a->size = ARRAY_INITIAL_SIZE;
+  a->items = safe_calloc(a->size, sizeof(*a->items));
+  a->length = 0;
+  return a;
+}
+
+/*
+ * Appends v to a, doubling the capacity when the array is nearly full.
+ * Does nothing when a is NULL.
+ */
+void array_push(array_t *a, void *v) {
+  if (!a)
+    return;
+
+  if ((size_t)a->length + 2 >= a->size) {
+    /* a->size counts slots, while realloc() takes bytes, so the request is
+       scaled by the slot width. */
+    size_t new_size = a->size ? a->size * 2 : ARRAY_INITIAL_SIZE;
+    void **items = realloc(a->items, new_size * sizeof(*items));
+    if (!items) {
+      /* a->items is still valid here; keep it rather than losing it. */
+      die("die: realloc");
+      return;
+    }
+    a->items = items;
+    a->size = new_size;
+  }
+  a->items[a->length] = v;
+  a->length++;
+}
+
+/*
+ * Removes and returns the last element.
+ * Returns NULL when a is NULL or holds no elements.
+ */
+void *array_pop(array_t *a) {
+  if (!a || a->length <= 0)
+    return NULL;
+  a->length--;
+  return a->items[a->length];
+}
+
+/*
+ * Frees every element with freefunc (skipped when freefunc is NULL), then
+ * the slot buffer and the array itself. x must point to an array_t; NULL is
+ * accepted and ignored.
+ */
+void array_free(void *x, void (*freefunc)(void *)) {
+  array_t *a = x;
+  if (!a)
+    return;
+  if (freefunc) {
+    for (int i = 0; i < a->length; i++)
+      freefunc(a->items[i]);
+  }
+  free(a->items);
+  free(a);
+}
diff --git a/src/bstring.c b/src/bstring.c
new file mode 100644
index 0000000..03a7376
--- /dev/null
+++ b/src/bstring.c
@@ -0,0 +1,77 @@
+#include "../include/bstring.h"
+#include "../include/misc.h"
+
+#include <stdlib.h>
+#include <string.h>
+#define STRING_SIZE 20
+
+/*
+ * Allocates a string_t. A NULL source yields an empty string; otherwise the
+ * new string starts out as a copy of the NUL-terminated source.
+ */
+string_t *init_string(char *source) {
+  string_t *s = safe_calloc(1, sizeof(*s));
+  /* The buffer is zero-filled and holds nothing yet, so the length is 0. */
+  s->l = 0;
+  s->size = STRING_SIZE;
+  s->s = safe_calloc(STRING_SIZE, sizeof(char));
+  if (source)
+    string_concat(s, source);
+  return s;
+}
+
+/*
+ * Appends c to s, doubling the buffer when fewer than two bytes remain.
+ * The contents are kept NUL-terminated so s->s can be handed to <string.h>
+ * functions. Does nothing when s is NULL.
+ */
+void string_push(string_t *s, char c) {
+  if (!s)
+    return;
+  if (s->l + 2 >= s->size) {
+    size_t new_size = s->size * 2;
+    char *buf = realloc(s->s, new_size * sizeof(char));
+    if (!buf) {
+      /* s->s is still valid here; keep it rather than losing it. */
+      die("die: realloc");
+      return;
+    }
+    s->s = buf;
+    s->size = new_size;
+  }
+  s->s[s->l] = c;
+  s->l++;
+  /* realloc() does not zero the new tail, so terminate explicitly. */
+  s->s[s->l] = '\0';
+}
+
+/* Appends each character of the NUL-terminated s2 to s1; NULL s2 is ignored. */
+void string_concat(string_t *s1, const char *s2) {
+  if (!s2)
+    return;
+  size_t n = strlen(s2);
+  for (size_t i = 0; i < n; i++)
+    string_push(s1, s2[i]);
+}
+
+/*
+ * Removes and returns the last character of s.
+ * Returns '\0' when s is NULL or empty.
+ */
+char string_pop(string_t *s) {
+  if (!s || s->l == 0)
+    return '\0';
+  s->l--;
+  char c = s->s[s->l];
+  s->s[s->l] = '\0';
+  return c;
+}
+
+/* Frees the character buffer and the string_t; NULL is accepted and ignored. */
+void string_free(void *x) {
+  string_t *s = x;
+  if (!s)
+    return;
+  free(s->s);
+  free(s);
+}
diff --git a/src/ht.c b/src/ht.c
new file mode 100644
index 0000000..da06216
--- /dev/null
+++ b/src/ht.c
@@ -0,0 +1,69 @@
+#include "../include/ht.h"
+#include "../include/array.h"
+#include "../include/misc.h"
+#include <string.h>
+#include <stdlib.h>
+
+/*
+ * Allocates a pair that points at key and value. Neither is copied, so both
+ * must outlive the pair.
+ */
+pair_t *init_pair(string_t *key, void *value) {
+  pair_t *pair = safe_calloc(1, sizeof(*pair));
+  pair->key = key;
+  pair->value = value;
+  return pair;
+}
+
+/* Allocates a table with size buckets, all initially NULL. */
+ht_t *init_ht(size_t size) {
+  ht_t *ht = safe_calloc(1, sizeof(*ht));
+  ht->size = size;
+  ht->buckets = safe_calloc(size, sizeof(*ht->buckets));
+  return ht;
+}
+
+/*
+ * Stores value under key.
+ *
+ * If an entry with an equal key already exists its value is replaced and the
+ * existing key object is kept, so the key passed in is not retained in that
+ * case. Otherwise a new pair referring to key and value is appended to the
+ * bucket. Does nothing when ht, key or key->s is NULL, or when the table has
+ * no buckets.
+ */
+void ht_add(ht_t *ht, string_t *key, void *value) {
+  if (!ht || !key || !key->s || ht->size == 0)
+    return;
+
+  /* Reduce the hash to a valid index; buckets has exactly ht->size slots. */
+  size_t bnum = hash(key->s) % ht->size;
+  array_t *bucket = ht->buckets[bnum];
+  if (!bucket) {
+    bucket = init_array();
+    ht->buckets[bnum] = bucket;
+  }
+
+  for (int i = 0; i < bucket->length; i++) {
+    pair_t *existing = bucket->items[i];
+    if (strcmp(existing->key->s, key->s) == 0) {
+      existing->value = value;
+      return;
+    }
+  }
+  array_push(bucket, init_pair(key, value));
+}
+
+/*
+ * djb2 string hash over the bytes of the NUL-terminated key.
+ * Returns 0 for a NULL key.
+ */
+unsigned long hash(char *key) {
+  if (!key)
+    return 0;
+
+  unsigned long h = 5381;
+  for (const unsigned char *p = (const unsigned char *)key; *p; p++)
+    h = h * 33 + *p;
+  return h;
+}
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..5453a16
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,322 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include "../include/lexer.h"
+#include "../include/misc.h"
+
+/*
+ * Reserved words. Each entry points at a string literal, so every keyword is
+ * NUL-terminated whatever its length and can be passed to strcmp().
+ */
+static const char *const keywords[] = {
+    "if", "while",
+    "for", "type", "enum"
+};
+
+#define KEYWORD_COUNT (sizeof(keywords) / sizeof(keywords[0]))
+
+/*
+ * Allocates a token carrying value v and stamped with l's current position.
+ * The type field is left zeroed for the caller to fill in.
+ */
+token_t *init_token(lexer_t *l, string_t *v) {
+  token_t *tok = safe_calloc(1, sizeof(token_t));
+  tok->col = l->col;
+  tok->row = l->row;
+  tok->v = v;
+  return tok;
+}
+
+/* Allocates a token with an explicit position, value and type. */
+token_t *init_token_with_linenum(int row, int col, string_t *v, int type) {
+  token_t *tok = safe_calloc(1, sizeof(token_t));
+  tok->row = row;
+  tok->col = col;
+  tok->v = v;
+  tok->t = type;
+  return tok;
+}
+
+/*
+ * Creates a lexer positioned on the first character of source, at row 1,
+ * column 1. source is not copied. Returns NULL when source is NULL or empty.
+ */
+lexer_t *init_lexer(char *source) {
+  if (!source || source[0] == '\0')
+    return NULL;
+  lexer_t *lexer = safe_calloc(1, sizeof(lexer_t));
+  lexer->source = source;
+  lexer->i = 0;
+  lexer->c = lexer->source[lexer->i];
+  lexer->row = 1;
+  lexer->col = 1;
+  return lexer;
+}
+
+/*
+ * Advances to the next character and updates row/col to describe it.
+ * Leaving a newline starts a new row; leaving anything else moves one column
+ * right. Does nothing once the terminating NUL has been reached.
+ */
+void lexer_move(lexer_t *l) {
+  if (l->c == '\0')
+    return;
+  if (l->c == '\n') {
+    l->row++;
+    l->col = 1;
+  } else {
+    l->col++;
+  }
+  l->i++;
+  l->c = l->source[l->i];
+}
+
+/* Advances past any run of whitespace. */
+void lexer_skip_whitespace(lexer_t *l) {
+  /* <ctype.h> functions need an unsigned char value; plain char may be negative. */
+  while (isspace((unsigned char)l->c))
+    lexer_move(l);
+}
+
+/* Frees a token that will not be handed to the caller, together with its value. */
+static void lexer_discard_token(token_t *t) {
+  if (!t)
+    return;
+  if (t->v)
+    string_free(t->v);
+  free(t);
+}
+
+/*
+ * Collects a run of alphanumeric characters. The token is TT_KEYWORD when the
+ * text equals one of the reserved words and TT_ID otherwise. The comparison
+ * uses strcmp(), so it relies on v->s being NUL-terminated.
+ */
+static token_t *lexer_collect_id(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  while (isalnum((unsigned char)l->c)) {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+
+  t->t = TT_ID;
+  for (size_t i = 0; i < KEYWORD_COUNT; i++) {
+    if (strcmp(v->s, keywords[i]) == 0) {
+      t->t = TT_KEYWORD;
+      break;
+    }
+  }
+  return t;
+}
+
+/*
+ * Collects the text between the delimiter under the cursor and the next
+ * occurrence of delim, and returns it as a token of the given type. Reports
+ * LERR_NO_CLOSING_STR and returns NULL when the input ends first.
+ */
+static token_t *lexer_collect_delimited(lexer_t *l, char delim, int type) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  t->t = type;
+  lexer_move(l);
+  while (l->c != delim && l->c != '\0') {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+  if (l->c == '\0') {
+    lexer_error(l, LERR_NO_CLOSING_STR);
+    lexer_discard_token(t);
+    return NULL;
+  }
+  lexer_move(l);
+  return t;
+}
+
+/* Collects a double-quoted string literal. */
+static token_t *lexer_collect_str(lexer_t *l) {
+  return lexer_collect_delimited(l, '"', TT_STR);
+}
+
+/* Collects a backtick-quoted multiline string literal. */
+static token_t *lexer_collect_mstr(lexer_t *l) {
+  return lexer_collect_delimited(l, '`', TT_MSTR);
+}
+
+/*
+ * Collects a number: digits with at most one '.'. A '.' makes the token
+ * TT_FLOAT, otherwise it is TT_INT. A second '.' ends the number and is left
+ * for the next token.
+ */
+static token_t *lexer_collect_int(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  bool isf = false;
+  while (isdigit((unsigned char)l->c) || l->c == '.') {
+    if (!isf && l->c == '.')
+      isf = true;
+    else if (l->c == '.')
+      break;
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+  t->t = isf ? TT_FLOAT : TT_INT;
+  return t;
+}
+
+/*
+ * Collects a character literal: one character, optionally preceded by a
+ * backslash, between single quotes. A leading backslash is kept in the value.
+ * Reports LERR_NO_CLOSING_STR and returns NULL when the closing quote is
+ * missing.
+ */
+static token_t *lexer_collect_char(lexer_t *l) {
+  string_t *v = init_string(NULL);
+  token_t *t = init_token(l, v);
+  t->t = TT_CHAR;
+  lexer_move(l);
+  if (l->c == '\\') {
+    string_push(v, '\\');
+    lexer_move(l);
+  }
+
+  /* At end of input there is no character to take; the check below reports it. */
+  if (l->c != '\0') {
+    string_push(v, l->c);
+    lexer_move(l);
+  }
+
+  if (l->c != '\'') {
+    lexer_error(l, LERR_NO_CLOSING_STR);
+    lexer_discard_token(t);
+    return NULL;
+  }
+  lexer_move(l);
+  return t;
+}
+
+/* Consumes one character and returns a value-less token of the given type. */
+static token_t *lexer_move_with(lexer_t *l, int type) {
+  token_t *t = init_token(l, NULL);
+  t->t = type;
+  lexer_move(l);
+  return t;
+}
+
+/*
+ * Consumes one character, plus a following `second` when present. Returns a
+ * pair_type token in the two-character case and a single_type token otherwise,
+ * stamped with the position of the first character.
+ */
+static token_t *lexer_move_with_either(lexer_t *l, char second, int pair_type,
+                                       int single_type) {
+  int row = l->row;
+  int col = l->col;
+
+  lexer_move(l);
+  if (l->c == second) {
+    lexer_move(l);
+    return init_token_with_linenum(row, col, NULL, pair_type);
+  }
+  return init_token_with_linenum(row, col, NULL, single_type);
+}
+
+/*
+ * Skips whitespace and returns the next token. Returns NULL at end of input,
+ * and also after reporting an error for a character or literal that cannot
+ * be lexed.
+ */
+token_t *lexer_collect_next(lexer_t *l) {
+  if (!l)
+    return NULL;
+
+  lexer_skip_whitespace(l);
+
+  /* End of input is not an error. */
+  if (l->c == '\0')
+    return NULL;
+
+  if (isalpha((unsigned char)l->c))
+    return lexer_collect_id(l);
+  else if (isdigit((unsigned char)l->c))
+    return lexer_collect_int(l);
+
+  switch (l->c) {
+  case '\'':
+    return lexer_collect_char(l);
+  case '"':
+    return lexer_collect_str(l);
+  case '`':
+    return lexer_collect_mstr(l);
+  case ':':
+    return lexer_move_with(l, TT_COLON);
+  case ';':
+    return lexer_move_with(l, TT_SEMI);
+  case '[':
+    return lexer_move_with(l, TT_LBRACKET);
+  case ']':
+    return lexer_move_with(l, TT_RBRACKET);
+  case '{':
+    return lexer_move_with(l, TT_LBRACE);
+  case '}':
+    return lexer_move_with(l, TT_RBRACE);
+  /* '(' and ')' are parentheses, distinct from the '[' ']' brackets above. */
+  case '(':
+    return lexer_move_with(l, TT_LPAREN);
+  case ')':
+    return lexer_move_with(l, TT_RPAREN);
+  case ',':
+    return lexer_move_with(l, TT_COMMA);
+  case '+':
+    return lexer_move_with(l, TT_PLUS);
+  case '*':
+    return lexer_move_with(l, TT_TIMES);
+  case '/':
+    return lexer_move_with(l, TT_DIVIDE);
+  case '%':
+    return lexer_move_with(l, TT_MOD);
+  case '&':
+    return lexer_move_with(l, TT_AND);
+  case '|':
+    return lexer_move_with(l, TT_OR);
+  case '=':
+    return lexer_move_with_either(l, '=', TT_EQ, TT_ASSIGN);
+  case '<':
+    return lexer_move_with_either(l, '=', TT_LTE, TT_LANGLE);
+  case '>':
+    return lexer_move_with_either(l, '=', TT_GTE, TT_RANGLE);
+  case '-':
+    return lexer_move_with_either(l, '>', TT_ARROW, TT_MINUS);
+  default:
+    lexer_error(l, LERR_UNRECOGNIZED_CHAR);
+    return NULL;
+  }
+}
+
+/*
+ * Reports a lexing error on stderr as "row:col: error: message".
+ * err is one of the LERR_* codes; l may be NULL, in which case no position is
+ * printed.
+ */
+void lexer_error(lexer_t *l, int err) {
+  const char *msg;
+
+  switch (err) {
+  case LERR_UNRECOGNIZED_CHAR:
+    msg = "unrecognized character";
+    break;
+  case LERR_NO_CLOSING_STR:
+    msg = "unterminated string or character literal";
+    break;
+  default:
+    msg = "lexer error";
+    break;
+  }
+
+  if (l)
+    fprintf(stderr, "%u:%u: error: %s\n", l->row, l->col, msg);
+  else
+    fprintf(stderr, "error: %s\n", msg);
+}
diff --git a/src/misc.c b/src/misc.c
new file mode 100644
index 0000000..c26cee0
--- /dev/null
+++ b/src/misc.c
@@ -0,0 +1,26 @@
+#include "../include/misc.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Prints message to stderr and terminates the process with EXIT_FAILURE.
+ * misc.h declares die() and safe_calloc() below calls it, so it is defined
+ * here.
+ */
+void die(char *message) {
+  fprintf(stderr, "%s\n", message ? message : "die");
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * calloc() wrapper for callers that cannot recover from allocation failure:
+ * calls die() instead of returning NULL. A zero-size request may legitimately
+ * come back as NULL from calloc() and is not treated as a failure.
+ */
+void *safe_calloc(size_t nmemb, size_t size) {
+  void *x = calloc(nmemb, size);
+  if (!x && nmemb != 0 && size != 0)
+    die("die: calloc");
+  return x;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..6bd9a74
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,4 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../include/parser.h"