From a9b57de1456321cb214dad99a5df4c4d42648902 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Mon, 13 Jun 2022 07:18:08 +0200 Subject: [PATCH] add ciel --- ciel/README.md | 36 ++ ciel/charsqueeze.py | 111 +++++ ciel/ciel.c | 1067 +++++++++++++++++++++++++++++++++++++++++++ ciel/enfer.c | 643 ++++++++++++++++++++++++++ 4 files changed, 1857 insertions(+) create mode 100644 ciel/README.md create mode 100644 ciel/charsqueeze.py create mode 100644 ciel/ciel.c create mode 100644 ciel/enfer.c diff --git a/ciel/README.md b/ciel/README.md new file mode 100644 index 0000000..81dd16d --- /dev/null +++ b/ciel/README.md @@ -0,0 +1,36 @@ +it's a **C** **I**nteger **E**xpression **L**anguageā€¦? maybe? + +ciel doesn't actually do anything useful, +and it's more of an exercise in masochism. +read some notes from the C file itself: + +```c +/* this code uses no #includes, because i've lost my mind. */ +/* it also makes it easier to compile with hobbyists' compilers. */ +/* this code is somewhat optimized for LOC after preprocessing with noccom. */ +/* ( noccom: https://github.com/notwa/rc/blob/master/sh/noccom ) */ +/* this code makes extensive use of the comma operator. */ +/* i tried to avoid using the preprocessor for any side-effects. */ +``` + +at the time of writing, ciel fits into 643 LOC, +under my arbitrary measure of lines-of-code. +you can find the mangled source code in `enfer.c`. + +i avoid having multiple semicolons in one line, with these exceptions: +"for" loops, switch-case-breaks, and alongside closing braces. +also, most lines fit into 80 columns. there's a couple that don't (yet). + +perhaps of note is the relatively compact robin-hood hash table implementation. +it supports looking (peeking), pushing, pulling (popping), +and expansion/growth (but not shrinkage). + +there's also some utilization of perfect hashes +found by an external program through brute force. +these efficiently map operator strings to their respective numeric enumerations. + +*aside:* ciel already has its own git repo with a ton of history, +but it's not for the public (i.e. it's an absolute clusterfuck). + +*aside:* i wrote ciel before i really knew what Forth was, and now +i kinda want to make my own Forth instead of whatever the heck this is. diff --git a/ciel/charsqueeze.py b/ciel/charsqueeze.py new file mode 100644 index 0000000..ae2dc17 --- /dev/null +++ b/ciel/charsqueeze.py @@ -0,0 +1,111 @@ +lines = r""" +#define OPS \ +X(TOPCOM, 1) +X(TOPSET, 2) +X(TOPLOR, 3) +X(TOPLXOR, 4) +X(TOPLAND, 5) +X(TUNLNOT, 6) +X(TOPEQ, 7) +X(TOPNE, 7) +X(TOPLT, 7) +X(TOPGT, 7) +X(TOPLE, 7) +X(TOPGE, 7) +X(TOPOR, 8) +X(TOPXOR, 9) +X(TOPAND, 10) +X(TOPSHL, 11) +X(TOPSHR, 11) +X(TOPADD, 12) +X(TOPSUB, 12) +X(TOPMUL, 13) +X(TOPDIV, 13) +X(TOPMOD, 13) +X(TUNNOT, 14) +X(TUNINV, 14) +X(TUNPOS, 14) +X(TUNNEG, 14) +X(TOPPOW, 15) +X(TOPTYPE, 16) +X(TOPLT_S, 0) +X(TOPGT_S, 0) +X(TOPLE_S, 0) +X(TOPGE_S, 0) +X(TOPSHL_S, 0) +X(TOPSHR_S, 0) +X(TOPDIV_S, 0) +X(TOPMOD_S, 0) +X(TOPPOW_S, 0) +""" + +lines = r""" +const char +*fall = "failed to allocate memory", +*farg = "malformed argc/argv", +*fbignum = "number too large", +*femp = "empty token", +*feof = "unexpected end of file", +*ffread = "read failed", +*fid = "identifiers cannot be longer than " STR(MAXLEN) " bytes", +*finc8 = "incomplete UTF-8", +*finv8 = "invalid UTF-8", +*fmiscl = "missing \")\"", +*fmisop = "missing \"(\"", +*fmop = "missing operand", +*fnan = "not a number", +*fnot = "no token", +*fnotid = "left-hand operand must be an identifier", +*fnumpre = "invalid numeric prefix", +*fopmod = "operands have different modulos", +*fopsign = "operands have different signedness", +*fout8 = "out of range for UTF-8", +*fstart = "invalid starting token", +*ftype = "invalid type", +*fuchar = "unknown character", +*fundef = "undefined", +*funex = "unexpected", +*funkop = "unknown operator", +""" + +lines = lines.strip().split('\n') +header, lines = lines[0], lines[1:] +lines = sorted(lines, key=lambda k: -len(k)) + +ispp = header[0] == '#' + +def tryout(s, *used): + if len(s) > 80 - (6 if ispp else 4): + return None, None + best_str = s + best_used = used + for j, other in enumerate(lines): + if j in used: + continue + ts, tused = tryout(s + ' ' + other, j, *used) + if ts is None: + continue + if len(ts) > len(best_str): + best_str = ts + best_used = tused + return best_str, best_used + +used = [] +print(header) +for i, line in enumerate(lines): + if i in used: + continue + s, tused = tryout(line, i, *used) + used += tused + + islast = not any(j not in used for j in range(i, len(lines))) + if islast: + if s[-1] == ',': + print(' ' + s[:-1] + ';') + else: + print(' ' + s) + else: + if ispp: + print(' ' + s + ' \\') + else: + print(' ' + s) diff --git a/ciel/ciel.c b/ciel/ciel.c new file mode 100644 index 0000000..9625206 --- /dev/null +++ b/ciel/ciel.c @@ -0,0 +1,1067 @@ +/* ciel: a C Integer Expression Language. {{{1 */ +/* or maybe C-Inspired? but not really. */ + +/* + Copyright (C) 2019 Connor Olding + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/* this code uses no #includes, because i've lost my mind. */ +/* it also makes it easier to compile with hobbyists' compilers. */ +/* this code is somewhat optimized for LOC after preprocessing with noccom. */ +/* ( noccom: https://github.com/notwa/rc/blob/master/sh/noccom ) */ +/* this code makes extensive use of the comma operator. */ +/* i tried to avoid using the preprocessor for any side-effects. */ + +/* alpha {{{1 */ + +/* sorry, this was really bugging me: */ +#define elif else if + +/* this should be plenty. MAXLEN is for identifiers, etc. */ +#define PAGES 256 +#define CIEL_PAGESIZE 4096 +/* 256 is long enough to hold 64 emojis as an identifier. */ +#define MAXLEN 256 + +#define XSTR(e) #e +#define STR(e) XSTR(e) + +typedef unsigned char u8; +typedef signed char s8; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned int u32; +typedef signed int s32; + +#if defined(_WIN32) || defined(__i386) || defined(__ARM_ARCH) +typedef unsigned long long u64; +typedef signed long long s64; +#define U64(x) (x ## ull) +#else +typedef unsigned long u64; +typedef signed long s64; +#define U64(x) (x ## ul) +#endif + +#if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) || defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__) +typedef u64 size_t; +typedef s64 ssize_t; +#else +typedef u32 size_t; +typedef s32 ssize_t; +#endif + +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +_Static_assert(sizeof(u8) == 1, "u8 is incorrect"); +_Static_assert(sizeof(s8) == 1, "s8 is incorrect"); +_Static_assert(sizeof(u16) == 2, "u16 is incorrect"); +_Static_assert(sizeof(s16) == 2, "s16 is incorrect"); +_Static_assert(sizeof(u32) == 4, "u32 is incorrect"); +_Static_assert(sizeof(s32) == 4, "s32 is incorrect"); +_Static_assert(sizeof(u64) == 8, "u64 is incorrect"); +_Static_assert(sizeof(s64) == 8, "s64 is incorrect"); +_Static_assert(sizeof(ssize_t) == sizeof(void *), "ssize_t is incorrect"); +_Static_assert(sizeof(size_t) == sizeof(void *), "size_t is incorrect"); +#endif + +#ifndef COSMOPOLITAN_H_ + +/* these are the only external functions used. + * most compilers will handle the linking. */ +ssize_t read(int fildes, void *buf, size_t nbyte); +ssize_t write(int fildes, const void *buf, size_t nbyte); +void *malloc(size_t size); +void free(void *ptr); + +/* wgtcc doesn't allow pointers to be set to integral zero. */ +#define NULL (void*)0 + +typedef int bool; + +#endif + +/* beta {{{1 */ + +struct slice { + char *str; + s32 len; +}; + +static bool sliceeq(struct slice a, struct slice b) { + /* NOTE: this assumes slices were produced by pushc, defined later. */ + s32 i; + if (a.len != b.len) return 0; + if (a.str == b.str) return 1; + for (i = 0; i < (a.len + 3) / 4; i++) { + if (((u32 *)a.str)[i] != ((u32 *)b.str)[i]) return 0; + } + return 1; +} + +static u64 udiv(u64 a, u64 b) { + return b ? a / b : 0; +} + +static s64 sdiv(s64 a, s64 b) { + return b ? a / b : 0; +} + +static u64 umod(u64 a, u64 b) { + return b ? a % b : 0; +} + +static s64 smod(s64 a, s64 b) { + /* signed modulo, not a remainder: -1 % 10 is 9, not -1. the divisor + * must be added to the result if the operands have differing signs. */ + return b ? a % b + ((a ^ b) < 0 ? b: 0) : 0; +} + +static u64 upow(u64 b, u64 e) { + u64 r = 1; + while (e) { + if (e & 1) r *= b; + e >>= 1, b *= b; + } + return r; +} + +static s64 spow(s64 b, s64 e) { + if (b == 1) return 1; + elif (b == -1) return e & 1 ? -1 : 1; + if (e < 0) return 0; + /* signed overflow is undefined, so resort to the unsigned operation. */ + return upow(b, e); +} + +/* shifts in C are full of undefined/implementation-specific behavior, even + * between gcc & clang, so many checks must be made to make them consistent. */ + +static u64 ushl(u64 a, u64 b) { + return b < 64 ? a << b : 0; +} + +static u64 ushr(u64 a, u64 b) { + return b < 64 ? a >> b : 0; +} + +static s64 sshl(s64 a, s64 b) { + if (b < -63) return a < 0 ? -1 : 0; + elif (b < 0) return a >> b; + elif (b < 63) return a << b; + else return 0; +} + +static s64 sshr(s64 a, s64 b) { + if (b < -63) return 0; + elif (b < 0) return a << b; + elif (b < 63) return a >> b; + else return a < 0 ? -1 : 0; +} + +static u64 rol(u64 a, u64 b) { + return b ? (a << (b & 63)) | (a >> (64 - (b & 63))) : a; +} + +static u64 ror(u64 a, u64 b) { + return b ? (a >> (b & 63)) | (a << (64 - (b & 63))) : a; +} + +static bool isalpha_(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool isnum(char c) { + return c >= '0' && c <= '9'; +} + +static bool isident(char c, bool notfirst) { + if (notfirst) return isalpha_(c) || c == '_' || isnum(c); + else return isalpha_(c) || c == '_' || c == '\\'; +} + +static bool isspace_(char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +static bool isopen(char c) { + return c == '{' || c == '[' || c == '('; +} + +static bool isclose(char c) { + return c == ')' || c == ']' || c == '}'; +} + +static bool isop(char c) { + /* this is a broad check that includes some extra characters */ + return c >= '!' && c <= '~' && !(isident(c, 1) || isopen(c) || isclose(c)); +} + +/* basically strlen but without the name conflict */ +static ssize_t zlen(const char *str) { + ssize_t len = 0; + while (str[len]) len++; + return len; +} + +static bool writeok = 1; + +static void write_(int fd, const char *str, ssize_t len) { + /* a wrapper around write() that stops writing once writing fails. */ + if (len > 0 && writeok) writeok = write(fd, str, len) == len; +} + +static void writestr(int fd, const char *str) { + write_(fd, str, zlen(str)); +} + +static void writepair(int fd, struct slice p) { + write_(fd, p.str, p.len); +} + +static void writech(int fd, char c) { + write_(fd, &c, 1); +} + +static void writeline(int fd, const char *str) { + writestr(fd, str), writech(1, '\n'); +} + +static void writeu64(int fd, u64 x) { + char str[21], *start = str + 20; + for (*start = 0; *--start = x % 10 + '0', x /= 10; ); + writestr(fd, start); +} + +static void writeint(int fd, s32 x) { + writeu64(fd, x < 0 ? writech(fd, '-'), 0 - (u64)x : (u64)x); +} + +static void writepad(int fd, s32 x, s32 digits) { + while (--digits) if ((x /= 10) == 0) writech(fd, ' '); +} + +/* gamma {{{1 */ + +/* OK: success, BAD: user's fault, EXCEPTION: dev's fault, OOM: out of memory */ +enum result {OK, BAD, EXCEPTION, OOM}; + +char *pages[PAGES]; +s32 pagelen = 0; +struct slice iden; /* iden.str is assumed to be u32-aligned by the hash table */ + +static enum result pushc(char c) { + if (pagelen > PAGES) return OOM; + if (!iden.str) iden.str = pages[pagelen++] = malloc(CIEL_PAGESIZE); + if (!iden.str) return OOM; + if (iden.len >= MAXLEN) return EXCEPTION; + iden.str[iden.len++] = c; + return OK; +} + +static struct slice nextiden() { + /* note to self: it's possible to squeeze idens onto the previous page */ + struct slice ret = iden; + if (!iden.len) return ret; + iden.str += iden.len, iden.len = 0; + while ((size_t)iden.str & 3) *iden.str++ = 0; + if (iden.str - pages[pagelen - 1] > CIEL_PAGESIZE - MAXLEN) iden.str = NULL; + return ret; +} + +static u32 uhash(u32 h) { + return ++h, h ^= h >> 16, h *= 0xA66C52B5, h; +} + +static u32 shash(struct slice s) { + u32 h = 1; /* arbitrary */ + s32 i; + for (i = 0; i < s.len; i++) h = uhash(h + s.str[i]); + /* never return 0 to prevent confusion with empty entries. */ + return h ? h : 1; +} + +struct entry { /* aka bucket */ + u32 h; /* hash of the identifier's name */ + s32 len; /* the length of the identifier */ + char *str; /* the name of the identifier */ + void *v; /* the value associated with the identifier */ +}; + +struct entry *space = NULL; +size_t hsize = 0, hmask = 0, hused = 0, hn = 0; +enum hmode {HLOOK, HPUSH, HPULL}; + +/* NOTE: do *not* use empty/zero strings with these hash table functions. */ + +static enum result hinit(size_t n) { + size_t i; + struct entry empty = {0}; + hn = n, hsize = 1 << n, space = malloc(hsize * sizeof(struct entry)); + for (i = 0; i < hsize && space; i++) space[i] = empty; + return hmask = hsize - 1, hused = 0, space ? OK : OOM; +} + +static bool entryeq(struct entry a, struct entry b) { + /* TODO: delete this function and inline its only use. */ + struct slice sa, sb; + sa.str = a.str, sa.len = a.len, sb.str = b.str, sb.len = b.len; + return sliceeq(sa, sb); +} + +static bool hall(enum hmode m, struct slice k, void **v, u32 h) { + /* NOTE: do not use this function directly. + * use hlook, hpush, and hpull instead. */ + /* when m is HLOOK, return found */ + /* when m is HPUSH, return success */ + /* when m is HPULL, return found_and_success */ + + size_t i, j; + struct entry a; + a.h = h = h ? h : shash(k), a.len = k.len, a.str = k.str, a.v = v ? *v : 0; + + for (i = 0; i < hsize; i++) { + size_t ind = (h + i) & hmask; + struct entry b = space[ind]; + + if (!b.h && m == HPUSH) hused++, space[ind] = a; + if (!b.h) return m == HPUSH; + + if (a.h == b.h && entryeq(a, b)) { + if (m == HPULL) { + for (j = 1; j < hsize; j++) { + size_t jnd = (h + i + j) & hmask; + if (b = space[jnd], b.h) { + size_t bdib = (jnd - b.h) & hmask; + if (!bdib) break; + space[ind] = space[jnd], ind = jnd; + } + } + hused--, space[ind].h = 0; + } elif (m == HPUSH) space[ind].v = *v; + elif (m == HLOOK) *v = b.v; + return 1; + } + + if (i) { + /* NOTE: using i in place of adib seems to perform the same...? */ + size_t adib = (ind - a.h) & hmask, bdib = (ind - b.h) & hmask; + if (bdib < adib && m == HPUSH) space[ind] = a, a = b; + elif (bdib < adib) return 0; + } + } + + return 0; +} + +static enum result hpush(struct slice k, void *v) { + return hall(HPUSH, k, &v, 0) ? OK : EXCEPTION; +} + +static bool hlook(struct slice k, void **v) { + /* returns 1 and points v to the entry's value when a match is found */ + return hall(HLOOK, k, v, 0); +} + +static enum result hpull(struct slice k) { + return hall(HPULL, k, NULL, 0) ? OK : EXCEPTION; +} + +static enum result rehash() { + size_t i; + struct entry *oldspace = space; + enum result hres = hinit(hn + 1); + if (hres != OK) return hres; + for (i = 0; i < hsize / 2; i++) { + struct entry e = oldspace[i]; + struct slice k; + k.str = e.str, k.len = e.len; + if (e.h && !hall(HPUSH, k, &e.v, e.h)) return EXCEPTION; + } + return free(oldspace), OK; +} + +/* delta {{{1 */ + +/* operators and their precedence. + * unary operators, TOPSET, and TOPPOW are right-associative. + * operators with a precedence of 0 are variations that are parsed further in. + * the expressions for unary operators are dummied here and handled elsewhere. + */ +#define OPS \ + X(TOPCOM, 1, right) X(TOPSET, 2, 0) X(TOPLOR, 3, left || right) \ + X(TOPLXOR, 4, !!left ^ !!right) X(TOPLAND, 5, left && right) \ + X(TUNLNOT, 6, 0) \ + X(TOPEQ, 7, left == right) X(TOPGE, 7, left >= right) \ + X(TOPGT, 7, left > right) X(TOPLE, 7, left <= right) \ + X(TOPLT, 7, left < right) X(TOPNE, 7, left != right) \ + X(TOPOR, 8, left | right) X(TOPXOR, 9, left ^ right) \ + X(TOPAND, 10, left & right) X(TOPSHL, 11, ushl(left, right)) \ + X(TOPSHR, 11, ushr(left, right)) X(TOPADD, 12, left + right) \ + X(TOPSUB, 12, left - right) X(TOPDIV, 13, udiv(left, right)) \ + X(TOPMOD, 13, umod(left, right)) X(TOPMUL, 13, left * right) \ + X(TUNBOOL, 14, 0) X(TUNINV, 14, 0) X(TUNNEG, 14, 0) \ + X(TUNNOT, 14, 0) X(TUNPOS, 14, 0) \ + X(TOPPOW, 15, upow(left, right)) X(TOPTYPE, 16, left) \ + X(TOPDIV_S, 0, sdiv(left, right)) \ + X(TOPGE_S, 0, (s64)left >= (s64)right) \ + X(TOPGT_S, 0, (s64)left > (s64)right) \ + X(TOPLE_S, 0, (s64)left <= (s64)right) \ + X(TOPLT_S, 0, (s64)left < (s64)right) \ + X(TOPMOD_S, 0, smod(left, right)) X(TOPPOW_S, 0, spow(left, right)) \ + X(TOPSHL_S, 0, sshl(left, right)) X(TOPSHR_S, 0, sshr(left, right)) + +/* these are the types returned by the lexer/tokenizer. */ +/* the lexer doesn't actually return TTYPE, but it's convenient to have here. */ +enum ttype { TTUNK, TTSPACE, TTID, TTYPE, TTNUM, TTOP, TTOPEN, TTCLOSE, TTEND }; + +/* this enum used to refer to C types, but now it's totally different. */ +enum ctype { + TUNK, TSIGNED, TUNSIGNED, TNUMBIN, TNUMOCT, TNUMDEC, TNUMHEX, +#define X(a, b, c) a, + OPS CTYPE_END +#undef X +}; + +static const s8 precedence[] = { +#define X(a, b, c) b, + OPS 0 +#undef X +}; + +struct token { + /* note: it's assumed that 0 is a good default for all of these fields. */ + struct token *next, *prev; /* in input stream order */ + struct token *rpnnext; /* in shunted reverse polish notation order */ + struct token *pops; /* previous token in shunting yard stack */ + s32 row, col, depth; /* depth is how many parens deep the token is */ + struct slice id; /* the string that produced this token, never modified */ + enum ttype tt; /* lexer/tokenizer type */ + enum ctype ct; /* parser type */ + u64 value, modulo; /* value is currently just for integer constants */ +}; + +struct token *head = NULL, *tail = NULL, *rpnhead = NULL, *rpntail = NULL; +struct token *stack = NULL; + +static enum result pusht(struct token tok) { + /* TODO: use pages like with identifiers to reduce malloc calls. */ + struct token *htok = malloc(sizeof(struct token)); + if (!htok) return OOM; + *htok = tok; + if (!head) head = htok; + if (tail) tail->next = htok, htok->prev = tail; + tail = htok; + return OK; +} + +static enum result popt(struct token *out) { + if (!head) return EXCEPTION; + if (out) *out = *tail; + if (head == tail) free(head), head = NULL, tail = NULL; + elif (tail) tail = tail->prev, free(tail->next); + return OK; +} + +static struct token *popstack() { + struct token *res = stack; + if (stack) stack = stack->pops, res->pops = NULL; + return res; +} + +/* handy list of operators: + ! !! != + & && + * ** + < << <= + = == + > >> >= + ^ ^^ + | || + ~ ~~ + % + , - / ; +*/ + +static bool isop1(char c) { + /* unambiguous one-character operators, used to assist the tokenizer */ + /* this includes parenthesis because they are similarly unambiguous. */ + return c == '%' || c == '(' || c == ')' || c == '+' + || c == ',' || c == '-' || c == '/' || c == ';'; +} + +static bool isop2(char c) { + /* operators that have doubled variants, for instance: = has == */ + return c == '!' || c == '&' || c == '*' || c == '<' || c == '=' + || c == '>' || c == '^' || c == '|' || c == '~'; +} + +static bool isop3(char c) { + /* operators that have variants ending with =, for instance: ! has != */ + return c == '!' || c == '<' || c == '>'; +} + +static bool isanyop(char a, char b, char c) { + return isop1(b) || isop1(c) + || (b == a && isop2(a)) || (b == '=' && isop3(a)); +} + +static bool isunary(enum ctype ct) { + return ct == TUNLNOT || (ct >= TUNBOOL && ct <= TUNPOS); +} + +static bool isright(enum ctype ct) { + /* right-associative operators */ + return isunary(ct) || ct == TOPSET || ct == TOPPOW; +} + +/* epsilon {{{1 */ + +const char + *fid = "identifiers cannot be longer than " STR(MAXLEN) " bytes", + *fnotid = "left-hand operand must be an identifier", *fnan = "not a number", + *fopsign = "operands have different signedness", *fmiscl = "missing \")\"", + *fopmod = "operands have different modulos", *farg = "malformed argc/argv", + *fall = "failed to allocate memory", *fnumpre = "invalid numeric prefix", + *fstart = "invalid starting token", *fout8 = "out of range for UTF-8", + *ueof = "unexpected end of file", *femp = "empty token", *fnot = "no token", + *fbignum = "number too large", *fundef = "undefined", *funex = "unexpected", + *fuchar = "unknown character", *funkop = "unknown operator", + *finc8 = "incomplete UTF-8", *fmisop = "missing \"(\"", + *fmop = "missing operand", *ffread = "read failed", *ftype = "invalid type", + *finv8 = "invalid UTF-8"; + +/* tokenizer state: */ +u8 c0 = 0, c1 = 0, c2 = 0; /* input char delay line, c0 being the most recent */ +s32 row = 1, col = 1; /* position in file */ +s32 utf8 = 0; /* nonzero: number of continuations to expect */ +char buf[4096]; /* input read() buffer */ +ssize_t bufind = 0, bufread = 0, bytesread = 0; /* bytesread is a facade */ + +static void writepos(int fd) { + writeint(fd, row), writech(fd, ':'), writeint(fd, col), writech(fd, ' '); +} + +static enum result usererror(const char *str) { + return writepos(2), writeline(2, str), BAD; +} + +static enum result tokerror(const char *str, const struct token tok) { + /* NOTE: modifying row/col is assuming an unrecoverable error */ + row = tok.row, col = tok.col, writepos(2), writestr(2, str); + return writestr(2, ": \""), writepair(2, tok.id), writestr(2, "\"\n"), BAD; +} + +/* +static void tokdebug(struct token tok) { + s32 ctlen = 9; + switch (tok.tt) { + case TTUNK: writestr(1, " TTUNK"); break; + case TTSPACE: writestr(1, "TTSPACE"); break; + case TTID: writestr(1, " TTID"); break; + case TTYPE: writestr(1, " TTYPE"); break; + case TTNUM: writestr(1, " TTNUM"); break; + case TTOP: writestr(1, " TTOP"); break; + case TTOPEN: writestr(1, " TTOPEN"); break; + case TTCLOSE: writestr(1, "TTCLOSE"); break; + case TTEND: writestr(1, " TTEND"); break; + default: writestr(1, " ???"); + } + writech(1, '('); + switch (tok.ct) { + case TUNK: writestr(1, "TUNK, "); break; + case TSIGNED: writestr(1, "TSIGNED, "); break; + case TUNSIGNED: writestr(1, "TUNSIGNED,"); break; + case TNUMBIN: writestr(1, "TNUMBIN, "); break; + case TNUMOCT: writestr(1, "TNUMOCT, "); break; + case TNUMDEC: writestr(1, "TNUMDEC, "); break; + case TNUMHEX: writestr(1, "TNUMHEX, "); break; +#define X(a, b, c) case a: writestr(1, STR(a) ","); ctlen = zlen(STR(a)); break; + OPS +#undef X + default: writestr(1, "???, "); break; + } + while (ctlen < 9) writech(1, ' '), ctlen++; + writeint(1, tok.depth), writestr(1, ") at "); + writepad(1, tok.row, 2), writepad(1, tok.col, 2); + writeint(1, tok.row), writech(1, ':'), writeint(1, tok.col); + writestr(1, ": \""); writepair(1, tok.id); + writestr(1, "\" ("), writeu64(1, tok.value); + if (tok.modulo) writestr(1, " % "), writeu64(1, tok.modulo); + writestr(1, ")\n"); +} +*/ + +static enum result parsedec(struct slice id, u64 *out) { + /* NOTE: assumes id.len > 0 */ + s32 i, c = id.str[0]; + u64 value = 0; + if (id.len > 20) return EXCEPTION; + if (id.len == 20 && c > '1' && c <= '9') return EXCEPTION; + for (i = 0, c = id.str[i]; i < id.len; c = id.str[++i]) { + if (c < '0' || c > '9') return BAD; + value = value * 10 + (c - '0'); + } + if (id.len == 20 && value >> 63 != 1) return EXCEPTION; /* overflow */ + return *out = value, OK; +} + +static enum result parsenum(struct token *tok) { + /* this is basically atoi, strtoll, parseInt, or whatever you call it. */ + /* this variant only handles decimal with no prefix, + * and binary, octal, hex with the prefixes 0b, 0o, and 0x respectively. */ + /* NOTE: assumes id.len > 0 */ + /* NOTE: it's possible to collapse TNUMBIN, TNUMOCT, TNUMDEC to save LOC, + * but this could result in some really awful machine code. */ + struct slice id = tok->id; /* not necessary but speeds up access */ + u64 value = 0; /* likewise */ + s32 i, c = id.str[0]; + + if (tok->ct == TNUMBIN) { + if (id.len > 66) return EXCEPTION; + for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { + if (c != '0' && c != '1') return BAD; + value = value * 2 + (c - '0'); + } + + } elif (tok->ct == TNUMOCT) { + if (id.len > 34) return EXCEPTION; + for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { + if (c < '0' || c > '7') return BAD; + value = value * 8 + (c - '0'); + } + + } elif (tok->ct == TNUMDEC) { + enum result pres = parsedec(id, &value); + if (pres != OK) return pres; + + } elif (tok->ct == TNUMHEX) { + if (id.len > 18) return EXCEPTION; + for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { + if (c >= '0' && c <= '9') value = value * 16 + c - '0'; + elif (c >= 'A' && c <= 'F') value = value * 16 + c - 'A' + 10; + elif (c >= 'a' && c <= 'f') value = value * 16 + c - 'a' + 10; + else return BAD; + } + } else return EXCEPTION; + + if (tok->ct != TNUMDEC && i == 2) return BAD; /* prefix but no number */ + + return tok->value = value, OK; +} + +static enum result utfeat(u8 c) { + /* handle UTF-8 encoded bytes, assuming nothing else has matched so far */ + bool cont = c >= 0x80 && c < 0xC0; + if (utf8 && cont) utf8--; + elif ((utf8 && !cont) || cont) return usererror(finv8); + elif (c < 0x80) return usererror(fuchar); + elif (c >= 0xC0 && c < 0xE0) utf8 = 1; + elif (c >= 0xE0 && c < 0xF0) utf8 = 2; + elif (c >= 0xF0 && c < 0xF6) utf8 = 3; + else return usererror(fout8); + return OK; +} + +static bool nextchar(int fd) { + if (++bufind >= bufread) { + bytesread = bufread = read(fd, buf, 4096), bufind = 0; + if (bufread < 1) return 0; + } + return c0 = buf[bufind], bytesread = 1; +} + +enum result tokenize(int fd) { + /* split strings in fd into tokens with rough types, AKA a lexer */ + enum ttype tt = TTUNK, newtype = TTUNK; + struct token tok = {0}; + s32 idrow = row, idcol = col, oldutf8 = 0; + + while (bytesread = 0, c0 || nextchar(fd)) { + if (c1 == '\r' && c0 == '\n') goto next; /* "\r\n" is treated as one */ + + oldutf8 = utf8; + if (isspace_(c0)) newtype = TTSPACE; + elif (isident(c0, tt == TTID)) newtype = TTID; + elif (isop(c0)) newtype = TTOP; + elif (isopen(c0)) newtype = TTOPEN; + elif (isclose(c0)) newtype = TTCLOSE; + elif (isnum(c0)) newtype = TTNUM; + elif (bytesread && utfeat(c0)) return BAD; + else newtype = TTID; /* unicode is always treated as an identifier */ + + if (tt == TTNUM && newtype == TTID); /* so that "0x123" is one token */ + elif (tt == TTUNK) tt = newtype; + elif (newtype != tt || isanyop(c2, c1, c0)) break; /* token boundary */ + + if (tt == TTSPACE || pushc(c0) == OK); /* construct token string */ + elif (iden.len >= MAXLEN) return usererror(fid); + else return OOM; + + if (c0 == '\r' || c0 == '\n') col=1, row++; + elif (c0 < 0x80 || c0 >= 0xE0) col++; /* correctly handle UTF-8 */ + + next: c2 = c1, c1 = c0, c0 = 0, bytesread = 0; + } + + if (bufread < 0) return writeline(2, ffread), EXCEPTION; + elif (newtype != TTUNK); /* parses the final character correctly */ + elif (bytesread == 0) tt = TTEND; + if (oldutf8) return usererror(finc8); + + tok.id = nextiden(), tok.tt = tt, tok.row = idrow, tok.col = idcol; + return pusht(tok); +} + +enum result shunt(struct token *tok) { + /* perform the shunting yard algorithm to produce reverse polish notation */ + + if (!tok) { /* finish up */ + while (stack) { + if (stack->tt == TTOPEN) return tokerror(fmiscl, *stack); + if (!rpntail) rpntail = popstack(); /* only first iteration */ + rpntail = rpntail->rpnnext = popstack(); + } + + } elif (!rpntail) { /* handle first token */ + if (tok->tt == TTID || tok->tt == TTNUM) rpnhead = rpntail = tok; + elif (tok->tt == TTOPEN || (tok->tt == TTOP && isunary(tok->ct))) { + tok->pops = stack, stack = tok; + } else return tokerror(fstart, *tok); + + } else { /* handle everything inbetween */ + if (tok->tt == TTID || tok->tt == TTNUM || tok->tt == TTYPE) { + rpntail = rpntail->rpnnext = tok; + } elif (tok->tt == TTOPEN) tok->pops = stack, stack = tok; + elif (tok->tt == TTCLOSE) { + while (stack && stack->tt != TTOPEN) { + rpntail = rpntail->rpnnext = popstack(); + } + if (!stack) return tokerror(fmisop, *tok); + popstack(); + + } elif (tok->tt == TTOP) { + s32 pre = precedence[tok->ct - TOPCOM] + isright(tok->ct); + /* NOTE: TOPTYPE occurs before TUNNEG, is this okay? */ + while (stack && stack->tt == TTOP) { + if (isunary(tok->ct) && stack->ct == TOPPOW) break; + elif (pre > precedence[stack->ct - TOPCOM]) break; + rpntail = rpntail->rpnnext = popstack(); + } + tok->pops = stack, stack = tok; + + } else return tokerror("TODO", *tok), EXCEPTION; + + } + return OK; +} + +static enum ctype decideop(char a, char b) { + static const char *checks = "^^>>;\0+\0\0\0<=\0\0,\0*\0|\0**=\0<\0&&\0\0~\0" + "<<\0\0%\0&\0\0\0-\0>=>\0==~~/\0!!!\0||^\0!="; + static const enum ctype mapping[] = { + TOPLXOR,TOPSHR,TOPTYPE,TOPADD,TUNK,TOPLE,TUNK,TOPCOM,TOPMUL,TOPOR,TOPPOW, + TOPSET,TOPLT,TOPLAND,TUNK,TUNINV,TOPSHL,TUNK,TOPMOD,TOPAND,TUNK,TOPSUB, + TOPGE,TOPGT,TOPEQ,TUNLNOT,TOPDIV,TUNBOOL,TUNNOT,TOPLOR,TOPXOR,TOPNE + }; + /* perfect hashing found by brute force: */ + u32 i = uhash(uhash(0x3561F0B7 + a) + b) & 31; + return (a != checks[2 * i] || b != checks[2 * i + 1]) ? TUNK : mapping[i]; +} + +static enum ctype decideoplong(char a, char b, char c, char d) { + static const char *checks = + "bor\0pow\0eq\0\0le\0\0shl\0\0\0\0\0not\0or\0\0mod\0band\0\0\0\0\0\0\0" + "\0mul\0\0\0\0\0\0\0\0\0bnotne\0\0pos\0shr\0bool\0\0\0\0xor\0add\0inv\0" + "com\0ge\0\0lt\0\0set\0and\0neg\0sub\0div\0gt\0\0bxor\0\0\0\0type"; + static const enum ctype mapping[] = { + TOPOR,TOPPOW,TOPEQ,TOPLE,TOPSHL,TUNK,TUNLNOT,TOPLOR,TOPMOD,TOPAND,TUNK, + TUNK,TOPMUL,TUNK,TUNK,TUNNOT,TOPNE,TUNPOS,TOPSHR,TUNBOOL,TUNK,TOPLXOR, + TOPADD,TUNINV,TOPCOM,TOPGE,TOPLT,TOPSET,TOPLAND,TUNNEG,TOPSUB,TOPDIV, + TOPGT,TOPXOR,TUNK,TOPTYPE + }; + /* perfect hashing found by brute force: */ + u32 i = uhash(uhash(uhash(uhash(0xC0FF1042 + a) + b) + c) + d) % 36; + const char *ch = checks + 4 * i; + if (a != ch[0] || b != ch[1] || c != ch[2] || d != ch[3]) return TUNK; + return mapping[i]; +} + +enum result parse() { + /* restructure the tokens from tokenize and parse literals */ + enum result res = OK, pres = OK, sres = OK; + s32 depth = 0; + bool hint_op = 0, was_op = 0, expect_unempty = 0; + bool expect_type = 0, expect_nonid = 0, expect_operand = 0; + + while ((res = tokenize(0)) == OK) { + struct token tok; + s32 ss = 0; + if (!tail) return writeline(2, fnot), EXCEPTION; + tok = *tail; + + if (tok.tt == TTEND) break; + elif (tok.tt == TTSPACE) goto next; + elif (tok.id.len < 1) return tokerror(femp, tok), EXCEPTION; + else ss = 256 * tok.id.str[0] + tok.id.str[1]; + + was_op = tok.tt == TTOP; + + if (tok.tt == TTID && expect_type && !expect_nonid) { + struct slice idnum; + u64 value = 0; + + if (tok.id.str[0] == 'u') tok.ct = TUNSIGNED; + elif (tok.id.str[0] == 's') tok.ct = TSIGNED; + elif (tok.id.str[0] == 'm') tok.ct = TUNSIGNED; + else badtype: return tokerror(ftype, tok); + + idnum.str = tok.id.str + 1, idnum.len = tok.id.len - 1; + + if ((pres = parsedec(idnum, &value)) != OK) goto badtype; + if (tok.id.str[0] != 'm' && value - 1 > 63) goto badtype; + /* note that 1 << 64 = 0 */ + tok.modulo = (tok.id.str[0] == 'm') ? value : ushl(1, value); + tok.tt = TTYPE; + + } elif (tok.tt == TTID && tok.id.str[0] == '\\') { + char a = tok.id.str[1], b = tok.id.str[2], c = tok.id.str[3]; + char d = tok.id.len > 4 ? tok.id.str[4] : '\0'; + tok.tt = TTOP, tok.ct = decideoplong(a, b, c, d); + if (tok.ct == TUNK || tok.id.len > 5) return tokerror(funkop, tok); + + } elif (tok.tt == TTID) { + + } elif (tok.tt == TTOP) { + tok.ct = decideop(tok.id.str[0], tok.id.str[1]); + if (tok.ct == TOPADD && !hint_op) tok.ct = TUNPOS; + elif (tok.ct == TOPSUB && !hint_op) tok.ct = TUNNEG; + elif (tok.ct == TUNK || tok.id.len > 2) return tokerror(funkop, tok); + + } elif (tok.tt == TTNUM && !expect_nonid) { + if (ss == 256 * '0' + 'b') tok.ct = TNUMBIN; + elif (ss == 256 * '0' + 'o') tok.ct = TNUMOCT; + elif (ss == 256 * '0' + 'x') tok.ct = TNUMHEX; + elif (ss == 256 * '0') tok.ct = TNUMDEC; + elif (tok.id.str[0] == '0') return tokerror(fnumpre, tok); + else tok.ct = TNUMDEC; + pres = parsenum(&tok); + if (pres == BAD) return tokerror(fnan, tok); + elif (pres == EXCEPTION) return tokerror(fbignum, tok); + + } elif (tok.tt == TTOPEN) { + if (ss == 256 * '(') depth++; + else return tokerror(fuchar, tok); + } elif (tok.tt == TTCLOSE) { + if (ss == 256 * ')') depth--; + else return tokerror(fuchar, tok); + if (expect_unempty) goto unexpected; + } else goto unexpected; /* or unimplemented */ + + next: + tok.depth = depth; + *tail = tok; /* update depth, value, etc. */ + + if (tok.tt == TTSPACE) continue; + + if (expect_nonid && isunary(tok.ct)) goto unexpected; + if (expect_type && tok.tt != TTYPE) goto unexpected; + expect_nonid = tok.tt == TTID || tok.tt == TTNUM; + expect_type = tok.ct == TOPTYPE; + expect_unempty = tok.tt == TTOPEN; + hint_op = expect_nonid || tok.tt == TTCLOSE || tok.tt == TTYPE; + if (expect_nonid || tok.tt == TTYPE) expect_operand = 0; + elif (isunary(tok.ct)) expect_operand = 1; + elif ((was_op || tok.tt == TTCLOSE) && expect_operand) goto unexpected; + elif (was_op) expect_operand = 1; + if ((sres = shunt(tail)) != OK) return sres; + } + + if (res == OK) { + if (expect_operand) return usererror(ueof); + return shunt(NULL); + } + return (res == OOM) ? writeline(2, fall), OOM : res; + + unexpected: return tokerror(funex, *tail); +} + +static enum result operate1(struct token op, struct token *out) { + /* execute unary operators. */ + struct token *ta = popstack(); + u64 value, right = ta->value; + + switch (op.ct) { + case TUNLNOT: value = !right; break; + case TUNNOT: value = !right; break; + case TUNBOOL: value = !!right; break; + case TUNINV: value = ~right; break; + case TUNPOS: value = right; break; + case TUNNEG: value = 0 - right; break; + default: return tokerror("TODO unary operator", op), EXCEPTION; + } + + out->value = value, out->ct = ta->ct, out->modulo = ta->modulo; + return OK; +} + +static enum ctype signop(enum ctype ct) { + switch (ct) { + case TOPLT: return TOPLT_S; + case TOPGT: return TOPGT_S; + case TOPLE: return TOPLE_S; + case TOPGE: return TOPGE_S; + case TOPSHL: return TOPSHL_S; + case TOPSHR: return TOPSHR_S; + case TOPDIV: return TOPDIV_S; + case TOPMOD: return TOPMOD_S; + case TOPPOW: return TOPPOW_S; + default: return ct; + } +} + +static enum result operate2(struct token op, struct token *out) { + /* execute binary operators. */ + struct token *tb = popstack(), *ta = popstack(), *other = tb; + u64 value, left = ta->value, right = tb->value; + u64 modleft = ta->modulo, modright = tb->modulo; + u64 signleft = ta->ct == TSIGNED, signright = tb->ct == TSIGNED; + bool inherit = (ta->ct >= TNUMBIN && ta->ct <= TNUMHEX) || + (tb->ct >= TNUMBIN && tb->ct <= TNUMHEX); + + if (op.ct == TOPTYPE || inherit); + elif (modleft != modright) return tokerror(fopmod, op); + elif (signleft != signright) return tokerror(fopsign, op); + + if (signleft || signright) op.ct = signop(op.ct); + + switch (op.ct) { +#define X(a, b, c) case a: value = c; break; + OPS +#undef X + default: return tokerror("TODO binary operator", op), EXCEPTION; + } + + if (other->ct >= TNUMBIN && other->ct <= TNUMHEX) other = ta; + out->value = value, out->ct = other->ct, out->modulo = other->modulo; + return OK; +} + +static enum result assign(struct token *out) { + enum result hres; + struct token *tb = popstack(), *ta = popstack(); + + if (!hlook(ta->id, (void **)&ta)) { + if ((hres = hpush(ta->id, ta)) != OK) return hres; + if (hused >= hsize * 3 / 4 && (hres = rehash()) == OOM) { + return writeline(2, fall), OOM; + } elif (hres != OK) return hres; + } elif (ta->modulo != tb->modulo) return tokerror(fopmod, *ta); + elif (ta->ct == TSIGNED && tb->ct == TUNSIGNED) return tokerror(fopsign, *ta); + elif (ta->ct == TUNSIGNED && tb->ct == TSIGNED) return tokerror(fopsign, *ta); + + *out = *ta, out->tt = TTNUM; + ta->value = tb->value, ta->modulo = tb->modulo, ta->ct = tb->ct; + return OK; +} + +static void modulate(struct token *tok) { + u64 halfmod = tok->modulo ? tok->modulo / 2 : U64(1) << 63; + tok->value = tok->modulo ? tok->value % tok->modulo : tok->value; + if (tok->ct == TSIGNED && tok->value >= halfmod) tok->value -= tok->modulo; +} + +enum result evaluate() { + /* runs the program produced by parse() */ + struct token *p = rpnhead; + + enum result ores = hinit(1); + if (ores == OOM) return writeline(2, fall), OOM; + + while (p) { + struct token tok = *p, *v; + + if (tok.tt == TTNUM || tok.tt == TTYPE || tok.tt == TTID) { + p->pops = stack, stack = p; + + } elif (tok.tt == TTOP) { + struct token out = {0}; + out.tt = TTNUM, out.ct = TUNSIGNED; + + if (!stack) return tokerror(fmop, tok); + + /* if (stack->pops) tokdebug(*stack->pops); */ + /* tokdebug(tok), tokdebug(*stack); */ + + if (stack->tt == TTID) { + /* check the right-hand side */ + if (!hlook(stack->id, (void **)&v)) { + return tokerror(fundef, *stack); + } + stack->value = v->value, stack->modulo = v->modulo; + stack->ct = v->ct; + } + + if (isunary(tok.ct)) { + if ((ores = operate1(tok, &out)) != OK) return ores; + } elif (!stack->pops) return tokerror(fmop, tok); + elif (tok.ct == TOPSET) { + if (stack->pops->tt != TTID) return tokerror(fnotid, tok); + if ((ores = assign(&out)) != OK) return ores; + } elif (stack->pops->tt == TTID) { + /* check the left-hand side */ + if (!hlook(stack->pops->id, (void **)&v)) { + return tokerror(fundef, *stack->pops); + } + stack->pops->value = v->value, stack->pops->modulo = v->modulo; + stack->pops->ct = v->ct; + if ((ores = operate2(tok, &out)) != OK) return ores; + } elif ((ores = operate2(tok, &out)) != OK) return ores; + + modulate(&out); + + /* tokdebug(out), writech(1, '\n'); */ + + out.pops = stack; + if (pusht(out) == OOM) return writeline(2, fall), OOM; + stack = tail; + + } else return tokerror("TODO type", tok), EXCEPTION; + + p = tok.rpnnext; + } + + if (stack) writeu64(1, stack->value), writech(1, '\n'); + /* if (stack && stack->pops) return tokdebug(*stack->pops), EXCEPTION; */ + if (stack && stack->pops) return tokerror(funex, *stack->pops), EXCEPTION; + + return OK; +} + +int main(int argc, char **argv) { + int ret = 0; + /* writeu64(2, sizeof(struct entry)), writech(2, '\n'); */ + if (argc <= 0 || !argv || !argv[0]) return writeline(2, farg), BAD; + /* TODO: + * -u flag: disables input buffering. + * -n flag: returns 1 from main if expression value is nonzero. + * -z flag: returns 1 from main if expression value is zero. + */ + pages[0] = iden.str = NULL; + iden.len = 0; + if ((ret = parse()) == OK) ret = evaluate(); + if (space) free(space); + while (popt(NULL) == OK); + while (pagelen-->0) free(pages[pagelen]); + return ret; +} diff --git a/ciel/enfer.c b/ciel/enfer.c new file mode 100644 index 0000000..3bc1488 --- /dev/null +++ b/ciel/enfer.c @@ -0,0 +1,643 @@ +#define elif else if +#define PAGES 256 +#define CIEL_PAGESIZE 4096 +#define MAXLEN 256 +#define XSTR(e) #e +#define STR(e) XSTR(e) +typedef unsigned char u8; +typedef signed char s8; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned int u32; +typedef signed int s32; +#if defined(_WIN32) || defined(__i386) || defined(__ARM_ARCH) +typedef unsigned long long u64; +typedef signed long long s64; +#define U64(x) (x ## ull) +#else +typedef unsigned long u64; +typedef signed long s64; +#define U64(x) (x ## ul) +#endif +#if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) || defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__) +typedef u64 size_t; +typedef s64 ssize_t; +#else +typedef u32 size_t; +typedef s32 ssize_t; +#endif +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +_Static_assert(sizeof(u8) == 1, "u8 is incorrect"); +_Static_assert(sizeof(s8) == 1, "s8 is incorrect"); +_Static_assert(sizeof(u16) == 2, "u16 is incorrect"); +_Static_assert(sizeof(s16) == 2, "s16 is incorrect"); +_Static_assert(sizeof(u32) == 4, "u32 is incorrect"); +_Static_assert(sizeof(s32) == 4, "s32 is incorrect"); +_Static_assert(sizeof(u64) == 8, "u64 is incorrect"); +_Static_assert(sizeof(s64) == 8, "s64 is incorrect"); +_Static_assert(sizeof(ssize_t) == sizeof(void *), "ssize_t is incorrect"); +_Static_assert(sizeof(size_t) == sizeof(void *), "size_t is incorrect"); +#endif +#ifndef COSMOPOLITAN_H_ +ssize_t read(int fildes, void *buf, size_t nbyte); +ssize_t write(int fildes, const void *buf, size_t nbyte); +void *malloc(size_t size); +void free(void *ptr); +#define NULL (void*)0 +typedef int bool; +#endif +struct slice { +char *str; +s32 len; }; +static bool sliceeq(struct slice a, struct slice b) { +s32 i; +if (a.len != b.len) return 0; +if (a.str == b.str) return 1; +for (i = 0; i < (a.len + 3) / 4; i++) { +if (((u32 *)a.str)[i] != ((u32 *)b.str)[i]) return 0; } +return 1; } +static u64 udiv(u64 a, u64 b) { +return b ? a / b : 0; } +static s64 sdiv(s64 a, s64 b) { +return b ? a / b : 0; } +static u64 umod(u64 a, u64 b) { +return b ? a % b : 0; } +static s64 smod(s64 a, s64 b) { +return b ? a % b + ((a ^ b) < 0 ? b: 0) : 0; } +static u64 upow(u64 b, u64 e) { +u64 r = 1; +while (e) { +if (e & 1) r *= b; +e >>= 1, b *= b; } +return r; } +static s64 spow(s64 b, s64 e) { +if (b == 1) return 1; +elif (b == -1) return e & 1 ? -1 : 1; +if (e < 0) return 0; +return upow(b, e); } +static u64 ushl(u64 a, u64 b) { +return b < 64 ? a << b : 0; } +static u64 ushr(u64 a, u64 b) { +return b < 64 ? a >> b : 0; } +static s64 sshl(s64 a, s64 b) { +if (b < -63) return a < 0 ? -1 : 0; +elif (b < 0) return a >> b; +elif (b < 63) return a << b; +else return 0; } +static s64 sshr(s64 a, s64 b) { +if (b < -63) return 0; +elif (b < 0) return a << b; +elif (b < 63) return a >> b; +else return a < 0 ? -1 : 0; } +static u64 rol(u64 a, u64 b) { +return b ? (a << (b & 63)) | (a >> (64 - (b & 63))) : a; } +static u64 ror(u64 a, u64 b) { +return b ? (a >> (b & 63)) | (a << (64 - (b & 63))) : a; } +static bool isalpha_(char c) { +return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } +static bool isnum(char c) { +return c >= '0' && c <= '9'; } +static bool isident(char c, bool notfirst) { +if (notfirst) return isalpha_(c) || c == '_' || isnum(c); +else return isalpha_(c) || c == '_' || c == '\\'; } +static bool isspace_(char c) { +return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } +static bool isopen(char c) { +return c == '{' || c == '[' || c == '('; } +static bool isclose(char c) { +return c == ')' || c == ']' || c == '}'; } +static bool isop(char c) { +return c >= '!' && c <= '~' && !(isident(c, 1) || isopen(c) || isclose(c)); } +static ssize_t zlen(const char *str) { +ssize_t len = 0; +while (str[len]) len++; +return len; } +static bool writeok = 1; +static void write_(int fd, const char *str, ssize_t len) { +if (len > 0 && writeok) writeok = write(fd, str, len) == len; } +static void writestr(int fd, const char *str) { +write_(fd, str, zlen(str)); } +static void writepair(int fd, struct slice p) { +write_(fd, p.str, p.len); } +static void writech(int fd, char c) { +write_(fd, &c, 1); } +static void writeline(int fd, const char *str) { +writestr(fd, str), writech(1, '\n'); } +static void writeu64(int fd, u64 x) { +char str[21], *start = str + 20; +for (*start = 0; *--start = x % 10 + '0', x /= 10; ); +writestr(fd, start); } +static void writeint(int fd, s32 x) { +writeu64(fd, x < 0 ? writech(fd, '-'), 0 - (u64)x : (u64)x); } +static void writepad(int fd, s32 x, s32 digits) { +while (--digits) if ((x /= 10) == 0) writech(fd, ' '); } +enum result {OK, BAD, EXCEPTION, OOM}; +char *pages[PAGES]; +s32 pagelen = 0; +struct slice iden; +static enum result pushc(char c) { +if (pagelen > PAGES) return OOM; +if (!iden.str) iden.str = pages[pagelen++] = malloc(CIEL_PAGESIZE); +if (!iden.str) return OOM; +if (iden.len >= MAXLEN) return EXCEPTION; +iden.str[iden.len++] = c; +return OK; } +static struct slice nextiden() { +struct slice ret = iden; +if (!iden.len) return ret; +iden.str += iden.len, iden.len = 0; +while ((size_t)iden.str & 3) *iden.str++ = 0; +if (iden.str - pages[pagelen - 1] > CIEL_PAGESIZE - MAXLEN) iden.str = NULL; +return ret; } +static u32 uhash(u32 h) { +return ++h, h ^= h >> 16, h *= 0xA66C52B5, h; } +static u32 shash(struct slice s) { +u32 h = 1; +s32 i; +for (i = 0; i < s.len; i++) h = uhash(h + s.str[i]); +return h ? h : 1; } +struct entry { +u32 h; +s32 len; +char *str; +void *v; }; +struct entry *space = NULL; +size_t hsize = 0, hmask = 0, hused = 0, hn = 0; +enum hmode {HLOOK, HPUSH, HPULL}; +static enum result hinit(size_t n) { +size_t i; +struct entry empty = {0}; +hn = n, hsize = 1 << n, space = malloc(hsize * sizeof(struct entry)); +for (i = 0; i < hsize && space; i++) space[i] = empty; +return hmask = hsize - 1, hused = 0, space ? OK : OOM; } +static bool entryeq(struct entry a, struct entry b) { +struct slice sa, sb; +sa.str = a.str, sa.len = a.len, sb.str = b.str, sb.len = b.len; +return sliceeq(sa, sb); } +static bool hall(enum hmode m, struct slice k, void **v, u32 h) { +size_t i, j; +struct entry a; +a.h = h = h ? h : shash(k), a.len = k.len, a.str = k.str, a.v = v ? *v : 0; +for (i = 0; i < hsize; i++) { +size_t ind = (h + i) & hmask; +struct entry b = space[ind]; +if (!b.h && m == HPUSH) hused++, space[ind] = a; +if (!b.h) return m == HPUSH; +if (a.h == b.h && entryeq(a, b)) { +if (m == HPULL) { +for (j = 1; j < hsize; j++) { +size_t jnd = (h + i + j) & hmask; +if (b = space[jnd], b.h) { +size_t bdib = (jnd - b.h) & hmask; +if (!bdib) break; +space[ind] = space[jnd], ind = jnd; } } +hused--, space[ind].h = 0; +} elif (m == HPUSH) space[ind].v = *v; +elif (m == HLOOK) *v = b.v; +return 1; } +if (i) { +size_t adib = (ind - a.h) & hmask, bdib = (ind - b.h) & hmask; +if (bdib < adib && m == HPUSH) space[ind] = a, a = b; +elif (bdib < adib) return 0; } } +return 0; } +static enum result hpush(struct slice k, void *v) { +return hall(HPUSH, k, &v, 0) ? OK : EXCEPTION; } +static bool hlook(struct slice k, void **v) { +return hall(HLOOK, k, v, 0); } +static enum result hpull(struct slice k) { +return hall(HPULL, k, NULL, 0) ? OK : EXCEPTION; } +static enum result rehash() { +size_t i; +struct entry *oldspace = space; +enum result hres = hinit(hn + 1); +if (hres != OK) return hres; +for (i = 0; i < hsize / 2; i++) { +struct entry e = oldspace[i]; +struct slice k; +k.str = e.str, k.len = e.len; +if (e.h && !hall(HPUSH, k, &e.v, e.h)) return EXCEPTION; } +return free(oldspace), OK; } +#define OPS \ +X(TOPCOM, 1, right) X(TOPSET, 2, 0) X(TOPLOR, 3, left || right) \ +X(TOPLXOR, 4, !!left ^ !!right) X(TOPLAND, 5, left && right) \ +X(TUNLNOT, 6, 0) \ +X(TOPEQ, 7, left == right) X(TOPGE, 7, left >= right) \ +X(TOPGT, 7, left > right) X(TOPLE, 7, left <= right) \ +X(TOPLT, 7, left < right) X(TOPNE, 7, left != right) \ +X(TOPOR, 8, left | right) X(TOPXOR, 9, left ^ right) \ +X(TOPAND, 10, left & right) X(TOPSHL, 11, ushl(left, right)) \ +X(TOPSHR, 11, ushr(left, right)) X(TOPADD, 12, left + right) \ +X(TOPSUB, 12, left - right) X(TOPDIV, 13, udiv(left, right)) \ +X(TOPMOD, 13, umod(left, right)) X(TOPMUL, 13, left * right) \ +X(TUNBOOL, 14, 0) X(TUNINV, 14, 0) X(TUNNEG, 14, 0) \ +X(TUNNOT, 14, 0) X(TUNPOS, 14, 0) \ +X(TOPPOW, 15, upow(left, right)) X(TOPTYPE, 16, left) \ +X(TOPDIV_S, 0, sdiv(left, right)) \ +X(TOPGE_S, 0, (s64)left >= (s64)right) \ +X(TOPGT_S, 0, (s64)left > (s64)right) \ +X(TOPLE_S, 0, (s64)left <= (s64)right) \ +X(TOPLT_S, 0, (s64)left < (s64)right) \ +X(TOPMOD_S, 0, smod(left, right)) X(TOPPOW_S, 0, spow(left, right)) \ +X(TOPSHL_S, 0, sshl(left, right)) X(TOPSHR_S, 0, sshr(left, right)) +enum ttype { TTUNK, TTSPACE, TTID, TTYPE, TTNUM, TTOP, TTOPEN, TTCLOSE, TTEND }; +enum ctype { +TUNK, TSIGNED, TUNSIGNED, TNUMBIN, TNUMOCT, TNUMDEC, TNUMHEX, +#define X(a, b, c) a, +OPS CTYPE_END +#undef X +}; +static const s8 precedence[] = { +#define X(a, b, c) b, +OPS 0 +#undef X +}; +struct token { +struct token *next, *prev; +struct token *rpnnext; +struct token *pops; +s32 row, col, depth; +struct slice id; +enum ttype tt; +enum ctype ct; +u64 value, modulo; }; +struct token *head = NULL, *tail = NULL, *rpnhead = NULL, *rpntail = NULL; +struct token *stack = NULL; +static enum result pusht(struct token tok) { +struct token *htok = malloc(sizeof(struct token)); +if (!htok) return OOM; +*htok = tok; +if (!head) head = htok; +if (tail) tail->next = htok, htok->prev = tail; +tail = htok; +return OK; } +static enum result popt(struct token *out) { +if (!head) return EXCEPTION; +if (out) *out = *tail; +if (head == tail) free(head), head = NULL, tail = NULL; +elif (tail) tail = tail->prev, free(tail->next); +return OK; } +static struct token *popstack() { +struct token *res = stack; +if (stack) stack = stack->pops, res->pops = NULL; +return res; } +static bool isop1(char c) { +return c == '%' || c == '(' || c == ')' || c == '+' +|| c == ',' || c == '-' || c == '/' || c == ';'; } +static bool isop2(char c) { +return c == '!' || c == '&' || c == '*' || c == '<' || c == '=' +|| c == '>' || c == '^' || c == '|' || c == '~'; } +static bool isop3(char c) { +return c == '!' || c == '<' || c == '>'; } +static bool isanyop(char a, char b, char c) { +return isop1(b) || isop1(c) +|| (b == a && isop2(a)) || (b == '=' && isop3(a)); } +static bool isunary(enum ctype ct) { +return ct == TUNLNOT || (ct >= TUNBOOL && ct <= TUNPOS); } +static bool isright(enum ctype ct) { +return isunary(ct) || ct == TOPSET || ct == TOPPOW; } +const char +*fid = "identifiers cannot be longer than " STR(MAXLEN) " bytes", +*fnotid = "left-hand operand must be an identifier", *fnan = "not a number", +*fopsign = "operands have different signedness", *fmiscl = "missing \")\"", +*fopmod = "operands have different modulos", *farg = "malformed argc/argv", +*fall = "failed to allocate memory", *fnumpre = "invalid numeric prefix", +*fstart = "invalid starting token", *fout8 = "out of range for UTF-8", +*ueof = "unexpected end of file", *femp = "empty token", *fnot = "no token", +*fbignum = "number too large", *fundef = "undefined", *funex = "unexpected", +*fuchar = "unknown character", *funkop = "unknown operator", +*finc8 = "incomplete UTF-8", *fmisop = "missing \"(\"", +*fmop = "missing operand", *ffread = "read failed", *ftype = "invalid type", +*finv8 = "invalid UTF-8"; +u8 c0 = 0, c1 = 0, c2 = 0; +s32 row = 1, col = 1; +s32 utf8 = 0; +char buf[4096]; +ssize_t bufind = 0, bufread = 0, bytesread = 0; +static void writepos(int fd) { +writeint(fd, row), writech(fd, ':'), writeint(fd, col), writech(fd, ' '); } +static enum result usererror(const char *str) { +return writepos(2), writeline(2, str), BAD; } +static enum result tokerror(const char *str, const struct token tok) { +row = tok.row, col = tok.col, writepos(2), writestr(2, str); +return writestr(2, ": \""), writepair(2, tok.id), writestr(2, "\"\n"), BAD; } +static enum result parsedec(struct slice id, u64 *out) { +s32 i, c = id.str[0]; +u64 value = 0; +if (id.len > 20) return EXCEPTION; +if (id.len == 20 && c > '1' && c <= '9') return EXCEPTION; +for (i = 0, c = id.str[i]; i < id.len; c = id.str[++i]) { +if (c < '0' || c > '9') return BAD; +value = value * 10 + (c - '0'); } +if (id.len == 20 && value >> 63 != 1) return EXCEPTION; +return *out = value, OK; } +static enum result parsenum(struct token *tok) { +struct slice id = tok->id; +u64 value = 0; +s32 i, c = id.str[0]; +if (tok->ct == TNUMBIN) { +if (id.len > 66) return EXCEPTION; +for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { +if (c != '0' && c != '1') return BAD; +value = value * 2 + (c - '0'); } +} elif (tok->ct == TNUMOCT) { +if (id.len > 34) return EXCEPTION; +for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { +if (c < '0' || c > '7') return BAD; +value = value * 8 + (c - '0'); } +} elif (tok->ct == TNUMDEC) { +enum result pres = parsedec(id, &value); +if (pres != OK) return pres; +} elif (tok->ct == TNUMHEX) { +if (id.len > 18) return EXCEPTION; +for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) { +if (c >= '0' && c <= '9') value = value * 16 + c - '0'; +elif (c >= 'A' && c <= 'F') value = value * 16 + c - 'A' + 10; +elif (c >= 'a' && c <= 'f') value = value * 16 + c - 'a' + 10; +else return BAD; } +} else return EXCEPTION; +if (tok->ct != TNUMDEC && i == 2) return BAD; +return tok->value = value, OK; } +static enum result utfeat(u8 c) { +bool cont = c >= 0x80 && c < 0xC0; +if (utf8 && cont) utf8--; +elif ((utf8 && !cont) || cont) return usererror(finv8); +elif (c < 0x80) return usererror(fuchar); +elif (c >= 0xC0 && c < 0xE0) utf8 = 1; +elif (c >= 0xE0 && c < 0xF0) utf8 = 2; +elif (c >= 0xF0 && c < 0xF6) utf8 = 3; +else return usererror(fout8); +return OK; } +static bool nextchar(int fd) { +if (++bufind >= bufread) { +bytesread = bufread = read(fd, buf, 4096), bufind = 0; +if (bufread < 1) return 0; } +return c0 = buf[bufind], bytesread = 1; } +enum result tokenize(int fd) { +enum ttype tt = TTUNK, newtype = TTUNK; +struct token tok = {0}; +s32 idrow = row, idcol = col, oldutf8 = 0; +while (bytesread = 0, c0 || nextchar(fd)) { +if (c1 == '\r' && c0 == '\n') goto next; +oldutf8 = utf8; +if (isspace_(c0)) newtype = TTSPACE; +elif (isident(c0, tt == TTID)) newtype = TTID; +elif (isop(c0)) newtype = TTOP; +elif (isopen(c0)) newtype = TTOPEN; +elif (isclose(c0)) newtype = TTCLOSE; +elif (isnum(c0)) newtype = TTNUM; +elif (bytesread && utfeat(c0)) return BAD; +else newtype = TTID; +if (tt == TTNUM && newtype == TTID); +elif (tt == TTUNK) tt = newtype; +elif (newtype != tt || isanyop(c2, c1, c0)) break; +if (tt == TTSPACE || pushc(c0) == OK); +elif (iden.len >= MAXLEN) return usererror(fid); +else return OOM; +if (c0 == '\r' || c0 == '\n') col=1, row++; +elif (c0 < 0x80 || c0 >= 0xE0) col++; +next: c2 = c1, c1 = c0, c0 = 0, bytesread = 0; } +if (bufread < 0) return writeline(2, ffread), EXCEPTION; +elif (newtype != TTUNK); +elif (bytesread == 0) tt = TTEND; +if (oldutf8) return usererror(finc8); +tok.id = nextiden(), tok.tt = tt, tok.row = idrow, tok.col = idcol; +return pusht(tok); } +enum result shunt(struct token *tok) { +if (!tok) { +while (stack) { +if (stack->tt == TTOPEN) return tokerror(fmiscl, *stack); +if (!rpntail) rpntail = popstack(); +rpntail = rpntail->rpnnext = popstack(); } +} elif (!rpntail) { +if (tok->tt == TTID || tok->tt == TTNUM) rpnhead = rpntail = tok; +elif (tok->tt == TTOPEN || (tok->tt == TTOP && isunary(tok->ct))) { +tok->pops = stack, stack = tok; +} else return tokerror(fstart, *tok); +} else { +if (tok->tt == TTID || tok->tt == TTNUM || tok->tt == TTYPE) { +rpntail = rpntail->rpnnext = tok; +} elif (tok->tt == TTOPEN) tok->pops = stack, stack = tok; +elif (tok->tt == TTCLOSE) { +while (stack && stack->tt != TTOPEN) { +rpntail = rpntail->rpnnext = popstack(); } +if (!stack) return tokerror(fmisop, *tok); +popstack(); +} elif (tok->tt == TTOP) { +s32 pre = precedence[tok->ct - TOPCOM] + isright(tok->ct); +while (stack && stack->tt == TTOP) { +if (isunary(tok->ct) && stack->ct == TOPPOW) break; +elif (pre > precedence[stack->ct - TOPCOM]) break; +rpntail = rpntail->rpnnext = popstack(); } +tok->pops = stack, stack = tok; +} else return tokerror("TODO", *tok), EXCEPTION; } +return OK; } +static enum ctype decideop(char a, char b) { +static const char *checks = "^^>>;\0+\0\0\0<=\0\0,\0*\0|\0**=\0<\0&&\0\0~\0" +"<<\0\0%\0&\0\0\0-\0>=>\0==~~/\0!!!\0||^\0!="; +static const enum ctype mapping[] = { +TOPLXOR,TOPSHR,TOPTYPE,TOPADD,TUNK,TOPLE,TUNK,TOPCOM,TOPMUL,TOPOR,TOPPOW, +TOPSET,TOPLT,TOPLAND,TUNK,TUNINV,TOPSHL,TUNK,TOPMOD,TOPAND,TUNK,TOPSUB, +TOPGE,TOPGT,TOPEQ,TUNLNOT,TOPDIV,TUNBOOL,TUNNOT,TOPLOR,TOPXOR,TOPNE }; +u32 i = uhash(uhash(0x3561F0B7 + a) + b) & 31; +return (a != checks[2 * i] || b != checks[2 * i + 1]) ? TUNK : mapping[i]; } +static enum ctype decideoplong(char a, char b, char c, char d) { +static const char *checks = +"bor\0pow\0eq\0\0le\0\0shl\0\0\0\0\0not\0or\0\0mod\0band\0\0\0\0\0\0\0" +"\0mul\0\0\0\0\0\0\0\0\0bnotne\0\0pos\0shr\0bool\0\0\0\0xor\0add\0inv\0" +"com\0ge\0\0lt\0\0set\0and\0neg\0sub\0div\0gt\0\0bxor\0\0\0\0type"; +static const enum ctype mapping[] = { +TOPOR,TOPPOW,TOPEQ,TOPLE,TOPSHL,TUNK,TUNLNOT,TOPLOR,TOPMOD,TOPAND,TUNK, +TUNK,TOPMUL,TUNK,TUNK,TUNNOT,TOPNE,TUNPOS,TOPSHR,TUNBOOL,TUNK,TOPLXOR, +TOPADD,TUNINV,TOPCOM,TOPGE,TOPLT,TOPSET,TOPLAND,TUNNEG,TOPSUB,TOPDIV, +TOPGT,TOPXOR,TUNK,TOPTYPE }; +u32 i = uhash(uhash(uhash(uhash(0xC0FF1042 + a) + b) + c) + d) % 36; +const char *ch = checks + 4 * i; +if (a != ch[0] || b != ch[1] || c != ch[2] || d != ch[3]) return TUNK; +return mapping[i]; } +enum result parse() { +enum result res = OK, pres = OK, sres = OK; +s32 depth = 0; +bool hint_op = 0, was_op = 0, expect_unempty = 0; +bool expect_type = 0, expect_nonid = 0, expect_operand = 0; +while ((res = tokenize(0)) == OK) { +struct token tok; +s32 ss = 0; +if (!tail) return writeline(2, fnot), EXCEPTION; +tok = *tail; +if (tok.tt == TTEND) break; +elif (tok.tt == TTSPACE) goto next; +elif (tok.id.len < 1) return tokerror(femp, tok), EXCEPTION; +else ss = 256 * tok.id.str[0] + tok.id.str[1]; +was_op = tok.tt == TTOP; +if (tok.tt == TTID && expect_type && !expect_nonid) { +struct slice idnum; +u64 value = 0; +if (tok.id.str[0] == 'u') tok.ct = TUNSIGNED; +elif (tok.id.str[0] == 's') tok.ct = TSIGNED; +elif (tok.id.str[0] == 'm') tok.ct = TUNSIGNED; +else badtype: return tokerror(ftype, tok); +idnum.str = tok.id.str + 1, idnum.len = tok.id.len - 1; +if ((pres = parsedec(idnum, &value)) != OK) goto badtype; +if (tok.id.str[0] != 'm' && value - 1 > 63) goto badtype; +tok.modulo = (tok.id.str[0] == 'm') ? value : ushl(1, value); +tok.tt = TTYPE; +} elif (tok.tt == TTID && tok.id.str[0] == '\\') { +char a = tok.id.str[1], b = tok.id.str[2], c = tok.id.str[3]; +char d = tok.id.len > 4 ? tok.id.str[4] : '\0'; +tok.tt = TTOP, tok.ct = decideoplong(a, b, c, d); +if (tok.ct == TUNK || tok.id.len > 5) return tokerror(funkop, tok); +} elif (tok.tt == TTID) { +} elif (tok.tt == TTOP) { +tok.ct = decideop(tok.id.str[0], tok.id.str[1]); +if (tok.ct == TOPADD && !hint_op) tok.ct = TUNPOS; +elif (tok.ct == TOPSUB && !hint_op) tok.ct = TUNNEG; +elif (tok.ct == TUNK || tok.id.len > 2) return tokerror(funkop, tok); +} elif (tok.tt == TTNUM && !expect_nonid) { +if (ss == 256 * '0' + 'b') tok.ct = TNUMBIN; +elif (ss == 256 * '0' + 'o') tok.ct = TNUMOCT; +elif (ss == 256 * '0' + 'x') tok.ct = TNUMHEX; +elif (ss == 256 * '0') tok.ct = TNUMDEC; +elif (tok.id.str[0] == '0') return tokerror(fnumpre, tok); +else tok.ct = TNUMDEC; +pres = parsenum(&tok); +if (pres == BAD) return tokerror(fnan, tok); +elif (pres == EXCEPTION) return tokerror(fbignum, tok); +} elif (tok.tt == TTOPEN) { +if (ss == 256 * '(') depth++; +else return tokerror(fuchar, tok); +} elif (tok.tt == TTCLOSE) { +if (ss == 256 * ')') depth--; +else return tokerror(fuchar, tok); +if (expect_unempty) goto unexpected; +} else goto unexpected; +next: +tok.depth = depth; +*tail = tok; +if (tok.tt == TTSPACE) continue; +if (expect_nonid && isunary(tok.ct)) goto unexpected; +if (expect_type && tok.tt != TTYPE) goto unexpected; +expect_nonid = tok.tt == TTID || tok.tt == TTNUM; +expect_type = tok.ct == TOPTYPE; +expect_unempty = tok.tt == TTOPEN; +hint_op = expect_nonid || tok.tt == TTCLOSE || tok.tt == TTYPE; +if (expect_nonid || tok.tt == TTYPE) expect_operand = 0; +elif (isunary(tok.ct)) expect_operand = 1; +elif ((was_op || tok.tt == TTCLOSE) && expect_operand) goto unexpected; +elif (was_op) expect_operand = 1; +if ((sres = shunt(tail)) != OK) return sres; } +if (res == OK) { +if (expect_operand) return usererror(ueof); +return shunt(NULL); } +return (res == OOM) ? writeline(2, fall), OOM : res; +unexpected: return tokerror(funex, *tail); } +static enum result operate1(struct token op, struct token *out) { +struct token *ta = popstack(); +u64 value, right = ta->value; +switch (op.ct) { +case TUNLNOT: value = !right; break; +case TUNNOT: value = !right; break; +case TUNBOOL: value = !!right; break; +case TUNINV: value = ~right; break; +case TUNPOS: value = right; break; +case TUNNEG: value = 0 - right; break; +default: return tokerror("TODO unary operator", op), EXCEPTION; } +out->value = value, out->ct = ta->ct, out->modulo = ta->modulo; +return OK; } +static enum ctype signop(enum ctype ct) { +switch (ct) { +case TOPLT: return TOPLT_S; +case TOPGT: return TOPGT_S; +case TOPLE: return TOPLE_S; +case TOPGE: return TOPGE_S; +case TOPSHL: return TOPSHL_S; +case TOPSHR: return TOPSHR_S; +case TOPDIV: return TOPDIV_S; +case TOPMOD: return TOPMOD_S; +case TOPPOW: return TOPPOW_S; +default: return ct; } } +static enum result operate2(struct token op, struct token *out) { +struct token *tb = popstack(), *ta = popstack(), *other = tb; +u64 value, left = ta->value, right = tb->value; +u64 modleft = ta->modulo, modright = tb->modulo; +u64 signleft = ta->ct == TSIGNED, signright = tb->ct == TSIGNED; +bool inherit = (ta->ct >= TNUMBIN && ta->ct <= TNUMHEX) || +(tb->ct >= TNUMBIN && tb->ct <= TNUMHEX); +if (op.ct == TOPTYPE || inherit); +elif (modleft != modright) return tokerror(fopmod, op); +elif (signleft != signright) return tokerror(fopsign, op); +if (signleft || signright) op.ct = signop(op.ct); +switch (op.ct) { +#define X(a, b, c) case a: value = c; break; +OPS +#undef X +default: return tokerror("TODO binary operator", op), EXCEPTION; } +if (other->ct >= TNUMBIN && other->ct <= TNUMHEX) other = ta; +out->value = value, out->ct = other->ct, out->modulo = other->modulo; +return OK; } +static enum result assign(struct token *out) { +enum result hres; +struct token *tb = popstack(), *ta = popstack(); +if (!hlook(ta->id, (void **)&ta)) { +if ((hres = hpush(ta->id, ta)) != OK) return hres; +if (hused >= hsize * 3 / 4 && (hres = rehash()) == OOM) { +return writeline(2, fall), OOM; +} elif (hres != OK) return hres; +} elif (ta->modulo != tb->modulo) return tokerror(fopmod, *ta); +elif (ta->ct == TSIGNED && tb->ct == TUNSIGNED) return tokerror(fopsign, *ta); +elif (ta->ct == TUNSIGNED && tb->ct == TSIGNED) return tokerror(fopsign, *ta); +*out = *ta, out->tt = TTNUM; +ta->value = tb->value, ta->modulo = tb->modulo, ta->ct = tb->ct; +return OK; } +static void modulate(struct token *tok) { +u64 halfmod = tok->modulo ? tok->modulo / 2 : U64(1) << 63; +tok->value = tok->modulo ? tok->value % tok->modulo : tok->value; +if (tok->ct == TSIGNED && tok->value >= halfmod) tok->value -= tok->modulo; } +enum result evaluate() { +struct token *p = rpnhead; +enum result ores = hinit(1); +if (ores == OOM) return writeline(2, fall), OOM; +while (p) { +struct token tok = *p, *v; +if (tok.tt == TTNUM || tok.tt == TTYPE || tok.tt == TTID) { +p->pops = stack, stack = p; +} elif (tok.tt == TTOP) { +struct token out = {0}; +out.tt = TTNUM, out.ct = TUNSIGNED; +if (!stack) return tokerror(fmop, tok); +if (stack->tt == TTID) { +if (!hlook(stack->id, (void **)&v)) { +return tokerror(fundef, *stack); } +stack->value = v->value, stack->modulo = v->modulo; +stack->ct = v->ct; } +if (isunary(tok.ct)) { +if ((ores = operate1(tok, &out)) != OK) return ores; +} elif (!stack->pops) return tokerror(fmop, tok); +elif (tok.ct == TOPSET) { +if (stack->pops->tt != TTID) return tokerror(fnotid, tok); +if ((ores = assign(&out)) != OK) return ores; +} elif (stack->pops->tt == TTID) { +if (!hlook(stack->pops->id, (void **)&v)) { +return tokerror(fundef, *stack->pops); } +stack->pops->value = v->value, stack->pops->modulo = v->modulo; +stack->pops->ct = v->ct; +if ((ores = operate2(tok, &out)) != OK) return ores; +} elif ((ores = operate2(tok, &out)) != OK) return ores; +modulate(&out); +out.pops = stack; +if (pusht(out) == OOM) return writeline(2, fall), OOM; +stack = tail; +} else return tokerror("TODO type", tok), EXCEPTION; +p = tok.rpnnext; } +if (stack) writeu64(1, stack->value), writech(1, '\n'); +if (stack && stack->pops) return tokerror(funex, *stack->pops), EXCEPTION; +return OK; } +int main(int argc, char **argv) { +int ret = 0; +if (argc <= 0 || !argv || !argv[0]) return writeline(2, farg), BAD; +pages[0] = iden.str = NULL; +iden.len = 0; +if ((ret = parse()) == OK) ret = evaluate(); +if (space) free(space); +while (popt(NULL) == OK); +while (pagelen-->0) free(pages[pagelen]); +return ret; }