backyard/ciel/ciel.c
2022-06-13 07:18:08 +02:00

1068 lines
36 KiB
C

/* ciel: a C Integer Expression Language. {{{1 */
/* or maybe C-Inspired? but not really. */
/*
Copyright (C) 2019 Connor Olding
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* this code uses no #includes, because i've lost my mind. */
/* it also makes it easier to compile with hobbyists' compilers. */
/* this code is somewhat optimized for LOC after preprocessing with noccom. */
/* ( noccom: https://github.com/notwa/rc/blob/master/sh/noccom ) */
/* this code makes extensive use of the comma operator. */
/* i tried to avoid using the preprocessor for any side-effects. */
/* alpha {{{1 */
/* sorry, this was really bugging me: */
#define elif else if
/* this should be plenty. MAXLEN is for identifiers, etc. */
#define PAGES 256
#define CIEL_PAGESIZE 4096
/* 256 is long enough to hold 64 emojis as an identifier. */
#define MAXLEN 256
#define XSTR(e) #e
#define STR(e) XSTR(e)
typedef unsigned char u8;
typedef signed char s8;
typedef unsigned short u16;
typedef signed short s16;
typedef unsigned int u32;
typedef signed int s32;
#if defined(_WIN32) || defined(__i386) || defined(__ARM_ARCH)
typedef unsigned long long u64;
typedef signed long long s64;
#define U64(x) (x ## ull)
#else
typedef unsigned long u64;
typedef signed long s64;
#define U64(x) (x ## ul)
#endif
#if defined(__amd64__) || defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) || defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
typedef u64 size_t;
typedef s64 ssize_t;
#else
typedef u32 size_t;
typedef s32 ssize_t;
#endif
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
_Static_assert(sizeof(u8) == 1, "u8 is incorrect");
_Static_assert(sizeof(s8) == 1, "s8 is incorrect");
_Static_assert(sizeof(u16) == 2, "u16 is incorrect");
_Static_assert(sizeof(s16) == 2, "s16 is incorrect");
_Static_assert(sizeof(u32) == 4, "u32 is incorrect");
_Static_assert(sizeof(s32) == 4, "s32 is incorrect");
_Static_assert(sizeof(u64) == 8, "u64 is incorrect");
_Static_assert(sizeof(s64) == 8, "s64 is incorrect");
_Static_assert(sizeof(ssize_t) == sizeof(void *), "ssize_t is incorrect");
_Static_assert(sizeof(size_t) == sizeof(void *), "size_t is incorrect");
#endif
#ifndef COSMOPOLITAN_H_
/* these are the only external functions used.
* most compilers will handle the linking. */
ssize_t read(int fildes, void *buf, size_t nbyte);
ssize_t write(int fildes, const void *buf, size_t nbyte);
void *malloc(size_t size);
void free(void *ptr);
/* wgtcc doesn't allow pointers to be set to integral zero. */
#define NULL (void*)0
typedef int bool;
#endif
/* beta {{{1 */
struct slice {
char *str;
s32 len;
};
static bool sliceeq(struct slice a, struct slice b) {
/* NOTE: this assumes slices were produced by pushc, defined later. */
s32 i;
if (a.len != b.len) return 0;
if (a.str == b.str) return 1;
for (i = 0; i < (a.len + 3) / 4; i++) {
if (((u32 *)a.str)[i] != ((u32 *)b.str)[i]) return 0;
}
return 1;
}
static u64 udiv(u64 a, u64 b) {
return b ? a / b : 0;
}
static s64 sdiv(s64 a, s64 b) {
return b ? a / b : 0;
}
static u64 umod(u64 a, u64 b) {
return b ? a % b : 0;
}
static s64 smod(s64 a, s64 b) {
/* signed modulo, not a remainder: -1 % 10 is 9, not -1. the divisor
* must be added to the result if the operands have differing signs. */
return b ? a % b + ((a ^ b) < 0 ? b: 0) : 0;
}
static u64 upow(u64 b, u64 e) {
u64 r = 1;
while (e) {
if (e & 1) r *= b;
e >>= 1, b *= b;
}
return r;
}
static s64 spow(s64 b, s64 e) {
if (b == 1) return 1;
elif (b == -1) return e & 1 ? -1 : 1;
if (e < 0) return 0;
/* signed overflow is undefined, so resort to the unsigned operation. */
return upow(b, e);
}
/* shifts in C are full of undefined/implementation-specific behavior, even
* between gcc & clang, so many checks must be made to make them consistent. */
static u64 ushl(u64 a, u64 b) {
return b < 64 ? a << b : 0;
}
static u64 ushr(u64 a, u64 b) {
return b < 64 ? a >> b : 0;
}
static s64 sshl(s64 a, s64 b) {
if (b < -63) return a < 0 ? -1 : 0;
elif (b < 0) return a >> b;
elif (b < 63) return a << b;
else return 0;
}
static s64 sshr(s64 a, s64 b) {
if (b < -63) return 0;
elif (b < 0) return a << b;
elif (b < 63) return a >> b;
else return a < 0 ? -1 : 0;
}
static u64 rol(u64 a, u64 b) {
return b ? (a << (b & 63)) | (a >> (64 - (b & 63))) : a;
}
static u64 ror(u64 a, u64 b) {
return b ? (a >> (b & 63)) | (a << (64 - (b & 63))) : a;
}
static bool isalpha_(char c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
static bool isnum(char c) {
return c >= '0' && c <= '9';
}
static bool isident(char c, bool notfirst) {
if (notfirst) return isalpha_(c) || c == '_' || isnum(c);
else return isalpha_(c) || c == '_' || c == '\\';
}
static bool isspace_(char c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
static bool isopen(char c) {
return c == '{' || c == '[' || c == '(';
}
static bool isclose(char c) {
return c == ')' || c == ']' || c == '}';
}
static bool isop(char c) {
/* this is a broad check that includes some extra characters */
return c >= '!' && c <= '~' && !(isident(c, 1) || isopen(c) || isclose(c));
}
/* basically strlen but without the name conflict */
static ssize_t zlen(const char *str) {
ssize_t len = 0;
while (str[len]) len++;
return len;
}
static bool writeok = 1;
static void write_(int fd, const char *str, ssize_t len) {
/* a wrapper around write() that stops writing once writing fails. */
if (len > 0 && writeok) writeok = write(fd, str, len) == len;
}
static void writestr(int fd, const char *str) {
write_(fd, str, zlen(str));
}
static void writepair(int fd, struct slice p) {
write_(fd, p.str, p.len);
}
static void writech(int fd, char c) {
write_(fd, &c, 1);
}
static void writeline(int fd, const char *str) {
writestr(fd, str), writech(1, '\n');
}
static void writeu64(int fd, u64 x) {
char str[21], *start = str + 20;
for (*start = 0; *--start = x % 10 + '0', x /= 10; );
writestr(fd, start);
}
static void writeint(int fd, s32 x) {
writeu64(fd, x < 0 ? writech(fd, '-'), 0 - (u64)x : (u64)x);
}
static void writepad(int fd, s32 x, s32 digits) {
while (--digits) if ((x /= 10) == 0) writech(fd, ' ');
}
/* gamma {{{1 */
/* OK: success, BAD: user's fault, EXCEPTION: dev's fault, OOM: out of memory */
enum result {OK, BAD, EXCEPTION, OOM};
char *pages[PAGES];
s32 pagelen = 0;
struct slice iden; /* iden.str is assumed to be u32-aligned by the hash table */
static enum result pushc(char c) {
if (pagelen > PAGES) return OOM;
if (!iden.str) iden.str = pages[pagelen++] = malloc(CIEL_PAGESIZE);
if (!iden.str) return OOM;
if (iden.len >= MAXLEN) return EXCEPTION;
iden.str[iden.len++] = c;
return OK;
}
static struct slice nextiden() {
/* note to self: it's possible to squeeze idens onto the previous page */
struct slice ret = iden;
if (!iden.len) return ret;
iden.str += iden.len, iden.len = 0;
while ((size_t)iden.str & 3) *iden.str++ = 0;
if (iden.str - pages[pagelen - 1] > CIEL_PAGESIZE - MAXLEN) iden.str = NULL;
return ret;
}
static u32 uhash(u32 h) {
return ++h, h ^= h >> 16, h *= 0xA66C52B5, h;
}
static u32 shash(struct slice s) {
u32 h = 1; /* arbitrary */
s32 i;
for (i = 0; i < s.len; i++) h = uhash(h + s.str[i]);
/* never return 0 to prevent confusion with empty entries. */
return h ? h : 1;
}
struct entry { /* aka bucket */
u32 h; /* hash of the identifier's name */
s32 len; /* the length of the identifier */
char *str; /* the name of the identifier */
void *v; /* the value associated with the identifier */
};
struct entry *space = NULL;
size_t hsize = 0, hmask = 0, hused = 0, hn = 0;
enum hmode {HLOOK, HPUSH, HPULL};
/* NOTE: do *not* use empty/zero strings with these hash table functions. */
static enum result hinit(size_t n) {
size_t i;
struct entry empty = {0};
hn = n, hsize = 1 << n, space = malloc(hsize * sizeof(struct entry));
for (i = 0; i < hsize && space; i++) space[i] = empty;
return hmask = hsize - 1, hused = 0, space ? OK : OOM;
}
static bool entryeq(struct entry a, struct entry b) {
/* TODO: delete this function and inline its only use. */
struct slice sa, sb;
sa.str = a.str, sa.len = a.len, sb.str = b.str, sb.len = b.len;
return sliceeq(sa, sb);
}
static bool hall(enum hmode m, struct slice k, void **v, u32 h) {
/* NOTE: do not use this function directly.
* use hlook, hpush, and hpull instead. */
/* when m is HLOOK, return found */
/* when m is HPUSH, return success */
/* when m is HPULL, return found_and_success */
size_t i, j;
struct entry a;
a.h = h = h ? h : shash(k), a.len = k.len, a.str = k.str, a.v = v ? *v : 0;
for (i = 0; i < hsize; i++) {
size_t ind = (h + i) & hmask;
struct entry b = space[ind];
if (!b.h && m == HPUSH) hused++, space[ind] = a;
if (!b.h) return m == HPUSH;
if (a.h == b.h && entryeq(a, b)) {
if (m == HPULL) {
for (j = 1; j < hsize; j++) {
size_t jnd = (h + i + j) & hmask;
if (b = space[jnd], b.h) {
size_t bdib = (jnd - b.h) & hmask;
if (!bdib) break;
space[ind] = space[jnd], ind = jnd;
}
}
hused--, space[ind].h = 0;
} elif (m == HPUSH) space[ind].v = *v;
elif (m == HLOOK) *v = b.v;
return 1;
}
if (i) {
/* NOTE: using i in place of adib seems to perform the same...? */
size_t adib = (ind - a.h) & hmask, bdib = (ind - b.h) & hmask;
if (bdib < adib && m == HPUSH) space[ind] = a, a = b;
elif (bdib < adib) return 0;
}
}
return 0;
}
static enum result hpush(struct slice k, void *v) {
return hall(HPUSH, k, &v, 0) ? OK : EXCEPTION;
}
static bool hlook(struct slice k, void **v) {
/* returns 1 and points v to the entry's value when a match is found */
return hall(HLOOK, k, v, 0);
}
static enum result hpull(struct slice k) {
return hall(HPULL, k, NULL, 0) ? OK : EXCEPTION;
}
static enum result rehash() {
size_t i;
struct entry *oldspace = space;
enum result hres = hinit(hn + 1);
if (hres != OK) return hres;
for (i = 0; i < hsize / 2; i++) {
struct entry e = oldspace[i];
struct slice k;
k.str = e.str, k.len = e.len;
if (e.h && !hall(HPUSH, k, &e.v, e.h)) return EXCEPTION;
}
return free(oldspace), OK;
}
/* delta {{{1 */
/* operators and their precedence.
* unary operators, TOPSET, and TOPPOW are right-associative.
* operators with a precedence of 0 are variations that are parsed further in.
* the expressions for unary operators are dummied here and handled elsewhere.
*/
#define OPS \
X(TOPCOM, 1, right) X(TOPSET, 2, 0) X(TOPLOR, 3, left || right) \
X(TOPLXOR, 4, !!left ^ !!right) X(TOPLAND, 5, left && right) \
X(TUNLNOT, 6, 0) \
X(TOPEQ, 7, left == right) X(TOPGE, 7, left >= right) \
X(TOPGT, 7, left > right) X(TOPLE, 7, left <= right) \
X(TOPLT, 7, left < right) X(TOPNE, 7, left != right) \
X(TOPOR, 8, left | right) X(TOPXOR, 9, left ^ right) \
X(TOPAND, 10, left & right) X(TOPSHL, 11, ushl(left, right)) \
X(TOPSHR, 11, ushr(left, right)) X(TOPADD, 12, left + right) \
X(TOPSUB, 12, left - right) X(TOPDIV, 13, udiv(left, right)) \
X(TOPMOD, 13, umod(left, right)) X(TOPMUL, 13, left * right) \
X(TUNBOOL, 14, 0) X(TUNINV, 14, 0) X(TUNNEG, 14, 0) \
X(TUNNOT, 14, 0) X(TUNPOS, 14, 0) \
X(TOPPOW, 15, upow(left, right)) X(TOPTYPE, 16, left) \
X(TOPDIV_S, 0, sdiv(left, right)) \
X(TOPGE_S, 0, (s64)left >= (s64)right) \
X(TOPGT_S, 0, (s64)left > (s64)right) \
X(TOPLE_S, 0, (s64)left <= (s64)right) \
X(TOPLT_S, 0, (s64)left < (s64)right) \
X(TOPMOD_S, 0, smod(left, right)) X(TOPPOW_S, 0, spow(left, right)) \
X(TOPSHL_S, 0, sshl(left, right)) X(TOPSHR_S, 0, sshr(left, right))
/* these are the types returned by the lexer/tokenizer. */
/* the lexer doesn't actually return TTYPE, but it's convenient to have here. */
enum ttype { TTUNK, TTSPACE, TTID, TTYPE, TTNUM, TTOP, TTOPEN, TTCLOSE, TTEND };
/* this enum used to refer to C types, but now it's totally different. */
enum ctype {
TUNK, TSIGNED, TUNSIGNED, TNUMBIN, TNUMOCT, TNUMDEC, TNUMHEX,
#define X(a, b, c) a,
OPS CTYPE_END
#undef X
};
static const s8 precedence[] = {
#define X(a, b, c) b,
OPS 0
#undef X
};
struct token {
/* note: it's assumed that 0 is a good default for all of these fields. */
struct token *next, *prev; /* in input stream order */
struct token *rpnnext; /* in shunted reverse polish notation order */
struct token *pops; /* previous token in shunting yard stack */
s32 row, col, depth; /* depth is how many parens deep the token is */
struct slice id; /* the string that produced this token, never modified */
enum ttype tt; /* lexer/tokenizer type */
enum ctype ct; /* parser type */
u64 value, modulo; /* value is currently just for integer constants */
};
struct token *head = NULL, *tail = NULL, *rpnhead = NULL, *rpntail = NULL;
struct token *stack = NULL;
static enum result pusht(struct token tok) {
/* TODO: use pages like with identifiers to reduce malloc calls. */
struct token *htok = malloc(sizeof(struct token));
if (!htok) return OOM;
*htok = tok;
if (!head) head = htok;
if (tail) tail->next = htok, htok->prev = tail;
tail = htok;
return OK;
}
static enum result popt(struct token *out) {
if (!head) return EXCEPTION;
if (out) *out = *tail;
if (head == tail) free(head), head = NULL, tail = NULL;
elif (tail) tail = tail->prev, free(tail->next);
return OK;
}
static struct token *popstack() {
struct token *res = stack;
if (stack) stack = stack->pops, res->pops = NULL;
return res;
}
/* handy list of operators:
! !! !=
& &&
* **
< << <=
= ==
> >> >=
^ ^^
| ||
~ ~~
% + , - / ;
*/
static bool isop1(char c) {
/* unambiguous one-character operators, used to assist the tokenizer */
/* this includes parenthesis because they are similarly unambiguous. */
return c == '%' || c == '(' || c == ')' || c == '+'
|| c == ',' || c == '-' || c == '/' || c == ';';
}
static bool isop2(char c) {
/* operators that have doubled variants, for instance: = has == */
return c == '!' || c == '&' || c == '*' || c == '<' || c == '='
|| c == '>' || c == '^' || c == '|' || c == '~';
}
static bool isop3(char c) {
/* operators that have variants ending with =, for instance: ! has != */
return c == '!' || c == '<' || c == '>';
}
static bool isanyop(char a, char b, char c) {
return isop1(b) || isop1(c)
|| (b == a && isop2(a)) || (b == '=' && isop3(a));
}
static bool isunary(enum ctype ct) {
return ct == TUNLNOT || (ct >= TUNBOOL && ct <= TUNPOS);
}
static bool isright(enum ctype ct) {
/* right-associative operators */
return isunary(ct) || ct == TOPSET || ct == TOPPOW;
}
/* epsilon {{{1 */
const char
*fid = "identifiers cannot be longer than " STR(MAXLEN) " bytes",
*fnotid = "left-hand operand must be an identifier", *fnan = "not a number",
*fopsign = "operands have different signedness", *fmiscl = "missing \")\"",
*fopmod = "operands have different modulos", *farg = "malformed argc/argv",
*fall = "failed to allocate memory", *fnumpre = "invalid numeric prefix",
*fstart = "invalid starting token", *fout8 = "out of range for UTF-8",
*ueof = "unexpected end of file", *femp = "empty token", *fnot = "no token",
*fbignum = "number too large", *fundef = "undefined", *funex = "unexpected",
*fuchar = "unknown character", *funkop = "unknown operator",
*finc8 = "incomplete UTF-8", *fmisop = "missing \"(\"",
*fmop = "missing operand", *ffread = "read failed", *ftype = "invalid type",
*finv8 = "invalid UTF-8";
/* tokenizer state: */
u8 c0 = 0, c1 = 0, c2 = 0; /* input char delay line, c0 being the most recent */
s32 row = 1, col = 1; /* position in file */
s32 utf8 = 0; /* nonzero: number of continuations to expect */
char buf[4096]; /* input read() buffer */
ssize_t bufind = 0, bufread = 0, bytesread = 0; /* bytesread is a facade */
static void writepos(int fd) {
writeint(fd, row), writech(fd, ':'), writeint(fd, col), writech(fd, ' ');
}
static enum result usererror(const char *str) {
return writepos(2), writeline(2, str), BAD;
}
static enum result tokerror(const char *str, const struct token tok) {
/* NOTE: modifying row/col is assuming an unrecoverable error */
row = tok.row, col = tok.col, writepos(2), writestr(2, str);
return writestr(2, ": \""), writepair(2, tok.id), writestr(2, "\"\n"), BAD;
}
/*
static void tokdebug(struct token tok) {
s32 ctlen = 9;
switch (tok.tt) {
case TTUNK: writestr(1, " TTUNK"); break;
case TTSPACE: writestr(1, "TTSPACE"); break;
case TTID: writestr(1, " TTID"); break;
case TTYPE: writestr(1, " TTYPE"); break;
case TTNUM: writestr(1, " TTNUM"); break;
case TTOP: writestr(1, " TTOP"); break;
case TTOPEN: writestr(1, " TTOPEN"); break;
case TTCLOSE: writestr(1, "TTCLOSE"); break;
case TTEND: writestr(1, " TTEND"); break;
default: writestr(1, " ???");
}
writech(1, '(');
switch (tok.ct) {
case TUNK: writestr(1, "TUNK, "); break;
case TSIGNED: writestr(1, "TSIGNED, "); break;
case TUNSIGNED: writestr(1, "TUNSIGNED,"); break;
case TNUMBIN: writestr(1, "TNUMBIN, "); break;
case TNUMOCT: writestr(1, "TNUMOCT, "); break;
case TNUMDEC: writestr(1, "TNUMDEC, "); break;
case TNUMHEX: writestr(1, "TNUMHEX, "); break;
#define X(a, b, c) case a: writestr(1, STR(a) ","); ctlen = zlen(STR(a)); break;
OPS
#undef X
default: writestr(1, "???, "); break;
}
while (ctlen < 9) writech(1, ' '), ctlen++;
writeint(1, tok.depth), writestr(1, ") at ");
writepad(1, tok.row, 2), writepad(1, tok.col, 2);
writeint(1, tok.row), writech(1, ':'), writeint(1, tok.col);
writestr(1, ": \""); writepair(1, tok.id);
writestr(1, "\" ("), writeu64(1, tok.value);
if (tok.modulo) writestr(1, " % "), writeu64(1, tok.modulo);
writestr(1, ")\n");
}
*/
static enum result parsedec(struct slice id, u64 *out) {
/* NOTE: assumes id.len > 0 */
s32 i, c = id.str[0];
u64 value = 0;
if (id.len > 20) return EXCEPTION;
if (id.len == 20 && c > '1' && c <= '9') return EXCEPTION;
for (i = 0, c = id.str[i]; i < id.len; c = id.str[++i]) {
if (c < '0' || c > '9') return BAD;
value = value * 10 + (c - '0');
}
if (id.len == 20 && value >> 63 != 1) return EXCEPTION; /* overflow */
return *out = value, OK;
}
static enum result parsenum(struct token *tok) {
/* this is basically atoi, strtoll, parseInt, or whatever you call it. */
/* this variant only handles decimal with no prefix,
* and binary, octal, hex with the prefixes 0b, 0o, and 0x respectively. */
/* NOTE: assumes id.len > 0 */
/* NOTE: it's possible to collapse TNUMBIN, TNUMOCT, TNUMDEC to save LOC,
* but this could result in some really awful machine code. */
struct slice id = tok->id; /* not necessary but speeds up access */
u64 value = 0; /* likewise */
s32 i, c = id.str[0];
if (tok->ct == TNUMBIN) {
if (id.len > 66) return EXCEPTION;
for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) {
if (c != '0' && c != '1') return BAD;
value = value * 2 + (c - '0');
}
} elif (tok->ct == TNUMOCT) {
if (id.len > 34) return EXCEPTION;
for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) {
if (c < '0' || c > '7') return BAD;
value = value * 8 + (c - '0');
}
} elif (tok->ct == TNUMDEC) {
enum result pres = parsedec(id, &value);
if (pres != OK) return pres;
} elif (tok->ct == TNUMHEX) {
if (id.len > 18) return EXCEPTION;
for (i = 2, c = id.str[i]; i < id.len; c = id.str[++i]) {
if (c >= '0' && c <= '9') value = value * 16 + c - '0';
elif (c >= 'A' && c <= 'F') value = value * 16 + c - 'A' + 10;
elif (c >= 'a' && c <= 'f') value = value * 16 + c - 'a' + 10;
else return BAD;
}
} else return EXCEPTION;
if (tok->ct != TNUMDEC && i == 2) return BAD; /* prefix but no number */
return tok->value = value, OK;
}
static enum result utfeat(u8 c) {
/* handle UTF-8 encoded bytes, assuming nothing else has matched so far */
bool cont = c >= 0x80 && c < 0xC0;
if (utf8 && cont) utf8--;
elif ((utf8 && !cont) || cont) return usererror(finv8);
elif (c < 0x80) return usererror(fuchar);
elif (c >= 0xC0 && c < 0xE0) utf8 = 1;
elif (c >= 0xE0 && c < 0xF0) utf8 = 2;
elif (c >= 0xF0 && c < 0xF6) utf8 = 3;
else return usererror(fout8);
return OK;
}
static bool nextchar(int fd) {
if (++bufind >= bufread) {
bytesread = bufread = read(fd, buf, 4096), bufind = 0;
if (bufread < 1) return 0;
}
return c0 = buf[bufind], bytesread = 1;
}
enum result tokenize(int fd) {
/* split strings in fd into tokens with rough types, AKA a lexer */
enum ttype tt = TTUNK, newtype = TTUNK;
struct token tok = {0};
s32 idrow = row, idcol = col, oldutf8 = 0;
while (bytesread = 0, c0 || nextchar(fd)) {
if (c1 == '\r' && c0 == '\n') goto next; /* "\r\n" is treated as one */
oldutf8 = utf8;
if (isspace_(c0)) newtype = TTSPACE;
elif (isident(c0, tt == TTID)) newtype = TTID;
elif (isop(c0)) newtype = TTOP;
elif (isopen(c0)) newtype = TTOPEN;
elif (isclose(c0)) newtype = TTCLOSE;
elif (isnum(c0)) newtype = TTNUM;
elif (bytesread && utfeat(c0)) return BAD;
else newtype = TTID; /* unicode is always treated as an identifier */
if (tt == TTNUM && newtype == TTID); /* so that "0x123" is one token */
elif (tt == TTUNK) tt = newtype;
elif (newtype != tt || isanyop(c2, c1, c0)) break; /* token boundary */
if (tt == TTSPACE || pushc(c0) == OK); /* construct token string */
elif (iden.len >= MAXLEN) return usererror(fid);
else return OOM;
if (c0 == '\r' || c0 == '\n') col=1, row++;
elif (c0 < 0x80 || c0 >= 0xE0) col++; /* correctly handle UTF-8 */
next: c2 = c1, c1 = c0, c0 = 0, bytesread = 0;
}
if (bufread < 0) return writeline(2, ffread), EXCEPTION;
elif (newtype != TTUNK); /* parses the final character correctly */
elif (bytesread == 0) tt = TTEND;
if (oldutf8) return usererror(finc8);
tok.id = nextiden(), tok.tt = tt, tok.row = idrow, tok.col = idcol;
return pusht(tok);
}
enum result shunt(struct token *tok) {
/* perform the shunting yard algorithm to produce reverse polish notation */
if (!tok) { /* finish up */
while (stack) {
if (stack->tt == TTOPEN) return tokerror(fmiscl, *stack);
if (!rpntail) rpntail = popstack(); /* only first iteration */
rpntail = rpntail->rpnnext = popstack();
}
} elif (!rpntail) { /* handle first token */
if (tok->tt == TTID || tok->tt == TTNUM) rpnhead = rpntail = tok;
elif (tok->tt == TTOPEN || (tok->tt == TTOP && isunary(tok->ct))) {
tok->pops = stack, stack = tok;
} else return tokerror(fstart, *tok);
} else { /* handle everything inbetween */
if (tok->tt == TTID || tok->tt == TTNUM || tok->tt == TTYPE) {
rpntail = rpntail->rpnnext = tok;
} elif (tok->tt == TTOPEN) tok->pops = stack, stack = tok;
elif (tok->tt == TTCLOSE) {
while (stack && stack->tt != TTOPEN) {
rpntail = rpntail->rpnnext = popstack();
}
if (!stack) return tokerror(fmisop, *tok);
popstack();
} elif (tok->tt == TTOP) {
s32 pre = precedence[tok->ct - TOPCOM] + isright(tok->ct);
/* NOTE: TOPTYPE occurs before TUNNEG, is this okay? */
while (stack && stack->tt == TTOP) {
if (isunary(tok->ct) && stack->ct == TOPPOW) break;
elif (pre > precedence[stack->ct - TOPCOM]) break;
rpntail = rpntail->rpnnext = popstack();
}
tok->pops = stack, stack = tok;
} else return tokerror("TODO", *tok), EXCEPTION;
}
return OK;
}
static enum ctype decideop(char a, char b) {
static const char *checks = "^^>>;\0+\0\0\0<=\0\0,\0*\0|\0**=\0<\0&&\0\0~\0"
"<<\0\0%\0&\0\0\0-\0>=>\0==~~/\0!!!\0||^\0!=";
static const enum ctype mapping[] = {
TOPLXOR,TOPSHR,TOPTYPE,TOPADD,TUNK,TOPLE,TUNK,TOPCOM,TOPMUL,TOPOR,TOPPOW,
TOPSET,TOPLT,TOPLAND,TUNK,TUNINV,TOPSHL,TUNK,TOPMOD,TOPAND,TUNK,TOPSUB,
TOPGE,TOPGT,TOPEQ,TUNLNOT,TOPDIV,TUNBOOL,TUNNOT,TOPLOR,TOPXOR,TOPNE
};
/* perfect hashing found by brute force: */
u32 i = uhash(uhash(0x3561F0B7 + a) + b) & 31;
return (a != checks[2 * i] || b != checks[2 * i + 1]) ? TUNK : mapping[i];
}
static enum ctype decideoplong(char a, char b, char c, char d) {
static const char *checks =
"bor\0pow\0eq\0\0le\0\0shl\0\0\0\0\0not\0or\0\0mod\0band\0\0\0\0\0\0\0"
"\0mul\0\0\0\0\0\0\0\0\0bnotne\0\0pos\0shr\0bool\0\0\0\0xor\0add\0inv\0"
"com\0ge\0\0lt\0\0set\0and\0neg\0sub\0div\0gt\0\0bxor\0\0\0\0type";
static const enum ctype mapping[] = {
TOPOR,TOPPOW,TOPEQ,TOPLE,TOPSHL,TUNK,TUNLNOT,TOPLOR,TOPMOD,TOPAND,TUNK,
TUNK,TOPMUL,TUNK,TUNK,TUNNOT,TOPNE,TUNPOS,TOPSHR,TUNBOOL,TUNK,TOPLXOR,
TOPADD,TUNINV,TOPCOM,TOPGE,TOPLT,TOPSET,TOPLAND,TUNNEG,TOPSUB,TOPDIV,
TOPGT,TOPXOR,TUNK,TOPTYPE
};
/* perfect hashing found by brute force: */
u32 i = uhash(uhash(uhash(uhash(0xC0FF1042 + a) + b) + c) + d) % 36;
const char *ch = checks + 4 * i;
if (a != ch[0] || b != ch[1] || c != ch[2] || d != ch[3]) return TUNK;
return mapping[i];
}
enum result parse() {
/* restructure the tokens from tokenize and parse literals */
enum result res = OK, pres = OK, sres = OK;
s32 depth = 0;
bool hint_op = 0, was_op = 0, expect_unempty = 0;
bool expect_type = 0, expect_nonid = 0, expect_operand = 0;
while ((res = tokenize(0)) == OK) {
struct token tok;
s32 ss = 0;
if (!tail) return writeline(2, fnot), EXCEPTION;
tok = *tail;
if (tok.tt == TTEND) break;
elif (tok.tt == TTSPACE) goto next;
elif (tok.id.len < 1) return tokerror(femp, tok), EXCEPTION;
else ss = 256 * tok.id.str[0] + tok.id.str[1];
was_op = tok.tt == TTOP;
if (tok.tt == TTID && expect_type && !expect_nonid) {
struct slice idnum;
u64 value = 0;
if (tok.id.str[0] == 'u') tok.ct = TUNSIGNED;
elif (tok.id.str[0] == 's') tok.ct = TSIGNED;
elif (tok.id.str[0] == 'm') tok.ct = TUNSIGNED;
else badtype: return tokerror(ftype, tok);
idnum.str = tok.id.str + 1, idnum.len = tok.id.len - 1;
if ((pres = parsedec(idnum, &value)) != OK) goto badtype;
if (tok.id.str[0] != 'm' && value - 1 > 63) goto badtype;
/* note that 1 << 64 = 0 */
tok.modulo = (tok.id.str[0] == 'm') ? value : ushl(1, value);
tok.tt = TTYPE;
} elif (tok.tt == TTID && tok.id.str[0] == '\\') {
char a = tok.id.str[1], b = tok.id.str[2], c = tok.id.str[3];
char d = tok.id.len > 4 ? tok.id.str[4] : '\0';
tok.tt = TTOP, tok.ct = decideoplong(a, b, c, d);
if (tok.ct == TUNK || tok.id.len > 5) return tokerror(funkop, tok);
} elif (tok.tt == TTID) {
} elif (tok.tt == TTOP) {
tok.ct = decideop(tok.id.str[0], tok.id.str[1]);
if (tok.ct == TOPADD && !hint_op) tok.ct = TUNPOS;
elif (tok.ct == TOPSUB && !hint_op) tok.ct = TUNNEG;
elif (tok.ct == TUNK || tok.id.len > 2) return tokerror(funkop, tok);
} elif (tok.tt == TTNUM && !expect_nonid) {
if (ss == 256 * '0' + 'b') tok.ct = TNUMBIN;
elif (ss == 256 * '0' + 'o') tok.ct = TNUMOCT;
elif (ss == 256 * '0' + 'x') tok.ct = TNUMHEX;
elif (ss == 256 * '0') tok.ct = TNUMDEC;
elif (tok.id.str[0] == '0') return tokerror(fnumpre, tok);
else tok.ct = TNUMDEC;
pres = parsenum(&tok);
if (pres == BAD) return tokerror(fnan, tok);
elif (pres == EXCEPTION) return tokerror(fbignum, tok);
} elif (tok.tt == TTOPEN) {
if (ss == 256 * '(') depth++;
else return tokerror(fuchar, tok);
} elif (tok.tt == TTCLOSE) {
if (ss == 256 * ')') depth--;
else return tokerror(fuchar, tok);
if (expect_unempty) goto unexpected;
} else goto unexpected; /* or unimplemented */
next:
tok.depth = depth;
*tail = tok; /* update depth, value, etc. */
if (tok.tt == TTSPACE) continue;
if (expect_nonid && isunary(tok.ct)) goto unexpected;
if (expect_type && tok.tt != TTYPE) goto unexpected;
expect_nonid = tok.tt == TTID || tok.tt == TTNUM;
expect_type = tok.ct == TOPTYPE;
expect_unempty = tok.tt == TTOPEN;
hint_op = expect_nonid || tok.tt == TTCLOSE || tok.tt == TTYPE;
if (expect_nonid || tok.tt == TTYPE) expect_operand = 0;
elif (isunary(tok.ct)) expect_operand = 1;
elif ((was_op || tok.tt == TTCLOSE) && expect_operand) goto unexpected;
elif (was_op) expect_operand = 1;
if ((sres = shunt(tail)) != OK) return sres;
}
if (res == OK) {
if (expect_operand) return usererror(ueof);
return shunt(NULL);
}
return (res == OOM) ? writeline(2, fall), OOM : res;
unexpected: return tokerror(funex, *tail);
}
static enum result operate1(struct token op, struct token *out) {
/* execute unary operators. */
struct token *ta = popstack();
u64 value, right = ta->value;
switch (op.ct) {
case TUNLNOT: value = !right; break;
case TUNNOT: value = !right; break;
case TUNBOOL: value = !!right; break;
case TUNINV: value = ~right; break;
case TUNPOS: value = right; break;
case TUNNEG: value = 0 - right; break;
default: return tokerror("TODO unary operator", op), EXCEPTION;
}
out->value = value, out->ct = ta->ct, out->modulo = ta->modulo;
return OK;
}
static enum ctype signop(enum ctype ct) {
switch (ct) {
case TOPLT: return TOPLT_S;
case TOPGT: return TOPGT_S;
case TOPLE: return TOPLE_S;
case TOPGE: return TOPGE_S;
case TOPSHL: return TOPSHL_S;
case TOPSHR: return TOPSHR_S;
case TOPDIV: return TOPDIV_S;
case TOPMOD: return TOPMOD_S;
case TOPPOW: return TOPPOW_S;
default: return ct;
}
}
static enum result operate2(struct token op, struct token *out) {
/* execute binary operators. */
struct token *tb = popstack(), *ta = popstack(), *other = tb;
u64 value, left = ta->value, right = tb->value;
u64 modleft = ta->modulo, modright = tb->modulo;
u64 signleft = ta->ct == TSIGNED, signright = tb->ct == TSIGNED;
bool inherit = (ta->ct >= TNUMBIN && ta->ct <= TNUMHEX) ||
(tb->ct >= TNUMBIN && tb->ct <= TNUMHEX);
if (op.ct == TOPTYPE || inherit);
elif (modleft != modright) return tokerror(fopmod, op);
elif (signleft != signright) return tokerror(fopsign, op);
if (signleft || signright) op.ct = signop(op.ct);
switch (op.ct) {
#define X(a, b, c) case a: value = c; break;
OPS
#undef X
default: return tokerror("TODO binary operator", op), EXCEPTION;
}
if (other->ct >= TNUMBIN && other->ct <= TNUMHEX) other = ta;
out->value = value, out->ct = other->ct, out->modulo = other->modulo;
return OK;
}
static enum result assign(struct token *out) {
enum result hres;
struct token *tb = popstack(), *ta = popstack();
if (!hlook(ta->id, (void **)&ta)) {
if ((hres = hpush(ta->id, ta)) != OK) return hres;
if (hused >= hsize * 3 / 4 && (hres = rehash()) == OOM) {
return writeline(2, fall), OOM;
} elif (hres != OK) return hres;
} elif (ta->modulo != tb->modulo) return tokerror(fopmod, *ta);
elif (ta->ct == TSIGNED && tb->ct == TUNSIGNED) return tokerror(fopsign, *ta);
elif (ta->ct == TUNSIGNED && tb->ct == TSIGNED) return tokerror(fopsign, *ta);
*out = *ta, out->tt = TTNUM;
ta->value = tb->value, ta->modulo = tb->modulo, ta->ct = tb->ct;
return OK;
}
static void modulate(struct token *tok) {
u64 halfmod = tok->modulo ? tok->modulo / 2 : U64(1) << 63;
tok->value = tok->modulo ? tok->value % tok->modulo : tok->value;
if (tok->ct == TSIGNED && tok->value >= halfmod) tok->value -= tok->modulo;
}
enum result evaluate() {
/* runs the program produced by parse() */
struct token *p = rpnhead;
enum result ores = hinit(1);
if (ores == OOM) return writeline(2, fall), OOM;
while (p) {
struct token tok = *p, *v;
if (tok.tt == TTNUM || tok.tt == TTYPE || tok.tt == TTID) {
p->pops = stack, stack = p;
} elif (tok.tt == TTOP) {
struct token out = {0};
out.tt = TTNUM, out.ct = TUNSIGNED;
if (!stack) return tokerror(fmop, tok);
/* if (stack->pops) tokdebug(*stack->pops); */
/* tokdebug(tok), tokdebug(*stack); */
if (stack->tt == TTID) {
/* check the right-hand side */
if (!hlook(stack->id, (void **)&v)) {
return tokerror(fundef, *stack);
}
stack->value = v->value, stack->modulo = v->modulo;
stack->ct = v->ct;
}
if (isunary(tok.ct)) {
if ((ores = operate1(tok, &out)) != OK) return ores;
} elif (!stack->pops) return tokerror(fmop, tok);
elif (tok.ct == TOPSET) {
if (stack->pops->tt != TTID) return tokerror(fnotid, tok);
if ((ores = assign(&out)) != OK) return ores;
} elif (stack->pops->tt == TTID) {
/* check the left-hand side */
if (!hlook(stack->pops->id, (void **)&v)) {
return tokerror(fundef, *stack->pops);
}
stack->pops->value = v->value, stack->pops->modulo = v->modulo;
stack->pops->ct = v->ct;
if ((ores = operate2(tok, &out)) != OK) return ores;
} elif ((ores = operate2(tok, &out)) != OK) return ores;
modulate(&out);
/* tokdebug(out), writech(1, '\n'); */
out.pops = stack;
if (pusht(out) == OOM) return writeline(2, fall), OOM;
stack = tail;
} else return tokerror("TODO type", tok), EXCEPTION;
p = tok.rpnnext;
}
if (stack) writeu64(1, stack->value), writech(1, '\n');
/* if (stack && stack->pops) return tokdebug(*stack->pops), EXCEPTION; */
if (stack && stack->pops) return tokerror(funex, *stack->pops), EXCEPTION;
return OK;
}
int main(int argc, char **argv) {
int ret = 0;
/* writeu64(2, sizeof(struct entry)), writech(2, '\n'); */
if (argc <= 0 || !argv || !argv[0]) return writeline(2, farg), BAD;
/* TODO:
* -u flag: disables input buffering.
* -n flag: returns 1 from main if expression value is nonzero.
* -z flag: returns 1 from main if expression value is zero.
*/
pages[0] = iden.str = NULL;
iden.len = 0;
if ((ret = parse()) == OK) ret = evaluate();
if (space) free(space);
while (popt(NULL) == OK);
while (pagelen-->0) free(pages[pagelen]);
return ret;
}