diff options
author | Jo-Philipp Wich <jo@mein.io> | 2021-02-17 18:28:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-17 18:28:01 +0100 |
commit | 679270fd3afa93cca84ab31b5041922037fec0c5 (patch) | |
tree | e55752bae52bf7eed38b91c42e990a8b116b6621 | |
parent | 77580a893283f2bde7ab46496bd3a3d7b2fc6784 (diff) | |
parent | 14e46b8e225dc329f4e14777960b10abb8a09699 (diff) |
Merge pull request #2 from jow-/rewrite
treewide: rewrite ucode interpreter
42 files changed, 9571 insertions, 11096 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9edfc6a..2046392 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,23 +32,7 @@ IF(JSONC_FOUND) INCLUDE_DIRECTORIES(${JSONC_INCLUDE_DIRS}) ENDIF() -ADD_CUSTOM_COMMAND( - OUTPUT contrib/lemon - DEPENDS contrib/lemon.c contrib/lempar.c - COMMAND gcc -o contrib/lemon contrib/lemon.c - COMMENT "Generating lemon parser generator" -) - -ADD_CUSTOM_COMMAND( - OUTPUT parser.c - DEPENDS parser.y contrib/lemon - COMMAND ./contrib/lemon parser.y - COMMENT "Generating parser.c" -) - -SET_PROPERTY(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "lemon;parser.h;parser.out") -SET_SOURCE_FILES_PROPERTIES("parser.c" PROPERTIES GENERATED TRUE COMPILE_FLAGS -Wno-error=unused-but-set-variable) -ADD_EXECUTABLE(ucode main.c ast.c lexer.c parser.c eval.c lib.c) +ADD_EXECUTABLE(ucode main.c lexer.c lib.c vm.c chunk.c value.c object.c compiler.c source.c) TARGET_LINK_LIBRARIES(ucode ${json}) CHECK_FUNCTION_EXISTS(dlopen DLOPEN_FUNCTION_EXISTS) @@ -1,694 +0,0 @@ -/* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include "ast.h" -#include "lib.h" -#include "lexer.h" -#include "parser.h" - -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#include <math.h> -#include <regex.h> - -static size_t uc_ext_types_count = 0; -static struct uc_extended_type *uc_ext_types = NULL; - -uint32_t -uc_new_op(struct uc_state *s, int type, struct json_object *val, ...) -{ - struct uc_op *newop; - uint32_t child; - int n_op = 0; - va_list ap; - - if ((s->poolsize + 1) == UINT32_MAX) { - fprintf(stderr, "Program too large\n"); - exit(127); - } - - s->pool = xrealloc(s->pool, (s->poolsize + 1) * sizeof(*newop)); - - newop = &s->pool[s->poolsize]; - memset(newop, 0, sizeof(*newop)); - - newop->is_first = !s->poolsize; - newop->is_op = true; - newop->type = type; - newop->val = val; - - va_start(ap, val); - - while (n_op < OPn_NUM && (child = va_arg(ap, uint32_t)) != UINT32_MAX) - newop->tree.operand[n_op++] = child; - - va_end(ap); - - return s->poolsize++; -} - -uint32_t -uc_wrap_op(struct uc_state *state, uint32_t parent, ...) -{ - uint32_t child; - int n_op = 0; - va_list ap; - - va_start(ap, parent); - - while (n_op < OPn_NUM && (child = va_arg(ap, uint32_t)) != UINT32_MAX) - OPn(parent, n_op++) = child; - - va_end(ap); - - return parent; -} - -uint32_t -uc_append_op(struct uc_state *state, uint32_t a, uint32_t b) -{ - uint32_t tail_off, next_off; - - for (tail_off = a, next_off = OP_NEXT(tail_off); - next_off != 0; - tail_off = next_off, next_off = OP_NEXT(next_off)) - ; - - OP_NEXT(tail_off) = b; - - return a; -} - -static int -double_rounded_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); - double d = json_object_get_double(v); - - if (isnan(d)) - return sprintbuf(pb, strict ? "\"NaN\"" : "NaN"); - - if (d == INFINITY) - return sprintbuf(pb, strict ? "1e309" : "Infinity"); - - if (d == -INFINITY) - return sprintbuf(pb, strict ? 
"-1e309" : "-Infinity"); - - return sprintbuf(pb, "%g", d); -} - -struct json_object * -uc_new_double(double v) -{ - struct json_object *d = json_object_new_double(v); - - if (!d) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - json_object_set_serializer(d, double_rounded_to_string, NULL, NULL); - - return d; -} - -static int -null_obj_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - return sprintbuf(pb, "null"); -} - -struct json_object * -uc_new_null(void) -{ - struct json_object *d = json_object_new_boolean(false); - - if (!d) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - json_object_set_serializer(d, null_obj_to_string, NULL, NULL); - - return d; -} - -static void -obj_free(struct json_object *v, void *ud) -{ - struct uc_op *op = json_object_get_userdata(v); - - json_object_put(op->tag.proto); - free(ud); -} - -struct json_object * -uc_new_object(struct json_object *proto) { - struct json_object *val = xjs_new_object(); - struct uc_op *op = xalloc(sizeof(*op)); - - op->val = val; - op->type = T_LBRACE; - op->tag.proto = json_object_get(proto); - - json_object_set_serializer(val, NULL, op, obj_free); - - return op->val; -} - -static void -re_free(struct json_object *v, void *ud) -{ - struct uc_op *op = ud; - - regfree((regex_t *)op->tag.data); - free(op); -} - -static int -re_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); - struct uc_op *op = json_object_get_userdata(v); - struct json_object *s; - const char *p; - size_t len; - - sprintbuf(pb, "%s/", strict ? "\"" : ""); - - s = xjs_new_string((char *)op + sizeof(*op) + sizeof(regex_t)); - - if (strict) - for (p = json_object_to_json_string(s) + 1, len = strlen(p) - 1; len > 0; len--, p++) - sprintbuf(pb, "%c", *p); - else - sprintbuf(pb, "%s", json_object_get_string(s)); - - json_object_put(s); - - return sprintbuf(pb, "/%s%s%s%s", - op->is_reg_global ? 
"g" : "", - op->is_reg_icase ? "i" : "", - op->is_reg_newline ? "s" : "", - strict ? "\"" : ""); -} - -struct json_object * -uc_new_regexp(const char *source, bool icase, bool newline, bool global, char **err) { - int cflags = REG_EXTENDED, res; - struct uc_op *op; - regex_t *re; - size_t len; - - op = xalloc(sizeof(*op) + sizeof(*re) + strlen(source) + 1); - re = (regex_t *)((char *)op + sizeof(*op)); - strcpy((char *)op + sizeof(*op) + sizeof(*re), source); - - if (icase) - cflags |= REG_ICASE; - - if (newline) - cflags |= REG_NEWLINE; - - op->type = T_REGEXP; - op->tag.data = re; - op->is_reg_icase = icase; - op->is_reg_global = global; - op->is_reg_newline = newline; - - res = regcomp(re, source, cflags); - - if (res != 0) { - len = regerror(res, re, NULL, 0); - *err = xalloc(len); - - regerror(res, re, *err, len); - free(op); - - return NULL; - } - - op->val = xjs_new_object(); - - if (!op->val) { - free(op); - - return NULL; - } - - json_object_set_serializer(op->val, re_to_string, op, re_free); - - return op->val; -} - -static void -func_free(struct json_object *v, void *ud) -{ - struct uc_op *op = ud; - struct uc_function *fn = op->tag.data; - - json_object_put(fn->args); - uc_release_scope(fn->parent_scope); - - free(op); -} - -static int -func_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT), rest; - struct uc_op *op = json_object_get_userdata(v); - struct uc_function *fn = op->tag.data; - size_t i, len; - - if (op->is_arrow) - sprintbuf(pb, "%s(", strict ? "\"" : ""); - else - sprintbuf(pb, "%sfunction%s%s(", - strict ? "\"" : "", - fn->name ? " " : "", - fn->name ? fn->name : ""); - - if (fn->args) { - len = json_object_array_length(fn->args); - rest = (len > 1) && json_object_is_type(json_object_array_get_idx(fn->args, len - 1), json_type_null); - - for (i = 0; i < len - rest; i++) { - sprintbuf(pb, "%s%s%s", - i ? ", " : "", - rest && i == len - 2 ? "..." 
: "", - json_object_get_string(json_object_array_get_idx(fn->args, i))); - } - } - - return sprintbuf(pb, ") %s{ ... }%s", - op->is_arrow ? "=> " : "", - strict ? "\"" : ""); -} - -struct json_object * -uc_new_func(struct uc_state *state, uint32_t decl, struct uc_scope *scope) -{ - struct json_object *val = xjs_new_object(); - uint32_t name_off, args_off, arg_off; - struct uc_function *fn; - struct uc_op *op; - size_t sz; - - sz = ALIGN(sizeof(*op)) + ALIGN(sizeof(*fn)); - - name_off = OPn(decl, 0); - args_off = OPn(decl, 1); - - if (name_off) - sz += ALIGN(json_object_get_string_len(OP_VAL(name_off)) + 1); - - op = xalloc(sz); - - fn = (void *)op + ALIGN(sizeof(*op)); - fn->entry = OPn(decl, 2); - - if (name_off) - fn->name = strcpy((char *)fn + ALIGN(sizeof(*fn)), json_object_get_string(OP_VAL(name_off))); - - if (args_off) { - fn->args = xjs_new_array(); - - for (arg_off = args_off; arg_off != 0; arg_off = OP_NEXT(arg_off)) { - json_object_array_add(fn->args, json_object_get(OP_VAL(arg_off))); - - /* if the last argument is a rest one (...arg), add extra null entry */ - if (OP_IS_ELLIP(arg_off)) { - json_object_array_add(fn->args, NULL); - break; - } - } - } - - fn->source = state->function ? 
state->function->source : NULL; - fn->parent_scope = uc_acquire_scope(scope); - - op->val = val; - op->type = T_FUNC; - op->is_arrow = (OP_TYPE(decl) == T_ARROW); - op->tag.data = fn; - - json_object_set_serializer(val, func_to_string, op, func_free); - - return op->val; -} - -static void -exception_free(struct json_object *v, void *ud) -{ - free(ud); -} - -static int -exception_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - return sprintbuf(pb, "%s", json_object_get_string(json_object_object_get(v, "message"))); -} - -static void -add_stacktrace(struct json_object *a, struct uc_function *function, size_t off) { - struct json_object *o = xjs_new_object(); - size_t line = 1, rlen = 0, len; - bool truncated = false; - char buf[256]; - - if (function->source->filename) - json_object_object_add(o, "filename", xjs_new_string(function->source->filename)); - - if (function->name) - json_object_object_add(o, "function", xjs_new_string(function->name)); - - if (function->source->fp) { - fseek(function->source->fp, 0, SEEK_SET); - - while (fgets(buf, sizeof(buf), function->source->fp)) { - len = strlen(buf); - rlen += len; - - if (rlen > off) { - json_object_object_add(o, "line", xjs_new_int64(line)); - json_object_object_add(o, "byte", xjs_new_int64(len - (rlen - off) + (truncated ? sizeof(buf) : 0) + 1)); - break; - } - - truncated = (len > 0 && buf[len-1] != '\n'); - line += !truncated; - } - } - - json_object_array_add(a, o); -} - -__attribute__((format(printf, 3, 4))) struct json_object * -uc_new_exception(struct uc_state *s, uint32_t off, const char *fmt, ...) -{ - struct uc_callstack *callstack, *prevcall, here = {}; - struct json_object *a; - struct uc_op *op; - va_list ap; - char *p; - int len; - - op = xalloc(sizeof(*op)); - op->type = T_EXCEPTION; - op->val = xjs_new_object(); - op->off = off; - op->tag.data = s->function ? 
s->function->source : s->source; - - a = xjs_new_array(); - - here.next = s->callstack; - here.function = s->function; - here.off = off; - - for (callstack = &here, prevcall = NULL; callstack != NULL; - prevcall = callstack, callstack = callstack->next) - if (callstack->off && callstack->function && callstack->function->source && - (!prevcall || callstack->function != prevcall->function || callstack->off != prevcall->off)) - add_stacktrace(a, callstack->function, callstack->off); - - json_object_object_add(op->val, "stacktrace", a); - - va_start(ap, fmt); - len = xvasprintf(&p, fmt, ap); - va_end(ap); - - json_object_object_add(op->val, "message", xjs_new_string_len(p, len)); - free(p); - - if (s->exception) - json_object_put(s->exception); - - s->exception = op->val; - - json_object_set_serializer(op->val, exception_to_string, op, exception_free); - - return json_object_get(op->val); -} - -static void -scope_free(struct json_object *v, void *ud) -{ - struct uc_scope *sc = ud; - - if (sc->parent) { - uc_release_scope(json_object_get_userdata(sc->parent)); - sc->parent = NULL; - } - - sc->scope = NULL; -} - -void -uc_release_scope(struct uc_scope *sc) -{ - if (sc->refs == 0) - abort(); - - sc->refs--; - - if (sc->refs == 0) - json_object_put(sc->scope); -} - -struct uc_scope * -uc_acquire_scope(struct uc_scope *sc) -{ - sc->refs++; - - return sc; -} - -struct uc_scope * -uc_new_scope(struct uc_state *s, struct uc_scope *parent) -{ - struct uc_scope *sc; - - sc = xalloc(sizeof(*sc)); - sc->scope = xjs_new_object(); - - if (parent) - sc->parent = uc_acquire_scope(parent)->scope; - - json_object_set_userdata(sc->scope, sc, scope_free); - - sc->next = s->scopelist; - s->scopelist = sc; - - return uc_acquire_scope(sc); -} - -struct uc_scope * -uc_parent_scope(struct uc_scope *scope) -{ - return json_object_get_userdata(scope->parent); -} - -static void -uc_reset(struct uc_state *s) -{ - json_object_put(s->exception); - s->exception = NULL; - - free(s->lex.lookbehind); - 
free(s->lex.buf); - memset(&s->lex, 0, sizeof(s->lex)); -} - -void -uc_free(struct uc_state *s) -{ - struct uc_source *src, *src_next; - struct uc_scope *sc, *sc_next; - struct json_object *scj; - size_t n; - - if (s) { - json_object_put(s->ctx); - - for (n = 0; n < s->poolsize; n++) - json_object_put(s->pool[n].val); - - free(s->pool); - - s->pool = NULL; - s->poolsize = 0; - - uc_reset(s); - - json_object_put(s->rval); - - for (sc = s->scopelist; sc; sc = sc->next) { - scj = sc->scope; - sc->scope = NULL; - json_object_put(scj); - } - - for (sc = s->scopelist; sc; sc = sc_next) { - sc_next = sc->next; - free(sc); - } - - for (src = s->sources; src; src = src_next) { - src_next = src->next; - - if (src->fp) - fclose(src->fp); - - free(src->filename); - free(src); - } - } - - while (uc_ext_types_count > 0) - json_object_put(uc_ext_types[--uc_ext_types_count].proto); - - free(uc_ext_types); - free(s); -} - -struct json_object * -uc_parse(struct uc_state *state, FILE *fp) -{ - void *pParser; - uint32_t off; - - uc_reset(state); - - pParser = ParseAlloc(xalloc); - - while (state->lex.state != UT_LEX_EOF) { - off = uc_get_token(state, fp); - - if (state->exception) - goto out; - - if (off) - Parse(pParser, OP_TYPE(off), off, state); - - if (state->exception) - goto out; - } - - Parse(pParser, 0, 0, state); - -out: - ParseFree(pParser, free); - - return state->exception; -} - -bool -uc_register_extended_type(const char *name, struct json_object *proto, void (*freefn)(void *)) -{ - uc_ext_types = xrealloc(uc_ext_types, (uc_ext_types_count + 1) * sizeof(*uc_ext_types)); - uc_ext_types[uc_ext_types_count].name = name; - uc_ext_types[uc_ext_types_count].free = freefn; - uc_ext_types[uc_ext_types_count].proto = proto; - uc_ext_types_count++; - - return true; -} - -static int -uc_extended_type_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); - struct uc_op *op = 
json_object_get_userdata(v); - struct uc_extended_type *et; - - if (!op) - return 0; - - et = &uc_ext_types[op->tag.type - 1]; - - return sprintbuf(pb, "%s<%s %p>%s", - strict ? "\"" : "", - et->name, op->tag.data, - strict ? "\"" : ""); -} - -static void -uc_extended_type_free(struct json_object *v, void *ud) -{ - struct uc_op *op = json_object_get_userdata(v); - struct uc_extended_type *et; - - if (!op) - return; - - et = &uc_ext_types[op->tag.type - 1]; - - if (et->free && op->tag.data) - et->free(op->tag.data); - - json_object_put(op->tag.proto); - free(ud); -} - -struct json_object * -uc_set_extended_type(struct json_object *v, const char *name, void *data) -{ - struct uc_extended_type *et = NULL; - struct uc_op *op; - size_t n; - - for (n = 0; n < uc_ext_types_count; n++) { - if (!strcmp(name, uc_ext_types[n].name)) { - et = &uc_ext_types[n]; - break; - } - } - - if (!et) - return NULL; - - op = xalloc(sizeof(*op)); - op->val = v; - op->type = T_RESSOURCE; - op->tag.proto = json_object_get(et->proto); - op->tag.type = n + 1; - op->tag.data = data; - - json_object_set_serializer(op->val, uc_extended_type_to_string, op, uc_extended_type_free); - - return op->val; -} - -void ** -uc_get_extended_type(struct json_object *v, const char *name) -{ - struct uc_op *op = json_object_get_userdata(v); - size_t n = op ? op->tag.type : 0; - struct uc_extended_type *et; - - if (!op || op->type != T_RESSOURCE || n == 0 || n > uc_ext_types_count) - return NULL; - - et = &uc_ext_types[n - 1]; - - if (name && strcmp(et->name, name)) - return NULL; - - return &op->tag.data; -} @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef __AST_H_ -#define __AST_H_ - -#include <stddef.h> -#include <stdint.h> -#include <stdlib.h> -#include <stdio.h> -#include <stdbool.h> -#include <stdarg.h> -#include <string.h> - -#ifdef JSONC - #include <json.h> -#else - #include <json-c/json.h> -#endif - -#define ALIGN(x) (((x) + sizeof(size_t) - 1) & -sizeof(size_t)) - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) -#endif - -#define JSON_C_TO_STRING_STRICT (1<<31) - -enum uc_lex_state { - UT_LEX_IDENTIFY_BLOCK, - UT_LEX_BLOCK_COMMENT_START, - UT_LEX_BLOCK_EXPRESSION_START, - UT_LEX_BLOCK_EXPRESSION_EMIT_TAG, - UT_LEX_BLOCK_STATEMENT_START, - UT_LEX_BLOCK_COMMENT, - UT_LEX_IDENTIFY_TOKEN, - UT_LEX_PARSE_TOKEN, - UT_LEX_EOF -}; - -struct uc_op { - uint16_t type; - uint16_t is_first:1; - uint16_t is_op:1; - uint16_t is_overflow:1; - uint16_t is_postfix:1; - uint16_t is_for_in:1; - uint16_t is_list:1; - uint16_t is_reg_icase:1; - uint16_t is_reg_newline:1; - uint16_t is_reg_global:1; - uint16_t is_ellip:1; - uint16_t is_arrow:1; - uint32_t off; - struct json_object *val; - union { - struct { - struct json_object *proto; - size_t type; - void *data; - uint32_t off; - } tag; - struct { - uint32_t next; - uint32_t operand[4]; - } tree; - }; -}; - -struct uc_scope { - struct uc_scope *next; - struct json_object *scope, *parent; - size_t refs; -}; - -struct uc_source { - struct uc_source *next; - char *filename; - uint32_t off; - FILE *fp; -}; - -struct uc_function { - char 
*name; - union { - struct json_object *args; - void *cfn; - }; - struct uc_scope *parent_scope; - struct uc_source *source; - uint32_t entry; -}; - -struct uc_callstack { - struct uc_callstack *next; - struct uc_function *function; - struct uc_scope *scope; - struct json_object *ctx; - uint32_t off; -}; - -struct uc_state { - struct uc_op *pool; - uint32_t poolsize; - uint32_t main; - uint8_t srand_called:1; - uint8_t trim_blocks:1; - uint8_t lstrip_blocks:1; - uint8_t strict_declarations:1; - struct { - enum uc_lex_state state; - uint8_t eof:1; - uint8_t skip_leading_whitespace:1; - uint8_t skip_leading_newline:1; - uint8_t within_expression_block:1; - uint8_t within_statement_block:1; - uint8_t semicolon_emitted:1; - uint8_t expect_div:1; - uint8_t is_escape:1; - size_t buflen; - char *buf, *bufstart, *bufend; - size_t lookbehindlen; - char *lookbehind; - const void *tok; - char esc[5]; - uint8_t esclen; - int lead_surrogate; - size_t lastoff; - } lex; - struct json_object *ctx, *rval, *exception; - struct uc_scope *scopelist, *scope; - struct uc_source *sources, *source; - struct uc_callstack *callstack; - struct uc_function *function; - size_t calldepth; -}; - -struct uc_extended_type { - const char *name; - struct json_object *proto; - void (*free)(void *); -}; - -static inline bool uc_is_type(struct json_object *val, int type) { - struct uc_op *tag = json_object_get_userdata(val); - - return (tag && tag->type == type); -}; - -#define OP(idx) (&state->pool[idx]) -#define OP_POS(idx) OP(idx)->off -#define OP_VAL(idx) OP(idx)->val -#define OP_TYPE(idx) OP(idx)->type -#define OP_NEXT(idx) OP(idx)->tree.next -#define OP_IS_LIST(idx) OP(idx)->is_list -#define OP_IS_ELLIP(idx) OP(idx)->is_ellip -#define OP_IS_FOR_IN(idx) OP(idx)->is_for_in -#define OP_IS_POSTFIX(idx) OP(idx)->is_postfix - -#define OPn_NUM ARRAY_SIZE(((struct uc_op *)NULL)->tree.operand) -#define OPn(idx, n) OP(idx)->tree.operand[n] -#define OPn_POS(idx, n) OP(OPn(idx, n))->off -#define OPn_VAL(idx, 
n) OP(OPn(idx, n))->val -#define OPn_TYPE(idx, n) OP(OPn(idx, n))->type -#define OPn_IS_LIST(idx, n) OP(OPn(idx, n))->is_list -#define OPn_IS_OVERFLOW(idx, n) OP(OPn(idx, n))->is_overflow - - - -uint32_t uc_new_op(struct uc_state *s, int type, struct json_object *val, ...); -uint32_t uc_wrap_op(struct uc_state *s, uint32_t parent, ...); -uint32_t uc_append_op(struct uc_state *s, uint32_t a, uint32_t b); -struct json_object *uc_parse(struct uc_state *s, FILE *fp); -void uc_free(struct uc_state *s); - -struct json_object *uc_new_func(struct uc_state *s, uint32_t decl, struct uc_scope *scope); -struct json_object *uc_new_object(struct json_object *proto); -struct json_object *uc_new_double(double v); -struct json_object *uc_new_null(void); -struct json_object *uc_new_regexp(const char *source, bool icase, bool newline, bool global, char **err); - -__attribute__((format(printf, 3, 0))) -struct json_object *uc_new_exception(struct uc_state *s, uint32_t off, const char *fmt, ...); - -struct uc_scope *uc_new_scope(struct uc_state *s, struct uc_scope *parent); -struct uc_scope *uc_parent_scope(struct uc_scope *scope); -struct uc_scope *uc_acquire_scope(struct uc_scope *scope); -void uc_release_scope(struct uc_scope *scope); - -bool uc_register_extended_type(const char *name, struct json_object *proto, void (*freefn)(void *)); -struct json_object *uc_set_extended_type(struct json_object *v, const char *name, void *data); -void **uc_get_extended_type(struct json_object *val, const char *name); - -void *ParseAlloc(void *(*mfunc)(size_t)); -void Parse(void *pParser, int type, uint32_t off, struct uc_state *s); -void ParseFree(void *pParser, void (*ffunc)(void *)); - - -static inline uint32_t getrefcnt(struct json_object *v) { - struct { - enum json_type o_type; - uint32_t _ref_count; - } *spy = (void *)v; - - return spy ? 
spy->_ref_count : 0; -} - -static inline void *xalloc(size_t size) { - void *ptr = calloc(1, size); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline void *xrealloc(void *ptr, size_t size) { - ptr = realloc(ptr, size); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline char *xstrdup(const char *s) { - char *ptr = strdup(s); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_object(void) { - struct json_object *ptr = json_object_new_object(); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_array(void) { - struct json_object *ptr = json_object_new_array(); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_int64(int64_t n) { - struct json_object *ptr = json_object_new_int64(n); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_string(const char *s) { - struct json_object *ptr = json_object_new_string(s); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_string_len(const char *s, size_t len) { - struct json_object *ptr = json_object_new_string_len(s, len); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - -static inline struct json_object *xjs_new_boolean(bool v) { - struct json_object *ptr = json_object_new_boolean(v); - - if (!ptr) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return ptr; -} - - -static inline struct json_tokener *xjs_new_tokener(void) { - struct json_tokener *tok = json_tokener_new(); - - if (!tok) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return tok; -} - -static 
inline int xasprintf(char **strp, const char *fmt, ...) { - va_list ap; - int len; - - va_start(ap, fmt); - len = vasprintf(strp, fmt, ap); - va_end(ap); - - if (len == -1) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return len; -} - -static inline int xvasprintf(char **strp, const char *fmt, va_list ap) { - int len = vasprintf(strp, fmt, ap); - - if (len == -1) { - fprintf(stderr, "Out of memory\n"); - abort(); - } - - return len; -} - -#endif /* __AST_H_ */ @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <assert.h> + +#include "chunk.h" +#include "util.h" + +#define OFFSETINFO_BITS (sizeof(((uc_offsetinfo *)NULL)->entries[0]) * 8) +#define OFFSETINFO_BYTE_BITS 3 +#define OFFSETINFO_INSN_BITS (OFFSETINFO_BITS - OFFSETINFO_BYTE_BITS) +#define OFFSETINFO_MAX_BYTES ((1 << OFFSETINFO_BYTE_BITS) - 1) +#define OFFSETINFO_MAX_INSNS ((1 << OFFSETINFO_INSN_BITS) - 1) +#define OFFSETINFO_NUM_BYTES(n) ((n) & OFFSETINFO_MAX_BYTES) +#define OFFSETINFO_NUM_INSNS(n) ((n) >> OFFSETINFO_BYTE_BITS) +#define OFFSETINFO_ENCODE(line, insns) ((line & OFFSETINFO_MAX_BYTES) | (((insns) << OFFSETINFO_BYTE_BITS) & ~OFFSETINFO_MAX_BYTES)) + + +void +uc_chunk_init(uc_chunk *chunk) +{ + chunk->count = 0; + chunk->entries = NULL; + + chunk->ehranges.count = 0; + chunk->ehranges.entries = NULL; + + chunk->debuginfo.offsets.count = 0; + chunk->debuginfo.offsets.entries = NULL; + + chunk->debuginfo.variables.count = 0; + chunk->debuginfo.variables.entries = NULL; + + uc_vallist_init(&chunk->constants); + uc_vallist_init(&chunk->debuginfo.varnames); +} + +void +uc_chunk_free(uc_chunk *chunk) +{ + uc_vector_clear(chunk); + uc_vector_clear(&chunk->ehranges); + uc_vallist_free(&chunk->constants); + + uc_vector_clear(&chunk->debuginfo.offsets); + uc_vector_clear(&chunk->debuginfo.variables); + uc_vallist_free(&chunk->debuginfo.varnames); + + uc_chunk_init(chunk); +} + +size_t +uc_chunk_add(uc_chunk *chunk, uint8_t byte, size_t offset) +{ + uc_offsetinfo *offsets = &chunk->debuginfo.offsets; + size_t i; + + uc_vector_grow(chunk); + + chunk->entries[chunk->count] = byte; + + /* offset info is encoded in bytes, for each byte, the first three bits + * specify the number of source text bytes to advance since the last entry + * and the remaining five bits specify the amount of instructions belonging + * to any given source text offset */ + if (offset > 0 || offsets->count == 0) { + /* if this offset is farther than seven (2 ** 3 - 1) bytes apart from + * the last one, we need to emit 
intermediate "jump" bytes with zero + * instructions each */ + for (i = offset; i > OFFSETINFO_MAX_BYTES; i -= OFFSETINFO_MAX_BYTES) { + /* advance by 7 bytes */ + uc_vector_grow(offsets); + offsets->entries[offsets->count++] = OFFSETINFO_ENCODE(OFFSETINFO_MAX_BYTES, 0); + } + + /* advance by `i` bytes, count one instruction */ + uc_vector_grow(offsets); + offsets->entries[offsets->count++] = OFFSETINFO_ENCODE(i, 1); + } + + /* update instruction count at current offset entry */ + else { + /* since we encode the per-offset instruction count in five bits, we + * can only count up to 31 instructions. If we exceed that limit, + * emit another offset entry with the initial three bits set to zero */ + if (OFFSETINFO_NUM_INSNS(offsets->entries[offsets->count - 1]) >= OFFSETINFO_MAX_INSNS) { + /* advance by 0 bytes, count one instruction */ + uc_vector_grow(offsets); + offsets->entries[offsets->count++] = OFFSETINFO_ENCODE(0, 1); + } + else { + offsets->entries[offsets->count - 1] = OFFSETINFO_ENCODE( + OFFSETINFO_NUM_BYTES(offsets->entries[offsets->count - 1]), + OFFSETINFO_NUM_INSNS(offsets->entries[offsets->count - 1]) + 1 + ); + } + } + + return chunk->count++; +} + +void +uc_chunk_pop(uc_chunk *chunk) +{ + uc_offsetinfo *offsets = &chunk->debuginfo.offsets; + int n_insns; + + assert(chunk->count > 0); + + chunk->count--; + + n_insns = OFFSETINFO_NUM_INSNS(offsets->entries[offsets->count - 1]); + + if (n_insns > 0) { + offsets->entries[offsets->count - 1] = OFFSETINFO_ENCODE( + OFFSETINFO_NUM_BYTES(offsets->entries[offsets->count - 1]), + n_insns - 1 + ); + } + else { + offsets->count--; + } +} + +struct json_object * +uc_chunk_get_constant(uc_chunk *chunk, size_t idx) +{ + return uc_vallist_get(&chunk->constants, idx); +} + +ssize_t +uc_chunk_add_constant(uc_chunk *chunk, struct json_object *val) +{ + return uc_vallist_add(&chunk->constants, val); +} + +size_t +uc_chunk_debug_get_srcpos(uc_chunk *chunk, size_t off) +{ + uc_offsetinfo *offsets = 
&chunk->debuginfo.offsets; + size_t i, inum = 0, lnum = 0; + + if (!offsets->count) + return 0; + + for (i = 0; i < offsets->count && inum < off; i++) { + lnum += OFFSETINFO_NUM_BYTES(offsets->entries[i]); + inum += OFFSETINFO_NUM_INSNS(offsets->entries[i]); + } + + return lnum; +} + +void +uc_chunk_debug_add_variable(uc_chunk *chunk, size_t from, size_t to, size_t slot, bool upval, json_object *name) +{ + uc_variables *variables = &chunk->debuginfo.variables; + uc_value_list *varnames = &chunk->debuginfo.varnames; + + assert(slot <= ((size_t)-1 / 2)); + + if (upval) + slot += (size_t)-1 / 2; + + uc_vector_grow(variables); + + variables->entries[variables->count].nameidx = uc_vallist_add(varnames, name); + variables->entries[variables->count].slot = slot; + variables->entries[variables->count].from = from; + variables->entries[variables->count].to = to; + + variables->count++; +} + +json_object * +uc_chunk_debug_get_variable(uc_chunk *chunk, size_t off, size_t slot, bool upval) +{ + uc_variables *variables = &chunk->debuginfo.variables; + uc_value_list *varnames = &chunk->debuginfo.varnames; + json_object *name = NULL; + size_t i; + + assert(slot <= ((size_t)-1 / 2)); + + if (upval) + slot += (size_t)-1 / 2; + + for (i = 0; i < variables->count; i++) { + if (variables->entries[i].slot != slot || + variables->entries[i].from > off || + variables->entries[i].to < off) + continue; + + name = uc_vallist_get(varnames, variables->entries[i].nameidx); + } + + return name; +} @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __CHUNK_H_ +#define __CHUNK_H_ + +#include <stdint.h> +#include <stddef.h> + +#include "value.h" +#include "util.h" + + +typedef struct { + size_t from, to, target, slot; +} uc_ehrange; + +typedef struct { + size_t from, to, slot, nameidx; +} uc_varrange; + +uc_declare_vector(uc_ehranges, uc_ehrange); +uc_declare_vector(uc_variables, uc_varrange); +uc_declare_vector(uc_offsetinfo, uint8_t); + +typedef struct { + size_t count; + uint8_t *entries; + uc_value_list constants; + uc_ehranges ehranges; + struct { + uc_variables variables; + uc_value_list varnames; + uc_offsetinfo offsets; + } debuginfo; +} uc_chunk; + +void uc_chunk_init(uc_chunk *chunk); +void uc_chunk_free(uc_chunk *chunk); +size_t uc_chunk_add(uc_chunk *chunk, uint8_t byte, size_t line); + +ssize_t uc_chunk_add_constant(uc_chunk *chunk, struct json_object *value); +struct json_object *uc_chunk_get_constant(uc_chunk *chunk, size_t idx); +void uc_chunk_pop(uc_chunk *chunk); + +size_t uc_chunk_debug_get_srcpos(uc_chunk *chunk, size_t off); +void uc_chunk_debug_add_variable(uc_chunk *chunk, size_t from, size_t to, size_t slot, bool upval, json_object *name); +json_object *uc_chunk_debug_get_variable(uc_chunk *chunk, size_t off, size_t slot, bool upval); + +#endif /* __CHUNK_H_ */ diff --git a/compiler.c b/compiler.c new file mode 100644 index 0000000..70e65d8 --- /dev/null +++ b/compiler.c @@ -0,0 +1,2622 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission 
notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <assert.h> + +#include "compiler.h" +#include "chunk.h" +#include "vm.h" /* I_* */ +#include "source.h" +#include "lib.h" /* format_error_context() */ + +static void uc_compiler_compile_unary(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_binary(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_paren(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_call(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_post_inc(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_constant(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_comma(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_labelexpr(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_function(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_and(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_or(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_dot(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_subscript(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_ternary(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_array(uc_compiler *compiler, bool assignable); +static void uc_compiler_compile_object(uc_compiler *compiler, bool 
assignable); + +static void uc_compiler_compile_declaration(uc_compiler *compiler); +static void uc_compiler_compile_statement(uc_compiler *compiler); +static void uc_compiler_compile_expstmt(uc_compiler *compiler); + +static uc_parse_rule +uc_compiler_parse_rules[TK_ERROR + 1] = { + [TK_LPAREN] = { uc_compiler_compile_paren, uc_compiler_compile_call, P_CALL }, + [TK_SUB] = { uc_compiler_compile_unary, uc_compiler_compile_binary, P_ADD }, + [TK_ADD] = { uc_compiler_compile_unary, uc_compiler_compile_binary, P_ADD }, + [TK_COMPL] = { uc_compiler_compile_unary, NULL, P_UNARY }, + [TK_NOT] = { uc_compiler_compile_unary, NULL, P_UNARY }, + [TK_INC] = { uc_compiler_compile_unary, uc_compiler_compile_post_inc, P_INC }, + [TK_DEC] = { uc_compiler_compile_unary, uc_compiler_compile_post_inc, P_INC }, + [TK_DIV] = { NULL, uc_compiler_compile_binary, P_MUL }, + [TK_MUL] = { NULL, uc_compiler_compile_binary, P_MUL }, + [TK_MOD] = { NULL, uc_compiler_compile_binary, P_MUL }, + [TK_NUMBER] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_DOUBLE] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_STRING] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_BOOL] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_NULL] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_THIS] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_REGEXP] = { uc_compiler_compile_constant, NULL, P_NONE }, + [TK_COMMA] = { NULL, uc_compiler_compile_comma, P_COMMA }, + [TK_LABEL] = { uc_compiler_compile_labelexpr, NULL, P_NONE }, + [TK_FUNC] = { uc_compiler_compile_function, NULL, P_NONE }, + [TK_AND] = { NULL, uc_compiler_compile_and, P_AND }, + [TK_OR] = { NULL, uc_compiler_compile_or, P_OR }, + [TK_BOR] = { NULL, uc_compiler_compile_binary, P_BOR }, + [TK_BXOR] = { NULL, uc_compiler_compile_binary, P_BXOR }, + [TK_BAND] = { NULL, uc_compiler_compile_binary, P_BAND }, + [TK_EQ] = { NULL, uc_compiler_compile_binary, P_EQUAL }, + [TK_EQS] = { NULL, uc_compiler_compile_binary, 
P_EQUAL }, + [TK_NE] = { NULL, uc_compiler_compile_binary, P_EQUAL }, + [TK_NES] = { NULL, uc_compiler_compile_binary, P_EQUAL }, + [TK_LT] = { NULL, uc_compiler_compile_binary, P_COMPARE }, + [TK_LE] = { NULL, uc_compiler_compile_binary, P_COMPARE }, + [TK_GT] = { NULL, uc_compiler_compile_binary, P_COMPARE }, + [TK_GE] = { NULL, uc_compiler_compile_binary, P_COMPARE }, + [TK_IN] = { NULL, uc_compiler_compile_binary, P_COMPARE }, + [TK_LSHIFT] = { NULL, uc_compiler_compile_binary, P_SHIFT }, + [TK_RSHIFT] = { NULL, uc_compiler_compile_binary, P_SHIFT }, + [TK_DOT] = { NULL, uc_compiler_compile_dot, P_CALL }, + [TK_LBRACK] = { uc_compiler_compile_array, uc_compiler_compile_subscript, P_CALL }, + [TK_QMARK] = { NULL, uc_compiler_compile_ternary, P_TERNARY }, + [TK_LBRACE] = { uc_compiler_compile_object, NULL, P_NONE }, +}; + +static ssize_t +uc_compiler_declare_local(uc_compiler *compiler, json_object *name); + +static ssize_t +uc_compiler_initialize_local(uc_compiler *compiler); + +static void +uc_compiler_init(uc_compiler *compiler, const char *name, size_t srcpos, uc_source *source) +{ + json_object *varname = xjs_new_string("(callee)"); + + compiler->scope_depth = 0; + + compiler->function = uc_function_new(name, srcpos, source); + + compiler->locals.count = 0; + compiler->locals.entries = NULL; + + compiler->upvals.count = 0; + compiler->upvals.entries = NULL; + + compiler->patchlist = NULL; + + compiler->parent = NULL; + + compiler->current_srcpos = srcpos; + + /* reserve stack slot 0 */ + uc_compiler_declare_local(compiler, varname); + uc_compiler_initialize_local(compiler); + uc_value_put(varname); +} + +static uc_chunk * +uc_compiler_current_chunk(uc_compiler *compiler) +{ + return &compiler->function->chunk; +} + +__attribute__((format(printf, 3, 0))) static void +uc_compiler_syntax_error(uc_compiler *compiler, size_t off, const char *fmt, ...) 
+{ + size_t line = 0, byte = 0, len = 0; + char *context = NULL; + char *s, *tmp; + va_list ap; + + if (compiler->parser->synchronizing) + return; + + compiler->parser->synchronizing = true; + + if (!off) + off = uc_function_get_srcpos(compiler->function, + uc_compiler_current_chunk(compiler)->count); + + if (off) { + byte = off; + line = uc_source_get_line(compiler->function->source, &byte); + + format_error_context(&context, &len, compiler->function->source, NULL, off); + } + + va_start(ap, fmt); + xvasprintf(&s, fmt, ap); + va_end(ap); + + xasprintf(&tmp, "Syntax error: %s\n", s); + free(s); + s = tmp; + + if (line) { + xasprintf(&tmp, "%sIn line %zu, byte %zu:\n", s, line, byte); + free(s); + s = tmp; + } + + if (context) { + xasprintf(&tmp, "%s%s\n\n", s, context); + free(context); + free(s); + s = tmp; + } + + if (compiler->parser->error) { + xasprintf(&tmp, "%s%s", compiler->parser->error, s); + free(compiler->parser->error); + free(s); + compiler->parser->error = tmp; + } + else { + compiler->parser->error = s; + } +} + +static size_t +uc_compiler_set_srcpos(uc_compiler *compiler, size_t srcpos) +{ + size_t delta; + + /* ensure that source positions never decrease */ + assert(srcpos == 0 || srcpos >= compiler->current_srcpos); + + delta = srcpos ? 
srcpos - compiler->current_srcpos : 0; + compiler->current_srcpos += delta; + + return delta; +} + +static void +uc_compiler_parse_advance(uc_compiler *compiler) +{ + uc_value_put(compiler->parser->prev.val); + compiler->parser->prev = compiler->parser->curr; + + while (true) { + compiler->parser->curr = *uc_lexer_next_token(&compiler->parser->lex); + + if (compiler->parser->curr.type != TK_ERROR) + break; + + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "%s", + json_object_get_string(compiler->parser->curr.val)); + + uc_value_put(compiler->parser->curr.val); + compiler->parser->curr.val = NULL; + } +} + +static void +uc_compiler_parse_consume(uc_compiler *compiler, uc_tokentype_t type) +{ + if (compiler->parser->curr.type == type) { + uc_compiler_parse_advance(compiler); + + return; + } + + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting %s", uc_get_tokenname(type)); +} + +static bool +uc_compiler_parse_check(uc_compiler *compiler, uc_tokentype_t type) +{ + return (compiler->parser->curr.type == type); +} + +static bool +uc_compiler_parse_match(uc_compiler *compiler, uc_tokentype_t type) +{ + if (!uc_compiler_parse_check(compiler, type)) + return false; + + uc_compiler_parse_advance(compiler); + + return true; +} + +static void +uc_compiler_parse_synchronize(uc_compiler *compiler) +{ + compiler->parser->synchronizing = false; + + while (compiler->parser->curr.type != TK_EOF) { + if (compiler->parser->prev.type == TK_SCOL) + return; + + switch (compiler->parser->curr.type) { + case TK_IF: + case TK_FOR: + case TK_WHILE: + case TK_SWITCH: + case TK_FUNC: + case TK_TRY: + case TK_RETURN: + case TK_BREAK: + case TK_CONTINUE: + case TK_LOCAL: + return; + + default: + break; + } + + uc_compiler_parse_advance(compiler); + } +} + +static uc_parse_rule * +uc_compiler_parse_rule(uc_tokentype_t type) +{ + return &uc_compiler_parse_rules[type]; +} + +static bool +uc_compiler_parse_at_assignment_op(uc_compiler 
*compiler) +{ + switch (compiler->parser->curr.type) { + case TK_ASBAND: + case TK_ASBXOR: + case TK_ASBOR: + case TK_ASLEFT: + case TK_ASRIGHT: + case TK_ASMUL: + case TK_ASDIV: + case TK_ASMOD: + case TK_ASADD: + case TK_ASSUB: + case TK_ASSIGN: + return true; + + default: + return false; + } +} + +static void +uc_compiler_parse_precedence(uc_compiler *compiler, uc_precedence_t precedence) +{ + uc_parse_rule *rule; + bool assignable; + + uc_compiler_parse_advance(compiler); + + rule = uc_compiler_parse_rule(compiler->parser->prev.type); + + if (!rule->prefix) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Expecting expression"); + + return; + } + + assignable = (precedence <= P_ASSIGN); + rule->prefix(compiler, assignable); + + while (precedence <= uc_compiler_parse_rule(compiler->parser->curr.type)->precedence) { + uc_compiler_parse_advance(compiler); + uc_compiler_parse_rule(compiler->parser->prev.type)->infix(compiler, assignable); + } + + if (assignable && uc_compiler_parse_at_assignment_op(compiler)) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Invalid left-hand side expression for assignment"); +} + +static size_t +uc_compiler_reladdr(uc_compiler *compiler, size_t from, size_t to) +{ + ssize_t delta = to - from; + + if (delta < -0x7fffffff || delta > 0x7fffffff) { + uc_compiler_syntax_error(compiler, 0, "Jump address too far"); + + return 0; + } + + return (size_t)(delta + 0x7fffffff); +} + +static size_t +uc_compiler_emit_insn(uc_compiler *compiler, size_t srcpos, enum insn_type insn) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t lineoff = uc_compiler_set_srcpos(compiler, srcpos); + + compiler->last_insn = uc_chunk_add(chunk, (uint8_t)insn, lineoff); + + return compiler->last_insn; +} + +static size_t +uc_compiler_emit_u8(uc_compiler *compiler, size_t srcpos, uint8_t n) +{ + return uc_chunk_add( + uc_compiler_current_chunk(compiler), + n, + uc_compiler_set_srcpos(compiler, srcpos)); +} + 
+static size_t +uc_compiler_emit_s8(uc_compiler *compiler, size_t srcpos, int8_t n) +{ + return uc_chunk_add( + uc_compiler_current_chunk(compiler), + n + 0x7f, + uc_compiler_set_srcpos(compiler, srcpos)); +} + +static size_t +uc_compiler_emit_u16(uc_compiler *compiler, size_t srcpos, uint16_t n) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t lineoff = uc_compiler_set_srcpos(compiler, srcpos); + + uc_chunk_add(chunk, n / 0x100, lineoff); + uc_chunk_add(chunk, n % 0x100, 0); + + return chunk->count - 2; +} + +static size_t +uc_compiler_emit_s16(uc_compiler *compiler, size_t srcpos, int16_t n) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t lineoff = uc_compiler_set_srcpos(compiler, srcpos); + uint16_t v = n + 0x7fff; + + uc_chunk_add(chunk, v / 0x100, lineoff); + uc_chunk_add(chunk, v % 0x100, 0); + + return chunk->count - 2; +} + +static size_t +uc_compiler_emit_u32(uc_compiler *compiler, size_t srcpos, uint32_t n) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t lineoff = uc_compiler_set_srcpos(compiler, srcpos); + + uc_chunk_add(chunk, n / 0x1000000, lineoff); + uc_chunk_add(chunk, (n / 0x10000) % 0x100, 0); + uc_chunk_add(chunk, (n / 0x100) % 0x100, 0); + uc_chunk_add(chunk, n % 0x100, 0); + + return chunk->count - 4; +} + +static size_t +uc_compiler_emit_s32(uc_compiler *compiler, size_t srcpos, int32_t n) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t lineoff = uc_compiler_set_srcpos(compiler, srcpos); + uint32_t v = n + 0x7fffffff; + + uc_chunk_add(chunk, v / 0x1000000, lineoff); + uc_chunk_add(chunk, (v / 0x10000) % 0x100, 0); + uc_chunk_add(chunk, (v / 0x100) % 0x100, 0); + uc_chunk_add(chunk, v % 0x100, 0); + + return chunk->count - 4; +} + +static uint32_t +uc_compiler_get_u32(uc_compiler *compiler, size_t off) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + + return chunk->entries[off + 0] * 0x1000000 + + chunk->entries[off + 1] * 0x10000 + + 
chunk->entries[off + 2] * 0x100 + + chunk->entries[off + 3]; +} + +static void +uc_compiler_set_u32(uc_compiler *compiler, size_t off, uint32_t n) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + + chunk->entries[off + 0] = n / 0x1000000; + chunk->entries[off + 1] = (n / 0x10000) % 0x100; + chunk->entries[off + 2] = (n / 0x100) % 0x100; + chunk->entries[off + 3] = n % 0x100; +} + +static size_t +uc_compiler_emit_constant(uc_compiler *compiler, size_t srcpos, json_object *val) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t cidx = uc_chunk_add_constant(chunk, val); + + uc_compiler_emit_insn(compiler, srcpos, I_LOAD); + uc_compiler_emit_u32(compiler, 0, cidx); + + return cidx; +} + +static size_t +uc_compiler_emit_regexp(uc_compiler *compiler, size_t srcpos, json_object *val) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t cidx = uc_chunk_add_constant(chunk, val); + + uc_compiler_emit_insn(compiler, srcpos, I_LREXP); + uc_compiler_emit_u32(compiler, 0, cidx); + + return cidx; +} + +static size_t +uc_compiler_emit_jmp(uc_compiler *compiler, size_t srcpos, uint32_t dest) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + + uc_compiler_emit_insn(compiler, srcpos, I_JMP); + uc_compiler_emit_u32(compiler, 0, dest ? uc_compiler_reladdr(compiler, chunk->count - 1, dest) : 0); + + return chunk->count - 5; +} + +static size_t +uc_compiler_emit_jmpz(uc_compiler *compiler, size_t srcpos, uint32_t dest) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + + uc_compiler_emit_insn(compiler, srcpos, I_JMPZ); + uc_compiler_emit_u32(compiler, 0, dest ? 
uc_compiler_reladdr(compiler, chunk->count - 1, dest) : 0); + + return chunk->count - 5; +} + +static ssize_t +uc_compiler_get_jmpaddr(uc_compiler *compiler, size_t off) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + + assert(chunk->entries[off] == I_JMP || chunk->entries[off] == I_JMPZ); + assert(off + 4 < chunk->count); + + return ( + chunk->entries[off + 1] * 0x1000000 + + chunk->entries[off + 2] * 0x10000 + + chunk->entries[off + 3] * 0x100 + + chunk->entries[off + 4] + ) - 0x7fffffff; +} + +static void +uc_compiler_set_jmpaddr(uc_compiler *compiler, size_t off, uint32_t dest) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t addr = uc_compiler_reladdr(compiler, off, dest); + + assert(chunk->entries[off] == I_JMP || chunk->entries[off] == I_JMPZ); + assert(off + 4 < chunk->count); + + chunk->entries[off + 1] = addr / 0x1000000; + chunk->entries[off + 2] = (addr / 0x10000) % 0x100; + chunk->entries[off + 3] = (addr / 0x100) % 0x100; + chunk->entries[off + 4] = addr % 0x100; +} + +static uc_function * +uc_compiler_finish(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_locals *locals = &compiler->locals; + uc_upvals *upvals = &compiler->upvals; + size_t i; + + uc_compiler_emit_insn(compiler, 0, I_LNULL); + uc_compiler_emit_insn(compiler, 0, I_RETURN); + + for (i = 0; i < locals->count; i++) { + uc_chunk_debug_add_variable(chunk, + locals->entries[i].from, + chunk->count, + i, + false, + locals->entries[i].name); + + uc_value_put(locals->entries[i].name); + } + + for (i = 0; i < upvals->count; i++) { + uc_chunk_debug_add_variable(chunk, + 0, + chunk->count, + i, + true, + upvals->entries[i].name); + + uc_value_put(upvals->entries[i].name); + } + + uc_vector_clear(locals); + uc_vector_clear(upvals); + + if (compiler->parser->error) { + uc_value_put(compiler->function->header.jso); + + return NULL; + } + + return compiler->function; +} + +static void +uc_compiler_enter_scope(uc_compiler 
*compiler) +{ + compiler->scope_depth++; +} + +static void +uc_compiler_leave_scope(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_locals *locals = &compiler->locals; + + compiler->scope_depth--; + + while (locals->count > 0 && locals->entries[locals->count - 1].depth > compiler->scope_depth) { + locals->count--; + + uc_chunk_debug_add_variable(chunk, + locals->entries[locals->count].from, + chunk->count, + locals->count, + false, + locals->entries[locals->count].name); + + uc_value_put(locals->entries[locals->count].name); + locals->entries[locals->count].name = NULL; + + uc_compiler_emit_insn(compiler, 0, + locals->entries[locals->count].captured ? I_CUPV : I_POP); + } +} + +static ssize_t +uc_compiler_declare_local(uc_compiler *compiler, json_object *name) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_locals *locals = &compiler->locals; + const char *str1, *str2; + size_t i, len1, len2; + + //if (compiler->scope_depth == 0) + // return; + + if (locals->count >= 0x00FFFFFF) { + uc_compiler_syntax_error(compiler, 0, "Too many local variables"); + + return -1; + } + + str1 = json_object_get_string(name); + len1 = json_object_get_string_len(name); + + for (i = locals->count; i > 0; i--) { + if (locals->entries[i - 1].depth != -1 && locals->entries[i - 1].depth < compiler->scope_depth) + break; + + str2 = json_object_get_string(locals->entries[i - 1].name); + len2 = json_object_get_string_len(locals->entries[i - 1].name); + + if (len1 == len2 && !strcmp(str1, str2)) { + if (compiler->parser->config && + compiler->parser->config->strict_declarations) { + uc_compiler_syntax_error(compiler, 0, "Variable '%s' redeclared", str2); + + return -1; + } + + return i - 1; + } + } + + uc_vector_grow(locals); + + locals->entries[locals->count].name = uc_value_get(name); + locals->entries[locals->count].depth = -1; + locals->entries[locals->count].captured = false; + locals->entries[locals->count].from = chunk->count; + 
locals->count++; + + return -1; +} + +static ssize_t +uc_compiler_initialize_local(uc_compiler *compiler) +{ + uc_locals *locals = &compiler->locals; + + locals->entries[locals->count - 1].depth = compiler->scope_depth; + + return locals->count - 1; +} + +static ssize_t +uc_compiler_resolve_local(uc_compiler *compiler, json_object *name) +{ + uc_locals *locals = &compiler->locals; + const char *str1, *str2; + size_t i, len1, len2; + + str1 = json_object_get_string(name); + len1 = json_object_get_string_len(name); + + for (i = locals->count; i > 0; i--) { + str2 = json_object_get_string(locals->entries[i - 1].name); + len2 = json_object_get_string_len(locals->entries[i - 1].name); + + if (len1 != len2 || strcmp(str1, str2)) + continue; + + if (locals->entries[i - 1].depth == -1) { + uc_compiler_syntax_error(compiler, 0, + "Can't access lexical declaration '%s' before initialization", str2); + + return -1; + } + + return i - 1; + } + + return -1; +} + +static ssize_t +uc_compiler_add_upval(uc_compiler *compiler, ssize_t idx, bool local, json_object *name) +{ + uc_function *function = compiler->function; + uc_upvals *upvals = &compiler->upvals; + uc_upval *uv; + size_t i; + + for (i = 0, uv = upvals->entries; i < upvals->count; i++, uv = upvals->entries + i) + if (uv->index == idx && uv->local == local) + return i; + + /* XXX: encoding... 
*/ + if (upvals->count >= (2 << 14)) { + uc_compiler_syntax_error(compiler, 0, "Too many upvalues"); + + return -1; + } + + uc_vector_grow(upvals); + + upvals->entries[upvals->count].local = local; + upvals->entries[upvals->count].index = idx; + upvals->entries[upvals->count].name = uc_value_get(name); + + function->nupvals++; + + return upvals->count++; +} + +static ssize_t +uc_compiler_resolve_upval(uc_compiler *compiler, json_object *name) +{ + ssize_t idx; + + if (!compiler->parent) + return -1; + + idx = uc_compiler_resolve_local(compiler->parent, name); + + if (idx > -1) { + compiler->parent->locals.entries[idx].captured = true; + + return uc_compiler_add_upval(compiler, idx, true, name); + } + + idx = uc_compiler_resolve_upval(compiler->parent, name); + + if (idx > -1) + return uc_compiler_add_upval(compiler, idx, false, name); + + return -1; +} + +static void +uc_compiler_backpatch(uc_compiler *compiler, size_t break_addr, size_t next_addr) +{ + uc_patchlist *pl = compiler->patchlist; + uc_patchlist *pp = pl->parent; + volatile ssize_t jmpaddr; + size_t i; + + for (i = 0; i < pl->count; i++) { + jmpaddr = uc_compiler_get_jmpaddr(compiler, pl->entries[i]); + + switch (jmpaddr) { + case TK_BREAK: + /* if we have a break addr, patch instruction */ + if (break_addr) { + uc_compiler_set_jmpaddr(compiler, pl->entries[i], break_addr); + continue; + } + + break; + + case TK_CONTINUE: + /* if we have a continue addr, patch instruction */ + if (next_addr) { + uc_compiler_set_jmpaddr(compiler, pl->entries[i], next_addr); + continue; + } + + break; + } + + /* propagate unhandled patch instructions to parent patch list */ + if (pp) { + uc_vector_grow(pp); + pp->entries[pp->count++] = pl->entries[i]; + } + } + + free(pl->entries); + + compiler->patchlist = pl->parent; +} + +static void +uc_compiler_emit_inc_dec(uc_compiler *compiler, uc_tokentype_t toktype, bool is_postfix) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + enum insn_type type; + uint32_t cidx 
= 0; + + /* determine kind of emitted load instruction and operand value (if any) */ + type = chunk->entries[compiler->last_insn]; + + if (type == I_LVAR || type == I_LLOC || type == I_LUPV) { + cidx = uc_compiler_get_u32(compiler, compiler->last_insn + 1); + + uc_chunk_pop(chunk); + uc_chunk_pop(chunk); + uc_chunk_pop(chunk); + uc_chunk_pop(chunk); + uc_chunk_pop(chunk); + } + + /* if we're mutating an object or array field, pop the last lval instruction + * to leave object + last field name value on stack */ + else if (type == I_LVAL) { + uc_chunk_pop(chunk); + } + else { + uc_compiler_syntax_error(compiler, 0, "Invalid increment/decrement operand"); + + return; + } + + /* add / subtract 1 */ + uc_compiler_emit_insn(compiler, 0, I_LOAD8); + uc_compiler_emit_s8(compiler, 0, (toktype == TK_INC) ? 1 : -1); + + /* depending on variable type, emit corresponding increment instruction */ + switch (type) { + case I_LVAR: + uc_compiler_emit_insn(compiler, 0, I_UVAR); + uc_compiler_emit_u32(compiler, 0, (I_PLUS << 24) | cidx); + break; + + case I_LLOC: + uc_compiler_emit_insn(compiler, 0, I_ULOC); + uc_compiler_emit_u32(compiler, 0, (I_PLUS << 24) | cidx); + break; + + case I_LUPV: + uc_compiler_emit_insn(compiler, 0, I_UUPV); + uc_compiler_emit_u32(compiler, 0, (I_PLUS << 24) | cidx); + break; + + case I_LVAL: + uc_compiler_emit_insn(compiler, 0, I_UVAL); + uc_compiler_emit_u8(compiler, 0, I_PLUS); + break; + + default: + break; + } + + /* for post increment or decrement, add/subtract 1 to yield final value */ + if (is_postfix) { + uc_compiler_emit_insn(compiler, 0, I_LOAD8); + uc_compiler_emit_s8(compiler, 0, 1); + + uc_compiler_emit_insn(compiler, 0, (toktype == TK_INC) ? 
I_SUB : I_ADD); + } +} + + +static void +uc_compiler_compile_unary(uc_compiler *compiler, bool assignable) +{ + uc_tokentype_t type = compiler->parser->prev.type; + + uc_compiler_parse_precedence(compiler, P_UNARY); + + switch (type) { + case TK_SUB: + uc_compiler_emit_insn(compiler, 0, I_MINUS); + break; + + case TK_ADD: + uc_compiler_emit_insn(compiler, 0, I_PLUS); + break; + + case TK_NOT: + uc_compiler_emit_insn(compiler, 0, I_NOT); + break; + + case TK_COMPL: + uc_compiler_emit_insn(compiler, 0, I_COMPL); + break; + + case TK_INC: + case TK_DEC: + uc_compiler_emit_inc_dec(compiler, type, false); + break; + + default: + return; + } +} + +static void +uc_compiler_compile_binary(uc_compiler *compiler, bool assignable) +{ + uc_tokentype_t type = compiler->parser->prev.type; + + uc_compiler_parse_precedence(compiler, uc_compiler_parse_rule(type)->precedence + 1); + + switch (type) { + case TK_ADD: uc_compiler_emit_insn(compiler, 0, I_ADD); break; + case TK_SUB: uc_compiler_emit_insn(compiler, 0, I_SUB); break; + case TK_MUL: uc_compiler_emit_insn(compiler, 0, I_MUL); break; + case TK_DIV: uc_compiler_emit_insn(compiler, 0, I_DIV); break; + case TK_MOD: uc_compiler_emit_insn(compiler, 0, I_MOD); break; + case TK_LSHIFT: uc_compiler_emit_insn(compiler, 0, I_LSHIFT); break; + case TK_RSHIFT: uc_compiler_emit_insn(compiler, 0, I_RSHIFT); break; + case TK_BAND: uc_compiler_emit_insn(compiler, 0, I_BAND); break; + case TK_BXOR: uc_compiler_emit_insn(compiler, 0, I_BXOR); break; + case TK_BOR: uc_compiler_emit_insn(compiler, 0, I_BOR); break; + case TK_LT: uc_compiler_emit_insn(compiler, 0, I_LT); break; + case TK_LE: + uc_compiler_emit_insn(compiler, 0, I_GT); + uc_compiler_emit_insn(compiler, 0, I_NOT); + break; + case TK_GT: uc_compiler_emit_insn(compiler, 0, I_GT); break; + case TK_GE: + uc_compiler_emit_insn(compiler, 0, I_LT); + uc_compiler_emit_insn(compiler, 0, I_NOT); + break; + case TK_EQ: uc_compiler_emit_insn(compiler, 0, I_EQ); break; + case TK_NE: 
uc_compiler_emit_insn(compiler, 0, I_NE); break; + case TK_EQS: uc_compiler_emit_insn(compiler, 0, I_EQS); break; + case TK_NES: uc_compiler_emit_insn(compiler, 0, I_NES); break; + case TK_IN: uc_compiler_emit_insn(compiler, 0, I_IN); break; + default: + return; + } +} + +static enum insn_type +uc_compiler_emit_variable_rw(uc_compiler *compiler, json_object *varname, uc_tokentype_t type) +{ + enum insn_type insn; + uint32_t sub_insn; + ssize_t idx; + + switch (type) { + case TK_ASADD: sub_insn = I_ADD; break; + case TK_ASSUB: sub_insn = I_SUB; break; + case TK_ASMUL: sub_insn = I_MUL; break; + case TK_ASDIV: sub_insn = I_DIV; break; + case TK_ASMOD: sub_insn = I_MOD; break; + case TK_ASBAND: sub_insn = I_BAND; break; + case TK_ASBXOR: sub_insn = I_BXOR; break; + case TK_ASBOR: sub_insn = I_BOR; break; + case TK_ASLEFT: sub_insn = I_LSHIFT; break; + case TK_ASRIGHT: sub_insn = I_RSHIFT; break; + default: sub_insn = 0; break; + } + + if (!varname) { + insn = sub_insn ? I_UVAL : (type ? I_SVAL : I_LVAL); + + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); + + if (sub_insn) + uc_compiler_emit_u8(compiler, compiler->parser->prev.pos, sub_insn); + } + else if ((idx = uc_compiler_resolve_local(compiler, varname)) > -1) { + insn = sub_insn ? I_ULOC : (type ? I_SLOC : I_LLOC); + + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); + uc_compiler_emit_u32(compiler, compiler->parser->prev.pos, + ((sub_insn & 0xff) << 24) | idx); + } + else if ((idx = uc_compiler_resolve_upval(compiler, varname)) > -1) { + insn = sub_insn ? I_UUPV : (type ? I_SUPV : I_LUPV); + + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); + uc_compiler_emit_u32(compiler, compiler->parser->prev.pos, + ((sub_insn & 0xff) << 24) | idx); + } + else { + idx = uc_chunk_add_constant(uc_compiler_current_chunk(compiler), varname); + insn = sub_insn ? I_UVAR : (type ? 
I_SVAR : I_LVAR); + + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, insn); + uc_compiler_emit_u32(compiler, compiler->parser->prev.pos, + ((sub_insn & 0xff) << 24) | idx); + } + + return insn; +} + +static void +uc_compiler_compile_expression(uc_compiler *compiler) +{ + uc_compiler_parse_precedence(compiler, P_COMMA); +} + +static bool +uc_compiler_compile_assignment(uc_compiler *compiler, json_object *var) +{ + uc_tokentype_t type = compiler->parser->curr.type; + + if (uc_compiler_parse_at_assignment_op(compiler)) { + uc_compiler_parse_advance(compiler); + uc_compiler_parse_precedence(compiler, P_ASSIGN); + uc_compiler_emit_variable_rw(compiler, var, type); + + return true; + } + + return false; +} + +static bool +uc_compiler_compile_arrowfn(uc_compiler *compiler, json_object *args, bool restarg) +{ + bool array = json_object_is_type(args, json_type_array); + uc_compiler fncompiler = {}; + size_t i, pos, load_off; + uc_function *fn; + ssize_t slot; + + if (!uc_compiler_parse_match(compiler, TK_ARROW)) + return false; + + pos = compiler->parser->prev.pos; + + uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, + compiler->function->source); + + fncompiler.parent = compiler; + fncompiler.parser = compiler->parser; + + fncompiler.function->arrow = true; + fncompiler.function->vararg = args ? restarg : false; + fncompiler.function->nargs = array ? json_object_array_length(args) : !!args; + + uc_compiler_enter_scope(&fncompiler); + + /* declare local variables for arguments */ + for (i = 0; i < fncompiler.function->nargs; i++) { + slot = uc_compiler_declare_local(&fncompiler, + array ? 
json_object_array_get_idx(args, i) : args); + + if (slot != -1) + uc_compiler_syntax_error(&fncompiler, pos, + "Duplicate argument names are not allowed in this context"); + + uc_compiler_initialize_local(&fncompiler); + } + + /* parse and compile body */ + if (uc_compiler_parse_match(&fncompiler, TK_LBRACE)) { + while (!uc_compiler_parse_check(&fncompiler, TK_RBRACE) && + !uc_compiler_parse_check(&fncompiler, TK_EOF)) + uc_compiler_compile_declaration(&fncompiler); + + uc_compiler_parse_consume(&fncompiler, TK_RBRACE); + + /* overwrite last pop result with return */ + if (fncompiler.function->chunk.count) { + uc_chunk_pop(&fncompiler.function->chunk); + uc_compiler_emit_insn(&fncompiler, 0, I_RETURN); + } + } + else { + uc_compiler_compile_expression(&fncompiler); + uc_compiler_emit_insn(&fncompiler, 0, I_RETURN); + } + + /* emit load instruction for function value */ + uc_compiler_emit_insn(compiler, pos, I_ARFN); + load_off = uc_compiler_emit_u32(compiler, 0, 0); + + /* encode upvalue information */ + for (i = 0; i < fncompiler.function->nupvals; i++) + uc_compiler_emit_s32(compiler, 0, + fncompiler.upvals.entries[i].local + ? 
-(fncompiler.upvals.entries[i].index + 1) + : fncompiler.upvals.entries[i].index); + + /* finalize function compiler */ + fn = uc_compiler_finish(&fncompiler); + + if (fn) + uc_compiler_set_u32(compiler, load_off, + uc_chunk_add_constant(uc_compiler_current_chunk(compiler), + fn->header.jso)); + + return true; +} + +static uc_tokentype_t +uc_compiler_compile_var_or_arrowfn(uc_compiler *compiler, bool assignable, json_object *name) +{ + uc_tokentype_t rv; + + if (assignable && uc_compiler_compile_assignment(compiler, name)) { + rv = TK_ASSIGN; + } + else if (uc_compiler_compile_arrowfn(compiler, name, false)) { + rv = TK_ARROW; + } + else { + uc_compiler_emit_variable_rw(compiler, name, 0); + rv = TK_LABEL; + } + + return rv; +} + +static void +uc_compiler_compile_paren(uc_compiler *compiler, bool assignable) +{ + json_object *varnames = NULL, *varname; + bool maybe_arrowfn = false; + bool restarg = false; + + /* First try to parse a complete parameter expression and remember the + * consumed label tokens as we go. 
*/ + while (true) { + if (uc_compiler_parse_match(compiler, TK_LABEL)) { + if (!varnames) + varnames = xjs_new_array(); + + json_object_array_add(varnames, uc_value_get(compiler->parser->prev.val)); + } + else if (uc_compiler_parse_match(compiler, TK_ELLIP)) { + uc_compiler_parse_consume(compiler, TK_LABEL); + + if (!varnames) + varnames = xjs_new_array(); + + json_object_array_add(varnames, uc_value_get(compiler->parser->prev.val)); + + uc_compiler_parse_consume(compiler, TK_RPAREN); + + maybe_arrowfn = true; + restarg = true; + + break; + } + else if (uc_compiler_parse_check(compiler, TK_COMMA)) { + /* Reject consecutive commas */ + if (compiler->parser->prev.type == TK_COMMA) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting expression"); + + uc_compiler_parse_advance(compiler); + + continue; + } + else { + maybe_arrowfn = uc_compiler_parse_match(compiler, TK_RPAREN); + break; + } + } + + /* The lhs we parsed so far is elligible for an arrow function arg list, + * try to continue compiling into arrow function... */ + if (maybe_arrowfn) { + /* If we can parse the remainder as arrow function, we're done */ + if (uc_compiler_compile_arrowfn(compiler, varnames, restarg)) + goto out; + + /* ... otherwise disallow the `...` spread operator and empty + * parenthesized expressions */ + if (restarg || !varnames) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Expecting '=>' after parameter list"); + + goto out; + } + } + + /* If we reach this, the expression we parsed so far cannot be a parameter + * list for an arrow function and we might have consumed one or multiple + * consecutive labels. 
*/ + if (varnames) { + /* Get last variable name */ + varname = json_object_array_get_idx(varnames, + json_object_array_length(varnames) - 1); + + /* If we consumed the right paren, the expression is complete and we + * only need to emit a variable read operation for the last parsed + * label since previous read operations are shadowed by subsequent ones + * in comma expressions and since pure variable reads are without + * side effects. */ + if (maybe_arrowfn) { + uc_compiler_emit_variable_rw(compiler, varname, 0); + + goto out; + } + + /* ... otherwise if the last token was a label, try continue parsing as + * assignment or arrow function expression and if that fails, as + * relational one */ + if (compiler->parser->prev.type == TK_LABEL) { + if (uc_compiler_compile_var_or_arrowfn(compiler, true, varname) == TK_LABEL) { + /* parse operand and rhs */ + while (P_TERNARY <= uc_compiler_parse_rule(compiler->parser->curr.type)->precedence) { + uc_compiler_parse_advance(compiler); + uc_compiler_parse_rule(compiler->parser->prev.type)->infix(compiler, true); + } + } + + /* If we're not at the end of the expression, we require a comma. + * Also pop intermediate result in this case. */ + if (!uc_compiler_parse_check(compiler, TK_RPAREN)) { + uc_compiler_emit_insn(compiler, 0, I_POP); + uc_compiler_parse_consume(compiler, TK_COMMA); + } + } + } + + /* When we reach this point, all already complete expression possibilities + * have been eliminated and we either need to compile the next, non-label + * expression or reached the closing paren. If neither applies, we have a + * syntax error. 
*/ + if (!uc_compiler_parse_check(compiler, TK_RPAREN)) + uc_compiler_compile_expression(compiler); + + /* At this point we expect the end of the parenthesized expression, anything + * else is a syntax error */ + uc_compiler_parse_consume(compiler, TK_RPAREN); + +out: + uc_value_put(varnames); +} + +static void +uc_compiler_compile_call(uc_compiler *compiler, bool assignable) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_jmplist spreads = {}; + enum insn_type type; + size_t i, nargs = 0; + + /* determine the kind of the lhs */ + type = chunk->entries[compiler->last_insn]; + + /* if lhs is a dot or bracket expression, pop the LVAL instruction */ + if (type == I_LVAL) + uc_chunk_pop(chunk); + + /* compile arguments */ + if (!uc_compiler_parse_check(compiler, TK_RPAREN)) { + do { + /* if this is a spread arg, remember the argument index */ + if (uc_compiler_parse_match(compiler, TK_ELLIP)) { + uc_vector_grow(&spreads); + spreads.entries[spreads.count++] = nargs; + } + + /* compile argument expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + nargs++; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + } + + uc_compiler_parse_consume(compiler, TK_RPAREN); + + /* if lhs is a dot or bracket expression, emit a method call */ + if (type == I_LVAL) + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_MCALL); + /* else ordinary call */ + else + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_CALL); + + if (nargs > 0xffff || spreads.count > 0xffff) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Too many function call arguments"); + + /* encode ordinary (low 16 bit) and spread argument (high 16 bit) count */ + uc_compiler_emit_u32(compiler, 0, ((spreads.count & 0xffff) << 16) | nargs); + + /* encode spread arg positions */ + for (i = 0; i < spreads.count; i++) + uc_compiler_emit_u16(compiler, 0, nargs - spreads.entries[i] - 1); + + uc_vector_clear(&spreads); +} + +static void 
+uc_compiler_compile_post_inc(uc_compiler *compiler, bool assignable) +{ + uc_compiler_emit_inc_dec(compiler, compiler->parser->prev.type, true); +} + +static void +uc_compiler_compile_constant(uc_compiler *compiler, bool assignable) +{ + int64_t n; + + switch (compiler->parser->prev.type) { + case TK_THIS: + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LTHIS); + break; + + case TK_NULL: + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LNULL); + break; + + case TK_BOOL: + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, + json_object_get_boolean(compiler->parser->prev.val) ? I_LTRUE : I_LFALSE); + break; + + case TK_DOUBLE: + case TK_STRING: + uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.val); + break; + + case TK_REGEXP: + uc_compiler_emit_regexp(compiler, compiler->parser->prev.pos, compiler->parser->prev.val); + break; + + case TK_NUMBER: + n = json_object_get_int64(compiler->parser->prev.val); + + if (n >= -0x7f && n <= 0x7f) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LOAD8); + uc_compiler_emit_s8(compiler, compiler->parser->prev.pos, n); + } + else if (n >= -0x7fff && n <= 0x7fff) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LOAD16); + uc_compiler_emit_s16(compiler, compiler->parser->prev.pos, n); + } + else if (n >= -0x7fffffff && n <= 0x7fffffff) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LOAD32); + uc_compiler_emit_s32(compiler, compiler->parser->prev.pos, n); + } + else { + uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.val); + } + + break; + + default: + break; + } +} + +static void +uc_compiler_compile_comma(uc_compiler *compiler, bool assignable) +{ + uc_compiler_emit_insn(compiler, 0, I_POP); + uc_compiler_parse_precedence(compiler, P_ASSIGN); +} + +static void +uc_compiler_compile_labelexpr(uc_compiler *compiler, bool assignable) +{ + json_object *label = 
uc_value_get(compiler->parser->prev.val); + + uc_compiler_compile_var_or_arrowfn(compiler, assignable, label); + uc_value_put(label); +} + +static bool +uc_compiler_compile_delimitted_block(uc_compiler *compiler, uc_tokentype_t endtype) +{ + while (!uc_compiler_parse_check(compiler, endtype) && + !uc_compiler_parse_check(compiler, TK_EOF)) + uc_compiler_compile_declaration(compiler); + + return uc_compiler_parse_check(compiler, endtype); +} + +static void +uc_compiler_compile_function(uc_compiler *compiler, bool assignable) +{ + uc_compiler fncompiler = {}; + json_object *name = NULL; + ssize_t slot = -1, pos; + uc_tokentype_t type; + size_t i, load_off; + uc_function *fn; + + pos = compiler->parser->prev.pos; + type = compiler->parser->prev.type; + + if (uc_compiler_parse_match(compiler, TK_LABEL)) { + name = compiler->parser->prev.val; + + /* Named functions are syntactic sugar for local variable declaration + * with function value assignment. If a name token was encountered, + * initialize a local variable for it... */ + slot = uc_compiler_declare_local(compiler, name); + + if (slot == -1) + uc_compiler_initialize_local(compiler); + } + + uc_compiler_init(&fncompiler, + name ? 
json_object_get_string(name) : NULL, compiler->parser->prev.pos, + compiler->function->source); + + fncompiler.parent = compiler; + fncompiler.parser = compiler->parser; + + uc_compiler_parse_consume(&fncompiler, TK_LPAREN); + + uc_compiler_enter_scope(&fncompiler); + + /* compile argument specification */ + while (true) { + if (uc_compiler_parse_check(&fncompiler, TK_RPAREN)) + break; + + if (uc_compiler_parse_match(&fncompiler, TK_ELLIP)) + fncompiler.function->vararg = true; + + if (uc_compiler_parse_match(&fncompiler, TK_LABEL)) { + fncompiler.function->nargs++; + + uc_compiler_declare_local(&fncompiler, fncompiler.parser->prev.val); + uc_compiler_initialize_local(&fncompiler); + + if (fncompiler.function->vararg || + !uc_compiler_parse_match(&fncompiler, TK_COMMA)) + break; + } + else { + uc_compiler_syntax_error(&fncompiler, fncompiler.parser->curr.pos, + "Expecting Label"); + + return; + } + } + + uc_compiler_parse_consume(&fncompiler, TK_RPAREN); + + /* parse and compile function body */ + if (uc_compiler_parse_match(&fncompiler, TK_COLON)) { + uc_compiler_compile_delimitted_block(&fncompiler, TK_ENDFUNC); + uc_compiler_parse_consume(&fncompiler, TK_ENDFUNC); + } + else if (uc_compiler_parse_match(&fncompiler, TK_LBRACE)) { + uc_compiler_compile_delimitted_block(&fncompiler, TK_RBRACE); + uc_compiler_parse_consume(&fncompiler, TK_RBRACE); + } + else { + uc_compiler_syntax_error(&fncompiler, fncompiler.parser->curr.pos, + "Expecting '{' or ':' after function parameters"); + } + + /* emit load instruction for function value */ + uc_compiler_emit_insn(compiler, pos, (type == TK_ARROW) ? I_ARFN : I_CLFN); + load_off = uc_compiler_emit_u32(compiler, 0, 0); + + /* encode upvalue information */ + for (i = 0; i < fncompiler.function->nupvals; i++) + uc_compiler_emit_s32(compiler, 0, + fncompiler.upvals.entries[i].local + ? 
-(fncompiler.upvals.entries[i].index + 1) + : fncompiler.upvals.entries[i].index); + + /* finalize function compiler */ + fn = uc_compiler_finish(&fncompiler); + + if (fn) + uc_compiler_set_u32(compiler, load_off, + uc_chunk_add_constant(uc_compiler_current_chunk(compiler), + fn->header.jso)); + + /* if a local variable of the same name already existed, overwrite its value + * with the compiled function here */ + if (slot != -1) { + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, slot); + uc_compiler_emit_insn(compiler, 0, I_POP); + } +} + +static void +uc_compiler_compile_and(uc_compiler *compiler, bool assignable) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t jmpz_off; + + uc_compiler_emit_insn(compiler, 0, I_COPY); + uc_compiler_emit_u8(compiler, 0, 0); + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + uc_compiler_emit_insn(compiler, 0, I_POP); + uc_compiler_parse_precedence(compiler, P_AND); + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); +} + +static void +uc_compiler_compile_or(uc_compiler *compiler, bool assignable) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t jmpz_off, jmp_off; + + uc_compiler_emit_insn(compiler, 0, I_COPY); + uc_compiler_emit_u8(compiler, 0, 0); + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + jmp_off = uc_compiler_emit_jmp(compiler, 0, 0); + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + uc_compiler_emit_insn(compiler, 0, I_POP); + uc_compiler_parse_precedence(compiler, P_OR); + uc_compiler_set_jmpaddr(compiler, jmp_off, chunk->count); +} + +static void +uc_compiler_compile_dot(uc_compiler *compiler, bool assignable) +{ + /* parse label lhs */ + uc_compiler_parse_consume(compiler, TK_LABEL); + uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.val); + + /* depending on context, compile into I_UVAL, I_SVAL or I_LVAL operation */ + if (!assignable || !uc_compiler_compile_assignment(compiler, 
NULL)) + uc_compiler_emit_variable_rw(compiler, NULL, 0); +} + +static void +uc_compiler_compile_subscript(uc_compiler *compiler, bool assignable) +{ + /* compile lhs */ + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RBRACK); + + /* depending on context, compile into I_UVAL, I_SVAL or I_LVAL operation */ + if (!assignable || !uc_compiler_compile_assignment(compiler, NULL)) + uc_compiler_emit_variable_rw(compiler, NULL, 0); +} + +static void +uc_compiler_compile_ternary(uc_compiler *compiler, bool assignable) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t jmpz_off, jmp_off; + + /* jump to false branch */ + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + + /* compile true branch */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + + /* jump after false branch */ + jmp_off = uc_compiler_emit_jmp(compiler, 0, 0); + + uc_compiler_parse_consume(compiler, TK_COLON); + + /* compile false branch */ + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + uc_compiler_parse_precedence(compiler, P_TERNARY); + uc_compiler_set_jmpaddr(compiler, jmp_off, chunk->count); +} + +static void +uc_compiler_compile_array(uc_compiler *compiler, bool assignable) +{ + size_t hint_off, hint_count = 0, len = 0; + + /* create empty array on stack */ + uc_compiler_emit_insn(compiler, 0, I_NARR); + hint_off = uc_compiler_emit_u32(compiler, 0, 0); + + /* parse initializer values */ + do { + if (uc_compiler_parse_check(compiler, TK_RBRACK)) { + break; + } + else if (uc_compiler_parse_match(compiler, TK_ELLIP)) { + /* push items on stack so far... */ + if (len > 0) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_PARR); + uc_compiler_emit_u32(compiler, 0, len); + len = 0; + } + + /* compile spread value expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + + /* emit merge operation */ + uc_compiler_emit_insn(compiler, 0, I_MARR); + } + else { + /* push items on stack so far... 
*/ + if (len >= 0xffffffff) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_PARR); + uc_compiler_emit_u32(compiler, 0, len); + len = 0; + } + + /* compile item value expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + + hint_count++; + len++; + } + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + + uc_compiler_parse_consume(compiler, TK_RBRACK); + + /* push items on stack */ + if (len > 0) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_PARR); + uc_compiler_emit_u32(compiler, 0, len); + } + + /* set initial size hint */ + uc_compiler_set_u32(compiler, hint_off, hint_count); +} + +static void +uc_compiler_compile_object(uc_compiler *compiler, bool assignable) +{ + size_t hint_off, hint_count = 0, len = 0; + + /* create empty object on stack */ + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_NOBJ); + hint_off = uc_compiler_emit_u32(compiler, 0, 0); + + /* parse initializer values */ + do { + /* End of object literal */ + if (uc_compiler_parse_check(compiler, TK_RBRACE)) + break; + + /* Spread operator */ + if (uc_compiler_parse_match(compiler, TK_ELLIP)) { + /* set items on stack so far... 
*/ + if (len > 0) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_SOBJ); + uc_compiler_emit_u32(compiler, 0, len); + len = 0; + } + + /* compile spread value expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + + /* emit merge operation */ + uc_compiler_emit_insn(compiler, 0, I_MOBJ); + + continue; + } + + /* Computed property name */ + if (uc_compiler_parse_match(compiler, TK_LBRACK)) { + /* parse property name expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + + /* cosume closing bracket and colon */ + uc_compiler_parse_consume(compiler, TK_RBRACK); + uc_compiler_parse_consume(compiler, TK_COLON); + + /* parse value expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + } + + /* Property/value tuple or property shorthand */ + else { + /* parse key expression */ + if (!uc_compiler_parse_match(compiler, TK_LABEL) && + !uc_compiler_parse_match(compiler, TK_STRING)) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting label"); + + /* load label */ + uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, + compiler->parser->prev.val); + + /* If the property name is a plain label followed by a comma or + * closing curly brace, treat it as ES2015 property shorthand + * notation... */ + if (compiler->parser->prev.type == TK_LABEL && + (uc_compiler_parse_check(compiler, TK_COMMA) || + uc_compiler_parse_check(compiler, TK_RBRACE))) { + uc_compiler_emit_variable_rw(compiler, + compiler->parser->prev.val, 0); + } + + /* ... otherwise treat it as ordinary `key: value` tuple */ + else { + uc_compiler_parse_consume(compiler, TK_COLON); + + /* parse value expression */ + uc_compiler_parse_precedence(compiler, P_ASSIGN); + } + } + + /* set items on stack so far... 
*/ + if (len >= 0xfffffffe) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_SOBJ); + uc_compiler_emit_u32(compiler, 0, len); + len = 0; + } + + hint_count += 2; + len += 2; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + + uc_compiler_parse_consume(compiler, TK_RBRACE); + + /* set items on stack */ + if (len > 0) { + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_SOBJ); + uc_compiler_emit_u32(compiler, 0, len); + len = 0; + } + + /* set initial size hint */ + uc_compiler_set_u32(compiler, hint_off, hint_count); +} + + +static void +uc_compiler_declare_local_null(uc_compiler *compiler, size_t srcpos, json_object *varname) +{ + ssize_t existing_slot = uc_compiler_declare_local(compiler, varname); + + uc_compiler_emit_insn(compiler, srcpos, I_LNULL); + + if (existing_slot == -1) { + uc_compiler_initialize_local(compiler); + } + else { + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, existing_slot); + uc_compiler_emit_insn(compiler, 0, I_POP); + } +} + +static size_t +uc_compiler_declare_internal(uc_compiler *compiler, size_t srcpos, const char *name) +{ +#if 0 + ssize_t existing_slot; + json_object *n; + bool strict; + + n = xjs_new_string(name); + strict = compiler->strict_declarations; + compiler->strict_declarations = false; + existing_slot = uc_compiler_declare_local(compiler, n); + compiler->strict_declarations = strict; + + uc_compiler_emit_insn(compiler, srcpos, I_LNULL); + + if (existing_slot == -1) { + uc_value_put(n); + + return uc_compiler_initialize_local(compiler); + } + else { + uc_value_put(n); + + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, existing_slot); + uc_compiler_emit_insn(compiler, 0, I_POP); + + return existing_slot; + } +#else + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_locals *locals = &compiler->locals; + + //uc_compiler_emit_insn(compiler, srcpos, I_LNULL); + + uc_vector_grow(locals); + + 
locals->entries[locals->count].name = xjs_new_string(name); + locals->entries[locals->count].depth = compiler->scope_depth; + locals->entries[locals->count].captured = false; + locals->entries[locals->count].from = chunk->count; + + return locals->count++; +#endif +} + +static void +uc_compiler_compile_local(uc_compiler *compiler) +{ + ssize_t slot; + + do { + /* parse variable name */ + uc_compiler_parse_consume(compiler, TK_LABEL); + + /* declare local variable */ + slot = uc_compiler_declare_local(compiler, compiler->parser->prev.val); + + /* if followed by '=', parse initializer expression */ + if (uc_compiler_parse_match(compiler, TK_ASSIGN)) + uc_compiler_parse_precedence(compiler, P_ASSIGN); + /* otherwise load implicit null */ + else + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LNULL); + + /* initialize local */ + if (slot == -1) { + uc_compiler_initialize_local(compiler); + } + /* if the variable was redeclared, overwrite it */ + else { + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, slot); + uc_compiler_emit_insn(compiler, 0, I_POP); + } + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static uc_tokentype_t +uc_compiler_compile_altifblock(uc_compiler *compiler) +{ + while (true) { + switch (compiler->parser->curr.type) { + case TK_ELIF: + case TK_ELSE: + case TK_ENDIF: + case TK_EOF: + return compiler->parser->curr.type; + + default: + uc_compiler_compile_declaration(compiler); + break; + } + } + + return 0; +} + +static void +uc_compiler_compile_if(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t jmpz_off, jmp_off, i; + bool expect_endif = false; + uc_jmplist elifs = {}; + uc_tokentype_t type; + + /* parse & compile condition expression */ + uc_compiler_parse_consume(compiler, TK_LPAREN); + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RPAREN); + + /* 
conditional jump to else/elif branch */ + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + + if (uc_compiler_parse_match(compiler, TK_COLON)) { + while (true) { + /* compile elsif or else branch */ + type = uc_compiler_compile_altifblock(compiler); + + /* we just compiled an elsif block */ + if (!expect_endif && type == TK_ELIF) { + /* emit jump to skip to the end */ + uc_vector_grow(&elifs); + elifs.entries[elifs.count++] = uc_compiler_emit_jmp(compiler, 0, 0); + + /* point previous conditional jump to beginning of branch */ + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + + /* parse & compile elsif condition */ + uc_compiler_parse_advance(compiler); + uc_compiler_parse_consume(compiler, TK_LPAREN); + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RPAREN); + uc_compiler_parse_consume(compiler, TK_COLON); + + /* conditional jump to else/elif branch */ + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + } + else if (!expect_endif && type == TK_ELSE) { + /* emit jump to skip to the end */ + uc_vector_grow(&elifs); + elifs.entries[elifs.count++] = uc_compiler_emit_jmp(compiler, 0, 0); + + /* point previous conditional jump to beginning of branch */ + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + jmpz_off = 0; + + /* skip "else" keyword */ + uc_compiler_parse_advance(compiler); + + expect_endif = true; + } + else if (type == TK_ENDIF) { + /* if no else clause, point previous conditional jump after block */ + if (jmpz_off) + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + + /* patch the elif branch jumps to point here after the else */ + for (i = 0; i < elifs.count; i++) + uc_compiler_set_jmpaddr(compiler, elifs.entries[i], + chunk->count); + + /* skip the "endif" keyword */ + uc_compiler_parse_advance(compiler); + break; + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + expect_endif + ? 
"Expecting 'endif'" + : "Expecting 'elif', 'else' or 'endif'"); + + break; + } + } + + uc_vector_clear(&elifs); + } + else { + /* compile true branch */ + uc_compiler_compile_statement(compiler); + + /* ... when present, handle false branch */ + if (uc_compiler_parse_match(compiler, TK_ELSE)) { + /* jump to skip else branch */ + jmp_off = uc_compiler_emit_jmp(compiler, 0, 0); + + /* set conditional jump address */ + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + + /* compile false branch */ + uc_compiler_compile_statement(compiler); + + /* set else skip jump address */ + uc_compiler_set_jmpaddr(compiler, jmp_off, chunk->count); + } + /* ... otherwise point the conditional jump after the true branch */ + else { + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + } + } +} + +static void +uc_compiler_compile_while(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t cond_off, jmpz_off, end_off; + uc_patchlist p = {}; + + p.parent = compiler->patchlist; + compiler->patchlist = &p; + + cond_off = chunk->count; + + /* parse & compile loop condition */ + uc_compiler_parse_consume(compiler, TK_LPAREN); + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RPAREN); + + /* conditional jump to end */ + jmpz_off = uc_compiler_emit_jmpz(compiler, 0, 0); + + /* compile loop body */ + if (uc_compiler_parse_match(compiler, TK_COLON)) { + if (!uc_compiler_compile_delimitted_block(compiler, TK_ENDWHILE)) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting 'endwhile'"); + else + uc_compiler_parse_advance(compiler); + } + else { + uc_compiler_compile_statement(compiler); + } + + end_off = chunk->count; + + /* jump back to condition */ + uc_compiler_emit_jmp(compiler, 0, cond_off); + + /* set conditional jump target */ + uc_compiler_set_jmpaddr(compiler, jmpz_off, chunk->count); + + /* patch up break/continue */ + uc_compiler_backpatch(compiler, chunk->count, 
end_off); +} + +static void +uc_compiler_compile_for_in(uc_compiler *compiler, bool local, uc_token *kvar, uc_token *vvar) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t skip_jmp, test_jmp, key_slot, val_slot; + uc_patchlist p = {}; + + p.parent = compiler->patchlist; + compiler->patchlist = &p; + + uc_compiler_enter_scope(compiler); + + /* declare internal loop variables */ + uc_compiler_emit_insn(compiler, 0, I_LNULL); + key_slot = uc_compiler_declare_internal(compiler, 0, "(for in key)"); + + uc_compiler_emit_insn(compiler, 0, I_LNULL); + val_slot = uc_compiler_declare_internal(compiler, 0, "(for in value)"); + + /* declare loop variables */ + if (local) { + uc_compiler_declare_local_null(compiler, kvar->pos, kvar->val); + + if (vvar) + uc_compiler_declare_local_null(compiler, vvar->pos, vvar->val); + } + + /* value to iterate */ + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RPAREN); + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, val_slot); + + /* initial key value */ + uc_compiler_emit_insn(compiler, 0, I_LNULL); + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, key_slot); + + /* jump over variable read for first cycle */ + skip_jmp = uc_compiler_emit_jmp(compiler, 0, 0); + + /* read value */ + uc_compiler_emit_insn(compiler, 0, I_LLOC); + uc_compiler_emit_u32(compiler, 0, val_slot); + + /* read key */ + uc_compiler_emit_insn(compiler, 0, I_LLOC); + uc_compiler_emit_u32(compiler, 0, key_slot); + + /* backpatch skip jump */ + uc_compiler_set_jmpaddr(compiler, skip_jmp, chunk->count); + + /* load loop variable and get next key from object */ + uc_compiler_emit_insn(compiler, 0, vvar ? 
I_NEXTKV : I_NEXTK); + + /* set internal key variable */ + uc_compiler_emit_insn(compiler, 0, I_SLOC); + uc_compiler_emit_u32(compiler, 0, key_slot); + + /* test for != null */ + uc_compiler_emit_insn(compiler, 0, I_LNULL); + uc_compiler_emit_insn(compiler, 0, I_NES); + + /* jump after loop body if no next key */ + test_jmp = uc_compiler_emit_jmpz(compiler, 0, 0); + + /* set key and value variables */ + if (vvar) { + uc_compiler_emit_variable_rw(compiler, vvar->val, TK_ASSIGN); + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + /* set key variable */ + uc_compiler_emit_variable_rw(compiler, kvar->val, TK_ASSIGN); + uc_compiler_emit_insn(compiler, 0, I_POP); + + /* compile loop body */ + if (uc_compiler_parse_match(compiler, TK_COLON)) { + if (!uc_compiler_compile_delimitted_block(compiler, TK_ENDFOR)) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting 'endfor'"); + else + uc_compiler_parse_advance(compiler); + } + else { + uc_compiler_compile_statement(compiler); + } + + /* jump back to retrieve next key */ + uc_compiler_emit_jmp(compiler, 0, skip_jmp + 5); + + /* back patch conditional jump */ + uc_compiler_set_jmpaddr(compiler, test_jmp, chunk->count); + + /* patch up break/continue */ + uc_compiler_backpatch(compiler, chunk->count, skip_jmp + 5); + + /* pop loop variables */ + uc_compiler_emit_insn(compiler, 0, I_POP); + + if (vvar) + uc_compiler_emit_insn(compiler, 0, I_POP); + + uc_compiler_leave_scope(compiler); +} + +static void +uc_compiler_compile_for_count(uc_compiler *compiler, bool local, uc_token *var) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t test_off = 0, incr_off, skip_off, cond_off = 0; + uc_patchlist p = {}; + + p.parent = compiler->patchlist; + compiler->patchlist = &p; + + uc_compiler_enter_scope(compiler); + + /* Initializer ---------------------------------------------------------- */ + + /* We parsed a `local x` or `local x, y` expression, so (re)declare + * last label as local 
initializer variable */ + if (local) + uc_compiler_declare_local_null(compiler, var->pos, var->val); + + /* If we parsed at least on label, try continue parsing as variable + * expression... */ + if (var) { + uc_compiler_compile_labelexpr(compiler, true); + uc_compiler_emit_insn(compiler, 0, I_POP); + + /* If followed by a comma, continue parsing expression */ + if (uc_compiler_parse_match(compiler, TK_COMMA)) { + uc_compiler_compile_expression(compiler); + uc_compiler_emit_insn(compiler, 0, I_POP); + } + } + /* ... otherwise try parsing an entire expression (which might be absent) */ + else if (!uc_compiler_parse_check(compiler, TK_SCOL)) { + uc_compiler_compile_expression(compiler); + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + uc_compiler_parse_consume(compiler, TK_SCOL); + + + /* Condition ------------------------------------------------------------ */ + if (!uc_compiler_parse_check(compiler, TK_SCOL)) { + cond_off = chunk->count; + + uc_compiler_compile_expression(compiler); + + test_off = uc_compiler_emit_jmpz(compiler, 0, 0); + } + + uc_compiler_parse_consume(compiler, TK_SCOL); + + /* jump over incrementer */ + skip_off = uc_compiler_emit_jmp(compiler, 0, 0); + + + /* Incrementer ---------------------------------------------------------- */ + incr_off = chunk->count; + + if (!uc_compiler_parse_check(compiler, TK_RPAREN)) { + uc_compiler_compile_expression(compiler); + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + uc_compiler_parse_consume(compiler, TK_RPAREN); + + /* if we have a condition, jump back to it, else continue to the loop body */ + if (cond_off) + uc_compiler_emit_jmp(compiler, 0, cond_off); + + /* back patch skip address */ + uc_compiler_set_jmpaddr(compiler, skip_off, chunk->count); + + + /* Body ----------------------------------------------------------------- */ + if (uc_compiler_parse_match(compiler, TK_COLON)) { + if (!uc_compiler_compile_delimitted_block(compiler, TK_ENDFOR)) + uc_compiler_syntax_error(compiler, 
compiler->parser->curr.pos, + "Expecting 'endfor'"); + else + uc_compiler_parse_advance(compiler); + } + else { + uc_compiler_compile_statement(compiler); + } + + /* jump back to incrementer */ + uc_compiler_emit_jmp(compiler, 0, incr_off); + + /* back patch conditional jump */ + if (test_off) + uc_compiler_set_jmpaddr(compiler, test_off, chunk->count); + + /* patch up break/continue */ + uc_compiler_backpatch(compiler, chunk->count, incr_off); + + uc_compiler_leave_scope(compiler); +} + +static void +uc_compiler_compile_for(uc_compiler *compiler) +{ + uc_token keyvar = {}, valvar = {}; + bool local; + + uc_compiler_parse_consume(compiler, TK_LPAREN); + + /* check the next few tokens and see if we have either a + * `let x in` / `let x, y` expression or an ordinary initializer + * statement */ + + local = uc_compiler_parse_match(compiler, TK_LOCAL); + + if (local && !uc_compiler_parse_check(compiler, TK_LABEL)) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting label after 'local'"); + + if (uc_compiler_parse_match(compiler, TK_LABEL)) { + keyvar = compiler->parser->prev; + uc_value_get(keyvar.val); + + if (uc_compiler_parse_match(compiler, TK_COMMA)) { + uc_compiler_parse_consume(compiler, TK_LABEL); + + valvar = compiler->parser->prev; + uc_value_get(valvar.val); + } + + /* is a for-in loop */ + if (uc_compiler_parse_match(compiler, TK_IN)) { + uc_compiler_compile_for_in(compiler, local, &keyvar, + valvar.type ? &valvar : NULL); + + goto out; + } + } + + /* + * The previous expression ruled out a for-in loop, so continue parsing + * as counting for loop... + */ + uc_compiler_compile_for_count(compiler, local, + valvar.val ? &valvar : (keyvar.val ? 
&keyvar : NULL)); + +out: + uc_value_put(keyvar.val); + uc_value_put(valvar.val); +} + +static void +uc_compiler_compile_switch(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t i, first_jmp, skip_jmp, next_jmp, default_jmp = 0; + bool in_case = false; + uc_jmplist jmps = {}; + uc_patchlist p = {}; + + p.parent = compiler->patchlist; + compiler->patchlist = &p; + + uc_compiler_enter_scope(compiler); + + /* parse and compile match value */ + uc_compiler_parse_consume(compiler, TK_LPAREN); + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_RPAREN); + uc_compiler_parse_consume(compiler, TK_LBRACE); + uc_compiler_declare_internal(compiler, 0, "(switch value)"); + + /* skip over first condition */ + first_jmp = uc_compiler_emit_jmp(compiler, 0, 0); + + /* parse and compile case matches */ + while (!uc_compiler_parse_check(compiler, TK_RBRACE) && + !uc_compiler_parse_check(compiler, TK_EOF)) { + /* handle `default:` */ + if (uc_compiler_parse_match(compiler, TK_DEFAULT)) { + if (default_jmp) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "more than one switch default case"); + + return; + } + + uc_compiler_parse_consume(compiler, TK_COLON); + + /* jump over default case, can only be reached by fallthrough or + * conditional jump after last failed case condition */ + default_jmp = uc_compiler_emit_jmp(compiler, 0, 0); + + in_case = true; + } + + /* handle `case …:` */ + else if (uc_compiler_parse_match(compiler, TK_CASE)) { + /* jump over `case …:` label */ + uc_vector_grow(&jmps); + jmps.entries[jmps.count++] = uc_compiler_emit_jmp(compiler, 0, 0); + + /* copy condition value */ + uc_compiler_emit_insn(compiler, 0, I_COPY); + uc_compiler_emit_u8(compiler, 0, 0); + + /* compile case value expression */ + uc_compiler_compile_expression(compiler); + uc_compiler_parse_consume(compiler, TK_COLON); + + /* strict equality test */ + uc_compiler_emit_insn(compiler, 0, I_EQS); + + /* 
on inequality, jump to next condition */ + uc_vector_grow(&jmps); + jmps.entries[jmps.count++] = uc_compiler_emit_jmpz(compiler, 0, 0); + + in_case = true; + } + + /* handle interleaved statement */ + else if (in_case) { + uc_compiler_compile_declaration(compiler); + } + + /* a statement or expression preceding any `default` or `case` is a + * syntax error */ + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Expecting 'case' or 'default'"); + + return; + } + } + + uc_compiler_parse_consume(compiler, TK_RBRACE); + + /* patch jump targets for cases */ + for (i = 0; i < jmps.count; i += 2) { + skip_jmp = jmps.entries[i + 0]; + next_jmp = jmps.entries[i + 1]; + + uc_compiler_set_jmpaddr(compiler, skip_jmp, next_jmp + 5); + + /* have a subsequent case, patch next jump to it */ + if (i + 2 < jmps.count) + uc_compiler_set_jmpaddr(compiler, next_jmp, jmps.entries[i + 2] + 5); + /* case was last in switch, jump to default */ + else if (default_jmp) + uc_compiler_set_jmpaddr(compiler, next_jmp, default_jmp + 5); + /* if no default, jump to end */ + else + uc_compiler_set_jmpaddr(compiler, next_jmp, chunk->count); + } + + /* if we have a default case, set target for the skip jump */ + if (default_jmp) { + /* if we have cases, jump to the first one */ + if (jmps.count) + uc_compiler_set_jmpaddr(compiler, default_jmp, jmps.entries[0] + 5); + /* ... otherwise turn jump into no-op */ + else + uc_compiler_set_jmpaddr(compiler, default_jmp, default_jmp + 5); + } + + /* if we have cases, patch initial jump after the first case condition */ + if (jmps.count) + uc_compiler_set_jmpaddr(compiler, first_jmp, jmps.entries[0] + 5); + /* ... otherwise jump into default */ + else if (default_jmp) + uc_compiler_set_jmpaddr(compiler, first_jmp, default_jmp + 5); + /* ... 
otherwise if no default, turn into no-op */ + else + uc_compiler_set_jmpaddr(compiler, first_jmp, first_jmp + 5); + + uc_vector_clear(&jmps); + + uc_compiler_leave_scope(compiler); + + uc_compiler_backpatch(compiler, chunk->count, 0); + +} + +static void +uc_compiler_compile_try(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t try_from = 0, try_to = 0, jmp_off = 0, ehvar_slot = 0; + uc_ehranges *ranges = &chunk->ehranges; + + try_from = chunk->count; + ehvar_slot = compiler->locals.count; + + /* Try block ------------------------------------------------------------ */ + uc_compiler_enter_scope(compiler); + + uc_compiler_parse_consume(compiler, TK_LBRACE); + + while (!uc_compiler_parse_check(compiler, TK_RBRACE) && + !uc_compiler_parse_check(compiler, TK_EOF)) + uc_compiler_compile_declaration(compiler); + + /* jump beyond catch branch */ + try_to = chunk->count; + jmp_off = uc_compiler_emit_jmp(compiler, 0, 0); + + uc_compiler_parse_consume(compiler, TK_RBRACE); + + uc_compiler_leave_scope(compiler); + + + /* Catch block ---------------------------------------------------------- */ + if (try_to > try_from) { + uc_vector_grow(ranges); + + ranges->entries[ranges->count].from = try_from; + ranges->entries[ranges->count].to = try_to; + ranges->entries[ranges->count].target = chunk->count; + ranges->entries[ranges->count].slot = ehvar_slot; + ranges->count++; + } + + uc_compiler_enter_scope(compiler); + + uc_compiler_parse_consume(compiler, TK_CATCH); + + /* have exception variable */ + if (uc_compiler_parse_match(compiler, TK_LPAREN)) { + uc_compiler_parse_consume(compiler, TK_LABEL); + + uc_compiler_declare_local(compiler, compiler->parser->prev.val); + uc_compiler_initialize_local(compiler); + + uc_compiler_parse_consume(compiler, TK_RPAREN); + } + /* ... 
else pop exception object from stack */ + else { + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + uc_compiler_parse_consume(compiler, TK_LBRACE); + + while (!uc_compiler_parse_check(compiler, TK_RBRACE) && + !uc_compiler_parse_check(compiler, TK_EOF)) + uc_compiler_compile_declaration(compiler); + + uc_compiler_parse_consume(compiler, TK_RBRACE); + + uc_compiler_set_jmpaddr(compiler, jmp_off, chunk->count); + + uc_compiler_leave_scope(compiler); +} + +static void +uc_compiler_compile_control(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + uc_tokentype_t type = compiler->parser->prev.type; + uc_patchlist *p = compiler->patchlist; + uc_locals *locals = &compiler->locals; + size_t i, pos = compiler->parser->prev.pos; + + if (!p) { + uc_compiler_syntax_error(compiler, pos, + (type == TK_BREAK) + ? "break must be inside loop or switch" + : "continue must be inside loop"); + + return; + } + + /* pop locals in scope up to this point */ + for (i = locals->count; i > 0 && locals->entries[i - 1].depth == compiler->scope_depth; i--) + uc_compiler_emit_insn(compiler, 0, I_POP); + + uc_vector_grow(p); + + p->entries[p->count++] = + uc_compiler_emit_jmp(compiler, pos, chunk->count + type); + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static void +uc_compiler_compile_return(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t off = chunk->count; + + uc_compiler_compile_expstmt(compiler); + + /* if we compiled an empty expression statement (`;`), load implicit null */ + if (chunk->count == off) + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_LNULL); + /* otherwise overwrite the final I_POP instruction with I_RETURN */ + else + uc_chunk_pop(chunk); + + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_RETURN); +} + +static void +uc_compiler_compile_tplexp(uc_compiler *compiler) +{ + uc_chunk *chunk = uc_compiler_current_chunk(compiler); + size_t off = chunk->count; 
+ + uc_compiler_compile_expression(compiler); + + /* XXX: the lexer currently emits a superfluous trailing semicolon... */ + uc_compiler_parse_match(compiler, TK_SCOL); + + uc_compiler_parse_consume(compiler, TK_REXP); + + if (chunk->count > off) + uc_compiler_emit_insn(compiler, 0, I_PRINT); +} + +static void +uc_compiler_compile_text(uc_compiler *compiler) +{ + uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.val); + uc_compiler_emit_insn(compiler, 0, I_PRINT); +} + +static void +uc_compiler_compile_block(uc_compiler *compiler) +{ + uc_compiler_enter_scope(compiler); + + while (!uc_compiler_parse_check(compiler, TK_RBRACE) && + !uc_compiler_parse_check(compiler, TK_EOF)) + uc_compiler_compile_declaration(compiler); + + uc_compiler_parse_consume(compiler, TK_RBRACE); + + uc_compiler_leave_scope(compiler); +} + +static void +uc_compiler_compile_expstmt(uc_compiler *compiler) +{ + /* empty statement */ + if (uc_compiler_parse_match(compiler, TK_SCOL)) + return; + + uc_compiler_compile_expression(compiler); + + /* allow omitting final semicolon */ + switch (compiler->parser->curr.type) { + case TK_RBRACE: + case TK_ELSE: /* fixme: only in altblockmode */ + case TK_ELIF: + case TK_ENDIF: + case TK_ENDFOR: + case TK_ENDWHILE: + case TK_ENDFUNC: + case TK_EOF: + break; + + default: + uc_compiler_parse_consume(compiler, TK_SCOL); + + break; + } + + uc_compiler_emit_insn(compiler, 0, I_POP); +} + +static void +uc_compiler_compile_statement(uc_compiler *compiler) +{ + if (uc_compiler_parse_match(compiler, TK_IF)) + uc_compiler_compile_if(compiler); + else if (uc_compiler_parse_match(compiler, TK_WHILE)) + uc_compiler_compile_while(compiler); + else if (uc_compiler_parse_match(compiler, TK_FOR)) + uc_compiler_compile_for(compiler); + else if (uc_compiler_parse_match(compiler, TK_SWITCH)) + uc_compiler_compile_switch(compiler); + else if (uc_compiler_parse_match(compiler, TK_TRY)) + uc_compiler_compile_try(compiler); + else if 
(uc_compiler_parse_match(compiler, TK_FUNC)) + uc_compiler_compile_function(compiler, false); + else if (uc_compiler_parse_match(compiler, TK_BREAK)) + uc_compiler_compile_control(compiler); + else if (uc_compiler_parse_match(compiler, TK_CONTINUE)) + uc_compiler_compile_control(compiler); + else if (uc_compiler_parse_match(compiler, TK_RETURN)) + uc_compiler_compile_return(compiler); + else if (uc_compiler_parse_match(compiler, TK_TEXT)) + uc_compiler_compile_text(compiler); + else if (uc_compiler_parse_match(compiler, TK_LEXP)) + uc_compiler_compile_tplexp(compiler); + else if (uc_compiler_parse_match(compiler, TK_LBRACE)) + uc_compiler_compile_block(compiler); + else + uc_compiler_compile_expstmt(compiler); +} + +static void +uc_compiler_compile_declaration(uc_compiler *compiler) +{ + if (uc_compiler_parse_match(compiler, TK_LOCAL)) + uc_compiler_compile_local(compiler); + else + uc_compiler_compile_statement(compiler); + + if (compiler->parser->synchronizing) + uc_compiler_parse_synchronize(compiler); +} + +uc_function * +uc_compile(uc_parse_config *config, uc_source *source, char **errp) +{ + uc_parser parser = { .config = config }; + uc_compiler compiler = { .parser = &parser }; + uc_function *fn; + + uc_lexer_init(&parser.lex, config, source); + uc_compiler_init(&compiler, "main", 0, source); + + uc_compiler_parse_advance(&compiler); + + while (!uc_compiler_parse_match(&compiler, TK_EOF)) + uc_compiler_compile_declaration(&compiler); + + fn = uc_compiler_finish(&compiler); + + if (errp) + *errp = parser.error; + + uc_lexer_free(&parser.lex); + + return fn; +} diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..53ff987 --- /dev/null +++ b/compiler.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in 
all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __COMPILER_H_ +#define __COMPILER_H_ + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#ifdef JSONC + #include <json.h> +#else + #include <json-c/json.h> +#endif + +#include "source.h" +#include "object.h" +#include "lexer.h" +#include "util.h" + +typedef enum { + P_NONE, + + P_COMMA, /* , */ + + P_ASSIGN, /* = += -= *= /= %= <<= >>= &= ^= |= */ + + P_TERNARY, /* ?: */ + + P_OR, /* || */ + P_AND, /* && */ + P_BOR, /* | */ + P_BXOR, /* ^ */ + P_BAND, /* & */ + + P_EQUAL, /* === !== == != */ + P_COMPARE, /* < <= > >= in */ + + P_SHIFT, /* << >> */ + + P_ADD, /* + - */ + P_MUL, /* * / % */ + + P_UNARY, /* ! 
~ +… -… ++… --… */ + + P_INC, /* …++ …-- */ + + P_CALL, /* ….…, …[…], …(…) */ + + P_PRIMARY /* (…) */ +} uc_precedence_t; + +struct uc_patchlist { + struct uc_patchlist *parent; + size_t count, *entries; +}; + +typedef struct uc_patchlist uc_patchlist; + +typedef struct { + json_object *name; + ssize_t depth; + size_t from; + bool captured; +} uc_local; + +typedef struct { + json_object *name; + size_t index; + bool local; +} uc_upval; + +uc_declare_vector(uc_locals, uc_local); +uc_declare_vector(uc_upvals, uc_upval); +uc_declare_vector(uc_jmplist, size_t); + +typedef struct { + uc_parse_config *config; + uc_lexer lex; + uc_token prev, curr; + bool synchronizing; + char *error; +} uc_parser; + +struct uc_compiler { + struct uc_compiler *parent; + uc_locals locals; + uc_upvals upvals; + uc_patchlist *patchlist; + uc_function *function; + uc_parser *parser; + size_t scope_depth, current_srcpos, last_insn; + bool statement_emitted; +}; + +typedef struct uc_compiler uc_compiler; + +typedef struct { + void (*prefix)(uc_compiler *, bool); + void (*infix)(uc_compiler *, bool); + uc_precedence_t precedence; +} uc_parse_rule; + +uc_function *uc_compile(uc_parse_config *config, uc_source *source, char **errp); + +#endif /* __COMPILER_H_ */ diff --git a/contrib/lemon.c b/contrib/lemon.c deleted file mode 100644 index 85e94f7..0000000 --- a/contrib/lemon.c +++ /dev/null @@ -1,5040 +0,0 @@ -/* -** This file contains all sources (including headers) to the LEMON -** LALR(1) parser generator. The sources have been combined into a -** single file to make it easy to include LEMON in the source tree -** and Makefile of another program. -** -** The author of this program disclaims copyright. 
-*/ -#include <stdio.h> -#include <stdarg.h> -#include <string.h> -#include <ctype.h> -#include <stdlib.h> -#include <assert.h> - -#ifndef __WIN32__ -# if defined(_WIN32) || defined(WIN32) -# define __WIN32__ -# endif -#endif - -#ifdef __WIN32__ -#ifdef __cplusplus -extern "C" { -#endif -extern int access(const char *path, int mode); -#ifdef __cplusplus -} -#endif -#else -#include <unistd.h> -#endif - -/* #define PRIVATE static */ -#define PRIVATE - -#ifdef TEST -#define MAXRHS 5 /* Set low to exercise exception code */ -#else -#define MAXRHS 1000 -#endif - -static int showPrecedenceConflict = 0; -static char *msort(char*,char**,int(*)(const char*,const char*)); - -/* -** Compilers are getting increasingly pedantic about type conversions -** as C evolves ever closer to Ada.... To work around the latest problems -** we have to define the following variant of strlen(). -*/ -#define lemonStrlen(X) ((int)strlen(X)) - -/* -** Compilers are starting to complain about the use of sprintf() and strcpy(), -** saying they are unsafe. So we define our own versions of those routines too. -** -** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and -** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). -** The third is a helper routine for vsnprintf() that adds texts to the end of a -** buffer, making sure the buffer is always zero-terminated. -** -** The string formatter is a minimal subset of stdlib sprintf() supporting only -** a few simply conversions: -** -** %d -** %s -** %.*s -** -*/ -static void lemon_addtext( - char *zBuf, /* The buffer to which text is added */ - int *pnUsed, /* Slots of the buffer used so far */ - const char *zIn, /* Text to add */ - int nIn, /* Bytes of text to add. -1 to use strlen() */ - int iWidth /* Field width. 
Negative to left justify */ -){ - if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} - while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } - if( nIn==0 ) return; - memcpy(&zBuf[*pnUsed], zIn, nIn); - *pnUsed += nIn; - while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } - zBuf[*pnUsed] = 0; -} -static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ - int i, j, k, c; - int nUsed = 0; - const char *z; - char zTemp[50]; - str[0] = 0; - for(i=j=0; (c = zFormat[i])!=0; i++){ - if( c=='%' ){ - int iWidth = 0; - lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); - c = zFormat[++i]; - if( isdigit(c) || (c=='-' && isdigit(zFormat[i+1])) ){ - if( c=='-' ) i++; - while( isdigit(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; - if( c=='-' ) iWidth = -iWidth; - c = zFormat[i]; - } - if( c=='d' ){ - int v = va_arg(ap, int); - if( v<0 ){ - lemon_addtext(str, &nUsed, "-", 1, iWidth); - v = -v; - }else if( v==0 ){ - lemon_addtext(str, &nUsed, "0", 1, iWidth); - } - k = 0; - while( v>0 ){ - k++; - zTemp[sizeof(zTemp)-k] = (v%10) + '0'; - v /= 10; - } - lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); - }else if( c=='s' ){ - z = va_arg(ap, const char*); - lemon_addtext(str, &nUsed, z, -1, iWidth); - }else if( c=='.' 
&& memcmp(&zFormat[i], ".*s", 3)==0 ){ - i += 2; - k = va_arg(ap, int); - z = va_arg(ap, const char*); - lemon_addtext(str, &nUsed, z, k, iWidth); - }else if( c=='%' ){ - lemon_addtext(str, &nUsed, "%", 1, 0); - }else{ - fprintf(stderr, "illegal format\n"); - exit(1); - } - j = i+1; - } - } - lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); - return nUsed; -} -static int lemon_sprintf(char *str, const char *format, ...){ - va_list ap; - int rc; - va_start(ap, format); - rc = lemon_vsprintf(str, format, ap); - va_end(ap); - return rc; -} -static void lemon_strcpy(char *dest, const char *src){ - while( (*(dest++) = *(src++))!=0 ){} -} -static void lemon_strcat(char *dest, const char *src){ - while( *dest ) dest++; - lemon_strcpy(dest, src); -} - - -/* a few forward declarations... */ -struct rule; -struct lemon; -struct action; - -static struct action *Action_new(void); -static struct action *Action_sort(struct action *); - -/********** From the file "build.h" ************************************/ -void FindRulePrecedences(); -void FindFirstSets(); -void FindStates(); -void FindLinks(); -void FindFollowSets(); -void FindActions(); - -/********* From the file "configlist.h" *********************************/ -void Configlist_init(void); -struct config *Configlist_add(struct rule *, int); -struct config *Configlist_addbasis(struct rule *, int); -void Configlist_closure(struct lemon *); -void Configlist_sort(void); -void Configlist_sortbasis(void); -struct config *Configlist_return(void); -struct config *Configlist_basis(void); -void Configlist_eat(struct config *); -void Configlist_reset(void); - -/********* From the file "error.h" ***************************************/ -void ErrorMsg(const char *, int,const char *, ...); - -/****** From the file "option.h" ******************************************/ -enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, - OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; -struct s_options { - enum option_type type; - const char *label; 
- char *arg; - const char *message; -}; -int OptInit(char**,struct s_options*,FILE*); -int OptNArgs(void); -char *OptArg(int); -void OptErr(int); -void OptPrint(void); - -/******** From the file "parse.h" *****************************************/ -void Parse(struct lemon *lemp); - -/********* From the file "plink.h" ***************************************/ -struct plink *Plink_new(void); -void Plink_add(struct plink **, struct config *); -void Plink_copy(struct plink **, struct plink *); -void Plink_delete(struct plink *); - -/********** From the file "report.h" *************************************/ -void Reprint(struct lemon *); -void ReportOutput(struct lemon *); -void ReportTable(struct lemon *, int); -void ReportHeader(struct lemon *); -void CompressTables(struct lemon *); -void ResortStates(struct lemon *); - -/********** From the file "set.h" ****************************************/ -void SetSize(int); /* All sets will be of size N */ -char *SetNew(void); /* A new set for element 0..N */ -void SetFree(char*); /* Deallocate a set */ -int SetAdd(char*,int); /* Add element to a set */ -int SetUnion(char *,char *); /* A <- A U B, thru element N */ -#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ - -/********** From the file "struct.h" *************************************/ -/* -** Principal data structures for the LEMON parser generator. 
-*/ - -typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; - -/* Symbols (terminals and nonterminals) of the grammar are stored -** in the following: */ -enum symbol_type { - TERMINAL, - NONTERMINAL, - MULTITERMINAL -}; -enum e_assoc { - LEFT, - RIGHT, - NONE, - UNK -}; -struct symbol { - const char *name; /* Name of the symbol */ - int index; /* Index number for this symbol */ - enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ - struct rule *rule; /* Linked list of rules of this (if an NT) */ - struct symbol *fallback; /* fallback token in case this token doesn't parse */ - int prec; /* Precedence if defined (-1 otherwise) */ - enum e_assoc assoc; /* Associativity if precedence is defined */ - char *firstset; /* First-set for all rules of this symbol */ - Boolean lambda; /* True if NT and can generate an empty string */ - int useCnt; /* Number of times used */ - char *destructor; /* Code which executes whenever this symbol is - ** popped from the stack during error processing */ - int destLineno; /* Line number for start of destructor */ - char *datatype; /* The data type of information held by this - ** object. Only used if type==NONTERMINAL */ - int dtnum; /* The data type number. In the parser, the value - ** stack is a union. The .yy%d element of this - ** union is the correct data type for this object */ - /* The following fields are used by MULTITERMINALs only */ - int nsubsym; /* Number of constituent symbols in the MULTI */ - struct symbol **subsym; /* Array of constituent symbols */ -}; - -/* Each production rule in the grammar is stored in the following -** structure. 
*/ -struct rule { - struct symbol *lhs; /* Left-hand side of the rule */ - const char *lhsalias; /* Alias for the LHS (NULL if none) */ - int lhsStart; /* True if left-hand side is the start symbol */ - int ruleline; /* Line number for the rule */ - int nrhs; /* Number of RHS symbols */ - struct symbol **rhs; /* The RHS symbols */ - const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ - int line; /* Line number at which code begins */ - const char *code; /* The code executed when this rule is reduced */ - struct symbol *precsym; /* Precedence symbol for this rule */ - int index; /* An index number for this rule */ - Boolean canReduce; /* True if this rule is ever reduced */ - struct rule *nextlhs; /* Next rule with the same LHS */ - struct rule *next; /* Next rule in the global list */ -}; - -/* A configuration is a production rule of the grammar together with -** a mark (dot) showing how much of that rule has been processed so far. -** Configurations also contain a follow-set which is a list of terminal -** symbols which are allowed to immediately follow the end of the rule. 
-** Every configuration is recorded as an instance of the following: */ -enum cfgstatus { - COMPLETE, - INCOMPLETE -}; -struct config { - struct rule *rp; /* The rule upon which the configuration is based */ - int dot; /* The parse point */ - char *fws; /* Follow-set for this configuration only */ - struct plink *fplp; /* Follow-set forward propagation links */ - struct plink *bplp; /* Follow-set backwards propagation links */ - struct state *stp; /* Pointer to state which contains this */ - enum cfgstatus status; /* used during followset and shift computations */ - struct config *next; /* Next configuration in the state */ - struct config *bp; /* The next basis configuration */ -}; - -enum e_action { - SHIFT, - ACCEPT, - REDUCE, - ERROR, - SSCONFLICT, /* A shift/shift conflict */ - SRCONFLICT, /* Was a reduce, but part of a conflict */ - RRCONFLICT, /* Was a reduce, but part of a conflict */ - SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ - RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ - NOT_USED /* Deleted by compression */ -}; - -/* Every shift or reduce operation is stored as one of the following */ -struct action { - struct symbol *sp; /* The look-ahead symbol */ - enum e_action type; - union { - struct state *stp; /* The new state, if a shift */ - struct rule *rp; /* The rule, if a reduce */ - } x; - struct action *next; /* Next action for this state */ - struct action *collide; /* Next action with the same hash */ -}; - -/* Each state of the generated parser's finite state machine -** is encoded as an instance of the following structure. 
*/ -struct state { - struct config *bp; /* The basis configurations for this state */ - struct config *cfp; /* All configurations in this set */ - int statenum; /* Sequential number for this state */ - struct action *ap; /* Array of actions for this state */ - int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ - int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ - int iDflt; /* Default action */ -}; -#define NO_OFFSET (-2147483647) - -/* A followset propagation link indicates that the contents of one -** configuration followset should be propagated to another whenever -** the first changes. */ -struct plink { - struct config *cfp; /* The configuration to which linked */ - struct plink *next; /* The next propagate link */ -}; - -/* The state vector for the entire parser generator is recorded as -** follows. (LEMON uses no global variables and makes little use of -** static variables. Fields in the following structure can be thought -** of as begin global variables in the program.) 
*/ -struct lemon { - struct state **sorted; /* Table of states sorted by state number */ - struct rule *rule; /* List of all rules */ - int nstate; /* Number of states */ - int nrule; /* Number of rules */ - int nsymbol; /* Number of terminal and nonterminal symbols */ - int nterminal; /* Number of terminal symbols */ - struct symbol **symbols; /* Sorted array of pointers to symbols */ - int errorcnt; /* Number of errors */ - struct symbol *errsym; /* The error symbol */ - struct symbol *wildcard; /* Token that matches anything */ - char *name; /* Name of the generated parser */ - char *arg; /* Declaration of the 3th argument to parser */ - char *tokentype; /* Type of terminal symbols in the parser stack */ - char *vartype; /* The default type of non-terminal symbols */ - char *start; /* Name of the start symbol for the grammar */ - char *stacksize; /* Size of the parser stack */ - char *include; /* Code to put at the start of the C file */ - char *error; /* Code to execute when an error is seen */ - char *overflow; /* Code to execute on a stack overflow */ - char *failure; /* Code to execute on parser failure */ - char *accept; /* Code to execute when the parser excepts */ - char *extracode; /* Code appended to the generated file */ - char *tokendest; /* Code to execute to destroy token data */ - char *vardest; /* Code for the default non-terminal destructor */ - char *filename; /* Name of the input file */ - char *outname; /* Name of the current output file */ - char *tokenprefix; /* A prefix added to token names in the .h file */ - int nconflict; /* Number of parsing conflicts */ - int tablesize; /* Size of the parse tables */ - int basisflag; /* Print only basis configurations */ - int has_fallback; /* True if any %fallback is seen in the grammar */ - int nolinenosflag; /* True if #line statements should not be printed */ - char *argv0; /* Name of the program */ -}; - -#define MemoryCheck(X) if((X)==0){ \ - extern void memory_error(); \ - memory_error(); \ -} - 
-/**************** From the file "table.h" *********************************/ -/* -** All code in this file has been automatically generated -** from a specification in the file -** "table.q" -** by the associative array code building program "aagen". -** Do not edit this file! Instead, edit the specification -** file, then rerun aagen. -*/ -/* -** Code for processing tables in the LEMON parser generator. -*/ -/* Routines for handling a strings */ - -const char *Strsafe(const char *); - -void Strsafe_init(void); -int Strsafe_insert(const char *); -const char *Strsafe_find(const char *); - -/* Routines for handling symbols of the grammar */ - -struct symbol *Symbol_new(const char *); -int Symbolcmpp(const void *, const void *); -void Symbol_init(void); -int Symbol_insert(struct symbol *, const char *); -struct symbol *Symbol_find(const char *); -struct symbol *Symbol_Nth(int); -int Symbol_count(void); -struct symbol **Symbol_arrayof(void); - -/* Routines to manage the state table */ - -int Configcmp(const char *, const char *); -struct state *State_new(void); -void State_init(void); -int State_insert(struct state *, struct config *); -struct state *State_find(struct config *); -struct state **State_arrayof(/* */); - -/* Routines used for efficiency in Configlist_add */ - -void Configtable_init(void); -int Configtable_insert(struct config *); -struct config *Configtable_find(struct config *); -void Configtable_clear(int(*)(struct config *)); - -/****************** From the file "action.c" *******************************/ -/* -** Routines processing parser actions in the LEMON parser generator. 
-*/ - -/* Allocate a new parser action */ -static struct action *Action_new(void){ - static struct action *freelist = 0; - struct action *newaction; - - if( freelist==0 ){ - int i; - int amt = 100; - freelist = (struct action *)calloc(amt, sizeof(struct action)); - if( freelist==0 ){ - fprintf(stderr,"Unable to allocate memory for a new parser action."); - exit(1); - } - for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; - freelist[amt-1].next = 0; - } - newaction = freelist; - freelist = freelist->next; - return newaction; -} - -/* Compare two actions for sorting purposes. Return negative, zero, or -** positive if the first action is less than, equal to, or greater than -** the first -*/ -static int actioncmp( - struct action *ap1, - struct action *ap2 -){ - int rc; - rc = ap1->sp->index - ap2->sp->index; - if( rc==0 ){ - rc = (int)ap1->type - (int)ap2->type; - } - if( rc==0 && ap1->type==REDUCE ){ - rc = ap1->x.rp->index - ap2->x.rp->index; - } - if( rc==0 ){ - rc = (int) (ap2 - ap1); - } - return rc; -} - -/* Sort parser actions */ -static struct action *Action_sort( - struct action *ap -){ - ap = (struct action *)msort((char *)ap,(char **)&ap->next, - (int(*)(const char*,const char*))actioncmp); - return ap; -} - -void Action_add( - struct action **app, - enum e_action type, - struct symbol *sp, - char *arg -){ - struct action *newaction; - newaction = Action_new(); - newaction->next = *app; - *app = newaction; - newaction->type = type; - newaction->sp = sp; - if( type==SHIFT ){ - newaction->x.stp = (struct state *)arg; - }else{ - newaction->x.rp = (struct rule *)arg; - } -} -/********************** New code to implement the "acttab" module ***********/ -/* -** This module implements routines use to construct the yy_action[] table. -*/ - -/* -** The state of the yy_action table under construction is an instance of -** the following structure. -** -** The yy_action table maps the pair (state_number, lookahead) into an -** action_number. 
The table is an array of integers pairs. The state_number -** determines an initial offset into the yy_action array. The lookahead -** value is then added to this initial offset to get an index X into the -** yy_action array. If the aAction[X].lookahead equals the value of the -** of the lookahead input, then the value of the action_number output is -** aAction[X].action. If the lookaheads do not match then the -** default action for the state_number is returned. -** -** All actions associated with a single state_number are first entered -** into aLookahead[] using multiple calls to acttab_action(). Then the -** actions for that single state_number are placed into the aAction[] -** array with a single call to acttab_insert(). The acttab_insert() call -** also resets the aLookahead[] array in preparation for the next -** state number. -*/ -struct lookahead_action { - int lookahead; /* Value of the lookahead token */ - int action; /* Action to take on the given lookahead */ -}; -typedef struct acttab acttab; -struct acttab { - int nAction; /* Number of used slots in aAction[] */ - int nActionAlloc; /* Slots allocated for aAction[] */ - struct lookahead_action - *aAction, /* The yy_action[] table under construction */ - *aLookahead; /* A single new transaction set */ - int mnLookahead; /* Minimum aLookahead[].lookahead */ - int mnAction; /* Action associated with mnLookahead */ - int mxLookahead; /* Maximum aLookahead[].lookahead */ - int nLookahead; /* Used slots in aLookahead[] */ - int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ -}; - -/* Return the number of entries in the yy_action table */ -#define acttab_size(X) ((X)->nAction) - -/* The value for the N-th entry in yy_action */ -#define acttab_yyaction(X,N) ((X)->aAction[N].action) - -/* The value for the N-th entry in yy_lookahead */ -#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) - -/* Free all memory associated with the given acttab */ -void acttab_free(acttab *p){ - free( p->aAction 
); - free( p->aLookahead ); - free( p ); -} - -/* Allocate a new acttab structure */ -acttab *acttab_alloc(void){ - acttab *p = (acttab *) calloc( 1, sizeof(*p) ); - if( p==0 ){ - fprintf(stderr,"Unable to allocate memory for a new acttab."); - exit(1); - } - memset(p, 0, sizeof(*p)); - return p; -} - -/* Add a new action to the current transaction set. -** -** This routine is called once for each lookahead for a particular -** state. -*/ -void acttab_action(acttab *p, int lookahead, int action){ - if( p->nLookahead>=p->nLookaheadAlloc ){ - p->nLookaheadAlloc += 25; - p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, - sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); - if( p->aLookahead==0 ){ - fprintf(stderr,"malloc failed\n"); - exit(1); - } - } - if( p->nLookahead==0 ){ - p->mxLookahead = lookahead; - p->mnLookahead = lookahead; - p->mnAction = action; - }else{ - if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead; - if( p->mnLookahead>lookahead ){ - p->mnLookahead = lookahead; - p->mnAction = action; - } - } - p->aLookahead[p->nLookahead].lookahead = lookahead; - p->aLookahead[p->nLookahead].action = action; - p->nLookahead++; -} - -/* -** Add the transaction set built up with prior calls to acttab_action() -** into the current action table. Then reset the transaction set back -** to an empty set in preparation for a new round of acttab_action() calls. -** -** Return the offset into the action table of the new transaction. -*/ -int acttab_insert(acttab *p){ - int i, j, k, n; - assert( p->nLookahead>0 ); - - /* Make sure we have enough space to hold the expanded action table - ** in the worst case. 
The worst case occurs if the transaction set - ** must be appended to the current action table - */ - n = p->mxLookahead + 1; - if( p->nAction + n >= p->nActionAlloc ){ - int oldAlloc = p->nActionAlloc; - p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; - p->aAction = (struct lookahead_action *) realloc( p->aAction, - sizeof(p->aAction[0])*p->nActionAlloc); - if( p->aAction==0 ){ - fprintf(stderr,"malloc failed\n"); - exit(1); - } - for(i=oldAlloc; i<p->nActionAlloc; i++){ - p->aAction[i].lookahead = -1; - p->aAction[i].action = -1; - } - } - - /* Scan the existing action table looking for an offset that is a - ** duplicate of the current transaction set. Fall out of the loop - ** if and when the duplicate is found. - ** - ** i is the index in p->aAction[] where p->mnLookahead is inserted. - */ - for(i=p->nAction-1; i>=0; i--){ - if( p->aAction[i].lookahead==p->mnLookahead ){ - /* All lookaheads and actions in the aLookahead[] transaction - ** must match against the candidate aAction[i] entry. */ - if( p->aAction[i].action!=p->mnAction ) continue; - for(j=0; j<p->nLookahead; j++){ - k = p->aLookahead[j].lookahead - p->mnLookahead + i; - if( k<0 || k>=p->nAction ) break; - if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; - if( p->aLookahead[j].action!=p->aAction[k].action ) break; - } - if( j<p->nLookahead ) continue; - - /* No possible lookahead value that is not in the aLookahead[] - ** transaction is allowed to match aAction[i] */ - n = 0; - for(j=0; j<p->nAction; j++){ - if( p->aAction[j].lookahead<0 ) continue; - if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; - } - if( n==p->nLookahead ){ - break; /* An exact match is found at offset i */ - } - } - } - - /* If no existing offsets exactly match the current transaction, find an - ** an empty offset in the aAction[] table in which we can add the - ** aLookahead[] transaction. 
- */ - if( i<0 ){ - /* Look for holes in the aAction[] table that fit the current - ** aLookahead[] transaction. Leave i set to the offset of the hole. - ** If no holes are found, i is left at p->nAction, which means the - ** transaction will be appended. */ - for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){ - if( p->aAction[i].lookahead<0 ){ - for(j=0; j<p->nLookahead; j++){ - k = p->aLookahead[j].lookahead - p->mnLookahead + i; - if( k<0 ) break; - if( p->aAction[k].lookahead>=0 ) break; - } - if( j<p->nLookahead ) continue; - for(j=0; j<p->nAction; j++){ - if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; - } - if( j==p->nAction ){ - break; /* Fits in empty slots */ - } - } - } - } - /* Insert transaction set at index i. */ - for(j=0; j<p->nLookahead; j++){ - k = p->aLookahead[j].lookahead - p->mnLookahead + i; - p->aAction[k] = p->aLookahead[j]; - if( k>=p->nAction ) p->nAction = k+1; - } - p->nLookahead = 0; - - /* Return the offset that is added to the lookahead in order to get the - ** index into yy_action of the action */ - return i - p->mnLookahead; -} - -/********************** From the file "build.c" *****************************/ -/* -** Routines to construction the finite state machine for the LEMON -** parser generator. -*/ - -/* Find a precedence symbol of every rule in the grammar. -** -** Those rules which have a precedence symbol coded in the input -** grammar using the "[symbol]" construct will already have the -** rp->precsym field filled. Other rules take as their precedence -** symbol the first RHS symbol with a defined precedence. If there -** are not RHS symbols with a defined precedence, the precedence -** symbol field is left blank. 
-*/ -void FindRulePrecedences(struct lemon *xp) -{ - struct rule *rp; - for(rp=xp->rule; rp; rp=rp->next){ - if( rp->precsym==0 ){ - int i, j; - for(i=0; i<rp->nrhs && rp->precsym==0; i++){ - struct symbol *sp = rp->rhs[i]; - if( sp->type==MULTITERMINAL ){ - for(j=0; j<sp->nsubsym; j++){ - if( sp->subsym[j]->prec>=0 ){ - rp->precsym = sp->subsym[j]; - break; - } - } - }else if( sp->prec>=0 ){ - rp->precsym = rp->rhs[i]; - } - } - } - } - return; -} - -/* Find all nonterminals which will generate the empty string. -** Then go back and compute the first sets of every nonterminal. -** The first set is the set of all terminal symbols which can begin -** a string generated by that nonterminal. -*/ -void FindFirstSets(struct lemon *lemp) -{ - int i, j; - struct rule *rp; - int progress; - - for(i=0; i<lemp->nsymbol; i++){ - lemp->symbols[i]->lambda = LEMON_FALSE; - } - for(i=lemp->nterminal; i<lemp->nsymbol; i++){ - lemp->symbols[i]->firstset = SetNew(); - } - - /* First compute all lambdas */ - do{ - progress = 0; - for(rp=lemp->rule; rp; rp=rp->next){ - if( rp->lhs->lambda ) continue; - for(i=0; i<rp->nrhs; i++){ - struct symbol *sp = rp->rhs[i]; - assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); - if( sp->lambda==LEMON_FALSE ) break; - } - if( i==rp->nrhs ){ - rp->lhs->lambda = LEMON_TRUE; - progress = 1; - } - } - }while( progress ); - - /* Now compute all first sets */ - do{ - struct symbol *s1, *s2; - progress = 0; - for(rp=lemp->rule; rp; rp=rp->next){ - s1 = rp->lhs; - for(i=0; i<rp->nrhs; i++){ - s2 = rp->rhs[i]; - if( s2->type==TERMINAL ){ - progress += SetAdd(s1->firstset,s2->index); - break; - }else if( s2->type==MULTITERMINAL ){ - for(j=0; j<s2->nsubsym; j++){ - progress += SetAdd(s1->firstset,s2->subsym[j]->index); - } - break; - }else if( s1==s2 ){ - if( s1->lambda==LEMON_FALSE ) break; - }else{ - progress += SetUnion(s1->firstset,s2->firstset); - if( s2->lambda==LEMON_FALSE ) break; - } - } - } - }while( progress ); - return; -} - -/* Compute 
all LR(0) states for the grammar. Links -** are added to between some states so that the LR(1) follow sets -** can be computed later. -*/ -PRIVATE struct state *getstate(struct lemon *); /* forward reference */ -void FindStates(struct lemon *lemp) -{ - struct symbol *sp; - struct rule *rp; - - Configlist_init(); - - /* Find the start symbol */ - if( lemp->start ){ - sp = Symbol_find(lemp->start); - if( sp==0 ){ - ErrorMsg(lemp->filename,0, -"The specified start symbol \"%s\" is not \ -in a nonterminal of the grammar. \"%s\" will be used as the start \ -symbol instead.",lemp->start,lemp->rule->lhs->name); - lemp->errorcnt++; - sp = lemp->rule->lhs; - } - }else{ - sp = lemp->rule->lhs; - } - - /* Make sure the start symbol doesn't occur on the right-hand side of - ** any rule. Report an error if it does. (YACC would generate a new - ** start symbol in this case.) */ - for(rp=lemp->rule; rp; rp=rp->next){ - int i; - for(i=0; i<rp->nrhs; i++){ - if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ - ErrorMsg(lemp->filename,0, -"The start symbol \"%s\" occurs on the \ -right-hand side of a rule. This will result in a parser which \ -does not work properly.",sp->name); - lemp->errorcnt++; - } - } - } - - /* The basis configuration set for the first state - ** is all rules which have the start symbol as their - ** left-hand side */ - for(rp=sp->rule; rp; rp=rp->nextlhs){ - struct config *newcfp; - rp->lhsStart = 1; - newcfp = Configlist_addbasis(rp,0); - SetAdd(newcfp->fws,0); - } - - /* Compute the first state. All other states will be - ** computed automatically during the computation of the first one. - ** The returned pointer to the first state is not used. */ - (void)getstate(lemp); - return; -} - -/* Return a pointer to a state which is described by the configuration -** list which has been built from calls to Configlist_add. 
-*/ -PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ -PRIVATE struct state *getstate(struct lemon *lemp) -{ - struct config *cfp, *bp; - struct state *stp; - - /* Extract the sorted basis of the new state. The basis was constructed - ** by prior calls to "Configlist_addbasis()". */ - Configlist_sortbasis(); - bp = Configlist_basis(); - - /* Get a state with the same basis */ - stp = State_find(bp); - if( stp ){ - /* A state with the same basis already exists! Copy all the follow-set - ** propagation links from the state under construction into the - ** preexisting state, then return a pointer to the preexisting state */ - struct config *x, *y; - for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ - Plink_copy(&y->bplp,x->bplp); - Plink_delete(x->fplp); - x->fplp = x->bplp = 0; - } - cfp = Configlist_return(); - Configlist_eat(cfp); - }else{ - /* This really is a new state. Construct all the details */ - Configlist_closure(lemp); /* Compute the configuration closure */ - Configlist_sort(); /* Sort the configuration closure */ - cfp = Configlist_return(); /* Get a pointer to the config list */ - stp = State_new(); /* A new state structure */ - MemoryCheck(stp); - stp->bp = bp; /* Remember the configuration basis */ - stp->cfp = cfp; /* Remember the configuration closure */ - stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ - stp->ap = 0; /* No actions, yet. */ - State_insert(stp,stp->bp); /* Add to the state table */ - buildshifts(lemp,stp); /* Recursively compute successor states */ - } - return stp; -} - -/* -** Return true if two symbols are the same. -*/ -int same_symbol(struct symbol *a, struct symbol *b) -{ - int i; - if( a==b ) return 1; - if( a->type!=MULTITERMINAL ) return 0; - if( b->type!=MULTITERMINAL ) return 0; - if( a->nsubsym!=b->nsubsym ) return 0; - for(i=0; i<a->nsubsym; i++){ - if( a->subsym[i]!=b->subsym[i] ) return 0; - } - return 1; -} - -/* Construct all successor states to the given state. 
A "successor" -** state is any state which can be reached by a shift action. -*/ -PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) -{ - struct config *cfp; /* For looping thru the config closure of "stp" */ - struct config *bcfp; /* For the inner loop on config closure of "stp" */ - struct config *newcfg; /* */ - struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ - struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ - struct state *newstp; /* A pointer to a successor state */ - - /* Each configuration becomes complete after it contibutes to a successor - ** state. Initially, all configurations are incomplete */ - for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; - - /* Loop through all configurations of the state "stp" */ - for(cfp=stp->cfp; cfp; cfp=cfp->next){ - if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ - if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ - Configlist_reset(); /* Reset the new config set */ - sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ - - /* For every configuration in the state "stp" which has the symbol "sp" - ** following its dot, add the same configuration to the basis set under - ** construction but with the dot shifted one symbol to the right. 
*/ - for(bcfp=cfp; bcfp; bcfp=bcfp->next){ - if( bcfp->status==COMPLETE ) continue; /* Already used */ - if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ - bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ - if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ - bcfp->status = COMPLETE; /* Mark this config as used */ - newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); - Plink_add(&newcfg->bplp,bcfp); - } - - /* Get a pointer to the state described by the basis configuration set - ** constructed in the preceding loop */ - newstp = getstate(lemp); - - /* The state "newstp" is reached from the state "stp" by a shift action - ** on the symbol "sp" */ - if( sp->type==MULTITERMINAL ){ - int i; - for(i=0; i<sp->nsubsym; i++){ - Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); - } - }else{ - Action_add(&stp->ap,SHIFT,sp,(char *)newstp); - } - } -} - -/* -** Construct the propagation links -*/ -void FindLinks(struct lemon *lemp) -{ - int i; - struct config *cfp, *other; - struct state *stp; - struct plink *plp; - - /* Housekeeping detail: - ** Add to every propagate link a pointer back to the state to - ** which the link is attached. */ - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ - cfp->stp = stp; - } - } - - /* Convert all backlinks into forward links. Only the forward - ** links are used in the follow-set computation. */ - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ - for(plp=cfp->bplp; plp; plp=plp->next){ - other = plp->cfp; - Plink_add(&other->fplp,cfp); - } - } - } -} - -/* Compute all followsets. -** -** A followset is the set of all symbols which can come immediately -** after a configuration. 
-*/ -void FindFollowSets(struct lemon *lemp) -{ - int i; - struct config *cfp; - struct plink *plp; - int progress; - int change; - - for(i=0; i<lemp->nstate; i++){ - for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ - cfp->status = INCOMPLETE; - } - } - - do{ - progress = 0; - for(i=0; i<lemp->nstate; i++){ - for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ - if( cfp->status==COMPLETE ) continue; - for(plp=cfp->fplp; plp; plp=plp->next){ - change = SetUnion(plp->cfp->fws,cfp->fws); - if( change ){ - plp->cfp->status = INCOMPLETE; - progress = 1; - } - } - cfp->status = COMPLETE; - } - } - }while( progress ); -} - -static int resolve_conflict(struct action *,struct action *); - -/* Compute the reduce actions, and resolve conflicts. -*/ -void FindActions(struct lemon *lemp) -{ - int i,j; - struct config *cfp; - struct state *stp; - struct symbol *sp; - struct rule *rp; - - /* Add all of the reduce actions - ** A reduce action is added for each element of the followset of - ** a configuration which has its dot at the extreme right. - */ - for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ - stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ - if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ - for(j=0; j<lemp->nterminal; j++){ - if( SetFind(cfp->fws,j) ){ - /* Add a reduce action to the state "stp" which will reduce by the - ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ - Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); - } - } - } - } - } - - /* Add the accepting token */ - if( lemp->start ){ - sp = Symbol_find(lemp->start); - if( sp==0 ) sp = lemp->rule->lhs; - }else{ - sp = lemp->rule->lhs; - } - /* Add to the first state (which is always the starting state of the - ** finite state machine) an action to ACCEPT if the lookahead is the - ** start nonterminal. 
*/ - Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); - - /* Resolve conflicts */ - for(i=0; i<lemp->nstate; i++){ - struct action *ap, *nap; - struct state *stp; - stp = lemp->sorted[i]; - /* assert( stp->ap ); */ - stp->ap = Action_sort(stp->ap); - for(ap=stp->ap; ap && ap->next; ap=ap->next){ - for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ - /* The two actions "ap" and "nap" have the same lookahead. - ** Figure out which one should be used */ - lemp->nconflict += resolve_conflict(ap,nap); - } - } - } - - /* Report an error for each rule that can never be reduced. */ - for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; - for(i=0; i<lemp->nstate; i++){ - struct action *ap; - for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ - if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; - } - } - for(rp=lemp->rule; rp; rp=rp->next){ - if( rp->canReduce ) continue; - ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); - lemp->errorcnt++; - } -} - -/* Resolve a conflict between the two given actions. If the -** conflict can't be resolved, return non-zero. -** -** NO LONGER TRUE: -** To resolve a conflict, first look to see if either action -** is on an error rule. In that case, take the action which -** is not associated with the error rule. If neither or both -** actions are associated with an error rule, then try to -** use precedence to resolve the conflict. -** -** If either action is a SHIFT, then it must be apx. This -** function won't work if apx->type==REDUCE and apy->type==SHIFT. 
-*/ -static int resolve_conflict( - struct action *apx, - struct action *apy -){ - struct symbol *spx, *spy; - int errcnt = 0; - assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ - if( apx->type==SHIFT && apy->type==SHIFT ){ - apy->type = SSCONFLICT; - errcnt++; - } - if( apx->type==SHIFT && apy->type==REDUCE ){ - spx = apx->sp; - spy = apy->x.rp->precsym; - if( spy==0 || spx->prec<0 || spy->prec<0 ){ - /* Not enough precedence information. */ - apy->type = SRCONFLICT; - errcnt++; - }else if( spx->prec>spy->prec ){ /* higher precedence wins */ - apy->type = RD_RESOLVED; - }else if( spx->prec<spy->prec ){ - apx->type = SH_RESOLVED; - }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ - apy->type = RD_RESOLVED; /* associativity */ - }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ - apx->type = SH_RESOLVED; - }else{ - assert( spx->prec==spy->prec && spx->assoc==NONE ); - apx->type = ERROR; - } - }else if( apx->type==REDUCE && apy->type==REDUCE ){ - spx = apx->x.rp->precsym; - spy = apy->x.rp->precsym; - if( spx==0 || spy==0 || spx->prec<0 || - spy->prec<0 || spx->prec==spy->prec ){ - apy->type = RRCONFLICT; - errcnt++; - }else if( spx->prec>spy->prec ){ - apy->type = RD_RESOLVED; - }else if( spx->prec<spy->prec ){ - apx->type = RD_RESOLVED; - } - }else{ - assert( - apx->type==SH_RESOLVED || - apx->type==RD_RESOLVED || - apx->type==SSCONFLICT || - apx->type==SRCONFLICT || - apx->type==RRCONFLICT || - apy->type==SH_RESOLVED || - apy->type==RD_RESOLVED || - apy->type==SSCONFLICT || - apy->type==SRCONFLICT || - apy->type==RRCONFLICT - ); - /* The REDUCE/SHIFT case cannot happen because SHIFTs come before - ** REDUCEs on the list. If we reach this point it must be because - ** the parser conflict had already been resolved. 
*/ - } - return errcnt; -} -/********************* From the file "configlist.c" *************************/ -/* -** Routines to processing a configuration list and building a state -** in the LEMON parser generator. -*/ - -static struct config *freelist = 0; /* List of free configurations */ -static struct config *current = 0; /* Top of list of configurations */ -static struct config **currentend = 0; /* Last on list of configs */ -static struct config *basis = 0; /* Top of list of basis configs */ -static struct config **basisend = 0; /* End of list of basis configs */ - -/* Return a pointer to a new configuration */ -PRIVATE struct config *newconfig(){ - struct config *newcfg; - if( freelist==0 ){ - int i; - int amt = 3; - freelist = (struct config *)calloc( amt, sizeof(struct config) ); - if( freelist==0 ){ - fprintf(stderr,"Unable to allocate memory for a new configuration."); - exit(1); - } - for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; - freelist[amt-1].next = 0; - } - newcfg = freelist; - freelist = freelist->next; - return newcfg; -} - -/* The configuration "old" is no longer used */ -PRIVATE void deleteconfig(struct config *old) -{ - old->next = freelist; - freelist = old; -} - -/* Initialized the configuration list builder */ -void Configlist_init(){ - current = 0; - currentend = ¤t; - basis = 0; - basisend = &basis; - Configtable_init(); - return; -} - -/* Initialized the configuration list builder */ -void Configlist_reset(){ - current = 0; - currentend = ¤t; - basis = 0; - basisend = &basis; - Configtable_clear(0); - return; -} - -/* Add another configuration to the configuration list */ -struct config *Configlist_add( - struct rule *rp, /* The rule */ - int dot /* Index into the RHS of the rule where the dot goes */ -){ - struct config *cfp, model; - - assert( currentend!=0 ); - model.rp = rp; - model.dot = dot; - cfp = Configtable_find(&model); - if( cfp==0 ){ - cfp = newconfig(); - cfp->rp = rp; - cfp->dot = dot; - cfp->fws = SetNew(); - 
cfp->stp = 0; - cfp->fplp = cfp->bplp = 0; - cfp->next = 0; - cfp->bp = 0; - *currentend = cfp; - currentend = &cfp->next; - Configtable_insert(cfp); - } - return cfp; -} - -/* Add a basis configuration to the configuration list */ -struct config *Configlist_addbasis(struct rule *rp, int dot) -{ - struct config *cfp, model; - - assert( basisend!=0 ); - assert( currentend!=0 ); - model.rp = rp; - model.dot = dot; - cfp = Configtable_find(&model); - if( cfp==0 ){ - cfp = newconfig(); - cfp->rp = rp; - cfp->dot = dot; - cfp->fws = SetNew(); - cfp->stp = 0; - cfp->fplp = cfp->bplp = 0; - cfp->next = 0; - cfp->bp = 0; - *currentend = cfp; - currentend = &cfp->next; - *basisend = cfp; - basisend = &cfp->bp; - Configtable_insert(cfp); - } - return cfp; -} - -/* Compute the closure of the configuration list */ -void Configlist_closure(struct lemon *lemp) -{ - struct config *cfp, *newcfp; - struct rule *rp, *newrp; - struct symbol *sp, *xsp; - int i, dot; - - assert( currentend!=0 ); - for(cfp=current; cfp; cfp=cfp->next){ - rp = cfp->rp; - dot = cfp->dot; - if( dot>=rp->nrhs ) continue; - sp = rp->rhs[dot]; - if( sp->type==NONTERMINAL ){ - if( sp->rule==0 && sp!=lemp->errsym ){ - ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", - sp->name); - lemp->errorcnt++; - } - for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ - newcfp = Configlist_add(newrp,0); - for(i=dot+1; i<rp->nrhs; i++){ - xsp = rp->rhs[i]; - if( xsp->type==TERMINAL ){ - SetAdd(newcfp->fws,xsp->index); - break; - }else if( xsp->type==MULTITERMINAL ){ - int k; - for(k=0; k<xsp->nsubsym; k++){ - SetAdd(newcfp->fws, xsp->subsym[k]->index); - } - break; - }else{ - SetUnion(newcfp->fws,xsp->firstset); - if( xsp->lambda==LEMON_FALSE ) break; - } - } - if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); - } - } - } - return; -} - -/* Sort the configuration list */ -void Configlist_sort(){ - current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); - currentend = 0; - 
return; -} - -/* Sort the basis configuration list */ -void Configlist_sortbasis(){ - basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp); - basisend = 0; - return; -} - -/* Return a pointer to the head of the configuration list and -** reset the list */ -struct config *Configlist_return(){ - struct config *old; - old = current; - current = 0; - currentend = 0; - return old; -} - -/* Return a pointer to the head of the configuration list and -** reset the list */ -struct config *Configlist_basis(){ - struct config *old; - old = basis; - basis = 0; - basisend = 0; - return old; -} - -/* Free all elements of the given configuration list */ -void Configlist_eat(struct config *cfp) -{ - struct config *nextcfp; - for(; cfp; cfp=nextcfp){ - nextcfp = cfp->next; - assert( cfp->fplp==0 ); - assert( cfp->bplp==0 ); - if( cfp->fws ) SetFree(cfp->fws); - deleteconfig(cfp); - } - return; -} -/***************** From the file "error.c" *********************************/ -/* -** Code for printing error message. -*/ - -void ErrorMsg(const char *filename, int lineno, const char *format, ...){ - va_list ap; - fprintf(stderr, "%s:%d: ", filename, lineno); - va_start(ap, format); - vfprintf(stderr,format,ap); - va_end(ap); - fprintf(stderr, "\n"); -} -/**************** From the file "main.c" ************************************/ -/* -** Main program file for the LEMON parser generator. -*/ - -/* Report an out-of-memory condition and abort. This function -** is used mostly by the "MemoryCheck" macro in struct.h -*/ -void memory_error(){ - fprintf(stderr,"Out of memory. Aborting...\n"); - exit(1); -} - -static int nDefine = 0; /* Number of -D options on the command line */ -static char **azDefine = 0; /* Name of the -D macros */ - -/* This routine is called with the argument to each -D command-line option. -** Add the macro defined to the azDefine array. 
-*/ -static void handle_D_option(char *z){ - char **paz; - nDefine++; - azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); - if( azDefine==0 ){ - fprintf(stderr,"out of memory\n"); - exit(1); - } - paz = &azDefine[nDefine-1]; - *paz = (char *) malloc( lemonStrlen(z)+1 ); - if( *paz==0 ){ - fprintf(stderr,"out of memory\n"); - exit(1); - } - lemon_strcpy(*paz, z); - for(z=*paz; *z && *z!='='; z++){} - *z = 0; -} - -static char *user_templatename = NULL; -static void handle_T_option(char *z){ - user_templatename = (char *) malloc( lemonStrlen(z)+1 ); - if( user_templatename==0 ){ - memory_error(); - } - lemon_strcpy(user_templatename, z); -} - -/* The main program. Parse the command line and do it... */ -int main(int argc, char **argv) -{ - static int version = 0; - static int rpflag = 0; - static int basisflag = 0; - static int compress = 0; - static int quiet = 0; - static int statistics = 0; - static int mhflag = 0; - static int nolinenosflag = 0; - static int noResort = 0; - static struct s_options options[] = { - {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, - {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, - {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."}, - {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, - {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, - {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."}, - {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."}, - {OPT_FLAG, "p", (char*)&showPrecedenceConflict, - "Show conflicts resolved by precedence rules"}, - {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, - {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"}, - {OPT_FLAG, "s", (char*)&statistics, - "Print parser stats to standard output."}, - {OPT_FLAG, "x", (char*)&version, "Print the version number."}, - {OPT_FLAG,0,0,0} - }; - int i; - int exitcode; - 
struct lemon lem; - - OptInit(argv,options,stderr); - if( version ){ - printf("Lemon version 1.0\n"); - exit(0); - } - if( OptNArgs()!=1 ){ - fprintf(stderr,"Exactly one filename argument is required.\n"); - exit(1); - } - memset(&lem, 0, sizeof(lem)); - lem.errorcnt = 0; - - /* Initialize the machine */ - Strsafe_init(); - Symbol_init(); - State_init(); - lem.argv0 = argv[0]; - lem.filename = OptArg(0); - lem.basisflag = basisflag; - lem.nolinenosflag = nolinenosflag; - Symbol_new("$"); - lem.errsym = Symbol_new("error"); - lem.errsym->useCnt = 0; - - /* Parse the input file */ - Parse(&lem); - if( lem.errorcnt ) exit(lem.errorcnt); - if( lem.nrule==0 ){ - fprintf(stderr,"Empty grammar.\n"); - exit(1); - } - - /* Count and index the symbols of the grammar */ - Symbol_new("{default}"); - lem.nsymbol = Symbol_count(); - lem.symbols = Symbol_arrayof(); - for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; - qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); - for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; - while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } - assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); - lem.nsymbol = i - 1; - for(i=1; isupper(lem.symbols[i]->name[0]); i++); - lem.nterminal = i; - - /* Generate a reprint of the grammar, if requested on the command line */ - if( rpflag ){ - Reprint(&lem); - }else{ - /* Initialize the size for all follow and first sets */ - SetSize(lem.nterminal+1); - - /* Find the precedence for every production rule (that has one) */ - FindRulePrecedences(&lem); - - /* Compute the lambda-nonterminals and the first-sets for every - ** nonterminal */ - FindFirstSets(&lem); - - /* Compute all LR(0) states. 
Also record follow-set propagation - ** links so that the follow-set can be computed later */ - lem.nstate = 0; - FindStates(&lem); - lem.sorted = State_arrayof(); - - /* Tie up loose ends on the propagation links */ - FindLinks(&lem); - - /* Compute the follow set of every reducible configuration */ - FindFollowSets(&lem); - - /* Compute the action tables */ - FindActions(&lem); - - /* Compress the action tables */ - if( compress==0 ) CompressTables(&lem); - - /* Reorder and renumber the states so that states with fewer choices - ** occur at the end. This is an optimization that helps make the - ** generated parser tables smaller. */ - if( noResort==0 ) ResortStates(&lem); - - /* Generate a report of the parser generated. (the "y.output" file) */ - if( !quiet ) ReportOutput(&lem); - - /* Generate the source code for the parser */ - ReportTable(&lem, mhflag); - - /* Produce a header file for use by the scanner. (This step is - ** omitted if the "-m" option is used because makeheaders will - ** generate the file for us.) */ - if( !mhflag ) ReportHeader(&lem); - } - if( statistics ){ - printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", - lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); - printf(" %d states, %d parser table entries, %d conflicts\n", - lem.nstate, lem.tablesize, lem.nconflict); - } - if( lem.nconflict > 0 ){ - fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); - } - - /* return 0 on success, 1 on failure. */ - exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; - exit(exitcode); - return (exitcode); -} -/******************** From the file "msort.c" *******************************/ -/* -** A generic merge-sort program. -** -** USAGE: -** Let "ptr" be a pointer to some structure which is at the head of -** a null-terminated list. 
Then to sort the list call: -** -** ptr = msort(ptr,&(ptr->next),cmpfnc); -** -** In the above, "cmpfnc" is a pointer to a function which compares -** two instances of the structure and returns an integer, as in -** strcmp. The second argument is a pointer to the pointer to the -** second element of the linked list. This address is used to compute -** the offset to the "next" field within the structure. The offset to -** the "next" field must be constant for all structures in the list. -** -** The function returns a new pointer which is the head of the list -** after sorting. -** -** ALGORITHM: -** Merge-sort. -*/ - -/* -** Return a pointer to the next structure in the linked list. -*/ -#define NEXT(A) (*(char**)(((char*)A)+offset)) - -/* -** Inputs: -** a: A sorted, null-terminated linked list. (May be null). -** b: A sorted, null-terminated linked list. (May be null). -** cmp: A pointer to the comparison function. -** offset: Offset in the structure to the "next" field. -** -** Return Value: -** A pointer to the head of a sorted list containing the elements -** of both a and b. -** -** Side effects: -** The "next" pointers for elements in the lists a and b are -** changed. -*/ -static char *merge( - char *a, - char *b, - int (*cmp)(const char*,const char*), - int offset -){ - char *ptr, *head; - - if( a==0 ){ - head = b; - }else if( b==0 ){ - head = a; - }else{ - if( (*cmp)(a,b)<=0 ){ - ptr = a; - a = NEXT(a); - }else{ - ptr = b; - b = NEXT(b); - } - head = ptr; - while( a && b ){ - if( (*cmp)(a,b)<=0 ){ - NEXT(ptr) = a; - ptr = a; - a = NEXT(a); - }else{ - NEXT(ptr) = b; - ptr = b; - b = NEXT(b); - } - } - if( a ) NEXT(ptr) = a; - else NEXT(ptr) = b; - } - return head; -} - -/* -** Inputs: -** list: Pointer to a singly-linked list of structures. -** next: Pointer to pointer to the second element of the list. -** cmp: A comparison function. -** -** Return Value: -** A pointer to the head of a sorted list containing the elements -** orginally in list. 
-** -** Side effects: -** The "next" pointers for elements in list are changed. -*/ -#define LISTSIZE 30 -static char *msort( - char *list, - char **next, - int (*cmp)(const char*,const char*) -){ - unsigned long offset; - char *ep; - char *set[LISTSIZE]; - int i; - offset = (unsigned long)next - (unsigned long)list; - for(i=0; i<LISTSIZE; i++) set[i] = 0; - while( list ){ - ep = list; - list = NEXT(list); - NEXT(ep) = 0; - for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){ - ep = merge(ep,set[i],cmp,offset); - set[i] = 0; - } - set[i] = ep; - } - ep = 0; - for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset); - return ep; -} -/************************ From the file "option.c" **************************/ -static char **argv; -static struct s_options *op; -static FILE *errstream; - -#define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0) - -/* -** Print the command line with a carrot pointing to the k-th character -** of the n-th field. -*/ -static void errline(int n, int k, FILE *err) -{ - int spcnt, i; - if( argv[0] ) fprintf(err,"%s",argv[0]); - spcnt = lemonStrlen(argv[0]) + 1; - for(i=1; i<n && argv[i]; i++){ - fprintf(err," %s",argv[i]); - spcnt += lemonStrlen(argv[i])+1; - } - spcnt += k; - for(; argv[i]; i++) fprintf(err," %s",argv[i]); - if( spcnt<20 ){ - fprintf(err,"\n%*s^-- here\n",spcnt,""); - }else{ - fprintf(err,"\n%*shere --^\n",spcnt-7,""); - } -} - -/* -** Return the index of the N-th non-switch argument. Return -1 -** if N is out of range. -*/ -static int argindex(int n) -{ - int i; - int dashdash = 0; - if( argv!=0 && *argv!=0 ){ - for(i=1; argv[i]; i++){ - if( dashdash || !ISOPT(argv[i]) ){ - if( n==0 ) return i; - n--; - } - if( strcmp(argv[i],"--")==0 ) dashdash = 1; - } - } - return -1; -} - -static char emsg[] = "Command line syntax error: "; - -/* -** Process a flag command line argument. 
-*/ -static int handleflags(int i, FILE *err) -{ - int v; - int errcnt = 0; - int j; - for(j=0; op[j].label; j++){ - if( strncmp(&argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break; - } - v = argv[i][0]=='-' ? 1 : 0; - if( op[j].label==0 ){ - if( err ){ - fprintf(err,"%sundefined option.\n",emsg); - errline(i,1,err); - } - errcnt++; - }else if( op[j].type==OPT_FLAG ){ - *((int*)op[j].arg) = v; - }else if( op[j].type==OPT_FFLAG ){ - (*(void(*)(int))(op[j].arg))(v); - }else if( op[j].type==OPT_FSTR ){ - (*(void(*)(char *))(op[j].arg))(&argv[i][2]); - }else{ - if( err ){ - fprintf(err,"%smissing argument on switch.\n",emsg); - errline(i,1,err); - } - errcnt++; - } - return errcnt; -} - -/* -** Process a command line switch which has an argument. -*/ -static int handleswitch(int i, FILE *err) -{ - int lv = 0; - double dv = 0.0; - char *sv = 0, *end; - char *cp; - int j; - int errcnt = 0; - cp = strchr(argv[i],'='); - assert( cp!=0 ); - *cp = 0; - for(j=0; op[j].label; j++){ - if( strcmp(argv[i],op[j].label)==0 ) break; - } - *cp = '='; - if( op[j].label==0 ){ - if( err ){ - fprintf(err,"%sundefined option.\n",emsg); - errline(i,0,err); - } - errcnt++; - }else{ - cp++; - switch( op[j].type ){ - case OPT_FLAG: - case OPT_FFLAG: - if( err ){ - fprintf(err,"%soption requires an argument.\n",emsg); - errline(i,0,err); - } - errcnt++; - break; - case OPT_DBL: - case OPT_FDBL: - dv = strtod(cp,&end); - if( *end ){ - if( err ){ - fprintf(err,"%sillegal character in floating-point argument.\n",emsg); - errline(i,((unsigned long)end)-(unsigned long)argv[i],err); - } - errcnt++; - } - break; - case OPT_INT: - case OPT_FINT: - lv = strtol(cp,&end,0); - if( *end ){ - if( err ){ - fprintf(err,"%sillegal character in integer argument.\n",emsg); - errline(i,((unsigned long)end)-(unsigned long)argv[i],err); - } - errcnt++; - } - break; - case OPT_STR: - case OPT_FSTR: - sv = cp; - break; - } - switch( op[j].type ){ - case OPT_FLAG: - case OPT_FFLAG: - break; - case OPT_DBL: - 
*(double*)(op[j].arg) = dv; - break; - case OPT_FDBL: - (*(void(*)(double))(op[j].arg))(dv); - break; - case OPT_INT: - *(int*)(op[j].arg) = lv; - break; - case OPT_FINT: - (*(void(*)(int))(op[j].arg))((int)lv); - break; - case OPT_STR: - *(char**)(op[j].arg) = sv; - break; - case OPT_FSTR: - (*(void(*)(char *))(op[j].arg))(sv); - break; - } - } - return errcnt; -} - -int OptInit(char **a, struct s_options *o, FILE *err) -{ - int errcnt = 0; - argv = a; - op = o; - errstream = err; - if( argv && *argv && op ){ - int i; - for(i=1; argv[i]; i++){ - if( argv[i][0]=='+' || argv[i][0]=='-' ){ - errcnt += handleflags(i,err); - }else if( strchr(argv[i],'=') ){ - errcnt += handleswitch(i,err); - } - } - } - if( errcnt>0 ){ - fprintf(err,"Valid command line options for \"%s\" are:\n",*a); - OptPrint(); - exit(1); - } - return 0; -} - -int OptNArgs(){ - int cnt = 0; - int dashdash = 0; - int i; - if( argv!=0 && argv[0]!=0 ){ - for(i=1; argv[i]; i++){ - if( dashdash || !ISOPT(argv[i]) ) cnt++; - if( strcmp(argv[i],"--")==0 ) dashdash = 1; - } - } - return cnt; -} - -char *OptArg(int n) -{ - int i; - i = argindex(n); - return i>=0 ? 
argv[i] : 0; -} - -void OptErr(int n) -{ - int i; - i = argindex(n); - if( i>=0 ) errline(i,0,errstream); -} - -void OptPrint(){ - int i; - int max, len; - max = 0; - for(i=0; op[i].label; i++){ - len = lemonStrlen(op[i].label) + 1; - switch( op[i].type ){ - case OPT_FLAG: - case OPT_FFLAG: - break; - case OPT_INT: - case OPT_FINT: - len += 9; /* length of "<integer>" */ - break; - case OPT_DBL: - case OPT_FDBL: - len += 6; /* length of "<real>" */ - break; - case OPT_STR: - case OPT_FSTR: - len += 8; /* length of "<string>" */ - break; - } - if( len>max ) max = len; - } - for(i=0; op[i].label; i++){ - switch( op[i].type ){ - case OPT_FLAG: - case OPT_FFLAG: - fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message); - break; - case OPT_INT: - case OPT_FINT: - fprintf(errstream," %s=<integer>%*s %s\n",op[i].label, - (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message); - break; - case OPT_DBL: - case OPT_FDBL: - fprintf(errstream," %s=<real>%*s %s\n",op[i].label, - (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message); - break; - case OPT_STR: - case OPT_FSTR: - fprintf(errstream," %s=<string>%*s %s\n",op[i].label, - (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message); - break; - } - } -} -/*********************** From the file "parse.c" ****************************/ -/* -** Input file parser for the LEMON parser generator. 
-*/ - -/* The state of the parser */ -enum e_state { - INITIALIZE, - WAITING_FOR_DECL_OR_RULE, - WAITING_FOR_DECL_KEYWORD, - WAITING_FOR_DECL_ARG, - WAITING_FOR_PRECEDENCE_SYMBOL, - WAITING_FOR_ARROW, - IN_RHS, - LHS_ALIAS_1, - LHS_ALIAS_2, - LHS_ALIAS_3, - RHS_ALIAS_1, - RHS_ALIAS_2, - PRECEDENCE_MARK_1, - PRECEDENCE_MARK_2, - RESYNC_AFTER_RULE_ERROR, - RESYNC_AFTER_DECL_ERROR, - WAITING_FOR_DESTRUCTOR_SYMBOL, - WAITING_FOR_DATATYPE_SYMBOL, - WAITING_FOR_FALLBACK_ID, - WAITING_FOR_WILDCARD_ID, - WAITING_FOR_CLASS_ID, - WAITING_FOR_CLASS_TOKEN -}; -struct pstate { - char *filename; /* Name of the input file */ - int tokenlineno; /* Linenumber at which current token starts */ - int errorcnt; /* Number of errors so far */ - char *tokenstart; /* Text of current token */ - struct lemon *gp; /* Global state vector */ - enum e_state state; /* The state of the parser */ - struct symbol *fallback; /* The fallback token */ - struct symbol *tkclass; /* Token class symbol */ - struct symbol *lhs; /* Left-hand side of current rule */ - const char *lhsalias; /* Alias for the LHS */ - int nrhs; /* Number of right-hand side symbols seen */ - struct symbol *rhs[MAXRHS]; /* RHS symbols */ - const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ - struct rule *prevrule; /* Previous rule parsed */ - const char *declkeyword; /* Keyword of a declaration */ - char **declargslot; /* Where the declaration argument should be put */ - int insertLineMacro; /* Add #line before declaration insert */ - int *decllinenoslot; /* Where to write declaration line number */ - enum e_assoc declassoc; /* Assign this association to decl arguments */ - int preccounter; /* Assign this precedence to decl arguments */ - struct rule *firstrule; /* Pointer to first rule in the grammar */ - struct rule *lastrule; /* Pointer to the most recently parsed rule */ -}; - -/* Parse a single token */ -static void parseonetoken(struct pstate *psp) -{ - const char *x; - x = Strsafe(psp->tokenstart); /* 
Save the token permanently */ -#if 0 - printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, - x,psp->state); -#endif - switch( psp->state ){ - case INITIALIZE: - psp->prevrule = 0; - psp->preccounter = 0; - psp->firstrule = psp->lastrule = 0; - psp->gp->nrule = 0; - /* Fall thru to next case */ - case WAITING_FOR_DECL_OR_RULE: - if( x[0]=='%' ){ - psp->state = WAITING_FOR_DECL_KEYWORD; - }else if( islower(x[0]) ){ - psp->lhs = Symbol_new(x); - psp->nrhs = 0; - psp->lhsalias = 0; - psp->state = WAITING_FOR_ARROW; - }else if( x[0]=='{' ){ - if( psp->prevrule==0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, -"There is no prior rule upon which to attach the code \ -fragment which begins on this line."); - psp->errorcnt++; - }else if( psp->prevrule->code!=0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, -"Code fragment beginning on this line is not the first \ -to follow the previous rule."); - psp->errorcnt++; - }else{ - psp->prevrule->line = psp->tokenlineno; - psp->prevrule->code = &x[1]; - } - }else if( x[0]=='[' ){ - psp->state = PRECEDENCE_MARK_1; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Token \"%s\" should be either \"%%\" or a nonterminal name.", - x); - psp->errorcnt++; - } - break; - case PRECEDENCE_MARK_1: - if( !isupper(x[0]) ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "The precedence symbol must be a terminal."); - psp->errorcnt++; - }else if( psp->prevrule==0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "There is no prior rule to assign precedence \"[%s]\".",x); - psp->errorcnt++; - }else if( psp->prevrule->precsym!=0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, -"Precedence mark on this line is not the first \ -to follow the previous rule."); - psp->errorcnt++; - }else{ - psp->prevrule->precsym = Symbol_new(x); - } - psp->state = PRECEDENCE_MARK_2; - break; - case PRECEDENCE_MARK_2: - if( x[0]!=']' ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Missing \"]\" on precedence mark."); - psp->errorcnt++; - } - psp->state 
= WAITING_FOR_DECL_OR_RULE; - break; - case WAITING_FOR_ARROW: - if( x[0]==':' && x[1]==':' && x[2]=='=' ){ - psp->state = IN_RHS; - }else if( x[0]=='(' ){ - psp->state = LHS_ALIAS_1; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Expected to see a \":\" following the LHS symbol \"%s\".", - psp->lhs->name); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case LHS_ALIAS_1: - if( isalpha(x[0]) ){ - psp->lhsalias = x; - psp->state = LHS_ALIAS_2; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "\"%s\" is not a valid alias for the LHS \"%s\"\n", - x,psp->lhs->name); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case LHS_ALIAS_2: - if( x[0]==')' ){ - psp->state = LHS_ALIAS_3; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case LHS_ALIAS_3: - if( x[0]==':' && x[1]==':' && x[2]=='=' ){ - psp->state = IN_RHS; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Missing \"->\" following: \"%s(%s)\".", - psp->lhs->name,psp->lhsalias); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case IN_RHS: - if( x[0]=='.' 
){ - struct rule *rp; - rp = (struct rule *)calloc( sizeof(struct rule) + - sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); - if( rp==0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Can't allocate enough memory for this rule."); - psp->errorcnt++; - psp->prevrule = 0; - }else{ - int i; - rp->ruleline = psp->tokenlineno; - rp->rhs = (struct symbol**)&rp[1]; - rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); - for(i=0; i<psp->nrhs; i++){ - rp->rhs[i] = psp->rhs[i]; - rp->rhsalias[i] = psp->alias[i]; - } - rp->lhs = psp->lhs; - rp->lhsalias = psp->lhsalias; - rp->nrhs = psp->nrhs; - rp->code = 0; - rp->precsym = 0; - rp->index = psp->gp->nrule++; - rp->nextlhs = rp->lhs->rule; - rp->lhs->rule = rp; - rp->next = 0; - if( psp->firstrule==0 ){ - psp->firstrule = psp->lastrule = rp; - }else{ - psp->lastrule->next = rp; - psp->lastrule = rp; - } - psp->prevrule = rp; - } - psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( isalpha(x[0]) ){ - if( psp->nrhs>=MAXRHS ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Too many symbols on RHS of rule beginning at \"%s\".", - x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - }else{ - psp->rhs[psp->nrhs] = Symbol_new(x); - psp->alias[psp->nrhs] = 0; - psp->nrhs++; - } - }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ - struct symbol *msp = psp->rhs[psp->nrhs-1]; - if( msp->type!=MULTITERMINAL ){ - struct symbol *origsp = msp; - msp = (struct symbol *) calloc(1,sizeof(*msp)); - memset(msp, 0, sizeof(*msp)); - msp->type = MULTITERMINAL; - msp->nsubsym = 1; - msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); - msp->subsym[0] = origsp; - msp->name = origsp->name; - psp->rhs[psp->nrhs-1] = msp; - } - msp->nsubsym++; - msp->subsym = (struct symbol **) realloc(msp->subsym, - sizeof(struct symbol*)*msp->nsubsym); - msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); - if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Cannot form a 
compound containing a non-terminal"); - psp->errorcnt++; - } - }else if( x[0]=='(' && psp->nrhs>0 ){ - psp->state = RHS_ALIAS_1; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Illegal character on RHS of rule: \"%s\".",x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case RHS_ALIAS_1: - if( isalpha(x[0]) ){ - psp->alias[psp->nrhs-1] = x; - psp->state = RHS_ALIAS_2; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", - x,psp->rhs[psp->nrhs-1]->name); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case RHS_ALIAS_2: - if( x[0]==')' ){ - psp->state = IN_RHS; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); - psp->errorcnt++; - psp->state = RESYNC_AFTER_RULE_ERROR; - } - break; - case WAITING_FOR_DECL_KEYWORD: - if( isalpha(x[0]) ){ - psp->declkeyword = x; - psp->declargslot = 0; - psp->decllinenoslot = 0; - psp->insertLineMacro = 1; - psp->state = WAITING_FOR_DECL_ARG; - if( strcmp(x,"name")==0 ){ - psp->declargslot = &(psp->gp->name); - psp->insertLineMacro = 0; - }else if( strcmp(x,"include")==0 ){ - psp->declargslot = &(psp->gp->include); - }else if( strcmp(x,"code")==0 ){ - psp->declargslot = &(psp->gp->extracode); - }else if( strcmp(x,"token_destructor")==0 ){ - psp->declargslot = &psp->gp->tokendest; - }else if( strcmp(x,"default_destructor")==0 ){ - psp->declargslot = &psp->gp->vardest; - }else if( strcmp(x,"token_prefix")==0 ){ - psp->declargslot = &psp->gp->tokenprefix; - psp->insertLineMacro = 0; - }else if( strcmp(x,"syntax_error")==0 ){ - psp->declargslot = &(psp->gp->error); - }else if( strcmp(x,"parse_accept")==0 ){ - psp->declargslot = &(psp->gp->accept); - }else if( strcmp(x,"parse_failure")==0 ){ - psp->declargslot = &(psp->gp->failure); - }else if( strcmp(x,"stack_overflow")==0 ){ - psp->declargslot = &(psp->gp->overflow); - }else if( 
strcmp(x,"extra_argument")==0 ){ - psp->declargslot = &(psp->gp->arg); - psp->insertLineMacro = 0; - }else if( strcmp(x,"token_type")==0 ){ - psp->declargslot = &(psp->gp->tokentype); - psp->insertLineMacro = 0; - }else if( strcmp(x,"default_type")==0 ){ - psp->declargslot = &(psp->gp->vartype); - psp->insertLineMacro = 0; - }else if( strcmp(x,"stack_size")==0 ){ - psp->declargslot = &(psp->gp->stacksize); - psp->insertLineMacro = 0; - }else if( strcmp(x,"start_symbol")==0 ){ - psp->declargslot = &(psp->gp->start); - psp->insertLineMacro = 0; - }else if( strcmp(x,"left")==0 ){ - psp->preccounter++; - psp->declassoc = LEFT; - psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; - }else if( strcmp(x,"right")==0 ){ - psp->preccounter++; - psp->declassoc = RIGHT; - psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; - }else if( strcmp(x,"nonassoc")==0 ){ - psp->preccounter++; - psp->declassoc = NONE; - psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; - }else if( strcmp(x,"destructor")==0 ){ - psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; - }else if( strcmp(x,"type")==0 ){ - psp->state = WAITING_FOR_DATATYPE_SYMBOL; - }else if( strcmp(x,"fallback")==0 ){ - psp->fallback = 0; - psp->state = WAITING_FOR_FALLBACK_ID; - }else if( strcmp(x,"wildcard")==0 ){ - psp->state = WAITING_FOR_WILDCARD_ID; - }else if( strcmp(x,"token_class")==0 ){ - psp->state = WAITING_FOR_CLASS_ID; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Unknown declaration keyword: \"%%%s\".",x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - } - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Illegal declaration keyword: \"%s\".",x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - } - break; - case WAITING_FOR_DESTRUCTOR_SYMBOL: - if( !isalpha(x[0]) ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Symbol name missing after %%destructor keyword"); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else{ - struct symbol *sp = Symbol_new(x); - psp->declargslot = &sp->destructor; - 
psp->decllinenoslot = &sp->destLineno; - psp->insertLineMacro = 1; - psp->state = WAITING_FOR_DECL_ARG; - } - break; - case WAITING_FOR_DATATYPE_SYMBOL: - if( !isalpha(x[0]) ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Symbol name missing after %%type keyword"); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else{ - struct symbol *sp = Symbol_find(x); - if((sp) && (sp->datatype)){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Symbol %%type \"%s\" already defined", x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else{ - if (!sp){ - sp = Symbol_new(x); - } - psp->declargslot = &sp->datatype; - psp->insertLineMacro = 0; - psp->state = WAITING_FOR_DECL_ARG; - } - } - break; - case WAITING_FOR_PRECEDENCE_SYMBOL: - if( x[0]=='.' ){ - psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( isupper(x[0]) ){ - struct symbol *sp; - sp = Symbol_new(x); - if( sp->prec>=0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "Symbol \"%s\" has already be given a precedence.",x); - psp->errorcnt++; - }else{ - sp->prec = psp->preccounter; - sp->assoc = psp->declassoc; - } - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Can't assign a precedence to \"%s\".",x); - psp->errorcnt++; - } - break; - case WAITING_FOR_DECL_ARG: - if( x[0]=='{' || x[0]=='\"' || isalnum(x[0]) ){ - const char *zOld, *zNew; - char *zBuf, *z; - int nOld, n, nLine, nNew, nBack; - int addLineMacro; - char zLine[50]; - zNew = x; - if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; - nNew = lemonStrlen(zNew); - if( *psp->declargslot ){ - zOld = *psp->declargslot; - }else{ - zOld = ""; - } - nOld = lemonStrlen(zOld); - n = nOld + nNew + 20; - addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && - (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); - if( addLineMacro ){ - for(z=psp->filename, nBack=0; *z; z++){ - if( *z=='\\' ) nBack++; - } - lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); - nLine = lemonStrlen(zLine); - n += nLine + lemonStrlen(psp->filename) + nBack; 
- } - *psp->declargslot = (char *) realloc(*psp->declargslot, n); - zBuf = *psp->declargslot + nOld; - if( addLineMacro ){ - if( nOld && zBuf[-1]!='\n' ){ - *(zBuf++) = '\n'; - } - memcpy(zBuf, zLine, nLine); - zBuf += nLine; - *(zBuf++) = '"'; - for(z=psp->filename; *z; z++){ - if( *z=='\\' ){ - *(zBuf++) = '\\'; - } - *(zBuf++) = *z; - } - *(zBuf++) = '"'; - *(zBuf++) = '\n'; - } - if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ - psp->decllinenoslot[0] = psp->tokenlineno; - } - memcpy(zBuf, zNew, nNew); - zBuf += nNew; - *zBuf = 0; - psp->state = WAITING_FOR_DECL_OR_RULE; - }else{ - ErrorMsg(psp->filename,psp->tokenlineno, - "Illegal argument to %%%s: %s",psp->declkeyword,x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - } - break; - case WAITING_FOR_FALLBACK_ID: - if( x[0]=='.' ){ - psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( !isupper(x[0]) ){ - ErrorMsg(psp->filename, psp->tokenlineno, - "%%fallback argument \"%s\" should be a token", x); - psp->errorcnt++; - }else{ - struct symbol *sp = Symbol_new(x); - if( psp->fallback==0 ){ - psp->fallback = sp; - }else if( sp->fallback ){ - ErrorMsg(psp->filename, psp->tokenlineno, - "More than one fallback assigned to token %s", x); - psp->errorcnt++; - }else{ - sp->fallback = psp->fallback; - psp->gp->has_fallback = 1; - } - } - break; - case WAITING_FOR_WILDCARD_ID: - if( x[0]=='.' 
){ - psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( !isupper(x[0]) ){ - ErrorMsg(psp->filename, psp->tokenlineno, - "%%wildcard argument \"%s\" should be a token", x); - psp->errorcnt++; - }else{ - struct symbol *sp = Symbol_new(x); - if( psp->gp->wildcard==0 ){ - psp->gp->wildcard = sp; - }else{ - ErrorMsg(psp->filename, psp->tokenlineno, - "Extra wildcard to token: %s", x); - psp->errorcnt++; - } - } - break; - case WAITING_FOR_CLASS_ID: - if( !islower(x[0]) ){ - ErrorMsg(psp->filename, psp->tokenlineno, - "%%token_class must be followed by an identifier: ", x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else if( Symbol_find(x) ){ - ErrorMsg(psp->filename, psp->tokenlineno, - "Symbol \"%s\" already used", x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else{ - psp->tkclass = Symbol_new(x); - psp->tkclass->type = MULTITERMINAL; - psp->state = WAITING_FOR_CLASS_TOKEN; - } - break; - case WAITING_FOR_CLASS_TOKEN: - if( x[0]=='.' ){ - psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){ - struct symbol *msp = psp->tkclass; - msp->nsubsym++; - msp->subsym = (struct symbol **) realloc(msp->subsym, - sizeof(struct symbol*)*msp->nsubsym); - if( !isupper(x[0]) ) x++; - msp->subsym[msp->nsubsym-1] = Symbol_new(x); - }else{ - ErrorMsg(psp->filename, psp->tokenlineno, - "%%token_class argument \"%s\" should be a token", x); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - } - break; - case RESYNC_AFTER_RULE_ERROR: -/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; -** break; */ - case RESYNC_AFTER_DECL_ERROR: - if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; - if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; - break; - } -} - -/* Run the preprocessor over the input file text. The global variables -** azDefine[0] through azDefine[nDefine-1] contains the names of all defined -** macros. 
This routine looks for "%ifdef" and "%ifndef" and "%endif" and -** comments them out. Text in between is also commented out as appropriate. -*/ -static void preprocess_input(char *z){ - int i, j, k, n; - int exclude = 0; - int start = 0; - int lineno = 1; - int start_lineno = 1; - for(i=0; z[i]; i++){ - if( z[i]=='\n' ) lineno++; - if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; - if( strncmp(&z[i],"%endif",6)==0 && isspace(z[i+6]) ){ - if( exclude ){ - exclude--; - if( exclude==0 ){ - for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; - } - } - for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; - }else if( (strncmp(&z[i],"%ifdef",6)==0 && isspace(z[i+6])) - || (strncmp(&z[i],"%ifndef",7)==0 && isspace(z[i+7])) ){ - if( exclude ){ - exclude++; - }else{ - for(j=i+7; isspace(z[j]); j++){} - for(n=0; z[j+n] && !isspace(z[j+n]); n++){} - exclude = 1; - for(k=0; k<nDefine; k++){ - if( strncmp(azDefine[k],&z[j],n)==0 && lemonStrlen(azDefine[k])==n ){ - exclude = 0; - break; - } - } - if( z[i+3]=='n' ) exclude = !exclude; - if( exclude ){ - start = i; - start_lineno = lineno; - } - } - for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; - } - } - if( exclude ){ - fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno); - exit(1); - } -} - -/* In spite of its name, this function is really a scanner. It read -** in the entire input file (all at once) then tokenizes it. Each -** token is passed to the function "parseonetoken" which builds all -** the appropriate data structures in the global state vector "gp". 
-*/ -void Parse(struct lemon *gp) -{ - struct pstate ps; - FILE *fp; - char *filebuf; - int filesize; - int lineno; - int c; - char *cp, *nextcp; - int startline = 0; - - memset(&ps, '\0', sizeof(ps)); - ps.gp = gp; - ps.filename = gp->filename; - ps.errorcnt = 0; - ps.state = INITIALIZE; - - /* Begin by reading the input file */ - fp = fopen(ps.filename,"rb"); - if( fp==0 ){ - ErrorMsg(ps.filename,0,"Can't open this file for reading."); - gp->errorcnt++; - return; - } - fseek(fp,0,2); - filesize = ftell(fp); - rewind(fp); - filebuf = (char *)malloc( filesize+1 ); - if( filesize>100000000 || filebuf==0 ){ - ErrorMsg(ps.filename,0,"Input file too large."); - gp->errorcnt++; - fclose(fp); - return; - } - if( fread(filebuf,1,filesize,fp)!=filesize ){ - ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", - filesize); - free(filebuf); - gp->errorcnt++; - fclose(fp); - return; - } - fclose(fp); - filebuf[filesize] = 0; - - /* Make an initial pass through the file to handle %ifdef and %ifndef */ - preprocess_input(filebuf); - - /* Now scan the text of the input file */ - lineno = 1; - for(cp=filebuf; (c= *cp)!=0; ){ - if( c=='\n' ) lineno++; /* Keep track of the line number */ - if( isspace(c) ){ cp++; continue; } /* Skip all white space */ - if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ - cp+=2; - while( (c= *cp)!=0 && c!='\n' ) cp++; - continue; - } - if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ - cp+=2; - while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ - if( c=='\n' ) lineno++; - cp++; - } - if( c ) cp++; - continue; - } - ps.tokenstart = cp; /* Mark the beginning of the token */ - ps.tokenlineno = lineno; /* Linenumber on which token begins */ - if( c=='\"' ){ /* String literals */ - cp++; - while( (c= *cp)!=0 && c!='\"' ){ - if( c=='\n' ) lineno++; - cp++; - } - if( c==0 ){ - ErrorMsg(ps.filename,startline, -"String starting on this line is not terminated before the end of the file."); - ps.errorcnt++; - nextcp = cp; - }else{ - 
nextcp = cp+1; - } - }else if( c=='{' ){ /* A block of C code */ - int level; - cp++; - for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ - if( c=='\n' ) lineno++; - else if( c=='{' ) level++; - else if( c=='}' ) level--; - else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ - int prevc; - cp = &cp[2]; - prevc = 0; - while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ - if( c=='\n' ) lineno++; - prevc = c; - cp++; - } - }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ - cp = &cp[2]; - while( (c= *cp)!=0 && c!='\n' ) cp++; - if( c ) lineno++; - }else if( c=='\'' || c=='\"' ){ /* String a character literals */ - int startchar, prevc; - startchar = c; - prevc = 0; - for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ - if( c=='\n' ) lineno++; - if( prevc=='\\' ) prevc = 0; - else prevc = c; - } - } - } - if( c==0 ){ - ErrorMsg(ps.filename,ps.tokenlineno, -"C code starting on this line is not terminated before the end of the file."); - ps.errorcnt++; - nextcp = cp; - }else{ - nextcp = cp+1; - } - }else if( isalnum(c) ){ /* Identifiers */ - while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; - nextcp = cp; - }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ - cp += 3; - nextcp = cp; - }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){ - cp += 2; - while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++; - nextcp = cp; - }else{ /* All other (one character) operators */ - cp++; - nextcp = cp; - } - c = *cp; - *cp = 0; /* Null terminate the token */ - parseonetoken(&ps); /* Parse the token */ - *cp = c; /* Restore the buffer */ - cp = nextcp; - } - free(filebuf); /* Release the buffer after parsing */ - gp->rule = ps.firstrule; - gp->errorcnt = ps.errorcnt; -} -/*************************** From the file "plink.c" *********************/ -/* -** Routines processing configuration follow-set propagation links -** in the LEMON parser generator. 
-*/ -static struct plink *plink_freelist = 0; - -/* Allocate a new plink */ -struct plink *Plink_new(){ - struct plink *newlink; - - if( plink_freelist==0 ){ - int i; - int amt = 100; - plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); - if( plink_freelist==0 ){ - fprintf(stderr, - "Unable to allocate memory for a new follow-set propagation link.\n"); - exit(1); - } - for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; - plink_freelist[amt-1].next = 0; - } - newlink = plink_freelist; - plink_freelist = plink_freelist->next; - return newlink; -} - -/* Add a plink to a plink list */ -void Plink_add(struct plink **plpp, struct config *cfp) -{ - struct plink *newlink; - newlink = Plink_new(); - newlink->next = *plpp; - *plpp = newlink; - newlink->cfp = cfp; -} - -/* Transfer every plink on the list "from" to the list "to" */ -void Plink_copy(struct plink **to, struct plink *from) -{ - struct plink *nextpl; - while( from ){ - nextpl = from->next; - from->next = *to; - *to = from; - from = nextpl; - } -} - -/* Delete every plink on the list */ -void Plink_delete(struct plink *plp) -{ - struct plink *nextpl; - - while( plp ){ - nextpl = plp->next; - plp->next = plink_freelist; - plink_freelist = plp; - plp = nextpl; - } -} -/*********************** From the file "report.c" **************************/ -/* -** Procedures for generating reports and tables in the LEMON parser generator. -*/ - -/* Generate a filename with the given suffix. Space to hold the -** name comes from malloc() and must be freed by the calling -** function. 
-*/ -PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) -{ - char *name; - char *cp; - - name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); - if( name==0 ){ - fprintf(stderr,"Can't allocate space for a filename.\n"); - exit(1); - } - lemon_strcpy(name,lemp->filename); - cp = strrchr(name,'.'); - if( cp ) *cp = 0; - lemon_strcat(name,suffix); - return name; -} - -/* Open a file with a name based on the name of the input file, -** but with a different (specified) suffix, and return a pointer -** to the stream */ -PRIVATE FILE *file_open( - struct lemon *lemp, - const char *suffix, - const char *mode -){ - FILE *fp; - - if( lemp->outname ) free(lemp->outname); - lemp->outname = file_makename(lemp, suffix); - fp = fopen(lemp->outname,mode); - if( fp==0 && *mode=='w' ){ - fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); - lemp->errorcnt++; - return 0; - } - return fp; -} - -/* Duplicate the input file without comments and without actions -** on rules */ -void Reprint(struct lemon *lemp) -{ - struct rule *rp; - struct symbol *sp; - int i, j, maxlen, len, ncolumns, skip; - printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); - maxlen = 10; - for(i=0; i<lemp->nsymbol; i++){ - sp = lemp->symbols[i]; - len = lemonStrlen(sp->name); - if( len>maxlen ) maxlen = len; - } - ncolumns = 76/(maxlen+5); - if( ncolumns<1 ) ncolumns = 1; - skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; - for(i=0; i<skip; i++){ - printf("//"); - for(j=i; j<lemp->nsymbol; j+=skip){ - sp = lemp->symbols[j]; - assert( sp->index==j ); - printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); - } - printf("\n"); - } - for(rp=lemp->rule; rp; rp=rp->next){ - printf("%s",rp->lhs->name); - /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ - printf(" ::="); - for(i=0; i<rp->nrhs; i++){ - sp = rp->rhs[i]; - if( sp->type==MULTITERMINAL ){ - printf(" %s", sp->subsym[0]->name); - for(j=1; j<sp->nsubsym; j++){ - printf("|%s", 
sp->subsym[j]->name); - } - }else{ - printf(" %s", sp->name); - } - /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ - } - printf("."); - if( rp->precsym ) printf(" [%s]",rp->precsym->name); - /* if( rp->code ) printf("\n %s",rp->code); */ - printf("\n"); - } -} - -void ConfigPrint(FILE *fp, struct config *cfp) -{ - struct rule *rp; - struct symbol *sp; - int i, j; - rp = cfp->rp; - fprintf(fp,"%s ::=",rp->lhs->name); - for(i=0; i<=rp->nrhs; i++){ - if( i==cfp->dot ) fprintf(fp," *"); - if( i==rp->nrhs ) break; - sp = rp->rhs[i]; - if( sp->type==MULTITERMINAL ){ - fprintf(fp," %s", sp->subsym[0]->name); - for(j=1; j<sp->nsubsym; j++){ - fprintf(fp,"|%s",sp->subsym[j]->name); - } - }else{ - fprintf(fp," %s", sp->name); - } - } -} - -/* #define TEST */ -#if 0 -/* Print a set */ -PRIVATE void SetPrint(out,set,lemp) -FILE *out; -char *set; -struct lemon *lemp; -{ - int i; - char *spacer; - spacer = ""; - fprintf(out,"%12s[",""); - for(i=0; i<lemp->nterminal; i++){ - if( SetFind(set,i) ){ - fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); - spacer = " "; - } - } - fprintf(out,"]\n"); -} - -/* Print a plink chain */ -PRIVATE void PlinkPrint(out,plp,tag) -FILE *out; -struct plink *plp; -char *tag; -{ - while( plp ){ - fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); - ConfigPrint(out,plp->cfp); - fprintf(out,"\n"); - plp = plp->next; - } -} -#endif - -/* Print an action to the given file descriptor. Return FALSE if -** nothing was actually printed. 
-*/ -int PrintAction(struct action *ap, FILE *fp, int indent){ - int result = 1; - switch( ap->type ){ - case SHIFT: - fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); - break; - case REDUCE: - fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); - break; - case ACCEPT: - fprintf(fp,"%*s accept",indent,ap->sp->name); - break; - case ERROR: - fprintf(fp,"%*s error",indent,ap->sp->name); - break; - case SRCONFLICT: - case RRCONFLICT: - fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", - indent,ap->sp->name,ap->x.rp->index); - break; - case SSCONFLICT: - fprintf(fp,"%*s shift %-3d ** Parsing conflict **", - indent,ap->sp->name,ap->x.stp->statenum); - break; - case SH_RESOLVED: - if( showPrecedenceConflict ){ - fprintf(fp,"%*s shift %-3d -- dropped by precedence", - indent,ap->sp->name,ap->x.stp->statenum); - }else{ - result = 0; - } - break; - case RD_RESOLVED: - if( showPrecedenceConflict ){ - fprintf(fp,"%*s reduce %-3d -- dropped by precedence", - indent,ap->sp->name,ap->x.rp->index); - }else{ - result = 0; - } - break; - case NOT_USED: - result = 0; - break; - } - return result; -} - -/* Generate the "y.output" log file */ -void ReportOutput(struct lemon *lemp) -{ - int i; - struct state *stp; - struct config *cfp; - struct action *ap; - FILE *fp; - - fp = file_open(lemp,".out","wb"); - if( fp==0 ) return; - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - fprintf(fp,"State %d:\n",stp->statenum); - if( lemp->basisflag ) cfp=stp->bp; - else cfp=stp->cfp; - while( cfp ){ - char buf[20]; - if( cfp->dot==cfp->rp->nrhs ){ - lemon_sprintf(buf,"(%d)",cfp->rp->index); - fprintf(fp," %5s ",buf); - }else{ - fprintf(fp," "); - } - ConfigPrint(fp,cfp); - fprintf(fp,"\n"); -#if 0 - SetPrint(fp,cfp->fws,lemp); - PlinkPrint(fp,cfp->fplp,"To "); - PlinkPrint(fp,cfp->bplp,"From"); -#endif - if( lemp->basisflag ) cfp=cfp->bp; - else cfp=cfp->next; - } - fprintf(fp,"\n"); - for(ap=stp->ap; ap; ap=ap->next){ - if( PrintAction(ap,fp,30) ) 
fprintf(fp,"\n"); - } - fprintf(fp,"\n"); - } - fprintf(fp, "----------------------------------------------------\n"); - fprintf(fp, "Symbols:\n"); - for(i=0; i<lemp->nsymbol; i++){ - int j; - struct symbol *sp; - - sp = lemp->symbols[i]; - fprintf(fp, " %3d: %s", i, sp->name); - if( sp->type==NONTERMINAL ){ - fprintf(fp, ":"); - if( sp->lambda ){ - fprintf(fp, " <lambda>"); - } - for(j=0; j<lemp->nterminal; j++){ - if( sp->firstset && SetFind(sp->firstset, j) ){ - fprintf(fp, " %s", lemp->symbols[j]->name); - } - } - } - fprintf(fp, "\n"); - } - fclose(fp); - return; -} - -/* Search for the file "name" which is in the same directory as -** the exacutable */ -PRIVATE char *pathsearch(char *argv0, char *name, int modemask) -{ - const char *pathlist; - char *pathbufptr; - char *pathbuf; - char *path,*cp; - char c; - -#ifdef __WIN32__ - cp = strrchr(argv0,'\\'); -#else - cp = strrchr(argv0,'/'); -#endif - if( cp ){ - c = *cp; - *cp = 0; - path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); - if( path ) lemon_sprintf(path,"%s/%s",argv0,name); - *cp = c; - }else{ - pathlist = getenv("PATH"); - if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; - pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); - path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); - if( (pathbuf != 0) && (path!=0) ){ - pathbufptr = pathbuf; - lemon_strcpy(pathbuf, pathlist); - while( *pathbuf ){ - cp = strchr(pathbuf,':'); - if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; - c = *cp; - *cp = 0; - lemon_sprintf(path,"%s/%s",pathbuf,name); - *cp = c; - if( c==0 ) pathbuf[0] = 0; - else pathbuf = &cp[1]; - if( access(path,modemask)==0 ) break; - } - free(pathbufptr); - } - } - return path; -} - -/* Given an action, compute the integer value for that action -** which is to be put in the action table of the generated machine. -** Return negative if no action should be generated. 
-*/ -PRIVATE int compute_action(struct lemon *lemp, struct action *ap) -{ - int act; - switch( ap->type ){ - case SHIFT: act = ap->x.stp->statenum; break; - case REDUCE: act = ap->x.rp->index + lemp->nstate; break; - case ERROR: act = lemp->nstate + lemp->nrule; break; - case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; - default: act = -1; break; - } - return act; -} - -#define LINESIZE 1000 -/* The next cluster of routines are for reading the template file -** and writing the results to the generated parser */ -/* The first function transfers data from "in" to "out" until -** a line is seen which begins with "%%". The line number is -** tracked. -** -** if name!=0, then any word that begin with "Parse" is changed to -** begin with *name instead. -*/ -PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) -{ - int i, iStart; - char line[LINESIZE]; - while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ - (*lineno)++; - iStart = 0; - if( name ){ - for(i=0; line[i]; i++){ - if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 - && (i==0 || !isalpha(line[i-1])) - ){ - if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); - fprintf(out,"%s",name); - i += 4; - iStart = i+1; - } - } - } - fprintf(out,"%s",&line[iStart]); - } -} - -/* The next function finds the template file and opens it, returning -** a pointer to the opened file. */ -PRIVATE FILE *tplt_open(struct lemon *lemp) -{ - static char templatename[] = "lempar.c"; - char buf[1000]; - FILE *in; - char *tpltname; - char *cp; - - /* first, see if user specified a template filename on the command line. 
*/ - if (user_templatename != 0) { - if( access(user_templatename,004)==-1 ){ - fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", - user_templatename); - lemp->errorcnt++; - return 0; - } - in = fopen(user_templatename,"rb"); - if( in==0 ){ - fprintf(stderr,"Can't open the template file \"%s\".\n",user_templatename); - lemp->errorcnt++; - return 0; - } - return in; - } - - cp = strrchr(lemp->filename,'.'); - if( cp ){ - lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); - }else{ - lemon_sprintf(buf,"%s.lt",lemp->filename); - } - if( access(buf,004)==0 ){ - tpltname = buf; - }else if( access(templatename,004)==0 ){ - tpltname = templatename; - }else{ - tpltname = pathsearch(lemp->argv0,templatename,0); - } - if( tpltname==0 ){ - fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", - templatename); - lemp->errorcnt++; - return 0; - } - in = fopen(tpltname,"rb"); - if( in==0 ){ - fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); - lemp->errorcnt++; - return 0; - } - return in; -} - -/* Print a #line directive line to the output file. 
*/ -PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) -{ - fprintf(out,"#line %d \"",lineno); - while( *filename ){ - if( *filename == '\\' ) putc('\\',out); - putc(*filename,out); - filename++; - } - fprintf(out,"\"\n"); -} - -/* Print a string to the file and keep the linenumber up to date */ -PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) -{ - if( str==0 ) return; - while( *str ){ - putc(*str,out); - if( *str=='\n' ) (*lineno)++; - str++; - } - if( str[-1]!='\n' ){ - putc('\n',out); - (*lineno)++; - } - if (!lemp->nolinenosflag) { - (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); - } - return; -} - -/* -** The following routine emits code for the destructor for the -** symbol sp -*/ -void emit_destructor_code( - FILE *out, - struct symbol *sp, - struct lemon *lemp, - int *lineno -){ - char *cp = 0; - - if( sp->type==TERMINAL ){ - cp = lemp->tokendest; - if( cp==0 ) return; - fprintf(out,"{\n"); (*lineno)++; - }else if( sp->destructor ){ - cp = sp->destructor; - fprintf(out,"{\n"); (*lineno)++; - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,sp->destLineno,lemp->filename); } - }else if( lemp->vardest ){ - cp = lemp->vardest; - if( cp==0 ) return; - fprintf(out,"{\n"); (*lineno)++; - }else{ - assert( 0 ); /* Cannot happen */ - } - for(; *cp; cp++){ - if( *cp=='$' && cp[1]=='$' ){ - fprintf(out,"(yypminor->yy%d)",sp->dtnum); - cp++; - continue; - } - if( *cp=='\n' ) (*lineno)++; - fputc(*cp,out); - } - fprintf(out,"\n"); (*lineno)++; - if (!lemp->nolinenosflag) { - (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); - } - fprintf(out,"}\n"); (*lineno)++; - return; -} - -/* -** Return TRUE (non-zero) if the given symbol has a destructor. -*/ -int has_destructor(struct symbol *sp, struct lemon *lemp) -{ - int ret; - if( sp->type==TERMINAL ){ - ret = lemp->tokendest!=0; - }else{ - ret = lemp->vardest!=0 || sp->destructor!=0; - } - return ret; -} - -/* -** Append text to a dynamically allocated string. 
If zText is 0 then -** reset the string to be empty again. Always return the complete text -** of the string (which is overwritten with each call). -** -** n bytes of zText are stored. If n==0 then all of zText up to the first -** \000 terminator is stored. zText can contain up to two instances of -** %d. The values of p1 and p2 are written into the first and second -** %d. -** -** If n==-1, then the previous character is overwritten. -*/ -PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ - static char empty[1] = { 0 }; - static char *z = 0; - static int alloced = 0; - static int used = 0; - int c; - char zInt[40]; - if( zText==0 ){ - used = 0; - return z; - } - if( n<=0 ){ - if( n<0 ){ - used += n; - assert( used>=0 ); - } - n = lemonStrlen(zText); - } - if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ - alloced = n + sizeof(zInt)*2 + used + 200; - z = (char *) realloc(z, alloced); - } - if( z==0 ) return empty; - while( n-- > 0 ){ - c = *(zText++); - if( c=='%' && n>0 && zText[0]=='d' ){ - lemon_sprintf(zInt, "%d", p1); - p1 = p2; - lemon_strcpy(&z[used], zInt); - used += lemonStrlen(&z[used]); - zText++; - n--; - }else{ - z[used++] = c; - } - } - z[used] = 0; - return z; -} - -/* -** zCode is a string that is the action associated with a rule. Expand -** the symbols in this string so that the refer to elements of the parser -** stack. -*/ -PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){ - char *cp, *xp; - int i; - char lhsused = 0; /* True if the LHS element has been used */ - char used[MAXRHS]; /* True for each RHS element which is used */ - - for(i=0; i<rp->nrhs; i++) used[i] = 0; - lhsused = 0; - - if( rp->code==0 ){ - static char newlinestr[2] = { '\n', '\0' }; - rp->code = newlinestr; - rp->line = rp->ruleline; - } - - append_str(0,0,0,0); - - /* This const cast is wrong but harmless, if we're careful. 
*/ - for(cp=(char *)rp->code; *cp; cp++){ - if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ - char saved; - for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); - saved = *xp; - *xp = 0; - if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ - append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0); - cp = xp; - lhsused = 1; - }else{ - for(i=0; i<rp->nrhs; i++){ - if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ - if( cp!=rp->code && cp[-1]=='@' ){ - /* If the argument is of the form @X then substituted - ** the token number of X, not the value of X */ - append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); - }else{ - struct symbol *sp = rp->rhs[i]; - int dtnum; - if( sp->type==MULTITERMINAL ){ - dtnum = sp->subsym[0]->dtnum; - }else{ - dtnum = sp->dtnum; - } - append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); - } - cp = xp; - used[i] = 1; - break; - } - } - } - *xp = saved; - } - append_str(cp, 1, 0, 0); - } /* End loop */ - - /* Check to make sure the LHS has been used */ - if( rp->lhsalias && !lhsused ){ - ErrorMsg(lemp->filename,rp->ruleline, - "Label \"%s\" for \"%s(%s)\" is never used.", - rp->lhsalias,rp->lhs->name,rp->lhsalias); - lemp->errorcnt++; - } - - /* Generate destructor code for RHS symbols which are not used in the - ** reduce code */ - for(i=0; i<rp->nrhs; i++){ - if( rp->rhsalias[i] && !used[i] ){ - ErrorMsg(lemp->filename,rp->ruleline, - "Label %s for \"%s(%s)\" is never used.", - rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); - lemp->errorcnt++; - }else if( rp->rhsalias[i]==0 ){ - if( has_destructor(rp->rhs[i],lemp) ){ - append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, - rp->rhs[i]->index,i-rp->nrhs+1); - }else{ - /* No destructor defined for this term */ - } - } - } - if( rp->code ){ - cp = append_str(0,0,0,0); - rp->code = Strsafe(cp?cp:""); - } -} - -/* -** Generate code which executes when the rule "rp" is reduced. Write -** the code to "out". Make sure lineno stays up-to-date. 
-*/ -PRIVATE void emit_code( - FILE *out, - struct rule *rp, - struct lemon *lemp, - int *lineno -){ - const char *cp; - - /* Generate code to do the reduce action */ - if( rp->code ){ - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,rp->line,lemp->filename); } - fprintf(out,"{%s",rp->code); - for(cp=rp->code; *cp; cp++){ - if( *cp=='\n' ) (*lineno)++; - } /* End loop */ - fprintf(out,"}\n"); (*lineno)++; - if (!lemp->nolinenosflag) { (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); } - } /* End if( rp->code ) */ - - return; -} - -/* -** Print the definition of the union used for the parser's data stack. -** This union contains fields for every possible data type for tokens -** and nonterminals. In the process of computing and printing this -** union, also set the ".dtnum" field of every terminal and nonterminal -** symbol. -*/ -void print_stack_union( - FILE *out, /* The output stream */ - struct lemon *lemp, /* The main info structure for this parser */ - int *plineno, /* Pointer to the line number */ - int mhflag /* True if generating makeheaders output */ -){ - int lineno = *plineno; /* The line number of the output */ - char **types; /* A hash table of datatypes */ - int arraysize; /* Size of the "types" array */ - int maxdtlength; /* Maximum length of any ".datatype" field. 
*/ - char *stddt; /* Standardized name for a datatype */ - int i,j; /* Loop counters */ - unsigned hash; /* For hashing the name of a type */ - const char *name; /* Name of the parser */ - - /* Allocate and initialize types[] and allocate stddt[] */ - arraysize = lemp->nsymbol * 2; - types = (char**)calloc( arraysize, sizeof(char*) ); - if( types==0 ){ - fprintf(stderr,"Out of memory.\n"); - exit(1); - } - for(i=0; i<arraysize; i++) types[i] = 0; - maxdtlength = 0; - if( lemp->vartype ){ - maxdtlength = lemonStrlen(lemp->vartype); - } - for(i=0; i<lemp->nsymbol; i++){ - int len; - struct symbol *sp = lemp->symbols[i]; - if( sp->datatype==0 ) continue; - len = lemonStrlen(sp->datatype); - if( len>maxdtlength ) maxdtlength = len; - } - stddt = (char*)malloc( maxdtlength*2 + 1 ); - if( stddt==0 ){ - fprintf(stderr,"Out of memory.\n"); - exit(1); - } - - /* Build a hash table of datatypes. The ".dtnum" field of each symbol - ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is - ** used for terminal symbols. If there is no %default_type defined then - ** 0 is also used as the .dtnum value for nonterminals which do not specify - ** a datatype using the %type directive. 
- */ - for(i=0; i<lemp->nsymbol; i++){ - struct symbol *sp = lemp->symbols[i]; - char *cp; - if( sp==lemp->errsym ){ - sp->dtnum = arraysize+1; - continue; - } - if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ - sp->dtnum = 0; - continue; - } - cp = sp->datatype; - if( cp==0 ) cp = lemp->vartype; - j = 0; - while( isspace(*cp) ) cp++; - while( *cp ) stddt[j++] = *cp++; - while( j>0 && isspace(stddt[j-1]) ) j--; - stddt[j] = 0; - if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ - sp->dtnum = 0; - continue; - } - hash = 0; - for(j=0; stddt[j]; j++){ - hash = hash*53 + stddt[j]; - } - hash = (hash & 0x7fffffff)%arraysize; - while( types[hash] ){ - if( strcmp(types[hash],stddt)==0 ){ - sp->dtnum = hash + 1; - break; - } - hash++; - if( hash>=(unsigned)arraysize ) hash = 0; - } - if( types[hash]==0 ){ - sp->dtnum = hash + 1; - types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); - if( types[hash]==0 ){ - fprintf(stderr,"Out of memory.\n"); - exit(1); - } - lemon_strcpy(types[hash],stddt); - } - } - - /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ - name = lemp->name ? lemp->name : "Parse"; - lineno = *plineno; - if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } - fprintf(out,"#define %sTOKENTYPE %s\n",name, - lemp->tokentype?lemp->tokentype:"void*"); lineno++; - if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } - fprintf(out,"typedef union {\n"); lineno++; - fprintf(out," int yyinit;\n"); lineno++; - fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; - for(i=0; i<arraysize; i++){ - if( types[i]==0 ) continue; - fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; - free(types[i]); - } - if( lemp->errsym->useCnt ){ - fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; - } - free(stddt); - free(types); - fprintf(out,"} YYMINORTYPE;\n"); lineno++; - *plineno = lineno; -} - -/* -** Return the name of a C datatype able to represent values between -** lwr and upr, inclusive. 
-*/ -static const char *minimum_size_type(int lwr, int upr){ - if( lwr>=0 ){ - if( upr<=255 ){ - return "unsigned char"; - }else if( upr<65535 ){ - return "unsigned short int"; - }else{ - return "unsigned int"; - } - }else if( lwr>=-127 && upr<=127 ){ - return "signed char"; - }else if( lwr>=-32767 && upr<32767 ){ - return "short"; - }else{ - return "int"; - } -} - -/* -** Each state contains a set of token transaction and a set of -** nonterminal transactions. Each of these sets makes an instance -** of the following structure. An array of these structures is used -** to order the creation of entries in the yy_action[] table. -*/ -struct axset { - struct state *stp; /* A pointer to a state */ - int isTkn; /* True to use tokens. False for non-terminals */ - int nAction; /* Number of actions */ - int iOrder; /* Original order of action sets */ -}; - -/* -** Compare to axset structures for sorting purposes -*/ -static int axset_compare(const void *a, const void *b){ - struct axset *p1 = (struct axset*)a; - struct axset *p2 = (struct axset*)b; - int c; - c = p2->nAction - p1->nAction; - if( c==0 ){ - c = p2->iOrder - p1->iOrder; - } - assert( c!=0 || p1==p2 ); - return c; -} - -/* -** Write text on "out" that describes the rule "rp". 
-*/ -static void writeRuleText(FILE *out, struct rule *rp){ - int j; - fprintf(out,"%s ::=", rp->lhs->name); - for(j=0; j<rp->nrhs; j++){ - struct symbol *sp = rp->rhs[j]; - if( sp->type!=MULTITERMINAL ){ - fprintf(out," %s", sp->name); - }else{ - int k; - fprintf(out," %s", sp->subsym[0]->name); - for(k=1; k<sp->nsubsym; k++){ - fprintf(out,"|%s",sp->subsym[k]->name); - } - } - } -} - - -/* Generate C source code for the parser */ -void ReportTable( - struct lemon *lemp, - int mhflag /* Output in makeheaders format if true */ -){ - FILE *out, *in; - char line[LINESIZE]; - int lineno; - struct state *stp; - struct action *ap; - struct rule *rp; - struct acttab *pActtab; - int i, j, n; - const char *name; - int mnTknOfst, mxTknOfst; - int mnNtOfst, mxNtOfst; - struct axset *ax; - - in = tplt_open(lemp); - if( in==0 ) return; - out = file_open(lemp,".c","wb"); - if( out==0 ){ - fclose(in); - return; - } - lineno = 1; - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the include code, if any */ - tplt_print(out,lemp,lemp->include,&lineno); - if( mhflag ){ - char *name = file_makename(lemp, ".h"); - fprintf(out,"#include \"%s\"\n", name); lineno++; - free(name); - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate #defines for all tokens */ - if( mhflag ){ - const char *prefix; - fprintf(out,"#if INTERFACE\n"); lineno++; - if( lemp->tokenprefix ) prefix = lemp->tokenprefix; - else prefix = ""; - for(i=1; i<lemp->nterminal; i++){ - fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); - lineno++; - } - fprintf(out,"#endif\n"); lineno++; - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the defines */ - fprintf(out,"#define YYCODETYPE %s\n", - minimum_size_type(0, lemp->nsymbol+1)); lineno++; - fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; - fprintf(out,"#define YYACTIONTYPE %s\n", - minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; - if( lemp->wildcard ){ - fprintf(out,"#define YYWILDCARD %d\n", - 
lemp->wildcard->index); lineno++; - } - print_stack_union(out,lemp,&lineno,mhflag); - fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; - if( lemp->stacksize ){ - fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; - }else{ - fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; - } - fprintf(out, "#endif\n"); lineno++; - if( mhflag ){ - fprintf(out,"#if INTERFACE\n"); lineno++; - } - name = lemp->name ? lemp->name : "Parse"; - if( lemp->arg && lemp->arg[0] ){ - int i; - i = lemonStrlen(lemp->arg); - while( i>=1 && isspace(lemp->arg[i-1]) ) i--; - while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; - fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; - fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; - fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", - name,lemp->arg,&lemp->arg[i]); lineno++; - fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", - name,&lemp->arg[i],&lemp->arg[i]); lineno++; - }else{ - fprintf(out,"#define %sARG_SDECL\n",name); lineno++; - fprintf(out,"#define %sARG_PDECL\n",name); lineno++; - fprintf(out,"#define %sARG_FETCH\n",name); lineno++; - fprintf(out,"#define %sARG_STORE\n",name); lineno++; - } - if( mhflag ){ - fprintf(out,"#endif\n"); lineno++; - } - fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; - fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; - if( lemp->errsym->useCnt ){ - fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; - fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; - } - if( lemp->has_fallback ){ - fprintf(out,"#define YYFALLBACK 1\n"); lineno++; - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the action table and its associates: - ** - ** yy_action[] A single table containing all actions. - ** yy_lookahead[] A table containing the lookahead for each entry in - ** yy_action. Used to detect hash collisions. 
- ** yy_shift_ofst[] For each state, the offset into yy_action for - ** shifting terminals. - ** yy_reduce_ofst[] For each state, the offset into yy_action for - ** shifting non-terminals after a reduce. - ** yy_default[] Default action for each state. - */ - - /* Compute the actions on all states and count them up */ - ax = (struct axset *) calloc(lemp->nstate*2, sizeof(ax[0])); - if( ax==0 ){ - fprintf(stderr,"malloc failed\n"); - exit(1); - } - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - ax[i*2].stp = stp; - ax[i*2].isTkn = 1; - ax[i*2].nAction = stp->nTknAct; - ax[i*2+1].stp = stp; - ax[i*2+1].isTkn = 0; - ax[i*2+1].nAction = stp->nNtAct; - } - mxTknOfst = mnTknOfst = 0; - mxNtOfst = mnNtOfst = 0; - - /* Compute the action table. In order to try to keep the size of the - ** action table to a minimum, the heuristic of placing the largest action - ** sets first is used. - */ - for(i=0; i<lemp->nstate*2; i++) ax[i].iOrder = i; - qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); - pActtab = acttab_alloc(); - for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){ - stp = ax[i].stp; - if( ax[i].isTkn ){ - for(ap=stp->ap; ap; ap=ap->next){ - int action; - if( ap->sp->index>=lemp->nterminal ) continue; - action = compute_action(lemp, ap); - if( action<0 ) continue; - acttab_action(pActtab, ap->sp->index, action); - } - stp->iTknOfst = acttab_insert(pActtab); - if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; - if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; - }else{ - for(ap=stp->ap; ap; ap=ap->next){ - int action; - if( ap->sp->index<lemp->nterminal ) continue; - if( ap->sp->index==lemp->nsymbol ) continue; - action = compute_action(lemp, ap); - if( action<0 ) continue; - acttab_action(pActtab, ap->sp->index, action); - } - stp->iNtOfst = acttab_insert(pActtab); - if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; - if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; - } - } - free(ax); - - /* Output the yy_action table */ - n = 
acttab_size(pActtab); - fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; - fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; - for(i=j=0; i<n; i++){ - int action = acttab_yyaction(pActtab, i); - if( action<0 ) action = lemp->nstate + lemp->nrule + 2; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", action); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } - } - fprintf(out, "};\n"); lineno++; - - /* Output the yy_lookahead table */ - fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; - for(i=j=0; i<n; i++){ - int la = acttab_yylookahead(pActtab, i); - if( la<0 ) la = lemp->nsymbol; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", la); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } - } - fprintf(out, "};\n"); lineno++; - - /* Output the yy_shift_ofst[] table */ - fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; - n = lemp->nstate; - while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; - fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; - fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; - fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; - fprintf(out, "static const %s yy_shift_ofst[] = {\n", - minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; - for(i=j=0; i<n; i++){ - int ofst; - stp = lemp->sorted[i]; - ofst = stp->iTknOfst; - if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", ofst); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } - } - fprintf(out, "};\n"); lineno++; - - /* Output the yy_reduce_ofst[] table */ - fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; - n = lemp->nstate; - while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; - fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; - 
fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; - fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; - fprintf(out, "static const %s yy_reduce_ofst[] = {\n", - minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; - for(i=j=0; i<n; i++){ - int ofst; - stp = lemp->sorted[i]; - ofst = stp->iNtOfst; - if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", ofst); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } - } - fprintf(out, "};\n"); lineno++; - - /* Output the default action table */ - fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; - n = lemp->nstate; - for(i=j=0; i<n; i++){ - stp = lemp->sorted[i]; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", stp->iDflt); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } - } - fprintf(out, "};\n"); lineno++; - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the table of fallback tokens. - */ - if( lemp->has_fallback ){ - int mx = lemp->nterminal - 1; - while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } - for(i=0; i<=mx; i++){ - struct symbol *p = lemp->symbols[i]; - if( p->fallback==0 ){ - fprintf(out, " 0, /* %10s => nothing */\n", p->name); - }else{ - fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, - p->name, p->fallback->name); - } - lineno++; - } - } - tplt_xfer(lemp->name, in, out, &lineno); - - /* Generate a table containing the symbolic name of every symbol - */ - for(i=0; i<lemp->nsymbol; i++){ - lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); - fprintf(out," %-15s",line); - if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } - } - if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate a table containing a text string that describes every - ** rule in the rule set of the grammar. This information is used - ** when tracing REDUCE actions. 
- */ - for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ - assert( rp->index==i ); - fprintf(out," /* %3d */ \"", i); - writeRuleText(out, rp); - fprintf(out,"\",\n"); lineno++; - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which executes every time a symbol is popped from - ** the stack while processing errors or while destroying the parser. - ** (In other words, generate the %destructor actions) - */ - if( lemp->tokendest ){ - int once = 1; - for(i=0; i<lemp->nsymbol; i++){ - struct symbol *sp = lemp->symbols[i]; - if( sp==0 || sp->type!=TERMINAL ) continue; - if( once ){ - fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; - once = 0; - } - fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; - } - for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); - if( i<lemp->nsymbol ){ - emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); - fprintf(out," break;\n"); lineno++; - } - } - if( lemp->vardest ){ - struct symbol *dflt_sp = 0; - int once = 1; - for(i=0; i<lemp->nsymbol; i++){ - struct symbol *sp = lemp->symbols[i]; - if( sp==0 || sp->type==TERMINAL || - sp->index<=0 || sp->destructor!=0 ) continue; - if( once ){ - fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; - once = 0; - } - fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; - dflt_sp = sp; - } - if( dflt_sp!=0 ){ - emit_destructor_code(out,dflt_sp,lemp,&lineno); - } - fprintf(out," break;\n"); lineno++; - } - for(i=0; i<lemp->nsymbol; i++){ - struct symbol *sp = lemp->symbols[i]; - if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; - fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; - - /* Combine duplicate destructors into a single case */ - for(j=i+1; j<lemp->nsymbol; j++){ - struct symbol *sp2 = lemp->symbols[j]; - if( sp2 && sp2->type!=TERMINAL && sp2->destructor - && sp2->dtnum==sp->dtnum - && strcmp(sp->destructor,sp2->destructor)==0 ){ - fprintf(out," case %d: /* %s */\n", - 
sp2->index, sp2->name); lineno++; - sp2->destructor = 0; - } - } - - emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); - fprintf(out," break;\n"); lineno++; - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which executes whenever the parser stack overflows */ - tplt_print(out,lemp,lemp->overflow,&lineno); - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate the table of rule information - ** - ** Note: This code depends on the fact that rules are number - ** sequentually beginning with 0. - */ - for(rp=lemp->rule; rp; rp=rp->next){ - fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; - } - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which execution during each REDUCE action */ - for(rp=lemp->rule; rp; rp=rp->next){ - translate_code(lemp, rp); - } - /* First output rules other than the default: rule */ - for(rp=lemp->rule; rp; rp=rp->next){ - struct rule *rp2; /* Other rules with the same action */ - if( rp->code==0 ) continue; - if( rp->code[0]=='\n' && rp->code[1]==0 ) continue; /* Will be default: */ - fprintf(out," case %d: /* ", rp->index); - writeRuleText(out, rp); - fprintf(out, " */\n"); lineno++; - for(rp2=rp->next; rp2; rp2=rp2->next){ - if( rp2->code==rp->code ){ - fprintf(out," case %d: /* ", rp2->index); - writeRuleText(out, rp2); - fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->index); lineno++; - rp2->code = 0; - } - } - emit_code(out,rp,lemp,&lineno); - fprintf(out," break;\n"); lineno++; - rp->code = 0; - } - /* Finally, output the default: rule. We choose as the default: all - ** empty actions. 
*/ - fprintf(out," default:\n"); lineno++; - for(rp=lemp->rule; rp; rp=rp->next){ - if( rp->code==0 ) continue; - assert( rp->code[0]=='\n' && rp->code[1]==0 ); - fprintf(out," /* (%d) ", rp->index); - writeRuleText(out, rp); - fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->index); lineno++; - } - fprintf(out," break;\n"); lineno++; - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which executes if a parse fails */ - tplt_print(out,lemp,lemp->failure,&lineno); - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which executes when a syntax error occurs */ - tplt_print(out,lemp,lemp->error,&lineno); - tplt_xfer(lemp->name,in,out,&lineno); - - /* Generate code which executes when the parser accepts its input */ - tplt_print(out,lemp,lemp->accept,&lineno); - tplt_xfer(lemp->name,in,out,&lineno); - - /* Append any addition code the user desires */ - tplt_print(out,lemp,lemp->extracode,&lineno); - - fclose(in); - fclose(out); - return; -} - -/* Generate a header file for the parser */ -void ReportHeader(struct lemon *lemp) -{ - FILE *out, *in; - const char *prefix; - char line[LINESIZE]; - char pattern[LINESIZE]; - int i; - - if( lemp->tokenprefix ) prefix = lemp->tokenprefix; - else prefix = ""; - in = file_open(lemp,".h","rb"); - if( in ){ - int nextChar; - for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ - lemon_sprintf(pattern,"#define %s%-30s %3d\n", - prefix,lemp->symbols[i]->name,i); - if( strcmp(line,pattern) ) break; - } - nextChar = fgetc(in); - fclose(in); - if( i==lemp->nterminal && nextChar==EOF ){ - /* No change in the file. Don't rewrite it. */ - return; - } - } - out = file_open(lemp,".h","wb"); - if( out ){ - for(i=1; i<lemp->nterminal; i++){ - fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); - } - fclose(out); - } - return; -} - -/* Reduce the size of the action tables, if possible, by making use -** of defaults. 
-** -** In this version, we take the most frequent REDUCE action and make -** it the default. Except, there is no default if the wildcard token -** is a possible look-ahead. -*/ -void CompressTables(struct lemon *lemp) -{ - struct state *stp; - struct action *ap, *ap2; - struct rule *rp, *rp2, *rbest; - int nbest, n; - int i; - int usesWildcard; - - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - nbest = 0; - rbest = 0; - usesWildcard = 0; - - for(ap=stp->ap; ap; ap=ap->next){ - if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ - usesWildcard = 1; - } - if( ap->type!=REDUCE ) continue; - rp = ap->x.rp; - if( rp->lhsStart ) continue; - if( rp==rbest ) continue; - n = 1; - for(ap2=ap->next; ap2; ap2=ap2->next){ - if( ap2->type!=REDUCE ) continue; - rp2 = ap2->x.rp; - if( rp2==rbest ) continue; - if( rp2==rp ) n++; - } - if( n>nbest ){ - nbest = n; - rbest = rp; - } - } - - /* Do not make a default if the number of rules to default - ** is not at least 1 or if the wildcard token is a possible - ** lookahead. - */ - if( nbest<1 || usesWildcard ) continue; - - - /* Combine matching REDUCE actions into a single default */ - for(ap=stp->ap; ap; ap=ap->next){ - if( ap->type==REDUCE && ap->x.rp==rbest ) break; - } - assert( ap ); - ap->sp = Symbol_new("{default}"); - for(ap=ap->next; ap; ap=ap->next){ - if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; - } - stp->ap = Action_sort(stp->ap); - } -} - - -/* -** Compare two states for sorting purposes. The smaller state is the -** one with the most non-terminal actions. If they have the same number -** of non-terminal actions, then the smaller is the one with the most -** token actions. 
-*/ -static int stateResortCompare(const void *a, const void *b){ - const struct state *pA = *(const struct state**)a; - const struct state *pB = *(const struct state**)b; - int n; - - n = pB->nNtAct - pA->nNtAct; - if( n==0 ){ - n = pB->nTknAct - pA->nTknAct; - if( n==0 ){ - n = pB->statenum - pA->statenum; - } - } - assert( n!=0 ); - return n; -} - - -/* -** Renumber and resort states so that states with fewer choices -** occur at the end. Except, keep state 0 as the first state. -*/ -void ResortStates(struct lemon *lemp) -{ - int i; - struct state *stp; - struct action *ap; - - for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - stp->nTknAct = stp->nNtAct = 0; - stp->iDflt = lemp->nstate + lemp->nrule; - stp->iTknOfst = NO_OFFSET; - stp->iNtOfst = NO_OFFSET; - for(ap=stp->ap; ap; ap=ap->next){ - if( compute_action(lemp,ap)>=0 ){ - if( ap->sp->index<lemp->nterminal ){ - stp->nTknAct++; - }else if( ap->sp->index<lemp->nsymbol ){ - stp->nNtAct++; - }else{ - stp->iDflt = compute_action(lemp, ap); - } - } - } - } - qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), - stateResortCompare); - for(i=0; i<lemp->nstate; i++){ - lemp->sorted[i]->statenum = i; - } -} - - -/***************** From the file "set.c" ************************************/ -/* -** Set manipulation routines for the LEMON parser generator. -*/ - -static int size = 0; - -/* Set the set size */ -void SetSize(int n) -{ - size = n+1; -} - -/* Allocate a new set */ -char *SetNew(){ - char *s; - s = (char*)calloc( size, 1); - if( s==0 ){ - extern void memory_error(); - memory_error(); - } - return s; -} - -/* Deallocate a set */ -void SetFree(char *s) -{ - free(s); -} - -/* Add a new element to the set. Return TRUE if the element was added -** and FALSE if it was already there. */ -int SetAdd(char *s, int e) -{ - int rv; - assert( e>=0 && e<size ); - rv = s[e]; - s[e] = 1; - return !rv; -} - -/* Add every element of s2 to s1. Return TRUE if s1 changes. 
*/ -int SetUnion(char *s1, char *s2) -{ - int i, progress; - progress = 0; - for(i=0; i<size; i++){ - if( s2[i]==0 ) continue; - if( s1[i]==0 ){ - progress = 1; - s1[i] = 1; - } - } - return progress; -} -/********************** From the file "table.c" ****************************/ -/* -** All code in this file has been automatically generated -** from a specification in the file -** "table.q" -** by the associative array code building program "aagen". -** Do not edit this file! Instead, edit the specification -** file, then rerun aagen. -*/ -/* -** Code for processing tables in the LEMON parser generator. -*/ - -PRIVATE unsigned strhash(const char *x) -{ - unsigned h = 0; - while( *x ) h = h*13 + *(x++); - return h; -} - -/* Works like strdup, sort of. Save a string in malloced memory, but -** keep strings in a table so that the same string is not in more -** than one place. -*/ -const char *Strsafe(const char *y) -{ - const char *z; - char *cpy; - - if( y==0 ) return 0; - z = Strsafe_find(y); - if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ - lemon_strcpy(cpy,y); - z = cpy; - Strsafe_insert(z); - } - MemoryCheck(z); - return z; -} - -/* There is one instance of the following structure for each -** associative array of type "x1". -*/ -struct s_x1 { - int size; /* The number of available slots. */ - /* Must be a power of 2 greater than or */ - /* equal to 1 */ - int count; /* Number of currently slots filled */ - struct s_x1node *tbl; /* The data stored here */ - struct s_x1node **ht; /* Hash table for lookups */ -}; - -/* There is one instance of this structure for every data element -** in an associative array of type "x1". 
-*/ -typedef struct s_x1node { - const char *data; /* The data */ - struct s_x1node *next; /* Next entry with the same hash */ - struct s_x1node **from; /* Previous link */ -} x1node; - -/* There is only one instance of the array, which is the following */ -static struct s_x1 *x1a; - -/* Allocate a new associative array */ -void Strsafe_init(){ - if( x1a ) return; - x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); - if( x1a ){ - x1a->size = 1024; - x1a->count = 0; - x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); - if( x1a->tbl==0 ){ - free(x1a); - x1a = 0; - }else{ - int i; - x1a->ht = (x1node**)&(x1a->tbl[1024]); - for(i=0; i<1024; i++) x1a->ht[i] = 0; - } - } -} -/* Insert a new record into the array. Return TRUE if successful. -** Prior data with the same key is NOT overwritten */ -int Strsafe_insert(const char *data) -{ - x1node *np; - unsigned h; - unsigned ph; - - if( x1a==0 ) return 0; - ph = strhash(data); - h = ph & (x1a->size-1); - np = x1a->ht[h]; - while( np ){ - if( strcmp(np->data,data)==0 ){ - /* An existing entry with the same key is found. */ - /* Fail because overwrite is not allows. 
*/ - return 0; - } - np = np->next; - } - if( x1a->count>=x1a->size ){ - /* Need to make the hash table bigger */ - int i,size; - struct s_x1 array; - array.size = size = x1a->size*2; - array.count = x1a->count; - array.tbl = (x1node*)calloc(size, sizeof(x1node) + sizeof(x1node*)); - if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x1node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; - for(i=0; i<x1a->count; i++){ - x1node *oldnp, *newnp; - oldnp = &(x1a->tbl[i]); - h = strhash(oldnp->data) & (size-1); - newnp = &(array.tbl[i]); - if( array.ht[h] ) array.ht[h]->from = &(newnp->next); - newnp->next = array.ht[h]; - newnp->data = oldnp->data; - newnp->from = &(array.ht[h]); - array.ht[h] = newnp; - } - free(x1a->tbl); - *x1a = array; - } - /* Insert the new data */ - h = ph & (x1a->size-1); - np = &(x1a->tbl[x1a->count++]); - np->data = data; - if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); - np->next = x1a->ht[h]; - x1a->ht[h] = np; - np->from = &(x1a->ht[h]); - return 1; -} - -/* Return a pointer to data assigned to the given key. Return NULL -** if no such key. */ -const char *Strsafe_find(const char *key) -{ - unsigned h; - x1node *np; - - if( x1a==0 ) return 0; - h = strhash(key) & (x1a->size-1); - np = x1a->ht[h]; - while( np ){ - if( strcmp(np->data,key)==0 ) break; - np = np->next; - } - return np ? np->data : 0; -} - -/* Return a pointer to the (terminal or nonterminal) symbol "x". -** Create a new symbol if this is the first time "x" has been seen. -*/ -struct symbol *Symbol_new(const char *x) -{ - struct symbol *sp; - - sp = Symbol_find(x); - if( sp==0 ){ - sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); - MemoryCheck(sp); - sp->name = Strsafe(x); - sp->type = isupper(*x) ? 
TERMINAL : NONTERMINAL; - sp->rule = 0; - sp->fallback = 0; - sp->prec = -1; - sp->assoc = UNK; - sp->firstset = 0; - sp->lambda = LEMON_FALSE; - sp->destructor = 0; - sp->destLineno = 0; - sp->datatype = 0; - sp->useCnt = 0; - Symbol_insert(sp,sp->name); - } - sp->useCnt++; - return sp; -} - -/* Compare two symbols for sorting purposes. Return negative, -** zero, or positive if a is less then, equal to, or greater -** than b. -** -** Symbols that begin with upper case letters (terminals or tokens) -** must sort before symbols that begin with lower case letters -** (non-terminals). And MULTITERMINAL symbols (created using the -** %token_class directive) must sort at the very end. Other than -** that, the order does not matter. -** -** We find experimentally that leaving the symbols in their original -** order (the order they appeared in the grammar file) gives the -** smallest parser tables in SQLite. -*/ -int Symbolcmpp(const void *_a, const void *_b) -{ - const struct symbol *a = *(const struct symbol **) _a; - const struct symbol *b = *(const struct symbol **) _b; - int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; - int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; - return i1==i2 ? a->index - b->index : i1 - i2; -} - -/* There is one instance of the following structure for each -** associative array of type "x2". -*/ -struct s_x2 { - int size; /* The number of available slots. */ - /* Must be a power of 2 greater than or */ - /* equal to 1 */ - int count; /* Number of currently slots filled */ - struct s_x2node *tbl; /* The data stored here */ - struct s_x2node **ht; /* Hash table for lookups */ -}; - -/* There is one instance of this structure for every data element -** in an associative array of type "x2". 
-*/ -typedef struct s_x2node { - struct symbol *data; /* The data */ - const char *key; /* The key */ - struct s_x2node *next; /* Next entry with the same hash */ - struct s_x2node **from; /* Previous link */ -} x2node; - -/* There is only one instance of the array, which is the following */ -static struct s_x2 *x2a; - -/* Allocate a new associative array */ -void Symbol_init(){ - if( x2a ) return; - x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); - if( x2a ){ - x2a->size = 128; - x2a->count = 0; - x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); - if( x2a->tbl==0 ){ - free(x2a); - x2a = 0; - }else{ - int i; - x2a->ht = (x2node**)&(x2a->tbl[128]); - for(i=0; i<128; i++) x2a->ht[i] = 0; - } - } -} -/* Insert a new record into the array. Return TRUE if successful. -** Prior data with the same key is NOT overwritten */ -int Symbol_insert(struct symbol *data, const char *key) -{ - x2node *np; - unsigned h; - unsigned ph; - - if( x2a==0 ) return 0; - ph = strhash(key); - h = ph & (x2a->size-1); - np = x2a->ht[h]; - while( np ){ - if( strcmp(np->key,key)==0 ){ - /* An existing entry with the same key is found. */ - /* Fail because overwrite is not allows. 
*/ - return 0; - } - np = np->next; - } - if( x2a->count>=x2a->size ){ - /* Need to make the hash table bigger */ - int i,size; - struct s_x2 array; - array.size = size = x2a->size*2; - array.count = x2a->count; - array.tbl = (x2node*)calloc(size, sizeof(x2node) + sizeof(x2node*)); - if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x2node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; - for(i=0; i<x2a->count; i++){ - x2node *oldnp, *newnp; - oldnp = &(x2a->tbl[i]); - h = strhash(oldnp->key) & (size-1); - newnp = &(array.tbl[i]); - if( array.ht[h] ) array.ht[h]->from = &(newnp->next); - newnp->next = array.ht[h]; - newnp->key = oldnp->key; - newnp->data = oldnp->data; - newnp->from = &(array.ht[h]); - array.ht[h] = newnp; - } - free(x2a->tbl); - *x2a = array; - } - /* Insert the new data */ - h = ph & (x2a->size-1); - np = &(x2a->tbl[x2a->count++]); - np->key = key; - np->data = data; - if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); - np->next = x2a->ht[h]; - x2a->ht[h] = np; - np->from = &(x2a->ht[h]); - return 1; -} - -/* Return a pointer to data assigned to the given key. Return NULL -** if no such key. */ -struct symbol *Symbol_find(const char *key) -{ - unsigned h; - x2node *np; - - if( x2a==0 ) return 0; - h = strhash(key) & (x2a->size-1); - np = x2a->ht[h]; - while( np ){ - if( strcmp(np->key,key)==0 ) break; - np = np->next; - } - return np ? np->data : 0; -} - -/* Return the n-th data. Return NULL if n is out of range. */ -struct symbol *Symbol_Nth(int n) -{ - struct symbol *data; - if( x2a && n>0 && n<=x2a->count ){ - data = x2a->tbl[n-1].data; - }else{ - data = 0; - } - return data; -} - -/* Return the size of the array */ -int Symbol_count() -{ - return x2a ? x2a->count : 0; -} - -/* Return an array of pointers to all data in the table. -** The array is obtained from malloc. Return NULL if memory allocation -** problems, or if the array is empty. 
*/ -struct symbol **Symbol_arrayof() -{ - struct symbol **array; - int i,size; - if( x2a==0 ) return 0; - size = x2a->count; - array = (struct symbol **)calloc(size, sizeof(struct symbol *)); - if( array ){ - for(i=0; i<size; i++) array[i] = x2a->tbl[i].data; - } - return array; -} - -/* Compare two configurations */ -int Configcmp(const char *_a,const char *_b) -{ - const struct config *a = (struct config *) _a; - const struct config *b = (struct config *) _b; - int x; - x = a->rp->index - b->rp->index; - if( x==0 ) x = a->dot - b->dot; - return x; -} - -/* Compare two states */ -PRIVATE int statecmp(struct config *a, struct config *b) -{ - int rc; - for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ - rc = a->rp->index - b->rp->index; - if( rc==0 ) rc = a->dot - b->dot; - } - if( rc==0 ){ - if( a ) rc = 1; - if( b ) rc = -1; - } - return rc; -} - -/* Hash a state */ -PRIVATE unsigned statehash(struct config *a) -{ - unsigned h=0; - while( a ){ - h = h*571 + a->rp->index*37 + a->dot; - a = a->bp; - } - return h; -} - -/* Allocate a new state structure */ -struct state *State_new() -{ - struct state *newstate; - newstate = (struct state *)calloc(1, sizeof(struct state) ); - MemoryCheck(newstate); - return newstate; -} - -/* There is one instance of the following structure for each -** associative array of type "x3". -*/ -struct s_x3 { - int size; /* The number of available slots. */ - /* Must be a power of 2 greater than or */ - /* equal to 1 */ - int count; /* Number of currently slots filled */ - struct s_x3node *tbl; /* The data stored here */ - struct s_x3node **ht; /* Hash table for lookups */ -}; - -/* There is one instance of this structure for every data element -** in an associative array of type "x3". 
-*/ -typedef struct s_x3node { - struct state *data; /* The data */ - struct config *key; /* The key */ - struct s_x3node *next; /* Next entry with the same hash */ - struct s_x3node **from; /* Previous link */ -} x3node; - -/* There is only one instance of the array, which is the following */ -static struct s_x3 *x3a; - -/* Allocate a new associative array */ -void State_init(){ - if( x3a ) return; - x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); - if( x3a ){ - x3a->size = 128; - x3a->count = 0; - x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); - if( x3a->tbl==0 ){ - free(x3a); - x3a = 0; - }else{ - int i; - x3a->ht = (x3node**)&(x3a->tbl[128]); - for(i=0; i<128; i++) x3a->ht[i] = 0; - } - } -} -/* Insert a new record into the array. Return TRUE if successful. -** Prior data with the same key is NOT overwritten */ -int State_insert(struct state *data, struct config *key) -{ - x3node *np; - unsigned h; - unsigned ph; - - if( x3a==0 ) return 0; - ph = statehash(key); - h = ph & (x3a->size-1); - np = x3a->ht[h]; - while( np ){ - if( statecmp(np->key,key)==0 ){ - /* An existing entry with the same key is found. */ - /* Fail because overwrite is not allows. 
*/ - return 0; - } - np = np->next; - } - if( x3a->count>=x3a->size ){ - /* Need to make the hash table bigger */ - int i,size; - struct s_x3 array; - array.size = size = x3a->size*2; - array.count = x3a->count; - array.tbl = (x3node*)calloc(size, sizeof(x3node) + sizeof(x3node*)); - if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x3node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; - for(i=0; i<x3a->count; i++){ - x3node *oldnp, *newnp; - oldnp = &(x3a->tbl[i]); - h = statehash(oldnp->key) & (size-1); - newnp = &(array.tbl[i]); - if( array.ht[h] ) array.ht[h]->from = &(newnp->next); - newnp->next = array.ht[h]; - newnp->key = oldnp->key; - newnp->data = oldnp->data; - newnp->from = &(array.ht[h]); - array.ht[h] = newnp; - } - free(x3a->tbl); - *x3a = array; - } - /* Insert the new data */ - h = ph & (x3a->size-1); - np = &(x3a->tbl[x3a->count++]); - np->key = key; - np->data = data; - if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); - np->next = x3a->ht[h]; - x3a->ht[h] = np; - np->from = &(x3a->ht[h]); - return 1; -} - -/* Return a pointer to data assigned to the given key. Return NULL -** if no such key. */ -struct state *State_find(struct config *key) -{ - unsigned h; - x3node *np; - - if( x3a==0 ) return 0; - h = statehash(key) & (x3a->size-1); - np = x3a->ht[h]; - while( np ){ - if( statecmp(np->key,key)==0 ) break; - np = np->next; - } - return np ? np->data : 0; -} - -/* Return an array of pointers to all data in the table. -** The array is obtained from malloc. Return NULL if memory allocation -** problems, or if the array is empty. 
*/ -struct state **State_arrayof() -{ - struct state **array; - int i,size; - if( x3a==0 ) return 0; - size = x3a->count; - array = (struct state **)calloc(size, sizeof(struct state *)); - if( array ){ - for(i=0; i<size; i++) array[i] = x3a->tbl[i].data; - } - return array; -} - -/* Hash a configuration */ -PRIVATE unsigned confighash(struct config *a) -{ - unsigned h=0; - h = h*571 + a->rp->index*37 + a->dot; - return h; -} - -/* There is one instance of the following structure for each -** associative array of type "x4". -*/ -struct s_x4 { - int size; /* The number of available slots. */ - /* Must be a power of 2 greater than or */ - /* equal to 1 */ - int count; /* Number of currently slots filled */ - struct s_x4node *tbl; /* The data stored here */ - struct s_x4node **ht; /* Hash table for lookups */ -}; - -/* There is one instance of this structure for every data element -** in an associative array of type "x4". -*/ -typedef struct s_x4node { - struct config *data; /* The data */ - struct s_x4node *next; /* Next entry with the same hash */ - struct s_x4node **from; /* Previous link */ -} x4node; - -/* There is only one instance of the array, which is the following */ -static struct s_x4 *x4a; - -/* Allocate a new associative array */ -void Configtable_init(){ - if( x4a ) return; - x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); - if( x4a ){ - x4a->size = 64; - x4a->count = 0; - x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); - if( x4a->tbl==0 ){ - free(x4a); - x4a = 0; - }else{ - int i; - x4a->ht = (x4node**)&(x4a->tbl[64]); - for(i=0; i<64; i++) x4a->ht[i] = 0; - } - } -} -/* Insert a new record into the array. Return TRUE if successful. 
-** Prior data with the same key is NOT overwritten */ -int Configtable_insert(struct config *data) -{ - x4node *np; - unsigned h; - unsigned ph; - - if( x4a==0 ) return 0; - ph = confighash(data); - h = ph & (x4a->size-1); - np = x4a->ht[h]; - while( np ){ - if( Configcmp((const char *) np->data,(const char *) data)==0 ){ - /* An existing entry with the same key is found. */ - /* Fail because overwrite is not allows. */ - return 0; - } - np = np->next; - } - if( x4a->count>=x4a->size ){ - /* Need to make the hash table bigger */ - int i,size; - struct s_x4 array; - array.size = size = x4a->size*2; - array.count = x4a->count; - array.tbl = (x4node*)calloc(size, sizeof(x4node) + sizeof(x4node*)); - if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x4node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; - for(i=0; i<x4a->count; i++){ - x4node *oldnp, *newnp; - oldnp = &(x4a->tbl[i]); - h = confighash(oldnp->data) & (size-1); - newnp = &(array.tbl[i]); - if( array.ht[h] ) array.ht[h]->from = &(newnp->next); - newnp->next = array.ht[h]; - newnp->data = oldnp->data; - newnp->from = &(array.ht[h]); - array.ht[h] = newnp; - } - free(x4a->tbl); - *x4a = array; - } - /* Insert the new data */ - h = ph & (x4a->size-1); - np = &(x4a->tbl[x4a->count++]); - np->data = data; - if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); - np->next = x4a->ht[h]; - x4a->ht[h] = np; - np->from = &(x4a->ht[h]); - return 1; -} - -/* Return a pointer to data assigned to the given key. Return NULL -** if no such key. */ -struct config *Configtable_find(struct config *key) -{ - int h; - x4node *np; - - if( x4a==0 ) return 0; - h = confighash(key) & (x4a->size-1); - np = x4a->ht[h]; - while( np ){ - if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; - np = np->next; - } - return np ? np->data : 0; -} - -/* Remove all data from the table. Pass each data to the function "f" -** as it is removed. ("f" may be null to avoid this step.) 
*/ -void Configtable_clear(int(*f)(struct config *)) -{ - int i; - if( x4a==0 || x4a->count==0 ) return; - if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data); - for(i=0; i<x4a->size; i++) x4a->ht[i] = 0; - x4a->count = 0; - return; -} diff --git a/contrib/lempar.c b/contrib/lempar.c deleted file mode 100644 index a4e3c07..0000000 --- a/contrib/lempar.c +++ /dev/null @@ -1,851 +0,0 @@ -/* Driver template for the LEMON parser generator. -** The author disclaims copyright to this source code. -*/ -/* First off, code is included that follows the "include" declaration -** in the input grammar file. */ -#include <stdio.h> -%% -/* Next is all token values, in a form suitable for use by makeheaders. -** This section will be null unless lemon is run with the -m switch. -*/ -/* -** These constants (all generated automatically by the parser generator) -** specify the various kinds of tokens (terminals) that the parser -** understands. -** -** Each symbol here is a terminal symbol in the grammar. -*/ -%% -/* Make sure the INTERFACE macro is defined. -*/ -#ifndef INTERFACE -# define INTERFACE 1 -#endif -/* The next thing included is series of defines which control -** various aspects of the generated parser. -** YYCODETYPE is the data type used for storing terminal -** and nonterminal numbers. "unsigned char" is -** used if there are fewer than 250 terminals -** and nonterminals. "int" is used otherwise. -** YYNOCODE is a number of type YYCODETYPE which corresponds -** to no legal terminal or nonterminal number. This -** number is used to fill in empty slots of the hash -** table. -** YYFALLBACK If defined, this indicates that one or more tokens -** have fall-back values which should be used if the -** original value of the token will not parse. -** YYACTIONTYPE is the data type used for storing terminal -** and nonterminal numbers. "unsigned char" is -** used if there are fewer than 250 rules and -** states combined. "int" is used otherwise. 
-** ParseTOKENTYPE is the data type used for minor tokens given -** directly to the parser from the tokenizer. -** YYMINORTYPE is the data type used for all minor tokens. -** This is typically a union of many types, one of -** which is ParseTOKENTYPE. The entry in the union -** for base tokens is called "yy0". -** YYSTACKDEPTH is the maximum depth of the parser's stack. If -** zero the stack is dynamically sized using realloc() -** ParseARG_SDECL A static variable declaration for the %extra_argument -** ParseARG_PDECL A parameter declaration for the %extra_argument -** ParseARG_STORE Code to store %extra_argument into yypParser -** ParseARG_FETCH Code to extract %extra_argument from yypParser -** YYNSTATE the combined number of states. -** YYNRULE the number of rules in the grammar -** YYERRORSYMBOL is the code number of the error symbol. If not -** defined, then do no error processing. -*/ -%% -#define YY_NO_ACTION (YYNSTATE+YYNRULE+2) -#define YY_ACCEPT_ACTION (YYNSTATE+YYNRULE+1) -#define YY_ERROR_ACTION (YYNSTATE+YYNRULE) - -/* The yyzerominor constant is used to initialize instances of -** YYMINORTYPE objects to zero. */ -static const YYMINORTYPE yyzerominor = { 0 }; - -/* Define the yytestcase() macro to be a no-op if is not already defined -** otherwise. -** -** Applications can choose to define yytestcase() in the %include section -** to a macro that can assist in verifying code coverage. For production -** code the yytestcase() macro should be turned off. But it is useful -** for testing. -*/ -#ifndef yytestcase -# define yytestcase(X) -#endif - - -/* Next are the tables used to determine what action to take based on the -** current state and lookahead token. These tables are used to implement -** functions that take a state number and lookahead value and return an -** action integer. -** -** Suppose the action integer is N. Then the action is determined as -** follows -** -** 0 <= N < YYNSTATE Shift N. 
That is, push the lookahead -** token onto the stack and goto state N. -** -** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE. -** -** N == YYNSTATE+YYNRULE A syntax error has occurred. -** -** N == YYNSTATE+YYNRULE+1 The parser accepts its input. -** -** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused -** slots in the yy_action[] table. -** -** The action table is constructed as a single large table named yy_action[]. -** Given state S and lookahead X, the action is computed as -** -** yy_action[ yy_shift_ofst[S] + X ] -** -** If the index value yy_shift_ofst[S]+X is out of range or if the value -** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S] -** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table -** and that yy_default[S] should be used instead. -** -** The formula above is for computing the action when the lookahead is -** a terminal symbol. If the lookahead is a non-terminal (as occurs after -** a reduce action) then the yy_reduce_ofst[] array is used in place of -** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of -** YY_SHIFT_USE_DFLT. -** -** The following are the tables generated in this section: -** -** yy_action[] A single table containing all actions. -** yy_lookahead[] A table containing the lookahead for each entry in -** yy_action. Used to detect hash collisions. -** yy_shift_ofst[] For each state, the offset into yy_action for -** shifting terminals. -** yy_reduce_ofst[] For each state, the offset into yy_action for -** shifting non-terminals after a reduce. -** yy_default[] Default action for each state. -*/ -%% - -/* The next table maps tokens into fallback tokens. If a construct -** like the following: -** -** %fallback ID X Y Z. -** -** appears in the grammar, then ID becomes a fallback token for X, Y, -** and Z. 
Whenever one of the tokens X, Y, or Z is input to the parser -** but it does not parse, the type of the token is changed to ID and -** the parse is retried before an error is thrown. -*/ -#ifdef YYFALLBACK -static const YYCODETYPE yyFallback[] = { -%% -}; -#endif /* YYFALLBACK */ - -/* The following structure represents a single element of the -** parser's stack. Information stored includes: -** -** + The state number for the parser at this level of the stack. -** -** + The value of the token stored at this level of the stack. -** (In other words, the "major" token.) -** -** + The semantic value stored at this level of the stack. This is -** the information used by the action routines in the grammar. -** It is sometimes called the "minor" token. -*/ -struct yyStackEntry { - YYACTIONTYPE stateno; /* The state-number */ - YYCODETYPE major; /* The major token value. This is the code - ** number for the token at this stack level */ - YYMINORTYPE minor; /* The user-supplied minor token value. This - ** is the value of the token */ -}; -typedef struct yyStackEntry yyStackEntry; - -/* The state of the parser is completely contained in an instance of -** the following structure */ -struct yyParser { - int yyidx; /* Index of top element in stack */ -#ifdef YYTRACKMAXSTACKDEPTH - int yyidxMax; /* Maximum value of yyidx */ -#endif - int yyerrcnt; /* Shifts left before out of the error */ - ParseARG_SDECL /* A place to hold %extra_argument */ -#if YYSTACKDEPTH<=0 - int yystksz; /* Current side of the stack */ - yyStackEntry *yystack; /* The parser's stack */ -#else - yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ -#endif -}; -typedef struct yyParser yyParser; - -#ifndef NDEBUG -#include <stdio.h> -static FILE *yyTraceFILE = 0; -static char *yyTracePrompt = 0; -#endif /* NDEBUG */ - -#ifndef NDEBUG -/* -** Turn parser tracing on by giving a stream to which to write the trace -** and a prompt to preface each trace message. 
Tracing is turned off -** by making either argument NULL -** -** Inputs: -** <ul> -** <li> A FILE* to which trace output should be written. -** If NULL, then tracing is turned off. -** <li> A prefix string written at the beginning of every -** line of trace output. If NULL, then tracing is -** turned off. -** </ul> -** -** Outputs: -** None. -*/ -void ParseTrace(FILE *TraceFILE, char *zTracePrompt); -void ParseTrace(FILE *TraceFILE, char *zTracePrompt){ - yyTraceFILE = TraceFILE; - yyTracePrompt = zTracePrompt; - if( yyTraceFILE==0 ) yyTracePrompt = 0; - else if( yyTracePrompt==0 ) yyTraceFILE = 0; -} -#endif /* NDEBUG */ - -#ifndef NDEBUG -/* For tracing shifts, the names of all terminals and nonterminals -** are required. The following table supplies these names */ -static const char *const yyTokenName[] = { -%% -}; -#endif /* NDEBUG */ - -#ifndef NDEBUG -/* For tracing reduce actions, the names of all rules are required. -*/ -static const char *const yyRuleName[] = { -%% -}; -#endif /* NDEBUG */ - - -#if YYSTACKDEPTH<=0 -/* -** Try to increase the size of the parser stack. -*/ -static void yyGrowStack(yyParser *p){ - int newSize; - yyStackEntry *pNew; - - newSize = p->yystksz*2 + 100; - pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); - if( pNew ){ - p->yystack = pNew; - p->yystksz = newSize; -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sStack grows to %d entries!\n", - yyTracePrompt, p->yystksz); - } -#endif - } -} -#endif - -/* -** This function allocates a new parser. -** The only argument is a pointer to a function which works like -** malloc. -** -** Inputs: -** A pointer to the function used to allocate memory. -** -** Outputs: -** A pointer to a parser. This pointer is used in subsequent calls -** to Parse and ParseFree. 
-*/ -void *ParseAlloc(void *(*mallocProc)(size_t)){ - yyParser *pParser; - pParser = (yyParser*)(*mallocProc)( (size_t)sizeof(yyParser) ); - if( pParser ){ - pParser->yyidx = -1; -#ifdef YYTRACKMAXSTACKDEPTH - pParser->yyidxMax = 0; -#endif -#if YYSTACKDEPTH<=0 - pParser->yystack = NULL; - pParser->yystksz = 0; - yyGrowStack(pParser); -#endif - } - return pParser; -} - -/* The following function deletes the value associated with a -** symbol. The symbol can be either a terminal or nonterminal. -** "yymajor" is the symbol code, and "yypminor" is a pointer to -** the value. -*/ -static void yy_destructor( - yyParser *yypParser, /* The parser */ - YYCODETYPE yymajor, /* Type code for object to destroy */ - YYMINORTYPE *yypminor /* The object to be destroyed */ -){ - ParseARG_FETCH; - switch( yymajor ){ - /* Here is inserted the actions which take place when a - ** terminal or non-terminal is destroyed. This can happen - ** when the symbol is popped from the stack during a - ** reduce or during error processing or when a parser is - ** being destroyed before it is finished parsing. - ** - ** Note: during a reduce, the only symbols destroyed are those - ** which appear on the RHS of the rule, but which are not used - ** inside the C code. - */ -%% - default: break; /* If no destructor action specified: do nothing */ - } -} - -/* -** Pop the parser's stack once. -** -** If there is a destructor routine associated with the token which -** is popped from the stack, then call it. -** -** Return the major token number for the symbol popped. 
-*/ -static int yy_pop_parser_stack(yyParser *pParser){ - YYCODETYPE yymajor; - yyStackEntry *yytos = &pParser->yystack[pParser->yyidx]; - - if( pParser->yyidx<0 ) return 0; -#ifndef NDEBUG - if( yyTraceFILE && pParser->yyidx>=0 ){ - fprintf(yyTraceFILE,"%sPopping %s\n", - yyTracePrompt, - yyTokenName[yytos->major]); - } -#endif - yymajor = yytos->major; - yy_destructor(pParser, yymajor, &yytos->minor); - pParser->yyidx--; - return yymajor; -} - -/* -** Deallocate and destroy a parser. Destructors are all called for -** all stack elements before shutting the parser down. -** -** Inputs: -** <ul> -** <li> A pointer to the parser. This should be a pointer -** obtained from ParseAlloc. -** <li> A pointer to a function used to reclaim memory obtained -** from malloc. -** </ul> -*/ -void ParseFree( - void *p, /* The parser to be deleted */ - void (*freeProc)(void*) /* Function used to reclaim memory */ -){ - yyParser *pParser = (yyParser*)p; - if( pParser==0 ) return; - while( pParser->yyidx>=0 ) yy_pop_parser_stack(pParser); -#if YYSTACKDEPTH<=0 - free(pParser->yystack); -#endif - (*freeProc)((void*)pParser); -} - -/* -** Return the peak depth of the stack for a parser. -*/ -#ifdef YYTRACKMAXSTACKDEPTH -int ParseStackPeak(void *p){ - yyParser *pParser = (yyParser*)p; - return pParser->yyidxMax; -} -#endif - -/* -** Find the appropriate action for a parser given the terminal -** look-ahead token iLookAhead. -** -** If the look-ahead token is YYNOCODE, then check to see if the action is -** independent of the look-ahead. If it is, return the action, otherwise -** return YY_NO_ACTION. 
-*/ -static int yy_find_shift_action( - yyParser *pParser, /* The parser */ - YYCODETYPE iLookAhead /* The look-ahead token */ -){ - int i; - int stateno = pParser->yystack[pParser->yyidx].stateno; - - if( stateno>YY_SHIFT_COUNT - || (i = yy_shift_ofst[stateno])==YY_SHIFT_USE_DFLT ){ - return yy_default[stateno]; - } - assert( iLookAhead!=YYNOCODE ); - i += iLookAhead; - if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ - if( iLookAhead>0 ){ -#ifdef YYFALLBACK - YYCODETYPE iFallback; /* Fallback token */ - if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0]) - && (iFallback = yyFallback[iLookAhead])!=0 ){ -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n", - yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); - } -#endif - return yy_find_shift_action(pParser, iFallback); - } -#endif -#ifdef YYWILDCARD - { - int j = i - iLookAhead + YYWILDCARD; - if( -#if YY_SHIFT_MIN+YYWILDCARD<0 - j>=0 && -#endif -#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT - j<YY_ACTTAB_COUNT && -#endif - yy_lookahead[j]==YYWILDCARD - ){ -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n", - yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[YYWILDCARD]); - } -#endif /* NDEBUG */ - return yy_action[j]; - } - } -#endif /* YYWILDCARD */ - } - return yy_default[stateno]; - }else{ - return yy_action[i]; - } -} - -/* -** Find the appropriate action for a parser given the non-terminal -** look-ahead token iLookAhead. -** -** If the look-ahead token is YYNOCODE, then check to see if the action is -** independent of the look-ahead. If it is, return the action, otherwise -** return YY_NO_ACTION. 
-*/ -static int yy_find_reduce_action( - int stateno, /* Current state number */ - YYCODETYPE iLookAhead /* The look-ahead token */ -){ - int i; -#ifdef YYERRORSYMBOL - if( stateno>YY_REDUCE_COUNT ){ - return yy_default[stateno]; - } -#else - assert( stateno<=YY_REDUCE_COUNT ); -#endif - i = yy_reduce_ofst[stateno]; - assert( i!=YY_REDUCE_USE_DFLT ); - assert( iLookAhead!=YYNOCODE ); - i += iLookAhead; -#ifdef YYERRORSYMBOL - if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ - return yy_default[stateno]; - } -#else - assert( i>=0 && i<YY_ACTTAB_COUNT ); - assert( yy_lookahead[i]==iLookAhead ); -#endif - return yy_action[i]; -} - -/* -** The following routine is called if the stack overflows. -*/ -static void yyStackOverflow(yyParser *yypParser, YYMINORTYPE *yypMinor){ - ParseARG_FETCH; - yypParser->yyidx--; -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt); - } -#endif - while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); - /* Here code is inserted which will execute if the parser - ** stack every overflows */ -%% - ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ -} - -/* -** Perform a shift action. 
-*/ -static void yy_shift( - yyParser *yypParser, /* The parser to be shifted */ - int yyNewState, /* The new state to shift in */ - int yyMajor, /* The major token to shift in */ - YYMINORTYPE *yypMinor /* Pointer to the minor token to shift in */ -){ - yyStackEntry *yytos; - yypParser->yyidx++; -#ifdef YYTRACKMAXSTACKDEPTH - if( yypParser->yyidx>yypParser->yyidxMax ){ - yypParser->yyidxMax = yypParser->yyidx; - } -#endif -#if YYSTACKDEPTH>0 - if( yypParser->yyidx>=YYSTACKDEPTH ){ - yyStackOverflow(yypParser, yypMinor); - return; - } -#else - if( yypParser->yyidx>=yypParser->yystksz ){ - yyGrowStack(yypParser); - if( yypParser->yyidx>=yypParser->yystksz ){ - yyStackOverflow(yypParser, yypMinor); - return; - } - } -#endif - yytos = &yypParser->yystack[yypParser->yyidx]; - yytos->stateno = (YYACTIONTYPE)yyNewState; - yytos->major = (YYCODETYPE)yyMajor; - yytos->minor = *yypMinor; -#ifndef NDEBUG - if( yyTraceFILE && yypParser->yyidx>0 ){ - int i; - fprintf(yyTraceFILE,"%sShift %d\n",yyTracePrompt,yyNewState); - fprintf(yyTraceFILE,"%sStack:",yyTracePrompt); - for(i=1; i<=yypParser->yyidx; i++) - fprintf(yyTraceFILE," %s",yyTokenName[yypParser->yystack[i].major]); - fprintf(yyTraceFILE,"\n"); - } -#endif -} - -/* The following table contains information about every rule that -** is used during the reduce. -*/ -static const struct { - YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ - unsigned char nrhs; /* Number of right-hand side symbols in the rule */ -} yyRuleInfo[] = { -%% -}; - -static void yy_accept(yyParser*); /* Forward Declaration */ - -/* -** Perform a reduce action and the shift that must immediately -** follow the reduce. 
-*/ -static void yy_reduce( - yyParser *yypParser, /* The parser */ - int yyruleno /* Number of the rule by which to reduce */ -){ - int yygoto; /* The next state */ - int yyact; /* The next action */ - YYMINORTYPE yygotominor; /* The LHS of the rule reduced */ - yyStackEntry *yymsp; /* The top of the parser's stack */ - int yysize; /* Amount to pop the stack */ - ParseARG_FETCH; - yymsp = &yypParser->yystack[yypParser->yyidx]; -#ifndef NDEBUG - if( yyTraceFILE && yyruleno>=0 - && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ - fprintf(yyTraceFILE, "%sReduce [%s].\n", yyTracePrompt, - yyRuleName[yyruleno]); - } -#endif /* NDEBUG */ - - /* Silence complaints from purify about yygotominor being uninitialized - ** in some cases when it is copied into the stack after the following - ** switch. yygotominor is uninitialized when a rule reduces that does - ** not set the value of its left-hand side nonterminal. Leaving the - ** value of the nonterminal uninitialized is utterly harmless as long - ** as the value is never used. So really the only thing this code - ** accomplishes is to quieten purify. - ** - ** 2007-01-16: The wireshark project (www.wireshark.org) reports that - ** without this code, their parser segfaults. I'm not sure what there - ** parser is doing to make this happen. This is the second bug report - ** from wireshark this week. Clearly they are stressing Lemon in ways - ** that it has not been previously stressed... (SQLite ticket #2172) - */ - /*memset(&yygotominor, 0, sizeof(yygotominor));*/ - yygotominor = yyzerominor; - - - switch( yyruleno ){ - /* Beginning here are the reduction cases. A typical example - ** follows: - ** case 0: - ** #line <lineno> <grammarfile> - ** { ... 
} // User supplied code - ** #line <lineno> <thisfile> - ** break; - */ -%% - }; - yygoto = yyRuleInfo[yyruleno].lhs; - yysize = yyRuleInfo[yyruleno].nrhs; - yypParser->yyidx -= yysize; - yyact = yy_find_reduce_action(yymsp[-yysize].stateno,(YYCODETYPE)yygoto); - if( yyact < YYNSTATE ){ -#ifdef NDEBUG - /* If we are not debugging and the reduce action popped at least - ** one element off the stack, then we can push the new element back - ** onto the stack here, and skip the stack overflow test in yy_shift(). - ** That gives a significant speed improvement. */ - if( yysize ){ - yypParser->yyidx++; - yymsp -= yysize-1; - yymsp->stateno = (YYACTIONTYPE)yyact; - yymsp->major = (YYCODETYPE)yygoto; - yymsp->minor = yygotominor; - }else -#endif - { - yy_shift(yypParser,yyact,yygoto,&yygotominor); - } - }else{ - assert( yyact == YYNSTATE + YYNRULE + 1 ); - yy_accept(yypParser); - } -} - -/* -** The following code executes when the parse fails -*/ -#ifndef YYNOERRORRECOVERY -static void yy_parse_failed( - yyParser *yypParser /* The parser */ -){ - ParseARG_FETCH; -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); - } -#endif - while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); - /* Here code is inserted which will be executed whenever the - ** parser fails */ -%% - ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ -} -#endif /* YYNOERRORRECOVERY */ - -/* -** The following code executes when a syntax error first occurs. 
-*/ -static void yy_syntax_error( - yyParser *yypParser, /* The parser */ - int yymajor, /* The major type of the error token */ - YYMINORTYPE yyminor /* The minor type of the error token */ -){ - ParseARG_FETCH; -#define TOKEN (yyminor.yy0) -%% - ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ -} - -/* -** The following is executed when the parser accepts -*/ -static void yy_accept( - yyParser *yypParser /* The parser */ -){ - ParseARG_FETCH; -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); - } -#endif - while( yypParser->yyidx>=0 ) yy_pop_parser_stack(yypParser); - /* Here code is inserted which will be executed whenever the - ** parser accepts */ -%% - ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ -} - -/* The main parser program. -** The first argument is a pointer to a structure obtained from -** "ParseAlloc" which describes the current state of the parser. -** The second argument is the major token number. The third is -** the minor token. The fourth optional argument is whatever the -** user wants (and specified in the grammar) and is available for -** use by the action routines. -** -** Inputs: -** <ul> -** <li> A pointer to the parser (an opaque structure.) -** <li> The major token number. -** <li> The minor token number. -** <li> An option argument of a grammar-specified type. -** </ul> -** -** Outputs: -** None. -*/ -void Parse( - void *yyp, /* The parser */ - int yymajor, /* The major token code number */ - ParseTOKENTYPE yyminor /* The value for the token */ - ParseARG_PDECL /* Optional %extra_argument parameter */ -){ - YYMINORTYPE yyminorunion; - int yyact; /* The parser action. 
*/ - int yyendofinput; /* True if we are at the end of input */ -#ifdef YYERRORSYMBOL - int yyerrorhit = 0; /* True if yymajor has invoked an error */ -#endif - yyParser *yypParser; /* The parser */ - - /* (re)initialize the parser, if necessary */ - yypParser = (yyParser*)yyp; - if( yypParser->yyidx<0 ){ -#if YYSTACKDEPTH<=0 - if( yypParser->yystksz <=0 ){ - /*memset(&yyminorunion, 0, sizeof(yyminorunion));*/ - yyminorunion = yyzerominor; - yyStackOverflow(yypParser, &yyminorunion); - return; - } -#endif - yypParser->yyidx = 0; - yypParser->yyerrcnt = -1; - yypParser->yystack[0].stateno = 0; - yypParser->yystack[0].major = 0; - } - yyminorunion.yy0 = yyminor; - yyendofinput = (yymajor==0); - ParseARG_STORE; - -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sInput %s\n",yyTracePrompt,yyTokenName[yymajor]); - } -#endif - - do{ - yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); - if( yyact<YYNSTATE ){ - assert( !yyendofinput ); /* Impossible to shift the $ token */ - yy_shift(yypParser,yyact,yymajor,&yyminorunion); - yypParser->yyerrcnt--; - yymajor = YYNOCODE; - }else if( yyact < YYNSTATE + YYNRULE ){ - yy_reduce(yypParser,yyact-YYNSTATE); - }else{ - assert( yyact == YY_ERROR_ACTION ); -#ifdef YYERRORSYMBOL - int yymx; -#endif -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); - } -#endif -#ifdef YYERRORSYMBOL - /* A syntax error has occurred. - ** The response to an error depends upon whether or not the - ** grammar defines an error token "ERROR". - ** - ** This is what we do if the grammar does define ERROR: - ** - ** * Call the %syntax_error function. - ** - ** * Begin popping the stack until we enter a state where - ** it is legal to shift the error symbol, then shift - ** the error symbol. - ** - ** * Set the error count to three. - ** - ** * Begin accepting and shifting new tokens. No new error - ** processing will occur until three tokens have been - ** shifted successfully. 
- ** - */ - if( yypParser->yyerrcnt<0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); - } - yymx = yypParser->yystack[yypParser->yyidx].major; - if( yymx==YYERRORSYMBOL || yyerrorhit ){ -#ifndef NDEBUG - if( yyTraceFILE ){ - fprintf(yyTraceFILE,"%sDiscard input token %s\n", - yyTracePrompt,yyTokenName[yymajor]); - } -#endif - yy_destructor(yypParser, (YYCODETYPE)yymajor,&yyminorunion); - yymajor = YYNOCODE; - }else{ - while( - yypParser->yyidx >= 0 && - yymx != YYERRORSYMBOL && - (yyact = yy_find_reduce_action( - yypParser->yystack[yypParser->yyidx].stateno, - YYERRORSYMBOL)) >= YYNSTATE - ){ - yy_pop_parser_stack(yypParser); - } - if( yypParser->yyidx < 0 || yymajor==0 ){ - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - yy_parse_failed(yypParser); - yymajor = YYNOCODE; - }else if( yymx!=YYERRORSYMBOL ){ - YYMINORTYPE u2; - u2.YYERRSYMDT = 0; - yy_shift(yypParser,yyact,YYERRORSYMBOL,&u2); - } - } - yypParser->yyerrcnt = 3; - yyerrorhit = 1; -#elif defined(YYNOERRORRECOVERY) - /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to - ** do any kind of error recovery. Instead, simply invoke the syntax - ** error routine and continue going as if nothing had happened. - ** - ** Applications can set this macro (for example inside %include) if - ** they intend to abandon the parse upon the first syntax error seen. - */ - yy_syntax_error(yypParser,yymajor,yyminorunion); - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - yymajor = YYNOCODE; - -#else /* YYERRORSYMBOL is not defined */ - /* This is what we do if the grammar does not define ERROR: - ** - ** * Report an error message, and throw away the input token. - ** - ** * If the input token is $, then fail the parse. - ** - ** As before, subsequent error messages are suppressed until - ** three input tokens have been successfully shifted. 
- */ - if( yypParser->yyerrcnt<=0 ){ - yy_syntax_error(yypParser,yymajor,yyminorunion); - } - yypParser->yyerrcnt = 3; - yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); - if( yyendofinput ){ - yy_parse_failed(yypParser); - } - yymajor = YYNOCODE; -#endif - } - }while( yymajor!=YYNOCODE && yypParser->yyidx>=0 ); - return; -} @@ -1,1832 +0,0 @@ -/* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "lexer.h" -#include "parser.h" -#include "eval.h" -#include "lib.h" - -#include <math.h> -#include <ctype.h> -#include <errno.h> -#include <stdlib.h> -#include <stdarg.h> -#include <regex.h> - -bool -uc_val_is_truish(struct json_object *val) -{ - struct uc_op *tag = json_object_get_userdata(val); - double d; - - switch (tag ? 
tag->type : 0) { - case T_EXCEPTION: - return false; - - default: - switch (json_object_get_type(val)) { - case json_type_int: - return (json_object_get_int64(val) != 0); - - case json_type_double: - d = json_object_get_double(val); - - return (d != 0 && !isnan(d)); - - case json_type_boolean: - return (json_object_get_boolean(val) != false); - - case json_type_string: - return (json_object_get_string_len(val) > 0); - - case json_type_array: - case json_type_object: - return true; - - default: - return false; - } - } -} - -enum json_type -uc_cast_number(struct json_object *v, int64_t *n, double *d) -{ - bool is_double = false; - const char *s; - char *e; - - *d = 0.0; - *n = 0; - - switch (json_object_get_type(v)) { - case json_type_int: - *n = json_object_get_int64(v); - - return json_type_int; - - case json_type_double: - *d = json_object_get_double(v); - - return json_type_double; - - case json_type_null: - return json_type_int; - - case json_type_boolean: - *n = json_object_get_boolean(v) ? 
1 : 0; - - return json_type_int; - - case json_type_string: - s = json_object_get_string(v); - - while (isspace(*s)) - s++; - - if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X') && isxdigit(s[2])) { - *n = strtoll(s, &e, 16); - } - else if (s[0] == '0' && isdigit(s[2])) { - *n = strtoll(s, &e, 8); - } - else { - *n = strtoll(s, &e, 10); - - if (*e == '.') { - *d = strtod(s, &e); - is_double = (e > s); - } - } - - while (isspace(*e)) - e++; - - if (*e) { - *d = NAN; - - return json_type_double; - } - - if (is_double) - return json_type_double; - - return json_type_int; - - default: - *d = NAN; - - return json_type_double; - } -} - -static struct json_object * -uc_execute_op(struct uc_state *state, uint32_t off); - -static struct json_object * -uc_execute_op_sequence(struct uc_state *state, uint32_t off); - -static struct json_object * -uc_execute_list(struct uc_state *state, uint32_t off); - -static char * -uc_ref_to_str(struct uc_state *state, uint32_t off) -{ - int child_type = OPn_TYPE(off, 1); - int op_type = OP_TYPE(off); - const char *l; - char *s, *p; - - switch (op_type) { - case T_DOT: - s = uc_ref_to_str(state, OPn(off, 0)); - l = (child_type == T_LABEL) ? json_object_get_string(OPn_VAL(off, 1)) : "???"; - - if (asprintf(&p, "%s.%s", s ? s : "(...)", l) == -1) - p = NULL; - - free(s); - - return p; - - case T_LBRACK: - if (!OP_IS_POSTFIX(off)) - return NULL; - - /* fall through */ - - case T_LPAREN: - s = uc_ref_to_str(state, OPn(off, 0)); - - switch (child_type) { - case T_STRING: - l = json_object_to_json_string_ext(OPn_VAL(off, 1), JSON_C_TO_STRING_NOSLASHESCAPE); - break; - - case T_NUMBER: - case T_LABEL: - case T_BOOL: - l = json_object_get_string(OPn_VAL(off, 1)); - break; - - default: - l = "..."; - } - - if (asprintf(&p, "%s%c%s%c", s ? s : "(...)", - (op_type == T_LPAREN) ? '(' : '[', l, - (op_type == T_LPAREN) ? 
')' : ']') == -1) - p = NULL; - - free(s); - - return p; - - case T_LABEL: - return strdup(json_object_get_string(OP_VAL(off))); - - default: - return NULL; - } -} - -static struct json_object * -uc_getref(struct uc_state *state, uint32_t off, struct json_object **key) -{ - uint32_t off1 = OPn(off, 0); - uint32_t off2 = OPn(off, 1); - int type = OP_TYPE(off); - struct uc_scope *sc, *next; - struct json_object *val; - - if (key) - *key = NULL; - - if (type == T_DOT) { - if (key) - *key = off2 ? json_object_get(OP_VAL(off2)) : NULL; - - return uc_execute_op_sequence(state, off1); - } - else if (type == T_LBRACK && OP_IS_POSTFIX(off)) { - if (key) { - val = off2 ? uc_execute_op_sequence(state, off2) : NULL; - - if (uc_is_type(val, T_EXCEPTION)) - return val; - - *key = val; - } - - return uc_execute_op_sequence(state, off1); - } - else if (type == T_LABEL) { - sc = state->scope; - - while (true) { - if (json_object_object_get_ex(sc->scope, json_object_get_string(OP_VAL(off)), NULL)) - break; - - next = uc_parent_scope(sc); - - if (!next) { - if (state->strict_declarations) { - return uc_new_exception(state, OP_POS(off), - "Reference error: access to undeclared variable %s", - json_object_get_string(OP_VAL(off))); - } - - break; - } - - sc = next; - } - - if (key) - *key = json_object_get(OP_VAL(off)); - - return json_object_get(sc->scope); - } - else { - if (key) - *key = NULL; - - return NULL; - } -} - -static struct json_object * -uc_getref_required(struct uc_state *state, uint32_t off, struct json_object **key) -{ - uint32_t child_off = OPn(off, 0); - struct json_object *scope, *skey, *rv; - char *lhs; - - scope = uc_getref(state, off, &skey); - - if (!json_object_is_type(scope, json_type_array) && - !json_object_is_type(scope, json_type_object)) { - if (!uc_is_type(scope, T_EXCEPTION)) { - lhs = child_off ? uc_ref_to_str(state, child_off) : NULL; - - if (lhs) { - rv = uc_new_exception(state, OPn_POS(off, 0), - "Type error: `%s` is %s", - lhs, scope ? 
"not an array or object" : "null"); - free(lhs); - } - else { - rv = uc_new_exception(state, OPn_POS(off, 0), - "Type error: left-hand side is not an array or object"); - } - - json_object_put(scope); - } - else { - rv = scope; - } - - json_object_put(skey); - - *key = NULL; - return rv; - } - - *key = skey; - return scope; -} - -static struct json_object * -uc_getproto(struct json_object *obj) -{ - struct uc_op *tag = json_object_get_userdata(obj); - - if (!tag || (tag->type != T_LBRACE && tag->type <= __T_MAX) || !tag->val) - return NULL; - - return tag->tag.proto; -} - -static struct json_object * -uc_getval(struct json_object *scope, struct json_object *key) -{ - struct json_object *o, *v; - int64_t idx; - double d; - - if (!key) - return NULL; - - if (json_object_is_type(scope, json_type_array)) { - /* only consider doubles with integer values as array keys */ - if (json_object_is_type(key, json_type_double)) { - d = json_object_get_double(key); - - if ((double)(int64_t)(d) != d) - return NULL; - - idx = (int64_t)d; - } - else { - errno = 0; - idx = json_object_get_int64(key); - - if (errno != 0) - return NULL; - } - - return json_object_get(json_object_array_get_idx(scope, idx)); - } - - for (o = scope; o; o = uc_getproto(o)) { - if (!json_object_is_type(o, json_type_object)) - continue; - - if (json_object_object_get_ex(o, key ? json_object_get_string(key) : "null", &v)) - return json_object_get(v); - } - - return NULL; -} - -static struct json_object * -uc_setval(struct json_object *scope, struct json_object *key, struct json_object *val) -{ - int64_t idx; - - if (!key) - return NULL; - - if (json_object_is_type(scope, json_type_array)) { - errno = 0; - idx = json_object_get_int64(key); - - if (errno != 0) - return NULL; - - if (json_object_array_put_idx(scope, idx, val)) - return NULL; - - return json_object_get(val); - } - - if (json_object_object_add(scope, key ? 
json_object_get_string(key) : "null", val)) - return NULL; - - return json_object_get(val); -} - -static struct json_object * -uc_execute_assign(struct uc_state *state, uint32_t off) -{ - uint32_t label_off = OPn(off, 0); - uint32_t value_off = OPn(off, 1); - struct json_object *scope, *key, *val; - - scope = uc_getref_required(state, label_off, &key); - - if (!key) - return scope; - - val = uc_execute_op_sequence(state, value_off); - - if (!uc_is_type(val, T_EXCEPTION)) - uc_setval(scope, key, val); - - json_object_put(scope); - json_object_put(key); - - return val; -} - -static struct json_object * -uc_execute_local(struct uc_state *state, uint32_t off) -{ - struct json_object *val, *rv = NULL; - uint32_t assign_off, label_off; - - for (assign_off = OPn(off, 0); assign_off != 0; assign_off = OP_NEXT(assign_off)) { - switch (OP_TYPE(assign_off)) { - case T_ASSIGN: - label_off = OPn(assign_off, 0); - val = uc_execute_op_sequence(state, OPn(assign_off, 1)); - - if (uc_is_type(val, T_EXCEPTION)) - return val; - - break; - - case T_LABEL: - label_off = assign_off; - val = NULL; - break; - - default: - continue; - } - - if (label_off) { - json_object_put(rv); - rv = uc_setval(state->scope->scope, OP_VAL(label_off), val); - } - } - - return rv; -} - -static struct json_object * -uc_execute_op_sequence(struct uc_state *state, uint32_t off); - -static bool -uc_test_condition(struct uc_state *state, uint32_t off) -{ - struct json_object *val = uc_execute_op_sequence(state, off); - bool istrue = uc_val_is_truish(val); - - json_object_put(val); - - return istrue; -} - -static struct json_object * -uc_execute_if(struct uc_state *state, uint32_t off) -{ - uint32_t cond_off = OPn(off, 0); - uint32_t then_off = OPn(off, 1); - uint32_t else_off = OPn(off, 2); - bool res = uc_test_condition(state, cond_off); - - if (state->exception) - return json_object_get(state->exception); - else if (res) - return uc_execute_op_sequence(state, then_off); - else if (else_off) - return 
uc_execute_op_sequence(state, else_off); - - return NULL; -} - -static struct json_object * -uc_execute_for(struct uc_state *state, uint32_t off) -{ - struct json_object *kscope, *vscope, *val, *item, *ik, *iv = NULL, *rv = NULL; - uint32_t init_off = OPn(off, 0); - uint32_t cond_off = OPn(off, 1); - uint32_t step_off = OPn(off, 2); - uint32_t body_off = OPn(off, 3); - uint32_t ik_off, iv_off; - size_t arridx, arrlen; - bool local = false; - struct uc_op *tag; - - /* for (x in ...) loop variant */ - if (OP_IS_FOR_IN(off)) { - if (OP_TYPE(init_off) == T_LOCAL) { - local = true; - init_off = OPn(init_off, 0); - } - - ik_off = OPn(init_off, 0); - ik = OP_VAL(ik_off); - kscope = local ? state->scope->scope : uc_getref(state, ik_off, NULL); - - if (uc_is_type(kscope, T_EXCEPTION)) - return kscope; - - iv_off = OP_NEXT(ik_off); - - if (iv_off) { - iv = OP_VAL(iv_off); - vscope = local ? kscope : uc_getref(state, iv_off, NULL); - - if (uc_is_type(vscope, T_EXCEPTION)) - return vscope; - } - - val = uc_execute_op_sequence(state, OPn(init_off, 1)); - - if (uc_is_type(val, T_EXCEPTION)) - return val; - - if (json_object_is_type(val, json_type_array)) { - for (arridx = 0, arrlen = json_object_array_length(val); - arridx < arrlen; arridx++) { - item = json_object_array_get_idx(val, arridx); - - if (iv) { - uc_setval(kscope, ik, xjs_new_int64(arridx)); - uc_setval(vscope, iv, item); - } - else { - uc_setval(kscope, ik, item); - } - - json_object_put(rv); - - rv = uc_execute_op_sequence(state, body_off); - tag = json_object_get_userdata(rv); - - switch (tag ? 
tag->type : 0) { - case T_RETURN: - case T_EXCEPTION: - json_object_put(val); - - return rv; - - case T_BREAK: - json_object_put(val); - json_object_put(rv); - - return NULL; - } - } - } - else if (json_object_is_type(val, json_type_object)) { - json_object_object_foreach(val, key, item) { - json_object_put(uc_setval(kscope, ik, xjs_new_string(key))); - - if (iv) - uc_setval(vscope, iv, item); - - json_object_put(rv); - - rv = uc_execute_op_sequence(state, body_off); - tag = json_object_get_userdata(rv); - - switch (tag ? tag->type : 0) { - case T_RETURN: - case T_EXCEPTION: - json_object_put(val); - - return rv; - - case T_BREAK: - json_object_put(val); - json_object_put(rv); - - return NULL; - } - } - } - - json_object_put(val); - json_object_put(rv); - - return NULL; - } - - if (init_off) { - val = uc_execute_op_sequence(state, init_off); - - if (uc_is_type(val, T_EXCEPTION)) - return val; - - json_object_put(val); - } - - while (cond_off ? uc_test_condition(state, cond_off) : true) { - json_object_put(rv); - - rv = uc_execute_op_sequence(state, body_off); - tag = json_object_get_userdata(rv); - - switch (tag ? tag->type : 0) { - case T_RETURN: - case T_EXCEPTION: - return rv; - - case T_BREAK: - json_object_put(rv); - - return NULL; - } - - if (step_off) { - val = uc_execute_op_sequence(state, step_off); - - if (uc_is_type(val, T_EXCEPTION)) { - json_object_put(rv); - - return val; - } - - json_object_put(val); - } - } - - json_object_put(rv); - - return NULL; -} - -static struct json_object * -uc_execute_while(struct uc_state *state, uint32_t off) -{ - uint32_t test = OPn(off, 0); - uint32_t body = OPn(off, 1); - struct json_object *v, *rv = NULL; - struct uc_op *tag = NULL; - bool cond; - - while (1) { - json_object_put(rv); - - v = test ? uc_execute_op_sequence(state, test) : NULL; - cond = test ? 
uc_val_is_truish(v) : true; - - if (uc_is_type(v, T_EXCEPTION)) - return v; - - json_object_put(v); - - if (!cond) - return NULL; - - rv = uc_execute_op_sequence(state, body); - tag = json_object_get_userdata(rv); - - switch (tag ? tag->type : 0) { - case T_RETURN: - case T_EXCEPTION: - return rv; - - case T_BREAK: - json_object_put(rv); - - return NULL; - } - } - - json_object_put(rv); - - return NULL; -} - -static struct json_object * -uc_execute_and_or(struct uc_state *state, uint32_t off) -{ - bool is_or = (OP_TYPE(off) == T_OR); - struct json_object *val = NULL; - uint32_t op_off; - int i = 0; - - for (op_off = OPn(off, 0); op_off != 0 && i < OPn_NUM; op_off = OPn(off, ++i)) { - json_object_put(val); - - val = uc_execute_op_sequence(state, op_off); - - if (uc_is_type(val, T_EXCEPTION)) - break; - - if (uc_val_is_truish(val) == is_or) - break; - } - - return val; -} - -bool -uc_cmp(int how, struct json_object *v1, struct json_object *v2) -{ - enum json_type t1 = json_object_get_type(v1); - enum json_type t2 = json_object_get_type(v2); - int64_t n1, n2, delta; - double d1, d2; - - if (t1 == json_type_string && t2 == json_type_string) { - delta = strcmp(json_object_get_string(v1), json_object_get_string(v2)); - } - else { - if ((t1 == json_type_array && t2 == json_type_array) || - (t1 == json_type_object && t2 == json_type_object)) { - delta = (void *)v1 - (void *)v2; - } - else { - t1 = uc_cast_number(v1, &n1, &d1); - t2 = uc_cast_number(v2, &n2, &d2); - - if (t1 == json_type_double || t2 == json_type_double) { - d1 = (t1 == json_type_double) ? d1 : (double)n1; - d2 = (t2 == json_type_double) ? 
d2 : (double)n2; - - if (d1 == d2) - delta = 0; - else if (d1 < d2) - delta = -1; - else - delta = 1; - } - else { - delta = n1 - n2; - } - } - } - - switch (how) { - case T_LT: - return (delta < 0); - - case T_LE: - return (delta <= 0); - - case T_GT: - return (delta > 0); - - case T_GE: - return (delta >= 0); - - case T_EQ: - return (delta == 0); - - case T_NE: - return (delta != 0); - - default: - return false; - } -} - -static struct json_object * -_uc_get_operands(struct uc_state *state, uint32_t op_off, size_t n, struct json_object **v) -{ - struct json_object *ctx = NULL; - uint32_t child_off; - size_t i, j; - - for (i = 0; i < n; i++) { - child_off = OPn(op_off, i); - - if (child_off && OP_IS_LIST(child_off)) - v[i] = uc_execute_list(state, child_off); - else if (child_off) - v[i] = uc_execute_op_sequence(state, child_off); - else - v[i] = NULL; - - if (i == 0) - ctx = json_object_get(state->ctx); - - if (uc_is_type(v[i], T_EXCEPTION)) { - json_object_put(ctx); - - for (j = 0; j < i; j++) - json_object_put(v[j]); - - return v[i]; - } - } - - json_object_put(state->ctx); - state->ctx = ctx; - - return NULL; -} - -#define uc_get_operands(state, off, vals) \ - do { \ - struct json_object *ex = _uc_get_operands(state, off, ARRAY_SIZE(vals), vals); \ - if (ex) return ex; \ - } while(0) - -static struct json_object * -uc_execute_rel(struct uc_state *state, uint32_t off) -{ - struct json_object *v[2], *rv; - - uc_get_operands(state, off, v); - - rv = xjs_new_boolean(uc_cmp(OP_TYPE(off), v[0], v[1])); - - json_object_put(v[0]); - json_object_put(v[1]); - - return rv; -} - -static bool -uc_eq(struct json_object *v1, struct json_object *v2) -{ - struct uc_op *tag1 = json_object_get_userdata(v1); - struct uc_op *tag2 = json_object_get_userdata(v2); - enum json_type t1 = json_object_get_type(v1); - enum json_type t2 = json_object_get_type(v2); - - if ((tag1 ? tag1->type : 0) != (tag2 ? 
tag2->type : 0)) - return false; - - if (t1 != t2) - return false; - - switch (t1) { - case json_type_array: - case json_type_object: - return (v1 == v2); - - case json_type_boolean: - return (json_object_get_boolean(v1) == json_object_get_boolean(v2)); - - case json_type_double: - if (isnan(json_object_get_double(v1)) || isnan(json_object_get_double(v2))) - return false; - - return (json_object_get_double(v1) == json_object_get_double(v2)); - - case json_type_int: - return (json_object_get_int64(v1) == json_object_get_int64(v2)); - - case json_type_string: - return !strcmp(json_object_get_string(v1), json_object_get_string(v2)); - - case json_type_null: - return true; - } - - return false; -} - -static struct json_object * -uc_execute_equality(struct uc_state *state, uint32_t off) -{ - struct json_object *v[2], *rv; - bool equal = false; - - uc_get_operands(state, off, v); - - equal = uc_eq(v[0], v[1]); - rv = xjs_new_boolean((OP_TYPE(off) == T_EQS) ? equal : !equal); - - json_object_put(v[0]); - json_object_put(v[1]); - - return rv; -} - -static struct json_object * -uc_execute_in(struct uc_state *state, uint32_t off) -{ - struct json_object *v[2], *item; - size_t arrlen, arridx; - bool found = false; - const char *key; - - uc_get_operands(state, off, v); - - if (json_object_is_type(v[1], json_type_array)) { - for (arridx = 0, arrlen = json_object_array_length(v[1]); - arridx < arrlen; arridx++) { - item = json_object_array_get_idx(v[1], arridx); - - if (uc_cmp(T_EQ, v[0], item)) { - found = true; - break; - } - } - } - else if (json_object_is_type(v[1], json_type_object)) { - key = v[0] ? 
json_object_get_string(v[0]) : "null"; - found = json_object_object_get_ex(v[1], key, NULL); - } - - json_object_put(v[0]); - json_object_put(v[1]); - - return xjs_new_boolean(found); -} - -static struct json_object * -uc_execute_inc_dec(struct uc_state *state, uint32_t off) -{ - bool is_inc = (OP_TYPE(off) == T_INC); - struct json_object *val, *nval, *scope, *key; - int64_t n; - double d; - - scope = uc_getref_required(state, OPn(off, 0), &key); - - if (!key) - return scope; - - val = uc_getval(scope, key); - - json_object_put(scope); - json_object_put(key); - - if (uc_cast_number(val, &n, &d) == json_type_double) - nval = uc_new_double(d + (is_inc ? 1.0 : -1.0)); - else - nval = xjs_new_int64(n + (is_inc ? 1 : -1)); - - json_object_put(uc_setval(scope, key, nval)); - - /* postfix inc/dec, return old val */ - if (OP_IS_POSTFIX(off)) - return val; - - json_object_put(val); - - return json_object_get(nval); -} - -static struct json_object * -uc_execute_list(struct uc_state *state, uint32_t off) -{ - struct json_object *ex, *val, *arr = xjs_new_array(); - size_t i; - - while (off) { - val = uc_execute_op(state, off); - - if (uc_is_type(val, T_EXCEPTION)) { - json_object_put(arr); - - return val; - } - - if (OP_IS_ELLIP(off)) { - if (!json_object_is_type(val, json_type_array)) { - ex = uc_new_exception(state, OP_POS(off), - "Type error: (%s) is not iterable", - json_object_get_string(val)); - - json_object_put(arr); - json_object_put(val); - - return ex; - } - - for (i = 0; i < json_object_array_length(val); i++) - json_object_array_add(arr, json_object_get(json_object_array_get_idx(val, i))); - - json_object_put(val); - } - else { - json_object_array_add(arr, val); - } - - off = OP_NEXT(off); - } - - return arr; -} - -static struct json_object * -uc_execute_object(struct uc_state *state, uint32_t off) -{ - struct json_object *ex, *v, *obj = uc_new_object(NULL); - uint32_t key_off; - char *istr; - size_t i; - - for (key_off = OPn(off, 0); key_off != 0; key_off = 
OP_NEXT(key_off)) { - v = uc_execute_op_sequence(state, OPn(key_off, 0)); - - if (uc_is_type(v, T_EXCEPTION)) { - json_object_put(obj); - - return v; - } - - if (OP_TYPE(key_off) == T_ELLIP) { - switch (json_object_get_type(v)) { - case json_type_object: - ; /* a label can only be part of a statement and a declaration is not a statement */ - json_object_object_foreach(v, vk, vv) - json_object_object_add(obj, vk, json_object_get(vv)); - - json_object_put(v); - - break; - - case json_type_array: - for (i = 0; i < json_object_array_length(v); i++) { - xasprintf(&istr, "%zu", i); - json_object_object_add(obj, istr, json_object_get(json_object_array_get_idx(v, i))); - free(istr); - } - - json_object_put(v); - - break; - - default: - ex = uc_new_exception(state, OP_POS(key_off), - "Type error: (%s) is not iterable", - json_object_get_string(v)); - - json_object_put(obj); - json_object_put(v); - - return ex; - } - } - else { - json_object_object_add(obj, json_object_get_string(OP_VAL(key_off)), v); - } - } - - return obj; -} - -struct json_object * -uc_invoke(struct uc_state *state, uint32_t off, struct json_object *this, - struct json_object *func, struct json_object *argvals) -{ - struct uc_op *tag = json_object_get_userdata(func); - struct json_object *arr, *rv = NULL; - struct uc_callstack callstack = {}; - struct uc_function *fn, *prev_fn; - size_t arridx, arglen; - struct uc_scope *sc; - uint32_t tag_off; - uc_c_fn *fptr; - int tag_type; - bool rest; - - if (!tag) - return NULL; - - if (state->calldepth >= 1000) - return uc_new_exception(state, OP_POS(off), "Runtime error: Too much recursion"); - - callstack.next = state->callstack; - callstack.function = state->function; - callstack.off = OP_POS(off); - - if (tag->is_arrow) - callstack.ctx = state->callstack ? json_object_get(state->callstack->ctx) : NULL; - else - callstack.ctx = json_object_get(this ? 
this : state->ctx); - - state->callstack = &callstack; - state->calldepth++; - - fn = tag->tag.data; - - prev_fn = state->function; - state->function = fn; - - /* is native function */ - if (tag->type == T_CFUNC) { - fptr = (uc_c_fn *)fn->cfn; - rv = fptr ? fptr(state, off, argvals) : NULL; - } - - /* is ucode function */ - else { - callstack.scope = uc_new_scope(state, fn->parent_scope); - - sc = state->scope; - state->scope = uc_acquire_scope(callstack.scope); - - if (fn->args) { - arglen = json_object_array_length(fn->args); - rest = (arglen > 1) && json_object_is_type(json_object_array_get_idx(fn->args, arglen - 1), json_type_null); - - for (arridx = 0; arridx < arglen - rest; arridx++) { - /* if the last argument is a rest one (...arg), put all remaining parameter values in an array */ - if (rest && arridx == arglen - 2) { - arr = xjs_new_array(); - - uc_setval(callstack.scope->scope, - json_object_array_get_idx(fn->args, arridx), - arr); - - for (; argvals && arridx < json_object_array_length(argvals); arridx++) - json_object_array_add(arr, json_object_get(json_object_array_get_idx(argvals, arridx))); - - break; - } - - uc_setval(callstack.scope->scope, json_object_array_get_idx(fn->args, arridx), - argvals ? json_object_array_get_idx(argvals, arridx) : NULL); - } - } - - rv = uc_execute_op_sequence(state, fn->entry); - tag = json_object_get_userdata(rv); - tag_off = tag ? tag->off : 0; - tag_type = tag ? tag->type : 0; - - switch (tag_type) { - case T_BREAK: - case T_CONTINUE: - json_object_put(rv); - rv = uc_new_exception(state, OP_POS(tag_off), - "Syntax error: %s statement must be inside loop", - uc_get_tokenname(tag_type)); - break; - - case T_RETURN: - json_object_put(rv); - rv = json_object_get(state->rval); - break; - } - - /* we left the function, pop the function scope... */ - uc_release_scope(state->scope); - state->scope = sc; - - /* ... 
and release it */ - uc_release_scope(callstack.scope); - - } - - state->function = prev_fn; - - json_object_put(callstack.ctx); - state->callstack = callstack.next; - state->calldepth--; - - return rv; -} - -static struct json_object * -uc_execute_call(struct uc_state *state, uint32_t off) -{ - struct json_object *v[2], *rv; - struct uc_op *decl; - char *lhs; - - uc_get_operands(state, off, v); - - decl = json_object_get_userdata(v[0]); - - if (!decl || (decl->type != T_FUNC && decl->type != T_CFUNC)) { - lhs = uc_ref_to_str(state, OPn(off, 0)); - - rv = uc_new_exception(state, OPn_POS(off, 0), - "Type error: %s is not a function", - lhs ? lhs : "left-hand side expression"); - - free(lhs); - } - else { - if (v[1] == NULL) - v[1] = xjs_new_array(); - - rv = uc_invoke(state, off, NULL, v[0], v[1]); - } - - json_object_put(v[0]); - json_object_put(v[1]); - - return rv; -} - -static void -uc_write_str(struct json_object *v) -{ - const char *p; - size_t len; - - switch (json_object_get_type(v)) { - case json_type_object: - case json_type_array: - p = json_object_to_json_string_ext(v, JSON_C_TO_STRING_NOSLASHESCAPE|JSON_C_TO_STRING_SPACED); - len = strlen(p); - break; - - case json_type_string: - p = json_object_get_string(v); - len = json_object_get_string_len(v); - break; - - case json_type_null: - p = ""; - len = 0; - break; - - default: - p = json_object_get_string(v); - len = strlen(p); - } - - fwrite(p, 1, len, stdout); -} - -static struct json_object * -uc_execute_exp(struct uc_state *state, uint32_t off) -{ - struct json_object *val = uc_execute_op_sequence(state, OPn(off, 0)); - struct uc_op *tag = val ? json_object_get_userdata(val) : NULL; - - switch (tag ? 
tag->type : 0) { - case T_EXCEPTION: - printf("<exception: %s>", json_object_get_string(val)); - break; - - default: - uc_write_str(val); - break; - } - - json_object_put(val); - - return NULL; -} - -static struct json_object * -uc_execute_unary_plus_minus(struct uc_state *state, uint32_t off) -{ - bool is_sub = (OP_TYPE(off) == T_SUB); - struct json_object *v[1]; - enum json_type t; - int64_t n; - double d; - - uc_get_operands(state, off, v); - - t = uc_cast_number(v[0], &n, &d); - - json_object_put(v[0]); - - switch (t) { - case json_type_int: - if (OPn_IS_OVERFLOW(off, 0)) - return xjs_new_int64(((n >= 0) == is_sub) ? INT64_MIN : INT64_MAX); - - return xjs_new_int64(is_sub ? -n : n); - - default: - return uc_new_double(is_sub ? -d : d); - } -} - -static struct json_object * -uc_execute_arith(struct uc_state *state, uint32_t off) -{ - int type = OP_TYPE(off); - struct json_object *v[2], *rv; - enum json_type t1, t2; - const char *s1, *s2; - size_t len1, len2; - int64_t n1, n2; - double d1, d2; - char *s; - - if (!OPn(off, 1)) - return uc_execute_unary_plus_minus(state, off); - - uc_get_operands(state, off, v); - - if (type == T_ADD && - (json_object_is_type(v[0], json_type_string) || - json_object_is_type(v[1], json_type_string))) { - s1 = v[0] ? json_object_get_string(v[0]) : "null"; - s2 = v[1] ? json_object_get_string(v[1]) : "null"; - len1 = strlen(s1); - len2 = strlen(s2); - s = xalloc(len1 + len2 + 1); - - snprintf(s, len1 + len2 + 1, "%s%s", s1, s2); - - rv = xjs_new_string(s); - - json_object_put(v[0]); - json_object_put(v[1]); - free(s); - - return rv; - } - - t1 = uc_cast_number(v[0], &n1, &d1); - t2 = uc_cast_number(v[1], &n2, &d2); - - json_object_put(v[0]); - json_object_put(v[1]); - - if (t1 == json_type_double || t2 == json_type_double) { - d1 = (t1 == json_type_double) ? d1 : (double)n1; - d2 = (t2 == json_type_double) ? 
d2 : (double)n2; - - switch (type) { - case T_ADD: - return uc_new_double(d1 + d2); - - case T_SUB: - return uc_new_double(d1 - d2); - - case T_MUL: - return uc_new_double(d1 * d2); - - case T_DIV: - if (d2 == 0.0) - return uc_new_double(INFINITY); - else if (isnan(d2)) - return uc_new_double(NAN); - else if (!isfinite(d2)) - return uc_new_double(isfinite(d1) ? 0.0 : NAN); - - return uc_new_double(d1 / d2); - - case T_MOD: - return uc_new_double(NAN); - } - } - - switch (type) { - case T_ADD: - return xjs_new_int64(n1 + n2); - - case T_SUB: - return xjs_new_int64(n1 - n2); - - case T_MUL: - return xjs_new_int64(n1 * n2); - - case T_DIV: - if (n2 == 0) - return uc_new_double(INFINITY); - - return xjs_new_int64(n1 / n2); - - case T_MOD: - return xjs_new_int64(n1 % n2); - } - - return uc_new_double(NAN); -} - -static struct json_object * -uc_execute_bitop(struct uc_state *state, uint32_t off) -{ - struct json_object *v[2]; - int64_t n1, n2; - double d; - - uc_get_operands(state, off, v); - - if (uc_cast_number(v[0], &n1, &d) == json_type_double) - n1 = isnan(d) ? 0 : (int64_t)d; - - if (uc_cast_number(v[1], &n2, &d) == json_type_double) - n2 = isnan(d) ? 0 : (int64_t)d; - - json_object_put(v[0]); - json_object_put(v[1]); - - switch (OP_TYPE(off)) { - case T_LSHIFT: - return xjs_new_int64(n1 << n2); - - case T_RSHIFT: - return xjs_new_int64(n1 >> n2); - - case T_BAND: - return xjs_new_int64(n1 & n2); - - case T_BXOR: - return xjs_new_int64(n1 ^ n2); - - case T_BOR: - return xjs_new_int64(n1 | n2); - - default: - return NULL; - } -} - -static struct json_object * -uc_execute_not(struct uc_state *state, uint32_t off) -{ - return xjs_new_boolean(!uc_test_condition(state, OPn(off, 0))); -} - -static struct json_object * -uc_execute_compl(struct uc_state *state, uint32_t off) -{ - struct json_object *v[1]; - int64_t n; - double d; - - uc_get_operands(state, off, v); - - if (uc_cast_number(v[0], &n, &d) == json_type_double) - n = isnan(d) ? 
0 : (int64_t)d; - - json_object_put(v[0]); - - return xjs_new_int64(~n); -} - -static void -uc_free_tag(struct json_object *v, void *ud) -{ - free(ud); -} - -static struct json_object * -uc_execute_return(struct uc_state *state, uint32_t off) -{ - struct uc_op *cpy = xalloc(sizeof(*cpy)); - struct json_object *v[1], *rv; - - memcpy(cpy, OP(off), sizeof(*cpy)); - cpy->off = off; - - uc_get_operands(state, off, v); - - json_object_put(state->rval); - state->rval = v[0]; - - rv = xjs_new_boolean(false); - - json_object_set_userdata(rv, cpy, uc_free_tag); - - return rv; -} - -static struct json_object * -uc_execute_break_cont(struct uc_state *state, uint32_t off) -{ - struct uc_op *cpy = xalloc(sizeof(*cpy)); - struct json_object *rv = xjs_new_int64(0); - - memcpy(cpy, OP(off), sizeof(*cpy)); - cpy->off = off; - - json_object_set_userdata(rv, cpy, uc_free_tag); - - return rv; -} - -static struct json_object * -uc_execute_function(struct uc_state *state, uint32_t off) -{ - struct json_object *obj = uc_new_func(state, off, state->scope); - struct json_object *val = OPn_VAL(off, 0); - - if (val) - uc_setval(state->scope->scope, val, obj); - - return obj; -} - -static struct json_object * -uc_execute_this(struct uc_state *state, uint32_t off) -{ - return json_object_get(state->callstack->ctx); -} - -static struct json_object * -uc_execute_try_catch(struct uc_state *state, uint32_t off) -{ - struct json_object *evar, *rv; - struct uc_op *tag; - - rv = uc_execute_op_sequence(state, OPn(off, 0)); - - if (uc_is_type(rv, T_EXCEPTION)) { - evar = OPn_VAL(off, 1); - - if (evar) { - /* remove the T_EXCEPTION type from the object to avoid handling - * it as a new exception in the catch block */ - tag = json_object_get_userdata(rv); - tag->type = T_LBRACE; - - json_object_put(uc_setval(state->scope->scope, evar, - json_object_get(rv))); - } - - json_object_put(state->exception); - state->exception = NULL; - - json_object_put(rv); - rv = uc_execute_op_sequence(state, OPn(off, 2)); - 
} - - return rv; -} - -static bool -uc_match_case(struct uc_state *state, struct json_object *v, uint32_t case_off) -{ - struct json_object *caseval = uc_execute_op_sequence(state, OPn(case_off, 0)); - bool rv = uc_eq(v, caseval); - - json_object_put(caseval); - return rv; -} - -static struct json_object * -uc_execute_switch_case(struct uc_state *state, uint32_t off) -{ - uint32_t case_off, default_off = 0, jmp_off = 0; - struct json_object *v[1], *rv = NULL; - - uc_get_operands(state, off, v); - - /* First try to find matching case... */ - for (case_off = OPn(off, 1); case_off != 0; case_off = OP_NEXT(case_off)) { - /* remember default case and throw on dupes */ - if (OP_TYPE(case_off) == T_DEFAULT) { - if (default_off) { - json_object_put(v[0]); - - return uc_new_exception(state, OP_POS(case_off), - "Syntax error: more than one switch default case"); - } - - default_off = case_off; - continue; - } - - /* Found a matching case, remember jump offset */ - if (uc_match_case(state, v[0], case_off)) { - jmp_off = case_off; - break; - } - } - - /* jump to matching case (or default) and continue until break */ - for (case_off = jmp_off ? 
jmp_off : default_off; case_off != 0; case_off = OP_NEXT(case_off)) { - json_object_put(rv); - - if (OP_TYPE(case_off) == T_DEFAULT) - rv = uc_execute_op_sequence(state, OPn(case_off, 0)); - else - rv = uc_execute_op_sequence(state, OPn(case_off, 1)); - - if (uc_is_type(rv, T_BREAK)) { - json_object_put(rv); - rv = NULL; - break; - } - else if (uc_is_type(rv, T_RETURN) || uc_is_type(rv, T_EXCEPTION) || uc_is_type(rv, T_CONTINUE)) { - break; - } - } - - json_object_put(v[0]); - - return rv; -} - -static struct json_object * -uc_execute_atom(struct uc_state *state, uint32_t off) -{ - return json_object_get(OP_VAL(off)); -} - -static struct json_object * -uc_execute_text(struct uc_state *state, uint32_t off) -{ - printf("%s", json_object_get_string(OP_VAL(off))); - - return NULL; -} - -static struct json_object * -uc_execute_label(struct uc_state *state, uint32_t off) -{ - struct json_object *scope, *key, *val; - - scope = uc_getref(state, off, &key); - - json_object_put(state->ctx); - state->ctx = NULL; - - if (state->strict_declarations && scope == NULL) { - return uc_new_exception(state, OP_POS(off), - "Reference error: %s is not defined", - json_object_get_string(OP_VAL(off))); - } - - val = uc_getval(scope, key); - json_object_put(scope); - json_object_put(key); - - return val; -} - -static struct json_object * -uc_execute_dot(struct uc_state *state, uint32_t off) -{ - struct json_object *scope, *key, *val; - - scope = uc_getref_required(state, off, &key); - - json_object_put(state->ctx); - state->ctx = json_object_get(scope); - - if (!key) - return scope; - - val = uc_getval(scope, key); - json_object_put(scope); - json_object_put(key); - - return val; -} - -static struct json_object * -uc_execute_lbrack(struct uc_state *state, uint32_t off) -{ - /* postfix access */ - if (OP_IS_POSTFIX(off)) - return uc_execute_dot(state, off); - - return uc_execute_list(state, OPn(off, 0)); -} - -static struct json_object * -uc_execute_exp_list(struct uc_state *state, uint32_t 
off) -{ - return uc_execute_op_sequence(state, OPn(off, 0)); -} - -static struct json_object *(*fns[__T_MAX])(struct uc_state *, uint32_t) = { - [T_NUMBER] = uc_execute_atom, - [T_DOUBLE] = uc_execute_atom, - [T_STRING] = uc_execute_atom, - [T_REGEXP] = uc_execute_atom, - [T_BOOL] = uc_execute_atom, - [T_NULL] = uc_execute_atom, - [T_THIS] = uc_execute_this, - [T_FUNC] = uc_execute_function, - [T_ARROW] = uc_execute_function, - [T_TEXT] = uc_execute_text, - [T_ASSIGN] = uc_execute_assign, - [T_LOCAL] = uc_execute_local, - [T_LABEL] = uc_execute_label, - [T_DOT] = uc_execute_dot, - [T_LBRACK] = uc_execute_lbrack, - [T_LBRACE] = uc_execute_object, - [T_IF] = uc_execute_if, - [T_ELIF] = uc_execute_if, - [T_QMARK] = uc_execute_if, - [T_FOR] = uc_execute_for, - [T_WHILE] = uc_execute_while, - [T_AND] = uc_execute_and_or, - [T_OR] = uc_execute_and_or, - [T_LT] = uc_execute_rel, - [T_LE] = uc_execute_rel, - [T_GT] = uc_execute_rel, - [T_GE] = uc_execute_rel, - [T_EQ] = uc_execute_rel, - [T_NE] = uc_execute_rel, - [T_EQS] = uc_execute_equality, - [T_NES] = uc_execute_equality, - [T_IN] = uc_execute_in, - [T_INC] = uc_execute_inc_dec, - [T_DEC] = uc_execute_inc_dec, - [T_LPAREN] = uc_execute_call, - [T_LEXP] = uc_execute_exp, - [T_ADD] = uc_execute_arith, - [T_SUB] = uc_execute_arith, - [T_MUL] = uc_execute_arith, - [T_DIV] = uc_execute_arith, - [T_MOD] = uc_execute_arith, - [T_LSHIFT] = uc_execute_bitop, - [T_RSHIFT] = uc_execute_bitop, - [T_BAND] = uc_execute_bitop, - [T_BXOR] = uc_execute_bitop, - [T_BOR] = uc_execute_bitop, - [T_COMPL] = uc_execute_compl, - [T_NOT] = uc_execute_not, - [T_RETURN] = uc_execute_return, - [T_BREAK] = uc_execute_break_cont, - [T_CONTINUE] = uc_execute_break_cont, - [T_TRY] = uc_execute_try_catch, - [T_SWITCH] = uc_execute_switch_case, - [T_COMMA] = uc_execute_exp_list, -}; - -static struct json_object * -uc_execute_op(struct uc_state *state, uint32_t off) -{ - int type = OP_TYPE(off); - - if (!fns[type]) - return uc_new_exception(state, 
OP_POS(off), - "Runtime error: Unrecognized opcode %d", type); - - return fns[type](state, off); -} - -static struct json_object * -uc_execute_op_sequence(struct uc_state *state, uint32_t off) -{ - struct json_object *v = NULL; - struct uc_op *tag = NULL; - - while (off) { - json_object_put(v); - - v = uc_execute_op(state, off); - tag = v ? json_object_get_userdata(v) : NULL; - - switch (tag ? tag->type : 0) { - case T_BREAK: - case T_CONTINUE: - case T_RETURN: - case T_EXCEPTION: - return v; - } - - off = OP_NEXT(off); - } - - return v; -} - -static void -uc_globals_init(struct uc_state *state, struct json_object *scope) -{ - struct json_object *arr = xjs_new_array(); - const char *p, *last; - - for (p = last = LIB_SEARCH_PATH;; p++) { - if (*p == ':' || *p == '\0') { - json_object_array_add(arr, xjs_new_string_len(last, p - last)); - - if (!*p) - break; - - last = p + 1; - } - } - - json_object_object_add(scope, "REQUIRE_SEARCH_PATH", arr); -} - -static void -uc_register_variable(struct json_object *scope, const char *key, struct json_object *val) -{ - char *name = strdup(key); - char *p; - - if (!name) - return; - - for (p = name; *p; p++) - if (!isalnum(*p) && *p != '_') - *p = '_'; - - json_object_object_add(scope, name, val); - free(name); -} - -struct json_object * -uc_run(struct uc_state *state, struct json_object *env, struct json_object *modules) -{ - struct json_object *args, *rv; - struct uc_function fn = {}; - size_t i; - - state->scope = uc_new_scope(state, NULL); - state->ctx = NULL; - - fn.source = state->source; - state->function = &fn; - - if (env) { - json_object_object_foreach(env, key, val) - uc_register_variable(state->scope->scope, key, json_object_get(val)); - } - - uc_globals_init(state, state->scope->scope); - uc_lib_init(state, state->scope->scope); - - if (modules) { - args = xjs_new_array(); - - for (i = 0; i < json_object_array_length(modules); i++) { - json_object_array_put_idx(args, 0, 
json_object_get(json_object_array_get_idx(modules, i))); - - rv = uc_invoke(state, 0, NULL, - json_object_object_get(state->scope->scope, "require"), - args); - - if (uc_is_type(rv, T_EXCEPTION)) - goto out; - - uc_register_variable(state->scope->scope, - json_object_get_string(json_object_array_get_idx(modules, i)), - rv); - } - - json_object_put(args); - } - - rv = uc_execute_source(state, state->source, state->scope); - -out: - uc_release_scope(state->scope); - - return rv; -} @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -25,11 +25,11 @@ #include <errno.h> #include <endian.h> -#include "ast.h" +#include "vm.h" #include "lib.h" #include "lexer.h" -#include "parser.h" +#define UC_LEX_CONTINUE_PARSING (void *)1 struct keyword { int type; @@ -48,7 +48,7 @@ struct token { char pat[4]; }; int plen; - uint32_t (*parse)(struct uc_state *s); + uc_token *(*parse)(uc_lexer *); }; #define dec(o) \ @@ -58,109 +58,110 @@ struct token { (((x) >= 'a') ? (10 + (x) - 'a') : \ (((x) >= 'A') ? 
(10 + (x) - 'A') : dec(x))) -static uint32_t parse_comment(struct uc_state *); -static uint32_t parse_string(struct uc_state *); -static uint32_t parse_regexp(struct uc_state *); -static uint32_t parse_number(struct uc_state *); -static uint32_t parse_label(struct uc_state *); +static uc_token *parse_comment(uc_lexer *); +static uc_token *parse_string(uc_lexer *); +static uc_token *parse_regexp(uc_lexer *); +static uc_token *parse_number(uc_lexer *); +static uc_token *parse_label(uc_lexer *); static const struct token tokens[] = { - { T_ASLEFT, { .pat = "<<=" }, 3 }, - { T_ASRIGHT, { .pat = ">>=" }, 3 }, - { T_LEXP, { .pat = "{{-" }, 3 }, - { T_REXP, { .pat = "-}}" }, 3 }, - { T_LSTM, { .pat = "{%+" }, 3 }, - { T_LSTM, { .pat = "{%-" }, 3 }, - { T_RSTM, { .pat = "-%}" }, 3 }, - { T_EQS, { .pat = "===" }, 3 }, - { T_NES, { .pat = "!==" }, 3 }, - { T_ELLIP, { .pat = "..." }, 3 }, - { T_AND, { .pat = "&&" }, 2 }, - { T_ASADD, { .pat = "+=" }, 2 }, - { T_ASBAND, { .pat = "&=" }, 2 }, - { T_ASBOR, { .pat = "|=" }, 2 }, - { T_ASBXOR, { .pat = "^=" }, 2 }, - //{ T_ASDIV, { .pat = "/=" }, 2 }, - { T_ASMOD, { .pat = "%=" }, 2 }, - { T_ASMUL, { .pat = "*=" }, 2 }, - { T_ASSUB, { .pat = "-=" }, 2 }, - { T_DEC, { .pat = "--" }, 2 }, - { T_INC, { .pat = "++" }, 2 }, - { T_EQ, { .pat = "==" }, 2 }, - { T_NE, { .pat = "!=" }, 2 }, - { T_LE, { .pat = "<=" }, 2 }, - { T_GE, { .pat = ">=" }, 2 }, - { T_LSHIFT, { .pat = "<<" }, 2 }, - { T_RSHIFT, { .pat = ">>" }, 2 }, + { TK_ASLEFT, { .pat = "<<=" }, 3 }, + { TK_ASRIGHT, { .pat = ">>=" }, 3 }, + { TK_LEXP, { .pat = "{{-" }, 3 }, + { TK_REXP, { .pat = "-}}" }, 3 }, + { TK_LSTM, { .pat = "{%+" }, 3 }, + { TK_LSTM, { .pat = "{%-" }, 3 }, + { TK_RSTM, { .pat = "-%}" }, 3 }, + { TK_EQS, { .pat = "===" }, 3 }, + { TK_NES, { .pat = "!==" }, 3 }, + { TK_ELLIP, { .pat = "..." 
}, 3 }, + { TK_AND, { .pat = "&&" }, 2 }, + { TK_ASADD, { .pat = "+=" }, 2 }, + { TK_ASBAND, { .pat = "&=" }, 2 }, + { TK_ASBOR, { .pat = "|=" }, 2 }, + { TK_ASBXOR, { .pat = "^=" }, 2 }, + //{ TK_ASDIV, { .pat = "/=" }, 2 }, + { TK_ASMOD, { .pat = "%=" }, 2 }, + { TK_ASMUL, { .pat = "*=" }, 2 }, + { TK_ASSUB, { .pat = "-=" }, 2 }, + { TK_DEC, { .pat = "--" }, 2 }, + { TK_INC, { .pat = "++" }, 2 }, + { TK_EQ, { .pat = "==" }, 2 }, + { TK_NE, { .pat = "!=" }, 2 }, + { TK_LE, { .pat = "<=" }, 2 }, + { TK_GE, { .pat = ">=" }, 2 }, + { TK_LSHIFT, { .pat = "<<" }, 2 }, + { TK_RSHIFT, { .pat = ">>" }, 2 }, { 0, { .pat = "//" }, 2, parse_comment }, { 0, { .pat = "/*" }, 2, parse_comment }, - { T_OR, { .pat = "||" }, 2 }, - { T_LEXP, { .pat = "{{" }, 2 }, - { T_REXP, { .pat = "}}" }, 2 }, - { T_LSTM, { .pat = "{%" }, 2 }, - { T_RSTM, { .pat = "%}" }, 2 }, - { T_ARROW, { .pat = "=>" }, 2 }, - { T_ADD, { .pat = "+" }, 1 }, - { T_ASSIGN, { .pat = "=" }, 1 }, - { T_BAND, { .pat = "&" }, 1 }, - { T_BOR, { .pat = "|" }, 1 }, - { T_LBRACK, { .pat = "[" }, 1 }, - { T_RBRACK, { .pat = "]" }, 1 }, - { T_BXOR, { .pat = "^" }, 1 }, - { T_LBRACE, { .pat = "{" }, 1 }, - { T_RBRACE, { .pat = "}" }, 1 }, - { T_COLON, { .pat = ":" }, 1 }, - { T_COMMA, { .pat = "," }, 1 }, - { T_COMPL, { .pat = "~" }, 1 }, - //{ T_DIV, { .pat = "/" }, 1 }, - { T_GT, { .pat = ">" }, 1 }, - { T_NOT, { .pat = "!" }, 1 }, - { T_LT, { .pat = "<" }, 1 }, - { T_MOD, { .pat = "%" }, 1 }, - { T_MUL, { .pat = "*" }, 1 }, - { T_LPAREN, { .pat = "(" }, 1 }, - { T_RPAREN, { .pat = ")" }, 1 }, - { T_QMARK, { .pat = "?" }, 1 }, - { T_SCOL, { .pat = ";" }, 1 }, - { T_SUB, { .pat = "-" }, 1 }, - { T_DOT, { .pat = "." 
}, 1 }, - { T_STRING, { .pat = "'" }, 1, parse_string }, - { T_STRING, { .pat = "\"" }, 1, parse_string }, - { T_REGEXP, { .pat = "/" }, 1, parse_regexp }, - { T_LABEL, { .pat = "_" }, 1, parse_label }, - { T_LABEL, { .pat = "az" }, 0, parse_label }, - { T_LABEL, { .pat = "AZ" }, 0, parse_label }, - { T_NUMBER, { .pat = "09" }, 0, parse_number }, + { TK_OR, { .pat = "||" }, 2 }, + { TK_LEXP, { .pat = "{{" }, 2 }, + { TK_REXP, { .pat = "}}" }, 2 }, + { TK_LSTM, { .pat = "{%" }, 2 }, + { TK_RSTM, { .pat = "%}" }, 2 }, + { TK_ARROW, { .pat = "=>" }, 2 }, + { TK_ADD, { .pat = "+" }, 1 }, + { TK_ASSIGN, { .pat = "=" }, 1 }, + { TK_BAND, { .pat = "&" }, 1 }, + { TK_BOR, { .pat = "|" }, 1 }, + { TK_LBRACK, { .pat = "[" }, 1 }, + { TK_RBRACK, { .pat = "]" }, 1 }, + { TK_BXOR, { .pat = "^" }, 1 }, + { TK_LBRACE, { .pat = "{" }, 1 }, + { TK_RBRACE, { .pat = "}" }, 1 }, + { TK_COLON, { .pat = ":" }, 1 }, + { TK_COMMA, { .pat = "," }, 1 }, + { TK_COMPL, { .pat = "~" }, 1 }, + //{ TK_DIV, { .pat = "/" }, 1 }, + { TK_GT, { .pat = ">" }, 1 }, + { TK_NOT, { .pat = "!" }, 1 }, + { TK_LT, { .pat = "<" }, 1 }, + { TK_MOD, { .pat = "%" }, 1 }, + { TK_MUL, { .pat = "*" }, 1 }, + { TK_LPAREN, { .pat = "(" }, 1 }, + { TK_RPAREN, { .pat = ")" }, 1 }, + { TK_QMARK, { .pat = "?" }, 1 }, + { TK_SCOL, { .pat = ";" }, 1 }, + //{ TK_SUB, { .pat = "-" }, 1 }, + { TK_DOT, { .pat = "." 
}, 1 }, + { TK_STRING, { .pat = "'" }, 1, parse_string }, + { TK_STRING, { .pat = "\"" }, 1, parse_string }, + { TK_REGEXP, { .pat = "/" }, 1, parse_regexp }, + { TK_LABEL, { .pat = "_" }, 1, parse_label }, + { TK_LABEL, { .pat = "az" }, 0, parse_label }, + { TK_LABEL, { .pat = "AZ" }, 0, parse_label }, + { TK_NUMBER, { .pat = "-" }, 1, parse_number }, + { TK_NUMBER, { .pat = "09" }, 0, parse_number }, }; static const struct keyword reserved_words[] = { - { T_ENDFUNC, "endfunction", 11 }, - { T_DOUBLE, "Infinity", 8, { .d = INFINITY } }, - { T_CONTINUE, "continue", 8 }, - { T_ENDWHILE, "endwhile", 8 }, - { T_FUNC, "function", 8 }, - { T_DEFAULT, "default", 7 }, - { T_RETURN, "return", 6 }, - { T_ENDFOR, "endfor", 6 }, - { T_SWITCH, "switch", 6 }, - { T_LOCAL, "local", 5 }, - { T_ENDIF, "endif", 5 }, - { T_WHILE, "while", 5 }, - { T_BREAK, "break", 5 }, - { T_CATCH, "catch", 5 }, - { T_BOOL, "false", 5, { .b = false } }, - { T_BOOL, "true", 4, { .b = true } }, - { T_ELIF, "elif", 4 }, - { T_ELSE, "else", 4 }, - { T_THIS, "this", 4 }, - { T_NULL, "null", 4 }, - { T_CASE, "case", 4 }, - { T_DOUBLE, "NaN", 3, { .d = NAN } }, - { T_TRY, "try", 3 }, - { T_FOR, "for", 3 }, - { T_LOCAL, "let", 3 }, - { T_IF, "if", 2 }, - { T_IN, "in", 2 }, + { TK_ENDFUNC, "endfunction", 11 }, + { TK_DOUBLE, "Infinity", 8, { .d = INFINITY } }, + { TK_CONTINUE, "continue", 8 }, + { TK_ENDWHILE, "endwhile", 8 }, + { TK_FUNC, "function", 8 }, + { TK_DEFAULT, "default", 7 }, + { TK_RETURN, "return", 6 }, + { TK_ENDFOR, "endfor", 6 }, + { TK_SWITCH, "switch", 6 }, + { TK_LOCAL, "local", 5 }, + { TK_ENDIF, "endif", 5 }, + { TK_WHILE, "while", 5 }, + { TK_BREAK, "break", 5 }, + { TK_CATCH, "catch", 5 }, + { TK_BOOL, "false", 5, { .b = false } }, + { TK_BOOL, "true", 4, { .b = true } }, + { TK_ELIF, "elif", 4 }, + { TK_ELSE, "else", 4 }, + { TK_THIS, "this", 4 }, + { TK_NULL, "null", 4 }, + { TK_CASE, "case", 4 }, + { TK_DOUBLE, "NaN", 3, { .d = NAN } }, + { TK_TRY, "try", 3 }, + { TK_FOR, "for", 3 
}, + { TK_LOCAL, "let", 3 }, + { TK_IF, "if", 2 }, + { TK_IN, "in", 2 }, }; @@ -221,125 +222,216 @@ utf8enc(char **out, int *rem, int code) /* length of the longest token in our lookup table */ #define UT_LEX_MAX_TOKEN_LEN 3 -static uint32_t emit_op(struct uc_state *state, uint32_t pos, int type, struct json_object *val) +static uc_token * +emit_op(uc_lexer *lex, uint32_t pos, int type, struct json_object *val) { - uint32_t off = uc_new_op(state, type, val, UINT32_MAX); - - OP(off)->off = pos; + lex->curr.type = type; + lex->curr.val = val; + lex->curr.pos = pos; /* Follow JSLint logic and treat a slash after any of the * `(,=:[!&|?{};` characters as the beginning of a regex * literal... */ switch (type) { - case T_LPAREN: - case T_COMMA: - - case T_ASADD: - case T_ASBAND: - case T_ASBOR: - case T_ASBXOR: - case T_ASDIV: - case T_ASLEFT: - case T_ASMOD: - case T_ASMUL: - case T_ASRIGHT: - case T_ASSIGN: - case T_ASSUB: - case T_EQ: - case T_EQS: - case T_GE: - case T_LE: - case T_NE: - case T_NES: - - case T_COLON: - case T_LBRACK: - case T_NOT: - - case T_AND: - case T_BAND: - - case T_OR: - case T_BOR: - - case T_QMARK: - - case T_LBRACE: - case T_RBRACE: - - case T_LSTM: - case T_LEXP: - - case T_SCOL: - state->lex.expect_div = false; + case TK_LPAREN: + case TK_COMMA: + + case TK_ASADD: + case TK_ASBAND: + case TK_ASBOR: + case TK_ASBXOR: + case TK_ASDIV: + case TK_ASLEFT: + case TK_ASMOD: + case TK_ASMUL: + case TK_ASRIGHT: + case TK_ASSIGN: + case TK_ASSUB: + case TK_EQ: + case TK_EQS: + case TK_GE: + case TK_LE: + case TK_NE: + case TK_NES: + + case TK_COLON: + case TK_LBRACK: + case TK_NOT: + + case TK_AND: + case TK_BAND: + + case TK_OR: + case TK_BOR: + + case TK_QMARK: + + case TK_LBRACE: + case TK_RBRACE: + + case TK_LSTM: + case TK_LEXP: + + case TK_SCOL: + lex->expect_div = false; break; default: - state->lex.expect_div = true; + lex->expect_div = true; } - return off; + return &lex->curr; } -static void lookbehind_append(struct uc_state *s, const 
char *data, size_t len) +static void lookbehind_append(uc_lexer *lex, const char *data, size_t len) { if (len) { - s->lex.lookbehind = xrealloc(s->lex.lookbehind, s->lex.lookbehindlen + len); - memcpy(s->lex.lookbehind + s->lex.lookbehindlen, data, len); - s->lex.lookbehindlen += len; + lex->lookbehind = xrealloc(lex->lookbehind, lex->lookbehindlen + len); + memcpy(lex->lookbehind + lex->lookbehindlen, data, len); + lex->lookbehindlen += len; } } -static void lookbehind_reset(struct uc_state *s) { - free(s->lex.lookbehind); - s->lex.lookbehind = NULL; - s->lex.lookbehindlen = 0; +static void lookbehind_reset(uc_lexer *lex) { + free(lex->lookbehind); + lex->lookbehind = NULL; + lex->lookbehindlen = 0; } -static uint32_t lookbehind_to_text(struct uc_state *s, uint32_t pos, int type, const char *strip_trailing_chars) { - uint32_t rv = 0; +static uc_token * +lookbehind_to_text(uc_lexer *lex, uint32_t pos, int type, const char *strip_trailing_chars) { + uc_token *rv = NULL; - if (s->lex.lookbehind) { + if (lex->lookbehind) { if (strip_trailing_chars) { - while (s->lex.lookbehindlen > 0 && strchr(strip_trailing_chars, s->lex.lookbehind[s->lex.lookbehindlen-1])) - s->lex.lookbehindlen--; + while (lex->lookbehindlen > 0 && strchr(strip_trailing_chars, lex->lookbehind[lex->lookbehindlen-1])) + lex->lookbehindlen--; } - rv = emit_op(s, pos, type, xjs_new_string_len(s->lex.lookbehind, s->lex.lookbehindlen)); + rv = emit_op(lex, pos, type, xjs_new_string_len(lex->lookbehind, lex->lookbehindlen)); - lookbehind_reset(s); + lookbehind_reset(lex); } return rv; } -static inline size_t buf_remaining(struct uc_state *s) { - return (s->lex.bufend - s->lex.bufstart); +static inline size_t +buf_remaining(uc_lexer *lex) { + return (lex->bufend - lex->bufstart); } -static inline bool _buf_startswith(struct uc_state *s, const char *str, size_t len) { - return (buf_remaining(s) >= len && !strncmp(s->lex.bufstart, str, len)); +static inline bool +_buf_startswith(uc_lexer *lex, const char 
*str, size_t len) { + return (buf_remaining(lex) >= len && !strncmp(lex->bufstart, str, len)); } #define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) -static void buf_consume(struct uc_state *s, ssize_t len) { - s->lex.bufstart += len; - s->source->off += len; +#if 0 +static void add_lineinfo(struct uc_state *s, size_t off) +{ + uc_lineinfo *lines = &s->source->lineinfo; + size_t linelen; + + linelen = off - s->lex.lastlineoff; + + /* lineinfo is encoded in bytes: the most significant bit specifies whether + * to advance the line count by one or not, while the remaining 7 bits encode + * the amounts of bytes on the current line. + * + * If a line has more than 127 characters, the first byte will be set to + * 0xff (1 1111111) and subsequent bytes will encode the remaining characters + * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus + * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). + * + * The newline character itself is not counted, so an empty line is encoded as + * 0x80 (1:0000000). 
+ */ + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80 + (linelen & 0x7f); + linelen -= (linelen & 0x7f); + + while (linelen > 0) { + uc_vector_grow(lines); + lines->entries[lines->count++] = (linelen & 0x7f); + linelen -= (linelen & 0x7f); + } + + s->lex.lastlineoff = off + 1; + s->lex.line++; } +#endif -static uint32_t -parse_comment(struct uc_state *s) +static void +next_lineinfo(uc_lexer *lex) { - const struct token *tok = s->lex.tok; - const char *ptr, *end; - size_t elen; + uc_lineinfo *lines = &lex->source->lineinfo; - if (!buf_remaining(s)) { - uc_new_exception(s, s->lex.lastoff, "Syntax error: Unterminated comment"); + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80; +} + +static void +update_lineinfo(uc_lexer *lex, size_t off) +{ + uc_lineinfo *lines = &lex->source->lineinfo; + uint8_t *entry; - return 0; + entry = uc_vector_last(lines); + + if ((entry[0] & 0x7f) + off <= 0x7f) { + entry[0] += off; } + else { + off -= (0x7f - (entry[0] & 0x7f)); + entry[0] |= 0x7f; + + while (off > 0) { + uc_vector_grow(lines); + entry = uc_vector_last(lines); + entry[1] = (off & 0x7f); + off -= (off & 0x7f); + lines->count++; + } + } +} + +static void +buf_consume(uc_lexer *lex, size_t len) { + size_t i, linelen; + + if (!lex->source->lineinfo.count) + next_lineinfo(lex); + + for (i = 0, linelen = 0; i < len; i++) { + if (lex->bufstart[i] == '\n') { + update_lineinfo(lex, linelen); + next_lineinfo(lex); + + linelen = 0; + } + else { + linelen++; + } + } + + if (linelen) + update_lineinfo(lex, linelen); + + lex->bufstart += len; + lex->source->off += len; +} + +static uc_token * +parse_comment(uc_lexer *lex) +{ + const struct token *tok = lex->tok; + const char *ptr, *end; + size_t elen; + + if (!buf_remaining(lex)) + return emit_op(lex, lex->lastoff, TK_ERROR, xjs_new_string("Unterminated comment")); if (!strcmp(tok->pat, "//")) { end = "\n"; @@ -350,20 +442,21 @@ parse_comment(struct uc_state *s) elen = 2; } - for (ptr = s->lex.bufstart; 
ptr < s->lex.bufend - elen; ptr++) { + for (ptr = lex->bufstart; ptr < lex->bufend - elen; ptr++) { if (!strncmp(ptr, end, elen)) { - buf_consume(s, (ptr - s->lex.bufstart) + elen); + buf_consume(lex, (ptr - lex->bufstart) + elen); - return UINT32_MAX; + return UC_LEX_CONTINUE_PARSING; } } - buf_consume(s, ptr - s->lex.bufstart); + buf_consume(lex, ptr - lex->bufstart); - return 0; + return NULL; } -static void append_utf8(struct uc_state *s, int code) { +static void +append_utf8(uc_lexer *lex, int code) { char ustr[8], *up; int rem; @@ -371,38 +464,35 @@ static void append_utf8(struct uc_state *s, int code) { rem = sizeof(ustr); if (utf8enc(&up, &rem, code)) - lookbehind_append(s, ustr, up - ustr); + lookbehind_append(lex, ustr, up - ustr); } -static uint32_t -parse_string(struct uc_state *s) +static uc_token * +parse_string(uc_lexer *lex) { - const struct token *tok = s->lex.tok; + const struct token *tok = lex->tok; char q = tok->pat[0]; char *ptr, *c; - uint32_t rv; + uc_token *rv; int code; - if (!buf_remaining(s)) { - uc_new_exception(s, s->lex.lastoff, "Syntax error: Unterminated string"); - - return 0; - } + if (!buf_remaining(lex)) + return emit_op(lex, lex->lastoff, TK_ERROR, xjs_new_string("Unterminated string")); - for (ptr = s->lex.bufstart; ptr < s->lex.bufend; ptr++) { + for (ptr = lex->bufstart; ptr < lex->bufend; ptr++) { /* continuation of escape sequence */ - if (s->lex.is_escape) { - if (s->lex.esclen == 0) { + if (lex->is_escape) { + if (lex->esclen == 0) { /* non-unicode escape following a lead surrogate, emit replacement... */ - if (s->lex.lead_surrogate && *ptr != 'u') { - append_utf8(s, 0xFFFD); - s->lex.lead_surrogate = 0; + if (lex->lead_surrogate && *ptr != 'u') { + append_utf8(lex, 0xFFFD); + lex->lead_surrogate = 0; } switch ((q == '/') ? 
0 : *ptr) { case 'u': case 'x': - s->lex.esc[s->lex.esclen++] = *ptr; + lex->esc[lex->esclen++] = *ptr; break; case '0': @@ -413,65 +503,62 @@ parse_string(struct uc_state *s) case '5': case '6': case '7': - s->lex.esc[s->lex.esclen++] = 'o'; - s->lex.esc[s->lex.esclen++] = *ptr; + lex->esc[lex->esclen++] = 'o'; + lex->esc[lex->esclen++] = *ptr; break; default: - s->lex.is_escape = false; + lex->is_escape = false; c = strchr("a\ab\be\ef\fn\nr\rt\tv\v", *ptr); if (c && *c >= 'a') { - lookbehind_append(s, c + 1, 1); + lookbehind_append(lex, c + 1, 1); } else { /* regex mode => retain backslash */ if (q == '/') - lookbehind_append(s, "\\", 1); + lookbehind_append(lex, "\\", 1); - lookbehind_append(s, ptr, 1); + lookbehind_append(lex, ptr, 1); } - buf_consume(s, (ptr + 1) - s->lex.bufstart); + buf_consume(lex, (ptr + 1) - lex->bufstart); break; } } else { - switch (s->lex.esc[0]) { + switch (lex->esc[0]) { case 'u': - if (s->lex.esclen < 5) { - if (!isxdigit(*ptr)) { - uc_new_exception(s, s->source->off + s->lex.esclen + 1, "Syntax error: Invalid escape sequence"); + if (lex->esclen < 5) { + if (!isxdigit(*ptr)) + return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, xjs_new_string("Invalid escape sequence")); - return 0; - } - - s->lex.esc[s->lex.esclen++] = *ptr; + lex->esc[lex->esclen++] = *ptr; } - if (s->lex.esclen == 5) { - code = hex(s->lex.esc[1]) * 16 * 16 * 16 + - hex(s->lex.esc[2]) * 16 * 16 + - hex(s->lex.esc[3]) * 16 + - hex(s->lex.esc[4]); + if (lex->esclen == 5) { + code = hex(lex->esc[1]) * 16 * 16 * 16 + + hex(lex->esc[2]) * 16 * 16 + + hex(lex->esc[3]) * 16 + + hex(lex->esc[4]); /* is a leading surrogate value */ if ((code & 0xFC00) == 0xD800) { /* found a subsequent leading surrogate, ignore and emit replacement char for previous one */ - if (s->lex.lead_surrogate) - append_utf8(s, 0xFFFD); + if (lex->lead_surrogate) + append_utf8(lex, 0xFFFD); /* store surrogate value and advance to next escape sequence */ - s->lex.lead_surrogate = code; 
+ lex->lead_surrogate = code; } /* is a trailing surrogate value */ else if ((code & 0xFC00) == 0xDC00) { /* found a trailing surrogate following a leading one, combine and encode */ - if (s->lex.lead_surrogate) { - code = 0x10000 + ((s->lex.lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); - s->lex.lead_surrogate = 0; + if (lex->lead_surrogate) { + code = 0x10000 + ((lex->lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); + lex->lead_surrogate = 0; } /* trailing surrogate not following a leading one, ignore and use replacement char */ @@ -479,87 +566,81 @@ parse_string(struct uc_state *s) code = 0xFFFD; } - append_utf8(s, code); + append_utf8(lex, code); } /* is a normal codepoint */ else { - append_utf8(s, code); + append_utf8(lex, code); } - s->lex.esclen = 0; - s->lex.is_escape = false; - buf_consume(s, (ptr + 1) - s->lex.bufstart); + lex->esclen = 0; + lex->is_escape = false; + buf_consume(lex, (ptr + 1) - lex->bufstart); } break; case 'x': - if (s->lex.esclen < 3) { - if (!isxdigit(*ptr)) { - uc_new_exception(s, s->source->off + s->lex.esclen + 1, "Syntax error: Invalid escape sequence"); - - return 0; - } + if (lex->esclen < 3) { + if (!isxdigit(*ptr)) + return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, xjs_new_string("Invalid escape sequence")); - s->lex.esc[s->lex.esclen++] = *ptr; + lex->esc[lex->esclen++] = *ptr; } - if (s->lex.esclen == 3) { - append_utf8(s, hex(s->lex.esc[1]) * 16 + hex(s->lex.esc[2])); + if (lex->esclen == 3) { + append_utf8(lex, hex(lex->esc[1]) * 16 + hex(lex->esc[2])); - s->lex.esclen = 0; - s->lex.is_escape = false; - buf_consume(s, (ptr + 1) - s->lex.bufstart); + lex->esclen = 0; + lex->is_escape = false; + buf_consume(lex, (ptr + 1) - lex->bufstart); } break; case 'o': - if (s->lex.esclen < 4) { + if (lex->esclen < 4) { /* found a non-octal char */ if (*ptr < '0' || *ptr > '7') { /* pad sequence to three chars */ - switch (s->lex.esclen) { + switch (lex->esclen) { case 3: - s->lex.esc[3] = s->lex.esc[2]; - 
s->lex.esc[2] = s->lex.esc[1]; - s->lex.esc[1] = '0'; + lex->esc[3] = lex->esc[2]; + lex->esc[2] = lex->esc[1]; + lex->esc[1] = '0'; break; case 2: - s->lex.esc[3] = s->lex.esc[1]; - s->lex.esc[2] = '0'; - s->lex.esc[1] = '0'; + lex->esc[3] = lex->esc[1]; + lex->esc[2] = '0'; + lex->esc[1] = '0'; break; } - s->lex.esclen = 4; - buf_consume(s, ptr-- - s->lex.bufstart); + lex->esclen = 4; + buf_consume(lex, ptr-- - lex->bufstart); } /* append */ else { - s->lex.esc[s->lex.esclen++] = *ptr; - buf_consume(s, (ptr + 1) - s->lex.bufstart); + lex->esc[lex->esclen++] = *ptr; + buf_consume(lex, (ptr + 1) - lex->bufstart); } } - if (s->lex.esclen == 4) { - code = dec(s->lex.esc[1]) * 8 * 8 + - dec(s->lex.esc[2]) * 8 + - dec(s->lex.esc[3]); + if (lex->esclen == 4) { + code = dec(lex->esc[1]) * 8 * 8 + + dec(lex->esc[2]) * 8 + + dec(lex->esc[3]); - if (code > 255) { - uc_new_exception(s, s->source->off + s->lex.esclen + 1, "Syntax error: Invalid escape sequence"); + if (code > 255) + return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, xjs_new_string("Invalid escape sequence")); - return 0; - } - - append_utf8(s, code); + append_utf8(lex, code); - s->lex.esclen = 0; - s->lex.is_escape = false; + lex->esclen = 0; + lex->is_escape = false; } break; @@ -569,29 +650,29 @@ parse_string(struct uc_state *s) /* terminating char */ else if (*ptr == q) { - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, (ptr + 1) - s->lex.bufstart); + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, (ptr + 1) - lex->bufstart); - rv = lookbehind_to_text(s, s->lex.lastoff, T_STRING, NULL); + rv = lookbehind_to_text(lex, lex->lastoff, TK_STRING, NULL); if (!rv) - rv = emit_op(s, s->lex.lastoff, T_STRING, xjs_new_string_len("", 0)); + rv = emit_op(lex, lex->lastoff, TK_STRING, xjs_new_string_len("", 0)); return rv; } /* escape sequence start */ else if (*ptr == '\\') { - s->lex.is_escape = true; - lookbehind_append(s, s->lex.bufstart, 
ptr - s->lex.bufstart); - buf_consume(s, ptr - s->lex.bufstart); + lex->is_escape = true; + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, ptr - lex->bufstart); } } - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, ptr - s->lex.bufstart); + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, ptr - lex->bufstart); - return 0; + return NULL; } @@ -614,90 +695,85 @@ enum { UT_LEX_PARSE_REGEX_FLAGS }; -static uint32_t -parse_regexp(struct uc_state *state) +static uc_token * +parse_regexp(uc_lexer *lex) { - struct json_object *pattern; - struct uc_op *op; - uint32_t rv; - char *err; + bool is_reg_global = false, is_reg_icase = false, is_reg_newline = false; + uc_token *rv; + size_t len; + char *s; - switch (state->lex.esc[0]) { + switch (lex->esc[0]) { case UT_LEX_PARSE_REGEX_INIT: - if (state->lex.expect_div) { - state->lex.expect_div = false; + if (lex->expect_div) { + lex->expect_div = false; - if (buf_startswith(state, "=")) { - buf_consume(state, 1); + if (buf_startswith(lex, "=")) { + buf_consume(lex, 1); - return emit_op(state, state->source->off, T_ASDIV, NULL); + return emit_op(lex, lex->source->off, TK_ASDIV, NULL); } - return emit_op(state, state->source->off, T_DIV, NULL); + return emit_op(lex, lex->source->off, TK_DIV, NULL); } - state->lex.esc[0] = UT_LEX_PARSE_REGEX_PATTERN; + lex->esc[0] = UT_LEX_PARSE_REGEX_PATTERN; break; case UT_LEX_PARSE_REGEX_PATTERN: - rv = parse_string(state); + rv = parse_string(lex); - if (rv != 0 && rv != UINT32_MAX) { - state->lex.lookbehind = (char *)OP(rv); - state->lex.esc[0] = UT_LEX_PARSE_REGEX_FLAGS; + if (rv && rv->type == TK_ERROR) + return rv; + + if (rv != NULL && rv != UC_LEX_CONTINUE_PARSING) { + lex->lookbehind = (char *)rv; + lex->esc[0] = UT_LEX_PARSE_REGEX_FLAGS; } break; case UT_LEX_PARSE_REGEX_FLAGS: - op = (struct uc_op *)state->lex.lookbehind; + rv = (uc_token *)lex->lookbehind; - while (state->lex.bufstart < 
state->lex.bufend) { - switch (state->lex.bufstart[0]) { + while (lex->bufstart < lex->bufend) { + switch (lex->bufstart[0]) { case 'g': - buf_consume(state, 1); - op->is_reg_global = true; + buf_consume(lex, 1); + is_reg_global = true; break; case 'i': - buf_consume(state, 1); - op->is_reg_icase = true; + buf_consume(lex, 1); + is_reg_icase = true; break; case 's': - buf_consume(state, 1); - op->is_reg_newline = true; + buf_consume(lex, 1); + is_reg_newline = true; break; default: - state->lex.lookbehind = NULL; - - pattern = uc_new_regexp(json_object_get_string(op->val), - op->is_reg_icase, - op->is_reg_newline, - op->is_reg_global, - &err); + lex->lookbehind = NULL; - json_object_put(op->val); + len = xasprintf(&s, "%c%*s", + (is_reg_global << 0) | (is_reg_icase << 1) | (is_reg_newline << 2), + json_object_get_string_len(rv->val), + json_object_get_string(rv->val)); - op->type = T_REGEXP; - op->val = pattern; + json_object_set_string_len(rv->val, s, len); + free(s); - if (!pattern) { - uc_new_exception(state, op->off, "Syntax error: %s", err); - free(err); + rv->type = TK_REGEXP; - return 0; - } - - return op - state->pool; + return rv; } } break; } - return 0; + return NULL; } @@ -711,50 +787,50 @@ parse_regexp(struct uc_state *state) * -UT_ERROR_OVERLONG_STRING Label too long */ -static uint32_t -parse_label(struct uc_state *s) +static uc_token * +parse_label(uc_lexer *lex) { - const struct token *tok = s->lex.tok; + const struct token *tok = lex->tok; const struct keyword *word; - uint32_t rv; + uc_token *rv; char *ptr; size_t i; - if (!s->lex.lookbehind && tok->plen) - lookbehind_append(s, tok->pat, tok->plen); + if (!lex->lookbehind && tok->plen) + lookbehind_append(lex, tok->pat, tok->plen); - if (!buf_remaining(s) || (s->lex.bufstart[0] != '_' && !isalnum(s->lex.bufstart[0]))) { + if (!buf_remaining(lex) || (lex->bufstart[0] != '_' && !isalnum(lex->bufstart[0]))) { for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = 
&reserved_words[i]) { - if (s->lex.lookbehindlen == word->plen && !strncmp(s->lex.lookbehind, word->pat, word->plen)) { - lookbehind_reset(s); + if (lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) { + lookbehind_reset(lex); switch (word->type) { - case T_DOUBLE: - rv = emit_op(s, s->source->off - word->plen, word->type, uc_new_double(word->d)); + case TK_DOUBLE: + rv = emit_op(lex, lex->source->off - word->plen, word->type, uc_double_new(word->d)); break; - case T_BOOL: - rv = emit_op(s, s->source->off - word->plen, word->type, xjs_new_boolean(word->b)); + case TK_BOOL: + rv = emit_op(lex, lex->source->off - word->plen, word->type, xjs_new_boolean(word->b)); break; default: - rv = emit_op(s, s->source->off - word->plen, word->type, NULL); + rv = emit_op(lex, lex->source->off - word->plen, word->type, NULL); } return rv; } } - return lookbehind_to_text(s, s->source->off - s->lex.lookbehindlen, T_LABEL, NULL); + return lookbehind_to_text(lex, lex->source->off - lex->lookbehindlen, TK_LABEL, NULL); } - for (ptr = s->lex.bufstart; ptr < s->lex.bufend && (*ptr == '_' || isalnum(*ptr)); ptr++) + for (ptr = lex->bufstart; ptr < lex->bufend && (*ptr == '_' || isalnum(*ptr)); ptr++) ; - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, ptr - s->lex.bufstart); + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, ptr - lex->bufstart); - return 0; + return NULL; } @@ -769,198 +845,206 @@ parse_label(struct uc_state *s) */ static inline bool -is_numeric_char(struct uc_state *s, char c) +is_numeric_char(uc_lexer *lex, char c) { - char prev = s->lex.lookbehindlen ? s->lex.lookbehind[s->lex.lookbehindlen-1] : 0; + char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0; if ((prev == 'e' || prev == 'E') && (c == '-' || c == '+')) return true; - return (isxdigit(c) || c == 'x' || c == 'X' || c == '.'); + return prev ? 
(isxdigit(c) || c == 'x' || c == 'X' || c == '.') : (isdigit(c) || c == '.'); } -static uint32_t -parse_number(struct uc_state *state) +static uc_token * +parse_number(uc_lexer *lex) { - uint32_t rv = 0; + const struct token *tok = lex->tok; + uc_token *rv = NULL; long long int n; char *ptr, *e; double d; - if (!buf_remaining(state) || !is_numeric_char(state, state->lex.bufstart[0])) { - lookbehind_append(state, "\0", 1); + if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) { + if (lex->lookbehindlen == 0 && !is_numeric_char(lex, lex->bufstart[0])) + return emit_op(lex, lex->source->off, TK_SUB, NULL); - n = strtoll(state->lex.lookbehind, &e, 0); + lookbehind_append(lex, "\0", 1); + + n = strtoll(lex->lookbehind, &e, 0); if (*e == '.' || *e == 'e' || *e == 'E') { - d = strtod(state->lex.lookbehind, &e); + d = strtod(lex->lookbehind, &e); + + if (tok->pat[0] == '-') + d = -d; - if (e > state->lex.lookbehind && *e == 0) - rv = emit_op(state, state->source->off - (e - state->lex.lookbehind), T_DOUBLE, uc_new_double(d)); + if (e > lex->lookbehind && *e == 0) + rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, uc_double_new(d)); else - uc_new_exception(state, state->source->off - (state->lex.lookbehindlen - (e - state->lex.lookbehind) - 1), - "Syntax error: Invalid number literal"); + rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, xjs_new_string("Invalid number literal")); } else if (*e == 0) { - rv = emit_op(state, state->source->off - (e - state->lex.lookbehind), T_NUMBER, xjs_new_int64(n)); - OP(rv)->is_overflow = (errno == ERANGE); + if (tok->pat[0] == '-') + n = (errno == ERANGE) ? 
INT64_MIN : -n; + + rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, xjs_new_int64(n)); + //OP(rv)->is_overflow = (errno == ERANGE); } else { - uc_new_exception(state, state->source->off - (state->lex.lookbehindlen - (e - state->lex.lookbehind) - 1), - "Syntax error: Invalid number literal"); + rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, xjs_new_string("Invalid number literal")); } - lookbehind_reset(state); + lookbehind_reset(lex); return rv; } - for (ptr = state->lex.bufstart; ptr < state->lex.bufend && is_numeric_char(state, *ptr); ptr++) + for (ptr = lex->bufstart; ptr < lex->bufend && is_numeric_char(lex, *ptr); ptr++) ; - lookbehind_append(state, state->lex.bufstart, ptr - state->lex.bufstart); - buf_consume(state, ptr - state->lex.bufstart); + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, ptr - lex->bufstart); - return 0; + return NULL; } -static uint32_t -lex_step(struct uc_state *s, FILE *fp) +static uc_token * +lex_step(uc_lexer *lex, FILE *fp) { uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) }; union { uint32_t n; char str[4]; } search; const struct token *tok; size_t rlen, rem; char *ptr, c; - uint32_t rv; + uc_token *rv; size_t i; /* only less than UT_LEX_MAX_TOKEN_LEN unreach buffer chars remaining, * move the remaining bytes to the beginning and read more data */ - if (buf_remaining(s) < UT_LEX_MAX_TOKEN_LEN) { - if (!s->lex.buf) { - s->lex.buflen = 128; - s->lex.buf = xalloc(s->lex.buflen); + if (buf_remaining(lex) < UT_LEX_MAX_TOKEN_LEN) { + if (!lex->buf) { + lex->buflen = 128; + lex->buf = xalloc(lex->buflen); } - rem = s->lex.bufend - s->lex.bufstart; + rem = lex->bufend - lex->bufstart; - memcpy(s->lex.buf, s->lex.bufstart, rem); + memcpy(lex->buf, lex->bufstart, rem); - rlen = fread(s->lex.buf + rem, 1, s->lex.buflen - rem, fp); + rlen = fread(lex->buf + rem, 1, lex->buflen - 
rem, fp); - s->lex.bufstart = s->lex.buf; - s->lex.bufend = s->lex.buf + rlen + rem; + lex->bufstart = lex->buf; + lex->bufend = lex->buf + rlen + rem; if (rlen == 0 && (ferror(fp) || feof(fp))) - s->lex.eof = 1; + lex->eof = 1; } - switch (s->lex.state) { + switch (lex->state) { case UT_LEX_IDENTIFY_BLOCK: /* previous block had strip trailing whitespace flag, skip leading whitespace */ - if (s->lex.skip_leading_whitespace) { - while (buf_remaining(s) && isspace(s->lex.bufstart[0])) - buf_consume(s, 1); + if (lex->skip_leading_whitespace) { + while (buf_remaining(lex) && isspace(lex->bufstart[0])) + buf_consume(lex, 1); - s->lex.skip_leading_whitespace = false; + lex->skip_leading_whitespace = false; } /* previous block was a statement block and trim_blocks is enabld, skip leading newline */ - else if (s->lex.skip_leading_newline) { - if (buf_startswith(s, "\n")) - buf_consume(s, 1); + else if (lex->skip_leading_newline) { + if (buf_startswith(lex, "\n")) + buf_consume(lex, 1); - s->lex.skip_leading_newline = false; + lex->skip_leading_newline = false; } /* scan forward through buffer to identify start token */ - for (ptr = s->lex.bufstart; ptr < s->lex.bufend - strlen("{#"); ptr++) { + for (ptr = lex->bufstart; ptr < lex->bufend - strlen("{#"); ptr++) { /* found start of comment block */ if (!strncmp(ptr, "{#", 2)) { - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, (ptr + 2) - s->lex.bufstart); - s->lex.lastoff = s->source->off - 2; - s->lex.state = UT_LEX_BLOCK_COMMENT_START; + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, (ptr + 2) - lex->bufstart); + lex->lastoff = lex->source->off - 2; + lex->state = UT_LEX_BLOCK_COMMENT_START; - return 0; + return NULL; } /* found start of expression block */ else if (!strncmp(ptr, "{{", 2)) { - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, (ptr + 2) - s->lex.bufstart); - s->lex.lastoff = s->source->off - 2; - s->lex.state = 
UT_LEX_BLOCK_EXPRESSION_START; + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, (ptr + 2) - lex->bufstart); + lex->lastoff = lex->source->off - 2; + lex->state = UT_LEX_BLOCK_EXPRESSION_START; - return 0; + return NULL; } /* found start of statement block */ else if (!strncmp(ptr, "{%", 2)) { - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, (ptr + 2) - s->lex.bufstart); - s->lex.lastoff = s->source->off - 2; - s->lex.state = UT_LEX_BLOCK_STATEMENT_START; + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, (ptr + 2) - lex->bufstart); + lex->lastoff = lex->source->off - 2; + lex->state = UT_LEX_BLOCK_STATEMENT_START; - return 0; + return NULL; } } /* we're at eof */ - if (s->lex.eof) { - lookbehind_append(s, ptr, s->lex.bufend - ptr); - s->lex.state = UT_LEX_EOF; + if (lex->eof) { + lookbehind_append(lex, ptr, lex->bufend - ptr); + lex->state = UT_LEX_EOF; - return lookbehind_to_text(s, s->lex.lastoff, T_TEXT, NULL); + return lookbehind_to_text(lex, lex->lastoff, TK_TEXT, NULL); } - lookbehind_append(s, s->lex.bufstart, ptr - s->lex.bufstart); - buf_consume(s, ptr - s->lex.bufstart); + lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); + buf_consume(lex, ptr - lex->bufstart); break; case UT_LEX_BLOCK_COMMENT_START: case UT_LEX_BLOCK_EXPRESSION_START: case UT_LEX_BLOCK_STATEMENT_START: - rv = 0; - s->lex.skip_leading_whitespace = 0; + rv = NULL; + lex->skip_leading_whitespace = 0; /* strip whitespace before block */ - if (buf_startswith(s, "-")) { - rv = lookbehind_to_text(s, s->source->off, T_TEXT, " \n\t\v\f\r"); - buf_consume(s, 1); + if (buf_startswith(lex, "-")) { + rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, " \n\t\v\f\r"); + buf_consume(lex, 1); } /* disable lstrip flag (only valid for statement blocks) */ - else if (s->lex.state == UT_LEX_BLOCK_STATEMENT_START) { + else if (lex->state == UT_LEX_BLOCK_STATEMENT_START) { /* disable lstrip flag */ - 
if (buf_startswith(s, "+")) { - rv = lookbehind_to_text(s, s->source->off, T_TEXT, NULL); - buf_consume(s, 1); + if (buf_startswith(lex, "+")) { + rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); + buf_consume(lex, 1); } /* global block lstrip */ - else if (s->lstrip_blocks) { - rv = lookbehind_to_text(s, s->source->off, T_TEXT, " \t\v\f\r"); + else if (lex->config && lex->config->lstrip_blocks) { + rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, " \t\v\f\r"); } } else { - rv = lookbehind_to_text(s, s->source->off, T_TEXT, NULL); + rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); } - switch (s->lex.state) { + switch (lex->state) { case UT_LEX_BLOCK_COMMENT_START: - s->lex.state = UT_LEX_BLOCK_COMMENT; + lex->state = UT_LEX_BLOCK_COMMENT; break; case UT_LEX_BLOCK_STATEMENT_START: - s->lex.within_statement_block = 1; - s->lex.state = UT_LEX_IDENTIFY_TOKEN; + lex->within_statement_block = 1; + lex->state = UT_LEX_IDENTIFY_TOKEN; break; case UT_LEX_BLOCK_EXPRESSION_START: - s->lex.state = UT_LEX_BLOCK_EXPRESSION_EMIT_TAG; + lex->state = UT_LEX_BLOCK_EXPRESSION_EMIT_TAG; break; default: @@ -972,152 +1056,151 @@ lex_step(struct uc_state *s, FILE *fp) case UT_LEX_BLOCK_COMMENT: /* scan forward through buffer to identify end token */ - while (s->lex.bufstart < s->lex.bufend - 2) { - if (buf_startswith(s, "-#}")) { - s->lex.state = UT_LEX_IDENTIFY_BLOCK; - s->lex.skip_leading_whitespace = 1; - buf_consume(s, 3); - s->lex.lastoff = s->source->off; + while (lex->bufstart < lex->bufend - 2) { + if (buf_startswith(lex, "-#}")) { + lex->state = UT_LEX_IDENTIFY_BLOCK; + lex->skip_leading_whitespace = 1; + buf_consume(lex, 3); + lex->lastoff = lex->source->off; break; } - else if (buf_startswith(s, "#}")) { - s->lex.state = UT_LEX_IDENTIFY_BLOCK; - s->lex.skip_leading_whitespace = 0; - buf_consume(s, 2); - s->lex.lastoff = s->source->off; + else if (buf_startswith(lex, "#}")) { + lex->state = UT_LEX_IDENTIFY_BLOCK; + lex->skip_leading_whitespace 
= 0; + buf_consume(lex, 2); + lex->lastoff = lex->source->off; break; } - buf_consume(s, 1); + buf_consume(lex, 1); } /* we're at eof */ - if (s->lex.eof) - uc_new_exception(s, s->lex.lastoff, "Syntax error: Unterminated template block"); + if (lex->eof) { + lex->state = UT_LEX_EOF; + + buf_consume(lex, lex->bufend - lex->bufstart); + + return emit_op(lex, lex->lastoff, TK_ERROR, xjs_new_string("Unterminated template block")); + } break; case UT_LEX_BLOCK_EXPRESSION_EMIT_TAG: - s->lex.within_expression_block = 1; - s->lex.state = UT_LEX_IDENTIFY_TOKEN; + lex->within_expression_block = 1; + lex->state = UT_LEX_IDENTIFY_TOKEN; - return emit_op(s, s->source->off, T_LEXP, NULL); + return emit_op(lex, lex->source->off, TK_LEXP, NULL); case UT_LEX_IDENTIFY_TOKEN: /* skip leading whitespace */ - for (i = 0; i < buf_remaining(s) && isspace(s->lex.bufstart[i]); i++) + for (i = 0; i < buf_remaining(lex) && isspace(lex->bufstart[i]); i++) ; - buf_consume(s, i); + buf_consume(lex, i); - if (i > 0 && buf_remaining(s) < UT_LEX_MAX_TOKEN_LEN) - return 0; + if (i > 0 && buf_remaining(lex) < UT_LEX_MAX_TOKEN_LEN) + return NULL; for (i = 0; i < sizeof(search.str); i++) - search.str[i] = (i < buf_remaining(s)) ? s->lex.bufstart[i] : 0; + search.str[i] = (i < buf_remaining(lex)) ? lex->bufstart[i] : 0; for (i = 0, tok = tokens; i < ARRAY_SIZE(tokens); tok = &tokens[++i]) { /* remaining buffer data is shorter than token, skip */ - if (tok->plen > buf_remaining(s)) + if (tok->plen > buf_remaining(lex)) continue; - c = s->lex.bufstart[0]; + c = buf_remaining(lex) ? lex->bufstart[0] : 0; if (tok->plen ? 
((search.n & masks[tok->plen]) == tok->patn) : (c >= tok->pat[0] && c <= tok->pat[1])) { - buf_consume(s, tok->plen); - - s->lex.lastoff = s->source->off - tok->plen; + lex->lastoff = lex->source->off; /* token has a parse method, switch state */ if (tok->parse) { - s->lex.tok = tok; - s->lex.state = UT_LEX_PARSE_TOKEN; + lex->tok = tok; + lex->state = UT_LEX_PARSE_TOKEN; - return 0; + buf_consume(lex, tok->plen); + + return NULL; } /* disallow nesting blocks */ - if ((s->lex.within_expression_block && - (tok->type == T_LSTM || tok->type == T_RSTM || tok->type == T_LEXP)) || - (s->lex.within_statement_block && - (tok->type == T_LEXP || tok->type == T_REXP || tok->type == T_LSTM))) { - uc_new_exception(s, s->source->off - tok->plen, "Syntax error: Template blocks may not be nested"); + if ((lex->within_expression_block && + (tok->type == TK_LSTM || tok->type == TK_RSTM || tok->type == TK_LEXP)) || + (lex->within_statement_block && + (tok->type == TK_LEXP || tok->type == TK_REXP || tok->type == TK_LSTM))) { + buf_consume(lex, tok->plen); - return 0; + return emit_op(lex, lex->source->off - tok->plen, TK_ERROR, xjs_new_string("Template blocks may not be nested")); } /* found end of block */ - else if ((s->lex.within_statement_block && tok->type == T_RSTM) || - (s->lex.within_expression_block && tok->type == T_REXP)) { + else if ((lex->within_statement_block && tok->type == TK_RSTM) || + (lex->within_expression_block && tok->type == TK_REXP)) { /* emit additional empty statement (semicolon) at end of template block */ - if (!s->lex.semicolon_emitted) { - s->lex.semicolon_emitted = true; - - /* rewind */ - buf_consume(s, -tok->plen); + if (!lex->semicolon_emitted) { + lex->semicolon_emitted = true; - return emit_op(s, s->source->off, T_SCOL, NULL); + return emit_op(lex, lex->source->off, TK_SCOL, NULL); } /* strip whitespace after block */ if (tok->pat[0] == '-') - s->lex.skip_leading_whitespace = true; + lex->skip_leading_whitespace = true; /* strip newline after 
statement block */ - else if (s->lex.within_statement_block && s->trim_blocks) - s->lex.skip_leading_newline = true; - - s->lex.semicolon_emitted = false; - s->lex.within_statement_block = false; - s->lex.within_expression_block = false; - s->lex.state = UT_LEX_IDENTIFY_BLOCK; - s->lex.lastoff = s->source->off; + else if (lex->within_statement_block && + lex->config && lex->config->trim_blocks) + lex->skip_leading_newline = true; + + lex->semicolon_emitted = false; + lex->within_statement_block = false; + lex->within_expression_block = false; + lex->state = UT_LEX_IDENTIFY_BLOCK; } /* do not report statement tags to the parser */ - if (tok->type != 0 && tok->type != T_LSTM && tok->type != T_RSTM) - rv = emit_op(s, s->source->off - tok->plen, tok->type, NULL); + if (tok->type != 0 && tok->type != TK_LSTM && tok->type != TK_RSTM) + rv = emit_op(lex, lex->source->off, tok->type, NULL); else - rv = 0; + rv = NULL; + + buf_consume(lex, tok->plen); return rv; } } /* no token matched and we do have remaining data, junk */ - if (buf_remaining(s)) { - uc_new_exception(s, s->source->off, "Syntax error: Unexpected character"); - - return 0; - } + if (buf_remaining(lex)) + return emit_op(lex, lex->source->off, TK_ERROR, xjs_new_string("Unexpected character")); /* we're at eof, allow unclosed statement blocks */ - if (s->lex.within_statement_block) { - s->lex.state = UT_LEX_EOF; + if (lex->within_statement_block) { + lex->state = UT_LEX_EOF; - return 0; + return NULL; } /* premature EOF */ - uc_new_exception(s, s->source->off, "Syntax error: Unterminated template block"); - - break; + return emit_op(lex, lex->source->off, TK_ERROR, xjs_new_string("Unterminated template block")); case UT_LEX_PARSE_TOKEN: - tok = s->lex.tok; - rv = tok->parse(s); + tok = lex->tok; + rv = tok->parse(lex); if (rv) { - memset(s->lex.esc, 0, sizeof(s->lex.esc)); - s->lex.state = UT_LEX_IDENTIFY_TOKEN; - s->lex.tok = NULL; + memset(lex->esc, 0, sizeof(lex->esc)); + lex->state = UT_LEX_IDENTIFY_TOKEN; 
+ lex->tok = NULL; - if (rv == UINT32_MAX) - rv = 0; + if (rv == UC_LEX_CONTINUE_PARSING) + rv = NULL; return rv; } @@ -1129,25 +1212,66 @@ lex_step(struct uc_state *s, FILE *fp) break; } - return 0; + return NULL; } -uint32_t -uc_get_token(struct uc_state *s, FILE *fp) +void +uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source) { - uint32_t rv; + lex->state = UT_LEX_IDENTIFY_BLOCK; - while (s->lex.state != UT_LEX_EOF) { - rv = lex_step(s, fp); + lex->config = config; + lex->source = uc_source_get(source); - if (rv == 0 && s->exception) - break; + lex->eof = 0; + lex->skip_leading_whitespace = 0; + lex->skip_leading_newline = 0; + lex->within_statement_block = 0; + lex->within_statement_block = 0; + lex->semicolon_emitted = 0; + lex->expect_div = 0; + lex->is_escape = 0; + + lex->buflen = 0; + lex->buf = NULL; + lex->bufstart = NULL; + lex->bufend = NULL; + + lex->lookbehindlen = 0; + lex->lookbehind = NULL; + + lex->tok = NULL; + + lex->esclen = 0; + memset(lex->esc, 0, sizeof(lex->esc)); + + lex->lead_surrogate = 0; + + lex->lastoff = 0; +} + +void +uc_lexer_free(uc_lexer *lex) +{ + uc_source_put(lex->source); + + free(lex->lookbehind); + free(lex->buf); +} + +uc_token * +uc_lexer_next_token(uc_lexer *lex) +{ + uc_token *rv; + + while (lex->state != UT_LEX_EOF) { + rv = lex_step(lex, lex->source->fp); - if (rv > 0) + if (rv != NULL) return rv; } - return 0; + return emit_op(lex, lex->source->off, TK_EOF, NULL); } const char * @@ -1158,11 +1282,11 @@ uc_get_tokenname(int type) switch (type) { case 0: return "End of file"; - case T_STRING: return "String"; - case T_LABEL: return "Label"; - case T_NUMBER: return "Number"; - case T_DOUBLE: return "Double"; - case T_REGEXP: return "Regexp"; + case TK_STRING: return "String"; + case TK_LABEL: return "Label"; + case TK_NUMBER: return "Number"; + case TK_DOUBLE: return "Double"; + case TK_REGEXP: return "Regexp"; } for (i = 0; i < ARRAY_SIZE(tokens); i++) { @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 
Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,18 +17,156 @@ #ifndef __LEXER_H_ #define __LEXER_H_ -#include "ast.h" +#include "source.h" #define __T_MAX 82 #define T_EXCEPTION (__T_MAX + 0) #define T_CFUNC (__T_MAX + 1) #define T_RESSOURCE (__T_MAX + 2) -bool -utf8enc(char **out, int *rem, int code); -uint32_t -uc_get_token(struct uc_state *s, FILE *fp); +typedef enum { + TK_LEXP = 1, + TK_REXP, + TK_LSTM, + TK_RSTM, + TK_IF, + TK_ELSE, + TK_COMMA, + TK_ASBAND, + TK_ASBXOR, + TK_ASBOR, + TK_ASLEFT, + TK_ASRIGHT, + TK_ASMUL, + TK_ASDIV, + TK_ASMOD, + TK_ASADD, + TK_ASSUB, + TK_ASSIGN, + TK_QMARK, + TK_COLON, + TK_OR, + TK_AND, + TK_BOR, + TK_BXOR, + TK_BAND, + TK_EQ, + TK_NE, + TK_EQS, + TK_NES, + TK_LT, + TK_LE, + TK_GT, + TK_GE, + TK_IN, + TK_LSHIFT, + TK_RSHIFT, + TK_ADD, + TK_SUB, + TK_MUL, + TK_DIV, + TK_MOD, + TK_NOT, + TK_COMPL, + TK_INC, + TK_DEC, + TK_LPAREN, + TK_LBRACK, + TK_TEXT, + TK_LBRACE, + TK_RBRACE, + TK_SCOL, + TK_RPAREN, + TK_ENDIF, + TK_ELIF, + TK_WHILE, + TK_ENDWHILE, + TK_FOR, + TK_ENDFOR, + TK_FUNC, + TK_LABEL, + TK_ENDFUNC, + TK_TRY, + TK_CATCH, + TK_SWITCH, + TK_CASE, + TK_DEFAULT, + TK_ELLIP, + TK_RETURN, + TK_BREAK, + TK_CONTINUE, + TK_LOCAL, + TK_ARROW, + TK_DOT, + TK_RBRACK, + TK_BOOL, + TK_NUMBER, + TK_DOUBLE, + TK_STRING, + TK_REGEXP, + TK_NULL, + TK_THIS, + + TK_EOF, + TK_ERROR +} uc_tokentype_t; + +typedef enum { + UT_LEX_IDENTIFY_BLOCK, + UT_LEX_BLOCK_COMMENT_START, + UT_LEX_BLOCK_EXPRESSION_START, + UT_LEX_BLOCK_EXPRESSION_EMIT_TAG, + UT_LEX_BLOCK_STATEMENT_START, + UT_LEX_BLOCK_COMMENT, + UT_LEX_IDENTIFY_TOKEN, + UT_LEX_PARSE_TOKEN, + UT_LEX_EOF +} uc_lex_state_t; + +typedef struct { + uc_tokentype_t type; + json_object *val; + size_t pos; +} uc_token; + +typedef struct { + bool lstrip_blocks; + bool trim_blocks; + bool 
strict_declarations; +} uc_parse_config; + +typedef struct { + uc_lex_state_t state; + uc_parse_config *config; + uc_source *source; + uint8_t eof:1; + uint8_t skip_leading_whitespace:1; + uint8_t skip_leading_newline:1; + uint8_t within_expression_block:1; + uint8_t within_statement_block:1; + uint8_t semicolon_emitted:1; + uint8_t expect_div:1; + uint8_t is_escape:1; + size_t buflen; + char *buf, *bufstart, *bufend; + size_t lookbehindlen; + char *lookbehind; + const void *tok; + uc_token curr; + char esc[5]; + uint8_t esclen; + int lead_surrogate; + size_t lastoff; +} uc_lexer; + + +void uc_lexer_init(uc_lexer *lex, uc_parse_config *config, uc_source *source); +void uc_lexer_free(uc_lexer *lex); + +uc_token *uc_lexer_next_token(uc_lexer *lex); + +bool utf8enc(char **out, int *rem, int code); const char * uc_get_tokenname(int type); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,13 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include "ast.h" -#include "parser.h" -#include "lexer.h" -#include "eval.h" -#include "lib.h" -#include "module.h" - #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -38,6 +31,40 @@ #include <sys/types.h> #include <sys/wait.h> +#include "lexer.h" +#include "compiler.h" +#include "vm.h" +#include "lib.h" +#include "object.h" + + +const uc_ops uc = { + .value = { + .proto = uc_prototype_new, + .cfunc = uc_cfunction_new, + .dbl = uc_double_new, + .regexp = uc_regexp_new, + .tonumber = uc_cast_number, + .ressource = uc_ressource_new + }, + + .ressource = { + .define = uc_ressource_type_add, + .create = uc_ressource_new, + .data = uc_ressource_dataptr, + .proto = uc_ressource_prototype + }, + + .vm = { + .call = uc_vm_call, + .peek = uc_vm_stack_peek, + .pop = uc_vm_stack_pop, + .push = uc_vm_stack_push, + .raise = uc_vm_raise_exception + } +}; + +const uc_ops *ops = &uc; __attribute__((format(printf, 3, 5))) static void snprintf_append(char **dptr, size_t *dlen, const char *fmt, ssize_t sz, ...) @@ -67,12 +94,15 @@ snprintf_append(char **dptr, size_t *dlen, const char *fmt, ssize_t sz, ...) 
snprintf_append(dptr, dlen, fmt, -1, ##__VA_ARGS__) static void -format_context_line(char **msg, size_t *msglen, const char *line, size_t off) +format_context_line(char **msg, size_t *msglen, const char *line, size_t off, bool compact) { const char *p; int padlen, i; for (p = line, padlen = 0; *p != '\n' && *p != '\0'; p++) { + if (compact && (p - line) == off) + sprintf_append(msg, msglen, "\033[22m"); + switch (*p) { case '\t': sprintf_append(msg, msglen, " "); @@ -94,6 +124,12 @@ format_context_line(char **msg, size_t *msglen, const char *line, size_t off) } } + if (compact) { + sprintf_append(msg, msglen, "\033[m\n"); + + return; + } + sprintf_append(msg, msglen, "`\n "); if (padlen < strlen("Near here ^")) { @@ -112,17 +148,73 @@ format_context_line(char **msg, size_t *msglen, const char *line, size_t off) } } -static void -format_error_context(char **msg, size_t *msglen, struct uc_source *src, struct json_object *stacktrace, size_t off) +static char * +source_filename(uc_source *src, uint32_t line) { - struct json_object *e, *fn, *file, *line, *byte; - size_t len, rlen, idx; - const char *path; + const char *name = src->filename ? 
basename(src->filename) : "[?]"; + static char buf[sizeof("xxxxxxxxx.uc:0000000000")]; + size_t len = strlen(name); + + if (len > 12) + snprintf(buf, sizeof(buf), "...%s:%u", name + (len - 9), line); + else + snprintf(buf, sizeof(buf), "%12s:%u", name, line); + + return buf; +} + +void +format_source_context(char **msg, size_t *msglen, uc_source *src, size_t off, bool compact) +{ + size_t len, rlen; bool truncated; char buf[256]; + long srcpos; int eline; - for (idx = 0; idx < json_object_array_length(stacktrace); idx++) { + srcpos = ftell(src->fp); + + if (srcpos == -1) + return; + + fseek(src->fp, 0, SEEK_SET); + + truncated = false; + eline = 1; + rlen = 0; + + while (fgets(buf, sizeof(buf), src->fp)) { + len = strlen(buf); + rlen += len; + + if (rlen > off) { + if (compact) + sprintf_append(msg, msglen, "\033[2;40;97m%17s %s", + source_filename(src, eline), + truncated ? "..." : ""); + else + sprintf_append(msg, msglen, "\n `%s", + truncated ? "..." : ""); + + format_context_line(msg, msglen, buf, len - (rlen - off) + (truncated ? 3 : 0), compact); + break; + } + + truncated = (len > 0 && buf[len-1] != '\n'); + eline += !truncated; + } + + fseek(src->fp, srcpos, SEEK_SET); +} + +void +format_error_context(char **msg, size_t *msglen, uc_source *src, json_object *stacktrace, size_t off) +{ + json_object *e, *fn, *file, *line, *byte; + const char *path; + size_t idx; + + for (idx = 0; idx < (stacktrace ? json_object_array_length(stacktrace) : 0); idx++) { e = json_object_array_get_idx(stacktrace, idx); fn = json_object_object_get(e, "function"); file = json_object_object_get(e, "filename"); @@ -153,94 +245,22 @@ format_error_context(char **msg, size_t *msglen, struct uc_source *src, struct j sprintf_append(msg, msglen, " called from %s%s (%s", fn ? "function " : "anonymous function", fn ? json_object_get_string(fn) : "", - json_object_get_string(file)); + file ? 
json_object_get_string(file) : ""); if (line && byte) sprintf_append(msg, msglen, ":%" PRId64 ":%" PRId64 ")\n", json_object_get_int64(line), json_object_get_int64(byte)); else - sprintf_append(msg, msglen, " [C])\n"); - } - } - - fseek(src->fp, 0, SEEK_SET); - - truncated = false; - eline = 1; - rlen = 0; - - while (fgets(buf, sizeof(buf), src->fp)) { - len = strlen(buf); - rlen += len; - - if (rlen > off) { - sprintf_append(msg, msglen, "\n `%s", truncated ? "..." : ""); - format_context_line(msg, msglen, buf, len - (rlen - off) + (truncated ? 3 : 0)); - break; - } - - truncated = (len > 0 && buf[len-1] != '\n'); - eline += !truncated; - } -} - -struct json_object * -uc_parse_error(struct uc_state *state, uint32_t off, uint64_t *tokens, int max_token) -{ - struct json_object *rv; - size_t msglen = 0; - bool first = true; - char *msg = NULL; - int i; - - for (i = 0; i <= max_token; i++) { - if (tokens[i / 64] & ((uint64_t)1 << (i % 64))) { - if (first) { - sprintf_append(&msg, &msglen, "Expecting %s", uc_get_tokenname(i)); - first = false; - } - else if (i < max_token) { - sprintf_append(&msg, &msglen, ", %s", uc_get_tokenname(i)); - } - else { - sprintf_append(&msg, &msglen, " or %s", uc_get_tokenname(i)); - } + sprintf_append(msg, msglen, "[C])\n"); } } - rv = uc_new_exception(state, - off ? 
OP_POS(off) : state->lex.lastoff, - "Syntax error: Unexpected token\n%s", msg); - free(msg); - - return rv; -} - -char * -uc_format_error(struct uc_state *state, FILE *fp) -{ - struct uc_source *src; - struct uc_op *tag; - size_t msglen = 0; - char *msg = NULL; - - tag = json_object_get_userdata(state->exception); - src = tag->tag.data; - - sprintf_append(&msg, &msglen, "%s\n", - json_object_get_string(json_object_object_get(state->exception, "message"))); - - if (tag->off) - format_error_context(&msg, &msglen, src, - json_object_object_get(state->exception, "stacktrace"), - tag->off); - - return msg; + format_source_context(msg, msglen, src, off, false); } static double -uc_cast_double(struct json_object *v) +uc_cast_double(json_object *v) { enum json_type t; int64_t n; @@ -262,7 +282,7 @@ uc_cast_double(struct json_object *v) } static int64_t -uc_cast_int64(struct json_object *v) +uc_cast_int64(json_object *v) { enum json_type t; int64_t n; @@ -285,60 +305,17 @@ uc_cast_int64(struct json_object *v) return n; } -static int -uc_c_fn_to_string(struct json_object *v, struct printbuf *pb, int level, int flags) -{ - struct uc_op *op = json_object_get_userdata(v); - struct uc_function *fn = (void *)op + ALIGN(sizeof(*op)); - - return sprintbuf(pb, "%sfunction %s(...) { [native code] }%s", - level ? "\"" : "", fn->name, level ? "\"" : ""); -} - -static void -uc_c_fn_free(struct json_object *v, void *ud) -{ - struct uc_op *op = ud; - - json_object_put(op->tag.proto); - free(ud); -} - -static bool -uc_register_function(struct uc_state *state, struct json_object *scope, const char *name, uc_c_fn *cfn) +static json_object * +uc_print_common(uc_vm *vm, size_t nargs, FILE *fh) { - struct json_object *val = xjs_new_object(); - struct uc_function *fn; - struct uc_op *op; - - op = xalloc(ALIGN(sizeof(*op)) + ALIGN(sizeof(*fn)) + ALIGN(strlen(name) + 1)); - op->val = val; - op->type = T_CFUNC; - - fn = (void *)op + ALIGN(sizeof(*op)); - fn->source = state->function ? 
state->function->source : NULL; - fn->name = strcpy((char *)fn + ALIGN(sizeof(*fn)), name); - fn->cfn = cfn; - - op->tag.data = fn; - - json_object_set_serializer(val, uc_c_fn_to_string, op, uc_c_fn_free); - - return json_object_object_add(scope, name, op->val); -} - -static struct json_object * -uc_print_common(struct uc_state *s, uint32_t off, struct json_object *args, FILE *fh) -{ - struct json_object *item; - size_t arridx, arrlen; + json_object *item; size_t reslen = 0; size_t len = 0; + size_t arridx; const char *p; - for (arridx = 0, arrlen = json_object_array_length(args); - arridx < arrlen; arridx++) { - item = json_object_array_get_idx(args, arridx); + for (arridx = 0; arridx < nargs; arridx++) { + item = uc_get_arg(arridx); if (json_object_is_type(item, json_type_string)) { p = json_object_get_string(item); @@ -357,16 +334,16 @@ uc_print_common(struct uc_state *s, uint32_t off, struct json_object *args, FILE } -static struct json_object * -uc_print(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_print(uc_vm *vm, size_t nargs) { - return uc_print_common(s, off, args, stdout); + return uc_print_common(vm, nargs, stdout); } -static struct json_object * -uc_length(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_length(uc_vm *vm, size_t nargs) { - struct json_object *arg = json_object_array_get_idx(args, 0); + json_object *arg = uc_get_arg(0); switch (json_object_get_type(arg)) { case json_type_object: @@ -383,18 +360,18 @@ uc_length(struct uc_state *s, uint32_t off, struct json_object *args) } } -static struct json_object * -uc_index(struct uc_state *s, uint32_t off, struct json_object *args, bool right) +static json_object * +uc_index(uc_vm *vm, size_t nargs, bool right) { - struct json_object *stack = json_object_array_get_idx(args, 0); - struct json_object *needle = json_object_array_get_idx(args, 1); + json_object *stack = uc_get_arg(0); + json_object *needle = uc_get_arg(1); 
size_t arridx, len, ret = -1; const char *sstr, *nstr, *p; switch (json_object_get_type(stack)) { case json_type_array: for (arridx = 0, len = json_object_array_length(stack); arridx < len; arridx++) { - if (uc_cmp(T_EQ, json_object_array_get_idx(stack, arridx), needle)) { + if (uc_cmp(TK_EQ, json_object_array_get_idx(stack, arridx), needle)) { ret = arridx; if (!right) @@ -425,42 +402,41 @@ uc_index(struct uc_state *s, uint32_t off, struct json_object *args, bool right) } } -static struct json_object * -uc_lindex(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_lindex(uc_vm *vm, size_t nargs) { - return uc_index(s, off, args, false); + return uc_index(vm, nargs, false); } -static struct json_object * -uc_rindex(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_rindex(uc_vm *vm, size_t nargs) { - return uc_index(s, off, args, true); + return uc_index(vm, nargs, true); } -static struct json_object * -uc_push(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_push(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - struct json_object *item = NULL; - size_t arridx, arrlen; + json_object *arr = uc_get_arg(0); + json_object *item = NULL; + size_t arridx; if (!json_object_is_type(arr, json_type_array)) return NULL; - for (arridx = 1, arrlen = json_object_array_length(args); - arridx < arrlen; arridx++) { - item = json_object_array_get_idx(args, arridx); - json_object_array_add(arr, json_object_get(item)); + for (arridx = 1; arridx < nargs; arridx++) { + item = uc_get_arg(arridx); + json_object_array_add(arr, uc_value_get(item)); } - return json_object_get(item); + return uc_value_get(item); } -static struct json_object * -uc_pop(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_pop(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - struct 
json_object *item = NULL; + json_object *arr = uc_get_arg(0); + json_object *item = NULL; size_t arrlen; if (!json_object_is_type(arr, json_type_array)) @@ -476,20 +452,20 @@ uc_pop(struct uc_state *s, uint32_t off, struct json_object *args) #endif } - return json_object_get(item); + return uc_value_get(item); } -static struct json_object * -uc_shift(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_shift(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - struct json_object *item = NULL; + json_object *arr = uc_get_arg(0); + json_object *item = NULL; size_t arridx, arrlen; if (!json_object_is_type(arr, json_type_array)) return NULL; - item = json_object_get(json_object_array_get_idx(arr, 0)); + item = uc_value_get(json_object_array_get_idx(arr, 0)); arrlen = json_object_array_length(arr); for (arridx = 0; arridx < arrlen - 1; arridx++) @@ -504,46 +480,45 @@ uc_shift(struct uc_state *s, uint32_t off, struct json_object *args) return item; } -static struct json_object * -uc_unshift(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_unshift(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - struct json_object *item = NULL; + json_object *arr = uc_get_arg(0); + json_object *item = NULL; size_t arridx, arrlen, addlen; if (!json_object_is_type(arr, json_type_array)) return NULL; arrlen = json_object_array_length(arr); - addlen = json_object_array_length(args) - 1; + addlen = nargs - 1; for (arridx = arrlen; arridx > 0; arridx--) json_object_array_put_idx(arr, arridx + addlen - 1, - json_object_get(json_object_array_get_idx(arr, arridx - 1))); + uc_value_get(json_object_array_get_idx(arr, arridx - 1))); for (arridx = 0; arridx < addlen; arridx++) { - item = json_object_array_get_idx(args, arridx + 1); - json_object_array_put_idx(arr, arridx, json_object_get(item)); + item = uc_get_arg(arridx + 1); + 
json_object_array_put_idx(arr, arridx, uc_value_get(item)); } return item; } -static struct json_object * -uc_chr(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_chr(uc_vm *vm, size_t nargs) { - size_t len = json_object_array_length(args); size_t idx; int64_t n; char *str; - if (!len) + if (!nargs) return xjs_new_string_len("", 0); - str = xalloc(len); + str = xalloc(nargs); - for (idx = 0; idx < len; idx++) { - n = uc_cast_int64(json_object_array_get_idx(args, idx)); + for (idx = 0; idx < nargs; idx++) { + n = uc_cast_int64(uc_get_arg(idx)); if (n < 0) n = 0; @@ -553,25 +528,25 @@ uc_chr(struct uc_state *s, uint32_t off, struct json_object *args) str[idx] = (char)n; } - return xjs_new_string_len(str, len); + return xjs_new_string_len(str, nargs); } -static struct json_object * -uc_delete(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_delete(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *rv = NULL; - size_t arridx, arrlen; + json_object *obj = uc_get_arg(0); + json_object *rv = NULL; const char *key; + size_t arridx; if (!json_object_is_type(obj, json_type_object)) return NULL; - for (arrlen = json_object_array_length(args), arridx = 1; arridx < arrlen; arridx++) { - json_object_put(rv); + for (arridx = 1; arridx < nargs; arridx++) { + uc_value_put(rv); - key = json_object_get_string(json_object_array_get_idx(args, arridx)); - rv = json_object_get(json_object_object_get(obj, key ? key : "null")); + key = json_object_get_string(uc_get_arg(arridx)); + rv = uc_value_get(json_object_object_get(obj, key ? key : "null")); json_object_object_del(obj, key ? 
key : "null"); } @@ -579,28 +554,21 @@ uc_delete(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_die(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_die(uc_vm *vm, size_t nargs) { - const char *msg = json_object_get_string(json_object_array_get_idx(args, 0)); - struct uc_function *prev_fn; - struct json_object *ex; - - prev_fn = s->function; - s->function = s->callstack->function; - - ex = uc_new_exception(s, s->callstack->off, "%s", msg ? msg : "Died"); + const char *msg = json_object_get_string(uc_get_arg(0)); - s->function = prev_fn; + uc_vm_raise_exception(vm, EXCEPTION_USER, msg ? msg : "Died"); - return ex; + return NULL; } -static struct json_object * -uc_exists(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_exists(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - const char *key = json_object_get_string(json_object_array_get_idx(args, 1)); + json_object *obj = uc_get_arg(0); + const char *key = json_object_get_string(uc_get_arg(1)); if (!json_object_is_type(obj, json_type_object)) return false; @@ -608,98 +576,95 @@ uc_exists(struct uc_state *s, uint32_t off, struct json_object *args) return xjs_new_boolean(json_object_object_get_ex(obj, key ? 
key : "null", NULL)); } -__attribute__((noreturn)) static struct json_object * -uc_exit(struct uc_state *s, uint32_t off, struct json_object *args) +__attribute__((noreturn)) static json_object * +uc_exit(uc_vm *vm, size_t nargs) { - int64_t n = uc_cast_int64(json_object_array_get_idx(args, 0)); + int64_t n = uc_cast_int64(uc_get_arg(0)); exit(n); } -static struct json_object * -uc_getenv(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_getenv(uc_vm *vm, size_t nargs) { - const char *key = json_object_get_string(json_object_array_get_idx(args, 0)); + const char *key = json_object_get_string(uc_get_arg(0)); char *val = key ? getenv(key) : NULL; return val ? xjs_new_string(val) : NULL; } -static struct json_object * -uc_filter(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_filter(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *func = json_object_array_get_idx(args, 1); - struct json_object *rv, *arr, *cmpargs; + json_object *obj = uc_get_arg(0); + json_object *func = uc_get_arg(1); + json_object *rv, *arr; size_t arridx, arrlen; if (!json_object_is_type(obj, json_type_array)) return NULL; arr = xjs_new_array(); - cmpargs = xjs_new_array(); - - json_object_array_put_idx(cmpargs, 2, json_object_get(obj)); for (arrlen = json_object_array_length(obj), arridx = 0; arridx < arrlen; arridx++) { - json_object_array_put_idx(cmpargs, 0, json_object_get(json_object_array_get_idx(obj, arridx))); - json_object_array_put_idx(cmpargs, 1, xjs_new_int64(arridx)); + /* XXX: revisit leaks */ + uc_vm_stack_push(vm, uc_value_get(func)); + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(obj, arridx))); + uc_vm_stack_push(vm, xjs_new_int64(arridx)); + uc_vm_stack_push(vm, uc_value_get(obj)); - rv = uc_invoke(s, off, NULL, func, cmpargs); + if (uc_vm_call(vm, false, 3)) { + uc_value_put(arr); - if (uc_is_type(rv, T_EXCEPTION)) { - 
json_object_put(cmpargs); - json_object_put(arr); - - return rv; + return NULL; } + rv = uc_vm_stack_pop(vm); + if (uc_val_is_truish(rv)) - json_object_array_add(arr, json_object_get(json_object_array_get_idx(obj, arridx))); + json_object_array_add(arr, uc_value_get(json_object_array_get_idx(obj, arridx))); - json_object_put(rv); + uc_value_put(rv); } - json_object_put(cmpargs); - return arr; } -static struct json_object * -uc_hex(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_hex(uc_vm *vm, size_t nargs) { - const char *val = json_object_get_string(json_object_array_get_idx(args, 0)); + const char *val = json_object_get_string(uc_get_arg(0)); int64_t n; char *e; if (!val || !isxdigit(*val)) - return uc_new_double(NAN); + return uc_double_new(NAN); n = strtoll(val, &e, 16); if (e == val || *e) - return uc_new_double(NAN); + return uc_double_new(NAN); return xjs_new_int64(n); } -static struct json_object * -uc_int(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_int(uc_vm *vm, size_t nargs) { - int64_t n = uc_cast_int64(json_object_array_get_idx(args, 0)); + int64_t n = uc_cast_int64(uc_get_arg(0)); if (errno == EINVAL || errno == EOVERFLOW) - return uc_new_double(NAN); + return uc_double_new(NAN); return xjs_new_int64(n); } -static struct json_object * -uc_join(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_join(uc_vm *vm, size_t nargs) { - const char *sep = json_object_get_string(json_object_array_get_idx(args, 0)); - struct json_object *arr = json_object_array_get_idx(args, 1); - struct json_object *rv = NULL; + const char *sep = json_object_get_string(uc_get_arg(0)); + json_object *arr = uc_get_arg(1); + json_object *rv = NULL; size_t arrlen, arridx, len = 1; const char *item; char *res, *p; @@ -750,11 +715,11 @@ out: return rv; } -static struct json_object * -uc_keys(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * 
+uc_keys(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *arr = NULL; + json_object *obj = uc_get_arg(0); + json_object *arr = NULL; if (!json_object_is_type(obj, json_type_object)) return NULL; @@ -767,12 +732,12 @@ uc_keys(struct uc_state *s, uint32_t off, struct json_object *args) return arr; } -static struct json_object * -uc_lc(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_lc(uc_vm *vm, size_t nargs) { - const char *str = json_object_get_string(json_object_array_get_idx(args, 0)); + const char *str = json_object_get_string(uc_get_arg(0)); size_t len = str ? strlen(str) : 0; - struct json_object *rv = NULL; + json_object *rv = NULL; char *res, *p; if (!str) @@ -792,50 +757,47 @@ uc_lc(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_map(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_map(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *func = json_object_array_get_idx(args, 1); - struct json_object *arr, *cmpargs, *rv; + json_object *obj = uc_get_arg(0); + json_object *func = uc_get_arg(1); + json_object *arr, *rv; size_t arridx, arrlen; if (!json_object_is_type(obj, json_type_array)) return NULL; arr = xjs_new_array(); - cmpargs = xjs_new_array(); - - json_object_array_put_idx(cmpargs, 2, json_object_get(obj)); for (arrlen = json_object_array_length(obj), arridx = 0; arridx < arrlen; arridx++) { - json_object_array_put_idx(cmpargs, 0, json_object_get(json_object_array_get_idx(obj, arridx))); - json_object_array_put_idx(cmpargs, 1, xjs_new_int64(arridx)); + /* XXX: revisit leaks */ + uc_vm_stack_push(vm, uc_value_get(func)); + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(obj, arridx))); + uc_vm_stack_push(vm, xjs_new_int64(arridx)); + uc_vm_stack_push(vm, uc_value_get(obj)); - rv = 
uc_invoke(s, off, NULL, func, cmpargs); + if (uc_vm_call(vm, false, 3)) { + uc_value_put(arr); - if (uc_is_type(rv, T_EXCEPTION)) { - json_object_put(cmpargs); - json_object_put(arr); - - return rv; + return NULL; } + rv = uc_vm_stack_pop(vm); + json_object_array_add(arr, rv); } - json_object_put(cmpargs); - return arr; } -static struct json_object * -uc_ord(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ord(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *rv, *pos; - size_t i, len, nargs; + json_object *obj = uc_get_arg(0); + json_object *rv, *pos; const char *str; + size_t i, len; int64_t n; if (!json_object_is_type(obj, json_type_string)) @@ -844,15 +806,13 @@ uc_ord(struct uc_state *s, uint32_t off, struct json_object *args) str = json_object_get_string(obj); len = json_object_get_string_len(obj); - nargs = json_object_array_length(args); - if (nargs == 1) return str[0] ? xjs_new_int64((int64_t)str[0]) : NULL; rv = xjs_new_array(); for (i = 1; i < nargs; i++) { - pos = json_object_array_get_idx(args, i); + pos = uc_get_arg(i); if (json_object_is_type(pos, json_type_int)) { n = json_object_get_int64(pos); @@ -872,17 +832,19 @@ uc_ord(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_type(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_type(uc_vm *vm, size_t nargs) { - struct json_object *v = json_object_array_get_idx(args, 0); - struct uc_op *tag = json_object_get_userdata(v); + json_object *v = uc_get_arg(0); + uc_objtype_t o = uc_object_type(v); - switch (tag ? 
tag->type : 0) { - case T_FUNC: + switch (o) { + case UC_OBJ_CFUNCTION: + case UC_OBJ_FUNCTION: + case UC_OBJ_CLOSURE: return xjs_new_string("function"); - case T_RESSOURCE: + case UC_OBJ_RESSOURCE: return xjs_new_string("ressource"); default: @@ -911,11 +873,11 @@ uc_type(struct uc_state *s, uint32_t off, struct json_object *args) } } -static struct json_object * -uc_reverse(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_reverse(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *rv = NULL; + json_object *obj = uc_get_arg(0); + json_object *rv = NULL; size_t len, arridx; const char *str; char *dup, *p; @@ -924,7 +886,7 @@ uc_reverse(struct uc_state *s, uint32_t off, struct json_object *args) rv = xjs_new_array(); for (arridx = json_object_array_length(obj); arridx > 0; arridx--) - json_object_array_add(rv, json_object_get(json_object_array_get_idx(obj, arridx - 1))); + json_object_array_add(rv, uc_value_get(json_object_array_get_idx(obj, arridx - 1))); } else if (json_object_is_type(obj, json_type_string)) { len = json_object_get_string_len(obj); @@ -944,80 +906,74 @@ uc_reverse(struct uc_state *s, uint32_t off, struct json_object *args) static struct { - struct uc_state *s; - uint32_t off; - struct json_object *fn; - struct json_object *args; - struct json_object *ex; + uc_vm *vm; + bool ex; + json_object *fn; } sort_ctx; static int sort_fn(const void *k1, const void *k2) { - struct json_object * const *v1 = k1; - struct json_object * const *v2 = k2; - struct json_object *rv; + json_object * const *v1 = k1; + json_object * const *v2 = k2; + json_object *rv; int ret; if (!sort_ctx.fn) - return !uc_cmp(T_LT, *v1, *v2); + return !uc_cmp(TK_LT, *v1, *v2); if (sort_ctx.ex) return 0; - json_object_array_put_idx(sort_ctx.args, 0, json_object_get(*v1)); - json_object_array_put_idx(sort_ctx.args, 1, json_object_get(*v2)); - - rv = uc_invoke(sort_ctx.s, sort_ctx.off, NULL, 
sort_ctx.fn, sort_ctx.args); + uc_vm_stack_push(sort_ctx.vm, uc_value_get(sort_ctx.fn)); + uc_vm_stack_push(sort_ctx.vm, uc_value_get(*v1)); + uc_vm_stack_push(sort_ctx.vm, uc_value_get(*v2)); - if (uc_is_type(rv, T_EXCEPTION)) { - sort_ctx.ex = rv; + if (uc_vm_call(sort_ctx.vm, false, 2)) { + sort_ctx.ex = true; return 0; } + rv = uc_vm_stack_pop(sort_ctx.vm); + ret = !uc_val_is_truish(rv); - json_object_put(rv); + uc_value_put(rv); return ret; } -static struct json_object * -uc_sort(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_sort(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - struct json_object *fn = json_object_array_get_idx(args, 1); + json_object *arr = uc_get_arg(0); + json_object *fn = uc_get_arg(1); if (!json_object_is_type(arr, json_type_array)) return NULL; - if (fn) { - sort_ctx.s = s; - sort_ctx.off = off; - sort_ctx.fn = fn; - sort_ctx.args = xjs_new_array(); - } + sort_ctx.vm = vm; + sort_ctx.fn = fn; json_object_array_sort(arr, sort_fn); - json_object_put(sort_ctx.args); - return sort_ctx.ex ? sort_ctx.ex : json_object_get(arr); + return sort_ctx.ex ? 
NULL : uc_value_get(arr); } -static struct json_object * -uc_splice(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_splice(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); - int64_t ofs = uc_cast_int64(json_object_array_get_idx(args, 1)); - int64_t remlen = uc_cast_int64(json_object_array_get_idx(args, 2)); + json_object *arr = uc_get_arg(0); + int64_t ofs = uc_cast_int64(uc_get_arg(1)); + int64_t remlen = uc_cast_int64(uc_get_arg(2)); size_t arrlen, addlen, idx; if (!json_object_is_type(arr, json_type_array)) return NULL; arrlen = json_object_array_length(arr); - addlen = json_object_array_length(args); + addlen = nargs; if (addlen == 1) { ofs = 0; @@ -1068,26 +1024,26 @@ uc_splice(struct uc_state *s, uint32_t off, struct json_object *args) else if (addlen > remlen) { for (idx = arrlen; idx > ofs; idx--) json_object_array_put_idx(arr, idx + addlen - remlen - 1, - json_object_get(json_object_array_get_idx(arr, idx - 1))); + uc_value_get(json_object_array_get_idx(arr, idx - 1))); } for (idx = 0; idx < addlen; idx++) json_object_array_put_idx(arr, ofs + idx, - json_object_get(json_object_array_get_idx(args, 3 + idx))); + uc_value_get(uc_get_arg(3 + idx))); - return json_object_get(arr); + return uc_value_get(arr); } -static struct json_object * -uc_split(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_split(uc_vm *vm, size_t nargs) { - struct json_object *str = json_object_array_get_idx(args, 0); - struct json_object *sep = json_object_array_get_idx(args, 1); - struct json_object *arr = NULL; + json_object *str = uc_get_arg(0); + json_object *sep = uc_get_arg(1); + json_object *arr = NULL; const char *p, *sepstr, *splitstr; int eflags = 0, res; regmatch_t pmatch; - struct uc_op *tag; + uc_regexp *re; size_t seplen; if (!sep || !json_object_is_type(str, json_type_string)) @@ -1096,11 +1052,11 @@ uc_split(struct uc_state *s, uint32_t off, struct 
json_object *args) arr = xjs_new_array(); splitstr = json_object_get_string(str); - if (uc_is_type(sep, T_REGEXP)) { - tag = json_object_get_userdata(sep); + if (uc_object_is_type(sep, UC_OBJ_REGEXP)) { + re = uc_object_as_regexp(sep); while (true) { - res = regexec((regex_t *)tag->tag.data, splitstr, 1, &pmatch, eflags); + res = regexec(&re->re, splitstr, 1, &pmatch, eflags); if (res == REG_NOMATCH) break; @@ -1130,7 +1086,7 @@ uc_split(struct uc_state *s, uint32_t off, struct json_object *args) json_object_array_add(arr, xjs_new_string_len(splitstr, p - splitstr)); } else { - json_object_put(arr); + uc_value_put(arr); return NULL; } @@ -1138,12 +1094,12 @@ uc_split(struct uc_state *s, uint32_t off, struct json_object *args) return arr; } -static struct json_object * -uc_substr(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_substr(uc_vm *vm, size_t nargs) { - struct json_object *str = json_object_array_get_idx(args, 0); - int64_t ofs = uc_cast_int64(json_object_array_get_idx(args, 1)); - int64_t sublen = uc_cast_int64(json_object_array_get_idx(args, 2)); + json_object *str = uc_get_arg(0); + int64_t ofs = uc_cast_int64(uc_get_arg(1)); + int64_t sublen = uc_cast_int64(uc_get_arg(2)); const char *p; size_t len; @@ -1153,7 +1109,7 @@ uc_substr(struct uc_state *s, uint32_t off, struct json_object *args) p = json_object_get_string(str); len = json_object_get_string_len(str); - switch (json_object_array_length(args)) { + switch (nargs) { case 1: ofs = 0; sublen = len; @@ -1202,20 +1158,20 @@ uc_substr(struct uc_state *s, uint32_t off, struct json_object *args) return xjs_new_string_len(p + ofs, sublen); } -static struct json_object * -uc_time(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_time(uc_vm *vm, size_t nargs) { time_t t = time(NULL); return xjs_new_int64((int64_t)t); } -static struct json_object * -uc_uc(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * 
+uc_uc(uc_vm *vm, size_t nargs) { - const char *str = json_object_get_string(json_object_array_get_idx(args, 0)); + const char *str = json_object_get_string(uc_get_arg(0)); size_t len = str ? strlen(str) : 0; - struct json_object *rv = NULL; + json_object *rv = NULL; char *res, *p; if (!str) @@ -1235,17 +1191,16 @@ uc_uc(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_uchr(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uchr(uc_vm *vm, size_t nargs) { - size_t len = json_object_array_length(args); size_t idx, ulen; char *p, *str; int64_t n; int rem; - for (idx = 0, ulen = 0; idx < len; idx++) { - n = uc_cast_int64(json_object_array_get_idx(args, idx)); + for (idx = 0, ulen = 0; idx < nargs; idx++) { + n = uc_cast_int64(uc_get_arg(idx)); if (errno == EINVAL || errno == EOVERFLOW || n < 0 || n > 0x10FFFF) ulen += 3; @@ -1261,8 +1216,8 @@ uc_uchr(struct uc_state *s, uint32_t off, struct json_object *args) str = xalloc(ulen); - for (idx = 0, p = str, rem = ulen; idx < len; idx++) { - n = uc_cast_int64(json_object_array_get_idx(args, idx)); + for (idx = 0, p = str, rem = ulen; idx < nargs; idx++) { + n = uc_cast_int64(uc_get_arg(idx)); if (errno == EINVAL || errno == EOVERFLOW || n < 0 || n > 0x10FFFF) n = 0xFFFD; @@ -1274,11 +1229,11 @@ uc_uchr(struct uc_state *s, uint32_t off, struct json_object *args) return xjs_new_string_len(str, ulen); } -static struct json_object * -uc_values(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_values(uc_vm *vm, size_t nargs) { - struct json_object *obj = json_object_array_get_idx(args, 0); - struct json_object *arr; + json_object *obj = uc_get_arg(0); + json_object *arr; if (!json_object_is_type(obj, json_type_object)) return NULL; @@ -1287,17 +1242,17 @@ uc_values(struct uc_state *s, uint32_t off, struct json_object *args) json_object_object_foreach(obj, key, val) { (void)key; - 
json_object_array_add(arr, json_object_get(val)); + json_object_array_add(arr, uc_value_get(val)); } return arr; } -static struct json_object * -uc_trim_common(struct uc_state *s, uint32_t off, struct json_object *args, bool start, bool end) +static json_object * +uc_trim_common(uc_vm *vm, size_t nargs, bool start, bool end) { - struct json_object *str = json_object_array_get_idx(args, 0); - struct json_object *chr = json_object_array_get_idx(args, 1); + json_object *str = uc_get_arg(0); + json_object *chr = uc_get_arg(1); const char *p, *c; size_t len; @@ -1333,32 +1288,32 @@ uc_trim_common(struct uc_state *s, uint32_t off, struct json_object *args, bool return xjs_new_string_len(p, len); } -static struct json_object * -uc_trim(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_trim(uc_vm *vm, size_t nargs) { - return uc_trim_common(s, off, args, true, true); + return uc_trim_common(vm, nargs, true, true); } -static struct json_object * -uc_ltrim(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ltrim(uc_vm *vm, size_t nargs) { - return uc_trim_common(s, off, args, true, false); + return uc_trim_common(vm, nargs, true, false); } -static struct json_object * -uc_rtrim(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_rtrim(uc_vm *vm, size_t nargs) { - return uc_trim_common(s, off, args, false, true); + return uc_trim_common(vm, nargs, false, true); } static size_t -uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, char **res) +uc_printf_common(uc_vm *vm, size_t nargs, char **res) { - struct json_object *fmt = json_object_array_get_idx(args, 0); + json_object *fmt = uc_get_arg(0); char *fp, sfmt[sizeof("%0- 123456789.123456789%")]; union { const char *s; int64_t n; double d; } arg; - size_t len = 0, arglen, argidx; const char *fstr, *last, *p; + size_t len = 0, argidx = 1; enum json_type t; *res = NULL; @@ -1368,9 +1323,6 @@ 
uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha else fstr = ""; - arglen = json_object_array_length(args); - argidx = 1; - for (last = p = fstr; *p; p++) { if (*p == '%') { snprintf_append(res, &len, "%s", p - last, last); @@ -1447,8 +1399,8 @@ uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha case 'X': t = json_type_int; - if (argidx < arglen) - arg.n = uc_cast_int64(json_object_array_get_idx(args, argidx++)); + if (argidx < nargs) + arg.n = uc_cast_int64(uc_get_arg(argidx++)); else arg.n = 0; @@ -1462,8 +1414,8 @@ uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha case 'G': t = json_type_double; - if (argidx < arglen) - arg.d = uc_cast_double(json_object_array_get_idx(args, argidx++)); + if (argidx < nargs) + arg.d = uc_cast_double(uc_get_arg(argidx++)); else arg.d = 0; @@ -1472,8 +1424,8 @@ uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha case 'c': t = json_type_int; - if (argidx < arglen) - arg.n = uc_cast_int64(json_object_array_get_idx(args, argidx++)) & 0xff; + if (argidx < nargs) + arg.n = uc_cast_int64(uc_get_arg(argidx++)) & 0xff; else arg.n = 0; @@ -1482,8 +1434,8 @@ uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha case 's': t = json_type_string; - if (argidx < arglen) - arg.s = json_object_get_string(json_object_array_get_idx(args, argidx++)); + if (argidx < nargs) + arg.s = json_object_get_string(uc_get_arg(argidx++)); else arg.s = NULL; @@ -1494,9 +1446,9 @@ uc_printf_common(struct uc_state *s, uint32_t off, struct json_object *args, cha case 'J': t = json_type_string; - if (argidx < arglen) + if (argidx < nargs) arg.s = json_object_to_json_string_ext( - json_object_array_get_idx(args, argidx++), + uc_get_arg(argidx++), JSON_C_TO_STRING_SPACED|JSON_C_TO_STRING_NOSLASHESCAPE|JSON_C_TO_STRING_STRICT); else arg.s = NULL; @@ -1543,14 +1495,14 @@ next: return len; } -static struct json_object * 
-uc_sprintf(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_sprintf(uc_vm *vm, size_t nargs) { - struct json_object *rv; + json_object *rv; char *str = NULL; size_t len; - len = uc_printf_common(s, off, args, &str); + len = uc_printf_common(vm, nargs, &str); rv = xjs_new_string_len(str, len); free(str); @@ -1558,13 +1510,13 @@ uc_sprintf(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_printf(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_printf(uc_vm *vm, size_t nargs) { char *str = NULL; size_t len; - len = uc_printf_common(s, off, args, &str); + len = uc_printf_common(vm, nargs, &str); len = fwrite(str, 1, len, stdout); free(str); @@ -1572,121 +1524,108 @@ uc_printf(struct uc_state *s, uint32_t off, struct json_object *args) return xjs_new_int64(len); } -static struct json_object * -uc_require_so(struct uc_state *state, uint32_t off, const char *path) +static bool +uc_require_so(uc_vm *vm, const char *path, json_object **res) { - void (*init)(const struct uc_ops *, struct uc_state *, struct json_object *); - struct uc_function fn = {}, *prev_fn; - struct uc_source *src, *prev_src; - struct json_object *scope; + void (*init)(const uc_ops *, uc_prototype *); + uc_prototype *scope; struct stat st; void *dlh; if (stat(path, &st)) - return NULL; + return false; dlerror(); dlh = dlopen(path, RTLD_LAZY|RTLD_LOCAL); - if (!dlh) - return uc_new_exception(state, OP_POS(off), - "Unable to dlopen file %s: %s", path, dlerror()); - - init = dlsym(dlh, "uc_module_init"); + if (!dlh) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Unable to dlopen file '%s': %s", path, dlerror()); - if (!init) - return uc_new_exception(state, OP_POS(off), - "Module %s provides no 'uc_module_init' function", path); - - src = xalloc(sizeof(*src)); - src->filename = xstrdup(path); - src->next = state->sources; + return true; + } - fn.name = "require"; - 
fn.source = src; + init = dlsym(dlh, "uc_module_entry"); - prev_fn = state->function; - state->function = &fn; + if (!init) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Module '%s' provides no 'uc_module_entry' function", path); - prev_src = state->source; - state->source = state->sources = src; + return true; + } - scope = xjs_new_object(); + scope = uc_prototype_new(NULL); - init(&ut, state, scope); + init(&uc, scope); - state->source = prev_src; - state->function = prev_fn; + *res = scope->header.jso; - return scope; + return true; } -struct json_object * -uc_execute_source(struct uc_state *s, struct uc_source *src, struct uc_scope *scope) +static bool +uc_require_ucode(uc_vm *vm, const char *path, uc_prototype *scope, json_object **res) { - struct json_object *entry, *rv; + uc_exception_type_t extype; + uc_prototype *prev_scope; + uc_function *function; + uc_closure *closure; + uc_source *source; + struct stat st; + char *err; - rv = uc_parse(s, src->fp); + if (stat(path, &st)) + return false; - if (!uc_is_type(rv, T_EXCEPTION)) { - entry = uc_new_func(s, s->main, scope ? 
scope : s->scope); + source = uc_source_new_file(path); - json_object_put(rv); - rv = uc_invoke(s, s->main, NULL, entry, NULL); + if (!source) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Unable to open file '%s': %s", path, strerror(errno)); - json_object_put(entry); + return true; } - return rv; -} + function = uc_compile(vm->config, source, &err); -static struct json_object * -uc_require_utpl(struct uc_state *state, uint32_t off, const char *path, struct uc_scope *scope) -{ - struct uc_function fn = {}, *prev_fn; - struct uc_source *src, *prev_src; - struct json_object *rv; - struct stat st; - FILE *fp; + if (!function) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Unable to compile module '%s':\n%s", path, err); - if (stat(path, &st)) - return NULL; + uc_source_put(source); + free(err); - fp = fopen(path, "rb"); + return true; + } - if (!fp) - return uc_new_exception(state, OP_POS(off), - "Unable to open file %s: %s", path, strerror(errno)); + closure = uc_closure_new(function, false); - src = xalloc(sizeof(*src)); - src->fp = fp; - src->filename = path ? xstrdup(path) : NULL; - src->next = state->sources; + uc_vm_stack_push(vm, closure->header.jso); - prev_src = state->source; - state->source = state->sources = src; + prev_scope = vm->globals; + vm->globals = scope ? 
scope : prev_scope; - fn.name = "require"; - fn.source = src; + extype = uc_vm_call(vm, false, 0); - prev_fn = state->function; - state->function = &fn; + vm->globals = prev_scope; - rv = uc_execute_source(state, src, scope); + if (extype == EXCEPTION_NONE) + *res = uc_vm_stack_pop(vm); - state->function = prev_fn; - state->source = prev_src; + uc_source_put(source); - return rv; + return true; } -static struct json_object * -uc_require_path(struct uc_state *s, uint32_t off, const char *path_template, const char *name) +static bool +uc_require_path(uc_vm *vm, const char *path_template, const char *name, json_object **res) { - struct json_object *rv = NULL; const char *p, *q, *last; char *path = NULL; size_t plen = 0; + bool rv = false; + + *res = NULL; p = strchr(path_template, '*'); @@ -1711,9 +1650,9 @@ uc_require_path(struct uc_state *s, uint32_t off, const char *path_template, con } if (!strcmp(p, ".so")) - rv = uc_require_so(s, off, path); + rv = uc_require_so(vm, path, res); else if (!strcmp(p, ".uc")) - rv = uc_require_utpl(s, off, path, NULL); + rv = uc_require_ucode(vm, path, NULL, res); invalid: free(path); @@ -1721,56 +1660,49 @@ invalid: return rv; } -static struct json_object * -uc_require(struct uc_state *state, uint32_t off, struct json_object *args) +static json_object * +uc_require(uc_vm *vm, size_t nargs) { - struct json_object *val = json_object_array_get_idx(args, 0); - struct json_object *search, *se, *res; - struct uc_scope *sc, *scparent; + const char *name = json_object_get_string(uc_get_arg(0)); + + json_object *val = uc_get_arg(0); + json_object *search, *se, *res; size_t arridx, arrlen; - const char *name; if (!json_object_is_type(val, json_type_string)) return NULL; - /* find root scope */ - for (sc = state->scope; sc; ) { - scparent = uc_parent_scope(sc); + name = json_object_get_string(val); + search = vm->globals ? 
json_object_object_get(vm->globals->header.jso, "REQUIRE_SEARCH_PATH") : NULL; - if (!scparent) - break; + if (!json_object_is_type(search, json_type_array)) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Global require search path not set"); - sc = scparent; + return NULL; } - name = json_object_get_string(val); - search = sc ? json_object_object_get(sc->scope, "REQUIRE_SEARCH_PATH") : NULL; - - if (!json_object_is_type(search, json_type_array)) - return uc_new_exception(state, off ? OP_POS(off) : 0, - "Global require search path not set"); - for (arridx = 0, arrlen = json_object_array_length(search); arridx < arrlen; arridx++) { se = json_object_array_get_idx(search, arridx); if (!json_object_is_type(se, json_type_string)) continue; - res = uc_require_path(state, off, json_object_get_string(se), name); - - if (res) + if (uc_require_path(vm, json_object_get_string(se), name, &res)) return res; } - return uc_new_exception(state, off ? OP_POS(off) : 0, - "No module named '%s' could be found", name); + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "No module named '%s' could be found", name); + + return NULL; } -static struct json_object * -uc_iptoarr(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_iptoarr(uc_vm *vm, size_t nargs) { - struct json_object *ip = json_object_array_get_idx(args, 0); - struct json_object *res; + json_object *ip = uc_get_arg(0); + json_object *res; union { uint8_t u8[4]; struct in_addr in; @@ -1804,7 +1736,7 @@ uc_iptoarr(struct uc_state *s, uint32_t off, struct json_object *args) } static int -check_byte(struct json_object *v) +check_byte(json_object *v) { int n; @@ -1819,10 +1751,10 @@ check_byte(struct json_object *v) return n; } -static struct json_object * -uc_arrtoip(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_arrtoip(uc_vm *vm, size_t nargs) { - struct json_object *arr = json_object_array_get_idx(args, 0); + json_object *arr = uc_get_arg(0); union { 
uint8_t u8[4]; struct in6_addr in6; @@ -1867,24 +1799,25 @@ uc_arrtoip(struct uc_state *s, uint32_t off, struct json_object *args) } } -static struct json_object * -uc_match(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_match(uc_vm *vm, size_t nargs) { - struct json_object *subject = json_object_array_get_idx(args, 0); - struct json_object *pattern = json_object_array_get_idx(args, 1); - struct uc_op *tag = json_object_get_userdata(pattern); - struct json_object *rv = NULL, *m; + json_object *subject = uc_get_arg(0); + json_object *pattern = uc_get_arg(1); + json_object *rv = NULL, *m; int eflags = 0, res, i; regmatch_t pmatch[10]; + uc_regexp *re; const char *p; - if (!uc_is_type(pattern, T_REGEXP) || !subject) + if (!uc_object_is_type(pattern, UC_OBJ_REGEXP) || !subject) return NULL; p = json_object_get_string(subject); + re = uc_object_as_regexp(pattern); while (true) { - res = regexec((regex_t *)tag->tag.data, p, ARRAY_SIZE(pmatch), pmatch, eflags); + res = regexec(&re->re, p, ARRAY_SIZE(pmatch), pmatch, eflags); if (res == REG_NOMATCH) break; @@ -1897,7 +1830,7 @@ uc_match(struct uc_state *s, uint32_t off, struct json_object *args) pmatch[i].rm_eo - pmatch[i].rm_so)); } - if (tag->is_reg_global) { + if (re->global) { if (!rv) rv = xjs_new_array(); @@ -1915,39 +1848,37 @@ uc_match(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_replace_cb(struct uc_state *s, uint32_t off, struct json_object *func, +static json_object * +uc_replace_cb(uc_vm *vm, json_object *func, const char *subject, regmatch_t *pmatch, size_t plen, char **sp, size_t *sl) { - struct json_object *cbargs = xjs_new_array(); - struct json_object *rv; + json_object *rv; size_t i; + /* XXX: revisit leaks */ + uc_vm_stack_push(vm, uc_value_get(func)); + for (i = 0; i < plen && pmatch[i].rm_so != -1; i++) { - json_object_array_add(cbargs, + uc_vm_stack_push(vm, xjs_new_string_len(subject + pmatch[i].rm_so, 
pmatch[i].rm_eo - pmatch[i].rm_so)); } - rv = uc_invoke(s, off, NULL, func, cbargs); - - if (uc_is_type(rv, T_EXCEPTION)) { - json_object_put(cbargs); + if (uc_vm_call(vm, false, i)) + return NULL; - return rv; - } + rv = uc_vm_stack_pop(vm); sprintf_append(sp, sl, "%s", rv ? json_object_get_string(rv) : "null"); - json_object_put(cbargs); - json_object_put(rv); + uc_value_put(rv); return NULL; } static void -uc_replace_str(struct uc_state *s, uint32_t off, struct json_object *str, +uc_replace_str(uc_vm *vm, json_object *str, const char *subject, regmatch_t *pmatch, size_t plen, char **sp, size_t *sl) { @@ -2011,36 +1942,37 @@ uc_replace_str(struct uc_state *s, uint32_t off, struct json_object *str, } } -static struct json_object * -uc_replace(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_replace(uc_vm *vm, size_t nargs) { - struct json_object *subject = json_object_array_get_idx(args, 0); - struct json_object *pattern = json_object_array_get_idx(args, 1); - struct json_object *replace = json_object_array_get_idx(args, 2); - struct uc_op *tag = json_object_get_userdata(pattern); - struct json_object *rv = NULL; + json_object *subject = uc_get_arg(0); + json_object *pattern = uc_get_arg(1); + json_object *replace = uc_get_arg(2); + json_object *rv = NULL; const char *sb, *p, *l; regmatch_t pmatch[10]; int eflags = 0, res; size_t sl = 0, pl; char *sp = NULL; + uc_regexp *re; if (!pattern || !subject || !replace) return NULL; - if (uc_is_type(pattern, T_REGEXP)) { + if (uc_object_is_type(pattern, UC_OBJ_REGEXP)) { p = json_object_get_string(subject); + re = uc_object_as_regexp(pattern); while (true) { - res = regexec((regex_t *)tag->tag.data, p, ARRAY_SIZE(pmatch), pmatch, eflags); + res = regexec(&re->re, p, ARRAY_SIZE(pmatch), pmatch, eflags); if (res == REG_NOMATCH) break; snprintf_append(&sp, &sl, "%s", pmatch[0].rm_so, p); - if (uc_is_type(replace, T_FUNC) || uc_is_type(replace, T_CFUNC)) { - rv = uc_replace_cb(s, off, 
replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl); + if (uc_object_is_callable(replace)) { + rv = uc_replace_cb(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl); if (rv) { free(sp); @@ -2049,12 +1981,12 @@ uc_replace(struct uc_state *s, uint32_t off, struct json_object *args) } } else { - uc_replace_str(s, off, replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl); + uc_replace_str(vm, replace, p, pmatch, ARRAY_SIZE(pmatch), &sp, &sl); } p += pmatch[0].rm_eo; - if (tag->is_reg_global) + if (re->global) eflags |= REG_NOTBOL; else break; @@ -2074,8 +2006,8 @@ uc_replace(struct uc_state *s, uint32_t off, struct json_object *args) pmatch[0].rm_so = sb - l; pmatch[0].rm_eo = pmatch[0].rm_so + pl; - if (uc_is_type(replace, T_FUNC) || uc_is_type(replace, T_CFUNC)) { - rv = uc_replace_cb(s, off, replace, l, pmatch, 1, &sp, &sl); + if (uc_object_is_callable(replace)) { + rv = uc_replace_cb(vm, replace, l, pmatch, 1, &sp, &sl); if (rv) { free(sp); @@ -2084,7 +2016,7 @@ uc_replace(struct uc_state *s, uint32_t off, struct json_object *args) } } else { - uc_replace_str(s, off, replace, l, pmatch, 1, &sp, &sl); + uc_replace_str(vm, replace, l, pmatch, 1, &sp, &sl); } l = sb + pl; @@ -2101,18 +2033,21 @@ uc_replace(struct uc_state *s, uint32_t off, struct json_object *args) return rv; } -static struct json_object * -uc_json(struct uc_state *state, uint32_t off, struct json_object *args) +static json_object * +uc_json(uc_vm *vm, size_t nargs) { - struct json_object *rv, *src = json_object_array_get_idx(args, 0); + json_object *rv, *src = uc_get_arg(0); struct json_tokener *tok = NULL; enum json_tokener_error err; const char *str; size_t len; - if (!json_object_is_type(src, json_type_string)) - return uc_new_exception(state, OP_POS(off), - "Passed value is not a string"); + if (!json_object_is_type(src, json_type_string)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed value is not a string"); + + return NULL; + } tok = xjs_new_tokener(); str = json_object_get_string(src); 
@@ -2122,20 +2057,26 @@ uc_json(struct uc_state *state, uint32_t off, struct json_object *args) err = json_tokener_get_error(tok); if (err == json_tokener_continue) { - json_object_put(rv); - rv = uc_new_exception(state, OP_POS(off), + uc_value_put(rv); + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, "Unexpected end of string in JSON data"); + + return NULL; } else if (err != json_tokener_success) { - json_object_put(rv); - rv = uc_new_exception(state, OP_POS(off), + uc_value_put(rv); + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, "Failed to parse JSON string: %s", json_tokener_error_desc(err)); + + return NULL; } else if (json_tokener_get_parse_end(tok) < len) { - json_object_put(rv); - rv = uc_new_exception(state, OP_POS(off), + uc_value_put(rv); + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, "Trailing garbage after JSON data"); + + return NULL; } json_tokener_free(tok); @@ -2176,64 +2117,83 @@ include_path(const char *curpath, const char *incpath) return dup; } -static struct json_object * -uc_include(struct uc_state *state, uint32_t off, struct json_object *args) +static json_object * +uc_include(uc_vm *vm, size_t nargs) { - struct json_object *rv, *path = json_object_array_get_idx(args, 0); - struct json_object *scope = json_object_array_get_idx(args, 1); - struct uc_scope *sc; + json_object *path = uc_get_arg(0); + json_object *scope = uc_get_arg(1); + json_object *rv = NULL; + uc_closure *closure = NULL; + uc_prototype *sc; + size_t i; char *p; - if (!json_object_is_type(path, json_type_string)) - return uc_new_exception(state, OP_POS(off), - "Passed filename is not a string"); + if (!json_object_is_type(path, json_type_string)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed filename is not a string"); - if (scope && !json_object_is_type(scope, json_type_object)) - return uc_new_exception(state, OP_POS(off), - "Passed scope value is not an object"); + return NULL; + } - p = include_path(state->callstack->function->source->filename, 
json_object_get_string(path)); + if (scope && !json_object_is_type(scope, json_type_object)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed scope value is not an object"); - if (!p) - return uc_new_exception(state, OP_POS(off), - "Include file not found"); + return NULL; + } + + /* find calling closure */ + for (i = vm->callframes.count; i > 0; i--) { + closure = vm->callframes.entries[i - 1].closure; + + if (closure) + break; + } + + if (!closure) + return NULL; + + p = include_path(closure->function->source->filename, json_object_get_string(path)); + + if (!p) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "Include file not found"); + + return NULL; + } if (scope) { - sc = uc_new_scope(state, NULL); + sc = uc_prototype_new(NULL); json_object_object_foreach(scope, key, val) - json_object_object_add(sc->scope, key, json_object_get(val)); + json_object_object_add(sc->header.jso, key, uc_value_get(val)); } else { - sc = state->scope; + sc = vm->globals; } - rv = uc_require_utpl(state, off, p, sc); + if (uc_require_ucode(vm, p, sc, &rv)) + uc_value_put(rv); free(p); if (scope) - json_object_put(sc->scope); - - if (uc_is_type(rv, T_EXCEPTION)) - return rv; - - json_object_put(rv); + uc_value_put(sc->header.jso); return NULL; } -static struct json_object * -uc_warn(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_warn(uc_vm *vm, size_t nargs) { - return uc_print_common(s, off, args, stderr); + return uc_print_common(vm, nargs, stderr); } -static struct json_object * -uc_system(struct uc_state *state, uint32_t off, struct json_object *args) +static json_object * +uc_system(uc_vm *vm, size_t nargs) { - struct json_object *cmdline = json_object_array_get_idx(args, 0); - struct json_object *timeout = json_object_array_get_idx(args, 1); + json_object *cmdline = uc_get_arg(0); + json_object *timeout = uc_get_arg(1); sigset_t sigmask, sigomask; const char **arglist, *fn; struct timespec ts; @@ -2262,13 +2222,18 @@ 
uc_system(struct uc_state *state, uint32_t off, struct json_object *args) break; default: - return uc_new_exception(state, OP_POS(off), - "Passed command is neither string nor array"); + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Passed command is neither string nor array"); + + return NULL; } - if (timeout && (!json_object_is_type(timeout, json_type_int) || json_object_get_int64(timeout) < 0)) - return uc_new_exception(state, OP_POS(off), - "Invalid timeout specified"); + if (timeout && (!json_object_is_type(timeout, json_type_int) || json_object_get_int64(timeout) < 0)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Invalid timeout specified"); + + return NULL; + } tms = timeout ? json_object_get_int64(timeout) : 0; @@ -2339,22 +2304,31 @@ fail: sigprocmask(SIG_SETMASK, &sigomask, NULL); free(arglist); - return uc_new_exception(state, OP_POS(off), - "%s(): %s", fn, strerror(errno)); + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "%s(): %s", fn, strerror(errno)); + + return NULL; } -const struct uc_ops ut = { - .register_function = uc_register_function, - .register_type = uc_register_extended_type, - .set_type = uc_set_extended_type, - .get_type = uc_get_extended_type, - .new_object = uc_new_object, - .new_double = uc_new_double, - .invoke = uc_invoke, - .cast_number = uc_cast_number, -}; +static json_object * +uc_trace(uc_vm *vm, size_t nargs) +{ + json_object *level = uc_get_arg(0); + uint8_t prev_level; -static const struct { const char *name; uc_c_fn *func; } functions[] = { + if (!json_object_is_type(level, json_type_int)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, "Invalid level specified"); + + return NULL; + } + + prev_level = vm->trace; + vm->trace = json_object_get_int64(level); + + return xjs_new_int64(prev_level); +} + +static const uc_cfunction_list functions[] = { { "chr", uc_chr }, { "delete", uc_delete }, { "die", uc_die }, @@ -2401,14 +2375,12 @@ static const struct { const char *name; uc_c_fn *func; } functions[] = { { "include", 
uc_include }, { "warn", uc_warn }, { "system", uc_system }, + { "trace", uc_trace }, }; void -uc_lib_init(struct uc_state *state, struct json_object *scope) +uc_lib_init(uc_prototype *scope) { - int i; - - for (i = 0; i < sizeof(functions) / sizeof(functions[0]); i++) - uc_register_function(state, scope, functions[i].name, functions[i].func); + uc_add_proto_functions(scope, functions); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,17 +17,187 @@ #ifndef __LIB_H_ #define __LIB_H_ -#include "ast.h" +#include "vm.h" #include "lexer.h" +#include "object.h" -typedef struct json_object *(uc_c_fn)(struct uc_state *, uint32_t, struct json_object *); +typedef struct { + const char *name; + uc_cfn_ptr func; +} uc_cfunction_list; -void uc_lib_init(struct uc_state *state, struct json_object *scope); +typedef struct { + /* value operations */ + struct { + uc_prototype *(*proto)(uc_prototype *); + uc_cfunction *(*cfunc)(const char *, uc_cfn_ptr); + json_object *(*dbl)(double); + uc_regexp *(*regexp)(const char *, bool, bool, bool, char **); + uc_ressource *(*ressource)(json_object *, uc_ressource_type *, void *); + enum json_type (*tonumber)(json_object *, int64_t *, double *); + } value; -struct json_object *uc_execute_source(struct uc_state *s, struct uc_source *src, struct uc_scope *scope); + /* ressource operations */ + struct { + uc_ressource_type *(*define)(const char *, uc_prototype *, void (*)(void *)); + uc_ressource *(*create)(json_object *, uc_ressource_type *, void *); + void **(*data)(json_object *, const char *); + uc_prototype *(*proto)(json_object *); + } ressource; -struct json_object *uc_parse_error(struct uc_state *s, uint32_t off, uint64_t *tokens, int max_token); + /* VM operations */ + struct { + uc_exception_type_t 
(*call)(uc_vm *, bool, size_t); + json_object *(*peek)(uc_vm *, size_t); + json_object *(*pop)(uc_vm *); + void (*push)(uc_vm *, json_object *); + void (*raise)(uc_vm *, uc_exception_type_t, const char *, ...); + } vm; +} uc_ops; -char *uc_format_error(struct uc_state *state, FILE *fp); +extern const uc_ops uc; + +void uc_lib_init(uc_prototype *scope); + +void format_source_context(char **msg, size_t *msglen, uc_source *src, size_t off, bool compact); +void format_error_context(char **msg, size_t *msglen, uc_source *src, json_object *stacktrace, size_t off); + + +/* vm helper */ + +static inline void * +_uc_get_self(const uc_ops *ops, uc_vm *vm, const char *expected_type) +{ + return ops->ressource.data(vm->callframes.entries[vm->callframes.count - 1].ctx, expected_type); +} + +#define uc_get_self(...) _uc_get_self(ops, vm, __VA_ARGS__) + +static inline json_object * +_uc_get_arg(const uc_ops *ops, uc_vm *vm, size_t nargs, size_t n) +{ + if (n >= nargs) + return NULL; + + return ops->vm.peek(vm, nargs - n - 1); +} + +#define uc_get_arg(...) _uc_get_arg(ops, vm, nargs, __VA_ARGS__) + +#define uc_call(nargs) ops->vm.call(vm, false, nargs) +#define uc_push_val(val) ops->vm.push(vm, val) +#define uc_pop_val() ops->vm.pop(vm) + + +/* value helper */ + +static inline json_object * +_uc_alloc_proto(const uc_ops *ops, uc_prototype *parent) +{ + return ops->value.proto(parent)->header.jso; +} + +static inline json_object * +_uc_alloc_cfunc(const uc_ops *ops, const char *name, uc_cfn_ptr fptr) +{ + return ops->value.cfunc(name, fptr)->header.jso; +} + +static inline json_object * +_uc_alloc_double(const uc_ops *ops, double dbl) +{ + return ops->value.dbl(dbl); +} + +static inline json_object * +_uc_alloc_regexp(const uc_ops *ops, const char *pattern, bool global, bool icase, bool newline, char **errp) +{ + uc_regexp *re = ops->value.regexp(pattern, global, icase, newline, errp); + + return re ? 
re->header.jso : NULL; +} + +static inline json_object * +_uc_alloc_ressource(const uc_ops *ops, uc_ressource_type *type, void *data) +{ + uc_ressource *res = ops->value.ressource(xjs_new_object(), type, data); + + return res ? res->header.jso : NULL; +} + +#define uc_alloc_proto(...) _uc_alloc_proto(ops, __VA_ARGS__) +#define uc_alloc_cfunc(...) _uc_alloc_cfunc(ops, __VA_ARGS__) +#define uc_alloc_double(...) _uc_alloc_double(ops, __VA_ARGS__) +#define uc_alloc_regexp(...) _uc_alloc_regexp(ops, __VA_ARGS__) +#define uc_alloc_ressource(...) _uc_alloc_ressource(ops, __VA_ARGS__) + +static inline json_type +_uc_to_number(const uc_ops *ops, json_object *v, int64_t *n, double *d) +{ + return ops->value.tonumber(v, n, d); +} + +static inline double +_uc_to_double(const uc_ops *ops, json_object *v) +{ + int64_t n; + double d; + + return (ops->value.tonumber(v, &n, &d) == json_type_double) ? d : (double)n; +} + +static inline int64_t +_uc_to_int64(const uc_ops *ops, json_object *v) +{ + int64_t n; + double d; + + return (ops->value.tonumber(v, &n, &d) == json_type_double) ? (int64_t)d : n; +} + +#define uc_to_number(...) _uc_to_number(ops, __VA_ARGS__) +#define uc_to_double(...) _uc_to_double(ops, __VA_ARGS__) +#define uc_to_int64(...) 
_uc_to_int64(ops, __VA_ARGS__) + + +/* ressource type helper */ + +static inline uc_ressource_type * +_uc_declare_type(const uc_ops *ops, const char *name, const uc_cfunction_list *list, size_t len, void (*freefn)(void *)) +{ + uc_prototype *proto = ops->value.proto(NULL); + + while (len-- > 0) + json_object_object_add(proto->header.jso, list[len].name, + _uc_alloc_cfunc(ops, list[len].name, list[len].func)); + + return ops->ressource.define(name, proto, freefn); +} + +#define uc_declare_type(name, functions, freefn) \ + _uc_declare_type(ops, name, functions, ARRAY_SIZE(functions), freefn) + + +/* prototype helper */ + +static inline bool +uc_add_proto_val(uc_prototype *proto, const char *key, json_object *value) +{ + if (!proto) + return false; + + return json_object_object_add(proto->header.jso, key, value); +} + +static inline void +_uc_add_proto_functions(const uc_ops *ops, uc_prototype *proto, const uc_cfunction_list *list, size_t len) +{ + while (len-- > 0) + json_object_object_add(proto->header.jso, list[len].name, + _uc_alloc_cfunc(ops, list[len].name, list[len].func)); +} + +#define uc_add_proto_functions(proto, functions) \ + _uc_add_proto_functions(ops, proto, functions, ARRAY_SIZE(functions)) #endif /* __LIB_H_ */ @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,8 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include "../module.h" - #include <stdio.h> #include <errno.h> #include <string.h> @@ -25,16 +23,19 @@ #include <sys/types.h> #include <sys/sysmacros.h> +#include "../module.h" + #define err_return(err) do { last_error = err; return NULL; } while(0) -static const struct uc_ops *ops; +//static const uc_ops *ops; +static uc_ressource_type *file_type, *proc_type, *dir_type; static int last_error = 0; -static struct json_object * -uc_fs_error(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_error(uc_vm *vm, size_t nargs) { - struct json_object *errmsg; + json_object *errmsg; if (last_error == 0) return NULL; @@ -45,17 +46,17 @@ uc_fs_error(struct uc_state *s, uint32_t off, struct json_object *args) return errmsg; } -static struct json_object * -uc_fs_read_common(struct uc_state *s, uint32_t off, struct json_object *args, const char *type) +static json_object * +uc_fs_read_common(uc_vm *vm, size_t nargs, const char *type) { - struct json_object *limit = json_object_array_get_idx(args, 0); - struct json_object *rv = NULL; + json_object *limit = uc_get_arg(0); + json_object *rv = NULL; char buf[128], *p = NULL, *tmp; size_t rlen, len = 0; const char *lstr; int64_t lsize; - FILE **fp = (FILE **)ops->get_type(s->ctx, type); + FILE **fp = uc_get_self(type); if (!fp || !*fp) err_return(EBADF); @@ -137,14 +138,14 @@ uc_fs_read_common(struct uc_state *s, uint32_t off, struct json_object *args, co return rv; } -static struct json_object * -uc_fs_write_common(struct uc_state *s, uint32_t off, struct json_object *args, const char *type) +static json_object * +uc_fs_write_common(uc_vm *vm, size_t nargs, const char *type) { - struct json_object *data = json_object_array_get_idx(args, 0); + json_object *data = uc_get_arg(0); size_t len, wsize; const char *str; - FILE **fp = (FILE **)ops->get_type(s->ctx, type); + FILE **fp = uc_get_self(type); if (!fp || !*fp) err_return(EBADF); @@ -167,10 +168,10 @@ uc_fs_write_common(struct uc_state *s, 
uint32_t off, struct json_object *args, c } -static struct json_object * -uc_fs_pclose(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_pclose(uc_vm *vm, size_t nargs) { - FILE **fp = (FILE **)ops->get_type(s->ctx, "fs.proc"); + FILE **fp = uc_get_self("fs.proc"); int rc; if (!fp || !*fp) @@ -191,24 +192,23 @@ uc_fs_pclose(struct uc_state *s, uint32_t off, struct json_object *args) return xjs_new_int64(0); } -static struct json_object * -uc_fs_pread(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_pread(uc_vm *vm, size_t nargs) { - return uc_fs_read_common(s, off, args, "fs.proc"); + return uc_fs_read_common(vm, nargs, "fs.proc"); } -static struct json_object * -uc_fs_pwrite(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_pwrite(uc_vm *vm, size_t nargs) { - return uc_fs_write_common(s, off, args, "fs.proc"); + return uc_fs_write_common(vm, nargs, "fs.proc"); } -static struct json_object * -uc_fs_popen(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_popen(uc_vm *vm, size_t nargs) { - struct json_object *comm = json_object_array_get_idx(args, 0); - struct json_object *mode = json_object_array_get_idx(args, 1); - struct json_object *fo; + json_object *comm = uc_get_arg(0); + json_object *mode = uc_get_arg(1); FILE *fp; if (!json_object_is_type(comm, json_type_string)) @@ -220,21 +220,14 @@ uc_fs_popen(struct uc_state *s, uint32_t off, struct json_object *args) if (!fp) err_return(errno); - fo = json_object_new_object(); - - if (!fo) { - pclose(fp); - err_return(ENOMEM); - } - - return ops->set_type(fo, "fs.proc", fp); + return uc_alloc_ressource(proc_type, fp); } -static struct json_object * -uc_fs_close(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_close(uc_vm *vm, size_t nargs) { - FILE **fp = (FILE **)ops->get_type(s->ctx, "fs.file"); + FILE **fp = 
uc_get_self("fs.file"); if (!fp || !*fp) err_return(EBADF); @@ -245,27 +238,27 @@ uc_fs_close(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_read(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_read(uc_vm *vm, size_t nargs) { - return uc_fs_read_common(s, off, args, "fs.file"); + return uc_fs_read_common(vm, nargs, "fs.file"); } -static struct json_object * -uc_fs_write(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_write(uc_vm *vm, size_t nargs) { - return uc_fs_write_common(s, off, args, "fs.file"); + return uc_fs_write_common(vm, nargs, "fs.file"); } -static struct json_object * -uc_fs_seek(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_seek(uc_vm *vm, size_t nargs) { - struct json_object *ofs = json_object_array_get_idx(args, 0); - struct json_object *how = json_object_array_get_idx(args, 1); + json_object *ofs = uc_get_arg(0); + json_object *how = uc_get_arg(1); int whence, res; long offset; - FILE **fp = (FILE **)ops->get_type(s->ctx, "fs.file"); + FILE **fp = uc_get_self("fs.file"); if (!fp || !*fp) err_return(EBADF); @@ -292,12 +285,12 @@ uc_fs_seek(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_tell(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_tell(uc_vm *vm, size_t nargs) { long offset; - FILE **fp = (FILE **)ops->get_type(s->ctx, "fs.file"); + FILE **fp = uc_get_self("fs.file"); if (!fp || !*fp) err_return(EBADF); @@ -310,12 +303,11 @@ uc_fs_tell(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_int64(offset); } -static struct json_object * -uc_fs_open(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_open(uc_vm *vm, size_t nargs) { - struct 
json_object *path = json_object_array_get_idx(args, 0); - struct json_object *mode = json_object_array_get_idx(args, 1); - struct json_object *fo; + json_object *path = uc_get_arg(0); + json_object *mode = uc_get_arg(1); FILE *fp; if (!json_object_is_type(path, json_type_string)) @@ -327,21 +319,14 @@ uc_fs_open(struct uc_state *s, uint32_t off, struct json_object *args) if (!fp) err_return(errno); - fo = json_object_new_object(); - - if (!fo) { - fclose(fp); - err_return(ENOMEM); - } - - return ops->set_type(fo, "fs.file", fp); + return uc_alloc_ressource(file_type, fp); } -static struct json_object * -uc_fs_readdir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_readdir(uc_vm *vm, size_t nargs) { - DIR **dp = (DIR **)ops->get_type(s->ctx, "fs.dir"); + DIR **dp = uc_get_self("fs.dir"); struct dirent *e; if (!dp || !*dp) @@ -356,10 +341,10 @@ uc_fs_readdir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_string(e->d_name); } -static struct json_object * -uc_fs_telldir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_telldir(uc_vm *vm, size_t nargs) { - DIR **dp = (DIR **)ops->get_type(s->ctx, "fs.dir"); + DIR **dp = uc_get_self("fs.dir"); long position; if (!dp || !*dp) @@ -373,11 +358,11 @@ uc_fs_telldir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_int64((int64_t)position); } -static struct json_object * -uc_fs_seekdir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_seekdir(uc_vm *vm, size_t nargs) { - struct json_object *ofs = json_object_array_get_idx(args, 0); - DIR **dp = (DIR **)ops->get_type(s->ctx, "fs.dir"); + json_object *ofs = uc_get_arg(0); + DIR **dp = uc_get_self("fs.dir"); long position; if (!json_object_is_type(ofs, json_type_int)) @@ -393,10 +378,10 @@ uc_fs_seekdir(struct uc_state *s, uint32_t off, struct json_object *args) return 
json_object_new_boolean(true); } -static struct json_object * -uc_fs_closedir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_closedir(uc_vm *vm, size_t nargs) { - DIR **dp = (DIR **)ops->get_type(s->ctx, "fs.dir"); + DIR **dp = uc_get_self("fs.dir"); if (!dp || !*dp) err_return(EBADF); @@ -407,11 +392,10 @@ uc_fs_closedir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_opendir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_opendir(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); - struct json_object *diro; + json_object *path = uc_get_arg(0); DIR *dp; if (!json_object_is_type(path, json_type_string)) @@ -422,21 +406,14 @@ uc_fs_opendir(struct uc_state *s, uint32_t off, struct json_object *args) if (!dp) err_return(errno); - diro = json_object_new_object(); - - if (!diro) { - closedir(dp); - err_return(ENOMEM); - } - - return ops->set_type(diro, "fs.dir", dp); + return uc_alloc_ressource(dir_type, dp); } -static struct json_object * -uc_fs_readlink(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_readlink(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); - struct json_object *res; + json_object *path = uc_get_arg(0); + json_object *res; ssize_t buflen = 0, rv; char *buf = NULL, *tmp; @@ -472,11 +449,11 @@ uc_fs_readlink(struct uc_state *s, uint32_t off, struct json_object *args) return res; } -static struct json_object * -uc_fs_stat_common(struct uc_state *s, uint32_t off, struct json_object *args, bool use_lstat) +static json_object * +uc_fs_stat_common(uc_vm *vm, size_t nargs, bool use_lstat) { - struct json_object *path = json_object_array_get_idx(args, 0); - struct json_object *res, *o; + json_object *path = uc_get_arg(0); + json_object *res, *o; struct stat st; int rv; 
@@ -556,23 +533,23 @@ uc_fs_stat_common(struct uc_state *s, uint32_t off, struct json_object *args, bo return res; } -static struct json_object * -uc_fs_stat(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_stat(uc_vm *vm, size_t nargs) { - return uc_fs_stat_common(s, off, args, false); + return uc_fs_stat_common(vm, nargs, false); } -static struct json_object * -uc_fs_lstat(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_lstat(uc_vm *vm, size_t nargs) { - return uc_fs_stat_common(s, off, args, true); + return uc_fs_stat_common(vm, nargs, true); } -static struct json_object * -uc_fs_mkdir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_mkdir(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); - struct json_object *mode = json_object_array_get_idx(args, 1); + json_object *path = uc_get_arg(0); + json_object *mode = uc_get_arg(1); if (!json_object_is_type(path, json_type_string) || (mode && !json_object_is_type(mode, json_type_int))) @@ -584,10 +561,10 @@ uc_fs_mkdir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_rmdir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_rmdir(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); + json_object *path = uc_get_arg(0); if (!json_object_is_type(path, json_type_string)) err_return(EINVAL); @@ -598,11 +575,11 @@ uc_fs_rmdir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_symlink(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_symlink(uc_vm *vm, size_t nargs) { - struct json_object *dest = json_object_array_get_idx(args, 0); - struct json_object *path = 
json_object_array_get_idx(args, 1); + json_object *dest = uc_get_arg(0); + json_object *path = uc_get_arg(1); if (!json_object_is_type(dest, json_type_string) || !json_object_is_type(path, json_type_string)) @@ -614,10 +591,10 @@ uc_fs_symlink(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_unlink(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_unlink(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); + json_object *path = uc_get_arg(0); if (!json_object_is_type(path, json_type_string)) err_return(EINVAL); @@ -628,10 +605,10 @@ uc_fs_unlink(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_fs_getcwd(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_getcwd(uc_vm *vm, size_t nargs) { - struct json_object *res; + json_object *res; char *buf = NULL, *tmp; size_t buflen = 0; @@ -663,10 +640,10 @@ uc_fs_getcwd(struct uc_state *s, uint32_t off, struct json_object *args) return res; } -static struct json_object * -uc_fs_chdir(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_fs_chdir(uc_vm *vm, size_t nargs) { - struct json_object *path = json_object_array_get_idx(args, 0); + json_object *path = uc_get_arg(0); if (!json_object_is_type(path, json_type_string)) err_return(EINVAL); @@ -677,13 +654,13 @@ uc_fs_chdir(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static const struct { const char *name; uc_c_fn *func; } proc_fns[] = { +static const uc_cfunction_list proc_fns[] = { { "read", uc_fs_pread }, { "write", uc_fs_pwrite }, { "close", uc_fs_pclose }, }; -static const struct { const char *name; uc_c_fn *func; } file_fns[] = { +static const uc_cfunction_list file_fns[] = { { "read", uc_fs_read }, { 
"write", uc_fs_write }, { "seek", uc_fs_seek }, @@ -691,14 +668,14 @@ static const struct { const char *name; uc_c_fn *func; } file_fns[] = { { "close", uc_fs_close }, }; -static const struct { const char *name; uc_c_fn *func; } dir_fns[] = { +static const uc_cfunction_list dir_fns[] = { { "read", uc_fs_readdir }, { "seek", uc_fs_seekdir }, { "tell", uc_fs_telldir }, { "close", uc_fs_closedir }, }; -static const struct { const char *name; uc_c_fn *func; } global_fns[] = { +static const uc_cfunction_list global_fns[] = { { "error", uc_fs_error }, { "open", uc_fs_open }, { "opendir", uc_fs_opendir }, @@ -715,40 +692,39 @@ static const struct { const char *name; uc_c_fn *func; } global_fns[] = { }; -static void close_proc(void *ud) { - pclose((FILE *)ud); +static void close_proc(void *ud) +{ + FILE *fp = ud; + + if (fp) + pclose(fp); } -static void close_file(void *ud) { +static void close_file(void *ud) +{ FILE *fp = ud; - if (fp != stdin && fp != stdout && fp != stderr) + if (fp && fp != stdin && fp != stdout && fp != stderr) fclose(fp); } -static void close_dir(void *ud) { - closedir((DIR *)ud); -} - -void uc_module_init(const struct uc_ops *ut, struct uc_state *s, struct json_object *scope) +static void close_dir(void *ud) { - struct json_object *proc_proto, *file_proto, *dir_proto; + DIR *dp = ud; - ops = ut; - proc_proto = ops->new_object(NULL); - file_proto = ops->new_object(NULL); - dir_proto = ops->new_object(NULL); + if (dp) + closedir(dp); +} - register_functions(s, ops, global_fns, scope); - register_functions(s, ops, proc_fns, proc_proto); - register_functions(s, ops, file_fns, file_proto); - register_functions(s, ops, dir_fns, dir_proto); +void uc_module_init(uc_prototype *scope) +{ + uc_add_proto_functions(scope, global_fns); - ops->register_type("fs.proc", proc_proto, close_proc); - ops->register_type("fs.file", file_proto, close_file); - ops->register_type("fs.dir", dir_proto, close_dir); + proc_type = uc_declare_type("fs.proc", proc_fns, close_proc); 
+ file_type = uc_declare_type("fs.file", file_fns, close_file); + dir_type = uc_declare_type("fs.dir", dir_fns, close_dir); - json_object_object_add(scope, "stdin", ops->set_type(xjs_new_object(), "fs.file", stdin)); - json_object_object_add(scope, "stdout", ops->set_type(xjs_new_object(), "fs.file", stdout)); - json_object_object_add(scope, "stderr", ops->set_type(xjs_new_object(), "fs.file", stderr)); + uc_add_proto_val(scope, "stdin", uc_alloc_ressource(file_type, stdin)); + uc_add_proto_val(scope, "stdout", uc_alloc_ressource(file_type, stdout)); + uc_add_proto_val(scope, "stderr", uc_alloc_ressource(file_type, stderr)); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,172 +14,151 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include "../module.h" - #include <math.h> #include <sys/time.h> -static const struct uc_ops *ops; - -static double -to_double(struct json_object *v) -{ - int64_t n; - double d; - - return (ops->cast_number(v, &n, &d) == json_type_double) ? d : (double)n; -} - -static int64_t -to_int64(struct json_object *v) -{ - int64_t n; - double d; +#include "../module.h" - return (ops->cast_number(v, &n, &d) == json_type_double) ? (int64_t)d : n; -} +static bool srand_called = false; -static struct json_object * -uc_abs(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_abs(uc_vm *vm, size_t nargs) { - struct json_object *v = json_object_array_get_idx(args, 0); + json_object *v = uc_get_arg(0); enum json_type t; int64_t n; double d; if (json_object_is_type(v, json_type_null)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - t = ops->cast_number(v, &n, &d); + t = uc_to_number(v, &n, &d); if (t == json_type_double) - return (isnan(d) || d < 0) ? 
ops->new_double(-d) : json_object_get(v); + return (isnan(d) || d < 0) ? uc_alloc_double(-d) : json_object_get(v); return (n < 0) ? json_object_new_int64(-n) : json_object_get(v); } -static struct json_object * -uc_atan2(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_atan2(uc_vm *vm, size_t nargs) { - double d1 = to_double(json_object_array_get_idx(args, 0)); - double d2 = to_double(json_object_array_get_idx(args, 1)); + double d1 = uc_to_double(uc_get_arg(0)); + double d2 = uc_to_double(uc_get_arg(1)); if (isnan(d1) || isnan(d2)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(atan2(d1, d2)); + return uc_alloc_double(atan2(d1, d2)); } -static struct json_object * -uc_cos(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_cos(uc_vm *vm, size_t nargs) { - double d = to_double(json_object_array_get_idx(args, 0)); + double d = uc_to_double(uc_get_arg(0)); if (isnan(d)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(cos(d)); + return uc_alloc_double(cos(d)); } -static struct json_object * -uc_exp(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_exp(uc_vm *vm, size_t nargs) { - double d = to_double(json_object_array_get_idx(args, 0)); + double d = uc_to_double(uc_get_arg(0)); if (isnan(d)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(exp(d)); + return uc_alloc_double(exp(d)); } -static struct json_object * -uc_log(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_log(uc_vm *vm, size_t nargs) { - double d = to_double(json_object_array_get_idx(args, 0)); + double d = uc_to_double(uc_get_arg(0)); if (isnan(d)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(log(d)); + return uc_alloc_double(log(d)); } -static struct json_object * -uc_sin(struct uc_state *s, uint32_t off, 
struct json_object *args) +static json_object * +uc_sin(uc_vm *vm, size_t nargs) { - double d = to_double(json_object_array_get_idx(args, 0)); + double d = uc_to_double(uc_get_arg(0)); if (isnan(d)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(sin(d)); + return uc_alloc_double(sin(d)); } -static struct json_object * -uc_sqrt(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_sqrt(uc_vm *vm, size_t nargs) { - double d = to_double(json_object_array_get_idx(args, 0)); + double d = uc_to_double(uc_get_arg(0)); if (isnan(d)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(sqrt(d)); + return uc_alloc_double(sqrt(d)); } -static struct json_object * -uc_pow(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_pow(uc_vm *vm, size_t nargs) { - double x = to_double(json_object_array_get_idx(args, 0)); - double y = to_double(json_object_array_get_idx(args, 1)); + double x = uc_to_double(uc_get_arg(0)); + double y = uc_to_double(uc_get_arg(1)); if (isnan(x) || isnan(y)) - return ops->new_double(NAN); + return uc_alloc_double(NAN); - return ops->new_double(pow(x, y)); + return uc_alloc_double(pow(x, y)); } -static struct json_object * -uc_rand(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_rand(uc_vm *vm, size_t nargs) { struct timeval tv; - if (!s->srand_called) { + if (!srand_called) { gettimeofday(&tv, NULL); srand((tv.tv_sec * 1000) + (tv.tv_usec / 1000)); - s->srand_called = true; + srand_called = true; } return json_object_new_int64(rand()); } -static struct json_object * -uc_srand(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_srand(uc_vm *vm, size_t nargs) { - - int64_t n = to_int64(json_object_array_get_idx(args, 0)); + int64_t n = uc_to_int64(uc_get_arg(0)); srand((unsigned int)n); - s->srand_called = true; + srand_called = true; return NULL; } 
-static const struct { const char *name; uc_c_fn *func; } global_fns[] = { - { "abs", uc_abs }, - { "atan2", uc_atan2 }, - { "cos", uc_cos }, - { "exp", uc_exp }, - { "log", uc_log }, - { "sin", uc_sin }, - { "sqrt", uc_sqrt }, - { "pow", uc_pow }, - { "rand", uc_rand }, - { "srand", uc_srand }, +static const uc_cfunction_list math_fns[] = { + { "abs", uc_abs }, + { "atan2", uc_atan2 }, + { "cos", uc_cos }, + { "exp", uc_exp }, + { "log", uc_log }, + { "sin", uc_sin }, + { "sqrt", uc_sqrt }, + { "pow", uc_pow }, + { "rand", uc_rand }, + { "srand", uc_srand }, }; -void uc_module_init(const struct uc_ops *ut, struct uc_state *s, struct json_object *scope) +void uc_module_init(uc_prototype *scope) { - ops = ut; - - register_functions(s, ops, global_fns, scope); + uc_add_proto_functions(scope, math_fns); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,29 +14,28 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include "../module.h" - #include <unistd.h> #include <libubus.h> #include <libubox/blobmsg.h> #include <libubox/blobmsg_json.h> -#define err_return(err) do { last_error = err; return NULL; } while(0) +#include "../module.h" -static const struct uc_ops *ops; +#define err_return(err) do { last_error = err; return NULL; } while(0) static enum ubus_msg_status last_error = 0; +static uc_ressource_type *conn_type; -struct ubus_connection { +typedef struct { int timeout; struct blob_buf buf; struct ubus_context *ctx; -}; +} ubus_connection; -static struct json_object * -uc_ubus_error(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ubus_error(uc_vm *vm, size_t nargs) { - struct json_object *errmsg; + json_object *errmsg; if (last_error == 0) return NULL; @@ -47,14 +46,14 @@ uc_ubus_error(struct uc_state *s, uint32_t off, struct json_object *args) return errmsg; } -static struct json_object * +static json_object * uc_blob_to_json(struct blob_attr *attr, bool table, const char **name); -static struct json_object * +static json_object * uc_blob_array_to_json(struct blob_attr *attr, size_t len, bool table) { - struct json_object *o = table ? json_object_new_object() : json_object_new_array(); - struct json_object *v; + json_object *o = table ? 
json_object_new_object() : json_object_new_array(); + json_object *v; struct blob_attr *pos; size_t rem = len; const char *name; @@ -77,7 +76,7 @@ uc_blob_array_to_json(struct blob_attr *attr, size_t len, bool table) return o; } -static struct json_object * +static json_object * uc_blob_to_json(struct blob_attr *attr, bool table, const char **name) { void *data; @@ -131,13 +130,13 @@ uc_blob_to_json(struct blob_attr *attr, bool table, const char **name) } -static struct json_object * -uc_ubus_connect(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ubus_connect(uc_vm *vm, size_t nargs) { - struct json_object *socket = json_object_array_get_idx(args, 0); - struct json_object *timeout = json_object_array_get_idx(args, 1); - struct json_object *co; - struct ubus_connection *c; + json_object *socket = uc_get_arg(0); + json_object *timeout = uc_get_arg(1); + json_object *co; + ubus_connection *c; if ((socket && !json_object_is_type(socket, json_type_string)) || (timeout && !json_object_is_type(timeout, json_type_int))) @@ -169,14 +168,14 @@ uc_ubus_connect(struct uc_state *s, uint32_t off, struct json_object *args) ubus_add_uloop(c->ctx); - return ops->set_type(co, "ubus.connection", c); + return uc_alloc_ressource(conn_type, c); } static void uc_ubus_signatures_cb(struct ubus_context *c, struct ubus_object_data *o, void *p) { - struct json_object *arr = p; - struct json_object *sig; + json_object *arr = p; + json_object *sig; if (!o->signature) return; @@ -190,8 +189,8 @@ uc_ubus_signatures_cb(struct ubus_context *c, struct ubus_object_data *o, void * static void uc_ubus_objects_cb(struct ubus_context *c, struct ubus_object_data *o, void *p) { - struct json_object *arr = p; - struct json_object *obj; + json_object *arr = p; + json_object *obj; obj = json_object_new_string(o->path); @@ -199,12 +198,12 @@ uc_ubus_objects_cb(struct ubus_context *c, struct ubus_object_data *o, void *p) json_object_array_add(arr, obj); } -static struct 
json_object * -uc_ubus_list(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ubus_list(uc_vm *vm, size_t nargs) { - struct ubus_connection **c = (struct ubus_connection **)ops->get_type(s->ctx, "ubus.connection"); - struct json_object *objname = json_object_array_get_idx(args, 0); - struct json_object *res = NULL; + ubus_connection **c = uc_get_self("ubus.connection"); + json_object *objname = uc_get_arg(0); + json_object *res = NULL; enum ubus_msg_status rv; if (!c || !*c || !(*c)->ctx) @@ -232,19 +231,19 @@ uc_ubus_list(struct uc_state *s, uint32_t off, struct json_object *args) static void uc_ubus_call_cb(struct ubus_request *req, int type, struct blob_attr *msg) { - struct json_object **res = (struct json_object **)req->priv; + json_object **res = (json_object **)req->priv; *res = msg ? uc_blob_array_to_json(blob_data(msg), blob_len(msg), true) : NULL; } -static struct json_object * -uc_ubus_call(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ubus_call(uc_vm *vm, size_t nargs) { - struct ubus_connection **c = (struct ubus_connection **)ops->get_type(s->ctx, "ubus.connection"); - struct json_object *objname = json_object_array_get_idx(args, 0); - struct json_object *funname = json_object_array_get_idx(args, 1); - struct json_object *funargs = json_object_array_get_idx(args, 2); - struct json_object *res = NULL; + ubus_connection **c = uc_get_self("ubus.connection"); + json_object *objname = uc_get_arg(0); + json_object *funname = uc_get_arg(1); + json_object *funargs = uc_get_arg(2); + json_object *res = NULL; enum ubus_msg_status rv; uint32_t id; @@ -275,10 +274,10 @@ uc_ubus_call(struct uc_state *s, uint32_t off, struct json_object *args) return res; } -static struct json_object * -uc_ubus_disconnect(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_ubus_disconnect(uc_vm *vm, size_t nargs) { - struct ubus_connection **c = (struct ubus_connection 
**)ops->get_type(s->ctx, "ubus.connection"); + ubus_connection **c = uc_get_self("ubus.connection"); if (!c || !*c || !(*c)->ctx) err_return(UBUS_STATUS_CONNECTION_FAILED); @@ -290,12 +289,12 @@ uc_ubus_disconnect(struct uc_state *s, uint32_t off, struct json_object *args) } -static const struct { const char *name; uc_c_fn *func; } global_fns[] = { +static const uc_cfunction_list global_fns[] = { { "error", uc_ubus_error }, { "connect", uc_ubus_connect }, }; -static const struct { const char *name; uc_c_fn *func; } conn_fns[] = { +static const uc_cfunction_list conn_fns[] = { { "list", uc_ubus_list }, { "call", uc_ubus_call }, { "disconnect", uc_ubus_disconnect }, @@ -303,7 +302,7 @@ static const struct { const char *name; uc_c_fn *func; } conn_fns[] = { static void close_connection(void *ud) { - struct ubus_connection *conn = ud; + ubus_connection *conn = ud; blob_buf_free(&conn->buf); @@ -313,15 +312,9 @@ static void close_connection(void *ud) { free(conn); } -void uc_module_init(const struct uc_ops *ut, struct uc_state *s, struct json_object *scope) +void uc_module_init(uc_prototype *scope) { - struct json_object *conn_proto; - - ops = ut; - conn_proto = ops->new_object(NULL); - - register_functions(s, ops, global_fns, scope); - register_functions(s, ops, conn_fns, conn_proto); + uc_add_proto_functions(scope, global_fns); - ops->register_type("ubus.connection", conn_proto, close_connection); + conn_type = uc_declare_type("ubus.connection", conn_fns, close_connection); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,16 +14,15 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include "../module.h" - #include <string.h> #include <uci.h> -#define err_return(err) do { last_error = err; return NULL; } while(0) +#include "../module.h" -static const struct uc_ops *ops; +#define err_return(err) do { last_error = err; return NULL; } while(0) static int last_error = 0; +static uc_ressource_type *cursor_type; enum pkg_cmd { CMD_SAVE, @@ -31,11 +30,11 @@ enum pkg_cmd { CMD_REVERT }; -static struct json_object * -uc_uci_error(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_error(uc_vm *vm, size_t nargs) { char buf[sizeof("Unknown error: -9223372036854775808")]; - struct json_object *errmsg; + json_object *errmsg; const char *errstr[] = { [UCI_ERR_MEM] = "Out of memory", @@ -64,12 +63,11 @@ uc_uci_error(struct uc_state *s, uint32_t off, struct json_object *args) } -static struct json_object * -uc_uci_cursor(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_cursor(uc_vm *vm, size_t nargs) { - struct json_object *cdir = json_object_array_get_idx(args, 0); - struct json_object *sdir = json_object_array_get_idx(args, 1); - struct json_object *co; + json_object *cdir = uc_get_arg(0); + json_object *sdir = uc_get_arg(1); struct uci_context *c; int rv; @@ -96,22 +94,15 @@ uc_uci_cursor(struct uc_state *s, uint32_t off, struct json_object *args) err_return(rv); } - co = json_object_new_object(); - - if (!co) { - uci_free_context(c); - err_return(UCI_ERR_MEM); - } - - return ops->set_type(co, "uci.cursor", c); + return uc_alloc_ressource(cursor_type, c); } -static struct json_object * -uc_uci_load(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_load(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); struct 
uci_element *e; if (!c || !*c) @@ -133,11 +124,11 @@ uc_uci_load(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_uci_unload(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_unload(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); struct uci_element *e; if (!c || !*c) @@ -186,10 +177,10 @@ lookup_ptr(struct uci_context *ctx, struct uci_ptr *ptr, bool extended) return uci_lookup_ptr(ctx, ptr, NULL, extended); } -static struct json_object * +static json_object * option_to_json(struct uci_option *o) { - struct json_object *arr; + json_object *arr; struct uci_element *e; switch (o->type) { @@ -210,10 +201,10 @@ option_to_json(struct uci_option *o) } } -static struct json_object * +static json_object * section_to_json(struct uci_section *s, int index) { - struct json_object *so = json_object_new_object(); + json_object *so = json_object_new_object(); struct uci_element *e; struct uci_option *o; @@ -235,11 +226,11 @@ section_to_json(struct uci_section *s, int index) return so; } -static struct json_object * +static json_object * package_to_json(struct uci_package *p) { - struct json_object *po = json_object_new_object(); - struct json_object *so; + json_object *po = json_object_new_object(); + json_object *so; struct uci_element *e; int i = 0; @@ -254,13 +245,13 @@ package_to_json(struct uci_package *p) return po; } -static struct json_object * -uc_uci_get_any(struct uc_state *s, uint32_t off, struct json_object *args, bool all) +static json_object * +uc_uci_get_any(uc_vm *vm, size_t nargs, bool all) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = 
json_object_array_get_idx(args, 0); - struct json_object *sect = json_object_array_get_idx(args, 1); - struct json_object *opt = json_object_array_get_idx(args, 2); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *sect = uc_get_arg(1); + json_object *opt = uc_get_arg(2); struct uci_ptr ptr = {}; int rv; @@ -314,25 +305,25 @@ uc_uci_get_any(struct uc_state *s, uint32_t off, struct json_object *args, bool return json_object_new_string(ptr.s->type); } -static struct json_object * -uc_uci_get(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_get(uc_vm *vm, size_t nargs) { - return uc_uci_get_any(s, off, args, false); + return uc_uci_get_any(vm, nargs, false); } -static struct json_object * -uc_uci_get_all(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_get_all(uc_vm *vm, size_t nargs) { - return uc_uci_get_any(s, off, args, true); + return uc_uci_get_any(vm, nargs, true); } -static struct json_object * -uc_uci_get_first(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_get_first(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *type = json_object_array_get_idx(args, 1); - struct json_object *opt = json_object_array_get_idx(args, 2); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *type = uc_get_arg(1); + json_object *opt = uc_get_arg(2); struct uci_package *p = NULL; struct uci_section *sc; struct uci_element *e; @@ -384,12 +375,12 @@ uc_uci_get_first(struct uc_state *s, uint32_t off, struct json_object *args) err_return(UCI_ERR_NOTFOUND); } -static struct json_object * -uc_uci_add(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_add(uc_vm *vm, 
size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *type = json_object_array_get_idx(args, 1); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *type = uc_get_arg(1); struct uci_element *e = NULL; struct uci_package *p = NULL; struct uci_section *sc = NULL; @@ -420,9 +411,9 @@ uc_uci_add(struct uc_state *s, uint32_t off, struct json_object *args) } static bool -json_to_value(struct json_object *val, const char **p, bool *is_list) +json_to_value(json_object *val, const char **p, bool *is_list) { - struct json_object *item; + json_object *item; *p = NULL; @@ -463,13 +454,13 @@ json_to_value(struct json_object *val, const char **p, bool *is_list) } } -static struct json_object * -uc_uci_set(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_set(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *sect = json_object_array_get_idx(args, 1); - struct json_object *opt = NULL, *val = NULL; + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *sect = uc_get_arg(1); + json_object *opt = NULL, *val = NULL; struct uci_ptr ptr = {}; bool is_list = false; int rv, i; @@ -478,11 +469,11 @@ uc_uci_set(struct uc_state *s, uint32_t off, struct json_object *args) !json_object_is_type(sect, json_type_string)) err_return(UCI_ERR_INVAL); - switch (json_object_array_length(args)) { + switch (nargs) { /* conf, sect, opt, val */ case 4: - opt = json_object_array_get_idx(args, 2); - val = json_object_array_get_idx(args, 3); + opt = uc_get_arg(2); + val = uc_get_arg(3); if (!json_object_is_type(opt, json_type_string)) err_return(UCI_ERR_INVAL); @@ -491,7 +482,7 @@ 
uc_uci_set(struct uc_state *s, uint32_t off, struct json_object *args) /* conf, sect, type */ case 3: - val = json_object_array_get_idx(args, 2); + val = uc_get_arg(2); if (!json_object_is_type(val, json_type_string)) err_return(UCI_ERR_INVAL); @@ -563,13 +554,13 @@ uc_uci_set(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_uci_delete(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_delete(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *sect = json_object_array_get_idx(args, 1); - struct json_object *opt = json_object_array_get_idx(args, 2); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *sect = uc_get_arg(1); + json_object *opt = uc_get_arg(2); struct uci_ptr ptr = {}; int rv; @@ -598,13 +589,13 @@ uc_uci_delete(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_uci_rename(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_rename(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *sect = json_object_array_get_idx(args, 1); - struct json_object *opt = NULL, *val = NULL; + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *sect = uc_get_arg(1); + json_object *opt = NULL, *val = NULL; struct uci_ptr ptr = {}; int rv; @@ -612,11 +603,11 @@ uc_uci_rename(struct uc_state *s, uint32_t off, struct json_object *args) !json_object_is_type(sect, json_type_string)) err_return(UCI_ERR_INVAL); - switch 
(json_object_array_length(args)) { + switch (nargs) { /* conf, sect, opt, val */ case 4: - opt = json_object_array_get_idx(args, 2); - val = json_object_array_get_idx(args, 3); + opt = uc_get_arg(2); + val = uc_get_arg(3); if (!json_object_is_type(opt, json_type_string) || !json_object_is_type(val, json_type_string)) @@ -626,7 +617,7 @@ uc_uci_rename(struct uc_state *s, uint32_t off, struct json_object *args) /* conf, sect, type */ case 3: - val = json_object_array_get_idx(args, 2); + val = uc_get_arg(2); if (!json_object_is_type(val, json_type_string)) err_return(UCI_ERR_INVAL); @@ -658,13 +649,13 @@ uc_uci_rename(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_uci_reorder(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_reorder(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *sect = json_object_array_get_idx(args, 1); - struct json_object *val = json_object_array_get_idx(args, 2); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *sect = uc_get_arg(1); + json_object *val = uc_get_arg(2); struct uci_ptr ptr = {}; int64_t n; int rv; @@ -698,11 +689,11 @@ uc_uci_reorder(struct uc_state *s, uint32_t off, struct json_object *args) return json_object_new_boolean(true); } -static struct json_object * -uc_uci_pkg_command(struct uc_state *s, uint32_t off, struct json_object *args, enum pkg_cmd cmd) +static json_object * +uc_uci_pkg_command(uc_vm *vm, size_t nargs, enum pkg_cmd cmd) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); struct uci_element *e, 
*tmp; struct uci_package *p; struct uci_ptr ptr = {}; @@ -748,25 +739,25 @@ uc_uci_pkg_command(struct uc_state *s, uint32_t off, struct json_object *args, e return json_object_new_boolean(true); } -static struct json_object * -uc_uci_save(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_save(uc_vm *vm, size_t nargs) { - return uc_uci_pkg_command(s, off, args, CMD_SAVE); + return uc_uci_pkg_command(vm, nargs, CMD_SAVE); } -static struct json_object * -uc_uci_commit(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_commit(uc_vm *vm, size_t nargs) { - return uc_uci_pkg_command(s, off, args, CMD_COMMIT); + return uc_uci_pkg_command(vm, nargs, CMD_COMMIT); } -static struct json_object * -uc_uci_revert(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_revert(uc_vm *vm, size_t nargs) { - return uc_uci_pkg_command(s, off, args, CMD_REVERT); + return uc_uci_pkg_command(vm, nargs, CMD_REVERT); } -static struct json_object * +static json_object * change_to_json(struct uci_delta *d) { const char *types[] = { @@ -779,7 +770,7 @@ change_to_json(struct uci_delta *d) [UCI_CMD_CHANGE] = "set", }; - struct json_object *a; + json_object *a; if (!d->section) return NULL; @@ -805,10 +796,10 @@ change_to_json(struct uci_delta *d) return a; } -static struct json_object * +static json_object * changes_to_json(struct uci_context *ctx, const char *package) { - struct json_object *a = NULL, *c; + json_object *a = NULL, *c; struct uci_package *p = NULL; struct uci_element *e; bool unload = false; @@ -855,12 +846,12 @@ changes_to_json(struct uci_context *ctx, const char *package) return a; } -static struct json_object * -uc_uci_changes(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_changes(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = 
json_object_array_get_idx(args, 0); - struct json_object *res, *chg; + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *res, *chg; char **configs; int rv, i; @@ -894,17 +885,18 @@ uc_uci_changes(struct uc_state *s, uint32_t off, struct json_object *args) return res; } -static struct json_object * -uc_uci_foreach(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_foreach(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *conf = json_object_array_get_idx(args, 0); - struct json_object *type = json_object_array_get_idx(args, 1); - struct json_object *func = json_object_array_get_idx(args, 2); - struct json_object *fnargs, *rv = NULL; + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *conf = uc_get_arg(0); + json_object *type = uc_get_arg(1); + json_object *func = uc_get_arg(2); + json_object *rv = NULL; struct uci_package *p = NULL; struct uci_element *e, *tmp; struct uci_section *sc; + uc_exception_type_t ex; bool stop = false; bool ret = false; int i = 0; @@ -924,11 +916,6 @@ uc_uci_foreach(struct uc_state *s, uint32_t off, struct json_object *args) if (!p) err_return(UCI_ERR_NOTFOUND); - fnargs = json_object_new_array(); - - if (!fnargs) - err_return(UCI_ERR_MEM); - uci_foreach_element_safe(&p->sections, tmp, e) { sc = uci_to_section(e); i++; @@ -936,18 +923,17 @@ uc_uci_foreach(struct uc_state *s, uint32_t off, struct json_object *args) if (type && strcmp(sc->type, json_object_get_string(type))) continue; - json_object_array_put_idx(fnargs, 0, section_to_json(sc, i - 1)); - - rv = ops->invoke(s, off, NULL, func, fnargs); + uc_push_val(uc_value_get(func)); + uc_push_val(section_to_json(sc, i - 1)); - /* forward exceptions from callback function */ - if (uc_is_type(rv, T_EXCEPTION)) { - json_object_put(fnargs); + ex = uc_call(1); - return rv; - } + /* stop on exception in 
callback */ + if (ex) + break; ret = true; + rv = uc_pop_val(); stop = (json_object_is_type(rv, json_type_boolean) && !json_object_get_boolean(rv)); json_object_put(rv); @@ -956,16 +942,16 @@ uc_uci_foreach(struct uc_state *s, uint32_t off, struct json_object *args) break; } - json_object_put(fnargs); + /* XXX: rethrow */ return json_object_new_boolean(ret); } -static struct json_object * -uc_uci_configs(struct uc_state *s, uint32_t off, struct json_object *args) +static json_object * +uc_uci_configs(uc_vm *vm, size_t nargs) { - struct uci_context **c = (struct uci_context **)ops->get_type(s->ctx, "uci.cursor"); - struct json_object *a; + struct uci_context **c = uc_get_self("uci.cursor"); + json_object *a; char **configs; int i, rv; @@ -990,7 +976,7 @@ uc_uci_configs(struct uc_state *s, uint32_t off, struct json_object *args) } -static const struct { const char *name; uc_c_fn *func; } cursor_fns[] = { +static const uc_cfunction_list cursor_fns[] = { { "load", uc_uci_load }, { "unload", uc_uci_unload }, { "get", uc_uci_get }, @@ -1009,7 +995,7 @@ static const struct { const char *name; uc_c_fn *func; } cursor_fns[] = { { "configs", uc_uci_configs }, }; -static const struct { const char *name; uc_c_fn *func; } global_fns[] = { +static const uc_cfunction_list global_fns[] = { { "error", uc_uci_error }, { "cursor", uc_uci_cursor }, }; @@ -1019,15 +1005,9 @@ static void close_uci(void *ud) { uci_free_context((struct uci_context *)ud); } -void uc_module_init(const struct uc_ops *ut, struct uc_state *s, struct json_object *scope) +void uc_module_init(uc_prototype *scope) { - struct json_object *uci_proto; - - ops = ut; - uci_proto = ops->new_object(NULL); - - register_functions(s, ops, global_fns, scope); - register_functions(s, ops, cursor_fns, uci_proto); + uc_add_proto_functions(scope, global_fns); - ops->register_type("uci.cursor", uci_proto, close_uci); + cursor_type = uc_declare_type("uci.cursor", cursor_fns, close_uci); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 
Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,6 +19,7 @@ #include <stdint.h> #include <unistd.h> #include <errno.h> +#include <ctype.h> #include <sys/stat.h> #ifdef JSONC @@ -27,10 +28,11 @@ #include <json-c/json.h> #endif +#include "compiler.h" #include "lexer.h" -#include "parser.h" -#include "eval.h" #include "lib.h" +#include "vm.h" +#include "source.h" static void @@ -52,103 +54,56 @@ print_usage(char *app) app); } -#ifndef NDEBUG -static void dump(struct uc_state *state, uint32_t off, int level); - -static void dump_node(struct uc_state *state, uint32_t off) { - const char *p; - - switch (OP_TYPE(off)) { - case T_NUMBER: - printf("n%u [label=\"%"PRId64"\"];\n", off, json_object_get_int64(OP_VAL(off))); - break; - - case T_DOUBLE: - printf("n%u [label=\"%f\"];\n", off, json_object_get_double(OP_VAL(off))); - break; - - case T_BOOL: - printf("n%u [label=\"%s\"];\n", off, json_object_get_boolean(OP_VAL(off)) ? 
"true" : "false"); - break; - - case T_STRING: - case T_LABEL: - case T_TEXT: - printf("n%u [label=\"%s<", off, uc_get_tokenname(OP_TYPE(off))); - - for (p = json_object_get_string(OP_VAL(off)); *p; p++) - switch (*p) { - case '\n': - printf("\\\n"); - break; +static void +globals_init(uc_prototype *scope) +{ + json_object *arr = xjs_new_array(); + const char *p, *last; - case '\t': - printf("\\\t"); - break; + for (p = last = LIB_SEARCH_PATH;; p++) { + if (*p == ':' || *p == '\0') { + json_object_array_add(arr, xjs_new_string_len(last, p - last)); - case '"': - printf("\\\""); + if (!*p) break; - default: - printf("%c", *p); - } - - printf(">\"];\n"); - break; - - default: - printf("n%u [label=\"%s", off, uc_get_tokenname(OP_TYPE(off))); - - if (OP_IS_POSTFIX(off)) - printf(", postfix"); - - printf("\"];\n"); - } -} - -static void dump(struct uc_state *state, uint32_t off, int level) { - uint32_t prev_off, cur_off, child_off; - int i; - - if (level == 0) { - printf("digraph G {\nmain [shape=box];\n"); + last = p + 1; + } } - for (prev_off = 0, cur_off = off; cur_off != 0; prev_off = cur_off, cur_off = OP_NEXT(cur_off)) { - dump_node(state, cur_off); - - if (OP_TYPE(cur_off) < __T_MAX) { - for (i = 0; i < OPn_NUM; i++) { - child_off = OPn(cur_off, i); + json_object_object_add(scope->header.jso, "REQUIRE_SEARCH_PATH", arr); +} - if (child_off) { - dump(state, child_off, level + 1); - printf("n%u -> n%u [label=\"op%d\"];\n", cur_off, child_off, i + 1); - } - } - } +static void +register_variable(uc_prototype *scope, const char *key, json_object *val) +{ + char *name = strdup(key); + char *p; - if (prev_off) - printf("n%u -> n%u [style=dotted];\n", prev_off, cur_off); - } + if (!name) + return; - if (level == 0) { - printf("main -> n%u [style=dotted];\n", off); + for (p = name; *p; p++) + if (!isalnum(*p) && *p != '_') + *p = '_'; - printf("}\n"); - } + json_object_object_add(scope->header.jso, name, val); + free(name); } -#endif /* NDEBUG */ + static int 
-parse(struct uc_state *state, struct uc_source *src, bool dumponly, - bool skip_shebang, struct json_object *env, struct json_object *modules) +parse(uc_parse_config *config, uc_source *src, + bool skip_shebang, json_object *env, json_object *modules) { - struct json_object *rv; - char c, c2, *msg; + uc_prototype *globals = uc_prototype_new(NULL); + uc_function *entry; + uc_vm vm = {}; + char c, c2, *err; int rc = 0; + uc_vm_init(&vm, config); + if (skip_shebang) { c = fgetc(src->fp); c2 = fgetc(src->fp); @@ -167,33 +122,42 @@ parse(struct uc_state *state, struct uc_source *src, bool dumponly, } } - if (dumponly) { -#ifdef NDEBUG - rv = uc_new_exception(state, 0, "Debug support not compiled in"); -#else /* NDEBUG */ - rv = uc_parse(state, src->fp); + entry = uc_compile(config, src, &err); - if (!uc_is_type(rv, T_EXCEPTION)) - dump(state, state->main, 0); -#endif /* NDEBUG */ + if (!entry) { + fprintf(stderr, "%s", err); + free(err); + rc = 2; + goto out; } - else { - rv = uc_run(state, env, modules); + + /* load global variables */ + globals_init(globals); + + /* load env variables */ + if (env) { + json_object_object_foreach(env, key, val) + register_variable(globals, key, uc_value_get(val)); } - if (uc_is_type(rv, T_EXCEPTION)) { - msg = uc_format_error(state, src->fp); - fprintf(stderr, "%s\n\n", msg); - free(msg); + /* load std functions into global scope */ + uc_lib_init(globals); + + rc = uc_vm_execute(&vm, entry, globals, modules); + + if (rc) { rc = 1; + goto out; } - json_object_put(rv); +out: + uc_vm_free(&vm); + uc_value_put(globals->header.jso); return rc; } -static FILE * +static uc_source * read_stdin(char **ptr) { size_t rlen = 0, tlen = 0; @@ -217,13 +181,13 @@ read_stdin(char **ptr) tlen += rlen; } - return fmemopen(*ptr, tlen, "rb"); + return uc_source_new_buffer("[stdin]", *ptr, tlen); } -static struct json_object * +static json_object * parse_envfile(FILE *fp) { - struct json_object *rv = NULL; + json_object *rv = NULL; enum json_tokener_error 
err; struct json_tokener *tok; char buf[128]; @@ -257,29 +221,25 @@ parse_envfile(FILE *fp) int main(int argc, char **argv) { - struct json_object *env = NULL, *modules = NULL, *o, *p; - struct uc_state *state = NULL; - struct uc_source source = {}; + json_object *env = NULL, *modules = NULL, *o, *p; + uc_source *source = NULL, *envfile = NULL; char *stdin = NULL, *c; - bool dumponly = false; bool shebang = false; - FILE *envfile = NULL; int opt, rv = 0; + uc_parse_config config = { + .strict_declarations = false, + .lstrip_blocks = true, + .trim_blocks = true + }; + if (argc == 1) { print_usage(argv[0]); goto out; } - state = xalloc(sizeof(*state)); - state->lstrip_blocks = 1; - state->trim_blocks = 1; - - /* reserve opcode slot 0 */ - uc_new_op(state, 0, NULL, UINT32_MAX); - - while ((opt = getopt(argc, argv, "dhlrSe:E:i:s:m:")) != -1) + while ((opt = getopt(argc, argv, "hlrSe:E:i:s:m:")) != -1) { switch (opt) { case 'h': @@ -287,19 +247,15 @@ main(int argc, char **argv) goto out; case 'i': - if (source.fp) + if (source) fprintf(stderr, "Options -i and -s are exclusive\n"); - if (!strcmp(optarg, "-")) { - source.fp = read_stdin(&stdin); - source.filename = xstrdup("[stdin]"); - } - else { - source.fp = fopen(optarg, "rb"); - source.filename = xstrdup(optarg); - } + if (!strcmp(optarg, "-")) + source = read_stdin(&stdin); + else + source = uc_source_new_file(optarg); - if (!source.fp) { + if (!source) { fprintf(stderr, "Failed to open %s: %s\n", optarg, strerror(errno)); rv = 1; goto out; @@ -307,28 +263,23 @@ main(int argc, char **argv) break; - case 'd': - dumponly = true; - break; - case 'l': - state->lstrip_blocks = 0; + config.lstrip_blocks = false; break; case 'r': - state->trim_blocks = 0; + config.trim_blocks = false; break; case 's': - if (source.fp) + if (source) fprintf(stderr, "Options -i and -s are exclusive\n"); - source.fp = fmemopen(optarg, strlen(optarg), "rb"); - source.filename = xstrdup("[-s argument]"); + source = uc_source_new_buffer("[-s 
argument]", xstrdup(optarg), strlen(optarg)); break; case 'S': - state->strict_declarations = 1; + config.strict_declarations = true; break; case 'e': @@ -339,7 +290,7 @@ main(int argc, char **argv) else c = optarg; - envfile = fmemopen(c, strlen(c), "rb"); + envfile = uc_source_new_buffer("[-e argument]", xstrdup(c), strlen(c)); /* fallthrough */ case 'E': @@ -354,7 +305,7 @@ main(int argc, char **argv) if (!strcmp(c, "-")) envfile = read_stdin(&stdin); else - envfile = fopen(c, "rb"); + envfile = uc_source_new_file(c); if (!envfile) { fprintf(stderr, "Failed to open %s: %s\n", c, strerror(errno)); @@ -363,9 +314,9 @@ main(int argc, char **argv) } } - o = parse_envfile(envfile); + o = parse_envfile(envfile->fp); - fclose(envfile); + uc_source_put(envfile); envfile = NULL; @@ -401,11 +352,10 @@ main(int argc, char **argv) } } - if (!source.fp && argv[optind] != NULL) { - source.fp = fopen(argv[optind], "rb"); - source.filename = xstrdup(argv[optind]); + if (!source && argv[optind] != NULL) { + source = uc_source_new_file(argv[optind]); - if (!source.fp) { + if (!source) { fprintf(stderr, "Failed to open %s: %s\n", argv[optind], strerror(errno)); rv = 1; goto out; @@ -414,24 +364,19 @@ main(int argc, char **argv) shebang = true; } - if (!source.fp) { + if (!source) { fprintf(stderr, "One of -i or -s is required\n"); rv = 1; goto out; } - state->source = xalloc(sizeof(source)); - state->sources = state->source; - *state->source = source; - - rv = parse(state, state->source, dumponly, shebang, env, modules); + rv = parse(&config, source, shebang, env, modules); out: json_object_put(modules); json_object_put(env); - uc_free(state); - free(stdin); + uc_source_put(source); return rv; } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,27 
+17,42 @@ #ifndef __MODULE_H_ #define __MODULE_H_ -#include "ast.h" #include "lib.h" +#include "object.h" +#include "vm.h" -struct uc_ops { - bool (*register_function)(struct uc_state *, struct json_object *, const char *, uc_c_fn *); - bool (*register_type)(const char *, struct json_object *, void (*)(void *)); - struct json_object *(*set_type)(struct json_object *, const char *, void *); - void **(*get_type)(struct json_object *, const char *); - struct json_object *(*new_object)(struct json_object *); - struct json_object *(*new_double)(double); - struct json_object *(*invoke)(struct uc_state *, uint32_t, struct json_object *, struct json_object *, struct json_object *); - enum json_type (*cast_number)(struct json_object *, int64_t *, double *); -}; - -extern const struct uc_ops ut; - -#define register_functions(state, ops, functions, scope) \ +#define register_functions(scope, functions) \ if (scope) \ for (int i = 0; i < ARRAY_SIZE(functions); i++) \ - ops->register_function(state, scope, functions[i].name, functions[i].func) + json_object_object_add(scope->header.jso, functions[i].name, \ + ops->value.cfunc(functions[i].name, functions[i].func)) + +#define alloc_prototype(functions) ({ \ + uc_prototype *__proto = uc_object_as_prototype(ops->value.proto(NULL)); \ + register_functions(__proto, functions); \ + __proto; \ +}) + +#define declare_type(name, proto, freefn) \ + ops->ressource.define(name, proto, freefn) + +#define alloc_ressource(data, type) \ + ops->ressource.create(xjs_new_object(), type, data) + +#define register_ressource(scope, key, res) \ + json_object_object_add((scope)->header.jso, key, (res)->header.jso) + +static const uc_ops *ops; + +void uc_module_init(uc_prototype *scope) __attribute__((weak)); + +void uc_module_entry(const uc_ops *_ops, uc_prototype *scope); +void uc_module_entry(const uc_ops *_ops, uc_prototype *scope) +{ + ops = _ops; -void uc_module_init(const struct uc_ops *, struct uc_state *, struct json_object *); + if 
(uc_module_init) + uc_module_init(scope); +} #endif /* __MODULE_H_ */ diff --git a/object.c b/object.c new file mode 100644 index 0000000..5841f76 --- /dev/null +++ b/object.c @@ -0,0 +1,432 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> +#include <assert.h> + +#include "object.h" + +static void * +uc_object_new(uc_objtype_t type, size_t size, json_object_to_json_string_fn *tostring, json_object_delete_fn *gc) +{ + uc_objhdr *hdr = xalloc(size); + + hdr->type = type; + hdr->jso = xjs_new_object(); + + json_object_set_serializer(hdr->jso, tostring, hdr, gc); + + return hdr; +} + +static int +uc_upvalref_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + return sprintbuf(pb, "<upvalref %p>", jso); +} + +static void +uc_upvalref_gc(json_object *jso, void *userdata) +{ + uc_upvalref *up = userdata; + + uc_value_put(up->value); + free(up); +} + +uc_upvalref * +uc_upvalref_new(size_t slot) +{ + uc_upvalref *up; + + up = uc_object_new(UC_OBJ_UPVAL, sizeof(*up), uc_upvalref_tostring, uc_upvalref_gc); + up->slot = slot; + + return up; +} + +static int +uc_function_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + return sprintbuf(pb, "<function %p>", 
jso); +} + +static void +uc_function_gc(json_object *jso, void *userdata) +{ + uc_function *fn = userdata; + + uc_chunk_free(&fn->chunk); + uc_source_put(fn->source); + + free(fn); +} + +uc_function * +uc_function_new(const char *name, size_t srcpos, uc_source *source) +{ + size_t namelen = 0; + uc_function *fn; + + if (name) + namelen = strlen(name) + 1; + + fn = uc_object_new(UC_OBJ_FUNCTION, ALIGN(sizeof(*fn)) + namelen, uc_function_tostring, uc_function_gc); + fn->name = name ? strcpy((char *)fn + ALIGN(sizeof(*fn)), name) : NULL; + fn->nargs = 0; + fn->nupvals = 0; + fn->srcpos = srcpos; + fn->source = uc_source_get(source); + fn->vararg = false; + + uc_chunk_init(&fn->chunk); + + return fn; +} + +size_t +uc_function_get_srcpos(uc_function *fn, size_t off) +{ + size_t pos = uc_chunk_debug_get_srcpos(&fn->chunk, off); + + return pos ? fn->srcpos + pos : 0; +} + +static int +uc_closure_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + uc_closure *closure = json_object_get_userdata(jso); + uc_function *function = closure->function; + json_object *argname; + size_t i; + + sprintbuf(pb, "%s%s", + strict ? "\"" : "", + closure->is_arrow ? "" : "function"); + + if (function->name) + sprintbuf(pb, " %s", function->name); + + sprintbuf(pb, "("); + + for (i = 1; i <= function->nargs; i++) { + argname = uc_chunk_debug_get_variable(&function->chunk, i - 1, i, false); + + if (i > 1) + sprintbuf(pb, ", "); + + if (i == function->nargs && function->vararg) + sprintbuf(pb, "..."); + + if (argname) + sprintbuf(pb, "%s", json_object_get_string(argname)); + else + sprintbuf(pb, "[arg%zu]", i); + + uc_value_put(argname); + } + + return sprintbuf(pb, ")%s { ... }%s", + closure->is_arrow ? " =>" : "", + strict ? 
"\"" : ""); +} + +static void +uc_closure_gc(json_object *jso, void *userdata) +{ + uc_closure *closure = userdata; + uc_function *function = closure->function; + size_t i; + + for (i = 0; i < function->nupvals; i++) + uc_value_put(closure->upvals[i]->header.jso); + + uc_value_put(function->header.jso); + + free(closure); +} + +uc_closure * +uc_closure_new(uc_function *function, bool arrow_fn) +{ + uc_closure *closure; + + closure = uc_object_new(UC_OBJ_CLOSURE, + ALIGN(sizeof(*closure)) + (sizeof(uc_upvalref *) * function->nupvals), + uc_closure_tostring, uc_closure_gc); + + closure->function = function; + closure->is_arrow = arrow_fn; + closure->upvals = function->nupvals ? ((void *)closure + ALIGN(sizeof(*closure))) : NULL; + + return closure; +} + +static int +uc_cfunction_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + uc_cfunction *cfn = json_object_get_userdata(jso); + + return sprintbuf(pb, "%sfunction%s%s(...) { [native code] }%s", + strict ? "\"" : "", + cfn->name ? " " : "", + cfn->name ? cfn->name : "", + strict ? "\"" : ""); +} + +static void +uc_cfunction_gc(json_object *jso, void *userdata) +{ + free(userdata); +} + +uc_cfunction * +uc_cfunction_new(const char *name, uc_cfn_ptr fptr) +{ + size_t namelen = 0; + uc_cfunction *cfn; + + if (name) + namelen = strlen(name) + 1; + + cfn = uc_object_new(UC_OBJ_CFUNCTION, ALIGN(sizeof(*cfn)) + namelen, uc_cfunction_tostring, uc_cfunction_gc); + cfn->name = name ? strcpy((char *)cfn + ALIGN(sizeof(*cfn)), name) : NULL; + cfn->cfn = fptr; + + return cfn; +} + +static int +uc_regexp_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + uc_regexp *re = json_object_get_userdata(jso); + json_object *s; + const char *p; + size_t len; + + sprintbuf(pb, "%s/", strict ? 
"\"" : ""); + + s = xjs_new_string(re->pattern); + + if (strict) + for (p = json_object_to_json_string(s) + 1, len = strlen(p) - 1; len > 0; len--, p++) + sprintbuf(pb, "%c", *p); + else + sprintbuf(pb, "%s", json_object_get_string(s)); + + uc_value_put(s); + + return sprintbuf(pb, "/%s%s%s%s", + re->global ? "g" : "", + re->icase ? "i" : "", + re->newline ? "s" : "", + strict ? "\"" : ""); +} + +static void +uc_regexp_gc(json_object *jso, void *userdata) +{ + uc_regexp *re = userdata; + + regfree(&re->re); + free(re); +} + +uc_regexp * +uc_regexp_new(const char *pattern, bool icase, bool newline, bool global, char **err) +{ + int cflags = REG_EXTENDED, res; + uc_regexp *re; + size_t len; + + re = uc_object_new(UC_OBJ_REGEXP, ALIGN(sizeof(*re)) + strlen(pattern) + 1, uc_regexp_tostring, uc_regexp_gc); + re->icase = icase; + re->global = global; + re->newline = newline; + re->pattern = strcpy((char *)re + ALIGN(sizeof(*re)), pattern); + + if (icase) + cflags |= REG_ICASE; + + if (newline) + cflags |= REG_NEWLINE; + + res = regcomp(&re->re, pattern, cflags); + + if (res != 0) { + if (err) { + len = regerror(res, &re->re, NULL, 0); + *err = xalloc(len); + + regerror(res, &re->re, *err, len); + } + + uc_value_put(re->header.jso); + + return NULL; + } + + json_object_object_add(re->header.jso, "source", xjs_new_string(pattern)); + json_object_object_add(re->header.jso, "i", xjs_new_boolean(icase)); + json_object_object_add(re->header.jso, "g", xjs_new_boolean(global)); + json_object_object_add(re->header.jso, "s", xjs_new_boolean(newline)); + + return re; +} + +static void +uc_prototype_gc(json_object *jso, void *userdata) +{ + uc_prototype *proto = userdata; + + if (proto->parent) + uc_value_put(proto->parent->header.jso); + + free(proto); +} + +uc_prototype * +uc_prototype_new(uc_prototype *parent) +{ + uc_prototype *proto; + + proto = uc_object_new(UC_OBJ_PROTOTYPE, sizeof(*proto), NULL, uc_prototype_gc); + + if (parent) { + proto->parent = parent; + 
uc_value_get(parent->header.jso); + } + + return proto; +} + + +static uc_ressource_types res_types; + +uc_ressource_type * +uc_ressource_type_add(const char *name, uc_prototype *proto, void (*freefn)(void *)) +{ + uc_vector_grow(&res_types); + + res_types.entries[res_types.count].name = name; + res_types.entries[res_types.count].proto = proto; + res_types.entries[res_types.count].free = freefn; + + return &res_types.entries[res_types.count++]; +} + +static uc_ressource_type * +uc_ressource_type_get(size_t type) +{ + return (type < res_types.count) ? &res_types.entries[type] : NULL; +} + +uc_ressource_type * +uc_ressource_type_lookup(const char *name) +{ + size_t i; + + for (i = 0; i < res_types.count; i++) + if (!strcmp(res_types.entries[i].name, name)) + return &res_types.entries[i]; + + return NULL; +} + +static int +uc_ressource_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + uc_ressource *res = json_object_get_userdata(jso); + uc_ressource_type *type = uc_ressource_type_get(res->type); + + return sprintbuf(pb, "%s<%s %p>%s", + strict ? "\"" : "", + type ? type->name : "ressource", + res->data, + strict ? 
"\"" : ""); +} + +static void +uc_ressource_gc(json_object *jso, void *userdata) +{ + uc_ressource *res = userdata; + uc_ressource_type *type = uc_ressource_type_get(res->type); + + if (type && type->free) + type->free(res->data); + + free(res); +} + +uc_ressource * +uc_ressource_new(json_object *jso, uc_ressource_type *type, void *data) +{ + uc_ressource *res; + + if (!jso) + return NULL; + + res = xalloc(sizeof(*res)); + res->header.type = UC_OBJ_RESSOURCE; + res->header.jso = jso; + + res->type = type - res_types.entries; + res->data = data; + + json_object_set_serializer(res->header.jso, uc_ressource_tostring, res, uc_ressource_gc); + + return res; +} + +void ** +uc_ressource_dataptr(json_object *jso, const char *name) +{ + uc_ressource_type *type; + uc_ressource *res; + + if (!uc_object_is_type(jso, UC_OBJ_RESSOURCE)) + return NULL; + + res = uc_object_as_ressource(jso); + + if (name) { + type = uc_ressource_type_lookup(name); + + if (!type || type != uc_ressource_type_get(res->type)) + return NULL; + } + + return &res->data; +} + +uc_prototype * +uc_ressource_prototype(json_object *jso) +{ + uc_ressource_type *type; + uc_ressource *res; + + if (!uc_object_is_type(jso, UC_OBJ_RESSOURCE)) + return NULL; + + res = uc_object_as_ressource(jso); + type = uc_ressource_type_get(res->type); + + return type ? type->proto : NULL; +} diff --git a/object.h b/object.h new file mode 100644 index 0000000..ba08651 --- /dev/null +++ b/object.h @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __OBJECT_H_ +#define __OBJECT_H_ + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> +#include <regex.h> + +#ifdef JSONC + #include <json.h> +#else + #include <json-c/json.h> +#endif + +#include "source.h" +#include "chunk.h" +#include "util.h" + +typedef enum { + UC_OBJ_INVAL, + UC_OBJ_UPVAL, + UC_OBJ_FUNCTION, + UC_OBJ_CLOSURE, + UC_OBJ_CFUNCTION, + UC_OBJ_REGEXP, + UC_OBJ_PROTOTYPE, + UC_OBJ_RESSOURCE +} uc_objtype_t; + +typedef struct { + uc_objtype_t type; + json_object *jso; +} uc_objhdr; + +typedef struct uc_upvalref { + uc_objhdr header; + size_t slot; + bool closed; + json_object *value; + struct uc_upvalref *next; +} uc_upvalref; + +typedef struct { + uc_objhdr header; + char *name; + bool arrow, vararg; + size_t nargs; + size_t nupvals; + size_t srcpos; + uc_chunk chunk; + uc_source *source; +} uc_function; + +typedef struct { + uc_objhdr header; + uc_function *function; + uc_upvalref **upvals; + bool is_arrow; +} uc_closure; + +struct uc_vm; +typedef json_object *(*uc_cfn_ptr)(struct uc_vm *, size_t); + +typedef struct { + uc_objhdr header; + char *name; + uc_cfn_ptr cfn; +} uc_cfunction; + +typedef struct { + uc_objhdr header; + regex_t re; + char *pattern; + bool icase, newline, global; +} uc_regexp; + +struct uc_prototype { + uc_objhdr header; + struct uc_prototype *parent; +}; + +typedef struct uc_prototype uc_prototype; + +typedef struct { + uc_objhdr header; + uc_prototype *proto; + size_t type; + void *data; +} uc_ressource; + +typedef struct { + const char *name; + uc_prototype *proto; + void (*free)(void *); +} uc_ressource_type; + +uc_declare_vector(uc_ressource_types, 
uc_ressource_type); + +uc_upvalref *uc_upvalref_new(size_t slot); +uc_function *uc_function_new(const char *name, size_t line, uc_source *source); +uc_closure *uc_closure_new(uc_function *function, bool arrow_fn); +uc_cfunction *uc_cfunction_new(const char *name, uc_cfn_ptr cfn); +uc_regexp *uc_regexp_new(const char *pattern, bool icase, bool newline, bool global, char **err); +uc_prototype *uc_prototype_new(uc_prototype *parent); + +uc_ressource_type *uc_ressource_type_add(const char *name, uc_prototype *proto, void (*freefn)(void *)); +uc_ressource_type *uc_ressource_type_lookup(const char *name); + +uc_ressource *uc_ressource_new(json_object *jso, uc_ressource_type *type, void *data); +uc_prototype *uc_ressource_prototype(json_object *jso); +void **uc_ressource_dataptr(json_object *jso, const char *name); + +size_t uc_function_get_srcpos(uc_function *function, size_t off); + +static inline uc_objtype_t +uc_object_type(json_object *jso) +{ + uc_objhdr *hdr = json_object_get_userdata(jso); + + return hdr ? 
hdr->type : UC_OBJ_INVAL; +} + +static inline bool +uc_object_is_type(json_object *jso, uc_objtype_t type) +{ + return uc_object_type(jso) == type; +} + +static inline uc_upvalref * +uc_object_as_upvalref(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_function * +uc_object_as_function(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_closure * +uc_object_as_closure(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_cfunction * +uc_object_as_cfunction(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_regexp * +uc_object_as_regexp(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_prototype * +uc_object_as_prototype(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline uc_ressource * +uc_object_as_ressource(json_object *jso) +{ + return json_object_get_userdata(jso); +} + +static inline bool +uc_object_is_callable(json_object *jso) +{ + switch (uc_object_type(jso)) { + case UC_OBJ_CLOSURE: + case UC_OBJ_CFUNCTION: + return true; + + default: + return false; + } +} + +#endif /* __OBJECT_H_ */ diff --git a/parser.y b/parser.y deleted file mode 100644 index ebd5f5c..0000000 --- a/parser.y +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -%token_type {uint32_t} -%extra_argument {struct uc_state *state} - -%nonassoc T_LEXP T_REXP T_LSTM T_RSTM. - -%nonassoc T_IF. -%nonassoc T_ELSE. - -%left T_COMMA. -%right T_ASBAND T_ASBXOR T_ASBOR. -%right T_ASLEFT T_ASRIGHT. -%right T_ASMUL T_ASDIV T_ASMOD. -%right T_ASADD T_ASSUB. -%right T_ASSIGN. -%right T_QMARK T_COLON. -%left T_OR. -%left T_AND. -%left T_BOR. -%left T_BXOR. -%left T_BAND. -%left T_EQ T_NE T_EQS T_NES. -%left T_LT T_LE T_GT T_GE T_IN. -%left T_LSHIFT T_RSHIFT. -%left T_ADD T_SUB. -%left T_MUL T_DIV T_MOD. -%right T_NOT T_COMPL. -%right T_INC T_DEC. -%left T_LPAREN T_LBRACK. - - -%include { -#include <assert.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> - -#include "ast.h" -#include "lib.h" -#include "lexer.h" -#include "parser.h" - -#define YYSTACKDEPTH 0 -#define YYNOERRORRECOVERY - -#define new_op(type, val, ...) \ - uc_new_op(state, type, val, ##__VA_ARGS__, UINT32_MAX) - -#define wrap_op(op, ...) \ - uc_wrap_op(state, op, ##__VA_ARGS__, UINT32_MAX) - -#define append_op(op1, op2) \ - uc_append_op(state, op1, op2) - -#define no_empty_obj(op) \ - uc_no_empty_obj(state, op) - -static inline uint32_t -uc_no_empty_obj(struct uc_state *state, uint32_t off) -{ - return (OP_TYPE(off) != T_LBRACE || OPn(off, 0)) ? 
off : 0; -} - -static inline uint32_t -uc_add_else(struct uc_state *state, uint32_t off, uint32_t add) -{ - uint32_t tail_off = off; - - while (OPn(tail_off, 2)) - tail_off = OPn(tail_off, 2); - - OPn(tail_off, 2) = add; - - return off; -} - -static inline uint32_t -uc_expect_token(struct uc_state *state, uint32_t off, int token) -{ - uint64_t tokens[(__T_MAX + 63) & -64] = {}; - - tokens[token / 64] |= ((uint64_t)1 << (token % 64)); - uc_parse_error(state, off, tokens, token); - - return 0; -} - -static inline uint32_t -_uc_check_op_seq_types(struct uc_state *state, uint32_t off, ...) -{ - uint64_t tokens[(__T_MAX + 63) & -64] = {}; - int type, token, max_token = 0; - uint32_t arg_off; - va_list ap; - - va_start(ap, off); - - while ((token = va_arg(ap, int)) != 0) { - tokens[token / 64] |= ((uint64_t)1 << (token % 64)); - max_token = (token > max_token) ? token : max_token; - } - - va_end(ap); - - for (arg_off = off; arg_off != 0; arg_off = OP_NEXT(arg_off)) { - type = OP_TYPE(arg_off); - - if (!(tokens[type / 64] & ((uint64_t)1 << (type % 64)))) { - uc_parse_error(state, off, tokens, max_token); - - return 0; - } - } - - return off; -} - -#define uc_check_op_seq_types(state, off, ...) _uc_check_op_seq_types(state, off, __VA_ARGS__, 0) - -static inline uint32_t -uc_reject_local(struct uc_state *state, uint32_t off) -{ - if (OP_TYPE(off) == T_LOCAL) { - uc_new_exception(state, OP_POS(off), - "Syntax error: Unexpected token\nDeclaration not allowed in this context"); - - return 0; - } - - return off; -} - -static inline uint32_t -uc_check_for_in(struct uc_state *state, uint32_t off) -{ - uint32_t arg_off, idx = 0; - - arg_off = (OP_TYPE(off) == T_LOCAL) ? 
OPn(off, 0) : off; - - if (OP_TYPE(arg_off) == T_LABEL) { - idx = arg_off; - - if (!OP_NEXT(arg_off)) { - uc_new_exception(state, OP_POS(arg_off) + json_object_get_string_len(OP_VAL(arg_off)), - "Syntax error: Unexpected token\nExpecting ',' or 'in'"); - - return 0; - } - - arg_off = OP_NEXT(arg_off); - } - - if (OP_TYPE(arg_off) != T_IN || OP_NEXT(arg_off) || OPn_TYPE(arg_off, 0) != T_LABEL) { - if (OP_TYPE(arg_off) == T_IN && OP_NEXT(arg_off)) - arg_off = OP_NEXT(arg_off); - - uc_new_exception(state, OP_POS(arg_off), "Syntax error: Invalid for-in expression"); - - return 0; - } - - /* transform T_LABEL->T_IN(T_LABEL, ...) into T_IN(T_LABEL->T_LABEL, ...) */ - if (idx) { - OP_NEXT(idx) = 0; - OPn(arg_off, 0) = append_op(idx, OPn(arg_off, 0)); - - if (OP_TYPE(off) == T_LOCAL) - OPn(off, 0) = arg_off; - else - off = arg_off; - } - - return off; -} - -} - -%syntax_error { - uint64_t tokens[(__T_MAX + 63) & -64] = {}; - int i, max_token = 0; - - for (i = 0; i < __T_MAX; i++) { - if (yy_find_shift_action(yypParser, (YYCODETYPE)i) < YYNSTATE + YYNRULE) { - tokens[i / 64] |= ((uint64_t)1 << (i % 64)); - max_token = i; - } - } - - uc_parse_error(state, TOKEN, tokens, max_token); -} - - -input ::= chunks(A). { state->main = new_op(T_FUNC, NULL, 0, 0, A); } -input ::= . { state->main = new_op(T_TEXT, xjs_new_string("")); state->main = new_op(T_FUNC, NULL, 0, 0, state->main); } - -chunks(A) ::= chunks(B) T_TEXT(C). { A = B ? append_op(B, C) : C; } -chunks(A) ::= chunks(B) tplexp(C). { A = B ? append_op(B, C) : C; } -chunks(A) ::= chunks(B) stmt(C). { A = B ? append_op(B, C) : C; } -chunks(A) ::= T_TEXT(B). { A = B; } -chunks(A) ::= tplexp(B). { A = B; } -chunks(A) ::= stmt(B). { A = B; } - -tplexp(A) ::= T_LEXP(B) exp_stmt(C) T_REXP. { A = wrap_op(B, C); } - -stmts(A) ::= stmts(B) stmt(C). { A = B ? append_op(B, C) : C; } -stmts(A) ::= stmt(B). { A = B; } - -stmt(A) ::= cpd_stmt(B). { A = B; } -stmt(A) ::= exp_stmt(B). { A = B; } -stmt(A) ::= sel_stmt(B). 
{ A = B; } -stmt(A) ::= iter_stmt(B). { A = B; } -stmt(A) ::= func_stmt(B). { A = B; } -stmt(A) ::= try_stmt(B). { A = B; } -stmt(A) ::= switch_stmt(B). { A = B; } -stmt(A) ::= ret_stmt(B). { A = B; } -stmt(A) ::= break_stmt(B). { A = B; } -stmt(A) ::= decl_stmt(B). { A = B; } - -//cpd_stmt(A) ::= T_LBRACE T_RBRACE. { A = NULL; } -cpd_stmt(A) ::= T_LBRACE stmts(B) exp(C) T_RBRACE. { A = B ? append_op(B, C) : C; } -cpd_stmt(A) ::= T_LBRACE stmts(B) T_RBRACE. { A = B; } -cpd_stmt(A) ::= T_LBRACE exp(B) T_RBRACE. { A = B; } - -exp_stmt(A) ::= exp(B) T_SCOL. { A = B; } -exp_stmt(A) ::= T_SCOL. { A = 0; } - -sel_stmt(A) ::= T_IF(B) T_LPAREN exp(C) T_RPAREN stmt(D) T_ELSE stmt(E). - { A = wrap_op(B, C, no_empty_obj(D), no_empty_obj(E)); } -sel_stmt(A) ::= T_IF(B) T_LPAREN exp(C) T_RPAREN stmt(D). [T_IF] - { A = wrap_op(B, C, no_empty_obj(D)); } -sel_stmt(A) ::= T_IF(B) T_LPAREN exp(C) T_RPAREN T_COLON chunks(D) sel_elifs(E) T_ELSE chunks(F) T_ENDIF. - { A = uc_add_else(state, wrap_op(B, C, D, E), F); } -sel_stmt(A) ::= T_IF(B) T_LPAREN exp(C) T_RPAREN T_COLON chunks(D) T_ELSE chunks(E) T_ENDIF. - { A = wrap_op(B, C, D, E); } -sel_stmt(A) ::= T_IF(B) T_LPAREN exp(C) T_RPAREN T_COLON chunks(D) T_ENDIF. [T_IF] - { A = wrap_op(B, C, D); } - -sel_elifs(A) ::= sel_elifs(B) sel_elif(C). { A = uc_add_else(state, B, C); } -sel_elifs(A) ::= sel_elif(B). { A = B; } - -sel_elif(A) ::= T_ELIF(B) T_LPAREN exp(C) T_RPAREN T_COLON chunks(D). - { A = wrap_op(B, C, D); } - -iter_stmt(A) ::= T_WHILE(B) T_LPAREN exp(C) T_RPAREN stmt(D). - { A = wrap_op(B, C, no_empty_obj(D)); } -iter_stmt(A) ::= T_WHILE(B) T_LPAREN exp(C) T_RPAREN T_COLON chunks(D) T_ENDWHILE. - { A = wrap_op(B, C, D); } -iter_stmt(A) ::= T_FOR(B) paren_exp(C) stmt(D). - { A = wrap_op(B, uc_check_for_in(state, C), NULL, NULL, no_empty_obj(D)); OP(A)->is_for_in = 1; } -iter_stmt(A) ::= T_FOR(B) paren_exp(C) T_COLON chunks(D) T_ENDFOR. 
- { A = wrap_op(B, uc_check_for_in(state, C), NULL, NULL, no_empty_obj(D)); OP(A)->is_for_in = 1; } -iter_stmt(A) ::= T_FOR(B) T_LPAREN decl_or_exp(C) exp_stmt(D) T_RPAREN stmt(E). - { A = wrap_op(B, C, D, NULL, no_empty_obj(E)); } -iter_stmt(A) ::= T_FOR(B) T_LPAREN decl_or_exp(C) exp_stmt(D) exp(E) T_RPAREN stmt(F). - { A = wrap_op(B, C, D, E, no_empty_obj(F)); } -iter_stmt(A) ::= T_FOR(B) T_LPAREN decl_or_exp(C) exp_stmt(D) T_RPAREN T_COLON chunks(E) T_ENDFOR. - { A = wrap_op(B, C, D, NULL, E); } -iter_stmt(A) ::= T_FOR(B) T_LPAREN decl_or_exp(C) exp_stmt(D) exp(E) T_RPAREN T_COLON chunks(F) T_ENDFOR. - { A = wrap_op(B, C, D, E, F); } - -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN T_RPAREN cpd_stmt(D). - { A = wrap_op(B, C, 0, D); } -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN T_RPAREN empty_object. - { A = wrap_op(B, C, 0, 0); } -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN T_RPAREN T_COLON chunks(D) T_ENDFUNC. - { A = wrap_op(B, C, 0, D); } -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN args(D) T_RPAREN cpd_stmt(E). - { A = wrap_op(B, C, D, E); } -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN args(D) T_RPAREN empty_object. - { A = wrap_op(B, C, D, 0); } -func_stmt(A) ::= T_FUNC(B) T_LABEL(C) T_LPAREN args(D) T_RPAREN T_COLON chunks(E) T_ENDFUNC. - { A = wrap_op(B, C, D, E); } - -try_stmt(A) ::= T_TRY(B) try_catch_block(C) T_CATCH T_LPAREN T_LABEL(D) T_RPAREN try_catch_block(E). - { A = wrap_op(B, C, D, E); } -try_stmt(A) ::= T_TRY(B) try_catch_block(C) T_CATCH try_catch_block(D). - { A = wrap_op(B, C, 0, D); } - -try_catch_block(A) ::= cpd_stmt(B). { A = B; } -try_catch_block(A) ::= empty_object. { A = 0; } - -switch_stmt(A) ::= T_SWITCH(B) T_LPAREN exp(C) T_RPAREN T_LBRACE switch_cases(D) T_RBRACE. - { A = wrap_op(B, C, D); } -switch_stmt(A) ::= T_SWITCH T_LPAREN exp(B) T_RPAREN empty_object. - { A = B; } - -switch_cases(A) ::= switch_cases(B) switch_case(C). { A = append_op(B, C); } -switch_cases(A) ::= switch_case(B). 
{ A = B; } - -switch_case(A) ::= T_CASE(B) exp(C) T_COLON stmts(D). { A = wrap_op(B, C, D); } -switch_case(A) ::= T_CASE(B) exp(C) T_COLON. { A = wrap_op(B, C); } -switch_case(A) ::= T_DEFAULT(B) T_COLON stmts(C). { A = wrap_op(B, C); } - -args(A) ::= sargs(B) T_COMMA T_ELLIP T_LABEL(C). { A = append_op(B, C); OP(C)->is_ellip = 1; } -args(A) ::= T_ELLIP T_LABEL(B). { A = B; OP(B)->is_ellip = 1; } -args(A) ::= sargs(B). { A = B; } - -sargs(A) ::= sargs(B) T_COMMA T_LABEL(C). { A = append_op(B, C); } -sargs(A) ::= T_LABEL(B). { A = B; } - -decl_or_exp(A) ::= exp_stmt(B). { A = B; } -decl_or_exp(A) ::= decl_stmt(B). { A = B; } - -ret_stmt(A) ::= T_RETURN(B) exp(C) T_SCOL. { A = wrap_op(B, C); } -ret_stmt(A) ::= T_RETURN(B) T_SCOL. { A = B; } - -break_stmt(A) ::= T_BREAK(B) T_SCOL. { A = B; } -break_stmt(A) ::= T_CONTINUE(B) T_SCOL. { A = B; } - -decl_stmt(A) ::= T_LOCAL(B) decls(C) T_SCOL. { A = wrap_op(B, uc_check_op_seq_types(state, C, T_ASSIGN, T_LABEL)); } - -decls(A) ::= decls(B) T_COMMA decl(C). { A = append_op(B, C); } -decls(A) ::= decl(B). { A = B; } - -decl(A) ::= T_LABEL(B) T_ASSIGN(C) arrow_exp(D). { A = wrap_op(C, B, D); } -decl(A) ::= T_LABEL(B) T_IN(C) arrow_exp(D). { A = wrap_op(C, B, D); } -decl(A) ::= T_LABEL(B). { A = B; } - -arrowfn_body(A) ::= cpd_stmt(B). { A = B; } -arrowfn_body(A) ::= assign_exp(B). { A = no_empty_obj(B); } - -exp(A) ::= exp(B) T_COMMA assign_exp(C). { A = append_op(B, C); } -exp(A) ::= assign_exp(B). { A = B; } - -assign_exp(A) ::= unary_exp(B) T_ASSIGN(C) arrow_exp(D). - { A = wrap_op(C, B, D); } -assign_exp(A) ::= unary_exp(B) T_ASADD arrow_exp(C). { A = new_op(T_ADD, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASSUB arrow_exp(C). { A = new_op(T_SUB, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASMUL arrow_exp(C). { A = new_op(T_MUL, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASDIV arrow_exp(C). 
{ A = new_op(T_DIV, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASMOD arrow_exp(C). { A = new_op(T_MOD, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASLEFT arrow_exp(C). { A = new_op(T_LSHIFT, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASRIGHT arrow_exp(C). - { A = new_op(T_RSHIFT, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASBAND arrow_exp(C). { A = new_op(T_BAND, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASBXOR arrow_exp(C). { A = new_op(T_BXOR, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= unary_exp(B) T_ASBOR arrow_exp(C). { A = new_op(T_BOR, NULL, B, C); A = new_op(T_ASSIGN, NULL, B, A); } -assign_exp(A) ::= arrow_exp(B). { A = B; } - -arrow_exp(A) ::= unary_exp(B) T_ARROW(C) arrowfn_body(D). - { A = wrap_op(C, 0, uc_check_op_seq_types(state, B, T_LABEL), D); } -arrow_exp(A) ::= T_LPAREN T_RPAREN T_ARROW(C) arrowfn_body(D). - { A = wrap_op(C, 0, 0, D); } -arrow_exp(A) ::= T_LPAREN T_ELLIP T_LABEL(B) T_RPAREN T_ARROW(C) arrowfn_body(D). - { A = wrap_op(C, 0, B, D); OP(B)->is_ellip = 1; } -arrow_exp(A) ::= T_LPAREN exp(B) T_COMMA T_ELLIP T_LABEL(C) T_RPAREN T_ARROW(D) arrowfn_body(E). - { A = append_op(B, C); A = wrap_op(D, 0, uc_check_op_seq_types(state, A, T_LABEL), E); OP(C)->is_ellip = 1; } -arrow_exp(A) ::= ternary_exp(B). { A = B; } - -ternary_exp(A) ::= or_exp(B) T_QMARK(C) assign_exp(D) T_COLON ternary_exp(E). - { A = wrap_op(C, B, D, E); } -ternary_exp(A) ::= or_exp(B). { A = B; } - -or_exp(A) ::= or_exp(B) T_OR(C) and_exp(D). { A = wrap_op(C, B, D); } -or_exp(A) ::= and_exp(B). { A = B; } - -and_exp(A) ::= and_exp(B) T_AND(C) bor_exp(D). { A = wrap_op(C, B, D); } -and_exp(A) ::= bor_exp(B). { A = B; } - -bor_exp(A) ::= bor_exp(B) T_BOR(C) bxor_exp(D). { A = wrap_op(C, B, D); } -bor_exp(A) ::= bxor_exp(B). 
{ A = B; } - -bxor_exp(A) ::= bxor_exp(B) T_BXOR(C) band_exp(D). { A = wrap_op(C, B, D); } -bxor_exp(A) ::= band_exp(B). { A = B; } - -band_exp(A) ::= band_exp(B) T_BAND(C) equal_exp(D). { A = wrap_op(C, B, D); } -band_exp(A) ::= equal_exp(B). { A = B; } - -equal_exp(A) ::= equal_exp(B) T_EQ(C) rel_exp(D). { A = wrap_op(C, B, D); } -equal_exp(A) ::= equal_exp(B) T_NE(C) rel_exp(D). { A = wrap_op(C, B, D); } -equal_exp(A) ::= equal_exp(B) T_EQS(C) rel_exp(D). { A = wrap_op(C, B, D); } -equal_exp(A) ::= equal_exp(B) T_NES(C) rel_exp(D). { A = wrap_op(C, B, D); } -equal_exp(A) ::= rel_exp(B). { A = B; } - -rel_exp(A) ::= rel_exp(B) T_LT(C) shift_exp(D). { A = wrap_op(C, B, D); } -rel_exp(A) ::= rel_exp(B) T_LE(C) shift_exp(D). { A = wrap_op(C, B, D); } -rel_exp(A) ::= rel_exp(B) T_GT(C) shift_exp(D). { A = wrap_op(C, B, D); } -rel_exp(A) ::= rel_exp(B) T_GE(C) shift_exp(D). { A = wrap_op(C, B, D); } -rel_exp(A) ::= rel_exp(B) T_IN(C) shift_exp(D). { A = wrap_op(C, B, D); } -rel_exp(A) ::= shift_exp(B). { A = B; } - -shift_exp(A) ::= shift_exp(B) T_LSHIFT(C) add_exp(D). { A = wrap_op(C, B, D); } -shift_exp(A) ::= shift_exp(B) T_RSHIFT(C) add_exp(D). { A = wrap_op(C, B, D); } -shift_exp(A) ::= add_exp(B). { A = B; } - -add_exp(A) ::= add_exp(B) T_ADD(C) mul_exp(D). { A = wrap_op(C, B, D); } -add_exp(A) ::= add_exp(B) T_SUB(C) mul_exp(D). { A = wrap_op(C, B, D); } -add_exp(A) ::= mul_exp(B). { A = B; } - -mul_exp(A) ::= mul_exp(B) T_MUL(C) unary_exp(D). { A = wrap_op(C, B, D); } -mul_exp(A) ::= mul_exp(B) T_DIV(C) unary_exp(D). { A = wrap_op(C, B, D); } -mul_exp(A) ::= mul_exp(B) T_MOD(C) unary_exp(D). { A = wrap_op(C, B, D); } -mul_exp(A) ::= unary_exp(B). { A = B; } - -unary_exp(A) ::= T_INC(B) unary_exp(C). [T_LPAREN] { A = wrap_op(B, C); } -unary_exp(A) ::= T_DEC(B) unary_exp(C). [T_LPAREN] { A = wrap_op(B, C); } -unary_exp(A) ::= T_ADD(B) unary_exp(C). [T_NOT] { A = wrap_op(B, C); } -unary_exp(A) ::= T_SUB(B) unary_exp(C). 
[T_NOT] { A = wrap_op(B, C); } -unary_exp(A) ::= T_COMPL(B) unary_exp(C). { A = wrap_op(B, C); } -unary_exp(A) ::= T_NOT(B) unary_exp(C). { A = wrap_op(B, C); } -unary_exp(A) ::= postfix_exp(B). { A = B; } - -postfix_exp(A) ::= unary_exp(B) T_INC(C). { A = wrap_op(C, B); OP(A)->is_postfix = 1; } -postfix_exp(A) ::= unary_exp(B) T_DEC(C). { A = wrap_op(C, B); OP(A)->is_postfix = 1; } -postfix_exp(A) ::= unary_exp(B) T_LPAREN(C) T_RPAREN. { A = wrap_op(C, B); } -postfix_exp(A) ::= unary_exp(B) T_LPAREN(C) arg_exps(D) T_RPAREN. - { A = wrap_op(C, B, D); } -postfix_exp(A) ::= postfix_exp(B) T_DOT(C) T_LABEL(D). { A = wrap_op(C, B, D); } -postfix_exp(A) ::= postfix_exp(B) T_LBRACK(C) exp(D) T_RBRACK. - { A = wrap_op(C, B, D); OP(A)->is_postfix = 1; } -postfix_exp(A) ::= primary_exp(B). { A = B; } - -primary_exp(A) ::= T_BOOL(B). { A = B; } -primary_exp(A) ::= T_NUMBER(B). { A = B; } -primary_exp(A) ::= T_DOUBLE(B). { A = B; } -primary_exp(A) ::= T_STRING(B). { A = B; } -primary_exp(A) ::= T_LABEL(B). { A = B; } -primary_exp(A) ::= T_REGEXP(B). { A = B; } -primary_exp(A) ::= T_NULL(B). { A = B; } -primary_exp(A) ::= T_THIS(B). { A = B; } -primary_exp(A) ::= array(B). { A = B; } -primary_exp(A) ::= object(B). { A = B; } -primary_exp(A) ::= paren_exp(B). { A = uc_reject_local(state, B); } -primary_exp(A) ::= T_FUNC(B) T_LPAREN T_RPAREN empty_object. - { A = B; } -primary_exp(A) ::= T_FUNC(B) T_LPAREN args(C) T_RPAREN empty_object. - { A = wrap_op(B, 0, C, 0); } -primary_exp(A) ::= T_FUNC(B) T_LPAREN T_RPAREN cpd_stmt(C). - { A = wrap_op(B, 0, 0, C); } -primary_exp(A) ::= T_FUNC(B) T_LPAREN args(C) T_RPAREN cpd_stmt(D). - { A = wrap_op(B, 0, C, D); } - -paren_exp(A) ::= T_LPAREN exp(B) T_RPAREN. { A = B; } -paren_exp(A) ::= T_LPAREN T_LOCAL(B) decls(C) T_RPAREN. { A = wrap_op(B, C); } - -array(A) ::= T_LBRACK(B) T_RBRACK. { A = B; } -array(A) ::= T_LBRACK(B) items(C) T_RBRACK. { A = wrap_op(B, C); } - -items(A) ::= items(B) T_COMMA item(C). 
{ A = append_op(B, C); } -items(A) ::= item(B). { A = B; } - -item(A) ::= T_ELLIP assign_exp(B). { A = OP_NEXT(B) ? new_op(T_COMMA, NULL, B) : B; OP(A)->is_ellip = 1; } -item(A) ::= assign_exp(B). { A = OP_NEXT(B) ? new_op(T_COMMA, NULL, B) : B; } - -object(A) ::= empty_object(B). { A = B; } -object(A) ::= T_LBRACE(B) tuples(C) T_RBRACE. { A = wrap_op(B, C); } - -empty_object(A) ::= T_LBRACE(B) T_RBRACE. { A = B; } - -tuples(A) ::= tuples(B) T_COMMA tuple(C). { A = append_op(B, C); } -tuples(A) ::= tuple(B). { A = B; } - -tuple(A) ::= T_LABEL(B) T_COLON exp(C). { A = wrap_op(B, C); } -tuple(A) ::= T_STRING(B) T_COLON exp(C). { A = wrap_op(B, C); } -tuple(A) ::= T_ELLIP(B) assign_exp(C). { A = wrap_op(B, C); } - -arg_exps(A) ::= arg_exps(B) T_COMMA arg_exp(C). { A = append_op(B, C); OP(A)->is_list = 1; } -arg_exps(A) ::= arg_exp(B). { A = B; OP(A)->is_list = 1; } - -arg_exp(A) ::= T_ELLIP assign_exp(B). { A = OP_NEXT(B) ? new_op(T_COMMA, NULL, B) : B; OP(A)->is_ellip = 1; } -arg_exp(A) ::= assign_exp(B). { A = OP_NEXT(B) ? new_op(T_COMMA, NULL, B) : B; } diff --git a/source.c b/source.c new file mode 100644 index 0000000..21b9124 --- /dev/null +++ b/source.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <string.h> + +#include "source.h" + + +uc_source * +uc_source_new_file(const char *path) +{ + FILE *fp = fopen(path, "rb"); + uc_source *src; + + if (!fp) + return NULL; + + src = xalloc(ALIGN(sizeof(*src)) + strlen(path) + 1); + src->fp = fp; + src->buffer = NULL; + src->filename = strcpy((char *)src + ALIGN(sizeof(*src)), path); + + src->usecount = 1; + + src->lineinfo.count = 0; + src->lineinfo.entries = NULL; + + return src; +} + +uc_source * +uc_source_new_buffer(const char *name, char *buf, size_t len) +{ + FILE *fp = fmemopen(buf, len, "rb"); + uc_source *src; + + if (!fp) + return NULL; + + src = xalloc(ALIGN(sizeof(*src)) + strlen(name) + 1); + src->fp = fp; + src->buffer = buf; + src->filename = strcpy((char *)src + ALIGN(sizeof(*src)), name); + + src->usecount = 1; + + src->lineinfo.count = 0; + src->lineinfo.entries = NULL; + + return src; +} + +size_t +uc_source_get_line(uc_source *source, size_t *offset) +{ + uc_lineinfo *lines = &source->lineinfo; + size_t i, pos = 0, line = 0, lastoff = 0; + + for (i = 0; i < lines->count; i++) { + if (lines->entries[i] & 0x80) { + lastoff = pos; + line++; + pos++; + } + + pos += (lines->entries[i] & 0x7f); + + if (pos >= *offset) { + *offset -= lastoff - 1; + + return line; + } + } + + return 0; +} + +uc_source * +uc_source_get(uc_source *source) +{ + if (!source) + return NULL; + + source->usecount++; + + return source; +} + +void +uc_source_put(uc_source *source) +{ + if (!source) + return; + + if (source->usecount > 1) { + source->usecount--; + + return; + } + + uc_vector_clear(&source->lineinfo); + fclose(source->fp); + free(source->buffer); + free(source); +} @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Jo-Philipp Wich <jo@mein.io> + * Copyright (C) 2021 Jo-Philipp Wich <jo@mein.io> * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -14,29 +14,31 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE 
OF THIS SOFTWARE. */ -#ifndef __MATCHER_H_ -#define __MATCHER_H_ +#ifndef __SOURCE_H_ +#define __SOURCE_H_ -#include <string.h> -#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> #include <stdio.h> -#include <regex.h> -#include "ast.h" +#include "util.h" -bool -uc_cmp(int how, struct json_object *v1, struct json_object *v2); -bool -uc_val_is_truish(struct json_object *val); +uc_declare_vector(uc_lineinfo, uint8_t); -enum json_type -uc_cast_number(struct json_object *v, int64_t *n, double *d); +typedef struct { + char *filename, *buffer; + FILE *fp; + size_t usecount, off; + uc_lineinfo lineinfo; +} uc_source; -struct json_object * -uc_invoke(struct uc_state *, uint32_t, struct json_object *, struct json_object *, struct json_object *); +uc_source *uc_source_new_file(const char *path); +uc_source *uc_source_new_buffer(const char *name, char *buf, size_t len); -struct json_object * -uc_run(struct uc_state *state, struct json_object *env, struct json_object *modules); +size_t uc_source_get_line(uc_source *source, size_t *offset); -#endif +uc_source *uc_source_get(uc_source *source); +void uc_source_put(uc_source *source); + +#endif /* __SOURCE_H_ */ diff --git a/tests/00_syntax/05_block_nesting b/tests/00_syntax/05_block_nesting index 7b74adb..fcfd7da 100644 --- a/tests/00_syntax/05_block_nesting +++ b/tests/00_syntax/05_block_nesting @@ -8,6 +8,13 @@ In line 2, byte 61: Near here --------------------------------------------------^ +Syntax error: Template blocks may not be nested +In line 3, byte 61: + + `We may not nest expression blocks into statement blocks: {% {{ 1 + 2 }} %}.` + Near here --------------------------------------------------^ + + -- End -- -- Testcase -- diff --git a/tests/00_syntax/13_object_literals b/tests/00_syntax/13_object_literals index 60c9f32..18fbbed 100644 --- a/tests/00_syntax/13_object_literals +++ b/tests/00_syntax/13_object_literals @@ -48,6 +48,7 @@ either JSON or JavaScript notation. 
print(another_obj, "\n"); print(third_obj, "\n"); print(nested_obj, "\n"); +%} -- End -- @@ -90,3 +91,84 @@ of object properties into other objects. ]), "\n"); %} -- End -- + + +ES2015 short hand property notation is supported as well. + +-- Expect stdout -- +{ "a": 123, "b": true, "c": "test" } +-- End -- + +-- Testcase -- +{% + a = 123; + b = true; + c = "test"; + + o = { a, b, c }; + + print(o, "\n"); +%} +-- End -- + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting ':' +In line 2, byte 14: + + ` o = { "foo" };` + Near here ------^ + + +-- End -- + +-- Testcase -- +{% + o = { "foo" }; +%} +-- End -- + + +ES2015 computed property names are supported. + +-- Expect stdout -- +{ "test": true, "hello": false, "ABC": 123 } +-- End -- + +-- Testcase -- +{% + s = "test"; + o = { + [s]: true, + ["he" + "llo"]: false, + [uc("abc")]: 123 + }; + + print(o, "\n"); +%} +-- End -- + +-- Expect stderr -- +Syntax error: Expecting expression +In line 2, byte 10: + + ` o1 = { []: true };` + Near here --^ + + +Syntax error: Unexpected token +Expecting ']' +In line 3, byte 14: + + ` o2 = { [true, false]: 123 };` + Near here ------^ + + +-- End -- + +-- Testcase -- +{% + o1 = { []: true }; + o2 = { [true, false]: 123 }; +%} +-- End -- diff --git a/tests/00_syntax/16_for_loop b/tests/00_syntax/16_for_loop index 33d1d97..67edc21 100644 --- a/tests/00_syntax/16_for_loop +++ b/tests/00_syntax/16_for_loop @@ -218,11 +218,12 @@ Ensure that for-in loop expressions with more than two variables are rejected. -- Expect stderr -- -Syntax error: Invalid for-in expression -In line 2, byte 14: +Syntax error: Unexpected token +Expecting ';' +In line 2, byte 24: ` for (let x, y, z in {})` - Near here ------^ + Near here ----------------^ -- End -- @@ -238,11 +239,12 @@ In line 2, byte 14: Ensure that assignments in for-in loop expressions are rejected. 
-- Expect stderr -- -Syntax error: Invalid for-in expression -In line 2, byte 13: +Syntax error: Unexpected token +Expecting ';' +In line 2, byte 25: ` for (let x = 1, y in {})` - Near here -----^ + Near here -----------------^ -- End -- @@ -259,7 +261,7 @@ Ensure that too short for-in loop expressions are rejected (1/2). -- Expect stderr -- Syntax error: Unexpected token -Expecting ',' or 'in' +Expecting ';' In line 2, byte 12: ` for (let x)` @@ -279,11 +281,12 @@ In line 2, byte 12: Ensure that too short for-in loop expressions are rejected (2/2). -- Expect stderr -- -Syntax error: Invalid for-in expression -In line 2, byte 14: +Syntax error: Unexpected token +Expecting ';' +In line 2, byte 15: ` for (let x, y)` - Near here ------^ + Near here -------^ -- End -- diff --git a/tests/00_syntax/17_while_loop b/tests/00_syntax/17_while_loop index 4dfaccc..1e68d6b 100644 --- a/tests/00_syntax/17_while_loop +++ b/tests/00_syntax/17_while_loop @@ -32,7 +32,7 @@ Iteration 8 Iteration 9 A counting while-loop using the alternative syntax: -Iteration null +Iteration 0 Iteration 1 Iteration 2 Iteration 3 diff --git a/tests/00_syntax/19_arrow_functions b/tests/00_syntax/19_arrow_functions index 4847d8a..102c527 100644 --- a/tests/00_syntax/19_arrow_functions +++ b/tests/00_syntax/19_arrow_functions @@ -108,11 +108,11 @@ subsequent testcase asserts that case. -- Expect stderr -- Syntax error: Unexpected token -Expecting Label -In line 2, byte 5: +Expecting ';' +In line 2, byte 10: ` (a + 1) => { print("test\n") }` - ^-- Near here + Near here --^ -- End -- diff --git a/tests/00_syntax/21_regex_literals b/tests/00_syntax/21_regex_literals index 4aef33f..3af53bb 100644 --- a/tests/00_syntax/21_regex_literals +++ b/tests/00_syntax/21_regex_literals @@ -35,7 +35,7 @@ Testing invalid flag characters. 
-- Expect stderr -- Syntax error: Unexpected token -Expecting ',' or ';' +Expecting ';' In line 2, byte 8: ` /test/x` diff --git a/tests/01_arithmetic/04_inc_dec b/tests/01_arithmetic/04_inc_dec index 5a048fe..ae50ceb 100644 --- a/tests/01_arithmetic/04_inc_dec +++ b/tests/01_arithmetic/04_inc_dec @@ -17,9 +17,9 @@ or decrement operation is NaN. -- Expect stdout -- Incrementing a not existing variable assumes "0" as initial value: - - Postfix increment result: null, value after: 1 + - Postfix increment result: 0, value after: 1 - Prefix increment result: 1, value after: 1 - - Postfix decrement result: null, value after: -1 + - Postfix decrement result: 0, value after: -1 - Prefix decrement result: -1, value after: -1 Incrementing a non-numeric value will convert it to a number: diff --git a/tests/02_runtime/00_scoping b/tests/02_runtime/00_scoping index 2bca2ab..5fadf43 100644 --- a/tests/02_runtime/00_scoping +++ b/tests/02_runtime/00_scoping @@ -12,7 +12,7 @@ c_global=true c_local=false -When seting a nonlet variable, it is set in the nearest parent +When seting a nonlocal variable, it is set in the nearest parent scope containing the variable or in the root scope if the variable was not found. @@ -25,13 +25,13 @@ Variables implicitly declared by for-in or counting for loops follow the same scoping rules. inner2 f_a=3 -inner2 f_b=3 +inner2 f_b= inner2 f_c=3 -inner2 f_d=3 +inner2 f_d= inner2 f_e=3 inner f_a=3 -inner f_b=3 +inner f_b= inner f_c=3 inner f_d= inner f_e=3 @@ -73,7 +73,7 @@ c_global={{ !!c_global }} c_local={{ !!c_local }} -When seting a nonlet variable, it is set in the nearest parent +When seting a nonlocal variable, it is set in the nearest parent scope containing the variable or in the root scope if the variable was not found. @@ -110,7 +110,7 @@ scoping rules. 
{% function scope3() { - // f_a is not declared let and be set i nthe root scope + // f_a is not declared local and be set in the root scope for (f_a = 1; f_a < 3; f_a++) ; @@ -120,7 +120,7 @@ scoping rules. let f_c; function scope4() { - // f_c is not declared let but declared in the parent scope, it + // f_c is not declared local but declared in the parent scope, it // will be set there for (f_c in [1, 2, 3]) ; diff --git a/tests/02_runtime/02_this b/tests/02_runtime/02_this index e629853..d8e85d2 100644 --- a/tests/02_runtime/02_this +++ b/tests/02_runtime/02_this @@ -13,7 +13,8 @@ true } // When invoked, "this" will point to the object containing the function - let o = { + let o; + o = { test: function() { return (this === o); } @@ -34,7 +35,8 @@ true -- Testcase -- {% - let o = { + let o; + o = { test: function() { return (this === o); } @@ -45,3 +47,4 @@ true print(o.test(dummy.foo, dummy.bar), "\n"); print(o.test(dummy.foo, o.test(dummy.foo, dummy.bar)), "\n"); %} +-- End -- diff --git a/tests/02_runtime/03_try_catch b/tests/02_runtime/03_try_catch index 518c1f1..751ca1d 100644 --- a/tests/02_runtime/03_try_catch +++ b/tests/02_runtime/03_try_catch @@ -29,3 +29,110 @@ After exceptions. print("After exceptions.\n"); %} -- End -- + + +Ensure that exceptions are propagated through C function calls. + +-- Expect stderr -- +exception +In [anonymous function](), line 3, byte 18: + called from function replace ([C]) + called from anonymous function ([stdin]:4:3) + + ` die("exception");` + Near here -------------^ + + +-- End -- + +-- Testcase -- +{% + replace("test", "t", function(m) { + die("exception"); + }); +%} +-- End -- + + +Ensure that exception can be catched through C function calls. 
+ +-- Expect stdout -- +Caught exception: exception +-- End -- + +-- Testcase -- +{% + try { + replace("test", "t", function(m) { + die("exception"); + }); + } + catch (e) { + print("Caught exception: ", e, "\n"); + } +%} +-- End -- + + +Ensure that exceptions are propagated through user function calls. + +-- Expect stderr -- +exception +In a(), line 3, byte 18: + called from function b ([stdin]:7:5) + called from function c ([stdin]:11:5) + called from anonymous function ([stdin]:14:4) + + ` die("exception");` + Near here -------------^ + + +-- End -- + +-- Testcase -- +{% + function a() { + die("exception"); + } + + function b() { + a(); + } + + function c() { + b(); + } + + c(); +%} +-- End -- + + +Ensure that exceptions can be caught in parent functions. + +-- Expect stdout -- +Caught exception: exception +-- End -- + +-- Testcase -- +{% + function a() { + die("exception"); + } + + function b() { + a(); + } + + function c() { + try { + b(); + } + catch (e) { + print("Caught exception: ", e, "\n"); + } + } + + c(); +%} +-- End -- diff --git a/tests/02_runtime/04_switch_case b/tests/02_runtime/04_switch_case index 0de87dc..4c1fc57 100644 --- a/tests/02_runtime/04_switch_case +++ b/tests/02_runtime/04_switch_case @@ -84,7 +84,26 @@ default -- End -- -4. Ensure that duplicate default cases emit a syntax +4. Ensure that a single default case matches. + +-- Expect stdout -- +default +default +-- End -- + +-- Testcase -- +{% + for (n in [1, 3]) { + switch (n) { + default: + print("default\n"); + } + } +%} +-- End -- + + +5. Ensure that duplicate default cases emit a syntax error during parsing. -- Expect stderr -- @@ -95,6 +114,13 @@ In line 6, byte 3: ^-- Near here +Syntax error: Expecting expression +In line 8, byte 2: + + ` }` + ^-- Near here + + -- End -- -- Testcase -- @@ -110,7 +136,7 @@ In line 6, byte 3: -- End -- -5. Ensure that case values use strict comparison. +6. Ensure that case values use strict comparison. 
-- Expect stdout -- b @@ -142,7 +168,7 @@ b -- End -- -6. Ensure that case values may be complex expressions. +7. Ensure that case values may be complex expressions. -- Expect stdout -- 2, 3, 1 @@ -159,7 +185,7 @@ b -- End -- -7. Ensure that empty switch statements are accepted by the +8. Ensure that empty switch statements are accepted by the parser and that the test expression is evaluated. -- Expect stdout -- @@ -179,7 +205,7 @@ true -- End -- -8. Ensure that `return` breaks out of switch statements. +9. Ensure that `return` breaks out of switch statements. -- Expect stdout -- one @@ -207,7 +233,7 @@ two -- End -- -9. Ensure that `continue` breaks out of switch statements. +10. Ensure that `continue` breaks out of switch statements. -- Expect stdout -- one @@ -234,7 +260,7 @@ two -- End -- -10. Ensure that exceptions break out of switch statements. +11. Ensure that exceptions break out of switch statements. -- Expect stdout -- one @@ -242,11 +268,11 @@ one -- Expect stderr -- Died -In test(), line 6, byte 7: - called from anonymous function ([stdin]:17:12) +In test(), line 6, byte 8: + called from anonymous function ([stdin]:17:14) ` die();` - Near here -----^ + Near here ------^ -- End -- @@ -273,7 +299,7 @@ In test(), line 6, byte 7: -- End -- -11. Ensure that consecutive cases values are properly handled. +12. Ensure that consecutive cases values are properly handled. -- Expect stdout -- three and four diff --git a/tests/02_runtime/06_recursion b/tests/02_runtime/06_recursion index b222640..470fc3a 100644 --- a/tests/02_runtime/06_recursion +++ b/tests/02_runtime/06_recursion @@ -39,11 +39,11 @@ Testing recursive invocations. 
-- Expect stderr -- Runtime error: Too much recursion -In test(), line 3, byte 7: - called from anonymous function ([stdin]:6:6) +In test(), line 3, byte 8: + called from anonymous function ([stdin]:6:7) ` test();` - Near here --^ + Near here ---^ -- End -- @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef __UTIL_H_ +#define __UTIL_H_ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> /* va_start(), va_end(), va_list */ +#include <string.h> /* strdup() */ +#include <json-c/json.h> + + +/* alignment & array size */ + +#ifndef ALIGN +#define ALIGN(x) (((x) + sizeof(size_t) - 1) & -sizeof(size_t)) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif + +#define JSON_C_TO_STRING_STRICT (1<<31) + + +/* vector macros */ + +#define UC_VECTOR_CHUNK_SIZE 8 + +#define uc_declare_vector(name, type) \ + typedef struct { \ + size_t count; \ + type *entries; \ + } name + +#define uc_vector_grow(vec) \ + do { \ + if (((vec)->count % UC_VECTOR_CHUNK_SIZE) == 0) { \ + (vec)->entries = xrealloc((vec)->entries, sizeof((vec)->entries[0]) * ((vec)->count + UC_VECTOR_CHUNK_SIZE)); \ + memset(&(vec)->entries[(vec)->count], 0, sizeof((vec)->entries[0]) * UC_VECTOR_CHUNK_SIZE); \ + } \ + } while(0) + +#define uc_vector_clear(vec) \ + do { \ + free((vec)->entries); \ + (vec)->entries = NULL; \ + (vec)->count = 0; \ + } while(0) + +#define uc_vector_first(vec) \ + (&((vec)->entries[0])) + +#define uc_vector_last(vec) \ + (&((vec)->entries[(vec)->count - 1])) + + +/* debug helper */ + +static inline uint32_t getrefcnt(json_object *v) { + struct { + enum json_type o_type; + uint32_t _ref_count; + } *spy = (void *)v; + + return spy ? 
spy->_ref_count : 0; +} + +static inline void *xalloc(size_t size) { + void *ptr = calloc(1, size); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + + +/* "failsafe" utility functions */ + +static inline void *xrealloc(void *ptr, size_t size) { + ptr = realloc(ptr, size); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline char *xstrdup(const char *s) { + char *ptr = strdup(s); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_object(void) { + json_object *ptr = json_object_new_object(); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_array(void) { + json_object *ptr = json_object_new_array(); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_array_size(int size) { + json_object *ptr = json_object_new_array_ext(size); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_int64(int64_t n) { + json_object *ptr = json_object_new_int64(n); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_uint64(uint64_t n) { + json_object *ptr = json_object_new_uint64(n); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_string(const char *s) { + json_object *ptr = json_object_new_string(s); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_string_len(const char *s, size_t len) { + json_object *ptr = json_object_new_string_len(s, len); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + +static inline json_object *xjs_new_boolean(bool 
v) { + json_object *ptr = json_object_new_boolean(v); + + if (!ptr) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return ptr; +} + + +static inline struct json_tokener *xjs_new_tokener(void) { + struct json_tokener *tok = json_tokener_new(); + + if (!tok) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return tok; +} + +static inline int xasprintf(char **strp, const char *fmt, ...) { + va_list ap; + int len; + + va_start(ap, fmt); + len = vasprintf(strp, fmt, ap); + va_end(ap); + + if (len == -1) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return len; +} + +static inline int xvasprintf(char **strp, const char *fmt, va_list ap) { + int len = vasprintf(strp, fmt, ap); + + if (len == -1) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + return len; +} + +#endif /* __UTIL_H_ */ @@ -0,0 +1,716 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <string.h> /* memcpy(), memset() */ +#include <endian.h> /* htobe64(), be64toh() */ +#include <math.h> /* isnan(), INFINITY */ +#include <ctype.h> /* isspace(), isdigit(), isxdigit() */ +#include <errno.h> + +#include "util.h" +#include "chunk.h" +#include "value.h" +#include "object.h" +#include "lexer.h" /* TK_* */ + +#define TAG_TYPE uint64_t +#define TAG_BITS 3 +#define TAG_MASK ((1LL << ((sizeof(TAG_TYPE) << 3) - TAG_BITS)) - 1) +#define TAG_MAXN (TAG_MASK / 2) +#define TAG_ALIGN(s) (((s) + (1 << TAG_BITS) - 1) & -(1 << TAG_BITS)) +#define TAG_GET_TYPE(n) (int)((TAG_TYPE)n & ((1 << TAG_BITS) - 1)) +#define TAG_FIT_NV(n) ((int64_t)n >= -TAG_MAXN && (int64_t)n <= TAG_MAXN) +#define TAG_SET_NV(n) ((TAG_TYPE)((int64_t)n + TAG_MAXN) << TAG_BITS) +#define TAG_GET_NV(n) (int64_t)((int64_t)(((TAG_TYPE)n >> TAG_BITS) & TAG_MASK) - TAG_MAXN) +#define TAG_FIT_STR(l) ((l - 1) < (((sizeof(TAG_TYPE) << 3) - TAG_BITS) >> 3)) +#define TAG_SET_STR_L(l) (TAG_TYPE)((l & ((1 << (8 - TAG_BITS)) - 1)) << TAG_BITS) +#define TAG_GET_STR_L(n) (size_t)(((TAG_TYPE)n >> TAG_BITS) & ((1 << (8 - TAG_BITS)) - 1)) +#define TAG_GET_BOOL(n) (bool)(((TAG_TYPE)n >> TAG_BITS) & 1) +#define TAG_GET_OFFSET(n) (size_t)(((TAG_TYPE)n >> TAG_BITS) & TAG_MASK) + +#define UC_VALLIST_CHUNK_SIZE 8 + + +static int +uc_double_tostring(json_object *v, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + double d = json_object_get_double(v); + + if (isnan(d)) + return sprintbuf(pb, strict ? "\"NaN\"" : "NaN"); + + if (d == INFINITY) + return sprintbuf(pb, strict ? "1e309" : "Infinity"); + + if (d == -INFINITY) + return sprintbuf(pb, strict ? 
"-1e309" : "-Infinity"); + + return sprintbuf(pb, "%g", d); +} + +json_object * +uc_double_new(double v) +{ + json_object *d = json_object_new_double(v); + + if (!d) { + fprintf(stderr, "Out of memory\n"); + abort(); + } + + json_object_set_serializer(d, uc_double_tostring, NULL, NULL); + + return d; +} + +bool +uc_val_is_truish(json_object *val) +{ + double d; + + switch (json_object_get_type(val)) { + case json_type_int: + return (json_object_get_int64(val) != 0); + + case json_type_double: + d = json_object_get_double(val); + + return (d != 0 && !isnan(d)); + + case json_type_boolean: + return (json_object_get_boolean(val) != false); + + case json_type_string: + return (json_object_get_string_len(val) > 0); + + case json_type_array: + case json_type_object: + return true; + + default: + return false; + } +} + +enum json_type +uc_cast_number(json_object *v, int64_t *n, double *d) +{ + bool is_double = false; + const char *s; + char *e; + + *d = 0.0; + *n = 0; + + switch (json_object_get_type(v)) { + case json_type_int: + *n = json_object_get_int64(v); + + return json_type_int; + + case json_type_double: + *d = json_object_get_double(v); + + return json_type_double; + + case json_type_null: + return json_type_int; + + case json_type_boolean: + *n = json_object_get_boolean(v) ? 
1 : 0; + + return json_type_int; + + case json_type_string: + s = json_object_get_string(v); + + while (isspace(*s)) + s++; + + if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X') && isxdigit(s[2])) { + *n = strtoll(s, &e, 16); + } + else if (s[0] == '0' && isdigit(s[2])) { + *n = strtoll(s, &e, 8); + } + else { + *n = strtoll(s, &e, 10); + + if (*e == '.') { + *d = strtod(s, &e); + is_double = (e > s); + } + } + + while (isspace(*e)) + e++; + + if (*e) { + *d = NAN; + + return json_type_double; + } + + if (is_double) + return json_type_double; + + return json_type_int; + + default: + *d = NAN; + + return json_type_double; + } +} + +static json_object * +uc_getproto(json_object *obj) +{ + uc_prototype *proto; + + switch (uc_object_type(obj)) { + case UC_OBJ_RESSOURCE: + proto = uc_ressource_prototype(obj); + break; + + case UC_OBJ_PROTOTYPE: + proto = uc_object_as_prototype(obj)->parent; + break; + + default: + proto = NULL; + } + + return proto ? proto->header.jso : NULL; +} + +json_object * +uc_getval(json_object *scope, json_object *key) +{ + json_object *o, *v; + int64_t idx; + double d; + + if (!key) + return NULL; + + if (json_object_is_type(scope, json_type_array)) { + /* only consider doubles with integer values as array keys */ + if (json_object_is_type(key, json_type_double)) { + d = json_object_get_double(key); + + if ((double)(int64_t)(d) != d) + return NULL; + + idx = (int64_t)d; + } + else { + errno = 0; + idx = json_object_get_int64(key); + + if (errno != 0) + return NULL; + } + + return json_object_get(json_object_array_get_idx(scope, idx)); + } + + for (o = scope; o; o = uc_getproto(o)) { + if (!json_object_is_type(o, json_type_object)) + continue; + + if (json_object_object_get_ex(o, key ? 
json_object_get_string(key) : "null", &v)) + return json_object_get(v); + } + + return NULL; +} + +json_object * +uc_setval(json_object *scope, json_object *key, json_object *val) +{ + int64_t idx; + + if (!key) + return NULL; + + if (json_object_is_type(scope, json_type_array)) { + errno = 0; + idx = json_object_get_int64(key); + + if (errno != 0) + return NULL; + + if (json_object_array_put_idx(scope, idx, val)) + return NULL; + + return json_object_get(val); + } + + if (json_object_object_add(scope, key ? json_object_get_string(key) : "null", val)) + return NULL; + + return json_object_get(val); +} + +bool +uc_cmp(int how, json_object *v1, json_object *v2) +{ + enum json_type t1 = json_object_get_type(v1); + enum json_type t2 = json_object_get_type(v2); + int64_t n1, n2, delta; + double d1, d2; + + if (t1 == json_type_string && t2 == json_type_string) { + delta = strcmp(json_object_get_string(v1), json_object_get_string(v2)); + } + else { + if ((t1 == json_type_array && t2 == json_type_array) || + (t1 == json_type_object && t2 == json_type_object)) { + delta = (void *)v1 - (void *)v2; + } + else { + t1 = uc_cast_number(v1, &n1, &d1); + t2 = uc_cast_number(v2, &n2, &d2); + + if (t1 == json_type_double || t2 == json_type_double) { + d1 = (t1 == json_type_double) ? d1 : (double)n1; + d2 = (t2 == json_type_double) ? 
d2 : (double)n2; + + if (d1 == d2) + delta = 0; + else if (d1 < d2) + delta = -1; + else + delta = 1; + } + else { + delta = n1 - n2; + } + } + } + + switch (how) { + case TK_LT: + return (delta < 0); + + case TK_LE: + return (delta <= 0); + + case TK_GT: + return (delta > 0); + + case TK_GE: + return (delta >= 0); + + case TK_EQ: + return (delta == 0); + + case TK_NE: + return (delta != 0); + + default: + return false; + } +} + +bool +uc_eq(json_object *v1, json_object *v2) +{ + uc_objtype_t o1 = uc_object_type(v1); + uc_objtype_t o2 = uc_object_type(v2); + enum json_type t1 = json_object_get_type(v1); + enum json_type t2 = json_object_get_type(v2); + + if (o1 != o2 || t1 != t2) + return false; + + switch (t1) { + case json_type_array: + case json_type_object: + return (v1 == v2); + + case json_type_boolean: + return (json_object_get_boolean(v1) == json_object_get_boolean(v2)); + + case json_type_double: + if (isnan(json_object_get_double(v1)) || isnan(json_object_get_double(v2))) + return false; + + return (json_object_get_double(v1) == json_object_get_double(v2)); + + case json_type_int: + return (json_object_get_int64(v1) == json_object_get_int64(v2)); + + case json_type_string: + return !strcmp(json_object_get_string(v1), json_object_get_string(v2)); + + case json_type_null: + return true; + } + + return false; +} + +void +uc_vallist_init(uc_value_list *list) +{ + list->isize = 0; + list->dsize = 0; + list->index = NULL; + list->data = NULL; +} + +void +uc_vallist_free(uc_value_list *list) +{ + json_object *o; + size_t i; + + for (i = 0; i < list->isize; i++) { + if (TAG_GET_TYPE(list->index[i]) == TAG_PTR) { + o = uc_vallist_get(list, i); + uc_value_put(o); + uc_value_put(o); + } + } + + free(list->index); + free(list->data); + uc_vallist_init(list); +} + +static void +add_num(uc_value_list *list, int64_t n) +{ + size_t sz = TAG_ALIGN(sizeof(n)); + + if (TAG_FIT_NV(n)) { + list->index[list->isize++] = (TAG_TYPE)(TAG_NUM | TAG_SET_NV(n)); + } + else { + if 
(list->dsize + sz > TAG_MASK) { + fprintf(stderr, "Constant data too large\n"); + abort(); + } + + list->data = xrealloc(list->data, list->dsize + sz); + + n = htobe64(n); + memset(list->data + list->dsize, 0, sz); + memcpy(list->data + list->dsize, &n, sizeof(n)); + + list->index[list->isize++] = (TAG_TYPE)(TAG_LNUM | (list->dsize << TAG_BITS)); + list->dsize += sz; + } +} + +static ssize_t +find_num(uc_value_list *list, int64_t n) +{ + TAG_TYPE search; + size_t i; + + if (TAG_FIT_NV(n)) { + search = (TAG_TYPE)(TAG_NUM | TAG_SET_NV(n)); + + for (i = 0; i < list->isize; i++) + if (list->index[i] == search) + return i; + } + else { + for (i = 0; i < list->isize; i++) { + if (TAG_GET_TYPE(list->index[i]) != TAG_LNUM) + continue; + + if (TAG_GET_OFFSET(list->index[i]) + sizeof(int64_t) > list->dsize) + continue; + + if ((int64_t)be64toh(*(int64_t *)(list->data + TAG_GET_OFFSET(list->index[i]))) != n) + continue; + + return i; + } + } + + return -1; +} + +static void +add_dbl(uc_value_list *list, double d) +{ + size_t sz = TAG_ALIGN(sizeof(d)); + + if (list->dsize + sz > TAG_MASK) { + fprintf(stderr, "Constant data too large\n"); + abort(); + } + + list->data = xrealloc(list->data, list->dsize + sz); + + memset(list->data + list->dsize, 0, sz); + memcpy(list->data + list->dsize, &d, sizeof(d)); + + list->index[list->isize++] = (uint64_t)(TAG_DBL | (list->dsize << TAG_BITS)); + list->dsize += sz; +} + +static ssize_t +find_dbl(uc_value_list *list, double d) +{ + size_t i; + + for (i = 0; i < list->isize; i++) { + if (TAG_GET_TYPE(list->index[i]) != TAG_DBL) + continue; + + if (TAG_GET_OFFSET(list->index[i]) + sizeof(double) > list->dsize) + continue; + + if (*(double *)(list->data + TAG_GET_OFFSET(list->index[i])) != d) + continue; + + return i; + } + + return -1; +} + +static void +add_str(uc_value_list *list, const char *s, size_t slen) +{ + uint32_t sl; + size_t sz; + char *dst; + int i; + + if (slen > UINT32_MAX) { + fprintf(stderr, "String constant too long\n"); + 
abort(); + } + + sz = TAG_ALIGN(sizeof(uint32_t) + slen); + + if (list->dsize + sz > TAG_MASK) { + fprintf(stderr, "Constant data too large\n"); + abort(); + } + + if (TAG_FIT_STR(slen)) { + list->index[list->isize] = (uint64_t)(TAG_STR | TAG_SET_STR_L(slen)); + + for (i = 0; i < slen; i++) + list->index[list->isize] |= (((TAG_TYPE)s[i] << ((i + 1) << 3))); + + list->isize++; + } + else { + list->data = xrealloc(list->data, list->dsize + sz); + + sl = htobe32(slen); + dst = list->data + list->dsize; + memcpy(dst, &sl, sizeof(sl)); + + dst += sizeof(sl); + memcpy(dst, s, slen); + + dst += slen; + memset(dst, 0, TAG_ALIGN(sizeof(uint32_t) + slen) - (sizeof(uint32_t) + slen)); + + list->index[list->isize++] = (uint64_t)(TAG_LSTR | (list->dsize << TAG_BITS)); + list->dsize += sz; + } +} + +static ssize_t +find_str(uc_value_list *list, const char *s, size_t slen) +{ + TAG_TYPE search; + size_t i, len; + + if (TAG_FIT_STR(slen)) { + search = (TAG_TYPE)(TAG_STR | TAG_SET_STR_L(slen)); + + for (i = 0; i < slen; i++) + search |= (((TAG_TYPE)s[i] << ((i + 1) << 3))); + + for (i = 0; i < list->isize; i++) + if (list->index[i] == search) + return i; + } + else { + for (i = 0; i < list->isize; i++) { + if (TAG_GET_TYPE(list->index[i]) != TAG_LSTR) + continue; + + if (TAG_GET_OFFSET(list->index[i]) + sizeof(uint32_t) > list->dsize) + continue; + + len = (size_t)be32toh(*(uint32_t *)(list->data + TAG_GET_OFFSET(list->index[i]))); + + if (len != slen) + continue; + + if (TAG_GET_OFFSET(list->index[i]) + sizeof(uint32_t) + len > list->dsize) + continue; + + if (memcmp(list->data + TAG_GET_OFFSET(list->index[i]) + sizeof(uint32_t), s, slen)) + continue; + + return i; + } + } + + return -1; +} + +static void +add_ptr(uc_value_list *list, void *ptr) +{ + size_t sz = TAG_ALIGN(sizeof(ptr)); + + if (list->dsize + sz > TAG_MASK) { + fprintf(stderr, "Constant data too large\n"); + abort(); + } + + list->data = xrealloc(list->data, list->dsize + sz); + + memset(list->data + list->dsize, 0, 
sz); + memcpy(list->data + list->dsize, &ptr, sizeof(ptr)); + + list->index[list->isize++] = (uint64_t)(TAG_PTR | (list->dsize << TAG_BITS)); + list->dsize += sz; +} + +ssize_t +uc_vallist_add(uc_value_list *list, json_object *value) +{ + ssize_t existing; + + if ((list->isize % UC_VALLIST_CHUNK_SIZE) == 0) { + list->index = xrealloc(list->index, sizeof(list->index[0]) * (list->isize + UC_VALLIST_CHUNK_SIZE)); + memset(&list->index[list->isize], 0, UC_VALLIST_CHUNK_SIZE); + } + + switch (json_object_get_type(value)) { + case json_type_int: + existing = find_num(list, json_object_get_int64(value)); + + if (existing > -1) + return existing; + + add_num(list, json_object_get_int64(value)); + + break; + + case json_type_double: + existing = find_dbl(list, json_object_get_double(value)); + + if (existing > -1) + return existing; + + add_dbl(list, json_object_get_double(value)); + + break; + + case json_type_string: + existing = find_str(list, + json_object_get_string(value), + json_object_get_string_len(value)); + + if (existing > -1) + return existing; + + add_str(list, + json_object_get_string(value), + json_object_get_string_len(value)); + + break; + + case json_type_object: + add_ptr(list, value); + break; + + default: + return -1; + } + + return (ssize_t)list->isize - 1; +} + +uc_value_type_t +uc_vallist_type(uc_value_list *list, size_t idx) +{ + if (idx >= list->isize) + return TAG_INVAL; + + return TAG_GET_TYPE(list->index[idx]); +} + +json_object * +uc_vallist_get(uc_value_list *list, size_t idx) +{ + char str[sizeof(TAG_TYPE)]; + size_t len; + int n; + + switch (uc_vallist_type(list, idx)) { + case TAG_NUM: + return xjs_new_int64(TAG_GET_NV(list->index[idx])); + + case TAG_LNUM: + if (TAG_GET_OFFSET(list->index[idx]) + sizeof(int64_t) > list->dsize) + return NULL; + + return xjs_new_int64(be64toh(*(int64_t *)(list->data + TAG_GET_OFFSET(list->index[idx])))); + + case TAG_DBL: + if (TAG_GET_OFFSET(list->index[idx]) + sizeof(double) > list->dsize) + return NULL; 
+ + return uc_double_new(*(double *)(list->data + TAG_GET_OFFSET(list->index[idx]))); + + case TAG_STR: + len = TAG_GET_STR_L(list->index[idx]); + + for (n = 0; n < len; n++) + str[n] = (list->index[idx] >> ((n + 1) << 3)); + + return xjs_new_string_len(str, len); + + case TAG_LSTR: + if (TAG_GET_OFFSET(list->index[idx]) + sizeof(uint32_t) > list->dsize) + return NULL; + + len = (size_t)be32toh(*(uint32_t *)(list->data + TAG_GET_OFFSET(list->index[idx]))); + + if (TAG_GET_OFFSET(list->index[idx]) + sizeof(uint32_t) + len > list->dsize) + return NULL; + + return xjs_new_string_len(list->data + TAG_GET_OFFSET(list->index[idx]) + sizeof(uint32_t), len); + + case TAG_PTR: + if (TAG_GET_OFFSET(list->index[idx]) + sizeof(void *) > list->dsize) + return NULL; + + return uc_value_get(*(json_object **)(list->data + TAG_GET_OFFSET(list->index[idx]))); + + default: + return NULL; + } +} @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef __VALUE_H_ +#define __VALUE_H_ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +#ifdef JSONC + #include <json.h> +#else + #include <json-c/json.h> +#endif + +#include <stdio.h> + +typedef enum { + TAG_INVAL = 0, + TAG_NUM = 1, + TAG_LNUM = 2, + TAG_DBL = 3, + TAG_STR = 4, + TAG_LSTR = 5, + TAG_PTR = 6 +} uc_value_type_t; + +typedef struct { + size_t isize; + size_t dsize; + uint64_t *index; + char *data; +} uc_value_list; + +json_object *uc_double_new(double v); + +bool uc_eq(json_object *v1, json_object *v2); +bool uc_cmp(int how, json_object *v1, json_object *v2); +bool uc_val_is_truish(json_object *val); + +enum json_type uc_cast_number(json_object *v, int64_t *n, double *d); + +json_object *uc_getval(json_object *scope, json_object *key); +json_object *uc_setval(json_object *scope, json_object *key, json_object *val); + +void uc_vallist_init(uc_value_list *list); +void uc_vallist_free(uc_value_list *list); + +ssize_t uc_vallist_add(uc_value_list *list, json_object *value); +uc_value_type_t uc_vallist_type(uc_value_list *list, size_t idx); +struct json_object *uc_vallist_get(uc_value_list *list, size_t idx); + +#define uc_value_get(val) \ + ({ \ + struct json_object *__o = val; \ + /*fprintf(stderr, "get(%p // %s) [%d + 1] @ %s:%d\n", __o, json_object_to_json_string(__o), getrefcnt(__o), __FILE__, __LINE__);*/ \ + json_object_get(__o); \ + }) + +#define uc_value_put(val) \ + ({ \ + struct json_object *__o = val; \ + /*fprintf(stderr, "put(%p // %s) [%d - 1] @ %s:%d\n", __o, json_object_to_json_string(__o), getrefcnt(__o), __FILE__, __LINE__);*/ \ + json_object_put(__o); \ + }) + +#endif /* __VALUE_H_ */ @@ -0,0 +1,2233 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
/*
 * Redefine the per-instruction helper macro so that it stringifies its
 * argument. __insns is declared elsewhere; presumably it expands to one
 * __insn(NAME) invocation per opcode, which would make this table map an
 * opcode value to its printable name -- TODO confirm against vm.h.
 */
#undef __insn
#define __insn(_name) #_name,

static const char *insn_names[__I_MAX] = {
	__insns
};
2, 1, 0 }, + [I_NE] = { 2, 1, 0 }, + [I_EQS] = { 2, 1, 0 }, + [I_NES] = { 2, 1, 0 }, + [I_LT] = { 2, 1, 0 }, + [I_GT] = { 2, 1, 0 }, + [I_IN] = { 2, 1, 0 }, + + [I_JMP] = { 0, 0, -4, true }, + [I_JMPZ] = { 1, 0, -4, true }, + + [I_COPY] = { 0, 1, 1 }, + [I_POP] = { 1, 0, 0 }, + [I_CUPV] = { 1, 0, 0 }, + + [I_PLUS] = { 1, 1, 0 }, + [I_MINUS] = { 1, 1, 0 }, + + [I_RETURN] = { 1, 0, 0 }, + [I_CALL] = { -2, 1, 4 }, + [I_MCALL] = { -3, 1, 4 }, + + [I_NEXTK] = { 2, 2, 0 }, + [I_NEXTKV] = { 2, 3, 0 }, + + [I_PRINT] = { 1, 0, 0 } +}; + +static const char *exception_type_strings[] = { + [EXCEPTION_SYNTAX] = "Syntax error", + [EXCEPTION_RUNTIME] = "Runtime error", + [EXCEPTION_TYPE] = "Type error", + [EXCEPTION_REFERENCE] = "Reference error", + [EXCEPTION_USER] = "Error", +}; + + +static void +uc_vm_reset_stack(uc_vm *vm) +{ + while (vm->stack.count > 0) { + vm->stack.count--; + uc_value_put(vm->stack.entries[vm->stack.count]); + vm->stack.entries[vm->stack.count] = NULL; + } +} + +static json_object * +uc_vm_callframe_pop(uc_vm *vm); + +static void +uc_vm_reset_callframes(uc_vm *vm) +{ + while (vm->callframes.count > 0) + uc_value_put(uc_vm_callframe_pop(vm)); +} + +void uc_vm_init(uc_vm *vm, uc_parse_config *config) +{ + char *s = getenv("TRACE"); + + vm->exception.type = EXCEPTION_NONE; + vm->exception.message = NULL; + + vm->trace = s ? strtoul(s, NULL, 0) : 0; + + vm->config = config; + + vm->open_upvals = NULL; + + uc_vm_reset_stack(vm); +} + +void uc_vm_free(uc_vm *vm) +{ + uc_upvalref *ref; + + uc_value_put(vm->exception.stacktrace); + free(vm->exception.message); + + while (vm->open_upvals) { + ref = vm->open_upvals->next; + uc_value_put(vm->open_upvals->header.jso); + vm->open_upvals = ref; + } + + uc_vm_reset_callframes(vm); + uc_vm_reset_stack(vm); + uc_vector_clear(&vm->stack); + uc_vector_clear(&vm->callframes); +} + +static uc_chunk * +uc_vm_frame_chunk(uc_callframe *frame) +{ + return frame->closure ? 
&frame->closure->function->chunk : NULL; +} + +static uc_callframe * +uc_vm_current_frame(uc_vm *vm) +{ + return uc_vector_last(&vm->callframes); +} + +static uc_chunk * +uc_vm_current_chunk(uc_vm *vm) +{ + return uc_vm_frame_chunk(uc_vm_current_frame(vm)); +} + +static enum insn_type +uc_vm_decode_insn(uc_vm *vm, uc_callframe *frame, uc_chunk *chunk) +{ + enum insn_type insn; + +#ifndef NDEBUG + uint8_t *end = chunk->entries + chunk->count; +#endif + + assert(frame->ip < end); + + insn = frame->ip[0]; + frame->ip++; + + assert(frame->ip + abs(insn_defs[insn].operand_bytes) <= end); + + switch (insn_defs[insn].operand_bytes) { + case 0: + break; + + case -1: + vm->arg.s8 = frame->ip[0] - 0x7f; + frame->ip++; + break; + + case -2: + vm->arg.s16 = ( + frame->ip[0] * 0x100 + + frame->ip[1] + ) - 0x7fff; + frame->ip += 2; + break; + + case -4: + vm->arg.s32 = ( + frame->ip[0] * 0x1000000 + + frame->ip[1] * 0x10000 + + frame->ip[2] * 0x100 + + frame->ip[3] + ) - 0x7fffffff; + frame->ip += 4; + break; + + case 1: + vm->arg.u8 = frame->ip[0]; + frame->ip++; + break; + + case 4: + vm->arg.u32 = ( + frame->ip[0] * 0x1000000 + + frame->ip[1] * 0x10000 + + frame->ip[2] * 0x100 + + frame->ip[3] + ); + frame->ip += 4; + break; + + default: + fprintf(stderr, "Unhandled operand format: %d\n", insn_defs[insn].operand_bytes); + abort(); + } + + return insn; +} + + +static void +uc_vm_frame_dump(uc_vm *vm, uc_callframe *frame) +{ + uc_chunk *chunk = uc_vm_frame_chunk(frame); + uc_function *function; + uc_closure *closure; + uc_upvalref *ref; + size_t i; + + fprintf(stderr, " [*] CALLFRAME[%lx]\n", + frame - vm->callframes.entries); + + fprintf(stderr, " |- stackframe %zu/%zu\n", + frame->stackframe, vm->stack.count); + + fprintf(stderr, " |- ctx %s\n", + json_object_to_json_string(frame->ctx)); + + if (chunk) { + fprintf(stderr, " |- %zu constants\n", + chunk->constants.isize); + + for (i = 0; i < chunk->constants.isize; i++) + fprintf(stderr, " | [%zu] %s\n", + i, + 
json_object_to_json_string(uc_chunk_get_constant(chunk, i))); + + closure = frame->closure; + function = closure->function; + + fprintf(stderr, " `- %zu upvalues\n", + function->nupvals); + + for (i = 0; i < function->nupvals; i++) { + ref = closure->upvals[i]; + + if (ref->closed) + fprintf(stderr, " [%zu] <%p> %s {closed} %s\n", + i, + ref, + json_object_to_json_string( + uc_chunk_debug_get_variable(chunk, 0, i, true)), + json_object_to_json_string(ref->value)); + else + fprintf(stderr, " [%zu] <%p> %s {open[%zu]} %s\n", + i, + ref, + json_object_to_json_string( + uc_chunk_debug_get_variable(chunk, 0, i, true)), + ref->slot, + json_object_to_json_string(vm->stack.entries[ref->slot])); + } + } +} + +void +uc_vm_stack_push(uc_vm *vm, json_object *value) +{ + uc_vector_grow(&vm->stack); + + uc_value_put(vm->stack.entries[vm->stack.count]); + + vm->stack.entries[vm->stack.count] = value; + vm->stack.count++; + + if (vm->trace) + fprintf(stderr, " [+%zd] %s\n", + vm->stack.count, + json_object_to_json_string(value)); +} + +json_object * +uc_vm_stack_pop(uc_vm *vm) +{ + json_object *rv; + + vm->stack.count--; + rv = vm->stack.entries[vm->stack.count]; + vm->stack.entries[vm->stack.count] = NULL; + + if (vm->trace) + fprintf(stderr, " [-%zd] %s\n", + vm->stack.count + 1, + json_object_to_json_string(rv)); + + return rv; +} + +json_object * +uc_vm_stack_peek(uc_vm *vm, size_t offset) +{ + return vm->stack.entries[vm->stack.count + (-1 - offset)]; +} + +static void +uc_vm_stack_set(uc_vm *vm, size_t offset, json_object *value) +{ + if (vm->trace) + fprintf(stderr, " [!%zu] %s\n", + offset, json_object_to_json_string(value)); + + uc_value_put(vm->stack.entries[offset]); + vm->stack.entries[offset] = value; +} + +static void +uc_vm_call_native(uc_vm *vm, json_object *ctx, uc_cfunction *fptr, bool mcall, size_t nargs) +{ + json_object *res = NULL; + uc_callframe *frame; + + /* add new callframe */ + uc_vector_grow(&vm->callframes); + + frame = 
&vm->callframes.entries[vm->callframes.count++]; + frame->stackframe = vm->stack.count - nargs - 1; + frame->cfunction = fptr; + frame->closure = NULL; + frame->ctx = ctx; + frame->mcall = mcall; + + if (vm->trace) + uc_vm_frame_dump(vm, frame); + + res = fptr->cfn(vm, nargs); + + /* reset stack */ + uc_value_put(uc_vm_callframe_pop(vm)); + + /* push return value */ + if (!vm->exception.type) + uc_vm_stack_push(vm, res); + else + uc_value_put(res); +} + +static bool +uc_vm_call_function(uc_vm *vm, json_object *ctx, json_object *fno, bool mcall, size_t argspec) +{ + size_t i, j, stackoff, nargs = argspec & 0xffff, nspreads = argspec >> 16; + uc_callframe *frame = uc_vm_current_frame(vm); + json_object *ellip, *arg; + uc_function *function; + uc_closure *closure; + uint16_t slot, tmp; + + /* XXX: make dependent on stack size */ + if (vm->callframes.count >= 1000) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "Too much recursion"); + + return false; + } + + stackoff = vm->stack.count - nargs - 1; + + /* argument list contains spread operations, we need to reshuffle the stack */ + if (nspreads > 0) { + /* create temporary array */ + ellip = xjs_new_array_size(nargs); + + /* pop original stack values and push to temp array in reverse order */ + for (i = 0; i < nargs; i++) + json_object_array_add(ellip, uc_vm_stack_pop(vm)); + + /* for each spread value index ... */ + for (i = 0, slot = nargs; i < nspreads; i++) { + /* decode stack depth value */ + tmp = frame->ip[0] * 0x100 + frame->ip[1]; + frame->ip += 2; + + /* push each preceeding non-spread value to the stack */ + for (j = slot; j > tmp + 1; j--) + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(ellip, j - 1))); + + /* read spread value at index... */ + slot = tmp; + arg = uc_value_get(json_object_array_get_idx(ellip, slot)); + + /* ... ensure that it is an array type ... 
*/ + if (!json_object_is_type(arg, json_type_array)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "(%s) is not iterable", + json_object_to_json_string(arg)); + + return false; + } + + /* ... and push each spread array value as argument to the stack */ + for (j = 0; j < json_object_array_length(arg); j++) + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(arg, j))); + + uc_value_put(arg); + } + + /* push remaining non-spread arguments to the stack */ + for (i = slot; i > 0; i--) + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(ellip, i - 1))); + + /* free temp array */ + uc_value_put(ellip); + + /* update arg count */ + nargs = vm->stack.count - stackoff - 1; + } + + /* is a native function */ + if (uc_object_is_type(fno, UC_OBJ_CFUNCTION)) { + uc_vm_call_native(vm, ctx, uc_object_as_cfunction(fno), mcall, nargs); + + return true; + } + + if (!uc_object_is_type(fno, UC_OBJ_CLOSURE)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, "left-hand side is not a function"); + + return false; + } + + closure = uc_object_as_closure(fno); + function = closure->function; + + /* fewer arguments on stack than function expects => pad */ + if (nargs < function->nargs) { + for (i = nargs; i < function->nargs; i++) { + if (function->vararg && (i + 1) == function->nargs) + uc_vm_stack_push(vm, xjs_new_array_size(0)); + else + uc_vm_stack_push(vm, NULL); + } + } + + /* more arguments on stack than function expects... 
*/ + else if (nargs > function->nargs - function->vararg) { + /* is a vararg function => pass excess args as array */ + if (function->vararg) { + ellip = xjs_new_array_size(nargs - (function->nargs - 1)); + + for (i = function->nargs; i <= nargs; i++) + json_object_array_add(ellip, uc_vm_stack_peek(vm, nargs - i)); + + for (i = function->nargs; i <= nargs; i++) + uc_vm_stack_pop(vm); + + uc_vm_stack_push(vm, ellip); + } + + /* static amount of args => drop excess values */ + else { + for (i = function->nargs; i < nargs; i++) + uc_value_put(uc_vm_stack_pop(vm)); + } + } + + uc_vector_grow(&vm->callframes); + + frame = &vm->callframes.entries[vm->callframes.count++]; + frame->stackframe = stackoff; + frame->cfunction = NULL; + frame->closure = closure; + frame->ctx = ctx; + frame->ip = function->chunk.entries; + frame->mcall = mcall; + + if (vm->trace) + uc_vm_frame_dump(vm, frame); + + return true; +} + +static uc_source *last_source = NULL; +static size_t last_srcpos = 0; + +static void +uc_dump_insn(uc_vm *vm, uint8_t *pos, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_chunk *chunk = uc_vm_frame_chunk(frame); + size_t msglen = 0, srcpos; + json_object *cnst = NULL; + char *msg = NULL; + + srcpos = uc_function_get_srcpos(frame->closure->function, pos - chunk->entries); + + if (last_srcpos == 0 || last_source != frame->closure->function->source || srcpos != last_srcpos) { + format_source_context(&msg, &msglen, + frame->closure->function->source, + srcpos, true); + + fprintf(stderr, "%s", msg); + + last_source = frame->closure->function->source; + last_srcpos = srcpos; + } + + fprintf(stderr, "%08lx %s", pos - chunk->entries, insn_names[insn]); + + switch (insn_defs[insn].operand_bytes) { + case 0: + break; + + case -1: + fprintf(stderr, " {%s%hhd}", vm->arg.s8 < 0 ? "" : "+", vm->arg.s8); + break; + + case -2: + fprintf(stderr, " {%s%hx}", vm->arg.s16 < 0 ? 
"" : "+", vm->arg.s16); + break; + + case -4: + fprintf(stderr, " {%s%x}", vm->arg.s32 < 0 ? "" : "+", vm->arg.s32); + break; + + case 1: + fprintf(stderr, " {%hhu}", vm->arg.u8); + break; + + case 2: + fprintf(stderr, " {%hx}", vm->arg.u16); + break; + + case 4: + fprintf(stderr, " {%x}", vm->arg.u32); + break; + + default: + fprintf(stderr, " (unknown operand format: %d)", insn_defs[insn].operand_bytes); + break; + } + + switch (insn) { + case I_LOAD: + case I_LVAR: + case I_SVAR: + cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32); + + fprintf(stderr, "\t; %s", cnst ? json_object_to_json_string(cnst) : "null"); + uc_value_put(cnst); + break; + + case I_LLOC: + case I_LUPV: + case I_SLOC: + case I_SUPV: + cnst = uc_chunk_debug_get_variable(chunk, pos - chunk->entries, vm->arg.u32, (insn == I_LUPV || insn == I_SUPV)); + + fprintf(stderr, "\t; %s", cnst ? json_object_to_json_string(cnst) : "(?)"); + uc_value_put(cnst); + break; + + case I_ULOC: + case I_UUPV: + cnst = uc_chunk_debug_get_variable(chunk, pos - chunk->entries, vm->arg.u32 & 0x00ffffff, (insn == I_UUPV)); + /* fall through */ + + case I_UVAR: + if (!cnst) + cnst = uc_chunk_get_constant(uc_vm_frame_chunk(uc_vector_last(&vm->callframes)), vm->arg.u32 & 0x00ffffff); + + fprintf(stderr, "\t; %s (%s)", + cnst ? json_object_to_json_string(cnst) : "(?)", + insn_names[vm->arg.u32 >> 24]); + + uc_value_put(cnst); + break; + + case I_UVAL: + fprintf(stderr, "\t; (%s)", insn_names[vm->arg.u32]); + break; + + default: + break; + } + + fprintf(stderr, "\n"); +} + +static int +uc_vm_exception_tostring(json_object *jso, struct printbuf *pb, int level, int flags) +{ + bool strict = (level > 0) || (flags & JSON_C_TO_STRING_STRICT); + json_object *message = json_object_object_get(jso, "message"); + + return sprintbuf(pb, "%s", + strict ? 
json_object_to_json_string(message) : json_object_get_string(message)); +} + +static bool +uc_vm_handle_exception(uc_vm *vm) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_chunk *chunk = NULL; + json_object *exo; + size_t i, pos; + + if (!frame->closure) + return false; + + chunk = uc_vm_frame_chunk(frame); + pos = frame->ip - chunk->entries; + + /* iterate the known exception ranges, see if the current ip falls into any of them */ + for (i = 0; i < chunk->ehranges.count; i++) { + /* skip nonmatching ranges */ + if (pos < chunk->ehranges.entries[i].from || + pos >= chunk->ehranges.entries[i].to) + continue; + + /* we found a matching range... first unwind stack */ + while (vm->stack.count > frame->stackframe + chunk->ehranges.entries[i].slot) + uc_value_put(uc_vm_stack_pop(vm)); + + /* prepare exception object and expose it to user handler code */ + exo = xjs_new_object(); + + json_object_object_add(exo, "type", xjs_new_string(exception_type_strings[vm->exception.type])); + json_object_object_add(exo, "message", xjs_new_string(vm->exception.message)); + json_object_object_add(exo, "stacktrace", uc_value_get(vm->exception.stacktrace)); + + json_object_set_serializer(exo, uc_vm_exception_tostring, NULL, NULL); + uc_vm_stack_push(vm, exo); + + /* reset exception information */ + free(vm->exception.message); + + vm->exception.type = EXCEPTION_NONE; + vm->exception.message = NULL; + + /* jump to exception handler */ + if (chunk->ehranges.entries[i].target >= chunk->count) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "jump target out of range"); + return false; + } + +#if 0 + if (vm->trace && chunk->entries + chunk->ehranges.entries[i].target > frame->ip) { + while (frame->ip < chunk->entries + chunk->ehranges.entries[i].target) { + fprintf(stderr, "(eh:skip) [%p:%zu] ", chunk, frame->ip - chunk->entries); + uc_dump_insn(vm, frame->ip, uc_vm_decode_insn(vm, frame, chunk)); + } + } +#endif + + frame->ip = chunk->entries + chunk->ehranges.entries[i].target; + 
+ return true; + } + + return false; +} + +static json_object * +uc_vm_capture_stacktrace(uc_vm *vm, size_t i) +{ + json_object *stacktrace, *entry, *last = NULL; + uc_function *function; + uc_callframe *frame; + size_t off, srcpos; + char *name; + + stacktrace = xjs_new_array(); + + for (; i > 0; i--) { + frame = &vm->callframes.entries[i - 1]; + entry = xjs_new_object(); + + if (frame->closure) { + function = frame->closure->function; + + off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; + srcpos = uc_function_get_srcpos(function, off); + + json_object_object_add(entry, "filename", xjs_new_string(function->source->filename)); + json_object_object_add(entry, "line", xjs_new_int64(uc_source_get_line(function->source, &srcpos))); + json_object_object_add(entry, "byte", xjs_new_int64(srcpos)); + } + + if (i > 1) { + if (frame->closure) { + if (frame->closure->function->name) + name = frame->closure->function->name; + else if (frame->closure->is_arrow) + name = "[arrow function]"; + else + name = "[anonymous function]"; + } + else { + name = frame->cfunction->name; + } + + json_object_object_add(entry, "function", xjs_new_string(name)); + } + + if (!json_object_equal(last, entry)) { + json_object_array_add(stacktrace, entry); + last = entry; + } + else { + uc_value_put(entry); + } + } + + return stacktrace; +} + +static json_object * +uc_vm_get_error_context(uc_vm *vm) +{ + json_object *stacktrace; + uc_callframe *frame; + uc_chunk *chunk; + size_t offset, len = 0, i; + char *msg = NULL; + + /* skip to first non-native function call frame */ + for (i = vm->callframes.count; i > 0; i--) + if (vm->callframes.entries[i - 1].closure) + break; + + frame = &vm->callframes.entries[i - 1]; + + if (!frame->closure) + return NULL; + + chunk = uc_vm_frame_chunk(frame); + offset = uc_function_get_srcpos(frame->closure->function, (frame->ip - chunk->entries) - 1); + stacktrace = uc_vm_capture_stacktrace(vm, i); + + if (offset) + format_error_context(&msg, &len, 
frame->closure->function->source, stacktrace, offset); + else + xasprintf(&msg, "At offset %zu", (frame->ip - chunk->entries) - 1); + + json_object_object_add(json_object_array_get_idx(stacktrace, 0), "context", xjs_new_string(msg)); + + free(msg); + + return stacktrace; +} + +void __attribute__((format(printf, 3, 0))) +uc_vm_raise_exception(uc_vm *vm, uc_exception_type_t type, const char *fmt, ...) +{ + va_list ap; + + vm->exception.type = type; + + free(vm->exception.message); + + va_start(ap, fmt); + xvasprintf(&vm->exception.message, fmt, ap); + va_end(ap); + + uc_value_put(vm->exception.stacktrace); + vm->exception.stacktrace = uc_vm_get_error_context(vm); +} + + +static void +uc_vm_insn_load(uc_vm *vm, enum insn_type insn) +{ + switch (insn) { + case I_LOAD: + uc_vm_stack_push(vm, uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32)); + break; + + case I_LOAD8: + uc_vm_stack_push(vm, xjs_new_int64(vm->arg.s8)); + break; + + case I_LOAD16: + uc_vm_stack_push(vm, xjs_new_int64(vm->arg.s16)); + break; + + case I_LOAD32: + uc_vm_stack_push(vm, xjs_new_int64(vm->arg.s32)); + break; + + default: + break; + } +} + +static void +uc_vm_insn_load_regexp(uc_vm *vm, enum insn_type insn) +{ + bool icase = false, newline = false, global = false; + json_object *jstr = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + const char *str; + uc_regexp *re; + char *err; + + if (!json_object_is_type(jstr, json_type_string) || json_object_get_string_len(jstr) < 2) { + uc_vm_stack_push(vm, NULL); + uc_value_put(jstr); + + return; + } + + str = json_object_get_string(jstr); + + global = (*str & (1 << 0)); + icase = (*str & (1 << 1)); + newline = (*str & (1 << 2)); + + re = uc_regexp_new(++str, icase, newline, global, &err); + + uc_value_put(jstr); + + if (re) + uc_vm_stack_push(vm, re->header.jso); + else + uc_vm_raise_exception(vm, EXCEPTION_SYNTAX, "%s", err); +} + +static void +uc_vm_insn_load_null(uc_vm *vm, enum insn_type insn) +{ + uc_vm_stack_push(vm, NULL); 
+} + +static void +uc_vm_insn_load_bool(uc_vm *vm, enum insn_type insn) +{ + uc_vm_stack_push(vm, xjs_new_boolean(insn == I_LTRUE)); +} + +static void +uc_vm_insn_load_var(uc_vm *vm, enum insn_type insn) +{ + json_object *name, *val = NULL; + uc_prototype *scope, *next; + + scope = vm->globals; + name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + + while (json_object_get_type(name) == json_type_string) { + if (json_object_object_get_ex(scope->header.jso, json_object_get_string(name), &val)) + break; + + next = scope->parent; + + if (!next) { + if (vm->config->strict_declarations) { + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "access to undeclared variable %s", + json_object_get_string(name)); + } + + break; + } + + scope = next; + } + + uc_value_put(name); + + uc_vm_stack_push(vm, uc_value_get(val)); +} + +static void +uc_vm_insn_load_val(uc_vm *vm, enum insn_type insn) +{ + json_object *k = uc_vm_stack_pop(vm); + json_object *v = uc_vm_stack_pop(vm); + + switch (json_object_get_type(v)) { + case json_type_object: + case json_type_array: + uc_vm_stack_push(vm, uc_getval(v, k)); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "left-hand side expression is %s", + v ? 
"not an array or object" : "null"); + + break; + } + + + uc_value_put(k); + uc_value_put(v); +} + +static void +uc_vm_insn_load_upval(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_upvalref *ref = frame->closure->upvals[vm->arg.u32]; + + if (ref->closed) + uc_vm_stack_push(vm, uc_value_get(ref->value)); + else + uc_vm_stack_push(vm, uc_value_get(vm->stack.entries[ref->slot])); +} + +static void +uc_vm_insn_load_local(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + + uc_vm_stack_push(vm, uc_value_get(vm->stack.entries[frame->stackframe + vm->arg.u32])); +} + +static uc_upvalref * +uc_vm_capture_upval(uc_vm *vm, size_t slot) +{ + uc_upvalref *curr = vm->open_upvals; + uc_upvalref *prev = NULL; + uc_upvalref *created; + + while (curr && curr->slot > slot) { + prev = curr; + curr = curr->next; + } + + if (curr && curr->slot == slot) { + if (vm->trace) + fprintf(stderr, " {+%zu} <%p> %s\n", + slot, + curr, + json_object_to_json_string(vm->stack.entries[slot])); + + return curr; + } + + created = uc_upvalref_new(slot); + created->next = curr; + + if (vm->trace) + fprintf(stderr, " {*%zu} <%p> %s\n", + slot, + created, + json_object_to_json_string(vm->stack.entries[slot])); + + if (prev) + prev->next = created; + else + vm->open_upvals = created; + + return created; +} + +static void +uc_vm_close_upvals(uc_vm *vm, size_t slot) +{ + uc_upvalref *ref; + + while (vm->open_upvals && vm->open_upvals->slot >= slot) { + ref = vm->open_upvals; + ref->value = uc_value_get(vm->stack.entries[ref->slot]); + ref->closed = true; + + if (vm->trace) + fprintf(stderr, " {!%zu} <%p> %s\n", + ref->slot, + ref, + json_object_to_json_string(ref->value)); + + vm->open_upvals = ref->next; + json_object_put(ref->header.jso); + } +} + +static void +uc_vm_insn_load_closure(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + json_object *fno = 
uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + uc_function *function = uc_object_as_function(fno); + uc_closure *closure = uc_closure_new(function, insn == I_ARFN); + volatile int32_t uv; + size_t i; + + uc_vm_stack_push(vm, closure->header.jso); + + for (i = 0; i < function->nupvals; i++) { + uv = ( + frame->ip[0] * 0x1000000 + + frame->ip[1] * 0x10000 + + frame->ip[2] * 0x100 + + frame->ip[3] + ) - 0x7fffffff; + + if (uv < 0) + closure->upvals[i] = uc_vm_capture_upval(vm, frame->stackframe - (uv + 1)); + else + closure->upvals[i] = frame->closure->upvals[uv]; + + uc_value_get(closure->upvals[i]->header.jso); + + frame->ip += 4; + } +} + +static void +uc_vm_insn_store_var(uc_vm *vm, enum insn_type insn) +{ + json_object *name, *v = uc_vm_stack_pop(vm); + uc_prototype *scope, *next; + + scope = vm->globals; + name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32); + + while (json_object_get_type(name) == json_type_string) { + if (json_object_object_get_ex(scope->header.jso, json_object_get_string(name), NULL)) + break; + + next = scope->parent; + + if (!next) { + if (vm->config->strict_declarations) { + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "access to undeclared variable %s", + json_object_get_string(name)); + } + + break; + } + + scope = next; + } + + if (scope && json_object_get_type(name) == json_type_string) + json_object_object_add(scope->header.jso, json_object_get_string(name), uc_value_get(v)); + + uc_value_put(name); + uc_vm_stack_push(vm, v); +} + +static void +uc_vm_insn_store_val(uc_vm *vm, enum insn_type insn) +{ + json_object *v = uc_vm_stack_pop(vm); + json_object *k = uc_vm_stack_pop(vm); + json_object *o = uc_vm_stack_pop(vm); + + uc_vm_stack_push(vm, uc_setval(o, k, v)); + + uc_value_put(o); + uc_value_put(k); +} + +static void +uc_vm_insn_store_upval(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_upvalref *ref = frame->closure->upvals[vm->arg.u32]; + json_object *val 
= uc_value_get(uc_vm_stack_peek(vm, 0)); + + if (ref->closed) { + uc_value_put(ref->value); + ref->value = val; + } + else { + uc_vm_stack_set(vm, ref->slot, val); + } +} + +static void +uc_vm_insn_store_local(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + json_object *val = uc_value_get(uc_vm_stack_peek(vm, 0)); + + uc_vm_stack_set(vm, frame->stackframe + vm->arg.u32, val); +} + +static json_object * +uc_vm_value_bitop(uc_vm *vm, enum insn_type operation, json_object *value, json_object *operand) +{ + json_object *rv = NULL; + int64_t n1, n2; + double d; + + if (uc_cast_number(value, &n1, &d) == json_type_double) + n1 = isnan(d) ? 0 : (int64_t)d; + + if (uc_cast_number(operand, &n2, &d) == json_type_double) + n2 = isnan(d) ? 0 : (int64_t)d; + + switch (operation) { + case I_LSHIFT: + rv = xjs_new_int64(n1 << n2); + break; + + case I_RSHIFT: + rv = xjs_new_int64(n1 >> n2); + break; + + case I_BAND: + rv = xjs_new_int64(n1 & n2); + break; + + case I_BXOR: + rv = xjs_new_int64(n1 ^ n2); + break; + + case I_BOR: + rv = xjs_new_int64(n1 | n2); + break; + + default: + break; + } + + return rv; +} + +static json_object * +uc_vm_value_arith(uc_vm *vm, enum insn_type operation, json_object *value, json_object *operand) +{ + json_object *rv = NULL; + enum json_type t1, t2; + const char *s1, *s2; + size_t len1, len2; + int64_t n1, n2; + double d1, d2; + char *s; + + if (operation > I_MOD) + return uc_vm_value_bitop(vm, operation, value, operand); + + if (operation == I_ADD && + (json_object_is_type(value, json_type_string) || + json_object_is_type(operand, json_type_string))) { + s1 = value ? json_object_get_string(value) : "null"; + s2 = operand ? 
json_object_get_string(operand) : "null"; + len1 = strlen(s1); + len2 = strlen(s2); + s = xalloc(len1 + len2 + 1); + + snprintf(s, len1 + len2 + 1, "%s%s", s1, s2); + + rv = xjs_new_string(s); + + free(s); + + return rv; + } + + t1 = uc_cast_number(value, &n1, &d1); + t2 = uc_cast_number(operand, &n2, &d2); + + if (t1 == json_type_double || t2 == json_type_double) { + d1 = (t1 == json_type_double) ? d1 : (double)n1; + d2 = (t2 == json_type_double) ? d2 : (double)n2; + + switch (operation) { + case I_ADD: + case I_PLUS: + rv = uc_double_new(d1 + d2); + break; + + case I_SUB: + rv = uc_double_new(d1 - d2); + break; + + case I_MUL: + rv = uc_double_new(d1 * d2); + break; + + case I_DIV: + if (d2 == 0.0) + rv = uc_double_new(INFINITY); + else if (isnan(d2)) + rv = uc_double_new(NAN); + else if (!isfinite(d2)) + rv = uc_double_new(isfinite(d1) ? 0.0 : NAN); + else + rv = uc_double_new(d1 / d2); + + break; + + case I_MOD: + rv = uc_double_new(NAN); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "undefined arithmetic operation %d", + operation); + break; + } + } + else { + switch (operation) { + case I_ADD: + case I_PLUS: + rv = xjs_new_int64(n1 + n2); + break; + + case I_SUB: + rv = xjs_new_int64(n1 - n2); + break; + + case I_MUL: + rv = xjs_new_int64(n1 * n2); + break; + + case I_DIV: + if (n2 == 0) + rv = uc_double_new(INFINITY); + else + rv = xjs_new_int64(n1 / n2); + + break; + + case I_MOD: + rv = xjs_new_int64(n1 % n2); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, + "undefined arithmetic operation %d", + operation); + break; + } + } + + return rv; +} + +static void +uc_vm_insn_update_var(uc_vm *vm, enum insn_type insn) +{ + json_object *name, *val, *inc = uc_vm_stack_pop(vm); + uc_prototype *scope, *next; + + scope = vm->globals; + name = uc_chunk_get_constant(uc_vm_current_chunk(vm), vm->arg.u32 & 0x00FFFFFF); + + assert(json_object_is_type(name, json_type_string)); + + while (true) { + if 
(json_object_object_get_ex(scope->header.jso, json_object_get_string(name), &val)) + break; + + next = scope->parent; + + if (!next) { + if (vm->config->strict_declarations) { + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "access to undeclared variable %s", + json_object_get_string(name)); + } + + break; + } + + scope = next; + } + + val = uc_vm_value_arith(vm, vm->arg.u32 >> 24, val, inc); + + json_object_object_add(scope->header.jso, json_object_get_string(name), uc_value_get(val)); + uc_vm_stack_push(vm, val); + + uc_value_put(name); + uc_value_put(inc); +} + +static void +uc_vm_insn_update_val(uc_vm *vm, enum insn_type insn) +{ + json_object *inc = uc_vm_stack_pop(vm); + json_object *k = uc_vm_stack_pop(vm); + json_object *v = uc_vm_stack_pop(vm); + json_object *val = NULL; + + switch (json_object_get_type(v)) { + case json_type_object: + case json_type_array: + val = uc_getval(v, k); + uc_vm_stack_push(vm, uc_setval(v, k, uc_vm_value_arith(vm, vm->arg.u8, val, inc))); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "left-hand side expression is %s", + v ? 
"not an array or object" : "null"); + + break; + } + + uc_value_put(val); + uc_value_put(inc); + uc_value_put(v); + uc_value_put(k); +} + +static void +uc_vm_insn_update_upval(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + size_t slot = vm->arg.u32 & 0x00FFFFFF; + uc_upvalref *ref = frame->closure->upvals[slot]; + json_object *inc = uc_vm_stack_pop(vm); + json_object *val; + + if (ref->closed) + val = ref->value; + else + val = vm->stack.entries[ref->slot]; + + val = uc_vm_value_arith(vm, vm->arg.u32 >> 24, val, inc); + + uc_vm_stack_push(vm, val); + + uc_value_put(inc); + + if (ref->closed) { + uc_value_put(ref->value); + ref->value = uc_value_get(uc_vm_stack_peek(vm, 0)); + } + else { + uc_vm_stack_set(vm, ref->slot, uc_value_get(uc_vm_stack_peek(vm, 0))); + } +} + +static void +uc_vm_insn_update_local(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + size_t slot = vm->arg.u32 & 0x00FFFFFF; + json_object *inc = uc_vm_stack_pop(vm); + json_object *val; + + val = uc_vm_value_arith(vm, vm->arg.u32 >> 24, + vm->stack.entries[frame->stackframe + slot], inc); + + uc_vm_stack_push(vm, val); + + uc_value_put(inc); + uc_vm_stack_set(vm, frame->stackframe + slot, uc_value_get(uc_vm_stack_peek(vm, 0))); +} + +static void +uc_vm_insn_narr(uc_vm *vm, enum insn_type insn) +{ + json_object *arr = xjs_new_array_size(vm->arg.u32); + + uc_vm_stack_push(vm, arr); +} + +static void +uc_vm_insn_parr(uc_vm *vm, enum insn_type insn) +{ + json_object *arr = uc_vm_stack_peek(vm, vm->arg.u32); + size_t idx; + + for (idx = 0; idx < vm->arg.u32; idx++) + json_object_array_add(arr, uc_vm_stack_peek(vm, vm->arg.u32 - idx - 1)); + + for (idx = 0; idx < vm->arg.u32; idx++) + uc_vm_stack_pop(vm); + + //uc_vm_shrink(state, vm->arg.u32); +} + +static void +uc_vm_insn_marr(uc_vm *vm, enum insn_type insn) +{ + json_object *src = uc_vm_stack_pop(vm); + json_object *dst = uc_vm_stack_peek(vm, 0); + size_t i; + + if 
(!json_object_is_type(src, json_type_array)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "(%s) is not iterable", + json_object_to_json_string(src)); + + return; + } + + for (i = 0; i < json_object_array_length(src); i++) + json_object_array_add(dst, uc_value_get(json_object_array_get_idx(src, i))); + + uc_value_put(src); +} + +static void +uc_vm_insn_nobj(uc_vm *vm, enum insn_type insn) +{ + json_object *arr = xjs_new_object(); + + uc_vm_stack_push(vm, arr); +} + +static void +uc_vm_insn_sobj(uc_vm *vm, enum insn_type insn) +{ + json_object *obj = uc_vm_stack_peek(vm, vm->arg.u32); + size_t idx; + + for (idx = 0; idx < vm->arg.u32; idx += 2) { + json_object_object_add(obj, + json_object_get_string(uc_vm_stack_peek(vm, vm->arg.u32 - idx - 1)), + uc_value_get(uc_vm_stack_peek(vm, vm->arg.u32 - idx - 2))); + } + + for (idx = 0; idx < vm->arg.u32; idx++) + uc_value_put(uc_vm_stack_pop(vm)); +} + +static void +uc_vm_insn_mobj(uc_vm *vm, enum insn_type insn) +{ + json_object *src = uc_vm_stack_pop(vm); + json_object *dst = uc_vm_stack_peek(vm, 0); + char *istr; + size_t i; + + switch (json_object_get_type(src)) { + case json_type_object: + ; /* a label can only be part of a statement and a declaration is not a statement */ + json_object_object_foreach(src, k, v) + json_object_object_add(dst, k, uc_value_get(v)); + + uc_value_put(src); + break; + + case json_type_array: + for (i = 0; i < json_object_array_length(src); i++) { + xasprintf(&istr, "%zu", i); + json_object_object_add(dst, istr, uc_value_get(json_object_array_get_idx(src, i))); + free(istr); + } + + uc_value_put(src); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "Value (%s) is not iterable", + json_object_to_json_string(src)); + + break; + } +} + +static void +uc_vm_insn_arith(uc_vm *vm, enum insn_type insn) +{ + json_object *r2 = uc_vm_stack_pop(vm); + json_object *r1 = uc_vm_stack_pop(vm); + json_object *rv; + + rv = uc_vm_value_arith(vm, insn, r1, r2); + + uc_value_put(r1); + 
uc_value_put(r2); + + uc_vm_stack_push(vm, rv); +} + +static void +uc_vm_insn_plus_minus(uc_vm *vm, enum insn_type insn) +{ + struct json_object *v = uc_vm_stack_pop(vm); + bool is_sub = (insn == I_MINUS); + enum json_type t; + int64_t n; + double d; + + t = uc_cast_number(v, &n, &d); + + json_object_put(v); + + switch (t) { + case json_type_int: + uc_vm_stack_push(vm, xjs_new_int64(is_sub ? -n : n)); + break; + + default: + uc_vm_stack_push(vm, uc_double_new(is_sub ? -d : d)); + break; + } +} + +static void +uc_vm_insn_bitop(uc_vm *vm, enum insn_type insn) +{ + json_object *r2 = uc_vm_stack_pop(vm); + json_object *r1 = uc_vm_stack_pop(vm); + json_object *rv; + + rv = uc_vm_value_bitop(vm, insn, r1, r2); + + uc_value_put(r1); + uc_value_put(r2); + + uc_vm_stack_push(vm, rv); +} + +static void +uc_vm_insn_complement(uc_vm *vm, enum insn_type insn) +{ + struct json_object *v = uc_vm_stack_pop(vm); + int64_t n; + double d; + + if (uc_cast_number(v, &n, &d) == json_type_double) + n = isnan(d) ? 
0 : (int64_t)d; + + json_object_put(v); + + uc_vm_stack_push(vm, xjs_new_int64(~n)); +} + +static void +uc_vm_insn_rel(uc_vm *vm, enum insn_type insn) +{ + json_object *r2 = uc_vm_stack_pop(vm); + json_object *r1 = uc_vm_stack_pop(vm); + bool res = false; + + switch (insn) { + case I_LT: + res = uc_cmp(TK_LT, r1, r2); + break; + + case I_GT: + res = uc_cmp(TK_GT, r1, r2); + break; + + case I_EQ: + res = uc_cmp(TK_EQ, r1, r2); + break; + + case I_NE: + res = uc_cmp(TK_NE, r1, r2); + break; + + default: + break; + } + + uc_value_put(r1); + uc_value_put(r2); + + uc_vm_stack_push(vm, xjs_new_boolean(res)); +} + +static void +uc_vm_insn_in(uc_vm *vm, enum insn_type insn) +{ + json_object *r2 = uc_vm_stack_pop(vm); + json_object *r1 = uc_vm_stack_pop(vm); + json_object *item; + size_t arrlen, arridx; + bool found = false; + const char *key; + + switch (json_object_get_type(r2)) { + case json_type_array: + for (arridx = 0, arrlen = json_object_array_length(r2); + arridx < arrlen; arridx++) { + item = json_object_array_get_idx(r2, arridx); + + if (uc_cmp(TK_EQ, r1, item)) { + found = true; + break; + } + } + + break; + + case json_type_object: + key = r1 ? json_object_get_string(r1) : "null"; + found = json_object_object_get_ex(r2, key, NULL); + break; + + default: + found = false; + } + + uc_value_put(r1); + uc_value_put(r2); + + uc_vm_stack_push(vm, xjs_new_boolean(found)); +} + +static void +uc_vm_insn_equality(uc_vm *vm, enum insn_type insn) +{ + json_object *r2 = uc_vm_stack_pop(vm); + json_object *r1 = uc_vm_stack_pop(vm); + bool equal = uc_eq(r1, r2); + + uc_value_put(r1); + uc_value_put(r2); + + uc_vm_stack_push(vm, xjs_new_boolean((insn == I_EQS) ? 
equal : !equal)); +} + +static void +uc_vm_insn_not(uc_vm *vm, enum insn_type insn) +{ + json_object *r1 = uc_vm_stack_pop(vm); + + uc_vm_stack_push(vm, xjs_new_boolean(!uc_val_is_truish(r1))); + uc_value_put(r1); +} + +static void +uc_vm_insn_jmp(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_chunk *chunk = uc_vm_frame_chunk(frame); + int32_t addr = vm->arg.s32; + + /* ip already has been incremented */ + addr -= 5; + + if (frame->ip + addr < chunk->entries || + frame->ip + addr >= chunk->entries + chunk->count) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "jump target out of range"); + return; + } + + frame->ip += addr; +} + +static void +uc_vm_insn_jmpz(uc_vm *vm, enum insn_type insn) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_chunk *chunk = uc_vm_frame_chunk(frame); + json_object *v = uc_vm_stack_pop(vm); + int32_t addr = vm->arg.s32; + + /* ip already has been incremented */ + addr -= 5; + + if (frame->ip + addr < chunk->entries || + frame->ip + addr >= chunk->entries + chunk->count) { + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "jump target out of range"); + return; + } + + if (!uc_val_is_truish(v)) + frame->ip += addr; + + uc_value_put(v); +} + +static void +uc_vm_insn_next(uc_vm *vm, enum insn_type insn) +{ + json_object *k = uc_vm_stack_pop(vm); + json_object *v = uc_vm_stack_pop(vm); + struct lh_entry *curr; + int64_t n; + + switch (json_object_get_type(v)) { + case json_type_object: + curr = k ? 
json_object_get_userdata(k) : json_object_get_object(v)->head; + + if (curr) { + if (!k) + k = xjs_new_string("[key]"); + + json_object_set_userdata(k, curr->next, NULL); + + uc_vm_stack_push(vm, xjs_new_string(curr->k)); + + if (insn == I_NEXTKV) + uc_vm_stack_push(vm, uc_value_get((json_object *)curr->v)); + + uc_vm_stack_push(vm, k); + + return; + } + + break; + + case json_type_array: + if (!k) + k = xjs_new_int64(0); + + n = json_object_get_int64(k); + + if (json_object_is_type(k, json_type_int) && n < json_object_array_length(v)) { + json_object_int_inc(k, 1); + + if (insn == I_NEXTKV) + uc_vm_stack_push(vm, xjs_new_int64(n)); + + uc_vm_stack_push(vm, uc_value_get(json_object_array_get_idx(v, n))); + + uc_vm_stack_push(vm, k); + + return; + } + + break; + + default: + break; + } + + uc_vm_stack_push(vm, NULL); + uc_vm_stack_push(vm, NULL); + + if (insn == I_NEXTKV) + uc_vm_stack_push(vm, NULL); +} + +static void +uc_vm_insn_close_upval(uc_vm *vm, enum insn_type insn) +{ + uc_vm_close_upvals(vm, vm->stack.count - 1); + uc_value_put(uc_vm_stack_pop(vm)); +} + +static void +uc_vm_insn_call(uc_vm *vm, enum insn_type insn) +{ + json_object *fno = uc_value_get(uc_vm_stack_peek(vm, vm->arg.u32 & 0xffff)); + json_object *ctx = NULL; + + if (!uc_object_is_type(fno, UC_OBJ_CLOSURE) || !uc_object_as_closure(fno)->is_arrow) + ctx = NULL; + else if (vm->callframes.count > 0) + ctx = uc_value_get(uc_vm_current_frame(vm)->ctx); + + uc_vm_call_function(vm, ctx, fno, false, vm->arg.u32); +} + +static void +uc_vm_insn_mcall(uc_vm *vm, enum insn_type insn) +{ + size_t key_slot = vm->stack.count - (vm->arg.u32 & 0xffff) - 1; + json_object *ctx = vm->stack.entries[key_slot - 1]; + json_object *key = vm->stack.entries[key_slot]; + json_object *fno = uc_getval(ctx, key); + + uc_vm_stack_set(vm, key_slot, fno); + + /* arrow functions as method calls inherit the parent ctx */ + if (uc_object_is_type(fno, UC_OBJ_CLOSURE) && uc_object_as_closure(fno)->is_arrow) + ctx = 
uc_vm_current_frame(vm)->ctx; + + uc_vm_call_function(vm, uc_value_get(ctx), uc_value_get(fno), true, vm->arg.u32); +} + +static void +uc_vm_insn_print(uc_vm *vm, enum insn_type insn) +{ + json_object *v = uc_vm_stack_pop(vm); + const char *p; + size_t len; + + switch (json_object_get_type(v)) { + case json_type_object: + case json_type_array: + p = json_object_to_json_string_ext(v, JSON_C_TO_STRING_NOSLASHESCAPE|JSON_C_TO_STRING_SPACED); + len = strlen(p); + break; + + case json_type_string: + p = json_object_get_string(v); + len = json_object_get_string_len(v); + break; + + case json_type_null: + p = ""; + len = 0; + break; + + default: + p = json_object_get_string(v); + len = strlen(p); + } + + fwrite(p, 1, len, stdout); + + uc_value_put(v); +} + +static json_object * +uc_vm_callframe_pop(uc_vm *vm) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + json_object *retval; + + /* close upvalues */ + uc_vm_close_upvals(vm, frame->stackframe); + + if (vm->stack.count > frame->stackframe) + retval = uc_vm_stack_pop(vm); + else + retval = NULL; + + /* reset function stack frame */ + while (vm->stack.count > frame->stackframe) + uc_value_put(uc_vm_stack_pop(vm)); + + /* for method calls, release context as well */ + if (frame->mcall) + uc_value_put(uc_vm_stack_pop(vm)); + + /* release function */ + uc_value_put(frame->closure ? frame->closure->header.jso : NULL); + uc_value_put(frame->cfunction ? frame->cfunction->header.jso : NULL); + + /* release context */ + uc_value_put(frame->ctx); + + vm->callframes.count--; + + return retval; +} + +static void +uc_vm_output_exception(uc_vm *vm) +{ + if (vm->exception.type == EXCEPTION_USER) + fprintf(stderr, "%s\n", vm->exception.message); + else + fprintf(stderr, "%s: %s\n", + exception_type_strings[vm->exception.type] ? 
exception_type_strings[vm->exception.type] : "Error", + vm->exception.message); + + fprintf(stderr, "%s\n\n", + json_object_get_string( + json_object_object_get( + json_object_array_get_idx(vm->exception.stacktrace, 0), "context"))); +} + +static uc_vm_status_t +uc_vm_execute_chunk(uc_vm *vm) +{ + uc_callframe *frame = uc_vm_current_frame(vm); + uc_chunk *chunk = uc_vm_frame_chunk(frame); + json_object *retval; + enum insn_type insn; + + while (chunk) { + if (vm->trace) + uc_dump_insn(vm, frame->ip, (insn = uc_vm_decode_insn(vm, frame, chunk))); + else + insn = uc_vm_decode_insn(vm, frame, chunk); + + switch (insn) { + case I_LOAD: + case I_LOAD8: + case I_LOAD16: + case I_LOAD32: + uc_vm_insn_load(vm, insn); + break; + + case I_LREXP: + uc_vm_insn_load_regexp(vm, insn); + break; + + case I_LNULL: + uc_vm_insn_load_null(vm, insn); + break; + + case I_LTRUE: + case I_LFALSE: + uc_vm_insn_load_bool(vm, insn); + break; + + case I_LTHIS: + uc_vm_stack_push(vm, uc_value_get(frame->ctx)); + break; + + case I_LVAR: + uc_vm_insn_load_var(vm, insn); + break; + + case I_LVAL: + uc_vm_insn_load_val(vm, insn); + break; + + case I_LUPV: + uc_vm_insn_load_upval(vm, insn); + break; + + case I_LLOC: + uc_vm_insn_load_local(vm, insn); + break; + + case I_CLFN: + case I_ARFN: + uc_vm_insn_load_closure(vm, insn); + break; + + case I_NARR: + uc_vm_insn_narr(vm, insn); + break; + + case I_PARR: + uc_vm_insn_parr(vm, insn); + break; + + case I_MARR: + uc_vm_insn_marr(vm, insn); + break; + + case I_NOBJ: + uc_vm_insn_nobj(vm, insn); + break; + + case I_SOBJ: + uc_vm_insn_sobj(vm, insn); + break; + + case I_MOBJ: + uc_vm_insn_mobj(vm, insn); + break; + + case I_SVAR: + uc_vm_insn_store_var(vm, insn); + break; + + case I_SVAL: + uc_vm_insn_store_val(vm, insn); + break; + + case I_SUPV: + uc_vm_insn_store_upval(vm, insn); + break; + + case I_SLOC: + uc_vm_insn_store_local(vm, insn); + break; + + case I_UVAR: + uc_vm_insn_update_var(vm, insn); + break; + + case I_UVAL: + 
uc_vm_insn_update_val(vm, insn); + break; + + case I_UUPV: + uc_vm_insn_update_upval(vm, insn); + break; + + case I_ULOC: + uc_vm_insn_update_local(vm, insn); + break; + + case I_ADD: + case I_SUB: + case I_MUL: + case I_DIV: + case I_MOD: + uc_vm_insn_arith(vm, insn); + break; + + case I_PLUS: + case I_MINUS: + uc_vm_insn_plus_minus(vm, insn); + break; + + case I_LSHIFT: + case I_RSHIFT: + case I_BAND: + case I_BXOR: + case I_BOR: + uc_vm_insn_bitop(vm, insn); + break; + + case I_COMPL: + uc_vm_insn_complement(vm, insn); + break; + + case I_EQS: + case I_NES: + uc_vm_insn_equality(vm, insn); + break; + + case I_EQ: + case I_NE: + case I_LT: + case I_GT: + uc_vm_insn_rel(vm, insn); + break; + + case I_IN: + uc_vm_insn_in(vm, insn); + break; + + case I_NOT: + uc_vm_insn_not(vm, insn); + break; + + case I_JMP: + uc_vm_insn_jmp(vm, insn); + break; + + case I_JMPZ: + uc_vm_insn_jmpz(vm, insn); + break; + + case I_NEXTK: + case I_NEXTKV: + uc_vm_insn_next(vm, insn); + break; + + case I_COPY: + uc_vm_stack_push(vm, uc_value_get(uc_vm_stack_peek(vm, vm->arg.u8))); + break; + + case I_POP: + uc_value_put(uc_vm_stack_pop(vm)); + break; + + case I_CUPV: + uc_vm_insn_close_upval(vm, insn); + break; + + case I_CALL: + uc_vm_insn_call(vm, insn); + frame = uc_vm_current_frame(vm); + chunk = frame->closure ? uc_vm_frame_chunk(frame) : NULL; + break; + + case I_MCALL: + uc_vm_insn_mcall(vm, insn); + frame = uc_vm_current_frame(vm); + chunk = frame->closure ? 
uc_vm_frame_chunk(frame) : NULL; + break; + + case I_RETURN: + retval = uc_vm_callframe_pop(vm); + + if (vm->callframes.count == 0) { + uc_value_put(retval); + + return STATUS_OK; + } + + uc_vm_stack_push(vm, retval); + + frame = uc_vector_last(&vm->callframes); + chunk = uc_vm_frame_chunk(frame); + break; + + case I_PRINT: + uc_vm_insn_print(vm, insn); + break; + + default: + uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "unknown opcode %d", insn); + break; + } + + /* previous instruction raised exception */ + if (vm->exception.type != EXCEPTION_NONE) { + /* walk up callframes until something handles the exception or the root is reached */ + while (!uc_vm_handle_exception(vm)) { + /* no further callframe to pop, report unhandled exception and terminate */ + if (vm->callframes.count == 1) { + uc_vm_output_exception(vm); + + return ERROR_RUNTIME; + } + + /* if VM returned into native function, don't bubble up */ + if (!chunk) + return ERROR_RUNTIME; + + /* no exception handler in current function, pop callframe */ + uc_value_put(uc_vm_callframe_pop(vm)); + + /* resume execution at topmost remaining callframe */ + frame = uc_vector_last(&vm->callframes); + chunk = uc_vm_frame_chunk(frame); + } + } + } + + return STATUS_OK; +} + +static uc_vm_status_t +uc_vm_preload(uc_vm *vm, json_object *modules) +{ + json_object *requirefn, *module, *name; + uc_exception_type_t ex; + size_t i; + + if (!json_object_is_type(modules, json_type_array)) + return STATUS_OK; + + requirefn = json_object_object_get(vm->globals->header.jso, "require"); + + if (!uc_object_is_type(requirefn, UC_OBJ_CFUNCTION)) + return STATUS_OK; + + for (i = 0; i < json_object_array_length(modules); i++) { + name = json_object_array_get_idx(modules, i); + + uc_vm_stack_push(vm, uc_value_get(requirefn)); + uc_vm_stack_push(vm, uc_value_get(name)); + + ex = uc_vm_call(vm, false, 1); + + if (ex) + return ERROR_RUNTIME; + + module = uc_vm_stack_pop(vm); + + uc_value_put(uc_setval(vm->globals->header.jso, name, 
module)); + } + + return STATUS_OK; +} + +uc_vm_status_t +uc_vm_execute(uc_vm *vm, uc_function *fn, uc_prototype *globals, json_object *modules) +{ + uc_closure *closure = uc_closure_new(fn, false); + uc_callframe *frame; + uc_vm_status_t rv; + + vm->globals = globals; + + uc_vector_grow(&vm->callframes); + + frame = &vm->callframes.entries[vm->callframes.count++]; + frame->closure = closure; + frame->stackframe = 0; + frame->ip = uc_vm_frame_chunk(frame)->entries; + + if (vm->trace) { + size_t msglen = 0; + char *msg = NULL; + + format_source_context(&msg, &msglen, + fn->source, 0, true); + + fprintf(stderr, "%s", msg); + + uc_vm_frame_dump(vm, frame); + } + + //uc_vm_stack_push(vm, closure->header.jso); + uc_vm_stack_push(vm, NULL); + + rv = uc_vm_preload(vm, modules); + + if (rv != STATUS_OK) + uc_vm_output_exception(vm); + else + rv = uc_vm_execute_chunk(vm); + + uc_value_put(vm->globals->header.jso); + vm->globals = NULL; + + return rv; +} + +uc_exception_type_t +uc_vm_call(uc_vm *vm, bool mcall, size_t nargs) +{ + json_object *ctx = mcall ? uc_value_get(uc_vm_stack_peek(vm, nargs - 1)) : NULL; + json_object *fno = uc_value_get(uc_vm_stack_peek(vm, nargs)); + + if (uc_vm_call_function(vm, ctx, fno, mcall, nargs & 0xffff)) { + if (!uc_object_is_type(fno, UC_OBJ_CFUNCTION)) + uc_vm_execute_chunk(vm); + } + + return vm->exception.type; +} @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2020-2021 Jo-Philipp Wich <jo@mein.io> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef __VM_H_ +#define __VM_H_ + +#include <stdbool.h> +#include <stdarg.h> + +#include "chunk.h" +#include "object.h" +#include "util.h" +#include "lexer.h" + +#define __insns \ +__insn(NOOP) \ +__insn(LOAD) \ +__insn(LOAD8) \ +__insn(LOAD16) \ +__insn(LOAD32) \ +__insn(LTHIS) \ +__insn(LREXP) \ +__insn(LNULL) \ +__insn(LTRUE) \ +__insn(LFALSE) \ +__insn(LLOC) \ +__insn(LUPV) \ +__insn(LVAR) \ +__insn(LVAL) \ +__insn(CLFN) \ +__insn(ARFN) \ +__insn(SLOC) \ +__insn(SUPV) \ +__insn(SVAR) \ +__insn(SVAL) \ +__insn(ULOC) \ +__insn(UUPV) \ +__insn(UVAR) \ +__insn(UVAL) \ +__insn(NARR) \ +__insn(PARR) \ +__insn(MARR) \ +__insn(NOBJ) \ +__insn(SOBJ) \ +__insn(MOBJ) \ +__insn(PLUS) \ +__insn(MINUS) \ +__insn(ADD) \ +__insn(SUB) \ +__insn(MUL) \ +__insn(DIV) \ +__insn(MOD) \ +__insn(LSHIFT) \ +__insn(RSHIFT) \ +__insn(BAND) \ +__insn(BXOR) \ +__insn(BOR) \ +__insn(COMPL) \ +__insn(EQ) \ +__insn(NE) \ +__insn(EQS) \ +__insn(NES) \ +__insn(LT) \ +__insn(GT) \ +__insn(IN) \ +__insn(NOT) \ +__insn(JMP) \ +__insn(JMPZ) \ +__insn(COPY) \ +__insn(POP) \ +__insn(CUPV) \ +__insn(RETURN) \ +__insn(CALL) \ +__insn(MCALL) \ +__insn(PRINT) \ +__insn(NEXTK) \ +__insn(NEXTKV) + + +#undef __insn +#define __insn(_name) I_##_name, + +enum insn_type { + __insns + __I_MAX +}; + +typedef struct { + int8_t stack_pop; + int8_t stack_push; + int8_t operand_bytes; + bool operand_is_skip; +} uc_insn_definition; + +typedef enum { + EXCEPTION_NONE, + EXCEPTION_SYNTAX, + EXCEPTION_RUNTIME, + EXCEPTION_TYPE, + EXCEPTION_REFERENCE, + EXCEPTION_USER +} uc_exception_type_t; + +typedef struct { + uc_exception_type_t type; + json_object *stacktrace; + char *message; +} 
uc_exception; + +typedef struct { + uint8_t *ip; + uc_closure *closure; + uc_cfunction *cfunction; + size_t stackframe; + json_object *ctx; + bool mcall; +} uc_callframe; + +uc_declare_vector(uc_callframes, uc_callframe); +uc_declare_vector(uc_stack, json_object *); + +typedef struct uc_vm { + uc_stack stack; + uc_exception exception; + uc_callframes callframes; + uc_upvalref *open_upvals; + uc_parse_config *config; + uc_prototype *globals; + uc_source *sources; + union { + uint32_t u32; + int32_t s32; + uint16_t u16; + int16_t s16; + uint8_t u8; + int8_t s8; + } arg; + size_t spread_values; + uint8_t trace; +} uc_vm; + +typedef enum { + STATUS_OK, + ERROR_COMPILE, + ERROR_RUNTIME +} uc_vm_status_t; + +extern uint32_t insns[__I_MAX]; + +void uc_vm_init(uc_vm *vm, uc_parse_config *config); +void uc_vm_free(uc_vm *vm); + +void uc_vm_stack_push(uc_vm *vm, json_object *value); +json_object *uc_vm_stack_pop(uc_vm *vm); +json_object *uc_vm_stack_peek(uc_vm *vm, size_t offset); + +uc_exception_type_t uc_vm_call(uc_vm *vm, bool mcall, size_t nargs); + +void __attribute__((format(printf, 3, 0))) +uc_vm_raise_exception(uc_vm *vm, uc_exception_type_t type, const char *fmt, ...); + +uc_vm_status_t uc_vm_execute(uc_vm *vm, uc_function *fn, uc_prototype *globals, json_object *modules); + +#endif /* __VM_H_ */ |