diff options
author | Jo-Philipp Wich <jo@mein.io> | 2022-01-13 16:06:17 +0100 |
---|---|---|
committer | Jo-Philipp Wich <jo@mein.io> | 2022-01-18 10:58:11 +0100 |
commit | 6c2caf9fbb9d346cfb20cd5c83875fdff77e584c (patch) | |
tree | 4d0fe816584e8f351ed0f1da8be0b9ccf1c5635f /source.c | |
parent | 725bb75b7b66dd1e0a381908e831cede0402cb6e (diff) |
source: refactor source file handling
- Move source object pointer into program entity which is referenced by
each function
- Move lineinfo related routines into source.c and use them from lexer.c
since lineinfo encoding does not belong into the lexical analyzer.
- Implement initial infrastructure for detecting source file type,
this is required later to differentiate between plaintext and
precompiled bytecode files
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
Diffstat (limited to 'source.c')
-rw-r--r-- | source.c | 82 |
1 files changed, 82 insertions, 0 deletions
@@ -15,6 +15,7 @@ */ #include <string.h> +#include <errno.h> #include "ucode/source.h" @@ -116,3 +117,84 @@ uc_source_put(uc_source_t *source) free(source->buffer); free(source); } + +uc_source_type_t +uc_source_type_test(uc_source_t *source) +{ + union { char s[sizeof(uint32_t)]; uint32_t n; } buf; + uc_source_type_t type = UC_SOURCE_TYPE_PLAIN; + FILE *fp = source->fp; + int c; + + if (fread(buf.s, 1, 2, fp) == 2 && !strncmp(buf.s, "#!", 2)) { + source->off += 2; + + while ((c = fgetc(fp)) != EOF) { + source->off++; + + if (c == '\n') { + uc_source_line_update(source, source->off); + uc_source_line_next(source); + + break; + } + } + } + else { + if (fseek(fp, 0L, SEEK_SET) == -1) + fprintf(stderr, "Failed to rewind source buffer: %s\n", strerror(errno)); + } + + return type; +} + +/* lineinfo is encoded in bytes: the most significant bit specifies whether + * to advance the line count by one or not, while the remaining 7 bits encode + * the amounts of bytes on the current line. + * + * If a line has more than 127 characters, the first byte will be set to + * 0xff (1 1111111) and subsequent bytes will encode the remaining characters + * in bits 1..7 while setting bit 8 to 0. A line with 400 characters will thus + * be encoded as 0xff 0x7f 0x7f 0x13 (1:1111111 + 0:1111111 + 0:1111111 + 0:1111111). + * + * The newline character itself is not counted, so an empty line is encoded as + * 0x80 (1:0000000). + */ + +void +uc_source_line_next(uc_source_t *source) +{ + uc_lineinfo_t *lines = &source->lineinfo; + + uc_vector_grow(lines); + lines->entries[lines->count++] = 0x80; +} + +void +uc_source_line_update(uc_source_t *source, size_t off) +{ + uc_lineinfo_t *lines = &source->lineinfo; + uint8_t *entry, n; + + if (!lines->count) + uc_source_line_next(source); + + entry = uc_vector_last(lines); + + if ((entry[0] & 0x7f) + off <= 0x7f) { + entry[0] += off; + } + else { + off -= (0x7f - (entry[0] & 0x7f)); + entry[0] |= 0x7f; + + while (off > 0) { + n = (off > 0x7f) ? 0x7f : off; + uc_vector_grow(lines); + entry = uc_vector_last(lines); + entry[1] = n; + off -= n; + lines->count++; + } + } +} |