diff options
author | Jo-Philipp Wich <jow@openwrt.org> | 2011-01-23 01:49:54 +0000 |
---|---|---|
committer | Jo-Philipp Wich <jow@openwrt.org> | 2011-01-23 01:49:54 +0000 |
commit | 03ad47080cc869051af58f91769da4faf503e151 (patch) | |
tree | 78ef0ffe46a578a9892115658e788b69a303db04 /contrib/luasrcdiet/lua/llex.lua | |
parent | 02977e53298dbcede2e4932b270f4ac23b51e014 (diff) |
contrib, build: bundle LuaSrcDiet and make it available in build targets
Diffstat (limited to 'contrib/luasrcdiet/lua/llex.lua')
-rw-r--r-- | contrib/luasrcdiet/lua/llex.lua | 355 |
1 file changed, 355 insertions, 0 deletions
diff --git a/contrib/luasrcdiet/lua/llex.lua b/contrib/luasrcdiet/lua/llex.lua new file mode 100644 index 0000000000..a637f3048e --- /dev/null +++ b/contrib/luasrcdiet/lua/llex.lua @@ -0,0 +1,355 @@ +--[[-------------------------------------------------------------------- + + llex.lua: Lua 5.1 lexical analyzer in Lua + This file is part of LuaSrcDiet, based on Yueliang material. + + Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> + The COPYRIGHT file describes the conditions + under which this software may be distributed. + + See the ChangeLog for more information. + +----------------------------------------------------------------------]] + +--[[-------------------------------------------------------------------- +-- NOTES: +-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0, +-- with significant modifications to handle LuaSrcDiet's needs: +-- (1) llex.error is an optional error function handler +-- (2) seminfo for strings include their delimiters and no +-- translation operations are performed on them +-- * ADDED shbang handling has been added to support executable scripts +-- * NO localized decimal point replacement magic +-- * NO limit to number of lines +-- * NO support for compatible long strings (LUA_COMPAT_LSTR) +-- * Please read technotes.txt for more technical details. 
----------------------------------------------------------------------]]

-- NOTE(review): this module uses the deprecated Lua 5.1 `module()` call.
-- Every non-local `function` below (init, chunkid, errorline, llex) and
-- the tables created in init() (tok, seminfo, tokln) become fields of the
-- "llex" module table, which is returned at the bottom of the file.
local base = _G
local string = require "string"
module "llex"

-- localized library functions; `string` is a local captured above, so it
-- remains reachable after module() replaced the function environment
local find = string.find
local match = string.match
local sub = string.sub

----------------------------------------------------------------------
-- initialize keyword list, variables
----------------------------------------------------------------------

-- set of Lua 5.1 reserved words; keys are the keyword strings themselves
local kw = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]], "%S+") do
  kw[v] = true
end

-- NOTE: see init() for module variables (externally visible):
--       tok, seminfo, tokln

-- shared lexer state, used by every function below
local z,                        -- source stream
      sourceid,                 -- name of source
      I,                        -- position of lexer
      buff,                     -- buffer for strings
      ln                        -- line number

----------------------------------------------------------------------
-- add information to token listing
-- * token: token type tag, e.g. "TK_NAME", "TK_NUMBER"
-- * info:  semantic information (the matched source text)
-- appends one entry to the parallel arrays tok/seminfo/tokln, tagging
-- it with the current line number ln
----------------------------------------------------------------------

local function addtoken(token, info)
  local i = #tok + 1
  tok[i] = token
  seminfo[i] = info
  tokln[i] = ln
end

----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
-- * i: position of the '\n' or '\r' that starts the line break
-- * is_tok: when true, record the line break as a "TK_EOL" token
-- consumes a one- or two-character line ending ('\n', '\r', '\n\r' or
-- '\r\n'), bumps the line counter, updates lexer position I, and
-- returns the position just past the line break
----------------------------------------------------------------------

local function inclinenumber(i, is_tok)
  local sub = sub
  local old = sub(z, i, i)
  i = i + 1  -- skip '\n' or '\r'
  local c = sub(z, i, i)
  -- a different EOL char immediately after means a 2-char line ending
  if (c == "\n" or c == "\r") and (c ~= old) then
    i = i + 1  -- skip '\n\r' or '\r\n'
    old = old..c
  end
  if is_tok then addtoken("TK_EOL", old) end
  ln = ln + 1
  I = i
  return i
end

----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
-- * _z: the whole source as one string
-- * _sourceid: chunk name, conventionally prefixed "@" or "=" (see
--   chunkid())
-- resets all lexer state; the token streams tok/seminfo/tokln are
-- module-visible results filled in by llex()
----------------------------------------------------------------------

function init(_z, _sourceid)
  z = _z                        -- source
  sourceid = _sourceid          -- name of source
  I = 1                         -- lexer's position in source
  ln = 1                        -- line number
  tok = {}                      -- lexed token list*
  seminfo = {}                  -- lexed semantic information list*
  tokln = {}                    -- line numbers for messages*
                                -- (*) externally visible thru' module
  --------------------------------------------------------------------
  -- initial processing (shbang handling)
  --------------------------------------------------------------------
  -- a leading "#..." line (shbang of executable scripts) is kept as a
  -- TK_COMMENT token so output can reproduce it verbatim
  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
  if p then                             -- skip first line
    I = I + #q
    addtoken("TK_COMMENT", q)
    if #r > 0 then inclinenumber(I, true) end
  end
end

----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
-- strips the leading "=" or "@" marker when present; otherwise the
-- source is treated as an anonymous string chunk
----------------------------------------------------------------------

function chunkid()
  if sourceid and match(sourceid, "^[=@]") then
    return sub(sourceid, 2)  -- remove first char
  end
  return "[string]"
end

----------------------------------------------------------------------
-- formats error message and throws error
-- * s: message text; line: optional line number (defaults to current ln)
-- * a simplified version, does not report what token was responsible
-- dispatches through the optional module-level handler llex.error when
-- set (see file header NOTES (1)), else falls back to the global error
----------------------------------------------------------------------

function errorline(s, line)
  local e = error or base.error
  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
local errorline = errorline

------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
-- * i: position of the opening '[' or ']'
-- returns the number of '='s (>= 0) when the bracket pair is well
-- formed (same bracket char on both sides of the '='s), otherwise
-- -count-1; hence -1 means a lone bracket with no '='s.
-- side effect: leaves I at the position of the (expected) 2nd bracket
------------------------------------------------------------------------

local function skip_sep(i)
  local sub = sub
  local s = sub(z, i, i)
  i = i + 1
  local count = #match(z, "=*", i)  -- note, take the length
  i = i + count
  I = i
  return (sub(z, i, i) == s) and count or (-count) - 1
end

----------------------------------------------------------------------
-- reads a long string or long comment
-- * is_str: true for a long string, false for a long comment (only
--   affects the error message)
-- * sep: separator count from skip_sep(); the closing delimiter must
--   match it
-- on entry, buff holds the source position of the token start and I
-- sits on the 2nd '[' of the opening delimiter; returns the raw source
-- slice including both delimiters
----------------------------------------------------------------------

local function read_long_string(is_str, sep)
  local i = I + 1  -- skip 2nd '['
  local sub = sub
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then  -- string starts with a newline?
    i = inclinenumber(i)  -- skip it
  end
  -- NOTE(review): 'j' appears unused in this function — likely a
  -- leftover from the Yueliang original; candidate for removal
  local j = i
  while true do
    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    i = p
    if r == "]" then                    -- delimiter test
      if skip_sep(i) == sep then
        -- buff is the numeric start position; grab delimiters too
        buff = sub(z, buff, I)
        I = I + 1  -- skip 2nd ']'
        return buff
      end
      i = I
    else                                -- newline
      -- NOTE(review): buff is a numeric position here, so `..` coerces
      -- it to a string like "12\n"; string.sub below coerces it back to
      -- 12 (trailing whitespace is ignored by Lua 5.1's number
      -- conversion), so this line is effectively a no-op leftover from
      -- Yueliang's string-buffer version — TODO confirm and simplify
      buff = buff.."\n"
      i = inclinenumber(i)
    end
  end--while
end

----------------------------------------------------------------------
-- reads a string
-- * del: the delimiter character, '"' or "'"
-- on entry, buff holds the position of the opening delimiter and I sits
-- just past it; returns the raw source slice including both delimiters
-- (no escape translation is performed — see file header NOTES (2))
----------------------------------------------------------------------

local function read_string(del)
  local i = I
  local find = find
  local sub = sub
  while true do
    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
    if p then
      if r == "\n" or r == "\r" then
        errorline("unfinished string")
      end
      i = p
      if r == "\\" then                         -- handle escapes
        i = i + 1
        r = sub(z, i, i)
        if r == "" then break end -- (EOZ error)
        -- p indexes into "abfnrtv\n\r": 1-7 are single-char escapes,
        -- 8-9 are escaped real line breaks
        p = find("abfnrtv\n\r", r, 1, true)
        ------------------------------------------------------
        if p then                               -- special escapes
          if p > 7 then
            i = inclinenumber(i)
          else
            i = i + 1
          end
        ------------------------------------------------------
        elseif find(r, "%D") then               -- other non-digits
          i = i + 1
        ------------------------------------------------------
        else                                    -- \xxx sequence
          local p, q, s = find(z, "^(%d%d?%d?)", i)
          i = q + 1
          -- s is the digit string, coerced to a number for the range
          -- check; values above 255 cannot fit in a byte
          if s + 1 > 256 then -- UCHAR_MAX
            errorline("escape sequence too large")
          end
        ------------------------------------------------------
        end--if p
      else
        i = i + 1
        if r == del then                        -- ending delimiter
          I = i
          return sub(z, buff, i - 1)            -- return string
        end
      end--if r
    else
      break -- (error)
    end--if p
  end--while
  errorline("unfinished string")
end

------------------------------------------------------------------------
-- main lexer function
-- tokenizes the whole of z into the module-visible arrays tok, seminfo
-- and tokln; loops until a "TK_EOS" token is emitted at end of stream.
-- each inner-loop section tests one token category and breaks to
-- continue with the next token
------------------------------------------------------------------------

function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- inner loop allows break to be used to nicely section tests
    while true do--inner
      ----------------------------------------------------------------
      -- identifiers and keywords
      local p, _, r = find(z, "^([_%a][_%w]*)", i)
      if p then
        I = i + #r
        if kw[r] then
          addtoken("TK_KEYWORD", r)     -- reserved word (keyword)
        else
          addtoken("TK_NAME", r)        -- identifier
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- numbers: scanned loosely, then validated with tonumber
      local p, _, r = find(z, "^(%.?)%d", i)
      if p then                                 -- numeral
        if r == "." then i = i + 1 end
        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = q + 1
        if #r == 1 then                         -- optional exponent
          if match(z, "^[%+%-]", i) then        -- optional sign
            i = i + 1
          end
        end
        local _, q = find(z, "^[_%w]*", i)
        I = q + 1
        local v = sub(z, p, q)                  -- string equivalent
        if not base.tonumber(v) then            -- handles hex test also
          errorline("malformed number")
        end
        addtoken("TK_NUMBER", v)
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- whitespace and line breaks; t is the first whitespace char
      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
      if p then
        if t == "\n" or t == "\r" then          -- newline
          inclinenumber(i, true)
        else
          I = q + 1                             -- whitespace
          addtoken("TK_SPACE", r)
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- punctuation: p is the position of r within "-[\"'.=<>~", which
      -- selects the handling branch (1 '-', 2 '[', 3-4 quotes, 5 '.',
      -- 6-9 relational/concat-like operators)
      local r = match(z, "^%p", i)
      if r then
        buff = i
        local p = find("-[\"\'.=<>~", r, 1, true)
        if p then
          -- two-level if block for punctuation/symbols
          --------------------------------------------------------
          if p <= 2 then
            if p == 1 then                      -- minus
              local c = match(z, "^%-%-(%[?)", i)
              if c then
                i = i + 2
                local sep = -1
                if c == "[" then
                  sep = skip_sep(i)
                end
                if sep >= 0 then                -- long comment
                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
                else                            -- short comment
                  I = find(z, "[\n\r]", i) or (#z + 1)
                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
                end
                break -- (continue)
              end
              -- (fall through for "-")
            else                                -- [ or long string
              local sep = skip_sep(i)
              if sep >= 0 then
                addtoken("TK_LSTRING", read_long_string(true, sep))
              elseif sep == -1 then
                addtoken("TK_OP", "[")
              else
                errorline("invalid long string delimiter")
              end
              break -- (continue)
            end
          --------------------------------------------------------
          elseif p <= 5 then
            if p < 5 then                       -- strings
              I = i + 1
              addtoken("TK_STRING", read_string(r))
              break -- (continue)
            end
            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
            -- (fall through)
          --------------------------------------------------------
          else                                  -- relational
            r = match(z, "^%p=?", i)
            -- (fall through)
          end
        end
        I = i + #r
        addtoken("TK_OP", r)  -- for other symbols, fall through
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- anything else: single char token, or end of stream
      local r = sub(z, i, i)
      if r ~= "" then
        I = i + 1
        addtoken("TK_OP", r)  -- other single-char tokens
        break
      end
      addtoken("TK_EOS", "")                    -- end of stream,
      return                                    -- exit here
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end

-- module() set our environment to the module table, so getfenv()
-- returns the "llex" module table itself
return base.getfenv()