summaryrefslogtreecommitdiffhomepage
path: root/contrib/luasrcdiet/lua/llex.lua
diff options
context:
space:
mode:
authorJo-Philipp Wich <jow@openwrt.org>2011-01-23 01:49:54 +0000
committerJo-Philipp Wich <jow@openwrt.org>2011-01-23 01:49:54 +0000
commit03ad47080cc869051af58f91769da4faf503e151 (patch)
tree78ef0ffe46a578a9892115658e788b69a303db04 /contrib/luasrcdiet/lua/llex.lua
parent02977e53298dbcede2e4932b270f4ac23b51e014 (diff)
contrib, build: bundle LuaSrcDiet and make it available in build targets
Diffstat (limited to 'contrib/luasrcdiet/lua/llex.lua')
-rw-r--r--contrib/luasrcdiet/lua/llex.lua355
1 files changed, 355 insertions, 0 deletions
diff --git a/contrib/luasrcdiet/lua/llex.lua b/contrib/luasrcdiet/lua/llex.lua
new file mode 100644
index 0000000000..a637f3048e
--- /dev/null
+++ b/contrib/luasrcdiet/lua/llex.lua
@@ -0,0 +1,355 @@
+--[[--------------------------------------------------------------------
+
+ llex.lua: Lua 5.1 lexical analyzer in Lua
+ This file is part of LuaSrcDiet, based on Yueliang material.
+
+ Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
+ The COPYRIGHT file describes the conditions
+ under which this software may be distributed.
+
+ See the ChangeLog for more information.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- NOTES:
+-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
+-- with significant modifications to handle LuaSrcDiet's needs:
+-- (1) llex.error is an optional error function handler
+-- (2) seminfo for strings include their delimiters and no
+-- translation operations are performed on them
+-- * ADDED shbang handling has been added to support executable scripts
+-- * NO localized decimal point replacement magic
+-- * NO limit to number of lines
+-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
+-- * Please read technotes.txt for more technical details.
+----------------------------------------------------------------------]]
+
+local base = _G
+local string = require "string"
+module "llex"
+
+local find = string.find
+local match = string.match
+local sub = string.sub
+
+----------------------------------------------------------------------
+-- initialize keyword list, variables
+----------------------------------------------------------------------
+
+-- set of Lua 5.1 reserved words: kw[word] == true for each keyword,
+-- used by llex() to tell TK_KEYWORD apart from TK_NAME
+local kw = {}
+for v in string.gmatch([[
+and break do else elseif end false for function if in
+local nil not or repeat return then true until while]], "%S+") do
+  kw[v] = true
+end
+
+-- NOTE: see init() for module variables (externally visible):
+-- tok, seminfo, tokln
+
+local z, -- source stream (the whole source as one string)
+    sourceid, -- name of source (e.g. '@file' or '=name'; may be nil)
+    I, -- position of lexer (1-based index into z)
+    buff, -- buffer for strings: start position of the token being
+          -- read (turned into the token text by the string readers)
+    ln -- line number (current line, starts at 1)
+
+----------------------------------------------------------------------
+-- add information to token listing
+----------------------------------------------------------------------
+
+-- Appends one lexed token to the three parallel module tables:
+-- tok[n] = token type, seminfo[n] = semantic info (exact source text),
+-- tokln[n] = line number the token starts on.
+local function addtoken(token, info)
+  local n = #tok + 1
+  tok[n], seminfo[n], tokln[n] = token, info, ln
+end
+
+----------------------------------------------------------------------
+-- handles line number incrementation and end-of-line characters
+----------------------------------------------------------------------
+
+-- Consumes one end-of-line at position i, treating each of the four
+-- conventions \n, \r, \n\r and \r\n as a single line break.
+-- Increments ln, updates the module position I, and -- when is_tok is
+-- true -- records a TK_EOL token whose seminfo is the exact line-end
+-- character sequence. Returns the position just past the line break.
+local function inclinenumber(i, is_tok)
+  local sub = sub
+  local old = sub(z, i, i)
+  i = i + 1 -- skip '\n' or '\r'
+  local c = sub(z, i, i)
+  -- a second, *different* EOL char means a two-character line ending
+  if (c == "\n" or c == "\r") and (c ~= old) then
+    i = i + 1 -- skip '\n\r' or '\r\n'
+    old = old..c
+  end
+  if is_tok then addtoken("TK_EOL", old) end
+  ln = ln + 1
+  I = i
+  return i
+end
+
+----------------------------------------------------------------------
+-- initialize lexer for given source _z and source name _sourceid
+----------------------------------------------------------------------
+
+-- Initializes the lexer for source string _z named _sourceid.
+-- Resets position and line counters and (re)creates the three result
+-- tables tok/seminfo/tokln, which are visible through the module.
+-- A leading '#' shbang line is consumed up front as a TK_COMMENT.
+function init(_z, _sourceid)
+  z = _z -- source
+  sourceid = _sourceid -- name of source
+  I = 1 -- lexer's position in source
+  ln = 1 -- line number
+  tok = {} -- lexed token list*
+  seminfo = {} -- lexed semantic information list*
+  tokln = {} -- line numbers for messages*
+  -- (*) externally visible thru' module
+  --------------------------------------------------------------------
+  -- initial processing (shbang handling)
+  --------------------------------------------------------------------
+  -- q captures the '#...' line body, r the optional \r?\n line ending
+  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
+  if p then -- skip first line
+    I = I + #q
+    addtoken("TK_COMMENT", q)
+    if #r > 0 then inclinenumber(I, true) end
+  end
+end
+
+----------------------------------------------------------------------
+-- returns a chunk name or id, no truncation for long names
+----------------------------------------------------------------------
+
+-- Derives a printable chunk name for error messages. Source ids of the
+-- "@file" or "=name" form lose their one-character marker; anything
+-- else (including a missing id) is reported as "[string]".
+-- Unlike the C lexer, long names are not truncated.
+function chunkid()
+  if sourceid then
+    if match(sourceid, "^[=@]") then
+      return sub(sourceid, 2) -- drop the '@'/'=' marker
+    end
+  end
+  return "[string]"
+end
+
+----------------------------------------------------------------------
+-- formats error message and throws error
+-- * a simplified version, does not report what token was responsible
+----------------------------------------------------------------------
+
+-- Formats a lexer error as "chunk:line: message" and raises it.
+-- 'error' resolves in the module environment first, so a user may
+-- install a custom handler as llex.error (see NOTES (1) above);
+-- otherwise the standard base.error is used.
+function errorline(s, line)
+  local e = error or base.error
+  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
+end
+-- fast local alias for internal callers; resolved once, so it is not
+-- affected by later replacement of the module-level 'errorline'
+local errorline = errorline
+
+------------------------------------------------------------------------
+-- count separators ("=") in a long string delimiter
+------------------------------------------------------------------------
+
+-- Counts '=' separators in a long-bracket delimiter. i points at the
+-- opening '[' or closing ']'; on return, I points at the character
+-- just past the '=' run. Returns the separator count (>= 0) when the
+-- run ends with the same bracket character it started with (a valid
+-- level-count delimiter), otherwise the negative value (-count)-1.
+local function skip_sep(i)
+  local sub = sub
+  local s = sub(z, i, i)
+  i = i + 1
+  local count = #match(z, "=*", i) -- note, take the length
+  i = i + count
+  I = i
+  return (sub(z, i, i) == s) and count or (-count) - 1
+end
+
+----------------------------------------------------------------------
+-- reads a long string or long comment
+----------------------------------------------------------------------
+
+-- Reads a long string [[...]] or long comment --[[...]] of separator
+-- level 'sep'. On entry, the module variable 'buff' holds the numeric
+-- start position of the token and I sits on the 2nd char of the
+-- opening delimiter. Returns the token's raw source text, delimiters
+-- included and with no translation performed (see NOTES above).
+-- Raises via errorline() when the stream ends before the delimiter.
+-- Fixes vs. the Yueliang original: the unused 'local j' and the
+-- vestigial 'buff = buff.."\n"' are dropped -- the latter turned the
+-- numeric position in 'buff' into a string and only worked through
+-- accidental string->number coercion inside string.sub; the token
+-- text is taken verbatim from z at the end, so behavior is unchanged.
+local function read_long_string(is_str, sep)
+  local i = I + 1 -- skip 2nd '['
+  local sub = sub
+  local c = sub(z, i, i)
+  if c == "\r" or c == "\n" then -- string starts with a newline?
+    i = inclinenumber(i) -- skip it
+  end
+  while true do
+    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
+    if not p then
+      errorline(is_str and "unfinished long string" or
+                "unfinished long comment")
+    end
+    i = p
+    if r == "]" then -- delimiter test
+      if skip_sep(i) == sep then
+        -- 'buff' changes meaning here: position -> token text
+        buff = sub(z, buff, I)
+        I = I + 1 -- skip 2nd ']'
+        return buff
+      end
+      i = I
+    else -- newline: only the line counter needs updating
+      i = inclinenumber(i)
+    end
+  end--while
+end
+
+----------------------------------------------------------------------
+-- reads a string
+----------------------------------------------------------------------
+
+-- Reads a single- or double-quoted string; 'del' is the delimiter
+-- character. On entry, the module variable 'buff' holds the position
+-- of the opening quote and I the position just past it. Returns the
+-- raw source text of the string, delimiters included; escapes are NOT
+-- translated (see NOTES above). Raises via errorline() on an
+-- unterminated string, a bare newline, or an over-large \ddd escape.
+local function read_string(del)
+  local i = I
+  local find = find
+  local sub = sub
+  while true do
+    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
+    if p then
+      if r == "\n" or r == "\r" then
+        errorline("unfinished string")
+      end
+      i = p
+      if r == "\\" then -- handle escapes
+        i = i + 1
+        r = sub(z, i, i)
+        if r == "" then break end -- (EOZ error)
+        -- positions 1-7 are the simple escapes a b f n r t v;
+        -- positions 8-9 are '\n'/'\r', i.e. an escaped line break
+        p = find("abfnrtv\n\r", r, 1, true)
+        ------------------------------------------------------
+        if p then -- special escapes
+          if p > 7 then
+            i = inclinenumber(i) -- escaped newline: count the line
+          else
+            i = i + 1
+          end
+        ------------------------------------------------------
+        elseif find(r, "%D") then -- other non-digits
+          i = i + 1
+        ------------------------------------------------------
+        else -- \xxx sequence (1-3 decimal digits)
+          local p, q, s = find(z, "^(%d%d?%d?)", i)
+          i = q + 1
+          -- 's + 1 > 256' coerces the digit string and checks s <= 255
+          if s + 1 > 256 then -- UCHAR_MAX
+            errorline("escape sequence too large")
+          end
+          ------------------------------------------------------
+        end--if p
+      else
+        i = i + 1
+        if r == del then -- ending delimiter
+          I = i
+          return sub(z, buff, i - 1) -- return string
+        end
+      end--if r
+    else
+      break -- (error)
+    end--if p
+  end--while
+  errorline("unfinished string")
+end
+
+------------------------------------------------------------------------
+-- main lexer function
+------------------------------------------------------------------------
+
+-- Main lexer driver: tokenizes all of z, appending to tok/seminfo/
+-- tokln via addtoken() until TK_EOS is emitted at end of stream.
+-- Each pass of the outer loop lexes exactly one token; the inner loop
+-- exists only so 'break' can end a section once a token matched.
+function llex()
+  local find = find
+  local match = match
+  while true do--outer
+    local i = I
+    -- inner loop allows break to be used to nicely section tests
+    while true do--inner
+      ----------------------------------------------------------------
+      -- identifiers and keywords: [_%a][_%w]*
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^([_%a][_%w]*)", i)
+      if p then
+        I = i + #r
+        if kw[r] then
+          addtoken("TK_KEYWORD", r) -- reserved word (keyword)
+        else
+          addtoken("TK_NAME", r) -- identifier
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      -- numbers: digits, optional '.'-prefix, optional [eE][+-] exponent;
+      -- validity is delegated to tonumber() on the collected text
+      ----------------------------------------------------------------
+      local p, _, r = find(z, "^(%.?)%d", i)
+      if p then -- numeral
+        if r == "." then i = i + 1 end
+        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
+        i = q + 1
+        if #r == 1 then -- optional exponent
+          if match(z, "^[%+%-]", i) then -- optional sign
+            i = i + 1
+          end
+        end
+        local _, q = find(z, "^[_%w]*", i)
+        I = q + 1
+        local v = sub(z, p, q) -- string equivalent
+        if not base.tonumber(v) then -- handles hex test also
+          errorline("malformed number")
+        end
+        addtoken("TK_NUMBER", v)
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      -- whitespace runs and newlines; t is the first %s char matched
+      ----------------------------------------------------------------
+      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
+      if p then
+        if t == "\n" or t == "\r" then -- newline
+          inclinenumber(i, true)
+        else
+          I = q + 1 -- whitespace
+          addtoken("TK_SPACE", r)
+        end
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      -- punctuation; position p in "-[\"'.=<>~" classifies the char:
+      -- 1='-' 2='[' 3,4=quotes 5='.' 6..9 = '=' '<' '>' '~'
+      ----------------------------------------------------------------
+      local r = match(z, "^%p", i)
+      if r then
+        buff = i
+        local p = find("-[\"\'.=<>~", r, 1, true)
+        if p then
+          -- two-level if block for punctuation/symbols
+          --------------------------------------------------------
+          if p <= 2 then
+            if p == 1 then -- minus
+              -- '--' starts a comment; c captures an optional '['
+              local c = match(z, "^%-%-(%[?)", i)
+              if c then
+                i = i + 2
+                local sep = -1
+                if c == "[" then
+                  sep = skip_sep(i)
+                end
+                if sep >= 0 then -- long comment
+                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
+                else -- short comment
+                  I = find(z, "[\n\r]", i) or (#z + 1)
+                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
+                end
+                break -- (continue)
+              end
+              -- (fall through for "-")
+            else -- [ or long string
+              local sep = skip_sep(i)
+              if sep >= 0 then
+                addtoken("TK_LSTRING", read_long_string(true, sep))
+              elseif sep == -1 then -- plain '[' operator
+                addtoken("TK_OP", "[")
+              else
+                errorline("invalid long string delimiter")
+              end
+              break -- (continue)
+            end
+            --------------------------------------------------------
+          elseif p <= 5 then
+            if p < 5 then -- strings
+              I = i + 1
+              addtoken("TK_STRING", read_string(r))
+              break -- (continue)
+            end
+            r = match(z, "^%.%.?%.?", i) -- .|..|... dots
+            -- (fall through)
+            --------------------------------------------------------
+          else -- relational: '=' '<' '>' '~' with an optional '='
+            r = match(z, "^%p=?", i)
+            -- (fall through)
+          end
+        end
+        I = i + #r
+        addtoken("TK_OP", r) -- for other symbols, fall through
+        break -- (continue)
+      end
+      ----------------------------------------------------------------
+      -- anything else: single char as TK_OP, or end of stream
+      ----------------------------------------------------------------
+      local r = sub(z, i, i)
+      if r ~= "" then
+        I = i + 1
+        addtoken("TK_OP", r) -- other single-char tokens
+        break
+      end
+      addtoken("TK_EOS", "") -- end of stream,
+      return -- exit here
+      ----------------------------------------------------------------
+    end--while inner
+  end--while outer
+end
+
+return base.getfenv()