Diffstat (limited to 'contrib/luasrcdiet/lua/optlex.lua')
-rw-r--r--  contrib/luasrcdiet/lua/optlex.lua  832
1 file changed, 832 insertions, 0 deletions
diff --git a/contrib/luasrcdiet/lua/optlex.lua b/contrib/luasrcdiet/lua/optlex.lua
new file mode 100644
index 0000000000..4c46b918bf
--- /dev/null
+++ b/contrib/luasrcdiet/lua/optlex.lua
@@ -0,0 +1,832 @@
+--[[--------------------------------------------------------------------
+
+ optlex.lua: does lexer-based optimizations
+ This file is part of LuaSrcDiet.
+
+ Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
+ The COPYRIGHT file describes the conditions
+ under which this software may be distributed.
+
+ See the ChangeLog for more information.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- NOTES:
+-- * For more lexer-based optimization ideas, see the TODO items or
+-- look at technotes.txt.
+-- * TODO: general string delimiter conversion optimizer
+-- * TODO: (numbers) warn if a number has too many significant digits
+----------------------------------------------------------------------]]
+
+local base = _G
+local string = require "string"
+module "optlex"
+local match = string.match
+local sub = string.sub
+local find = string.find
+local rep = string.rep
+local print -- assigned in optimize()
+
+------------------------------------------------------------------------
+-- variables and data structures
+------------------------------------------------------------------------
+
+-- error function; can be overridden by assigning your own function to the module
+error = base.error
+
+warn = {} -- table for warning flags
+
+local stoks, sinfos, stoklns -- source lists
+
+local is_realtoken = { -- significant (grammar) tokens
+ TK_KEYWORD = true,
+ TK_NAME = true,
+ TK_NUMBER = true,
+ TK_STRING = true,
+ TK_LSTRING = true,
+ TK_OP = true,
+ TK_EOS = true,
+}
+local is_faketoken = { -- whitespace (non-grammar) tokens
+ TK_COMMENT = true,
+ TK_LCOMMENT = true,
+ TK_EOL = true,
+ TK_SPACE = true,
+}
+
+local opt_details -- for extra information
+
+------------------------------------------------------------------------
+-- true if current token is at the start of a line
+-- * skips over deleted tokens via recursion
+------------------------------------------------------------------------
+
+local function atlinestart(i)
+ local tok = stoks[i - 1]
+ if i <= 1 or tok == "TK_EOL" then
+ return true
+ elseif tok == "" then
+ return atlinestart(i - 1)
+ end
+ return false
+end
+
+------------------------------------------------------------------------
+-- true if current token is at the end of a line
+-- * skips over deleted tokens via recursion
+------------------------------------------------------------------------
+
+local function atlineend(i)
+ local tok = stoks[i + 1]
+ if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
+ return true
+ elseif tok == "" then
+ return atlineend(i + 1)
+ end
+ return false
+end
+
+------------------------------------------------------------------------
+-- counts comment EOLs inside a long comment
+-- * in order to keep line numbering, EOLs need to be reinserted
+------------------------------------------------------------------------
+
+local function commenteols(lcomment)
+ local sep = #match(lcomment, "^%-%-%[=*%[")
+ local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
+ local i, c = 1, 0
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ if not p then break end -- if no matches, done
+ i = p + 1
+ c = c + 1
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+ end
+ return c
+end
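+
+-- For illustration, traced through the loop above (a CRLF pair
+-- counts as a single EOL):
+--   commenteols("--[[one\r\ntwo\nthree]]") --> 2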
+
+------------------------------------------------------------------------
+-- compares two tokens (i, j) and returns the whitespace required
+-- * important! see technotes.txt for more information
+-- * only two grammar/real tokens are being considered
+-- * if "", no separation is needed
+-- * if " ", then at least one whitespace (or EOL) is required
+------------------------------------------------------------------------
+
+local function checkpair(i, j)
+ local match = match
+ local t1, t2 = stoks[i], stoks[j]
+ --------------------------------------------------------------------
+ if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
+ t2 == "TK_STRING" or t2 == "TK_LSTRING" then
+ return ""
+ --------------------------------------------------------------------
+ elseif t1 == "TK_OP" or t2 == "TK_OP" then
+ if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
+ (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
+ return ""
+ end
+ if t1 == "TK_OP" and t2 == "TK_OP" then
+ -- for TK_OP/TK_OP pairs, see notes in technotes.txt
+ local op, op2 = sinfos[i], sinfos[j]
+ if (match(op, "^%.%.?$") and match(op2, "^%.")) or
+ (match(op, "^[~=<>]$") and op2 == "=") or
+ (op == "[" and (op2 == "[" or op2 == "=")) then
+ return " "
+ end
+ return ""
+ end
+ -- "TK_OP" + "TK_NUMBER" case
+ local op = sinfos[i]
+ if t2 == "TK_OP" then op = sinfos[j] end
+ if match(op, "^%.%.?%.?$") then
+ return " "
+ end
+ return ""
+ --------------------------------------------------------------------
+  else -- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER"
+ return " "
+ --------------------------------------------------------------------
+ end
+end
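+
+-- Some concrete pairings, following the rules above:
+--   "local" (TK_KEYWORD) + "x" (TK_NAME)  --> " "  cannot abut
+--   "x" (TK_NAME) + "=" (TK_OP)           --> ""   'x=' lexes cleanly
+--   ".." (TK_OP) + "..." (TK_OP)          --> " "  '.....' would mislex
+--   "1" (TK_NUMBER) + ".." (TK_OP)        --> " "  '1..' starts a
+--                                                  malformed number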
+
+------------------------------------------------------------------------
+-- repack tokens, removing deletions caused by optimization process
+------------------------------------------------------------------------
+
+local function repack_tokens()
+ local dtoks, dinfos, dtoklns = {}, {}, {}
+ local j = 1
+ for i = 1, #stoks do
+ local tok = stoks[i]
+ if tok ~= "" then
+ dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
+ j = j + 1
+ end
+ end
+ stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
+end
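+
+-- e.g. stoks {"TK_KEYWORD", "", "TK_NAME"} (with matching sinfos and
+-- stoklns entries) repacks to {"TK_KEYWORD", "TK_NAME"}; the ""
+-- entries are tokens deleted during optimization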
+
+------------------------------------------------------------------------
+-- number optimization
+-- * optimization using string formatting functions is one way of doing
+-- this, but here, we consider all cases and handle them separately
+-- (possibly an idiotic approach...)
+-- * the scientific notation generated is not in canonical form; this
+-- may or may not be a bad thing, feedback welcome
+-- * note: intermediate portions need to fit into a normal number range
+-- * optimizations can be divided based on number patterns:
+-- * hexadecimal:
+-- (1) no need to remove leading zeros, just skip to (2)
+-- (2) convert to integer if size equal or smaller
+-- * change if equal size -> lose the 'x' to reduce entropy
+-- (3) number is then processed as an integer
+-- (4) note: does not make 0[xX] consistent
+-- * integer:
+-- (1) note: includes anything with trailing ".", ".0", ...
+-- (2) remove useless fractional part, if present, e.g. 123.000
+-- (3) remove leading zeros, e.g. 000123
+-- (4) switch to scientific if shorter, e.g. 123000 -> 123e3
+-- * with fraction:
+-- (1) split into digits dot digits
+-- (2) if no integer portion, take as zero (can omit later)
+-- (3) handle degenerate .000 case, after which the fractional part
+-- must be non-zero (if zero, it's matched as an integer)
+-- (4) remove trailing zeros for fractional portion
+-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
+-- (6) otherwise p == 0 and the form is .q, e.g. .000123
+-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
+-- * scientific:
+-- (1) split into (digits dot digits) [eE] ([+-] digits)
+-- (2) if significand has ".", shift it out so it becomes an integer
+-- (3) if significand is zero, just use zero
+-- (4) remove leading zeros for significand
+-- (5) shift out trailing zeros for significand
+-- (6) examine exponent and determine which format is best:
+-- integer, with fraction, scientific
+------------------------------------------------------------------------
+
+local function do_number(i)
+ local before = sinfos[i] -- 'before'
+ local z = before -- working representation
+ local y -- 'after', if better
+ --------------------------------------------------------------------
+ if match(z, "^0[xX]") then -- hexadecimal number
+ local v = base.tostring(base.tonumber(z))
+ if #v <= #z then
+ z = v -- change to integer, AND continue
+ else
+ return -- no change; stick to hex
+ end
+ end
+ --------------------------------------------------------------------
+ if match(z, "^%d+%.?0*$") then -- integer or has useless frac
+ z = match(z, "^(%d+)%.?0*$") -- int portion only
+ if z + 0 > 0 then
+ z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
+ local v = #match(z, "0*$")
+ local nv = base.tostring(v)
+ if v > #nv + 1 then -- scientific is shorter
+ z = sub(z, 1, #z - v).."e"..nv
+ end
+ y = z
+ else
+ y = "0" -- basic zero
+ end
+ --------------------------------------------------------------------
+ elseif not match(z, "[eE]") then -- number with fraction part
+ local p, q = match(z, "^(%d*)%.(%d+)$") -- split
+ if p == "" then p = 0 end -- int part zero
+ if q + 0 == 0 and p == 0 then
+ y = "0" -- degenerate .000 case
+ else
+      -- now q > 0 holds, and p is either 0 or a digit string
+ local v = #match(q, "0*$") -- remove trailing zeros
+ if v > 0 then
+ q = sub(q, 1, #q - v)
+ end
+ -- if p > 0, nothing else we can do to simplify p.q case
+ if p + 0 > 0 then
+ y = p.."."..q
+ else
+ y = "."..q -- tentative, e.g. .000123
+        local v = #match(q, "^0*") -- # leading zeros
+ local w = #q - v -- # significant digits
+ local nv = base.tostring(#q)
+ -- e.g. compare 123e-6 versus .000123
+ if w + 2 + #nv < 1 + #q then
+ y = sub(q, -w).."e-"..nv
+ end
+ end
+ end
+ --------------------------------------------------------------------
+ else -- scientific number
+ local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
+ ex = base.tonumber(ex)
+ -- if got ".", shift out fractional portion of significand
+ local p, q = match(sig, "^(%d*)%.(%d*)$")
+ if p then
+ ex = ex - #q
+ sig = p..q
+ end
+ if sig + 0 == 0 then
+ y = "0" -- basic zero
+ else
+ local v = #match(sig, "^0*") -- remove leading zeros
+ sig = sub(sig, v + 1)
+ v = #match(sig, "0*$") -- shift out trailing zeros
+ if v > 0 then
+ sig = sub(sig, 1, #sig - v)
+ ex = ex + v
+ end
+ -- examine exponent and determine which format is best
+ local nex = base.tostring(ex)
+ if ex == 0 then -- it's just an integer
+ y = sig
+      elseif ex > 0 and (ex <= 1 + #nex) then -- integer form not longer
+ y = sig..rep("0", ex)
+ elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
+ v = #sig + ex
+ y = sub(sig, 1, v).."."..sub(sig, v + 1)
+ elseif ex < 0 and (#nex >= -ex - #sig) then
+ -- e.g. compare 1234e-5 versus .01234
+ -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
+ -- -> #nex >= -ex - #sig
+ v = -ex - #sig
+ y = "."..rep("0", v)..sig
+ else -- non-canonical scientific representation
+ y = sig.."e"..ex
+ end
+ end--if sig
+ end
+ --------------------------------------------------------------------
+ if y and y ~= sinfos[i] then
+ if opt_details then
+ print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
+ opt_details = opt_details + 1
+ end
+ sinfos[i] = y
+ end
+end
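+
+-- Sample rewrites performed by do_number, traced through the
+-- branches above:
+--   "0x0010"  --> "16"      hex to equal-or-shorter integer
+--   "000123"  --> "123"     leading zeros stripped
+--   "123000"  --> "123e3"   scientific form is shorter
+--   "123.000" --> "123"     useless fractional part dropped
+--   "12.50"   --> "12.5"    trailing fractional zeros dropped
+--   ".000123" --> "123e-6"  scientific form is shorter
+--   "1200e-2" --> "12"      exponent absorbed into the significand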
+
+------------------------------------------------------------------------
+-- string optimization
+-- * note: works on well-formed strings only!
+-- * optimizations on characters can be summarized as follows:
+-- \a\b\f\n\r\t\v -- no change
+-- \\ -- no change
+-- \"\' -- depends on delim, other can remove \
+-- \[\] -- remove \
+-- \<char> -- general escape, remove \
+-- \<eol> -- normalize the EOL only
+-- \ddd -- if \a\b\f\n\r\t\v, change to latter
+-- if other < ascii 32, keep ddd but zap leading zeros
+-- if >= ascii 32, translate it into the literal, then also
+-- do escapes for \\,\",\' cases
+-- <other> -- no change
+-- * switch delimiters if string becomes shorter
+------------------------------------------------------------------------
+
+local function do_string(I)
+ local info = sinfos[I]
+ local delim = sub(info, 1, 1) -- delimiter used
+ local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
+ local z = sub(info, 2, -2) -- actual string
+ local i = 1
+  local c_delim, c_ndelim = 0, 0 -- occurrence counts of delim, ndelim
+ --------------------------------------------------------------------
+ while i <= #z do
+ local c = sub(z, i, i)
+ ----------------------------------------------------------------
+ if c == "\\" then -- escaped stuff
+ local j = i + 1
+ local d = sub(z, j, j)
+ local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
+ ------------------------------------------------------------
+ if not p then -- \<char> -- remove \
+ z = sub(z, 1, i - 1)..sub(z, j)
+ i = i + 1
+ ------------------------------------------------------------
+ elseif p <= 8 then -- \a\b\f\n\r\t\v\\
+ i = i + 2 -- no change
+ ------------------------------------------------------------
+ elseif p <= 10 then -- \<eol> -- normalize EOL
+ local eol = sub(z, j, j + 1)
+ if eol == "\r\n" or eol == "\n\r" then
+ z = sub(z, 1, i).."\n"..sub(z, j + 2)
+ elseif p == 10 then -- \r case
+ z = sub(z, 1, i).."\n"..sub(z, j + 1)
+ end
+ i = i + 2
+ ------------------------------------------------------------
+ elseif p <= 12 then -- \"\' -- remove \ for ndelim
+ if d == delim then
+ c_delim = c_delim + 1
+ i = i + 2
+ else
+ c_ndelim = c_ndelim + 1
+ z = sub(z, 1, i - 1)..sub(z, j)
+ i = i + 1
+ end
+ ------------------------------------------------------------
+ else -- \ddd -- various steps
+ local s = match(z, "^(%d%d?%d?)", j)
+ j = i + 1 + #s -- skip to location
+ local cv = s + 0
+ local cc = string.char(cv)
+ local p = find("\a\b\f\n\r\t\v", cc, 1, true)
+ if p then -- special escapes
+ s = "\\"..sub("abfnrtv", p, p)
+          elseif cv < 32 then -- normalized \ddd
+            if match(sub(z, j, j), "%d") then
+              -- if a digit follows, \ddd cannot be shortened,
+              -- or it would absorb the digit (e.g. "\0065")
+              s = "\\"..s
+            else
+              s = "\\"..cv
+            end
+ elseif cc == delim then -- \<delim>
+ s = "\\"..cc
+ c_delim = c_delim + 1
+ elseif cc == "\\" then -- \\
+ s = "\\\\"
+ else -- literal character
+ s = cc
+ if cc == ndelim then
+ c_ndelim = c_ndelim + 1
+ end
+ end
+ z = sub(z, 1, i - 1)..s..sub(z, j)
+ i = i + #s
+ ------------------------------------------------------------
+ end--if p
+ ----------------------------------------------------------------
+ else-- c ~= "\\" -- <other> -- no change
+ i = i + 1
+ if c == ndelim then -- count ndelim, for switching delimiters
+ c_ndelim = c_ndelim + 1
+ end
+ ----------------------------------------------------------------
+ end--if c
+ end--while
+ --------------------------------------------------------------------
+ -- switching delimiters, a long-winded derivation:
+ -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
+ -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
+ -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
+ if c_delim > c_ndelim then
+ i = 1
+ while i <= #z do
+ local p, q, r = find(z, "([\'\"])", i)
+ if not p then break end
+ if r == delim then -- \<delim> -> <delim>
+ z = sub(z, 1, p - 2)..sub(z, p)
+ i = p
+ else-- r == ndelim -- <ndelim> -> \<ndelim>
+ z = sub(z, 1, p - 1).."\\"..sub(z, p)
+ i = p + 2
+ end
+ end--while
+ delim = ndelim -- actually change delimiters
+ end
+ --------------------------------------------------------------------
+ z = delim..z..delim
+ if z ~= sinfos[I] then
+ if opt_details then
+ print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
+ opt_details = opt_details + 1
+ end
+ sinfos[I] = z
+ end
+end
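+
+-- Sample rewrites performed by do_string, traced through the
+-- branches above (outer quotes are part of the token):
+--   "\97bc"      --> "abc"       \ddd at or above ASCII 32 becomes
+--                                the literal character
+--   "\9"         --> "\t"        \ddd mapping to a named escape
+--   "\001"       --> "\1"        leading zeros of \ddd zapped
+--   "say \"hi\"" --> 'say "hi"'  delimiter switch saves two escapes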
+
+------------------------------------------------------------------------
+-- long string optimization
+-- * note: warning flagged if trailing whitespace found, not trimmed
+-- * remove first optional newline
+-- * normalize embedded newlines
+-- * reduce '=' separators in delimiters if possible
+------------------------------------------------------------------------
+
+local function do_lstring(I)
+ local info = sinfos[I]
+ local delim1 = match(info, "^%[=*%[") -- cut out delimiters
+ local sep = #delim1
+ local delim2 = sub(info, -sep, -1)
+ local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
+ local y = ""
+ local i = 1
+ --------------------------------------------------------------------
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ -- deal with a single line
+ local ln
+ if not p then
+ ln = sub(z, i)
+ elseif p >= i then
+ ln = sub(z, i, p - 1)
+ end
+ if ln ~= "" then
+ -- flag a warning if there are trailing spaces, won't optimize!
+ if match(ln, "%s+$") then
+ warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
+ end
+ y = y..ln
+ end
+ if not p then -- done if no more EOLs
+ break
+ end
+ -- deal with line endings, normalize them
+ i = p + 1
+ if p then
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+      -- skip the first newline: the Lua lexer ignores an EOL that
+      -- immediately follows the opening delimiter, so it can be
+      -- deleted, but only when another EOL does not follow (that
+      -- one would then be skipped by the lexer instead)
+      if not (p == 1 and not match(z, "^[\r\n]", i)) then
+        y = y.."\n"
+      end
+ end
+ end--while
+ --------------------------------------------------------------------
+ -- handle possible deletion of one or more '=' separators
+ if sep >= 3 then
+ local chk, okay = sep - 1
+    -- test shorter ending delimiters, from one fewer '=' down to none
+ while chk >= 2 do
+ local delim = "%]"..rep("=", chk - 2).."%]"
+ if not match(y, delim) then okay = chk end
+ chk = chk - 1
+ end
+ if okay then -- change delimiters
+ sep = rep("=", okay - 2)
+ delim1, delim2 = "["..sep.."[", "]"..sep.."]"
+ end
+ end
+ --------------------------------------------------------------------
+ sinfos[I] = delim1..y..delim2
+end
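+
+-- For illustration:
+--   [==[hello]==] --> [[hello]]  the '=' separators can go, since
+--                                ']]' does not occur in the body
+-- embedded CRLF/LFCR pairs are normalized to single '\n', and (when
+-- safe) a newline right after the opening delimiter is dropped,
+-- since the Lua lexer skips it anyway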
+
+------------------------------------------------------------------------
+-- long comment optimization
+-- * note: does not remove first optional newline
+-- * trim trailing whitespace
+-- * normalize embedded newlines
+-- * reduce '=' separators in delimiters if possible
+------------------------------------------------------------------------
+
+local function do_lcomment(I)
+ local info = sinfos[I]
+ local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
+ local sep = #delim1
+  local delim2 = sub(info, -(sep - 2), -1) -- closing delim lacks '--'
+ local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
+ local y = ""
+ local i = 1
+ --------------------------------------------------------------------
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ -- deal with a single line, extract and check trailing whitespace
+ local ln
+ if not p then
+ ln = sub(z, i)
+ elseif p >= i then
+ ln = sub(z, i, p - 1)
+ end
+ if ln ~= "" then
+ -- trim trailing whitespace if non-empty line
+ local ws = match(ln, "%s*$")
+      if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
+ y = y..ln
+ end
+ if not p then -- done if no more EOLs
+ break
+ end
+ -- deal with line endings, normalize them
+ i = p + 1
+ if p then
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+ y = y.."\n"
+ end
+ end--while
+ --------------------------------------------------------------------
+ -- handle possible deletion of one or more '=' separators
+ sep = sep - 2
+ if sep >= 3 then
+ local chk, okay = sep - 1
+    -- test shorter ending delimiters, from one fewer '=' down to none
+ while chk >= 2 do
+ local delim = "%]"..rep("=", chk - 2).."%]"
+ if not match(y, delim) then okay = chk end
+ chk = chk - 1
+ end
+ if okay then -- change delimiters
+ sep = rep("=", okay - 2)
+ delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
+ end
+ end
+ --------------------------------------------------------------------
+ sinfos[I] = delim1..y..delim2
+end
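+
+-- For illustration:
+--   --[==[ note ]==] --> --[[ note]]  per-line trailing whitespace
+--                                     trimmed, '=' separators reduced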
+
+------------------------------------------------------------------------
+-- short comment optimization
+-- * trim trailing whitespace
+------------------------------------------------------------------------
+
+local function do_comment(i)
+ local info = sinfos[i]
+ local ws = match(info, "%s*$") -- just look from end of string
+ if #ws > 0 then
+    info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
+ end
+ sinfos[i] = info
+end
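+
+-- e.g. do_comment turns "-- some comment   " into "-- some comment"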
+
+------------------------------------------------------------------------
+-- returns true if string found in long comment
+-- * this is a feature to keep copyright or license texts
+------------------------------------------------------------------------
+
+local function keep_lcomment(opt_keep, info)
+ if not opt_keep then return false end -- option not set
+ local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
+ local sep = #delim1
+  local delim2 = sub(info, -(sep - 2), -1) -- closing delim lacks '--'
+ local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
+ if find(z, opt_keep, 1, true) then -- try to match
+ return true
+ end
+end
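+
+-- e.g. keep_lcomment("Copyright", "--[[Copyright (c) 2008]]") is
+-- true, so the long comment survives; the return is nil (falsy)
+-- when opt_keep is unset or the text is not found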
+
+------------------------------------------------------------------------
+-- main entry point
+-- * currently, lexer processing has 2 passes
+-- * processing is done on a line-oriented basis, which is easier to
+-- grok due to the next point...
+-- * since there are various options that can be enabled or disabled,
+-- processing is a little messy or convoluted
+------------------------------------------------------------------------
+
+function optimize(option, toklist, semlist, toklnlist)
+ --------------------------------------------------------------------
+ -- set option flags
+ --------------------------------------------------------------------
+ local opt_comments = option["opt-comments"]
+ local opt_whitespace = option["opt-whitespace"]
+ local opt_emptylines = option["opt-emptylines"]
+ local opt_eols = option["opt-eols"]
+ local opt_strings = option["opt-strings"]
+ local opt_numbers = option["opt-numbers"]
+ local opt_keep = option.KEEP
+ opt_details = option.DETAILS and 0 -- upvalues for details display
+ print = print or base.print
+ if opt_eols then -- forced settings, otherwise won't work properly
+ opt_comments = true
+ opt_whitespace = true
+ opt_emptylines = true
+ end
+ --------------------------------------------------------------------
+ -- variable initialization
+ --------------------------------------------------------------------
+ stoks, sinfos, stoklns -- set source lists
+ = toklist, semlist, toklnlist
+ local i = 1 -- token position
+ local tok, info -- current token
+ local prev -- position of last grammar token
+ -- on same line (for TK_SPACE stuff)
+ --------------------------------------------------------------------
+ -- changes a token, info pair
+ --------------------------------------------------------------------
+ local function settoken(tok, info, I)
+ I = I or i
+ stoks[I] = tok or ""
+ sinfos[I] = info or ""
+ end
+ --------------------------------------------------------------------
+ -- processing loop (PASS 1)
+ --------------------------------------------------------------------
+ while true do
+ tok, info = stoks[i], sinfos[i]
+ ----------------------------------------------------------------
+ local atstart = atlinestart(i) -- set line begin flag
+ if atstart then prev = nil end
+ ----------------------------------------------------------------
+ if tok == "TK_EOS" then -- end of stream/pass
+ break
+ ----------------------------------------------------------------
+ elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
+ tok == "TK_NAME" or -- operators
+ tok == "TK_OP" then
+ -- TK_KEYWORD and TK_OP can't be optimized without a big
+ -- optimization framework; it would be more of an optimizing
+ -- compiler, not a source code compressor
+ -- TK_NAME that are locals needs parser to analyze/optimize
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_NUMBER" then -- numbers
+ if opt_numbers then
+ do_number(i) -- optimize
+ end
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_STRING" or -- strings, long strings
+ tok == "TK_LSTRING" then
+ if opt_strings then
+ if tok == "TK_STRING" then
+ do_string(i) -- optimize
+ else
+ do_lstring(i) -- optimize
+ end
+ end
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_COMMENT" then -- short comments
+ if opt_comments then
+ if i == 1 and sub(info, 1, 1) == "#" then
+          -- keep shebang comment, trim whitespace
+ do_comment(i)
+ else
+ -- safe to delete, as a TK_EOL (or TK_EOS) always follows
+ settoken() -- remove entirely
+ end
+ elseif opt_whitespace then -- trim whitespace only
+ do_comment(i)
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_LCOMMENT" then -- long comments
+ if keep_lcomment(opt_keep, info) then
+ ------------------------------------------------------------
+ -- if --keep, we keep a long comment if <msg> is found;
+ -- this is a feature to keep copyright or license texts
+ if opt_whitespace then -- trim whitespace only
+ do_lcomment(i)
+ end
+ prev = i
+ elseif opt_comments then
+ local eols = commenteols(info)
+ ------------------------------------------------------------
+ -- prepare opt_emptylines case first, if a disposable token
+ -- follows, current one is safe to dump, else keep a space;
+ -- it is implied that the operation is safe for '-', because
+ -- current is a TK_LCOMMENT, and must be separate from a '-'
+ if is_faketoken[stoks[i + 1]] then
+ settoken() -- remove entirely
+ tok = ""
+ else
+ settoken("TK_SPACE", " ")
+ end
+ ------------------------------------------------------------
+ -- if there are embedded EOLs to keep and opt_emptylines is
+ -- disabled, then switch the token into one or more EOLs
+ if not opt_emptylines and eols > 0 then
+ settoken("TK_EOL", rep("\n", eols))
+ end
+ ------------------------------------------------------------
+ -- if optimizing whitespaces, force reinterpretation of the
+ -- token to give a chance for the space to be optimized away
+ if opt_whitespace and tok ~= "" then
+ i = i - 1 -- to reinterpret
+ end
+ ------------------------------------------------------------
+ else -- disabled case
+ if opt_whitespace then -- trim whitespace only
+ do_lcomment(i)
+ end
+ prev = i
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_EOL" then -- line endings
+ if atstart and opt_emptylines then
+ settoken() -- remove entirely
+ elseif info == "\r\n" or info == "\n\r" then
+ -- normalize the rest of the EOLs for CRLF/LFCR only
+ -- (note that TK_LCOMMENT can change into several EOLs)
+ settoken("TK_EOL", "\n")
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_SPACE" then -- whitespace
+ if opt_whitespace then
+ if atstart or atlineend(i) then
+ -- delete leading and trailing whitespace
+ settoken() -- remove entirely
+ else
+ ------------------------------------------------------------
+          -- at this point, since leading whitespace has been removed,
+          -- there should be either a real token or a TK_LCOMMENT
+          -- prior to this whitespace; the TK_LCOMMENT case only
+          -- happens if opt_comments is disabled, so prev ~= nil
+ local ptok = stoks[prev]
+ if ptok == "TK_LCOMMENT" then
+ -- previous TK_LCOMMENT can abut with anything
+ settoken() -- remove entirely
+ else
+ -- prev must be a grammar token; consecutive TK_SPACE
+            -- tokens are impossible when whitespace is optimized
+ local ntok = stoks[i + 1]
+ if is_faketoken[ntok] then
+ -- handle special case where a '-' cannot abut with
+ -- either a short comment or a long comment
+ if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
+ ptok == "TK_OP" and sinfos[prev] == "-" then
+ -- keep token
+ else
+ settoken() -- remove entirely
+ end
+ else--is_realtoken
+ -- check a pair of grammar tokens, if can abut, then
+ -- delete space token entirely, otherwise keep one space
+ local s = checkpair(prev, i + 1)
+ if s == "" then
+ settoken() -- remove entirely
+ else
+ settoken("TK_SPACE", " ")
+ end
+ end
+ end
+ ------------------------------------------------------------
+ end
+ end
+ ----------------------------------------------------------------
+ else
+ error("unidentified token encountered")
+ end
+ ----------------------------------------------------------------
+ i = i + 1
+ end--while
+ repack_tokens()
+ --------------------------------------------------------------------
+ -- processing loop (PASS 2)
+ --------------------------------------------------------------------
+ if opt_eols then
+ i = 1
+    -- aggressive EOL removal only works well once most non-grammar
+    -- tokens have been optimized away, because the scheme is simple:
+    -- it just checks 'real' token pairs around EOLs
+ if stoks[1] == "TK_COMMENT" then
+      -- a comment still present at position 1 must be the shebang;
+      -- skip the whole line
+ i = 3
+ end
+ while true do
+ tok, info = stoks[i], sinfos[i]
+ --------------------------------------------------------------
+ if tok == "TK_EOS" then -- end of stream/pass
+ break
+ --------------------------------------------------------------
+ elseif tok == "TK_EOL" then -- consider each TK_EOL
+ local t1, t2 = stoks[i - 1], stoks[i + 1]
+ if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
+ local s = checkpair(i - 1, i + 1)
+ if s == "" then
+ settoken() -- remove entirely
+ end
+ end
+ end--if tok
+ --------------------------------------------------------------
+ i = i + 1
+ end--while
+ repack_tokens()
+ end
+ --------------------------------------------------------------------
+ if opt_details and opt_details > 0 then print() end -- spacing
+ return stoks, sinfos, stoklns
+end
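+
+--[[--------------------------------------------------------------------
+-- Usage sketch (illustrative only, not part of the module proper):
+-- a minimal driver, assuming the companion llex.lua lexer from
+-- LuaSrcDiet with its init()/llex() entry points and tok/seminfo/
+-- tokln result lists:
+--
+--   local llex = require "llex"
+--   local optlex = require "optlex"
+--   local source = assert(io.open("myfile.lua")):read("*a")
+--   llex.init(source, "@myfile.lua")
+--   llex.llex()
+--   local toks, infos, lns = llex.tok, llex.seminfo, llex.tokln
+--   local option = {
+--     ["opt-comments"] = true, ["opt-whitespace"] = true,
+--     ["opt-emptylines"] = true, ["opt-numbers"] = true,
+--     ["opt-strings"] = true,
+--   }
+--   toks, infos, lns = optlex.optimize(option, toks, infos, lns)
+--   io.write(table.concat(infos)) -- emit the optimized source
+----------------------------------------------------------------------]]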