diff options
Diffstat (limited to 'contrib/luasrcdiet/lua/optlex.lua')
-rw-r--r-- | contrib/luasrcdiet/lua/optlex.lua | 832 |
1 files changed, 0 insertions, 832 deletions
--[[--------------------------------------------------------------------

  optlex.lua: does lexer-based optimizations
  This file is part of LuaSrcDiet.

  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

  See the ChangeLog for more information.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * For more lexer-based optimization ideas, see the TODO items or
--   look at technotes.txt.
-- * TODO: general string delimiter conversion optimizer
-- * TODO: (numbers) warn if overly significant digit
-- * This module targets Lua 5.1 source text and uses the Lua 5.1
--   module() mechanism; after module() the global table is only
--   reachable through 'base'.
----------------------------------------------------------------------]]

local base = _G
local string = require "string"
module "optlex"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print                     -- assigned lazily in optimize()

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function, can override by setting own function into module
error = base.error

warn = {}                       -- table for warning flags

local stoks, sinfos, stoklns    -- source lists (token, info, line no.)

local is_realtoken = {          -- significant (grammar) tokens
  TK_KEYWORD = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_STRING = true,
  TK_LSTRING = true,
  TK_OP = true,
  TK_EOS = true,
}
local is_faketoken = {          -- whitespace (non-grammar) tokens
  TK_COMMENT = true,
  TK_LCOMMENT = true,
  TK_EOL = true,
  TK_SPACE = true,
}

local opt_details               -- counter for extra information, or
                                -- false/nil when details are disabled

------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * skips over deleted tokens (token type "") via recursion
------------------------------------------------------------------------

local function atlinestart(i)
  local tok = stoks[i - 1]
  if i <= 1 or tok == "TK_EOL" then
    return true
  elseif tok == "" then
    return atlinestart(i - 1)
  end
  return false
end

------------------------------------------------------------------------
-- true if current token is at the end of a line
-- * skips over deleted tokens (token type "") via recursion
------------------------------------------------------------------------

local function atlineend(i)
  local tok = stoks[i + 1]
  if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
    return true
  elseif tok == "" then
    return atlineend(i + 1)
  end
  return false
end

------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * in order to keep line numbering, EOLs need to be reinserted
-- * CRLF and LFCR pairs count as a single EOL
------------------------------------------------------------------------

local function commenteols(lcomment)
  -- sep is the length of the opening "--[==[" delimiter; the closing
  -- "]==]" delimiter is two characters shorter (no leading "--")
  local sep = #match(lcomment, "^%-%-%[=*%[")
  local z = sub(lcomment, sep + 1, -(sep - 1))  -- remove delims
  local i, c = 1, 0
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    if not p then break end     -- if no matches, done
    i = p + 1
    c = c + 1
    if #s > 0 and r ~= s then   -- skip CRLF or LFCR
      i = i + 1
    end
  end
  return c
end

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * important! see technotes.txt for more information
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
------------------------------------------------------------------------

local function checkpair(i, j)
  local match = match
  local t1, t2 = stoks[i], stoks[j]
  --------------------------------------------------------------------
  if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
     t2 == "TK_STRING" or t2 == "TK_LSTRING" then
    -- an index bracket directly followed by a long string would be
    -- lexed as a long bracket: t[ [[x]] ] must not become t[[[x]]]
    if t1 == "TK_OP" and sinfos[i] == "[" and t2 == "TK_LSTRING" then
      return " "
    end
    return ""
  --------------------------------------------------------------------
  elseif t1 == "TK_OP" or t2 == "TK_OP" then
    if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
       (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
      return ""
    end
    if t1 == "TK_OP" and t2 == "TK_OP" then
      -- for TK_OP/TK_OP pairs, see notes in technotes.txt
      local op, op2 = sinfos[i], sinfos[j]
      if (match(op, "^%.%.?$") and match(op2, "^%.")) or
         (match(op, "^[~=<>]$") and op2 == "=") or
         (op == "[" and (op2 == "[" or op2 == "=")) or
         (op == "-" and op2 == "-") then  -- "--" would start a comment
        return " "
      end
      return ""
    end
    -- "TK_OP" + "TK_NUMBER" case
    local op = sinfos[i]
    if t2 == "TK_OP" then op = sinfos[j] end
    if match(op, "^%.%.?%.?$") then
      return " "
    end
    return ""
  --------------------------------------------------------------------
  else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
    return " "
  --------------------------------------------------------------------
  end
end

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
-- * rebuilds the three source lists without entries of type ""
------------------------------------------------------------------------

local function repack_tokens()
  local dtoks, dinfos, dtoklns = {}, {}, {}
  local j = 1
  for i = 1, #stoks do
    local tok = stoks[i]
    if tok ~= "" then
      dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
      j = j + 1
    end
  end
  stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
-- * scientific notation being generated is not in canonical form
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
--   (1) no need to remove leading zeros, just skip to (2)
--   (2) convert to integer if size equal or smaller
--       * change if equal size -> lose the 'x' to reduce entropy
--   (3) number is then processed as an integer
--   (4) note: does not make 0[xX] consistent
-- * integer:
--   (1) note: includes anything with trailing ".", ".0", ...
--   (2) remove useless fractional part, if present, e.g. 123.000
--   (3) remove leading zeros, e.g. 000123
--   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
--   (1) split into digits dot digits
--   (2) if no integer portion, take as zero (can omit later)
--   (3) handle degenerate .000 case, after which the fractional part
--       must be non-zero (if zero, it's matched as an integer)
--   (4) remove trailing zeros for fractional portion
--   (5) p.q where p > 0 and q > 0 cannot be shortened any more
--   (6) otherwise p == 0 and the form is .q, e.g. .000123
--   (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if significand has ".", shift it out so it becomes an integer
--   (3) if significand is zero, just use zero
--   (4) remove leading zeros for significand
--   (5) shift out trailing zeros for significand
--   (6) examine exponent and determine which format is best:
--       integer, with fraction, scientific
-- * the token info sinfos[i] is rewritten in place when it changes
------------------------------------------------------------------------

local function do_number(i)
  local before = sinfos[i]      -- 'before'
  local z = before              -- working representation
  local y                       -- 'after', if better
  --------------------------------------------------------------------
  if match(z, "^0[xX]") then            -- hexadecimal number
    local v = base.tostring(base.tonumber(z))
    if #v <= #z then
      z = v  -- change to integer, AND continue
    else
      return  -- no change; stick to hex
    end
  end
  --------------------------------------------------------------------
  if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
    z = match(z, "^(%d+)%.?0*$")  -- int portion only
    if z + 0 > 0 then
      z = match(z, "^0*([1-9]%d*)$")  -- remove leading zeros
      local v = #match(z, "0*$")      -- count of trailing zeros
      local nv = base.tostring(v)
      if v > #nv + 1 then  -- scientific is shorter
        z = sub(z, 1, #z - v).."e"..nv
      end
      y = z
    else
      y = "0"  -- basic zero
    end
  --------------------------------------------------------------------
  elseif not match(z, "[eE]") then      -- number with fraction part
    local p, q = match(z, "^(%d*)%.(%d+)$")  -- split
    if p == "" then p = 0 end  -- int part zero
    if q + 0 == 0 and p == 0 then
      y = "0"  -- degenerate .000 case
    else
      -- now, q > 0 holds and p is a number
      local v = #match(q, "0*$")  -- remove trailing zeros
      if v > 0 then
        q = sub(q, 1, #q - v)
      end
      -- if p > 0, nothing else we can do to simplify p.q case
      if p + 0 > 0 then
        y = p.."."..q
      else
        y = "."..q  -- tentative, e.g. .000123
        local v = #match(q, "^0*")  -- # leading zeros
        local w = #q - v            -- # significant digits
        local nv = base.tostring(#q)
        -- e.g. compare 123e-6 versus .000123
        if w + 2 + #nv < 1 + #q then
          y = sub(q, -w).."e-"..nv
        end
      end
    end
  --------------------------------------------------------------------
  else                                  -- scientific number
    local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
    ex = base.tonumber(ex)
    -- if got ".", shift out fractional portion of significand
    local p, q = match(sig, "^(%d*)%.(%d*)$")
    if p then
      ex = ex - #q
      sig = p..q
    end
    if sig + 0 == 0 then
      y = "0"  -- basic zero
    else
      local v = #match(sig, "^0*")  -- remove leading zeros
      sig = sub(sig, v + 1)
      v = #match(sig, "0*$") -- shift out trailing zeros
      if v > 0 then
        sig = sub(sig, 1, #sig - v)
        ex = ex + v
      end
      -- examine exponent and determine which format is best
      local nex = base.tostring(ex)
      if ex == 0 then  -- it's just an integer
        y = sig
      elseif ex > 0 and (ex <= 1 + #nex) then  -- a number
        y = sig..rep("0", ex)
      elseif ex < 0 and (ex >= -#sig) then  -- fraction, e.g. .123
        v = #sig + ex
        y = sub(sig, 1, v).."."..sub(sig, v + 1)
      elseif ex < 0 and (#nex >= -ex - #sig) then
        -- e.g. compare 1234e-5 versus .01234
        -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
        --     -> #nex >= -ex - #sig
        v = -ex - #sig
        y = "."..rep("0", v)..sig
      else  -- non-canonical scientific representation
        y = sig.."e"..ex
      end
    end--if sig
  end
  --------------------------------------------------------------------
  if y and y ~= sinfos[i] then
    if opt_details then
      print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
      opt_details = opt_details + 1
    end
    sinfos[i] = y
  end
end

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed (Lua 5.1) strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\ -- no change
--   \"\' -- depends on delim, other can remove \
--   \[\] -- remove \
--   \<char> -- general escape, remove \
--   \<eol> -- normalize the EOL only
--   \ddd -- if \a\b\f\n\r\t\v, change to latter
--     if other < ascii 32, keep ddd but zap leading zeros; the
--       leading zeros are kept when a digit follows the escape,
--       otherwise that digit would become part of the escape
--     if >= ascii 32, translate it into the literal, then also
--       do escapes for \\,\",\' cases; a digit is NOT translated
--       when it would directly follow a numeric escape of fewer
--       than three digits, for the same reason
--   <other> -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------

local function do_string(I)
  local info = sinfos[I]
  local delim = sub(info, 1, 1)                 -- delimiter used
  local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  local z = sub(info, 2, -2)                    -- actual string
  local i = 1
  local c_delim, c_ndelim = 0, 0                -- "/' counts
  -- index in z just past the latest numeric escape that has fewer
  -- than three digits; a literal digit placed there would be absorbed
  local esc_end = 0
  --------------------------------------------------------------------
  while i <= #z do
    local c = sub(z, i, i)
    ----------------------------------------------------------------
    if c == "\\" then                   -- escaped stuff
      local j = i + 1
      local d = sub(z, j, j)
      local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
      ------------------------------------------------------------
      if not p then                     -- \<char> -- remove \
        z = sub(z, 1, i - 1)..sub(z, j)
        i = i + 1
      ------------------------------------------------------------
      elseif p <= 8 then                -- \a\b\f\n\r\t\v\\
        i = i + 2                       -- no change
      ------------------------------------------------------------
      elseif p <= 10 then               -- \<eol> -- normalize EOL
        local eol = sub(z, j, j + 1)
        if eol == "\r\n" or eol == "\n\r" then
          z = sub(z, 1, i).."\n"..sub(z, j + 2)
        elseif p == 10 then  -- \r case
          z = sub(z, 1, i).."\n"..sub(z, j + 1)
        end
        i = i + 2
      ------------------------------------------------------------
      elseif p <= 12 then               -- \"\' -- remove \ for ndelim
        if d == delim then
          c_delim = c_delim + 1
          i = i + 2
        else
          c_ndelim = c_ndelim + 1
          z = sub(z, 1, i - 1)..sub(z, j)
          i = i + 1
        end
      ------------------------------------------------------------
      else                              -- \ddd -- various steps
        local s = match(z, "^(%d%d?%d?)", j)
        j = i + 1 + #s                  -- skip to location
        local cv = s + 0
        local cc = string.char(cv)
        local p = find("\a\b\f\n\r\t\v", cc, 1, true)
        if p then                       -- special escapes
          s = "\\"..sub("abfnrtv", p, p)
        elseif cv < 32 then             -- normalized \ddd
          if match(sub(z, j, j), "^%d") then
            -- a digit follows, so all three digits are required,
            -- e.g. \0012 must not become \12
            s = "\\"..s
          else
            s = "\\"..cv
          end
        elseif cc == delim then         -- \<delim>
          s = "\\"..cc
          c_delim = c_delim + 1
        elseif cc == "\\" then          -- \\
          s = "\\\\"
        elseif esc_end == i and match(cc, "^%d") then
          -- a literal digit here would extend the preceding short
          -- numeric escape, e.g. \1\050 must not become \12
          s = "\\"..s
        else                            -- literal character
          s = cc
          if cc == ndelim then
            c_ndelim = c_ndelim + 1
          end
        end
        z = sub(z, 1, i - 1)..s..sub(z, j)
        i = i + #s
        if match(s, "^\\%d%d?$") then   -- short numeric escape emitted
          esc_end = i
        end
      ------------------------------------------------------------
      end--if p
    ----------------------------------------------------------------
    else-- c ~= "\\"                    -- <other> -- no change
      i = i + 1
      if c == ndelim then  -- count ndelim, for switching delimiters
        c_ndelim = c_ndelim + 1
      end
    ----------------------------------------------------------------
    end--if c
  end--while
  --------------------------------------------------------------------
  -- switching delimiters, a long-winded derivation:
  -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  if c_delim > c_ndelim then
    i = 1
    while i <= #z do
      local p, q, r = find(z, "([\'\"])", i)
      if not p then break end
      if r == delim then                -- \<delim> -> <delim>
        z = sub(z, 1, p - 2)..sub(z, p)
        i = p
      else-- r == ndelim                -- <ndelim> -> \<ndelim>
        z = sub(z, 1, p - 1).."\\"..sub(z, p)
        i = p + 2
      end
    end--while
    delim = ndelim  -- actually change delimiters
  end
  --------------------------------------------------------------------
  z = delim..z..delim
  if z ~= sinfos[I] then
    if opt_details then
      print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
      opt_details = opt_details + 1
    end
    sinfos[I] = z
  end
end

------------------------------------------------------------------------
-- finds the smallest long bracket level usable for a given body
-- * body is the text between the delimiters, level is the current
--   number of '=' characters in the delimiters
-- * a level n is usable only if the closing delimiter "]" n*"=" "]"
--   first occurs at the very end of body..closer; this also rejects
--   closers formed by the tail of the body plus part of the delimiter,
--   e.g. body "a]" cannot use level 0 since "a]]]" closes too early
-- * level 0 is refused when the body contains "[[", because stock
--   Lua 5.1 reports nested [[...]] as an error
-- * returns the level found (a number below 'level'), or nil
------------------------------------------------------------------------

local function shortest_level(body, level)
  local best
  for n = level - 1, 0, -1 do
    local closer = "]"..rep("=", n).."]"
    if find(body..closer, closer, 1, true) == #body + 1 and
       (n > 0 or not find(body, "[[", 1, true)) then
      best = n
    end
  end
  return best
end

------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
-- * note: the first newline after the opening delimiter is kept; it
--   cannot be dropped blindly, since a second newline following it
--   would then become the one skipped by the Lua lexer
------------------------------------------------------------------------

local function do_lstring(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep + 1))  -- lstring without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- flag a warning if there are trailing spaces, won't optimize!
      if match(ln, "%s+$") then
        warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
      end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    y = y.."\n"
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  local level = sep - 2  -- number of '=' in the current delimiters
  if level >= 1 then
    local n = shortest_level(y, level)
    if n then  -- change delimiters
      local eq = rep("=", n)
      delim1, delim2 = "["..eq.."[", "]"..eq.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------

local function do_lcomment(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  -- the closing delimiter has no "--", so it is 2 characters shorter
  local delim2 = sub(info, -(sep - 2), -1)
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line, extract and check trailing whitespace
    local ln
    if not p then
      ln = sub(z, i)
    elseif p >= i then
      ln = sub(z, i, p - 1)
    end
    if ln ~= "" then
      -- trim trailing whitespace if non-empty line
      local ws = match(ln, "%s*$")
      if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    y = y.."\n"
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  local level = sep - 4  -- number of '=' in the current delimiters
  if level >= 1 then
    local n = shortest_level(y, level)
    if n then  -- change delimiters
      local eq = rep("=", n)
      delim1, delim2 = "--["..eq.."[", "]"..eq.."]"
    end
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------

local function do_comment(i)
  local info = sinfos[i]
  local ws = match(info, "%s*$")        -- just look from end of string
  if #ws > 0 then
    info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  end
  sinfos[i] = info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
-- * opt_keep is searched as plain text (no pattern matching)
------------------------------------------------------------------------

local function keep_lcomment(opt_keep, info)
  if not opt_keep then return false end  -- option not set
  local sep = #match(info, "^%-%-%[=*%[")  -- opening delimiter length
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  if find(z, opt_keep, 1, true) then  -- try to match
    return true
  end
  return false
end

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
-- * option: table read for the keys "opt-comments", "opt-whitespace",
--   "opt-emptylines", "opt-eols", "opt-strings", "opt-numbers", KEEP
--   and DETAILS
-- * toklist, semlist, toklnlist: parallel lists of token types, token
--   texts and line numbers; the lists are modified in place during
--   pass 1, and the list must contain a TK_EOS token
-- * returns the repacked token, info and line number lists
------------------------------------------------------------------------

function optimize(option, toklist, semlist, toklnlist)
  --------------------------------------------------------------------
  -- set option flags
  --------------------------------------------------------------------
  local opt_comments = option["opt-comments"]
  local opt_whitespace = option["opt-whitespace"]
  local opt_emptylines = option["opt-emptylines"]
  local opt_eols = option["opt-eols"]
  local opt_strings = option["opt-strings"]
  local opt_numbers = option["opt-numbers"]
  local opt_keep = option.KEEP
  opt_details = option.DETAILS and 0  -- upvalues for details display
  print = print or base.print
  if opt_eols then  -- forced settings, otherwise won't work properly
    opt_comments = true
    opt_whitespace = true
    opt_emptylines = true
  end
  --------------------------------------------------------------------
  -- variable initialization
  --------------------------------------------------------------------
  stoks, sinfos, stoklns                -- set source lists
    = toklist, semlist, toklnlist
  local i = 1                           -- token position
  local tok, info                       -- current token
  local prev    -- position of last grammar token
                -- on same line (for TK_SPACE stuff)
  --------------------------------------------------------------------
  -- changes a token, info pair; with no arguments the current token
  -- is marked as deleted (type "" and info "")
  --------------------------------------------------------------------
  local function settoken(tok, info, I)
    I = I or i
    stoks[I] = tok or ""
    sinfos[I] = info or ""
  end
  --------------------------------------------------------------------
  -- processing loop (PASS 1)
  --------------------------------------------------------------------
  while true do
    tok, info = stoks[i], sinfos[i]
    ----------------------------------------------------------------
    local atstart = atlinestart(i)      -- set line begin flag
    if atstart then prev = nil end
    ----------------------------------------------------------------
    if tok == "TK_EOS" then             -- end of stream/pass
      break
    ----------------------------------------------------------------
    elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
           tok == "TK_NAME" or          -- operators
           tok == "TK_OP" then
      -- TK_KEYWORD and TK_OP can't be optimized without a big
      -- optimization framework; it would be more of an optimizing
      -- compiler, not a source code compressor
      -- TK_NAME that are locals needs parser to analyze/optimize
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_NUMBER" then      -- numbers
      if opt_numbers then
        do_number(i)  -- optimize
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_STRING" or        -- strings, long strings
           tok == "TK_LSTRING" then
      if opt_strings then
        if tok == "TK_STRING" then
          do_string(i)  -- optimize
        else
          do_lstring(i)  -- optimize
        end
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_COMMENT" then     -- short comments
      if opt_comments then
        if i == 1 and sub(info, 1, 1) == "#" then
          -- keep shbang comment, trim whitespace
          do_comment(i)
        else
          -- safe to delete, as a TK_EOL (or TK_EOS) always follows
          settoken()  -- remove entirely
        end
      elseif opt_whitespace then        -- trim whitespace only
        do_comment(i)
      end
    ----------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then    -- long comments
      if keep_lcomment(opt_keep, info) then
        ------------------------------------------------------------
        -- if --keep, we keep a long comment if <msg> is found;
        -- this is a feature to keep copyright or license texts
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      elseif opt_comments then
        local eols = commenteols(info)
        ------------------------------------------------------------
        -- prepare opt_emptylines case first, if a disposable token
        -- follows, current one is safe to dump, else keep a space;
        -- it is implied that the operation is safe for '-', because
        -- current is a TK_LCOMMENT, and must be separate from a '-'
        if is_faketoken[stoks[i + 1]] then
          settoken()  -- remove entirely
          tok = ""
        else
          settoken("TK_SPACE", " ")
        end
        ------------------------------------------------------------
        -- if there are embedded EOLs to keep and opt_emptylines is
        -- disabled, then switch the token into one or more EOLs
        if not opt_emptylines and eols > 0 then
          settoken("TK_EOL", rep("\n", eols))
        end
        ------------------------------------------------------------
        -- if optimizing whitespaces, force reinterpretation of the
        -- token to give a chance for the space to be optimized away
        if opt_whitespace and tok ~= "" then
          i = i - 1  -- to reinterpret
        end
        ------------------------------------------------------------
      else                              -- disabled case
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      end
    ----------------------------------------------------------------
    elseif tok == "TK_EOL" then         -- line endings
      if atstart and opt_emptylines then
        settoken()  -- remove entirely
      elseif info == "\r\n" or info == "\n\r" then
        -- normalize the rest of the EOLs for CRLF/LFCR only
        -- (note that TK_LCOMMENT can change into several EOLs)
        settoken("TK_EOL", "\n")
      end
    ----------------------------------------------------------------
    elseif tok == "TK_SPACE" then       -- whitespace
      if opt_whitespace then
        if atstart or atlineend(i) then
          -- delete leading and trailing whitespace
          settoken()  -- remove entirely
        else
          ------------------------------------------------------------
          -- at this point, since leading whitespace have been removed,
          -- there should be a either a real token or a TK_LCOMMENT
          -- prior to hitting this whitespace; the TK_LCOMMENT case
          -- only happens if opt_comments is disabled; so prev ~= nil
          local ptok = stoks[prev]
          if ptok == "TK_LCOMMENT" then
            -- previous TK_LCOMMENT can abut with anything
            settoken()  -- remove entirely
          else
            -- prev must be a grammar token; consecutive TK_SPACE
            -- tokens is impossible when optimizing whitespace
            local ntok = stoks[i + 1]
            if is_faketoken[ntok] then
              -- handle special case where a '-' cannot abut with
              -- either a short comment or a long comment
              if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
                 ptok == "TK_OP" and sinfos[prev] == "-" then
                -- keep token
              else
                settoken()  -- remove entirely
              end
            else--is_realtoken
              -- check a pair of grammar tokens, if can abut, then
              -- delete space token entirely, otherwise keep one space
              local s = checkpair(prev, i + 1)
              if s == "" then
                settoken()  -- remove entirely
              else
                settoken("TK_SPACE", " ")
              end
            end
          end
          ------------------------------------------------------------
        end
      end
    ----------------------------------------------------------------
    else
      error("unidentified token encountered")
    end
    ----------------------------------------------------------------
    i = i + 1
  end--while
  repack_tokens()
  --------------------------------------------------------------------
  -- processing loop (PASS 2)
  --------------------------------------------------------------------
  if opt_eols then
    i = 1
    -- aggressive EOL removal only works with most non-grammar tokens
    -- optimized away because it is a rather simple scheme -- basically
    -- it just checks 'real' token pairs around EOLs
    if stoks[1] == "TK_COMMENT" then
      -- first comment still existing must be shbang, skip whole line
      i = 3
    end
    while true do
      tok, info = stoks[i], sinfos[i]
      --------------------------------------------------------------
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      --------------------------------------------------------------
      elseif tok == "TK_EOL" then       -- consider each TK_EOL
        local t1, t2 = stoks[i - 1], stoks[i + 1]
        if is_realtoken[t1] and is_realtoken[t2] then  -- sanity check
          local s = checkpair(i - 1, i + 1)
          if s == "" then
            settoken()  -- remove entirely
          end
        end
      end--if tok
      --------------------------------------------------------------
      i = i + 1
    end--while
    repack_tokens()
  end
  --------------------------------------------------------------------
  if opt_details and opt_details > 0 then print() end -- spacing
  return stoks, sinfos, stoklns
end