diff options
Diffstat (limited to 'contrib/luasrcdiet/lua/optparser.lua')
-rw-r--r-- | contrib/luasrcdiet/lua/optparser.lua | 564 |
1 files changed, 0 insertions, 564 deletions
diff --git a/contrib/luasrcdiet/lua/optparser.lua b/contrib/luasrcdiet/lua/optparser.lua deleted file mode 100644 index cfe6cc1013..0000000000 --- a/contrib/luasrcdiet/lua/optparser.lua +++ /dev/null @@ -1,564 +0,0 @@ ---[[-------------------------------------------------------------------- - - optparser.lua: does parser-based optimizations - This file is part of LuaSrcDiet. - - Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> - The COPYRIGHT file describes the conditions - under which this software may be distributed. - - See the ChangeLog for more information. - -----------------------------------------------------------------------]] - ---[[-------------------------------------------------------------------- --- NOTES: --- * For more parser-based optimization ideas, see the TODO items or --- look at technotes.txt. --- * The processing load is quite significant, but since this is an --- off-line text processor, I believe we can wait a few seconds. --- * TODO: might process "local a,a,a" wrongly... need tests! --- * TODO: remove position handling if overlapped locals (rem < 0) --- needs more study, to check behaviour --- * TODO: there are probably better ways to do allocation, e.g. by --- choosing better methods to sort and pick locals... --- * TODO: we don't need 53*63 two-letter identifiers; we can make --- do with significantly less depending on how many that are really --- needed and improve entropy; e.g. 13 needed -> choose 4*4 instead -----------------------------------------------------------------------]] - -local base = _G -local string = require "string" -local table = require "table" -module "optparser" - ----------------------------------------------------------------------- --- Letter frequencies for reducing symbol entropy (fixed version) --- * Might help a wee bit when the output file is compressed --- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies --- * We use letter frequencies according to a Linotype keyboard, plus --- the underscore, and both lower case and upper case letters. --- * The arrangement below (LC, underscore, %d, UC) is arbitrary. --- * This is certainly not optimal, but is quick-and-dirty and the --- process has no significant overhead ----------------------------------------------------------------------- - -local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ" -local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ" - --- names or identifiers that must be skipped --- * the first two lines are for keywords -local SKIP_NAME = {} -for v in string.gmatch([[ -and break do else elseif end false for function if in -local nil not or repeat return then true until while -self]], "%S+") do - SKIP_NAME[v] = true -end - ------------------------------------------------------------------------- --- variables and data structures ------------------------------------------------------------------------- - -local toklist, seminfolist, -- token lists - globalinfo, localinfo, -- variable information tables - globaluniq, localuniq, -- unique name tables - var_new, -- index of new variable names - varlist -- list of output variables - ----------------------------------------------------------------------- --- preprocess information table to get lists of unique names ----------------------------------------------------------------------- - -local function preprocess(infotable) - local uniqtable = {} - for i = 1, #infotable do -- enumerate info table - local obj = infotable[i] - local name = obj.name - -------------------------------------------------------------------- - if not uniqtable[name] then -- not found, start an entry - uniqtable[name] = { - decl = 0, token = 0, size = 0, - } - end - -------------------------------------------------------------------- - local uniq = uniqtable[name] -- count declarations, tokens, size - uniq.decl = uniq.decl + 1 - local xref = obj.xref - local xcount = #xref - uniq.token = uniq.token + xcount - uniq.size = uniq.size + xcount * #name - -------------------------------------------------------------------- - if obj.decl then -- if local table, create first,last pairs - obj.id = i - obj.xcount = xcount - if xcount > 1 then -- if ==1, means local never accessed - obj.first = xref[2] - obj.last = xref[xcount] - end - -------------------------------------------------------------------- - else -- if global table, add a back ref - uniq.id = i - end - -------------------------------------------------------------------- - end--for - return uniqtable -end - ----------------------------------------------------------------------- --- calculate actual symbol frequencies, in order to reduce entropy --- * this may help further reduce the size of compressed sources --- * note that since parsing optimizations is put before lexing --- optimizations, the frequency table is not exact! --- * yes, this will miss --keep block comments too... ----------------------------------------------------------------------- - -local function recalc_for_entropy(option) - local byte = string.byte - local char = string.char - -- table of token classes to accept in calculating symbol frequency - local ACCEPT = { - TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, - TK_STRING = true, TK_LSTRING = true, - } - if not option["opt-comments"] then - ACCEPT.TK_COMMENT = true - ACCEPT.TK_LCOMMENT = true - end - -------------------------------------------------------------------- - -- create a new table and remove any original locals by filtering - -------------------------------------------------------------------- - local filtered = {} - for i = 1, #toklist do - filtered[i] = seminfolist[i] - end - for i = 1, #localinfo do -- enumerate local info table - local obj = localinfo[i] - local xref = obj.xref - for j = 1, obj.xcount do - local p = xref[j] - filtered[p] = "" -- remove locals - end - end - -------------------------------------------------------------------- - local freq = {} -- reset symbol frequency table - for i = 0, 255 do freq[i] = 0 end - for i = 1, #toklist do -- gather symbol frequency - local tok, info = toklist[i], filtered[i] - if ACCEPT[tok] then - for j = 1, #info do - local c = byte(info, j) - freq[c] = freq[c] + 1 - end - end--if - end--for - -------------------------------------------------------------------- - -- function to re-sort symbols according to actual frequencies - -------------------------------------------------------------------- - local function resort(symbols) - local symlist = {} - for i = 1, #symbols do -- prepare table to sort - local c = byte(symbols, i) - symlist[i] = { c = c, freq = freq[c], } - end - table.sort(symlist, -- sort selected symbols - function(v1, v2) - return v1.freq > v2.freq - end - ) - local charlist = {} -- reconstitute the string - for i = 1, #symlist do - charlist[i] = char(symlist[i].c) - end - return table.concat(charlist) - end - -------------------------------------------------------------------- - LETTERS = resort(LETTERS) -- change letter arrangement - ALPHANUM = resort(ALPHANUM) -end - ----------------------------------------------------------------------- --- returns a string containing a new local variable name to use, and --- a flag indicating whether it collides with a global variable --- * trapping keywords and other names like 'self' is done elsewhere ----------------------------------------------------------------------- - -local function new_var_name() - local var - local cletters, calphanum = #LETTERS, #ALPHANUM - local v = var_new - if v < cletters then -- single char - v = v + 1 - var = string.sub(LETTERS, v, v) - else -- longer names - local range, sz = cletters, 1 -- calculate # chars fit - repeat - v = v - range - range = range * calphanum - sz = sz + 1 - until range > v - local n = v % cletters -- left side cycles faster - v = (v - n) / cletters -- do first char first - n = n + 1 - var = string.sub(LETTERS, n, n) - while sz > 1 do - local m = v % calphanum - v = (v - m) / calphanum - m = m + 1 - var = var..string.sub(ALPHANUM, m, m) - sz = sz - 1 - end - end - var_new = var_new + 1 - return var, globaluniq[var] ~= nil -end - ----------------------------------------------------------------------- --- calculate and print some statistics --- * probably better in main source, put here for now ----------------------------------------------------------------------- - -local function stats_summary(globaluniq, localuniq, afteruniq, option) - local print = print or base.print - local fmt = string.format - local opt_details = option.DETAILS - local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed - decl_g, decl_li, decl_lo, decl_ti, decl_to, - token_g, token_li, token_lo, token_ti, token_to, - size_g, size_li, size_lo, size_ti, size_to - = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - local function avg(c, l) -- safe average function - if c == 0 then return 0 end - return l / c - end - -------------------------------------------------------------------- - -- collect statistics (note: globals do not have declarations!) - -------------------------------------------------------------------- - for name, uniq in base.pairs(globaluniq) do - uniq_g = uniq_g + 1 - token_g = token_g + uniq.token - size_g = size_g + uniq.size - end - for name, uniq in base.pairs(localuniq) do - uniq_li = uniq_li + 1 - decl_li = decl_li + uniq.decl - token_li = token_li + uniq.token - size_li = size_li + uniq.size - end - for name, uniq in base.pairs(afteruniq) do - uniq_lo = uniq_lo + 1 - decl_lo = decl_lo + uniq.decl - token_lo = token_lo + uniq.token - size_lo = size_lo + uniq.size - end - uniq_ti = uniq_g + uniq_li - decl_ti = decl_g + decl_li - token_ti = token_g + token_li - size_ti = size_g + size_li - uniq_to = uniq_g + uniq_lo - decl_to = decl_g + decl_lo - token_to = token_g + token_lo - size_to = size_g + size_lo - -------------------------------------------------------------------- - -- detailed stats: global list - -------------------------------------------------------------------- - if opt_details then - local sorted = {} -- sort table of unique global names by size - for name, uniq in base.pairs(globaluniq) do - uniq.name = name - sorted[#sorted + 1] = uniq - end - table.sort(sorted, - function(v1, v2) - return v1.size > v2.size - end - ) - local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s" - local hl = string.rep("-", 44) - print("*** global variable list (sorted by size) ***\n"..hl) - print(fmt(tabf1, "Token", "Input", "Input", "Global")) - print(fmt(tabf1, "Count", "Bytes", "Average", "Name")) - print(hl) - for i = 1, #sorted do - local uniq = sorted[i] - print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name)) - end - print(hl) - print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL")) - print(hl.."\n") - -------------------------------------------------------------------- - -- detailed stats: local list - -------------------------------------------------------------------- - local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s" - local hl = string.rep("-", 70) - print("*** local variable list (sorted by allocation order) ***\n"..hl) - print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global")) - print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name")) - print(hl) - for i = 1, #varlist do -- iterate according to order assigned - local name = varlist[i] - local uniq = afteruniq[name] - local old_t, old_s = 0, 0 - for j = 1, #localinfo do -- find corresponding old names and calculate - local obj = localinfo[j] - if obj.name == name then - old_t = old_t + obj.xcount - old_s = old_s + obj.xcount * #obj.oldname - end - end - print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s), - uniq.size, avg(uniq.token, uniq.size), name)) - end - print(hl) - print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li), - size_lo, avg(token_lo, size_lo), "TOTAL")) - print(hl.."\n") - end--if opt_details - -------------------------------------------------------------------- - -- display output - -------------------------------------------------------------------- - local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f" - local hl = string.rep("-", 58) - print("*** local variable optimization summary ***\n"..hl) - print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average")) - print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes")) - print(hl) - print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g))) - print(hl) - print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li))) - print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti))) - print(hl) - print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo))) - print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to))) - print(hl.."\n") -end - ----------------------------------------------------------------------- --- main entry point --- * does only local variable optimization for now ----------------------------------------------------------------------- - -function optimize(option, _toklist, _seminfolist, _globalinfo, _localinfo) - -- set tables - toklist, seminfolist, globalinfo, localinfo - = _toklist, _seminfolist, _globalinfo, _localinfo - var_new = 0 -- reset variable name allocator - varlist = {} - ------------------------------------------------------------------ - -- preprocess global/local tables, handle entropy reduction - ------------------------------------------------------------------ - globaluniq = preprocess(globalinfo) - localuniq = preprocess(localinfo) - if option["opt-entropy"] then -- for entropy improvement - recalc_for_entropy(option) - end - ------------------------------------------------------------------ - -- build initial declared object table, then sort according to - -- token count, this might help assign more tokens to more common - -- variable names such as 'e' thus possibly reducing entropy - -- * an object knows its localinfo index via its 'id' field - -- * special handling for "self" special local (parameter) here - ------------------------------------------------------------------ - local object = {} - for i = 1, #localinfo do - object[i] = localinfo[i] - end - table.sort(object, -- sort largest first - function(v1, v2) - return v1.xcount > v2.xcount - end - ) - ------------------------------------------------------------------ - -- the special "self" function parameters must be preserved - -- * the allocator below will never use "self", so it is safe to - -- keep those implicit declarations as-is - ------------------------------------------------------------------ - local temp, j, gotself = {}, 1, false - for i = 1, #object do - local obj = object[i] - if not obj.isself then - temp[j] = obj - j = j + 1 - else - gotself = true - end - end - object = temp - ------------------------------------------------------------------ - -- a simple first-come first-served heuristic name allocator, - -- note that this is in no way optimal... - -- * each object is a local variable declaration plus existence - -- * the aim is to assign short names to as many tokens as possible, - -- so the following tries to maximize name reuse - -- * note that we preserve sort order - ------------------------------------------------------------------ - local nobject = #object - while nobject > 0 do - local varname, gcollide - repeat - varname, gcollide = new_var_name() -- collect a variable name - until not SKIP_NAME[varname] -- skip all special names - varlist[#varlist + 1] = varname -- keep a list - local oleft = nobject - ------------------------------------------------------------------ - -- if variable name collides with an existing global, the name - -- cannot be used by a local when the name is accessed as a global - -- during which the local is alive (between 'act' to 'rem'), so - -- we drop objects that collides with the corresponding global - ------------------------------------------------------------------ - if gcollide then - -- find the xref table of the global - local gref = globalinfo[globaluniq[varname].id].xref - local ngref = #gref - -- enumerate for all current objects; all are valid at this point - for i = 1, nobject do - local obj = object[i] - local act, rem = obj.act, obj.rem -- 'live' range of local - -- if rem < 0, it is a -id to a local that had the same name - -- so follow rem to extend it; does this make sense? - while rem < 0 do - rem = localinfo[-rem].rem - end - local drop - for j = 1, ngref do - local p = gref[j] - if p >= act and p <= rem then drop = true end -- in range? - end - if drop then - obj.skip = true - oleft = oleft - 1 - end - end--for - end--if gcollide - ------------------------------------------------------------------ - -- now the first unassigned local (since it's sorted) will be the - -- one with the most tokens to rename, so we set this one and then - -- eliminate all others that collides, then any locals that left - -- can then reuse the same variable name; this is repeated until - -- all local declaration that can use this name is assigned - -- * the criteria for local-local reuse/collision is: - -- A is the local with a name already assigned - -- B is the unassigned local under consideration - -- => anytime A is accessed, it cannot be when B is 'live' - -- => to speed up things, we have first/last accesses noted - ------------------------------------------------------------------ - while oleft > 0 do - local i = 1 - while object[i].skip do -- scan for first object - i = i + 1 - end - ------------------------------------------------------------------ - -- first object is free for assignment of the variable name - -- [first,last] gives the access range for collision checking - ------------------------------------------------------------------ - oleft = oleft - 1 - local obja = object[i] - i = i + 1 - obja.newname = varname - obja.skip = true - obja.done = true - local first, last = obja.first, obja.last - local xref = obja.xref - ------------------------------------------------------------------ - -- then, scan all the rest and drop those colliding - -- if A was never accessed then it'll never collide with anything - -- otherwise trivial skip if: - -- * B was activated after A's last access (last < act) - -- * B was removed before A's first access (first > rem) - -- if not, see detailed skip below... - ------------------------------------------------------------------ - if first and oleft > 0 then -- must have at least 1 access - local scanleft = oleft - while scanleft > 0 do - while object[i].skip do -- next valid object - i = i + 1 - end - scanleft = scanleft - 1 - local objb = object[i] - i = i + 1 - local act, rem = objb.act, objb.rem -- live range of B - -- if rem < 0, extend range of rem thru' following local - while rem < 0 do - rem = localinfo[-rem].rem - end - -------------------------------------------------------- - if not(last < act or first > rem) then -- possible collision - -------------------------------------------------------- - -- B is activated later than A or at the same statement, - -- this means for no collision, A cannot be accessed when B - -- is alive, since B overrides A (or is a peer) - -------------------------------------------------------- - if act >= obja.act then - for j = 1, obja.xcount do -- ... then check every access - local p = xref[j] - if p >= act and p <= rem then -- A accessed when B live! - oleft = oleft - 1 - objb.skip = true - break - end - end--for - -------------------------------------------------------- - -- A is activated later than B, this means for no collision, - -- A's access is okay since it overrides B, but B's last - -- access need to be earlier than A's activation time - -------------------------------------------------------- - else - if objb.last and objb.last >= obja.act then - oleft = oleft - 1 - objb.skip = true - end - end - end - -------------------------------------------------------- - if oleft == 0 then break end - end - end--if first - ------------------------------------------------------------------ - end--while - ------------------------------------------------------------------ - -- after assigning all possible locals to one variable name, the - -- unassigned locals/objects have the skip field reset and the table - -- is compacted, to hopefully reduce iteration time - ------------------------------------------------------------------ - local temp, j = {}, 1 - for i = 1, nobject do - local obj = object[i] - if not obj.done then - obj.skip = false - temp[j] = obj - j = j + 1 - end - end - object = temp -- new compacted object table - nobject = #object -- objects left to process - ------------------------------------------------------------------ - end--while - ------------------------------------------------------------------ - -- after assigning all locals with new variable names, we can - -- patch in the new names, and reprocess to get 'after' stats - ------------------------------------------------------------------ - for i = 1, #localinfo do -- enumerate all locals - local obj = localinfo[i] - local xref = obj.xref - if obj.newname then -- if got new name, patch it in - for j = 1, obj.xcount do - local p = xref[j] -- xrefs indexes the token list - seminfolist[p] = obj.newname - end - obj.name, obj.oldname -- adjust names - = obj.newname, obj.name - else - obj.oldname = obj.name -- for cases like 'self' - end - end - ------------------------------------------------------------------ - -- deal with statistics output - ------------------------------------------------------------------ - if gotself then -- add 'self' to end of list - varlist[#varlist + 1] = "self" - end - local afteruniq = preprocess(localinfo) - stats_summary(globaluniq, localuniq, afteruniq, option) - ------------------------------------------------------------------ -end |