Diffstat (limited to 'contrib/luasrcdiet')
-rw-r--r-- | contrib/luasrcdiet/Makefile           |    2
-rw-r--r-- | contrib/luasrcdiet/lua/COPYRIGHT      |   38
-rwxr-xr-x | contrib/luasrcdiet/lua/LuaSrcDiet.lua |  670
-rw-r--r-- | contrib/luasrcdiet/lua/llex.lua       |  355
-rw-r--r-- | contrib/luasrcdiet/lua/lparser.lua    | 1295
-rw-r--r-- | contrib/luasrcdiet/lua/optlex.lua     |  832
-rw-r--r-- | contrib/luasrcdiet/lua/optparser.lua  |  564
7 files changed, 0 insertions, 3756 deletions
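For context, the removed tool chained four of the deleted modules into a lex -> parse -> optimize pipeline. Below is a minimal sketch of that flow, reconstructed from process_file() in the LuaSrcDiet.lua diff further down; it assumes the deleted modules are on package.path under Lua 5.1, and the sample source string and option table are illustrative only, not part of the removed code.

local llex = require "llex"
local lparser = require "lparser"
local optlex = require "optlex"
local optparser = require "optparser"

-- illustrative inputs (hypothetical; any Lua 5.1 chunk and any subset
-- of the --opt-* flags from the usage text below would do)
local src = "local answer = 42 -- comment\nprint(answer)"
local option = {
  ["opt-comments"] = true, ["opt-whitespace"] = true,
  ["opt-emptylines"] = true, ["opt-numbers"] = true,
  ["opt-locals"] = true,
}

llex.init(src)
llex.llex()                          -- tokenize into parallel arrays
local toklist, seminfolist, toklnlist = llex.tok, llex.seminfo, llex.tokln

optparser.print = print              -- the CLI patches these in first
optlex.print = print
lparser.init(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = lparser.parser()
optparser.optimize(option, toklist, seminfolist, globalinfo, localinfo)
toklist, seminfolist, toklnlist
  = optlex.optimize(option, toklist, seminfolist, toklnlist)

print(table.concat(seminfolist))     -- the shrunken source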
diff --git a/contrib/luasrcdiet/Makefile b/contrib/luasrcdiet/Makefile
deleted file mode 100644
index 81a96f6a83..0000000000
--- a/contrib/luasrcdiet/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-include ../../build/config.mk
-include ../../build/module.mk
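Before the license and driver diffs below, a short sketch of the lexer-level API that the deleted llex.lua exposed, reconstructed from its init()/llex() functions and the dump_tokens() routine further down; the sample chunk and source id are illustrative.

local llex = require "llex"
llex.init("local x = 1 -- note", "@sample.lua")  -- inputs are illustrative
llex.llex()
for i = 1, #llex.tok do
  -- e.g. TK_KEYWORD 'local', TK_SPACE ' ', TK_NAME 'x', TK_OP '=',
  -- TK_NUMBER '1', TK_COMMENT '-- note', TK_EOS ''
  print(llex.tok[i], llex.seminfo[i], llex.tokln[i])
end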
\ No newline at end of file
diff --git a/contrib/luasrcdiet/lua/COPYRIGHT b/contrib/luasrcdiet/lua/COPYRIGHT
deleted file mode 100644
index b91ffa8d4b..0000000000
--- a/contrib/luasrcdiet/lua/COPYRIGHT
+++ /dev/null
@@ -1,38 +0,0 @@
-LuaSrcDiet License
-------------------
-
-LuaSrcDiet is licensed under the terms of the MIT license reproduced
-below. This means that LuaSrcDiet is free software and can be used for
-both academic and commercial purposes at absolutely no cost.
-
-Parts of LuaSrcDiet is based on Lua 5 code. See COPYRIGHT_Lua51
-(Lua 5.1.3) for Lua 5 license information.
-
-For details and rationale, see http://www.lua.org/license.html .
-
-===============================================================================
-
-Copyright (C) 2005-2008 Kein-Hong Man <khman@users.sf.net>
-Lua 5.1.3 Copyright (C) 1994-2008 Lua.org, PUC-Rio.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-===============================================================================
-
-(end of COPYRIGHT)
diff --git a/contrib/luasrcdiet/lua/LuaSrcDiet.lua b/contrib/luasrcdiet/lua/LuaSrcDiet.lua
deleted file mode 100755
index d9a41f24bc..0000000000
--- a/contrib/luasrcdiet/lua/LuaSrcDiet.lua
+++ /dev/null
@@ -1,670 +0,0 @@
-#!/usr/bin/env lua
---[[--------------------------------------------------------------------
-
-  LuaSrcDiet
-  Compresses Lua source code by removing unnecessary characters.
-  For Lua 5.1.x source code.
-
-  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
-  The COPYRIGHT file describes the conditions
-  under which this software may be distributed.
-
-  See the ChangeLog for more information.
-
-----------------------------------------------------------------------]]
-
---[[--------------------------------------------------------------------
--- NOTES:
--- * Remember to update version and date information below (MSG_TITLE)
--- * TODO: to implement pcall() to properly handle lexer etc. errors
--- * TODO: verify token stream or double-check binary chunk?
--- * TODO: need some automatic testing for a semblance of sanity
--- * TODO: the plugin module is highly experimental and unstable
-----------------------------------------------------------------------]]
-
--- standard libraries, functions
-local string = string
-local math = math
-local table = table
-local require = require
-local print = print
-local sub = string.sub
-local gmatch = string.gmatch
-
--- support modules
-local llex = require "llex"
-local lparser = require "lparser"
-local optlex = require "optlex"
-local optparser = require "optparser"
-local plugin
-
---[[--------------------------------------------------------------------
--- messages and textual data
-----------------------------------------------------------------------]]
-
-local MSG_TITLE = [[
-LuaSrcDiet: Puts your Lua 5.1 source code on a diet
-Version 0.11.2 (20080608)  Copyright (c) 2005-2008 Kein-Hong Man
-The COPYRIGHT file describes the conditions under which this
-software may be distributed.
-]]
-
-local MSG_USAGE = [[
-usage: LuaSrcDiet [options] [filenames]
-
-example:
-  >LuaSrcDiet myscript.lua -o myscript_.lua
-
-options:
-  -v, --version       prints version information
-  -h, --help          prints usage information
-  -o <file>           specify file name to write output
-  -s <suffix>         suffix for output files (default '_')
-  --keep <msg>        keep block comment with <msg> inside
-  --plugin <module>   run <module> in plugin/ directory
-  -                   stop handling arguments
-
-  (optimization levels)
-  --none              all optimizations off (normalizes EOLs only)
-  --basic             lexer-based optimizations only
-  --maximum           maximize reduction of source
-
-  (informational)
-  --quiet             process files quietly
-  --read-only         read file and print token stats only
-  --dump-lexer        dump raw tokens from lexer to stdout
-  --dump-parser       dump variable tracking tables from parser
-  --details           extra info (strings, numbers, locals)
-
-features (to disable, insert 'no' prefix like --noopt-comments):
-%s
-default settings:
-%s]]
-
-------------------------------------------------------------------------
--- optimization options, for ease of switching on and off
--- * positive to enable optimization, negative (no) to disable
--- * these options should follow --opt-* and --noopt-* style for now
-------------------------------------------------------------------------
-
-local OPTION = [[
---opt-comments,'remove comments and block comments'
---opt-whitespace,'remove whitespace excluding EOLs'
---opt-emptylines,'remove empty lines'
---opt-eols,'all above, plus remove unnecessary EOLs'
---opt-strings,'optimize strings and long strings'
---opt-numbers,'optimize numbers'
---opt-locals,'optimize local variable names'
---opt-entropy,'tries to reduce symbol entropy of locals'
-]]
-
--- preset configuration
-local DEFAULT_CONFIG = [[
-  --opt-comments --opt-whitespace --opt-emptylines
-  --opt-numbers --opt-locals
-]]
--- override configurations: MUST explicitly enable/disable everything
-local BASIC_CONFIG = [[
-  --opt-comments --opt-whitespace --opt-emptylines
-  --noopt-eols --noopt-strings --noopt-numbers
-  --noopt-locals
-]]
-local MAXIMUM_CONFIG = [[
-  --opt-comments --opt-whitespace --opt-emptylines
-  --opt-eols --opt-strings --opt-numbers
-  --opt-locals --opt-entropy
-]]
-local NONE_CONFIG = [[
-  --noopt-comments --noopt-whitespace --noopt-emptylines
-  --noopt-eols --noopt-strings --noopt-numbers
-  --noopt-locals
-]]
-
-local DEFAULT_SUFFIX = "_"       -- default suffix for file renaming
-local PLUGIN_SUFFIX = "plugin/"  -- relative location of plugins
-
---[[--------------------------------------------------------------------
--- startup and initialize option list handling
-----------------------------------------------------------------------]]
-
--- simple error message handler; change to error if traceback wanted
-local function die(msg)
-  print("LuaSrcDiet: "..msg); os.exit()
-end
---die = error--DEBUG
-
-if not string.match(_VERSION, "5.1", 1, 1) then  -- sanity check
-  die("requires Lua 5.1 to run")
-end
-
-------------------------------------------------------------------------
--- prepares text for list of optimizations, prepare lookup table
-------------------------------------------------------------------------
-
-local MSG_OPTIONS = ""
-do
-  local WIDTH = 24
-  local o = {}
-  for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
-    local msg = "  "..op
-    msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
-    MSG_OPTIONS = MSG_OPTIONS..msg
-    o[op] = true
-    o["--no"..sub(op, 3)] = true
-  end
-  OPTION = o  -- replace OPTION with lookup table
-end
-
-MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
-
-------------------------------------------------------------------------
--- global variable initialization, option set handling
-------------------------------------------------------------------------
-
-local suffix = DEFAULT_SUFFIX  -- file suffix
-local option = {}              -- program options
-local stat_c, stat_l          -- statistics tables
-
--- function to set option lookup table based on a text list of options
--- note: additional forced settings for --opt-eols is done in optlex.lua
-local function set_options(CONFIG)
-  for op in gmatch(CONFIG, "(%-%-%S+)") do
-    if sub(op, 3, 4) == "no" and  -- handle negative options
-       OPTION["--"..sub(op, 5)] then
-      option[sub(op, 5)] = false
-    else
-      option[sub(op, 3)] = true
-    end
-  end
-end
-
---[[--------------------------------------------------------------------
--- support functions
-----------------------------------------------------------------------]]
-
--- list of token types, parser-significant types are up to TTYPE_GRAMMAR
--- while the rest are not used by parsers; arranged for stats display
-local TTYPES = {
-  "TK_KEYWORD", "TK_NAME", "TK_NUMBER",  -- grammar
-  "TK_STRING", "TK_LSTRING", "TK_OP",
-  "TK_EOS",
-  "TK_COMMENT", "TK_LCOMMENT",           -- non-grammar
-  "TK_EOL", "TK_SPACE",
-}
-local TTYPE_GRAMMAR = 7
-
-local EOLTYPES = {  -- EOL names for token dump
-  ["\n"] = "LF", ["\r"] = "CR",
-  ["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
-}
-
-------------------------------------------------------------------------
--- read source code from file
-------------------------------------------------------------------------
-
-local function load_file(fname)
-  local INF = io.open(fname, "rb")
-  if not INF then die("cannot open \""..fname.."\" for reading") end
-  local dat = INF:read("*a")
-  if not dat then die("cannot read from \""..fname.."\"") end
-  INF:close()
-  return dat
-end
-
-------------------------------------------------------------------------
--- save source code to file
-------------------------------------------------------------------------
-
-local function save_file(fname, dat)
-  local OUTF = io.open(fname, "wb")
-  if not OUTF then die("cannot open \""..fname.."\" for writing") end
-  local status = OUTF:write(dat)
-  if not status then die("cannot write to \""..fname.."\"") end
-  OUTF:close()
-end
-
-------------------------------------------------------------------------
--- functions to deal with statistics
-------------------------------------------------------------------------
-
--- initialize statistics table
-local function stat_init()
-  stat_c, stat_l = {}, {}
-  for i = 1, #TTYPES do
-    local ttype = TTYPES[i]
-    stat_c[ttype], stat_l[ttype] = 0, 0
-  end
-end
-
--- add a token to statistics table
-local function stat_add(tok, seminfo)
-  stat_c[tok] = stat_c[tok] + 1
-  stat_l[tok] = stat_l[tok] + #seminfo
-end
-
--- do totals for statistics table, return average table
-local function stat_calc()
-  local function avg(c, l)  -- safe average function
-    if c == 0 then return 0 end
-    return l / c
-  end
-  local stat_a = {}
-  local c, l = 0, 0
-  for i = 1, TTYPE_GRAMMAR do  -- total grammar tokens
-    local ttype = TTYPES[i]
-    c = c + stat_c[ttype]; l = l + stat_l[ttype]
-  end
-  stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
-  stat_a.TOTAL_TOK = avg(c, l)
-  c, l = 0, 0
-  for i = 1, #TTYPES do  -- total all tokens
-    local ttype = TTYPES[i]
-    c = c + stat_c[ttype]; l = l + stat_l[ttype]
-    stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
-  end
-  stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
-  stat_a.TOTAL_ALL = avg(c, l)
-  return stat_a
-end
-
---[[--------------------------------------------------------------------
--- main tasks
-----------------------------------------------------------------------]]
-
-------------------------------------------------------------------------
--- a simple token dumper, minimal translation of seminfo data
-------------------------------------------------------------------------
-
-local function dump_tokens(srcfl)
-  --------------------------------------------------------------------
-  -- load file and process source input into tokens
-  --------------------------------------------------------------------
-  local z = load_file(srcfl)
-  llex.init(z)
-  llex.llex()
-  local toklist, seminfolist = llex.tok, llex.seminfo
-  --------------------------------------------------------------------
-  -- display output
-  --------------------------------------------------------------------
-  for i = 1, #toklist do
-    local tok, seminfo = toklist[i], seminfolist[i]
-    if tok == "TK_OP" and string.byte(seminfo) < 32 then
-      seminfo = "("..string.byte(seminfo)..")"
-    elseif tok == "TK_EOL" then
-      seminfo = EOLTYPES[seminfo]
-    else
-      seminfo = "'"..seminfo.."'"
-    end
-    print(tok.." "..seminfo)
-  end--for
-end
-
-----------------------------------------------------------------------
--- parser dump; dump globalinfo and localinfo tables
-----------------------------------------------------------------------
-
-local function dump_parser(srcfl)
-  local print = print
-  --------------------------------------------------------------------
-  -- load file and process source input into tokens
-  --------------------------------------------------------------------
-  local z = load_file(srcfl)
-  llex.init(z)
-  llex.llex()
-  local toklist, seminfolist, toklnlist
-    = llex.tok, llex.seminfo, llex.tokln
-  --------------------------------------------------------------------
-  -- do parser optimization here
-  --------------------------------------------------------------------
-  lparser.init(toklist, seminfolist, toklnlist)
-  local globalinfo, localinfo = lparser.parser()
-  --------------------------------------------------------------------
-  -- display output
-  --------------------------------------------------------------------
-  local hl = string.rep("-", 72)
-  print("*** Local/Global Variable Tracker Tables ***")
-  print(hl.."\n GLOBALS\n"..hl)
-  -- global tables have a list of xref numbers only
-  for i = 1, #globalinfo do
-    local obj = globalinfo[i]
-    local msg = "("..i..") '"..obj.name.."' -> "
-    local xref = obj.xref
-    for j = 1, #xref do msg = msg..xref[j].." " end
-    print(msg)
-  end
-  -- local tables have xref numbers and a few other special
-  -- numbers that are specially named: decl (declaration xref),
-  -- act (activation xref), rem (removal xref)
-  print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
-  for i = 1, #localinfo do
-    local obj = localinfo[i]
-    local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
-                " act:"..obj.act.." rem:"..obj.rem
-    if obj.isself then
-      msg = msg.." isself"
-    end
-    msg = msg.." -> "
-    local xref = obj.xref
-    for j = 1, #xref do msg = msg..xref[j].." " end
-    print(msg)
-  end
-  print(hl.."\n")
-end
-
-------------------------------------------------------------------------
--- reads source file(s) and reports some statistics
-------------------------------------------------------------------------
-
-local function read_only(srcfl)
-  local print = print
-  --------------------------------------------------------------------
-  -- load file and process source input into tokens
-  --------------------------------------------------------------------
-  local z = load_file(srcfl)
-  llex.init(z)
-  llex.llex()
-  local toklist, seminfolist = llex.tok, llex.seminfo
-  print(MSG_TITLE)
-  print("Statistics for: "..srcfl.."\n")
-  --------------------------------------------------------------------
-  -- collect statistics
-  --------------------------------------------------------------------
-  stat_init()
-  for i = 1, #toklist do
-    local tok, seminfo = toklist[i], seminfolist[i]
-    stat_add(tok, seminfo)
-  end--for
-  local stat_a = stat_calc()
-  --------------------------------------------------------------------
-  -- display output
-  --------------------------------------------------------------------
-  local fmt = string.format
-  local function figures(tt)
-    return stat_c[tt], stat_l[tt], stat_a[tt]
-  end
-  local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
-  local hl = string.rep("-", 42)
-  print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
-  print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
-  print(hl)
-  for i = 1, #TTYPES do
-    local ttype = TTYPES[i]
-    print(fmt(tabf2, ttype, figures(ttype)))
-    if ttype == "TK_EOS" then print(hl) end
-  end
-  print(hl)
-  print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
-  print(hl)
-  print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
-  print(hl.."\n")
-end
-
-------------------------------------------------------------------------
--- process source file(s), write output and reports some statistics
-------------------------------------------------------------------------
-
-local function process_file(srcfl, destfl)
-  local function print(...)  -- handle quiet option
-    if option.QUIET then return end
-    _G.print(...)
-  end
-  if plugin and plugin.init then  -- plugin init
-    option.EXIT = false
-    plugin.init(option, srcfl, destfl)
-    if option.EXIT then return end
-  end
-  print(MSG_TITLE)  -- title message
-  --------------------------------------------------------------------
-  -- load file and process source input into tokens
-  --------------------------------------------------------------------
-  local z = load_file(srcfl)
-  if plugin and plugin.post_load then  -- plugin post-load
-    z = plugin.post_load(z) or z
-    if option.EXIT then return end
-  end
-  llex.init(z)
-  llex.llex()
-  local toklist, seminfolist, toklnlist
-    = llex.tok, llex.seminfo, llex.tokln
-  if plugin and plugin.post_lex then  -- plugin post-lex
-    plugin.post_lex(toklist, seminfolist, toklnlist)
-    if option.EXIT then return end
-  end
-  --------------------------------------------------------------------
-  -- collect 'before' statistics
-  --------------------------------------------------------------------
-  stat_init()
-  for i = 1, #toklist do
-    local tok, seminfo = toklist[i], seminfolist[i]
-    stat_add(tok, seminfo)
-  end--for
-  local stat1_a = stat_calc()
-  local stat1_c, stat1_l = stat_c, stat_l
-  --------------------------------------------------------------------
-  -- do parser optimization here
-  --------------------------------------------------------------------
-  if option["opt-locals"] then
-    optparser.print = print  -- hack
-    lparser.init(toklist, seminfolist, toklnlist)
-    local globalinfo, localinfo = lparser.parser()
-    if plugin and plugin.post_parse then  -- plugin post-parse
-      plugin.post_parse(globalinfo, localinfo)
-      if option.EXIT then return end
-    end
-    optparser.optimize(option, toklist, seminfolist, globalinfo, localinfo)
-    if plugin and plugin.post_optparse then  -- plugin post-optparse
-      plugin.post_optparse()
-      if option.EXIT then return end
-    end
-  end
-  --------------------------------------------------------------------
-  -- do lexer optimization here, save output file
-  --------------------------------------------------------------------
-  optlex.print = print  -- hack
-  toklist, seminfolist, toklnlist
-    = optlex.optimize(option, toklist, seminfolist, toklnlist)
-  if plugin and plugin.post_optlex then  -- plugin post-optlex
-    plugin.post_optlex(toklist, seminfolist, toklnlist)
-    if option.EXIT then return end
-  end
-  local dat = table.concat(seminfolist)
-  -- depending on options selected, embedded EOLs in long strings and
-  -- long comments may not have been translated to \n, tack a warning
-  if string.find(dat, "\r\n", 1, 1) or
-     string.find(dat, "\n\r", 1, 1) then
-    optlex.warn.mixedeol = true
-  end
-  -- save optimized source stream to output file
-  save_file(destfl, dat)
-  --------------------------------------------------------------------
-  -- collect 'after' statistics
-  --------------------------------------------------------------------
-  stat_init()
-  for i = 1, #toklist do
-    local tok, seminfo = toklist[i], seminfolist[i]
-    stat_add(tok, seminfo)
-  end--for
-  local stat_a = stat_calc()
-  --------------------------------------------------------------------
-  -- display output
-  --------------------------------------------------------------------
-  print("Statistics for: "..srcfl.." -> "..destfl.."\n")
-  local fmt = string.format
-  local function figures(tt)
-    return stat1_c[tt], stat1_l[tt], stat1_a[tt],
-           stat_c[tt], stat_l[tt], stat_a[tt]
-  end
-  local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
-                       "%-16s%8d%8d%10.2f%8d%8d%10.2f"
-  local hl = string.rep("-", 68)
-  print("*** lexer-based optimizations summary ***\n"..hl)
-  print(fmt(tabf1, "Lexical",
-            "Input", "Input", "Input",
-            "Output", "Output", "Output"))
-  print(fmt(tabf1, "Elements",
-            "Count", "Bytes", "Average",
-            "Count", "Bytes", "Average"))
-  print(hl)
-  for i = 1, #TTYPES do
-    local ttype = TTYPES[i]
-    print(fmt(tabf2, ttype, figures(ttype)))
-    if ttype == "TK_EOS" then print(hl) end
-  end
-  print(hl)
-  print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
-  print(hl)
-  print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
-  print(hl)
-  --------------------------------------------------------------------
-  -- report warning flags from optimizing process
-  --------------------------------------------------------------------
-  if optlex.warn.lstring then
-    print("* WARNING: "..optlex.warn.lstring)
-  elseif optlex.warn.mixedeol then
-    print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
-  end
-  print()
-end
-
---[[--------------------------------------------------------------------
--- main functions
-----------------------------------------------------------------------]]
-
-local arg = {...}  -- program arguments
-local fspec = {}
-set_options(DEFAULT_CONFIG)  -- set to default options at beginning
-
-------------------------------------------------------------------------
--- per-file handling, ship off to tasks
-------------------------------------------------------------------------
-
-local function do_files(fspec)
-  for _, srcfl in ipairs(fspec) do
-    local destfl
-    ------------------------------------------------------------------
-    -- find and replace extension for filenames
-    ------------------------------------------------------------------
-    local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
-    local basename, extension = srcfl, ""
-    if extb and extb > 1 then
-      basename = sub(srcfl, 1, extb - 1)
-      extension = sub(srcfl, extb, exte)
-    end
-    destfl = basename..suffix..extension
-    if #fspec == 1 and option.OUTPUT_FILE then
-      destfl = option.OUTPUT_FILE
-    end
-    if srcfl == destfl then
-      die("output filename identical to input filename")
-    end
-    ------------------------------------------------------------------
-    -- perform requested operations
-    ------------------------------------------------------------------
-    if option.DUMP_LEXER then
-      dump_tokens(srcfl)
-    elseif option.DUMP_PARSER then
-      dump_parser(srcfl)
-    elseif option.READ_ONLY then
-      read_only(srcfl)
-    else
-      process_file(srcfl, destfl)
-    end
-  end--for
-end
-
-------------------------------------------------------------------------
--- main function (entry point is after this definition)
-------------------------------------------------------------------------
-
-local function main()
-  local argn, i = #arg, 1
-  if argn == 0 then
-    option.HELP = true
-  end
-  --------------------------------------------------------------------
-  -- handle arguments
-  --------------------------------------------------------------------
-  while i <= argn do
-    local o, p = arg[i], arg[i + 1]
-    local dash = string.match(o, "^%-%-?")
-    if dash == "-" then  -- single-dash options
-      if o == "-h" then
-        option.HELP = true; break
-      elseif o == "-v" then
-        option.VERSION = true; break
-      elseif o == "-s" then
-        if not p then die("-s option needs suffix specification") end
-        suffix = p
-        i = i + 1
-      elseif o == "-o" then
-        if not p then die("-o option needs a file name") end
-        option.OUTPUT_FILE = p
-        i = i + 1
-      elseif o == "-" then
-        break  -- ignore rest of args
-      else
-        die("unrecognized option "..o)
-      end
-    elseif dash == "--" then  -- double-dash options
-      if o == "--help" then
-        option.HELP = true; break
-      elseif o == "--version" then
-        option.VERSION = true; break
-      elseif o == "--keep" then
-        if not p then die("--keep option needs a string to match for") end
-        option.KEEP = p
-        i = i + 1
-      elseif o == "--plugin" then
-        if not p then die("--plugin option needs a module name") end
-        if option.PLUGIN then die("only one plugin can be specified") end
-        option.PLUGIN = p
-        plugin = require(PLUGIN_SUFFIX..p)
-        i = i + 1
-      elseif o == "--quiet" then
-        option.QUIET = true
-      elseif o == "--read-only" then
-        option.READ_ONLY = true
-      elseif o == "--basic" then
-        set_options(BASIC_CONFIG)
-      elseif o == "--maximum" then
-        set_options(MAXIMUM_CONFIG)
-      elseif o == "--none" then
-        set_options(NONE_CONFIG)
-      elseif o == "--dump-lexer" then
-        option.DUMP_LEXER = true
-      elseif o == "--dump-parser" then
-        option.DUMP_PARSER = true
-      elseif o == "--details" then
-        option.DETAILS = true
-      elseif OPTION[o] then  -- lookup optimization options
-        set_options(o)
-      else
-        die("unrecognized option "..o)
-      end
-    else
-      fspec[#fspec + 1] = o  -- potential filename
-    end
-    i = i + 1
-  end--while
-  if option.HELP then
-    print(MSG_TITLE..MSG_USAGE); return true
-  elseif option.VERSION then
-    print(MSG_TITLE); return true
-  end
-  if #fspec > 0 then
-    if #fspec > 1 and option.OUTPUT_FILE then
-      die("with -o, only one source file can be specified")
-    end
-    do_files(fspec)
-    return true
-  else
-    die("nothing to do!")
-  end
-end
-
--- entry point -> main() -> do_files()
-if not main() then
-  die("Please run with option -h or --help for usage information")
-end
-
--- end of script
diff --git a/contrib/luasrcdiet/lua/llex.lua b/contrib/luasrcdiet/lua/llex.lua
deleted file mode 100644
index a637f3048e..0000000000
--- a/contrib/luasrcdiet/lua/llex.lua
+++ /dev/null
@@ -1,355 +0,0 @@
---[[--------------------------------------------------------------------
-
-  llex.lua: Lua 5.1 lexical analyzer in Lua
-  This file is part of LuaSrcDiet, based on Yueliang material.
-
-  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
-  The COPYRIGHT file describes the conditions
-  under which this software may be distributed.
-
-  See the ChangeLog for more information.
-
-----------------------------------------------------------------------]]
-
---[[--------------------------------------------------------------------
--- NOTES:
--- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
---   with significant modifications to handle LuaSrcDiet's needs:
---   (1) llex.error is an optional error function handler
---   (2) seminfo for strings include their delimiters and no
---       translation operations are performed on them
--- * ADDED shbang handling has been added to support executable scripts
--- * NO localized decimal point replacement magic
--- * NO limit to number of lines
--- * NO support for compatible long strings (LUA_COMPAT_LSTR)
--- * Please read technotes.txt for more technical details.
-----------------------------------------------------------------------]]
-
-local base = _G
-local string = require "string"
-module "llex"
-
-local find = string.find
-local match = string.match
-local sub = string.sub
-
-----------------------------------------------------------------------
--- initialize keyword list, variables
-----------------------------------------------------------------------
-
-local kw = {}
-for v in string.gmatch([[
-and break do else elseif end false for function if in
-local nil not or repeat return then true until while]], "%S+") do
-  kw[v] = true
-end
-
--- NOTE: see init() for module variables (externally visible):
---       tok, seminfo, tokln
-
-local z,         -- source stream
-      sourceid,  -- name of source
-      I,         -- position of lexer
-      buff,      -- buffer for strings
-      ln         -- line number
-
-----------------------------------------------------------------------
--- add information to token listing
-----------------------------------------------------------------------
-
-local function addtoken(token, info)
-  local i = #tok + 1
-  tok[i] = token
-  seminfo[i] = info
-  tokln[i] = ln
-end
-
-----------------------------------------------------------------------
--- handles line number incrementation and end-of-line characters
-----------------------------------------------------------------------
-
-local function inclinenumber(i, is_tok)
-  local sub = sub
-  local old = sub(z, i, i)
-  i = i + 1  -- skip '\n' or '\r'
-  local c = sub(z, i, i)
-  if (c == "\n" or c == "\r") and (c ~= old) then
-    i = i + 1  -- skip '\n\r' or '\r\n'
-    old = old..c
-  end
-  if is_tok then addtoken("TK_EOL", old) end
-  ln = ln + 1
-  I = i
-  return i
-end
-
-----------------------------------------------------------------------
--- initialize lexer for given source _z and source name _sourceid
-----------------------------------------------------------------------
-
-function init(_z, _sourceid)
-  z = _z                -- source
-  sourceid = _sourceid  -- name of source
-  I = 1                 -- lexer's position in source
-  ln = 1                -- line number
-  tok = {}              -- lexed token list*
-  seminfo = {}          -- lexed semantic information list*
-  tokln = {}            -- line numbers for messages*
-                        -- (*) externally visible thru' module
-  --------------------------------------------------------------------
-  -- initial processing (shbang handling)
-  --------------------------------------------------------------------
-  local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
-  if p then  -- skip first line
-    I = I + #q
-    addtoken("TK_COMMENT", q)
-    if #r > 0 then inclinenumber(I, true) end
-  end
-end
-
-----------------------------------------------------------------------
--- returns a chunk name or id, no truncation for long names
-----------------------------------------------------------------------
-
-function chunkid()
-  if sourceid and match(sourceid, "^[=@]") then
-    return sub(sourceid, 2)  -- remove first char
-  end
-  return "[string]"
-end
-
-----------------------------------------------------------------------
--- formats error message and throws error
--- * a simplified version, does not report what token was responsible
-----------------------------------------------------------------------
-
-function errorline(s, line)
-  local e = error or base.error
-  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
-end
-local errorline = errorline
-
-------------------------------------------------------------------------
--- count separators ("=") in a long string delimiter
-------------------------------------------------------------------------
-
-local function skip_sep(i)
-  local sub = sub
-  local s = sub(z, i, i)
-  i = i + 1
-  local count = #match(z, "=*", i)  -- note, take the length
-  i = i + count
-  I = i
-  return (sub(z, i, i) == s) and count or (-count) - 1
-end
-
-----------------------------------------------------------------------
--- reads a long string or long comment
-----------------------------------------------------------------------
-
-local function read_long_string(is_str, sep)
-  local i = I + 1  -- skip 2nd '['
-  local sub = sub
-  local c = sub(z, i, i)
-  if c == "\r" or c == "\n" then  -- string starts with a newline?
-    i = inclinenumber(i)  -- skip it
-  end
-  local j = i
-  while true do
-    local p, q, r = find(z, "([\r\n%]])", i)  -- (long range)
-    if not p then
-      errorline(is_str and "unfinished long string" or
-                "unfinished long comment")
-    end
-    i = p
-    if r == "]" then  -- delimiter test
-      if skip_sep(i) == sep then
-        buff = sub(z, buff, I)
-        I = I + 1  -- skip 2nd ']'
-        return buff
-      end
-      i = I
-    else  -- newline
-      buff = buff.."\n"
-      i = inclinenumber(i)
-    end
-  end--while
-end
-
-----------------------------------------------------------------------
--- reads a string
-----------------------------------------------------------------------
-
-local function read_string(del)
-  local i = I
-  local find = find
-  local sub = sub
-  while true do
-    local p, q, r = find(z, "([\n\r\\\"\'])", i)  -- (long range)
-    if p then
-      if r == "\n" or r == "\r" then
-        errorline("unfinished string")
-      end
-      i = p
-      if r == "\\" then  -- handle escapes
-        i = i + 1
-        r = sub(z, i, i)
-        if r == "" then break end  -- (EOZ error)
-        p = find("abfnrtv\n\r", r, 1, true)
-        ------------------------------------------------------
-        if p then  -- special escapes
-          if p > 7 then
-            i = inclinenumber(i)
-          else
-            i = i + 1
-          end
-        ------------------------------------------------------
-        elseif find(r, "%D") then  -- other non-digits
-          i = i + 1
-        ------------------------------------------------------
-        else  -- \xxx sequence
-          local p, q, s = find(z, "^(%d%d?%d?)", i)
-          i = q + 1
-          if s + 1 > 256 then  -- UCHAR_MAX
-            errorline("escape sequence too large")
-          end
-        ------------------------------------------------------
-        end--if p
-      else
-        i = i + 1
-        if r == del then  -- ending delimiter
-          I = i
-          return sub(z, buff, i - 1)  -- return string
-        end
-      end--if r
-    else
-      break  -- (error)
-    end--if p
-  end--while
-  errorline("unfinished string")
-end
-
-------------------------------------------------------------------------
--- main lexer function
-------------------------------------------------------------------------
-
-function llex()
-  local find = find
-  local match = match
-  while true do--outer
-    local i = I
-    -- inner loop allows break to be used to nicely section tests
-    while true do--inner
-      ----------------------------------------------------------------
-      local p, _, r = find(z, "^([_%a][_%w]*)", i)
-      if p then
-        I = i + #r
-        if kw[r] then
-          addtoken("TK_KEYWORD", r)  -- reserved word (keyword)
-        else
-          addtoken("TK_NAME", r)  -- identifier
-        end
-        break  -- (continue)
-      end
-      ----------------------------------------------------------------
-      local p, _, r = find(z, "^(%.?)%d", i)
-      if p then  -- numeral
-        if r == "." then i = i + 1 end
-        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
-        i = q + 1
-        if #r == 1 then  -- optional exponent
-          if match(z, "^[%+%-]", i) then  -- optional sign
-            i = i + 1
-          end
-        end
-        local _, q = find(z, "^[_%w]*", i)
-        I = q + 1
-        local v = sub(z, p, q)  -- string equivalent
-        if not base.tonumber(v) then  -- handles hex test also
-          errorline("malformed number")
-        end
-        addtoken("TK_NUMBER", v)
-        break  -- (continue)
-      end
-      ----------------------------------------------------------------
-      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
-      if p then
-        if t == "\n" or t == "\r" then  -- newline
-          inclinenumber(i, true)
-        else
-          I = q + 1  -- whitespace
-          addtoken("TK_SPACE", r)
-        end
-        break  -- (continue)
-      end
-      ----------------------------------------------------------------
-      local r = match(z, "^%p", i)
-      if r then
-        buff = i
-        local p = find("-[\"\'.=<>~", r, 1, true)
-        if p then
-          -- two-level if block for punctuation/symbols
-          --------------------------------------------------------
-          if p <= 2 then
-            if p == 1 then  -- minus
-              local c = match(z, "^%-%-(%[?)", i)
-              if c then
-                i = i + 2
-                local sep = -1
-                if c == "[" then
-                  sep = skip_sep(i)
-                end
-                if sep >= 0 then  -- long comment
-                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
-                else  -- short comment
-                  I = find(z, "[\n\r]", i) or (#z + 1)
-                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
-                end
-                break  -- (continue)
-              end
-              -- (fall through for "-")
-            else  -- [ or long string
-              local sep = skip_sep(i)
-              if sep >= 0 then
-                addtoken("TK_LSTRING", read_long_string(true, sep))
-              elseif sep == -1 then
-                addtoken("TK_OP", "[")
-              else
-                errorline("invalid long string delimiter")
-              end
-              break  -- (continue)
-            end
-          --------------------------------------------------------
-          elseif p <= 5 then
-            if p < 5 then  -- strings
-              I = i + 1
-              addtoken("TK_STRING", read_string(r))
-              break  -- (continue)
-            end
-            r = match(z, "^%.%.?%.?", i)  -- .|..|... dots
-            -- (fall through)
-          --------------------------------------------------------
-          else  -- relational
-            r = match(z, "^%p=?", i)
-            -- (fall through)
-          end
-        end
-        I = i + #r
-        addtoken("TK_OP", r)  -- for other symbols, fall through
-        break  -- (continue)
-      end
-      ----------------------------------------------------------------
-      local r = sub(z, i, i)
-      if r ~= "" then
-        I = i + 1
-        addtoken("TK_OP", r)  -- other single-char tokens
-        break
-      end
-      addtoken("TK_EOS", "")  -- end of stream,
-      return  -- exit here
-      ----------------------------------------------------------------
-    end--while inner
-  end--while outer
-end
-
-return base.getfenv()
diff --git a/contrib/luasrcdiet/lua/lparser.lua b/contrib/luasrcdiet/lua/lparser.lua
deleted file mode 100644
index a1e10e0161..0000000000
--- a/contrib/luasrcdiet/lua/lparser.lua
+++ /dev/null
@@ -1,1295 +0,0 @@
---[[--------------------------------------------------------------------
-
-  lparser.lua: Lua 5.1 parser in Lua
-  This file is part of LuaSrcDiet, based on Yueliang material.
-
-  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
-  The COPYRIGHT file describes the conditions
-  under which this software may be distributed.
-
-  See the ChangeLog for more information.
-
-----------------------------------------------------------------------]]
-
---[[--------------------------------------------------------------------
--- NOTES:
--- * This is a version of the native 5.1.x parser from Yueliang 0.4.0,
---   with significant modifications to handle LuaSrcDiet's needs:
---   (1) needs pre-built token tables instead of a module.method
---   (2) lparser.error is an optional error handler (from llex)
---   (3) not full parsing, currently fakes raw/unlexed constants
---   (4) parser() returns globalinfo, localinfo tables
--- * Please read technotes.txt for more technical details.
--- * NO support for 'arg' vararg functions (LUA_COMPAT_VARARG)
--- * A lot of the parser is unused, but might later be useful for
---   full-on parsing and analysis for a few measly bytes saved.
-----------------------------------------------------------------------]]
-
-local base = _G
-local string = require "string"
-module "lparser"
-local _G = base.getfenv()
-
---[[--------------------------------------------------------------------
--- variable and data structure initialization
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- initialization: main variables
-----------------------------------------------------------------------
-
-local toklist,      -- grammar-only token tables (token table,
-      seminfolist,  -- semantic information table, line number
-      toklnlist,    -- table, cross-reference table)
-      xreflist,
-      tpos,         -- token position
-
-      line,         -- start line # for error messages
-      lastln,       -- last line # for ambiguous syntax chk
-      tok, seminfo, ln, xref,  -- token, semantic info, line
-      nameref,      -- proper position of <name> token
-      fs,           -- current function state
-      top_fs,       -- top-level function state
-
-      globalinfo,   -- global variable information table
-      globallookup, -- global variable name lookup table
-      localinfo,    -- local variable information table
-      ilocalinfo,   -- inactive locals (prior to activation)
-      ilocalrefs    -- corresponding references to activate
-
--- forward references for local functions
-local explist1, expr, block, exp1, body, chunk
-
-----------------------------------------------------------------------
--- initialization: data structures
-----------------------------------------------------------------------
-
-local gmatch = string.gmatch
-
-local block_follow = {}  -- lookahead check in chunk(), returnstat()
-for v in gmatch("else elseif end until <eof>", "%S+") do
-  block_follow[v] = true
-end
-
-local stat_call = {}  -- lookup for calls in stat()
-for v in gmatch("if while do for repeat function local return break", "%S+") do
-  stat_call[v] = v.."_stat"
-end
-
-local binopr_left = {}   -- binary operators, left priority
-local binopr_right = {}  -- binary operators, right priority
-for op, lt, rt in gmatch([[
-{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
-{^ 10 9}{.. 5 4}
-{~= 3 3}{== 3 3}
-{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
-{and 2 2}{or 1 1}
-]], "{(%S+)%s(%d+)%s(%d+)}") do
-  binopr_left[op] = lt + 0
-  binopr_right[op] = rt + 0
-end
-
-local unopr = { ["not"] = true, ["-"] = true,
-                ["#"] = true, }  -- unary operators
-local UNARY_PRIORITY = 8  -- priority for unary operators
-
---[[--------------------------------------------------------------------
--- support functions
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- formats error message and throws error (duplicated from llex)
--- * a simplified version, does not report what token was responsible
-----------------------------------------------------------------------
-
-local function errorline(s, line)
-  local e = error or base.error
-  e(string.format("(source):%d: %s", line or ln, s))
-end
-
-----------------------------------------------------------------------
--- handles incoming token, semantic information pairs
--- * NOTE: 'nextt' is named 'next' originally
-----------------------------------------------------------------------
-
--- reads in next token
-local function nextt()
-  lastln = toklnlist[tpos]
-  tok, seminfo, ln, xref
-    = toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
-  tpos = tpos + 1
-end
-
--- peek at next token (single lookahead for table constructor)
-local function lookahead()
-  return toklist[tpos]
-end
-
-----------------------------------------------------------------------
--- throws a syntax error, or if token expected is not there
-----------------------------------------------------------------------
-
-local function syntaxerror(msg)
-  local tok = tok
-  if tok ~= "<number>" and tok ~= "<string>" then
-    if tok == "<name>" then tok = seminfo end
-    tok = "'"..tok.."'"
-  end
-  errorline(msg.." near "..tok)
-end
-
-local function error_expected(token)
-  syntaxerror("'"..token.."' expected")
-end
-
-----------------------------------------------------------------------
--- tests for a token, returns outcome
--- * return value changed to boolean
-----------------------------------------------------------------------
-
-local function testnext(c)
-  if tok == c then nextt(); return true end
-end
-
-----------------------------------------------------------------------
--- check for existence of a token, throws error if not found
-----------------------------------------------------------------------
-
-local function check(c)
-  if tok ~= c then error_expected(c) end
-end
-
-----------------------------------------------------------------------
--- verify existence of a token, then skip it
-----------------------------------------------------------------------
-
-local function checknext(c)
-  check(c); nextt()
-end
-
-----------------------------------------------------------------------
--- throws error if condition not matched
-----------------------------------------------------------------------
-
-local function check_condition(c, msg)
-  if not c then syntaxerror(msg) end
-end
-
-----------------------------------------------------------------------
--- verifies token conditions are met or else throw error
-----------------------------------------------------------------------
-
-local function check_match(what, who, where)
-  if not testnext(what) then
-    if where == ln then
-      error_expected(what)
-    else
-      syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
-    end
-  end
-end
-
-----------------------------------------------------------------------
--- expect that token is a name, return the name
-----------------------------------------------------------------------
-
-local function str_checkname()
-  check("<name>")
-  local ts = seminfo
-  nameref = xref
-  nextt()
-  return ts
-end
-
-----------------------------------------------------------------------
--- adds given string s in string pool, sets e as VK
-----------------------------------------------------------------------
-
-local function codestring(e, s)
-  e.k = "VK"
-end
-
-----------------------------------------------------------------------
--- consume a name token, adds it to string pool
-----------------------------------------------------------------------
-
-local function checkname(e)
-  codestring(e, str_checkname())
-end
-
---[[--------------------------------------------------------------------
--- variable (global|local|upvalue) handling
--- * to track locals and globals, we can extend Yueliang's minimal
---   variable management code with little trouble
--- * entry point is singlevar() for variable lookups
--- * lookup tables (bl.locallist) are maintained awkwardly in the basic
---   block data structures, PLUS the function data structure (this is
---   an inelegant hack, since bl is nil for the top level of a function)
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- register a local variable, create local variable object, set in
--- to-activate variable list
--- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
---   localfunc(), localstat()
-----------------------------------------------------------------------
-
-local function new_localvar(name, special)
-  local bl = fs.bl
-  local locallist
-  -- locate locallist in current block object or function root object
-  if bl then
-    locallist = bl.locallist
-  else
-    locallist = fs.locallist
-  end
-  -- build local variable information object and set localinfo
-  local id = #localinfo + 1
-  localinfo[id] = {        -- new local variable object
-    name = name,           -- local variable name
-    xref = { nameref },    -- xref, first value is declaration
-    decl = nameref,        -- location of declaration, = xref[1]
-  }
-  if special then          -- "self" must be not be changed
-    localinfo[id].isself = true
-  end
-  -- this can override a local with the same name in the same scope
-  -- but first, keep it inactive until it gets activated
-  local i = #ilocalinfo + 1
-  ilocalinfo[i] = id
-  ilocalrefs[i] = locallist
-end
-
-----------------------------------------------------------------------
--- actually activate the variables so that they are visible
--- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
--- * used in parlist(), forbody(), localfunc(), localstat(), body()
-----------------------------------------------------------------------
-
-local function adjustlocalvars(nvars)
-  local sz = #ilocalinfo
-  -- i goes from left to right, in order of local allocation, because
-  -- of something like: local a,a,a = 1,2,3 which gives a = 3
-  while nvars > 0 do
-    nvars = nvars - 1
-    local i = sz - nvars
-    local id = ilocalinfo[i]          -- local's id
-    local obj = localinfo[id]
-    local name = obj.name             -- name of local
-    obj.act = xref                    -- set activation location
-    ilocalinfo[i] = nil
-    local locallist = ilocalrefs[i]   -- ref to lookup table to update
-    ilocalrefs[i] = nil
-    local existing = locallist[name]  -- if existing, remove old first!
-    if existing then                  -- do not overlap, set special
-      obj = localinfo[existing]       -- form of rem, as -id
-      obj.rem = -id
-    end
-    locallist[name] = id              -- activate, now visible to Lua
-  end
-end
-
-----------------------------------------------------------------------
--- remove (deactivate) variables in current scope (before scope exits)
--- * zap entire locallist tables since we are not allocating registers
--- * used in leaveblock(), close_func()
-----------------------------------------------------------------------
-
-local function removevars()
-  local bl = fs.bl
-  local locallist
-  -- locate locallist in current block object or function root object
-  if bl then
-    locallist = bl.locallist
-  else
-    locallist = fs.locallist
-  end
-  -- enumerate the local list at current scope and deactivate 'em
-  for name, id in base.pairs(locallist) do
-    local obj = localinfo[id]
-    obj.rem = xref  -- set deactivation location
-  end
-end
-
-----------------------------------------------------------------------
--- creates a new local variable given a name
--- * skips internal locals (those starting with '('), so internal
---   locals never needs a corresponding adjustlocalvars() call
--- * special is true for "self" which must not be optimized
--- * used in fornum(), forlist(), parlist(), body()
-----------------------------------------------------------------------
-
-local function new_localvarliteral(name, special)
-  if string.sub(name, 1, 1) == "(" then  -- can skip internal locals
-    return
-  end
-  new_localvar(name, special)
-end
-
-----------------------------------------------------------------------
--- search the local variable namespace of the given fs for a match
--- * returns localinfo index
--- * used only in singlevaraux()
-----------------------------------------------------------------------
-
-local function searchvar(fs, n)
-  local bl = fs.bl
-  local locallist
-  if bl then
-    locallist = bl.locallist
-    while locallist do
-      if locallist[n] then return locallist[n] end  -- found
-      bl = bl.prev
-      locallist = bl and bl.locallist
-    end
-  end
-  locallist = fs.locallist
-  return locallist[n] or -1  -- found or not found (-1)
-end
-
-----------------------------------------------------------------------
--- handle locals, globals and upvalues and related processing
--- * search mechanism is recursive, calls itself to search parents
--- * used only in singlevar()
-----------------------------------------------------------------------
-
-local function singlevaraux(fs, n, var)
-  if fs == nil then  -- no more levels?
-    var.k = "VGLOBAL"  -- default is global variable
-    return "VGLOBAL"
-  else
-    local v = searchvar(fs, n)  -- look up at current level
-    if v >= 0 then
-      var.k = "VLOCAL"
-      var.id = v
-      -- codegen may need to deal with upvalue here
-      return "VLOCAL"
-    else  -- not found at current level; try upper one
-      if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
-        return "VGLOBAL"
-      end
-      -- else was LOCAL or UPVAL, handle here
-      var.k = "VUPVAL"  -- upvalue in this level
-      return "VUPVAL"
-    end--if v
-  end--if fs
-end
-
-----------------------------------------------------------------------
--- consume a name token, creates a variable (global|local|upvalue)
--- * used in prefixexp(), funcname()
-----------------------------------------------------------------------
-
-local function singlevar(v)
-  local name = str_checkname()
-  singlevaraux(fs, name, v)
-  ------------------------------------------------------------------
-  -- variable tracking
-  ------------------------------------------------------------------
-  if v.k == "VGLOBAL" then
-    -- if global being accessed, keep track of it by creating an object
-    local id = globallookup[name]
-    if not id then
-      id = #globalinfo + 1
-      globalinfo[id] = {      -- new global variable object
-        name = name,          -- global variable name
-        xref = { nameref },   -- xref, first value is declaration
-      }
-      globallookup[name] = id -- remember it
-    else
-      local obj = globalinfo[id].xref
-      obj[#obj + 1] = nameref -- add xref
-    end
-  else
-    -- local/upvalue is being accessed, keep track of it
-    local id = v.id
-    local obj = localinfo[id].xref
-    obj[#obj + 1] = nameref   -- add xref
-  end
-end
-
---[[--------------------------------------------------------------------
--- state management functions with open/close pairs
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- enters a code unit, initializes elements
-----------------------------------------------------------------------
-
-local function enterblock(isbreakable)
-  local bl = {}  -- per-block state
-  bl.isbreakable = isbreakable
-  bl.prev = fs.bl
-  bl.locallist = {}
-  fs.bl = bl
-end
-
-----------------------------------------------------------------------
--- leaves a code unit, close any upvalues
-----------------------------------------------------------------------
-
-local function leaveblock()
-  local bl = fs.bl
-  removevars()
-  fs.bl = bl.prev
-end
-
-----------------------------------------------------------------------
--- opening of a function
--- * top_fs is only for anchoring the top fs, so that parser() can
---   return it to the caller function along with useful output
--- * used in parser() and body()
-----------------------------------------------------------------------
-
-local function open_func()
-  local new_fs  -- per-function state
-  if not fs then  -- top_fs is created early
-    new_fs = top_fs
-  else
-    new_fs = {}
-  end
-  new_fs.prev = fs  -- linked list of function states
-  new_fs.bl = nil
-  new_fs.locallist = {}
-  fs = new_fs
-end
-
-----------------------------------------------------------------------
--- closing of a function
--- * used in parser() and body()
-----------------------------------------------------------------------
-
-local function close_func()
-  removevars()
-  fs = fs.prev
-end
-
---[[--------------------------------------------------------------------
--- other parsing functions
--- * for table constructor, parameter list, argument list
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- parse a function name suffix, for function call specifications
--- * used in primaryexp(), funcname()
-----------------------------------------------------------------------
-
-local function field(v)
-  -- field -> ['.' | ':'] NAME
-  local key = {}
-  nextt()  -- skip the dot or colon
-  checkname(key)
-  v.k = "VINDEXED"
-end
-
-----------------------------------------------------------------------
--- parse a table indexing suffix, for constructors, expressions
--- * used in recfield(), primaryexp()
-----------------------------------------------------------------------
-
-local function yindex(v)
-  -- index -> '[' expr ']'
-  nextt()  -- skip the '['
-  expr(v)
-  checknext("]")
-end
-
-----------------------------------------------------------------------
--- parse a table record (hash) field
--- * used in constructor()
-----------------------------------------------------------------------
-
-local function recfield(cc)
-  -- recfield -> (NAME | '['exp1']') = exp1
-  local key, val = {}, {}
-  if tok == "<name>" then
-    checkname(key)
-  else-- tok == '['
-    yindex(key)
-  end
-  checknext("=")
-  expr(val)
-end
-
-----------------------------------------------------------------------
--- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
--- * note: retained in this skeleton because it modifies cc.v.k
--- * used in constructor()
-----------------------------------------------------------------------
-
-local function closelistfield(cc)
-  if cc.v.k == "VVOID" then return end  -- there is no list item
-  cc.v.k = "VVOID"
-end
-
-----------------------------------------------------------------------
--- parse a table list (array) field
--- * used in constructor()
-----------------------------------------------------------------------
-
-local function listfield(cc)
-  expr(cc.v)
-end
-
-----------------------------------------------------------------------
--- parse a table constructor
--- * used in funcargs(), simpleexp()
-----------------------------------------------------------------------
-
-local function constructor(t)
-  -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
-  -- field -> recfield | listfield
-  -- fieldsep -> ',' | ';'
-  local line = ln
-  local cc = {}
-  cc.v = {}
-  cc.t = t
-  t.k = "VRELOCABLE"
-  cc.v.k = "VVOID"
-  checknext("{")
-  repeat
-    if tok == "}" then break end
-    -- closelistfield(cc) here
-    local c = tok
-    if c == "<name>" then  -- may be listfields or recfields
-      if lookahead() ~= "=" then  -- look ahead: expression?
-        listfield(cc)
-      else
-        recfield(cc)
-      end
-    elseif c == "[" then  -- constructor_item -> recfield
-      recfield(cc)
-    else  -- constructor_part -> listfield
-      listfield(cc)
-    end
-  until not testnext(",") and not testnext(";")
-  check_match("}", "{", line)
-  -- lastlistfield(cc) here
-end
-
-----------------------------------------------------------------------
--- parse the arguments (parameters) of a function declaration
--- * used in body()
-----------------------------------------------------------------------
-
-local function parlist()
-  -- parlist -> [ param { ',' param } ]
-  local nparams = 0
-  if tok ~= ")" then  -- is 'parlist' not empty?
-    repeat
-      local c = tok
-      if c == "<name>" then  -- param -> NAME
-        new_localvar(str_checkname())
-        nparams = nparams + 1
-      elseif c == "..." then
-        nextt()
-        fs.is_vararg = true
-      else
-        syntaxerror("<name> or '...' expected")
-      end
-    until fs.is_vararg or not testnext(",")
-  end--if
-  adjustlocalvars(nparams)
-end
-
-----------------------------------------------------------------------
--- parse the parameters of a function call
--- * contrast with parlist(), used in function declarations
--- * used in primaryexp()
-----------------------------------------------------------------------
-
-local function funcargs(f)
-  local args = {}
-  local line = ln
-  local c = tok
-  if c == "(" then  -- funcargs -> '(' [ explist1 ] ')'
-    if line ~= lastln then
-      syntaxerror("ambiguous syntax (function call x new statement)")
-    end
-    nextt()
-    if tok == ")" then  -- arg list is empty?
-      args.k = "VVOID"
-    else
-      explist1(args)
-    end
-    check_match(")", "(", line)
-  elseif c == "{" then  -- funcargs -> constructor
-    constructor(args)
-  elseif c == "<string>" then  -- funcargs -> STRING
-    codestring(args, seminfo)
-    nextt()  -- must use 'seminfo' before 'next'
-  else
-    syntaxerror("function arguments expected")
-    return
-  end--if c
-  f.k = "VCALL"
-end
-
---[[--------------------------------------------------------------------
--- mostly expression functions
-----------------------------------------------------------------------]]
-
-----------------------------------------------------------------------
--- parses an expression in parentheses or a single variable
--- * used in primaryexp()
-----------------------------------------------------------------------
-
-local function prefixexp(v)
-  -- prefixexp -> NAME | '(' expr ')'
-  local c = tok
-  if c == "(" then
-    local line = ln
-    nextt()
-    expr(v)
-    check_match(")", "(", line)
-  elseif c == "<name>" then
-    singlevar(v)
-  else
-    syntaxerror("unexpected symbol")
-  end--if c
-end
-
-----------------------------------------------------------------------
--- parses a prefixexp (an expression in parentheses or a single
--- variable) or a function call specification
--- * used in simpleexp(), assignment(), expr_stat()
-----------------------------------------------------------------------
-
-local function primaryexp(v)
-  -- primaryexp ->
-  --   prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
-  prefixexp(v)
-  while true do
-    local c = tok
-    if c == "." then  -- field
-      field(v)
-    elseif c == "[" then  -- '[' exp1 ']'
-      local key = {}
-      yindex(key)
-    elseif c == ":" then  -- ':' NAME funcargs
-      local key = {}
-      nextt()
-      checkname(key)
-      funcargs(v)
-    elseif c == "(" or c == "<string>" or c == "{" then  -- funcargs
-      funcargs(v)
-    else
-      return
-    end--if c
-  end--while
-end
-
-----------------------------------------------------------------------
--- parses general expression types, constants handled here
--- * used in subexpr()
-----------------------------------------------------------------------
-
-local function simpleexp(v)
-  -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
-  --              constructor | FUNCTION body | primaryexp
-  local c = tok
-  if c == "<number>" then
-    v.k = "VKNUM"
-  elseif c == "<string>" then
-    codestring(v, seminfo)
-  elseif c == "nil" then
-    v.k = "VNIL"
-  elseif c == "true" then
-    v.k = "VTRUE"
-  elseif c == "false" then
-    v.k = "VFALSE"
-  elseif c == "..." then  -- vararg
-    check_condition(fs.is_vararg == true,
-                    "cannot use '...' outside a vararg function");
-    v.k = "VVARARG"
-  elseif c == "{" then  -- constructor
-    constructor(v)
-    return
-  elseif c == "function" then
-    nextt()
-    body(v, false, ln)
-    return
-  else
-    primaryexp(v)
-    return
-  end--if c
-  nextt()
-end
-
-------------------------------------------------------------------------
--- Parse subexpressions. Includes handling of unary operators and binary
--- operators. A subexpr is given the rhs priority level of the operator
--- immediately left of it, if any (limit is -1 if none,) and if a binop
--- is found, limit is compared with the lhs priority level of the binop
--- in order to determine which executes first.
--- * recursively called
--- * used in expr()
-------------------------------------------------------------------------
-
-local function subexpr(v, limit)
-  -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
-  --   * where 'binop' is any binary operator with a priority
-  --     higher than 'limit'
-  local op = tok
-  local uop = unopr[op]
-  if uop then
-    nextt()
-    subexpr(v, UNARY_PRIORITY)
-  else
-    simpleexp(v)
-  end
-  -- expand while operators have priorities higher than 'limit'
-  op = tok
-  local binop = binopr_left[op]
-  while binop and binop > limit do
-    local v2 = {}
-    nextt()
-    -- read sub-expression with higher priority
-    local nextop = subexpr(v2, binopr_right[op])
-    op = nextop
-    binop = binopr_left[op]
-  end
-  return op  -- return first untreated operator
-end
-
-----------------------------------------------------------------------
--- Expression parsing starts here. Function subexpr is entered with the
--- left operator (which is non-existent) priority of -1, which is lower
--- than all actual operators. Expr information is returned in parm v.
--- * used in cond(), explist1(), index(), recfield(), listfield(), --- prefixexp(), while_stat(), exp1() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function expr(v) - -- expr -> subexpr - subexpr(v, 0) -end - ---[[-------------------------------------------------------------------- --- third level parsing functions -----------------------------------------------------------------------]] - ------------------------------------------------------------------------- --- parse a variable assignment sequence --- * recursively called --- * used in expr_stat() ------------------------------------------------------------------------- - -local function assignment(v) - local e = {} - local c = v.v.k - check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL" - or c == "VINDEXED", "syntax error") - if testnext(",") then -- assignment -> ',' primaryexp assignment - local nv = {} -- expdesc - nv.v = {} - primaryexp(nv.v) - -- lparser.c deals with some register usage conflict here - assignment(nv) - else -- assignment -> '=' explist1 - checknext("=") - explist1(e) - return -- avoid default - end - e.k = "VNONRELOC" -end - ----------------------------------------------------------------------- --- parse a for loop body for both versions of the for loop --- * used in fornum(), forlist() ----------------------------------------------------------------------- - -local function forbody(nvars, isnum) - -- forbody -> DO block - checknext("do") - enterblock(false) -- scope for declared variables - adjustlocalvars(nvars) - block() - leaveblock() -- end of scope for declared variables -end - ----------------------------------------------------------------------- --- parse a numerical for loop, calls forbody() --- * used in for_stat() ----------------------------------------------------------------------- - -local function fornum(varname) - -- fornum -> NAME = exp1, exp1 [, exp1] DO body - local line = line - new_localvarliteral("(for index)") - new_localvarliteral("(for limit)") - new_localvarliteral("(for step)") - new_localvar(varname) - checknext("=") - exp1() -- initial value - checknext(",") - exp1() -- limit - if testnext(",") then - exp1() -- optional step - else - -- default step = 1 - end - forbody(1, true) -end - ----------------------------------------------------------------------- --- parse a generic for loop, calls forbody() --- * used in for_stat() ----------------------------------------------------------------------- - -local function forlist(indexname) - -- forlist -> NAME {, NAME} IN explist1 DO body - local e = {} - -- create control variables - new_localvarliteral("(for generator)") - new_localvarliteral("(for state)") - new_localvarliteral("(for control)") - -- create declared variables - new_localvar(indexname) - local nvars = 1 - while testnext(",") do - new_localvar(str_checkname()) - nvars = nvars + 1 - end - checknext("in") - local line = line - explist1(e) - forbody(nvars, false) -end - ----------------------------------------------------------------------- --- parse a function name specification --- * used in func_stat() ----------------------------------------------------------------------- - -local function funcname(v) - -- funcname -> NAME {field} [':' NAME] - local needself = false - singlevar(v) - while tok == "." 
do - field(v) - end - if tok == ":" then - needself = true - field(v) - end - return needself -end - ----------------------------------------------------------------------- --- parse the single expressions needed in numerical for loops --- * used in fornum() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function exp1() - -- exp1 -> expr - local e = {} - expr(e) -end - ----------------------------------------------------------------------- --- parse condition in a repeat statement or an if control structure --- * used in repeat_stat(), test_then_block() ----------------------------------------------------------------------- - -local function cond() - -- cond -> expr - local v = {} - expr(v) -- read condition -end - ----------------------------------------------------------------------- --- parse part of an if control structure, including the condition --- * used in if_stat() ----------------------------------------------------------------------- - -local function test_then_block() - -- test_then_block -> [IF | ELSEIF] cond THEN block - nextt() -- skip IF or ELSEIF - cond() - checknext("then") - block() -- 'then' part -end - ----------------------------------------------------------------------- --- parse a local function statement --- * used in local_stat() ----------------------------------------------------------------------- - -local function localfunc() - -- localfunc -> NAME body - local v, b = {} - new_localvar(str_checkname()) - v.k = "VLOCAL" - adjustlocalvars(1) - body(b, false, ln) -end - ----------------------------------------------------------------------- --- parse a local variable declaration statement --- * used in local_stat() ----------------------------------------------------------------------- - -local function localstat() - -- localstat -> NAME {',' NAME} ['=' explist1] - local nvars = 0 - local e = {} - repeat - new_localvar(str_checkname()) - nvars = nvars + 1 - until not testnext(",") - if testnext("=") then - explist1(e) - else - e.k = "VVOID" - end - adjustlocalvars(nvars) -end - ----------------------------------------------------------------------- --- parse a list of comma-separated expressions --- * used in return_stat(), localstat(), funcargs(), assignment(), --- forlist() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function explist1(e) - -- explist1 -> expr { ',' expr } - expr(e) - while testnext(",") do - expr(e) - end -end - ----------------------------------------------------------------------- --- parse function declaration body --- * used in simpleexp(), localfunc(), func_stat() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function body(e, needself, line) - -- body -> '(' parlist ')' chunk END - open_func() - checknext("(") - if needself then - new_localvarliteral("self", true) - adjustlocalvars(1) - end - parlist() - checknext(")") - chunk() - check_match("end", "function", line) - close_func() -end - ----------------------------------------------------------------------- --- parse a code block or unit --- * used in do_stat(), while_stat(), forbody(), test_then_block(), --- if_stat() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function block() - -- block -> chunk - enterblock(false) - chunk() - leaveblock() -end - 
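
Before the statement dispatchers below, a concrete illustration of the
subexpr()/expr() machinery above may help. This is a minimal, self-contained
sketch of the same precedence-climbing scheme; the priority values, token
stream and tree construction are illustrative stand-ins, not the parser's
own tables:

    local binopr_left  = { ["+"] = 6, ["-"] = 6, ["*"] = 7, ["/"] = 7, ["^"] = 10 }
    local binopr_right = { ["+"] = 6, ["-"] = 6, ["*"] = 7, ["/"] = 7, ["^"] = 9 }
    local UNARY_PRIORITY = 8

    local function parse(tokens)
      local pos = 1
      local function peek() return tokens[pos] end
      local function take() local t = tokens[pos]; pos = pos + 1; return t end
      local function subexpr(limit)
        local v
        if peek() == "-" then          -- unary operator, binds at UNARY_PRIORITY
          take()
          v = { op = "unm", subexpr(UNARY_PRIORITY) }
        else
          v = take()                   -- a number or a name
        end
        -- consume binary operators whose left priority exceeds 'limit'
        while binopr_left[peek()] and binopr_left[peek()] > limit do
          local op = take()
          -- right < left priority is what makes '^' right-associative
          v = { op = op, v, subexpr(binopr_right[op]) }
        end
        return v
      end
      return subexpr(0)                -- 0 is lower than every operator priority
    end

    -- parse{"1", "+", "2", "*", "3"} groups as 1 + (2 * 3)
    -- parse{"2", "^", "3", "^", "4"} groups as 2 ^ (3 ^ 4)
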
---[[-------------------------------------------------------------------- --- second level parsing functions, all with '_stat' suffix --- * since they are called via a table lookup, they cannot be local --- functions (a lookup table of local functions might be smaller...) --- * stat() -> *_stat() -----------------------------------------------------------------------]] - ----------------------------------------------------------------------- --- initial parsing for a for loop, calls fornum() or forlist() --- * removed 'line' parameter (used to set debug information only) --- * used in stat() ----------------------------------------------------------------------- - -function for_stat() - -- stat -> for_stat -> FOR (fornum | forlist) END - local line = line - enterblock(true) -- scope for loop and control variables - nextt() -- skip 'for' - local varname = str_checkname() -- first variable name - local c = tok - if c == "=" then - fornum(varname) - elseif c == "," or c == "in" then - forlist(varname) - else - syntaxerror("'=' or 'in' expected") - end - check_match("end", "for", line) - leaveblock() -- loop scope (`break' jumps to this point) -end - ----------------------------------------------------------------------- --- parse a while-do control structure, body processed by block() --- * used in stat() ----------------------------------------------------------------------- - -function while_stat() - -- stat -> while_stat -> WHILE cond DO block END - local line = line - nextt() -- skip WHILE - cond() -- parse condition - enterblock(true) - checknext("do") - block() - check_match("end", "while", line) - leaveblock() -end - ----------------------------------------------------------------------- --- parse a repeat-until control structure, body parsed by chunk() --- * originally, repeatstat() calls breakstat() too if there is an --- upvalue in the scope block; nothing is actually lexed, it is --- actually the common code in breakstat() for closing of upvalues --- * used in stat() ----------------------------------------------------------------------- - -function repeat_stat() - -- stat -> repeat_stat -> REPEAT block UNTIL cond - local line = line - enterblock(true) -- loop block - enterblock(false) -- scope block - nextt() -- skip REPEAT - chunk() - check_match("until", "repeat", line) - cond() - -- close upvalues at scope level below - leaveblock() -- finish scope - leaveblock() -- finish loop -end - ----------------------------------------------------------------------- --- parse an if control structure --- * used in stat() ----------------------------------------------------------------------- - -function if_stat() - -- stat -> if_stat -> IF cond THEN block - -- {ELSEIF cond THEN block} [ELSE block] END - local line = line - local v = {} - test_then_block() -- IF cond THEN block - while tok == "elseif" do - test_then_block() -- ELSEIF cond THEN block - end - if tok == "else" then - nextt() -- skip ELSE - block() -- 'else' part - end - check_match("end", "if", line) -end - ----------------------------------------------------------------------- --- parse a return statement --- * used in stat() ----------------------------------------------------------------------- - -function return_stat() - -- stat -> return_stat -> RETURN explist - local e = {} - nextt() -- skip RETURN - local c = tok - if block_follow[c] or c == ";" then - -- return no values - else - explist1(e) -- optional return values - end -end - ----------------------------------------------------------------------- --- parse a break 
statement --- * used in stat() ----------------------------------------------------------------------- - -function break_stat() - -- stat -> break_stat -> BREAK - local bl = fs.bl - nextt() -- skip BREAK - while bl and not bl.isbreakable do -- find a breakable block - bl = bl.prev - end - if not bl then - syntaxerror("no loop to break") - end -end - ----------------------------------------------------------------------- --- parse a function call with no returns or an assignment statement --- * the struct with .prev is used for name searching in lparse.c, --- so it is retained for now; present in assignment() also --- * used in stat() ----------------------------------------------------------------------- - -function expr_stat() - -- stat -> expr_stat -> func | assignment - local v = {} - v.v = {} - primaryexp(v.v) - if v.v.k == "VCALL" then -- stat -> func - -- call statement uses no results - else -- stat -> assignment - v.prev = nil - assignment(v) - end -end - ----------------------------------------------------------------------- --- parse a function statement --- * used in stat() ----------------------------------------------------------------------- - -function function_stat() - -- stat -> function_stat -> FUNCTION funcname body - local line = line - local v, b = {}, {} - nextt() -- skip FUNCTION - local needself = funcname(v) - body(b, needself, line) -end - ----------------------------------------------------------------------- --- parse a simple block enclosed by a DO..END pair --- * used in stat() ----------------------------------------------------------------------- - -function do_stat() - -- stat -> do_stat -> DO block END - local line = line - nextt() -- skip DO - block() - check_match("end", "do", line) -end - ----------------------------------------------------------------------- --- parse a statement starting with LOCAL --- * used in stat() ----------------------------------------------------------------------- - -function local_stat() - -- stat -> local_stat -> LOCAL FUNCTION localfunc - -- -> LOCAL localstat - nextt() -- skip LOCAL - if testnext("function") then -- local function? 
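
-- note: localfunc() above declares NAME and activates it before the body
-- is parsed, which is what lets a local function call itself recursively.
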
- localfunc() - else - localstat() - end -end - ---[[-------------------------------------------------------------------- --- main functions, top level parsing functions --- * accessible functions are: init(lexer), parser() --- * [entry] -> parser() -> chunk() -> stat() -----------------------------------------------------------------------]] - ----------------------------------------------------------------------- --- initial parsing for statements, calls '_stat' suffixed functions --- * used in chunk() ----------------------------------------------------------------------- - -local function stat() - -- stat -> if_stat while_stat do_stat for_stat repeat_stat - -- function_stat local_stat return_stat break_stat - -- expr_stat - line = ln -- may be needed for error messages - local c = tok - local fn = stat_call[c] - -- handles: if while do for repeat function local return break - if fn then - _G[fn]() - -- return or break must be last statement - if c == "return" or c == "break" then return true end - else - expr_stat() - end - return false -end - ----------------------------------------------------------------------- --- parse a chunk, which consists of a bunch of statements --- * used in parser(), body(), block(), repeat_stat() ----------------------------------------------------------------------- - --- this is a forward-referenced local -function chunk() - -- chunk -> { stat [';'] } - local islast = false - while not islast and not block_follow[tok] do - islast = stat() - testnext(";") - end -end - ----------------------------------------------------------------------- --- performs parsing, returns parsed data structure ----------------------------------------------------------------------- - -function parser() - open_func() - fs.is_vararg = true -- main func. is always vararg - nextt() -- read first token - chunk() - check("<eof>") - close_func() - return globalinfo, localinfo -end - ----------------------------------------------------------------------- --- initialization function ----------------------------------------------------------------------- - -function init(tokorig, seminfoorig, toklnorig) - tpos = 1 -- token position - top_fs = {} -- reset top level function state - ------------------------------------------------------------------ - -- set up grammar-only token tables; impedance-matching... - -- note that constants returned by the lexer is source-level, so - -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING) - ------------------------------------------------------------------ - local j = 1 - toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {} - for i = 1, #tokorig do - local tok = tokorig[i] - local yep = true - if tok == "TK_KEYWORD" or tok == "TK_OP" then - tok = seminfoorig[i] - elseif tok == "TK_NAME" then - tok = "<name>" - seminfolist[j] = seminfoorig[i] - elseif tok == "TK_NUMBER" then - tok = "<number>" - seminfolist[j] = 0 -- fake! - elseif tok == "TK_STRING" or tok == "TK_LSTRING" then - tok = "<string>" - seminfolist[j] = "" -- fake! 
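
-- note: the parser only consults token kinds, never literal values, so
-- number and string constants can safely carry placeholder seminfo.
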
- elseif tok == "TK_EOS" then - tok = "<eof>" - else - -- non-grammar tokens; ignore them - yep = false - end - if yep then -- set rest of the information - toklist[j] = tok - toklnlist[j] = toklnorig[i] - xreflist[j] = i - j = j + 1 - end - end--for - ------------------------------------------------------------------ - -- initialize data structures for variable tracking - ------------------------------------------------------------------ - globalinfo, globallookup, localinfo = {}, {}, {} - ilocalinfo, ilocalrefs = {}, {} -end - -return _G diff --git a/contrib/luasrcdiet/lua/optlex.lua b/contrib/luasrcdiet/lua/optlex.lua deleted file mode 100644 index 4c46b918bf..0000000000 --- a/contrib/luasrcdiet/lua/optlex.lua +++ /dev/null @@ -1,832 +0,0 @@ ---[[-------------------------------------------------------------------- - - optlex.lua: does lexer-based optimizations - This file is part of LuaSrcDiet. - - Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> - The COPYRIGHT file describes the conditions - under which this software may be distributed. - - See the ChangeLog for more information. - -----------------------------------------------------------------------]] - ---[[-------------------------------------------------------------------- --- NOTES: --- * For more lexer-based optimization ideas, see the TODO items or --- look at technotes.txt. --- * TODO: general string delimiter conversion optimizer --- * TODO: (numbers) warn if overly significant digit -----------------------------------------------------------------------]] - -local base = _G -local string = require "string" -module "optlex" -local match = string.match -local sub = string.sub -local find = string.find -local rep = string.rep -local print - ------------------------------------------------------------------------- --- variables and data structures ------------------------------------------------------------------------- - --- error function, can override by setting own function into module -error = base.error - -warn = {} -- table for warning flags - -local stoks, sinfos, stoklns -- source lists - -local is_realtoken = { -- significant (grammar) tokens - TK_KEYWORD = true, - TK_NAME = true, - TK_NUMBER = true, - TK_STRING = true, - TK_LSTRING = true, - TK_OP = true, - TK_EOS = true, -} -local is_faketoken = { -- whitespace (non-grammar) tokens - TK_COMMENT = true, - TK_LCOMMENT = true, - TK_EOL = true, - TK_SPACE = true, -} - -local opt_details -- for extra information - ------------------------------------------------------------------------- --- true if current token is at the start of a line --- * skips over deleted tokens via recursion ------------------------------------------------------------------------- - -local function atlinestart(i) - local tok = stoks[i - 1] - if i <= 1 or tok == "TK_EOL" then - return true - elseif tok == "" then - return atlinestart(i - 1) - end - return false -end - ------------------------------------------------------------------------- --- true if current token is at the end of a line --- * skips over deleted tokens via recursion ------------------------------------------------------------------------- - -local function atlineend(i) - local tok = stoks[i + 1] - if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then - return true - elseif tok == "" then - return atlineend(i + 1) - end - return false -end - ------------------------------------------------------------------------- --- counts comment EOLs inside a long comment --- * in order to keep line numbering, EOLs need to be 
reinserted ------------------------------------------------------------------------- - -local function commenteols(lcomment) - local sep = #match(lcomment, "^%-%-%[=*%[") - local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims - local i, c = 1, 0 - while true do - local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) - if not p then break end -- if no matches, done - i = p + 1 - c = c + 1 - if #s > 0 and r ~= s then -- skip CRLF or LFCR - i = i + 1 - end - end - return c -end - ------------------------------------------------------------------------- --- compares two tokens (i, j) and returns the whitespace required --- * important! see technotes.txt for more information --- * only two grammar/real tokens are being considered --- * if "", no separation is needed --- * if " ", then at least one whitespace (or EOL) is required ------------------------------------------------------------------------- - -local function checkpair(i, j) - local match = match - local t1, t2 = stoks[i], stoks[j] - -------------------------------------------------------------------- - if t1 == "TK_STRING" or t1 == "TK_LSTRING" or - t2 == "TK_STRING" or t2 == "TK_LSTRING" then - return "" - -------------------------------------------------------------------- - elseif t1 == "TK_OP" or t2 == "TK_OP" then - if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or - (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then - return "" - end - if t1 == "TK_OP" and t2 == "TK_OP" then - -- for TK_OP/TK_OP pairs, see notes in technotes.txt - local op, op2 = sinfos[i], sinfos[j] - if (match(op, "^%.%.?$") and match(op2, "^%.")) or - (match(op, "^[~=<>]$") and op2 == "=") or - (op == "[" and (op2 == "[" or op2 == "=")) then - return " " - end - return "" - end - -- "TK_OP" + "TK_NUMBER" case - local op = sinfos[i] - if t2 == "TK_OP" then op = sinfos[j] end - if match(op, "^%.%.?%.?$") then - return " " - end - return "" - -------------------------------------------------------------------- - else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then - return " " - -------------------------------------------------------------------- - end -end - ------------------------------------------------------------------------- --- repack tokens, removing deletions caused by optimization process ------------------------------------------------------------------------- - -local function repack_tokens() - local dtoks, dinfos, dtoklns = {}, {}, {} - local j = 1 - for i = 1, #stoks do - local tok = stoks[i] - if tok ~= "" then - dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i] - j = j + 1 - end - end - stoks, sinfos, stoklns = dtoks, dinfos, dtoklns -end - ------------------------------------------------------------------------- --- number optimization --- * optimization using string formatting functions is one way of doing --- this, but here, we consider all cases and handle them separately --- (possibly an idiotic approach...) 
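
-- * for concreteness, the case analysis below maps, e.g.:
--   0x10 -> 16, 123.000 -> 123, 000123 -> 123, 123000 -> 123e3,
--   .000123 -> 123e-6, 1200e-2 -> 12
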
--- * scientific notation being generated is not in canonical form, this --- may or may not be a bad thing, feedback welcome --- * note: intermediate portions need to fit into a normal number range --- * optimizations can be divided based on number patterns: --- * hexadecimal: --- (1) no need to remove leading zeros, just skip to (2) --- (2) convert to integer if size equal or smaller --- * change if equal size -> lose the 'x' to reduce entropy --- (3) number is then processed as an integer --- (4) note: does not make 0[xX] consistent --- * integer: --- (1) note: includes anything with trailing ".", ".0", ... --- (2) remove useless fractional part, if present, e.g. 123.000 --- (3) remove leading zeros, e.g. 000123 --- (4) switch to scientific if shorter, e.g. 123000 -> 123e3 --- * with fraction: --- (1) split into digits dot digits --- (2) if no integer portion, take as zero (can omit later) --- (3) handle degenerate .000 case, after which the fractional part --- must be non-zero (if zero, it's matched as an integer) --- (4) remove trailing zeros for fractional portion --- (5) p.q where p > 0 and q > 0 cannot be shortened any more --- (6) otherwise p == 0 and the form is .q, e.g. .000123 --- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6 --- * scientific: --- (1) split into (digits dot digits) [eE] ([+-] digits) --- (2) if significand has ".", shift it out so it becomes an integer --- (3) if significand is zero, just use zero --- (4) remove leading zeros for significand --- (5) shift out trailing zeros for significand --- (6) examine exponent and determine which format is best: --- integer, with fraction, scientific ------------------------------------------------------------------------- - -local function do_number(i) - local before = sinfos[i] -- 'before' - local z = before -- working representation - local y -- 'after', if better - -------------------------------------------------------------------- - if match(z, "^0[xX]") then -- hexadecimal number - local v = base.tostring(base.tonumber(z)) - if #v <= #z then - z = v -- change to integer, AND continue - else - return -- no change; stick to hex - end - end - -------------------------------------------------------------------- - if match(z, "^%d+%.?0*$") then -- integer or has useless frac - z = match(z, "^(%d+)%.?0*$") -- int portion only - if z + 0 > 0 then - z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros - local v = #match(z, "0*$") - local nv = base.tostring(v) - if v > #nv + 1 then -- scientific is shorter - z = sub(z, 1, #z - v).."e"..nv - end - y = z - else - y = "0" -- basic zero - end - -------------------------------------------------------------------- - elseif not match(z, "[eE]") then -- number with fraction part - local p, q = match(z, "^(%d*)%.(%d+)$") -- split - if p == "" then p = 0 end -- int part zero - if q + 0 == 0 and p == 0 then - y = "0" -- degenerate .000 case - else - -- now, q > 0 holds and p is a number - local v = #match(q, "0*$") -- remove trailing zeros - if v > 0 then - q = sub(q, 1, #q - v) - end - -- if p > 0, nothing else we can do to simplify p.q case - if p + 0 > 0 then - y = p.."."..q - else - y = "."..q -- tentative, e.g. .000123 - local v = #match(q, "^0*") -- # leading spaces - local w = #q - v -- # significant digits - local nv = base.tostring(#q) - -- e.g. 
compare 123e-6 versus .000123 - if w + 2 + #nv < 1 + #q then - y = sub(q, -w).."e-"..nv - end - end - end - -------------------------------------------------------------------- - else -- scientific number - local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$") - ex = base.tonumber(ex) - -- if got ".", shift out fractional portion of significand - local p, q = match(sig, "^(%d*)%.(%d*)$") - if p then - ex = ex - #q - sig = p..q - end - if sig + 0 == 0 then - y = "0" -- basic zero - else - local v = #match(sig, "^0*") -- remove leading zeros - sig = sub(sig, v + 1) - v = #match(sig, "0*$") -- shift out trailing zeros - if v > 0 then - sig = sub(sig, 1, #sig - v) - ex = ex + v - end - -- examine exponent and determine which format is best - local nex = base.tostring(ex) - if ex == 0 then -- it's just an integer - y = sig - elseif ex > 0 and (ex <= 1 + #nex) then -- a number - y = sig..rep("0", ex) - elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123 - v = #sig + ex - y = sub(sig, 1, v).."."..sub(sig, v + 1) - elseif ex < 0 and (#nex >= -ex - #sig) then - -- e.g. compare 1234e-5 versus .01234 - -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig - -- -> #nex >= -ex - #sig - v = -ex - #sig - y = "."..rep("0", v)..sig - else -- non-canonical scientific representation - y = sig.."e"..ex - end - end--if sig - end - -------------------------------------------------------------------- - if y and y ~= sinfos[i] then - if opt_details then - print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y) - opt_details = opt_details + 1 - end - sinfos[i] = y - end -end - ------------------------------------------------------------------------- --- string optimization --- * note: works on well-formed strings only! --- * optimizations on characters can be summarized as follows: --- \a\b\f\n\r\t\v -- no change --- \\ -- no change --- \"\' -- depends on delim, other can remove \ --- \[\] -- remove \ --- \<char> -- general escape, remove \ --- \<eol> -- normalize the EOL only --- \ddd -- if \a\b\f\n\r\t\v, change to latter --- if other < ascii 32, keep ddd but zap leading zeros --- if >= ascii 32, translate it into the literal, then also --- do escapes for \\,\",\' cases --- <other> -- no change --- * switch delimiters if string becomes shorter ------------------------------------------------------------------------- - -local function do_string(I) - local info = sinfos[I] - local delim = sub(info, 1, 1) -- delimiter used - local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> ' - local z = sub(info, 2, -2) -- actual string - local i = 1 - local c_delim, c_ndelim = 0, 0 -- "/' counts - -------------------------------------------------------------------- - while i <= #z do - local c = sub(z, i, i) - ---------------------------------------------------------------- - if c == "\\" then -- escaped stuff - local j = i + 1 - local d = sub(z, j, j) - local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true) - ------------------------------------------------------------ - if not p then -- \<char> -- remove \ - z = sub(z, 1, i - 1)..sub(z, j) - i = i + 1 - ------------------------------------------------------------ - elseif p <= 8 then -- \a\b\f\n\r\t\v\\ - i = i + 2 -- no change - ------------------------------------------------------------ - elseif p <= 10 then -- \<eol> -- normalize EOL - local eol = sub(z, j, j + 1) - if eol == "\r\n" or eol == "\n\r" then - z = sub(z, 1, i).."\n"..sub(z, j + 2) - elseif p == 10 then -- \r case - z = sub(z, 1, i).."\n"..sub(z, j + 1) - end - i = i + 2 - 
------------------------------------------------------------ - elseif p <= 12 then -- \"\' -- remove \ for ndelim - if d == delim then - c_delim = c_delim + 1 - i = i + 2 - else - c_ndelim = c_ndelim + 1 - z = sub(z, 1, i - 1)..sub(z, j) - i = i + 1 - end - ------------------------------------------------------------ - else -- \ddd -- various steps - local s = match(z, "^(%d%d?%d?)", j) - j = i + 1 + #s -- skip to location - local cv = s + 0 - local cc = string.char(cv) - local p = find("\a\b\f\n\r\t\v", cc, 1, true) - if p then -- special escapes - s = "\\"..sub("abfnrtv", p, p) - elseif cv < 32 then -- normalized \ddd - s = "\\"..cv - elseif cc == delim then -- \<delim> - s = "\\"..cc - c_delim = c_delim + 1 - elseif cc == "\\" then -- \\ - s = "\\\\" - else -- literal character - s = cc - if cc == ndelim then - c_ndelim = c_ndelim + 1 - end - end - z = sub(z, 1, i - 1)..s..sub(z, j) - i = i + #s - ------------------------------------------------------------ - end--if p - ---------------------------------------------------------------- - else-- c ~= "\\" -- <other> -- no change - i = i + 1 - if c == ndelim then -- count ndelim, for switching delimiters - c_ndelim = c_ndelim + 1 - end - ---------------------------------------------------------------- - end--if c - end--while - -------------------------------------------------------------------- - -- switching delimiters, a long-winded derivation: - -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes - -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes - -- simplifying the condition (1)>(2) --> c_delim > c_ndelim - if c_delim > c_ndelim then - i = 1 - while i <= #z do - local p, q, r = find(z, "([\'\"])", i) - if not p then break end - if r == delim then -- \<delim> -> <delim> - z = sub(z, 1, p - 2)..sub(z, p) - i = p - else-- r == ndelim -- <ndelim> -> \<ndelim> - z = sub(z, 1, p - 1).."\\"..sub(z, p) - i = p + 2 - end - end--while - delim = ndelim -- actually change delimiters - end - -------------------------------------------------------------------- - z = delim..z..delim - if z ~= sinfos[I] then - if opt_details then - print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z) - opt_details = opt_details + 1 - end - sinfos[I] = z - end -end - ------------------------------------------------------------------------- --- long string optimization --- * note: warning flagged if trailing whitespace found, not trimmed --- * remove first optional newline --- * normalize embedded newlines --- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------- - -local function do_lstring(I) - local info = sinfos[I] - local delim1 = match(info, "^%[=*%[") -- cut out delimiters - local sep = #delim1 - local delim2 = sub(info, -sep, -1) - local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims - local y = "" - local i = 1 - -------------------------------------------------------------------- - while true do - local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) - -- deal with a single line - local ln - if not p then - ln = sub(z, i) - elseif p >= i then - ln = sub(z, i, p - 1) - end - if ln ~= "" then - -- flag a warning if there are trailing spaces, won't optimize! 
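
-- (unlike long comments, a long string's content is semantically
-- significant, so trailing whitespace can only be flagged, never trimmed)
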
- if match(ln, "%s+$") then - warn.lstring = "trailing whitespace in long string near line "..stoklns[I] - end - y = y..ln - end - if not p then -- done if no more EOLs - break - end - -- deal with line endings, normalize them - i = p + 1 - if p then - if #s > 0 and r ~= s then -- skip CRLF or LFCR - i = i + 1 - end - -- skip first newline, which can be safely deleted - if not(i == 1 and i == p) then - y = y.."\n" - end - end - end--while - -------------------------------------------------------------------- - -- handle possible deletion of one or more '=' separators - if sep >= 3 then - local chk, okay = sep - 1 - -- loop to test ending delimiter with less of '=' down to zero - while chk >= 2 do - local delim = "%]"..rep("=", chk - 2).."%]" - if not match(y, delim) then okay = chk end - chk = chk - 1 - end - if okay then -- change delimiters - sep = rep("=", okay - 2) - delim1, delim2 = "["..sep.."[", "]"..sep.."]" - end - end - -------------------------------------------------------------------- - sinfos[I] = delim1..y..delim2 -end - ------------------------------------------------------------------------- --- long comment optimization --- * note: does not remove first optional newline --- * trim trailing whitespace --- * normalize embedded newlines --- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------- - -local function do_lcomment(I) - local info = sinfos[I] - local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters - local sep = #delim1 - local delim2 = sub(info, -sep, -1) - local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims - local y = "" - local i = 1 - -------------------------------------------------------------------- - while true do - local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i) - -- deal with a single line, extract and check trailing whitespace - local ln - if not p then - ln = sub(z, i) - elseif p >= i then - ln = sub(z, i, p - 1) - end - if ln ~= "" then - -- trim trailing whitespace if non-empty line - local ws = match(ln, "%s*$") - if #ws > 0 then ln = sub(ln, 1, -(ws + 1)) end - y = y..ln - end - if not p then -- done if no more EOLs - break - end - -- deal with line endings, normalize them - i = p + 1 - if p then - if #s > 0 and r ~= s then -- skip CRLF or LFCR - i = i + 1 - end - y = y.."\n" - end - end--while - -------------------------------------------------------------------- - -- handle possible deletion of one or more '=' separators - sep = sep - 2 - if sep >= 3 then - local chk, okay = sep - 1 - -- loop to test ending delimiter with less of '=' down to zero - while chk >= 2 do - local delim = "%]"..rep("=", chk - 2).."%]" - if not match(y, delim) then okay = chk end - chk = chk - 1 - end - if okay then -- change delimiters - sep = rep("=", okay - 2) - delim1, delim2 = "--["..sep.."[", "]"..sep.."]" - end - end - -------------------------------------------------------------------- - sinfos[I] = delim1..y..delim2 -end - ------------------------------------------------------------------------- --- short comment optimization --- * trim trailing whitespace ------------------------------------------------------------------------- - -local function do_comment(i) - local info = sinfos[i] - local ws = match(info, "%s*$") -- just look from end of string - if #ws > 0 then - info = sub(info, 1, -(ws + 1)) -- trim trailing whitespace - end - sinfos[i] = info -end - ------------------------------------------------------------------------- --- returns true if string found in 
long comment --- * this is a feature to keep copyright or license texts ------------------------------------------------------------------------- - -local function keep_lcomment(opt_keep, info) - if not opt_keep then return false end -- option not set - local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters - local sep = #delim1 - local delim2 = sub(info, -sep, -1) - local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims - if find(z, opt_keep, 1, true) then -- try to match - return true - end -end - ------------------------------------------------------------------------- --- main entry point --- * currently, lexer processing has 2 passes --- * processing is done on a line-oriented basis, which is easier to --- grok due to the next point... --- * since there are various options that can be enabled or disabled, --- processing is a little messy or convoluted ------------------------------------------------------------------------- - -function optimize(option, toklist, semlist, toklnlist) - -------------------------------------------------------------------- - -- set option flags - -------------------------------------------------------------------- - local opt_comments = option["opt-comments"] - local opt_whitespace = option["opt-whitespace"] - local opt_emptylines = option["opt-emptylines"] - local opt_eols = option["opt-eols"] - local opt_strings = option["opt-strings"] - local opt_numbers = option["opt-numbers"] - local opt_keep = option.KEEP - opt_details = option.DETAILS and 0 -- upvalues for details display - print = print or base.print - if opt_eols then -- forced settings, otherwise won't work properly - opt_comments = true - opt_whitespace = true - opt_emptylines = true - end - -------------------------------------------------------------------- - -- variable initialization - -------------------------------------------------------------------- - stoks, sinfos, stoklns -- set source lists - = toklist, semlist, toklnlist - local i = 1 -- token position - local tok, info -- current token - local prev -- position of last grammar token - -- on same line (for TK_SPACE stuff) - -------------------------------------------------------------------- - -- changes a token, info pair - -------------------------------------------------------------------- - local function settoken(tok, info, I) - I = I or i - stoks[I] = tok or "" - sinfos[I] = info or "" - end - -------------------------------------------------------------------- - -- processing loop (PASS 1) - -------------------------------------------------------------------- - while true do - tok, info = stoks[i], sinfos[i] - ---------------------------------------------------------------- - local atstart = atlinestart(i) -- set line begin flag - if atstart then prev = nil end - ---------------------------------------------------------------- - if tok == "TK_EOS" then -- end of stream/pass - break - ---------------------------------------------------------------- - elseif tok == "TK_KEYWORD" or -- keywords, identifiers, - tok == "TK_NAME" or -- operators - tok == "TK_OP" then - -- TK_KEYWORD and TK_OP can't be optimized without a big - -- optimization framework; it would be more of an optimizing - -- compiler, not a source code compressor - -- TK_NAME that are locals needs parser to analyze/optimize - prev = i - ---------------------------------------------------------------- - elseif tok == "TK_NUMBER" then -- numbers - if opt_numbers then - do_number(i) -- optimize - end - prev = i - 
---------------------------------------------------------------- - elseif tok == "TK_STRING" or -- strings, long strings - tok == "TK_LSTRING" then - if opt_strings then - if tok == "TK_STRING" then - do_string(i) -- optimize - else - do_lstring(i) -- optimize - end - end - prev = i - ---------------------------------------------------------------- - elseif tok == "TK_COMMENT" then -- short comments - if opt_comments then - if i == 1 and sub(info, 1, 1) == "#" then - -- keep shbang comment, trim whitespace - do_comment(i) - else - -- safe to delete, as a TK_EOL (or TK_EOS) always follows - settoken() -- remove entirely - end - elseif opt_whitespace then -- trim whitespace only - do_comment(i) - end - ---------------------------------------------------------------- - elseif tok == "TK_LCOMMENT" then -- long comments - if keep_lcomment(opt_keep, info) then - ------------------------------------------------------------ - -- if --keep, we keep a long comment if <msg> is found; - -- this is a feature to keep copyright or license texts - if opt_whitespace then -- trim whitespace only - do_lcomment(i) - end - prev = i - elseif opt_comments then - local eols = commenteols(info) - ------------------------------------------------------------ - -- prepare opt_emptylines case first, if a disposable token - -- follows, current one is safe to dump, else keep a space; - -- it is implied that the operation is safe for '-', because - -- current is a TK_LCOMMENT, and must be separate from a '-' - if is_faketoken[stoks[i + 1]] then - settoken() -- remove entirely - tok = "" - else - settoken("TK_SPACE", " ") - end - ------------------------------------------------------------ - -- if there are embedded EOLs to keep and opt_emptylines is - -- disabled, then switch the token into one or more EOLs - if not opt_emptylines and eols > 0 then - settoken("TK_EOL", rep("\n", eols)) - end - ------------------------------------------------------------ - -- if optimizing whitespaces, force reinterpretation of the - -- token to give a chance for the space to be optimized away - if opt_whitespace and tok ~= "" then - i = i - 1 -- to reinterpret - end - ------------------------------------------------------------ - else -- disabled case - if opt_whitespace then -- trim whitespace only - do_lcomment(i) - end - prev = i - end - ---------------------------------------------------------------- - elseif tok == "TK_EOL" then -- line endings - if atstart and opt_emptylines then - settoken() -- remove entirely - elseif info == "\r\n" or info == "\n\r" then - -- normalize the rest of the EOLs for CRLF/LFCR only - -- (note that TK_LCOMMENT can change into several EOLs) - settoken("TK_EOL", "\n") - end - ---------------------------------------------------------------- - elseif tok == "TK_SPACE" then -- whitespace - if opt_whitespace then - if atstart or atlineend(i) then - -- delete leading and trailing whitespace - settoken() -- remove entirely - else - ------------------------------------------------------------ - -- at this point, since leading whitespace have been removed, - -- there should be a either a real token or a TK_LCOMMENT - -- prior to hitting this whitespace; the TK_LCOMMENT case - -- only happens if opt_comments is disabled; so prev ~= nil - local ptok = stoks[prev] - if ptok == "TK_LCOMMENT" then - -- previous TK_LCOMMENT can abut with anything - settoken() -- remove entirely - else - -- prev must be a grammar token; consecutive TK_SPACE - -- tokens is impossible when optimizing whitespace - local ntok = stoks[i + 1] 
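
-- e.g. in "x - --[[c]] y" the space after '-' must survive: removing it
-- would fuse '-' with '--' into '---', commenting out the rest of the line
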
- if is_faketoken[ntok] then - -- handle special case where a '-' cannot abut with - -- either a short comment or a long comment - if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and - ptok == "TK_OP" and sinfos[prev] == "-" then - -- keep token - else - settoken() -- remove entirely - end - else--is_realtoken - -- check a pair of grammar tokens, if can abut, then - -- delete space token entirely, otherwise keep one space - local s = checkpair(prev, i + 1) - if s == "" then - settoken() -- remove entirely - else - settoken("TK_SPACE", " ") - end - end - end - ------------------------------------------------------------ - end - end - ---------------------------------------------------------------- - else - error("unidentified token encountered") - end - ---------------------------------------------------------------- - i = i + 1 - end--while - repack_tokens() - -------------------------------------------------------------------- - -- processing loop (PASS 2) - -------------------------------------------------------------------- - if opt_eols then - i = 1 - -- aggressive EOL removal only works with most non-grammar tokens - -- optimized away because it is a rather simple scheme -- basically - -- it just checks 'real' token pairs around EOLs - if stoks[1] == "TK_COMMENT" then - -- first comment still existing must be shbang, skip whole line - i = 3 - end - while true do - tok, info = stoks[i], sinfos[i] - -------------------------------------------------------------- - if tok == "TK_EOS" then -- end of stream/pass - break - -------------------------------------------------------------- - elseif tok == "TK_EOL" then -- consider each TK_EOL - local t1, t2 = stoks[i - 1], stoks[i + 1] - if is_realtoken[t1] and is_realtoken[t2] then -- sanity check - local s = checkpair(i - 1, i + 1) - if s == "" then - settoken() -- remove entirely - end - end - end--if tok - -------------------------------------------------------------- - i = i + 1 - end--while - repack_tokens() - end - -------------------------------------------------------------------- - if opt_details and opt_details > 0 then print() end -- spacing - return stoks, sinfos, stoklns -end diff --git a/contrib/luasrcdiet/lua/optparser.lua b/contrib/luasrcdiet/lua/optparser.lua deleted file mode 100644 index cfe6cc1013..0000000000 --- a/contrib/luasrcdiet/lua/optparser.lua +++ /dev/null @@ -1,564 +0,0 @@ ---[[-------------------------------------------------------------------- - - optparser.lua: does parser-based optimizations - This file is part of LuaSrcDiet. - - Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> - The COPYRIGHT file describes the conditions - under which this software may be distributed. - - See the ChangeLog for more information. - -----------------------------------------------------------------------]] - ---[[-------------------------------------------------------------------- --- NOTES: --- * For more parser-based optimization ideas, see the TODO items or --- look at technotes.txt. --- * The processing load is quite significant, but since this is an --- off-line text processor, I believe we can wait a few seconds. --- * TODO: might process "local a,a,a" wrongly... need tests! --- * TODO: remove position handling if overlapped locals (rem < 0) --- needs more study, to check behaviour --- * TODO: there are probably better ways to do allocation, e.g. by --- choosing better methods to sort and pick locals... 
--- * TODO: we don't need 53*63 two-letter identifiers; we can make --- do with significantly less depending on how many that are really --- needed and improve entropy; e.g. 13 needed -> choose 4*4 instead -----------------------------------------------------------------------]] - -local base = _G -local string = require "string" -local table = require "table" -module "optparser" - ----------------------------------------------------------------------- --- Letter frequencies for reducing symbol entropy (fixed version) --- * Might help a wee bit when the output file is compressed --- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies --- * We use letter frequencies according to a Linotype keyboard, plus --- the underscore, and both lower case and upper case letters. --- * The arrangement below (LC, underscore, %d, UC) is arbitrary. --- * This is certainly not optimal, but is quick-and-dirty and the --- process has no significant overhead ----------------------------------------------------------------------- - -local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ" -local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ" - --- names or identifiers that must be skipped --- * the first two lines are for keywords -local SKIP_NAME = {} -for v in string.gmatch([[ -and break do else elseif end false for function if in -local nil not or repeat return then true until while -self]], "%S+") do - SKIP_NAME[v] = true -end - ------------------------------------------------------------------------- --- variables and data structures ------------------------------------------------------------------------- - -local toklist, seminfolist, -- token lists - globalinfo, localinfo, -- variable information tables - globaluniq, localuniq, -- unique name tables - var_new, -- index of new variable names - varlist -- list of output variables - ----------------------------------------------------------------------- --- preprocess information table to get lists of unique names ----------------------------------------------------------------------- - -local function preprocess(infotable) - local uniqtable = {} - for i = 1, #infotable do -- enumerate info table - local obj = infotable[i] - local name = obj.name - -------------------------------------------------------------------- - if not uniqtable[name] then -- not found, start an entry - uniqtable[name] = { - decl = 0, token = 0, size = 0, - } - end - -------------------------------------------------------------------- - local uniq = uniqtable[name] -- count declarations, tokens, size - uniq.decl = uniq.decl + 1 - local xref = obj.xref - local xcount = #xref - uniq.token = uniq.token + xcount - uniq.size = uniq.size + xcount * #name - -------------------------------------------------------------------- - if obj.decl then -- if local table, create first,last pairs - obj.id = i - obj.xcount = xcount - if xcount > 1 then -- if ==1, means local never accessed - obj.first = xref[2] - obj.last = xref[xcount] - end - -------------------------------------------------------------------- - else -- if global table, add a back ref - uniq.id = i - end - -------------------------------------------------------------------- - end--for - return uniqtable -end - ----------------------------------------------------------------------- --- calculate actual symbol frequencies, in order to reduce entropy --- * this may help further reduce the size of compressed sources --- * note that since parsing optimizations is put before lexing 
--- optimizations, the frequency table is not exact! --- * yes, this will miss --keep block comments too... ----------------------------------------------------------------------- - -local function recalc_for_entropy(option) - local byte = string.byte - local char = string.char - -- table of token classes to accept in calculating symbol frequency - local ACCEPT = { - TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, - TK_STRING = true, TK_LSTRING = true, - } - if not option["opt-comments"] then - ACCEPT.TK_COMMENT = true - ACCEPT.TK_LCOMMENT = true - end - -------------------------------------------------------------------- - -- create a new table and remove any original locals by filtering - -------------------------------------------------------------------- - local filtered = {} - for i = 1, #toklist do - filtered[i] = seminfolist[i] - end - for i = 1, #localinfo do -- enumerate local info table - local obj = localinfo[i] - local xref = obj.xref - for j = 1, obj.xcount do - local p = xref[j] - filtered[p] = "" -- remove locals - end - end - -------------------------------------------------------------------- - local freq = {} -- reset symbol frequency table - for i = 0, 255 do freq[i] = 0 end - for i = 1, #toklist do -- gather symbol frequency - local tok, info = toklist[i], filtered[i] - if ACCEPT[tok] then - for j = 1, #info do - local c = byte(info, j) - freq[c] = freq[c] + 1 - end - end--if - end--for - -------------------------------------------------------------------- - -- function to re-sort symbols according to actual frequencies - -------------------------------------------------------------------- - local function resort(symbols) - local symlist = {} - for i = 1, #symbols do -- prepare table to sort - local c = byte(symbols, i) - symlist[i] = { c = c, freq = freq[c], } - end - table.sort(symlist, -- sort selected symbols - function(v1, v2) - return v1.freq > v2.freq - end - ) - local charlist = {} -- reconstitute the string - for i = 1, #symlist do - charlist[i] = char(symlist[i].c) - end - return table.concat(charlist) - end - -------------------------------------------------------------------- - LETTERS = resort(LETTERS) -- change letter arrangement - ALPHANUM = resort(ALPHANUM) -end - ----------------------------------------------------------------------- --- returns a string containing a new local variable name to use, and --- a flag indicating whether it collides with a global variable --- * trapping keywords and other names like 'self' is done elsewhere ----------------------------------------------------------------------- - -local function new_var_name() - local var - local cletters, calphanum = #LETTERS, #ALPHANUM - local v = var_new - if v < cletters then -- single char - v = v + 1 - var = string.sub(LETTERS, v, v) - else -- longer names - local range, sz = cletters, 1 -- calculate # chars fit - repeat - v = v - range - range = range * calphanum - sz = sz + 1 - until range > v - local n = v % cletters -- left side cycles faster - v = (v - n) / cletters -- do first char first - n = n + 1 - var = string.sub(LETTERS, n, n) - while sz > 1 do - local m = v % calphanum - v = (v - m) / calphanum - m = m + 1 - var = var..string.sub(ALPHANUM, m, m) - sz = sz - 1 - end - end - var_new = var_new + 1 - return var, globaluniq[var] ~= nil -end - ----------------------------------------------------------------------- --- calculate and print some statistics --- * probably better in main source, put here for now 
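
The allocation scheme in new_var_name() above amounts to counting in a
mixed radix: the first character is drawn from LETTERS and cycles fastest,
while any further characters come from ALPHANUM. The sketch below reproduces
that logic standalone; the two tables are shortened stand-ins for the full
53- and 63-symbol strings, and the global-collision flag is dropped for
brevity:

    local LETTERS  = "eta"      -- stand-in for the full 53-letter table
    local ALPHANUM = "eta0"     -- stand-in for the full 63-symbol table
    local var_new  = 0

    local function new_var_name()
      local var
      local cletters, calphanum = #LETTERS, #ALPHANUM
      local v = var_new
      if v < cletters then                -- single-character names first
        v = v + 1
        var = string.sub(LETTERS, v, v)
      else                                -- then mixed-radix counting
        local range, sz = cletters, 1     -- find how many characters fit
        repeat
          v = v - range
          range = range * calphanum
          sz = sz + 1
        until range > v
        local n = v % cletters            -- first character cycles fastest
        v = (v - n) / cletters
        var = string.sub(LETTERS, n + 1, n + 1)
        while sz > 1 do
          local m = v % calphanum
          v = (v - m) / calphanum
          var = var .. string.sub(ALPHANUM, m + 1, m + 1)
          sz = sz - 1
        end
      end
      var_new = var_new + 1
      return var
    end

    for _ = 1, 8 do io.write(new_var_name(), " ") end
    -- with the shortened tables this prints: e t a ee te ae et tt
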
----------------------------------------------------------------------- - -local function stats_summary(globaluniq, localuniq, afteruniq, option) - local print = print or base.print - local fmt = string.format - local opt_details = option.DETAILS - local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed - decl_g, decl_li, decl_lo, decl_ti, decl_to, - token_g, token_li, token_lo, token_ti, token_to, - size_g, size_li, size_lo, size_ti, size_to - = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - local function avg(c, l) -- safe average function - if c == 0 then return 0 end - return l / c - end - -------------------------------------------------------------------- - -- collect statistics (note: globals do not have declarations!) - -------------------------------------------------------------------- - for name, uniq in base.pairs(globaluniq) do - uniq_g = uniq_g + 1 - token_g = token_g + uniq.token - size_g = size_g + uniq.size - end - for name, uniq in base.pairs(localuniq) do - uniq_li = uniq_li + 1 - decl_li = decl_li + uniq.decl - token_li = token_li + uniq.token - size_li = size_li + uniq.size - end - for name, uniq in base.pairs(afteruniq) do - uniq_lo = uniq_lo + 1 - decl_lo = decl_lo + uniq.decl - token_lo = token_lo + uniq.token - size_lo = size_lo + uniq.size - end - uniq_ti = uniq_g + uniq_li - decl_ti = decl_g + decl_li - token_ti = token_g + token_li - size_ti = size_g + size_li - uniq_to = uniq_g + uniq_lo - decl_to = decl_g + decl_lo - token_to = token_g + token_lo - size_to = size_g + size_lo - -------------------------------------------------------------------- - -- detailed stats: global list - -------------------------------------------------------------------- - if opt_details then - local sorted = {} -- sort table of unique global names by size - for name, uniq in base.pairs(globaluniq) do - uniq.name = name - sorted[#sorted + 1] = uniq - end - table.sort(sorted, - function(v1, v2) - return v1.size > v2.size - end - ) - local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s" - local hl = string.rep("-", 44) - print("*** global variable list (sorted by size) ***\n"..hl) - print(fmt(tabf1, "Token", "Input", "Input", "Global")) - print(fmt(tabf1, "Count", "Bytes", "Average", "Name")) - print(hl) - for i = 1, #sorted do - local uniq = sorted[i] - print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name)) - end - print(hl) - print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL")) - print(hl.."\n") - -------------------------------------------------------------------- - -- detailed stats: local list - -------------------------------------------------------------------- - local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s" - local hl = string.rep("-", 70) - print("*** local variable list (sorted by allocation order) ***\n"..hl) - print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global")) - print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name")) - print(hl) - for i = 1, #varlist do -- iterate according to order assigned - local name = varlist[i] - local uniq = afteruniq[name] - local old_t, old_s = 0, 0 - for j = 1, #localinfo do -- find corresponding old names and calculate - local obj = localinfo[j] - if obj.name == name then - old_t = old_t + obj.xcount - old_s = old_s + obj.xcount * #obj.oldname - end - end - print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s), - uniq.size, avg(uniq.token, uniq.size), name)) - end - print(hl) - 
-    print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
-              size_lo, avg(token_lo, size_lo), "TOTAL"))
-    print(hl.."\n")
-  end--if opt_details
-  --------------------------------------------------------------------
-  -- display output
-  --------------------------------------------------------------------
-  local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
-  local hl = string.rep("-", 58)
-  print("*** local variable optimization summary ***\n"..hl)
-  print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
-  print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
-  print(hl)
-  print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
-  print(hl)
-  print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
-  print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
-  print(hl)
-  print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
-  print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
-  print(hl.."\n")
-end
-
-----------------------------------------------------------------------
--- main entry point
--- * does only local variable optimization for now
-----------------------------------------------------------------------
-
-function optimize(option, _toklist, _seminfolist, _globalinfo, _localinfo)
-  -- set tables
-  toklist, seminfolist, globalinfo, localinfo
-    = _toklist, _seminfolist, _globalinfo, _localinfo
-  var_new = 0  -- reset variable name allocator
-  varlist = {}
-  ------------------------------------------------------------------
-  -- preprocess global/local tables, handle entropy reduction
-  ------------------------------------------------------------------
-  globaluniq = preprocess(globalinfo)
-  localuniq = preprocess(localinfo)
-  if option["opt-entropy"] then     -- for entropy improvement
-    recalc_for_entropy(option)
-  end
-  ------------------------------------------------------------------
-  -- build the initial table of declared objects, then sort it by
-  -- token count; this might help assign more tokens to more common
-  -- variable names such as 'e', thus possibly reducing entropy
-  -- * an object knows its localinfo index via its 'id' field
-  -- * the "self" special local (parameter) is handled separately here
-  ------------------------------------------------------------------
-  local object = {}
-  for i = 1, #localinfo do
-    object[i] = localinfo[i]
-  end
-  table.sort(object,                -- sort largest first
-    function(v1, v2)
-      return v1.xcount > v2.xcount
-    end
-  )
-  ------------------------------------------------------------------
-  -- the special "self" function parameters must be preserved
-  -- * the allocator below will never use "self", so it is safe to
-  --   keep those implicit declarations as-is
-  ------------------------------------------------------------------
-  local temp, j, gotself = {}, 1, false
-  for i = 1, #object do
-    local obj = object[i]
-    if not obj.isself then
-      temp[j] = obj
-      j = j + 1
-    else
-      gotself = true
-    end
-  end
-  object = temp
-  ------------------------------------------------------------------
-  -- a simple first-come, first-served heuristic name allocator;
-  -- note that this is in no way optimal...
-  -- * each object is a local variable declaration plus its live
-  --   range (the 'act' to 'rem' token positions)
-  -- * the aim is to assign short names to as many tokens as possible,
-  --   so the following tries to maximize name reuse
-  -- * note that we preserve sort order
-  ------------------------------------------------------------------
-  local nobject = #object
-  while nobject > 0 do
-    local varname, gcollide
-    repeat
-      varname, gcollide = new_var_name()  -- collect a variable name
-    until not SKIP_NAME[varname]          -- skip all special names
-    varlist[#varlist + 1] = varname       -- keep a list
-    local oleft = nobject
-    ------------------------------------------------------------------
-    -- if the variable name collides with an existing global, a local
-    -- cannot use the name whenever the name is accessed as a global
-    -- while the local is alive (between 'act' and 'rem'), so we drop
-    -- objects that collide with the corresponding global
-    ------------------------------------------------------------------
-    if gcollide then
-      -- find the xref table of the global
-      local gref = globalinfo[globaluniq[varname].id].xref
-      local ngref = #gref
-      -- enumerate all current objects; all are valid at this point
-      for i = 1, nobject do
-        local obj = object[i]
-        local act, rem = obj.act, obj.rem  -- 'live' range of local
-        -- if rem < 0, it is a -id pointing to a local that had the
-        -- same name, so follow rem to get the extended range
-        while rem < 0 do
-          rem = localinfo[-rem].rem
-        end
-        local drop
-        for j = 1, ngref do
-          local p = gref[j]
-          if p >= act and p <= rem then drop = true end  -- in range?
-        end
-        if drop then
-          obj.skip = true
-          oleft = oleft - 1
-        end
-      end--for
-    end--if gcollide
-    ------------------------------------------------------------------
-    -- now the first unassigned local (since it's sorted) will be the
-    -- one with the most tokens to rename, so we assign this one and
-    -- then eliminate all others that collide; any locals that are left
-    -- can then reuse the same variable name; this is repeated until
-    -- every local declaration that can use this name has been assigned
-    -- * the criterion for local-local reuse/collision is:
-    --   A is the local with a name already assigned
-    --   B is the unassigned local under consideration
-    --   => anytime A is accessed, it cannot be while B is 'live'
-    --   => to speed things up, we have first/last accesses noted
-    ------------------------------------------------------------------
-    while oleft > 0 do
-      local i = 1
-      while object[i].skip do  -- scan for first object
-        i = i + 1
-      end
-      ------------------------------------------------------------------
-      -- the first object is free for assignment of the variable name;
-      -- [first,last] gives the access range for collision checking
-      ------------------------------------------------------------------
-      oleft = oleft - 1
-      local obja = object[i]
-      i = i + 1
-      obja.newname = varname
-      obja.skip = true
-      obja.done = true
-      local first, last = obja.first, obja.last
-      local xref = obja.xref
-      ------------------------------------------------------------------
-      -- then, scan all the rest and drop those colliding
-      -- if A was never accessed then it'll never collide with anything
-      -- otherwise it is a trivial skip if:
-      -- * B was activated after A's last access (last < act)
-      -- * B was removed before A's first access (first > rem)
-      -- if not, see the detailed check below...
-      ------------------------------------------------------------------
-      if first and oleft > 0 then  -- must have at least 1 access
-        local scanleft = oleft
-        while scanleft > 0 do
-          while object[i].skip do  -- next valid object
-            i = i + 1
-          end
-          scanleft = scanleft - 1
-          local objb = object[i]
-          i = i + 1
-          local act, rem = objb.act, objb.rem  -- live range of B
-          -- if rem < 0, extend the range through the following local
-          while rem < 0 do
-            rem = localinfo[-rem].rem
-          end
-          --------------------------------------------------------
-          if not(last < act or first > rem) then  -- possible collision
-            --------------------------------------------------------
-            -- B is activated later than A, or at the same statement;
-            -- for no collision, A must never be accessed while B is
-            -- alive, since B overrides A (or is a peer)
-            --------------------------------------------------------
-            if act >= obja.act then
-              for j = 1, obja.xcount do  -- ... so check every access
-                local p = xref[j]
-                if p >= act and p <= rem then  -- A accessed while B live!
-                  oleft = oleft - 1
-                  objb.skip = true
-                  break
-                end
-              end--for
-            --------------------------------------------------------
-            -- A is activated later than B; for no collision, A's
-            -- accesses are fine since A overrides B, but B's last
-            -- access needs to be earlier than A's activation time
-            --------------------------------------------------------
-            else
-              if objb.last and objb.last >= obja.act then
-                oleft = oleft - 1
-                objb.skip = true
-              end
-            end
-          end
-          --------------------------------------------------------
-          if oleft == 0 then break end
-        end
-      end--if first
-      ------------------------------------------------------------------
-    end--while
-    ------------------------------------------------------------------
-    -- after assigning all possible locals to one variable name, the
-    -- unassigned locals/objects have their skip field reset and the
-    -- table is compacted, to hopefully reduce iteration time
-    ------------------------------------------------------------------
-    local temp, j = {}, 1
-    for i = 1, nobject do
-      local obj = object[i]
-      if not obj.done then
-        obj.skip = false
-        temp[j] = obj
-        j = j + 1
-      end
-    end
-    object = temp             -- new compacted object table
-    nobject = #object         -- objects left to process
-    ------------------------------------------------------------------
-  end--while
-  ------------------------------------------------------------------
-  -- after assigning new variable names to all locals, we can patch
-  -- in the new names and reprocess to get the 'after' statistics
-  ------------------------------------------------------------------
-  for i = 1, #localinfo do  -- enumerate all locals
-    local obj = localinfo[i]
-    local xref = obj.xref
-    if obj.newname then                 -- if it got a new name, patch it in
-      for j = 1, obj.xcount do
-        local p = xref[j]               -- xref indexes the token list
-        seminfolist[p] = obj.newname
-      end
-      obj.name, obj.oldname             -- adjust names
-        = obj.newname, obj.name
-    else
-      obj.oldname = obj.name            -- for cases like 'self'
-    end
-  end
-  ------------------------------------------------------------------
-  -- deal with statistics output
-  ------------------------------------------------------------------
-  if gotself then  -- add 'self' to the end of the list
-    varlist[#varlist + 1] = "self"
-  end
-  local afteruniq = preprocess(localinfo)
-  stats_summary(globaluniq, localuniq, afteruniq, option)
-  ------------------------------------------------------------------
-end
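
For readers studying the deleted optparser.lua above: the following is a minimal, standalone Lua 5.1 sketch of the frequency-reordering idea behind recalc_for_entropy(). The sample string and the plain a-z alphabet are illustrative stand-ins for the real token payload and the LETTERS/ALPHANUM alphabets.

-- count byte frequencies over a sample payload, then reorder a candidate
-- alphabet so its most frequent bytes come first (illustrative data only)
local text = "local function sum(t) local s = 0 for i = 1, #t do s = s + t[i] end return s end"
local freq = {}
for i = 0, 255 do freq[i] = 0 end
for i = 1, #text do
  local c = string.byte(text, i)
  freq[c] = freq[c] + 1
end
local function resort(symbols)
  local symlist = {}
  for i = 1, #symbols do
    local c = string.byte(symbols, i)
    symlist[i] = { c = c, freq = freq[c] }
  end
  table.sort(symlist, function(v1, v2) return v1.freq > v2.freq end)
  local charlist = {}
  for i = 1, #symlist do charlist[i] = string.char(symlist[i].c) end
  return table.concat(charlist)
end
-- letters that occur often in the sample move to the front, so the
-- shortest generated names reuse the bytes the source already favors
print(resort("abcdefghijklmnopqrstuvwxyz"))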
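The enumeration order that new_var_name() produces can be seen with a trimmed-down copy of the same arithmetic. The tiny alphabets below are stand-ins so the multi-character rollover shows up early; the real allocator draws on LETTERS and ALPHANUM and relies on its caller to reject keywords and 'self' via SKIP_NAME.

-- trimmed-down restatement of the new_var_name() arithmetic, with tiny
-- alphabets so the rollover to multi-character names is visible early
local LETTERS, ALPHANUM = "abc", "abc_"
local var_new = 0
local function next_name()
  local var
  local cletters, calphanum = #LETTERS, #ALPHANUM
  local v = var_new
  if v < cletters then              -- single-character names first
    v = v + 1
    var = string.sub(LETTERS, v, v)
  else                              -- then 2-char, 3-char, ... names
    local range, sz = cletters, 1   -- how many characters does v need?
    repeat
      v = v - range
      range = range * calphanum
      sz = sz + 1
    until range > v
    local n = v % cletters          -- leftmost character cycles fastest
    v = (v - n) / cletters
    n = n + 1
    var = string.sub(LETTERS, n, n)
    while sz > 1 do
      local m = v % calphanum
      v = (v - m) / calphanum
      m = m + 1
      var = var..string.sub(ALPHANUM, m, m)
      sz = sz - 1
    end
  end
  var_new = var_new + 1
  return var
end
local t = {}
for i = 1, 15 do t[i] = next_name() end
print(table.concat(t, " "))
--> a b c aa ba ca ab bb cb ac bc cc a_ b_ c_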
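At its core, the allocation loop in optimize() is a first-fit sharing of short names among locals whose live ranges do not conflict. The toy below uses made-up locals and plain [act, rem] interval overlap in place of the finer per-access test above, but it shows how non-overlapping locals end up sharing one short name.

-- toy first-fit sharing of short names between locals whose live
-- ranges (token positions act..rem) never overlap; all data made up
local locals = {                  -- assume already sorted by xcount
  { name = "count", act = 1,  rem = 40 },
  { name = "buf",   act = 5,  rem = 12 },
  { name = "tmp",   act = 44, rem = 60 },
  { name = "line",  act = 50, rem = 55 },
}
local function overlap(a, b)
  return not (a.rem < b.act or b.rem < a.act)
end
local shortnames = { "a", "b", "c" }
local users = {}                  -- short name -> locals holding it
for _, obj in ipairs(locals) do
  for _, nm in ipairs(shortnames) do
    local ok = true
    for _, other in ipairs(users[nm] or {}) do
      if overlap(obj, other) then ok = false; break end
    end
    if ok then                    -- first name with no live collision
      obj.newname = nm
      users[nm] = users[nm] or {}
      users[nm][#users[nm] + 1] = obj
      break
    end
  end
end
for _, obj in ipairs(locals) do   -- count->a, buf->b, tmp->a, line->b
  print(obj.name, "->", obj.newname)
end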