diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-09-08 09:52:04 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-09-08 09:52:04 +0200 |
commit | 48cb983b136fb74c61db594a30e18bdc42b7264c (patch) | |
tree | dfd043047bec76c4f09e9ff535e47bd449d1140a | |
parent | 8c68ae8416c8b54222eb3cd1d4908a570147e134 (diff) |
ash: parser: Get rid of PEOA
Upstream commit:
Date: Wed, 27 May 2020 12:19:13 +1000
parser: Get rid of PEOA
PEOA is a special character used to mark an alias as being finished
so that we don't enter an infinite loop with nested aliases. It
complicates the parser because we have to ensure that it is skipped
where necessary and not copied to the resulting token text.
This patch removes it and instead delays the marking of aliases
until the second pgetc. This has the same effect as the current
PEOA code while keeping the complexities within the input code.
This adds ~32 bytes of global data:
function old new delta
__pgetc - 512 +512
freestrings - 95 +95
popfile 86 110 +24
pushstring 141 160 +19
basepf 76 84 +8
syntax_index_table 258 257 -1
S_I_T 30 28 -2
.rodata 104255 104247 -8
pgetc_without_PEOA 13 - -13
xxreadtoken 230 215 -15
popstring 158 120 -38
readtoken1 3110 3045 -65
pgetc 547 22 -525
------------------------------------------------------------------------------
(add/remove: 2/1 grow/shrink: 3/7 up/down: 658/-667) Total: -9 bytes
text data bss dec hex filename
1043102 559 5020 1048681 100069 busybox_old
1043085 559 5052 1048696 100078 busybox_unstripped
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | shell/ash.c | 239 |
1 files changed, 108 insertions, 131 deletions
diff --git a/shell/ash.c b/shell/ash.c index 5a001b004..ba116d83a 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -295,6 +295,10 @@ typedef long arith_t; # define PIPE_BUF 4096 /* amount of buffering in a pipe */ #endif +#ifndef unlikely +# define unlikely(cond) (cond) +#endif + #if !BB_MMU # error "Do not even bother, ash will not run on NOMMU machine" #endif @@ -583,6 +587,9 @@ struct strpush { #endif char *string; /* remember the string since it may change */ + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; + /* Remember last two characters for pungetc. */ int lastc[2]; @@ -605,6 +612,9 @@ struct parsefile { struct strpush *strpush; /* for pushing strings at this level */ struct strpush basestrpush; /* so pushing one is fast */ + /* Delay freeing so we can stop nested aliases. */ + struct strpush *spfree; + /* Remember last two characters for pungetc. */ int lastc[2]; @@ -3013,12 +3023,8 @@ pwdcmd(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) #define CENDFILE 11 /* end of file */ #define CCTL 12 /* like CWORD, except it must be escaped */ #define CSPCL 13 /* these terminate a word */ -#define CIGN 14 /* character should be ignored */ #define PEOF 256 -#if ENABLE_ASH_ALIAS -# define PEOA 257 -#endif #define USE_SIT_FUNCTION ENABLE_ASH_OPTIMIZE_FOR_SIZE @@ -3028,49 +3034,43 @@ pwdcmd(int argc UNUSED_PARAM, char **argv UNUSED_PARAM) # define SIT_ITEM(a,b,c,d) (a | (b << 4) | (c << 8)) #endif static const uint16_t S_I_T[] ALIGN2 = { -#if ENABLE_ASH_ALIAS - SIT_ITEM(CSPCL , CIGN , CIGN , CIGN ), /* 0, PEOA */ -#endif - SIT_ITEM(CSPCL , CWORD , CWORD, CWORD ), /* 1, ' ' */ - SIT_ITEM(CNL , CNL , CNL , CNL ), /* 2, \n */ - SIT_ITEM(CWORD , CCTL , CCTL , CWORD ), /* 3, !*-/:=?[]~ */ - SIT_ITEM(CDQUOTE , CENDQUOTE, CWORD, CWORD ), /* 4, '"' */ - SIT_ITEM(CVAR , CVAR , CWORD, CVAR ), /* 5, $ */ - SIT_ITEM(CSQUOTE , CWORD , CENDQUOTE, CWORD), /* 6, "'" */ - SIT_ITEM(CSPCL , CWORD , CWORD, CLP ), /* 7, ( */ - SIT_ITEM(CSPCL , CWORD , CWORD, CRP ), /* 8, ) */ - SIT_ITEM(CBACK , CBACK , CCTL , CBACK ), /* 9, \ */ - SIT_ITEM(CBQUOTE , CBQUOTE , CWORD, CBQUOTE), /* 10, ` */ - SIT_ITEM(CENDVAR , CENDVAR , CWORD, CENDVAR), /* 11, } */ + SIT_ITEM(CSPCL , CWORD , CWORD, CWORD ), /* 0, ' ' */ + SIT_ITEM(CNL , CNL , CNL , CNL ), /* 1, \n */ + SIT_ITEM(CWORD , CCTL , CCTL , CWORD ), /* 2, !*-/:=?[]~ */ + SIT_ITEM(CDQUOTE , CENDQUOTE, CWORD, CWORD ), /* 3, '"' */ + SIT_ITEM(CVAR , CVAR , CWORD, CVAR ), /* 4, $ */ + SIT_ITEM(CSQUOTE , CWORD , CENDQUOTE, CWORD), /* 5, "'" */ + SIT_ITEM(CSPCL , CWORD , CWORD, CLP ), /* 6, ( */ + SIT_ITEM(CSPCL , CWORD , CWORD, CRP ), /* 7, ) */ + SIT_ITEM(CBACK , CBACK , CCTL , CBACK ), /* 8, \ */ + SIT_ITEM(CBQUOTE , CBQUOTE , CWORD, CBQUOTE), /* 9, ` */ + SIT_ITEM(CENDVAR , CENDVAR , CWORD, CENDVAR), /* 10, } */ #if !USE_SIT_FUNCTION - SIT_ITEM(CENDFILE, CENDFILE , CENDFILE, CENDFILE),/* 12, PEOF */ - SIT_ITEM(CWORD , CWORD , CWORD, CWORD ), /* 13, 0-9A-Za-z */ - SIT_ITEM(CCTL , CCTL , CCTL , CCTL ) /* 14, CTLESC ... */ + SIT_ITEM(CENDFILE, CENDFILE , CENDFILE, CENDFILE),/* 11, PEOF */ + SIT_ITEM(CWORD , CWORD , CWORD, CWORD ), /* 12, 0-9A-Za-z */ + SIT_ITEM(CCTL , CCTL , CCTL , CCTL ) /* 13, CTLESC ... */ #endif #undef SIT_ITEM }; /* Constants below must match table above */ enum { -#if ENABLE_ASH_ALIAS - CSPCL_CIGN_CIGN_CIGN , /* 0 */ -#endif - CSPCL_CWORD_CWORD_CWORD , /* 1 */ - CNL_CNL_CNL_CNL , /* 2 */ - CWORD_CCTL_CCTL_CWORD , /* 3 */ - CDQUOTE_CENDQUOTE_CWORD_CWORD , /* 4 */ - CVAR_CVAR_CWORD_CVAR , /* 5 */ - CSQUOTE_CWORD_CENDQUOTE_CWORD , /* 6 */ - CSPCL_CWORD_CWORD_CLP , /* 7 */ - CSPCL_CWORD_CWORD_CRP , /* 8 */ - CBACK_CBACK_CCTL_CBACK , /* 9 */ - CBQUOTE_CBQUOTE_CWORD_CBQUOTE , /* 10 */ - CENDVAR_CENDVAR_CWORD_CENDVAR , /* 11 */ - CENDFILE_CENDFILE_CENDFILE_CENDFILE, /* 12 */ - CWORD_CWORD_CWORD_CWORD , /* 13 */ - CCTL_CCTL_CCTL_CCTL , /* 14 */ + CSPCL_CWORD_CWORD_CWORD , /* 0 */ + CNL_CNL_CNL_CNL , /* 1 */ + CWORD_CCTL_CCTL_CWORD , /* 2 */ + CDQUOTE_CENDQUOTE_CWORD_CWORD , /* 3 */ + CVAR_CVAR_CWORD_CVAR , /* 4 */ + CSQUOTE_CWORD_CENDQUOTE_CWORD , /* 5 */ + CSPCL_CWORD_CWORD_CLP , /* 6 */ + CSPCL_CWORD_CWORD_CRP , /* 7 */ + CBACK_CBACK_CCTL_CBACK , /* 8 */ + CBQUOTE_CBQUOTE_CWORD_CBQUOTE , /* 9 */ + CENDVAR_CENDVAR_CWORD_CENDVAR , /* 10 */ + CENDFILE_CENDFILE_CENDFILE_CENDFILE, /* 11 */ + CWORD_CWORD_CWORD_CWORD , /* 12 */ + CCTL_CCTL_CCTL_CCTL , /* 13 */ }; -/* c in SIT(c, syntax) must be an *unsigned char* or PEOA or PEOF, +/* c in SIT(c, syntax) must be an *unsigned char* or PEOF, * caller must ensure proper cast on it if c is *char_ptr! */ #if USE_SIT_FUNCTION @@ -3088,44 +3088,28 @@ SIT(int c, int syntax) * but glibc one isn't. With '/' always treated as CWORD, * both work fine. */ -# if ENABLE_ASH_ALIAS - static const uint8_t syntax_index_table[] ALIGN1 = { - 1, 2, 1, 3, 4, 5, 1, 6, /* "\t\n !\"$&'" */ - 7, 8, 3, 3,/*3,*/3, 1, 1, /* "()*-/:;<" */ - 3, 1, 3, 3, 9, 3, 10, 1, /* "=>?[\\]`|" */ - 11, 3 /* "}~" */ - }; -# else static const uint8_t syntax_index_table[] ALIGN1 = { 0, 1, 0, 2, 3, 4, 0, 5, /* "\t\n !\"$&'" */ 6, 7, 2, 2,/*2,*/2, 0, 0, /* "()*-/:;<" */ 2, 0, 2, 2, 8, 2, 9, 0, /* "=>?[\\]`|" */ 10, 2 /* "}~" */ }; -# endif const char *s; int indx; if (c == PEOF) return CENDFILE; -# if ENABLE_ASH_ALIAS - if (c == PEOA) - indx = 0; - else -# endif - { - /* Cast is purely for paranoia here, - * just in case someone passed signed char to us */ - if ((unsigned char)c >= CTL_FIRST - && (unsigned char)c <= CTL_LAST - ) { - return CCTL; - } - s = strchrnul(spec_symbls, c); - if (*s == '\0') - return CWORD; - indx = syntax_index_table[s - spec_symbls]; + /* Cast is purely for paranoia here, + * just in case someone passed signed char to us */ + if ((unsigned char)c >= CTL_FIRST + && (unsigned char)c <= CTL_LAST + ) { + return CCTL; } + s = strchrnul(spec_symbls, c); + if (*s == '\0') + return CWORD; + indx = syntax_index_table[s - spec_symbls]; return (S_I_T[indx] >> (syntax*4)) & 0xf; } @@ -3396,9 +3380,6 @@ static const uint8_t syntax_index_table[] ALIGN1 = { /* 254 */ CWORD_CWORD_CWORD_CWORD, /* 255 */ CWORD_CWORD_CWORD_CWORD, /* PEOF */ CENDFILE_CENDFILE_CENDFILE_CENDFILE, -# if ENABLE_ASH_ALIAS - /* PEOA */ CSPCL_CIGN_CIGN_CIGN, -# endif }; #if 1 @@ -10712,7 +10693,7 @@ pushstring(char *s, struct alias *ap) len = strlen(s); INT_OFF; - if (g_parsefile->strpush) { + if (g_parsefile->strpush || g_parsefile->spfree) { sp = ckzalloc(sizeof(*sp)); sp->prev = g_parsefile->strpush; } else { @@ -10722,6 +10703,7 @@ pushstring(char *s, struct alias *ap) sp->prev_string = g_parsefile->next_to_pgetc; sp->prev_left_in_line = g_parsefile->left_in_line; sp->unget = g_parsefile->unget; + sp->spfree = g_parsefile->spfree; memcpy(sp->lastc, g_parsefile->lastc, sizeof(sp->lastc)); #if ENABLE_ASH_ALIAS sp->ap = ap; @@ -10733,11 +10715,11 @@ pushstring(char *s, struct alias *ap) g_parsefile->next_to_pgetc = s; g_parsefile->left_in_line = len; g_parsefile->unget = 0; + g_parsefile->spfree = NULL; INT_ON; } -static void -popstring(void) +static void popstring(void) { struct strpush *sp = g_parsefile->strpush; @@ -10752,10 +10734,6 @@ popstring(void) if (sp->string != sp->ap->val) { free(sp->string); } - sp->ap->flag &= ~ALIASINUSE; - if (sp->ap->flag & ALIASDEAD) { - unalias(sp->ap->name); - } } #endif g_parsefile->next_to_pgetc = sp->prev_string; @@ -10763,8 +10741,7 @@ popstring(void) g_parsefile->unget = sp->unget; memcpy(g_parsefile->lastc, sp->lastc, sizeof(sp->lastc)); g_parsefile->strpush = sp->prev; - if (sp != &(g_parsefile->basestrpush)) - free(sp); + g_parsefile->spfree = sp; INT_ON; } @@ -10853,26 +10830,16 @@ preadfd(void) */ //#define pgetc_debug(...) bb_error_msg(__VA_ARGS__) #define pgetc_debug(...) ((void)0) -static int pgetc(void); +static int __pgetc(void); static int preadbuffer(void) { char *q; int more; - if (g_parsefile->strpush) { -#if ENABLE_ASH_ALIAS - if (g_parsefile->left_in_line == -1 - && g_parsefile->strpush->ap - && g_parsefile->next_to_pgetc[-1] != ' ' - && g_parsefile->next_to_pgetc[-1] != '\t' - ) { - pgetc_debug("preadbuffer PEOA"); - return PEOA; - } -#endif + if (unlikely(g_parsefile->strpush)) { popstring(); - return pgetc(); + return __pgetc(); } /* on both branches above g_parsefile->left_in_line < 0. * "pgetc" needs refilling. @@ -10966,8 +10933,31 @@ nlnoprompt(void) needprompt = doprompt; } -static int -pgetc(void) +static void freestrings(struct strpush *sp) +{ + INT_OFF; + do { + struct strpush *psp; + + if (sp->ap) { + sp->ap->flag &= ~ALIASINUSE; + if (sp->ap->flag & ALIASDEAD) { + unalias(sp->ap->name); + } + } + + psp = sp; + sp = sp->spfree; + + if (psp != &(g_parsefile->basestrpush)) + free(psp); + } while (sp); + + g_parsefile->spfree = NULL; + INT_ON; +} + +static int __pgetc(void) { int c; @@ -10989,23 +10979,19 @@ pgetc(void) return c; } -#if ENABLE_ASH_ALIAS -static int -pgetc_without_PEOA(void) +/* + * Read a character from the script, returning PEOF on end of file. + * Nul characters in the input are silently discarded. + */ +static int pgetc(void) { - int c; - do { - pgetc_debug("pgetc at %d:%p'%s'", - g_parsefile->left_in_line, - g_parsefile->next_to_pgetc, - g_parsefile->next_to_pgetc); - c = pgetc(); - } while (c == PEOA); - return c; + struct strpush *sp = g_parsefile->spfree; + + if (unlikely(sp)) + freestrings(sp); + + return __pgetc(); } -#else -# define pgetc_without_PEOA() pgetc() -#endif /* * Undo a call to pgetc. Only two characters may be pushed back. @@ -11082,6 +11068,7 @@ pushfile(void) pf->prev = g_parsefile; pf->pf_fd = -1; /*pf->strpush = NULL; - ckzalloc did it */ + /*pf->spfree = NULL;*/ /*pf->basestrpush.prev = NULL;*/ /*pf->unget = 0;*/ g_parsefile = pf; @@ -11099,8 +11086,12 @@ popfile(void) if (pf->pf_fd >= 0) close(pf->pf_fd); free(pf->buf); - while (pf->strpush) + if (g_parsefile->spfree) + freestrings(g_parsefile->spfree); + while (pf->strpush) { popstring(); + freestrings(g_parsefile->spfree); + } g_parsefile = pf->prev; free(pf); INT_ON; @@ -12390,7 +12381,7 @@ static int readtoken1(int c, int syntax, char *eofmark, int striptabs) { /* NB: syntax parameter fits into smallint */ - /* c parameter is an unsigned char or PEOF or PEOA */ + /* c parameter is an unsigned char or PEOF */ char *out; size_t len; struct nodelist *bqlist; @@ -12460,7 +12451,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CBACK: /* backslash */ - c = pgetc_without_PEOA(); + c = pgetc(); if (c == PEOF) { USTPUTC(CTLESC, out); USTPUTC('\\', out); @@ -12567,8 +12558,6 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) break; case CENDFILE: goto endword; /* exit outer loop */ - case CIGN: - break; default: if (synstack->varnest == 0) { #if BASH_REDIR_OUTPUT @@ -12590,8 +12579,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) #endif goto endword; /* exit outer loop */ } - IF_ASH_ALIAS(if (c != PEOA)) - USTPUTC(c, out); + USTPUTC(c, out); } c = pgetc_top(synstack); } /* for (;;) */ @@ -12642,14 +12630,9 @@ checkend: { int markloc; char *p; -#if ENABLE_ASH_ALIAS - if (c == PEOA) - c = pgetc_without_PEOA(); -#endif if (striptabs) { - while (c == '\t') { - c = pgetc_without_PEOA(); - } + while (c == '\t') + c = pgetc(); } markloc = out - (char *)stackblock(); @@ -12663,7 +12646,7 @@ checkend: { * F * (see heredoc_bkslash_newline2.tests) */ - c = pgetc_without_PEOA(); + c = pgetc(); } if (c == '\n' || c == PEOF) { @@ -12788,7 +12771,6 @@ parsesub: { c = pgetc_eatbnl(); if ((checkkwd & CHKEOFMARK) - || c > 255 /* PEOA or PEOF */ || (c != '(' && c != '{' && !is_name(c) && !is_special(c)) ) { #if BASH_DOLLAR_SQUOTE @@ -12811,7 +12793,7 @@ parsesub: { PARSEBACKQNEW(); } } else { - /* $VAR, $<specialchar>, ${...}, or PEOA/PEOF */ + /* $VAR, $<specialchar>, ${...}, or PEOF */ smalluint newsyn = synstack->syntax; USTPUTC(CTLVAR, out); @@ -13006,13 +12988,9 @@ parsebackq: { ) { STPUTC('\\', pout); } - if (pc <= 255 /* not PEOA or PEOF */) { - break; - } - /* fall through */ + break; case PEOF: - IF_ASH_ALIAS(case PEOA:) raise_error_syntax("EOF in backquote substitution"); case '\n': @@ -13147,7 +13125,7 @@ xxreadtoken(void) setprompt_if(needprompt, 2); for (;;) { /* until token or start of word found */ c = pgetc_eatbnl(); - if (c == ' ' || c == '\t' IF_ASH_ALIAS( || c == PEOA)) + if (c == ' ' || c == '\t') continue; if (c == '#') { @@ -13205,7 +13183,6 @@ xxreadtoken(void) c = pgetc_eatbnl(); switch (c) { case ' ': case '\t': - IF_ASH_ALIAS(case PEOA:) continue; case '#': while ((c = pgetc()) != '\n' && c != PEOF) |