diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2013-11-28 03:14:16 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2013-11-28 03:14:16 +0100 |
commit | 259b3c047aea430c4aaecbdb9580a07e67691e8d (patch) | |
tree | 937c646d6384e0f79c11ef3d6527601ce164911b | |
parent | 8bd810bd276d20451faafdae88df4af9c2dd96d1 (diff) |
sed: open input files sequentially to avoid EMFILE
Currently, sed pre-opens all files, which may cause EMFILE errors
on systems with low ulimit -n. Change sed to open one file at a time.
function old new delta
get_next_line 177 235 +58
sed_main 682 652 -30
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 58/-30) Total: 28 bytes
Based on the patch by Daniel Borca <dborca@yahoo.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/sed.c | 60 | ||||
-rw-r--r-- | libbb/fclose_nonstdin.c | 3 |
2 files changed, 36 insertions, 27 deletions
diff --git a/editors/sed.c b/editors/sed.c index 777f38308..87fa00291 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -23,7 +23,7 @@ * resulting sed_cmd_t structures are appended to a linked list * (G.sed_cmd_head/G.sed_cmd_tail). * - * add_input_file() adds a FILE* to the list of input files. We need to + * add_input_file() adds a char* to the list of input files. We need to * know all input sources ahead of time to find the last line for the $ match. * * process_files() does actual sedding, reading data lines from each input FILE* @@ -135,12 +135,15 @@ static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v"; struct globals { /* options */ int be_quiet, regex_type; + FILE *nonstdout; char *outname, *hold_space; + smallint exitcode; - /* List of input files */ + /* list of input files */ int input_file_count, current_input_file; - FILE **input_file_list; + const char **input_file_list; + FILE *current_fp; regmatch_t regmatch[10]; regex_t *previous_regex_ptr; @@ -148,7 +151,7 @@ struct globals { /* linked list of sed commands */ sed_cmd_t *sed_cmd_head, **sed_cmd_tail; - /* Linked list of append lines */ + /* linked list of append lines */ llist_t *append_head; char *add_cmd_line; @@ -200,8 +203,8 @@ static void sed_free_and_close_stuff(void) free(G.hold_space); - while (G.current_input_file < G.input_file_count) - fclose(G.input_file_list[G.current_input_file++]); + if (G.current_fp) + fclose(G.current_fp); } #else void sed_free_and_close_stuff(void); @@ -939,8 +942,20 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets /* will be returned if last line in the file * doesn't end with either '\n' or '\0' */ gc = NO_EOL_CHAR; - while (G.current_input_file < G.input_file_count) { - FILE *fp = G.input_file_list[G.current_input_file]; + for (; G.input_file_list[G.current_input_file]; G.current_input_file++) { + FILE *fp = G.current_fp; + if (!fp) { + const char *path = G.input_file_list[G.current_input_file]; + fp = stdin; + if (path != bb_msg_standard_input) { + fp = fopen_or_warn(path, "r"); + if (!fp) { + G.exitcode = EXIT_FAILURE; + continue; + } + } + G.current_fp = fp; + } /* Read line up to a newline or NUL byte, inclusive, * return malloc'ed char[]. length of the chunk read * is stored in len. NULL if EOF/error */ @@ -971,8 +986,8 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets * (note: *no* newline after "b bang"!) */ } /* Close this file and advance to next one */ - fclose(fp); - G.current_input_file++; + fclose_if_not_stdin(fp); + G.current_fp = NULL; } *gets_char = gc; return temp; @@ -1399,7 +1414,7 @@ static void add_cmd_block(char *cmdstr) free(sv); } -static void add_input_file(FILE *file) +static void add_input_file(const char *file) { G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count); G.input_file_list[G.input_file_count++] = file; @@ -1423,8 +1438,6 @@ int sed_main(int argc UNUSED_PARAM, char **argv) "file\0" Required_argument "f"; #endif - int status = EXIT_SUCCESS; - INIT_G(); /* destroy command strings on exit */ @@ -1491,27 +1504,21 @@ int sed_main(int argc UNUSED_PARAM, char **argv) if (argv[0] == NULL) { if (opt & OPT_in_place) bb_error_msg_and_die(bb_msg_requires_arg, "-i"); - add_input_file(stdin); + add_input_file(bb_msg_standard_input); } else { int i; for (i = 0; argv[i]; i++) { struct stat statbuf; int nonstdoutfd; - FILE *file; sed_cmd_t *sed_cmd; if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { - add_input_file(stdin); + add_input_file(bb_msg_standard_input); process_files(); continue; } - file = fopen_or_warn(argv[i], "r"); - if (!file) { - status = EXIT_FAILURE; - continue; - } - add_input_file(file); + add_input_file(argv[i]); if (!(opt & OPT_in_place)) { continue; } @@ -1523,7 +1530,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv) G.nonstdout = xfdopen_for_write(nonstdoutfd); /* Set permissions/owner of output file */ - fstat(fileno(file), &statbuf); + stat(argv[i], &statbuf); /* chmod'ing AFTER chown would preserve suid/sgid bits, * but GNU sed 4.2.1 does not preserve them either */ fchmod(nonstdoutfd, statbuf.st_mode); @@ -1549,12 +1556,13 @@ int sed_main(int argc UNUSED_PARAM, char **argv) } } /* Here, to handle "sed 'cmds' nonexistent_file" case we did: - * if (G.current_input_file >= G.input_file_count) - * return status; + * if (G.current_input_file[G.current_input_file] == NULL) + * return G.exitcode; * but it's not needed since process_files() works correctly * in this case too. */ } + process_files(); - return status; + return G.exitcode; } diff --git a/libbb/fclose_nonstdin.c b/libbb/fclose_nonstdin.c index 5ce9d5b48..1b1441347 100644 --- a/libbb/fclose_nonstdin.c +++ b/libbb/fclose_nonstdin.c @@ -18,7 +18,8 @@ int FAST_FUNC fclose_if_not_stdin(FILE *f) { /* Some more paranoid applets want ferror() check too */ int r = ferror(f); /* NB: does NOT set errno! */ - if (r) errno = EIO; /* so we'll help it */ + if (r) + errno = EIO; /* so we'll help it */ if (f != stdin) return (r | fclose(f)); /* fclose does set errno on error */ return r; |