summaryrefslogtreecommitdiffhomepage
path: root/libbb/read.c
blob: cd6bbeb13f959783a9e2094db75c13c1c1a9a50d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/* vi: set sw=4 ts=4: */
/*
 * Utility routines.
 *
 * Copyright (C) 1999-2004 by Erik Andersen <andersen@codepoet.org>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */
#include "libbb.h"

#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \
	|| ENABLE_FEATURE_SEAMLESS_BZ2 \
	|| ENABLE_FEATURE_SEAMLESS_GZ \
	/* || ENABLE_FEATURE_SEAMLESS_Z */ \
)

#if ZIPPED
# include "unarchive.h"
#endif

ssize_t FAST_FUNC safe_read(int fd, void *buf, size_t count)
{
	ssize_t n;

	do {
		n = read(fd, buf, count);
	} while (n < 0 && errno == EINTR);

	return n;
}

/* Suppose that you are a shell. You start child processes.
 * They work and eventually exit. You want to get user input.
 * You read stdin. But what happens if last child switched
 * its stdin into O_NONBLOCK mode?
 *
 * *** SURPRISE! It will affect the parent too! ***
 * *** BIG SURPRISE! It stays even after child exits! ***
 *
 * This is a design bug in UNIX API.
 *      fcntl(0, F_SETFL, fcntl(0, F_GETFL) | O_NONBLOCK);
 * will set nonblocking mode not only on _your_ stdin, but
 * also on stdin of your parent, etc.
 *
 * In general,
 *      fd2 = dup(fd1);
 *      fcntl(fd2, F_SETFL, fcntl(fd2, F_GETFL) | O_NONBLOCK);
 * sets both fd1 and fd2 to O_NONBLOCK. This includes cases
 * where duping is done implicitly by fork() etc.
 *
 * We need
 *      fcntl(fd2, F_SETFD, fcntl(fd2, F_GETFD) | O_NONBLOCK);
 * (note SETFD, not SETFL!) but such thing doesn't exist.
 *
 * Alternatively, we need nonblocking_read(fd, ...) which doesn't
 * require O_NONBLOCK dance at all. Actually, it exists:
 *      n = recv(fd, buf, len, MSG_DONTWAIT);
 *      "MSG_DONTWAIT:
 *      Enables non-blocking operation; if the operation
 *      would block, EAGAIN is returned."
 * but recv() works only for sockets!
 *
 * So far I don't see any good solution, I can only propose
 * that affected readers should be careful and use this routine,
 * which detects EAGAIN and uses poll() to wait on the fd.
 * Thankfully, poll() doesn't care about O_NONBLOCK flag.
 */
ssize_t FAST_FUNC nonblock_safe_read(int fd, void *buf, size_t count)
{
	struct pollfd pfd[1];
	ssize_t n;

	while (1) {
		n = safe_read(fd, buf, count);
		if (n >= 0 || errno != EAGAIN)
			return n;
		/* fd is in O_NONBLOCK mode. Wait using poll and repeat */
		pfd[0].fd = fd;
		pfd[0].events = POLLIN;
		safe_poll(pfd, 1, -1);
	}
}

/*
 * Read all of the supplied buffer from a file.
 * This does multiple reads as necessary.
 * Returns the amount read, or -1 on an error.
 * A short read is returned on an end of file.
 */
ssize_t FAST_FUNC full_read(int fd, void *buf, size_t len)
{
	ssize_t cc;
	ssize_t total;

	total = 0;

	while (len) {
		cc = safe_read(fd, buf, len);

		if (cc < 0) {
			if (total) {
				/* we already have some! */
				/* user can do another read to know the error code */
				return total;
			}
			return cc; /* read() returns -1 on failure. */
		}
		if (cc == 0)
			break;
		buf = ((char *)buf) + cc;
		total += cc;
		len -= cc;
	}

	return total;
}

/* Die with an error message if we can't read the entire buffer. */
void FAST_FUNC xread(int fd, void *buf, size_t count)
{
	if (count) {
		ssize_t size = full_read(fd, buf, count);
		if ((size_t)size != count)
			bb_error_msg_and_die("short read");
	}
}

/* Die with an error message if we can't read one character. */
unsigned char FAST_FUNC xread_char(int fd)
{
	char tmp;
	xread(fd, &tmp, 1);
	return tmp;
}

// Reads one line a-la fgets (but doesn't save terminating '\n').
// Reads byte-by-byte. Useful when it is important to not read ahead.
// Bytes are appended to pfx (which must be malloced, or NULL).
char* FAST_FUNC xmalloc_reads(int fd, char *buf, size_t *maxsz_p)
{
	char *p;
	size_t sz = buf ? strlen(buf) : 0;
	size_t maxsz = maxsz_p ? *maxsz_p : (INT_MAX - 4095);

	goto jump_in;
	while (sz < maxsz) {
		if ((size_t)(p - buf) == sz) {
 jump_in:
			buf = xrealloc(buf, sz + 128);
			p = buf + sz;
			sz += 128;
		}
		/* nonblock_safe_read() because we are used by e.g. shells */
		if (nonblock_safe_read(fd, p, 1) != 1) { /* EOF/error */
			if (p == buf) { /* we read nothing */
				free(buf);
				return NULL;
			}
			break;
		}
		if (*p == '\n')
			break;
		p++;
	}
	*p = '\0';
	if (maxsz_p)
		*maxsz_p  = p - buf;
	p++;
	return xrealloc(buf, p - buf);
}

ssize_t FAST_FUNC read_close(int fd, void *buf, size_t size)
{
	/*int e;*/
	size = full_read(fd, buf, size);
	/*e = errno;*/
	close(fd);
	/*errno = e;*/
	return size;
}

ssize_t FAST_FUNC open_read_close(const char *filename, void *buf, size_t size)
{
	int fd = open(filename, O_RDONLY);
	if (fd < 0)
		return fd;
	return read_close(fd, buf, size);
}


// Read (potentially big) files in one go. File size is estimated
// by stat. Extra '\0' byte is appended.
void* FAST_FUNC xmalloc_read(int fd, size_t *maxsz_p)
{
	char *buf;
	size_t size, rd_size, total;
	size_t to_read;
	struct stat st;

	to_read = maxsz_p ? *maxsz_p : (INT_MAX - 4095); /* max to read */

	/* Estimate file size */
	st.st_size = 0; /* in case fstat fails, assume 0 */
	fstat(fd, &st);
	/* /proc/N/stat files report st_size 0 */
	/* In order to make such files readable, we add small const */
	size = (st.st_size | 0x3ff) + 1;

	total = 0;
	buf = NULL;
	while (1) {
		if (to_read < size)
			size = to_read;
		buf = xrealloc(buf, total + size + 1);
		rd_size = full_read(fd, buf + total, size);
		if ((ssize_t)rd_size == (ssize_t)(-1)) { /* error */
			free(buf);
			return NULL;
		}
		total += rd_size;
		if (rd_size < size) /* EOF */
			break;
		if (to_read <= rd_size)
			break;
		to_read -= rd_size;
		/* grow by 1/8, but in [1k..64k] bounds */
		size = ((total / 8) | 0x3ff) + 1;
		if (size > 64*1024)
			size = 64*1024;
	}
	buf = xrealloc(buf, total + 1);
	buf[total] = '\0';

	if (maxsz_p)
		*maxsz_p = total;
	return buf;
}

#ifdef USING_LSEEK_TO_GET_SIZE
/* Alternatively, file size can be obtained by lseek to the end.
 * The code is slightly bigger. Retained in case fstat approach
 * will not work for some weird cases (/proc, block devices, etc).
 * (NB: lseek also can fail to work for some weird files) */

// Read (potentially big) files in one go. File size is estimated by
// lseek to end.
void* FAST_FUNC xmalloc_open_read_close(const char *filename, size_t *maxsz_p)
{
	char *buf;
	size_t size;
	int fd;
	off_t len;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return NULL;

	/* /proc/N/stat files report len 0 here */
	/* In order to make such files readable, we add small const */
	size = 0x3ff; /* read only 1k on unseekable files */
	len = lseek(fd, 0, SEEK_END) | 0x3ff; /* + up to 1k */
	if (len != (off_t)-1) {
		xlseek(fd, 0, SEEK_SET);
		size = maxsz_p ? *maxsz_p : (INT_MAX - 4095);
		if (len < size)
			size = len;
	}

	buf = xmalloc(size + 1);
	size = read_close(fd, buf, size);
	if ((ssize_t)size < 0) {
		free(buf);
		return NULL;
	}
	buf = xrealloc(buf, size + 1);
	buf[size] = '\0';

	if (maxsz_p)
		*maxsz_p = size;
	return buf;
}
#endif

// Read (potentially big) files in one go. File size is estimated
// by stat.
void* FAST_FUNC xmalloc_open_read_close(const char *filename, size_t *maxsz_p)
{
	char *buf;
	int fd;

	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return NULL;

	buf = xmalloc_read(fd, maxsz_p);
	close(fd);
	return buf;
}

void* FAST_FUNC xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p)
{
	void *buf = xmalloc_open_read_close(filename, maxsz_p);
	if (!buf)
		bb_perror_msg_and_die("can't read '%s'", filename);
	return buf;
}

/* Used by e.g. rpm which gives us a fd without filename,
 * thus we can't guess the format from filename's extension.
 */
#if ZIPPED
void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/)
{
	const int fail_if_not_detected = 1;
	unsigned char magic[8];
	int offset = -2;
# if BB_MMU
	IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
	enum { xformer_prog = 0 };
# else
	enum { xformer = 0 };
	const char *xformer_prog;
# endif

	/* .gz and .bz2 both have 2-byte signature, and their
	 * unpack_XXX_stream wants this header skipped. */
	xread(fd, magic, 2);
	if (ENABLE_FEATURE_SEAMLESS_GZ
	 && magic[0] == 0x1f && magic[1] == 0x8b
	) {
# if BB_MMU
		xformer = unpack_gz_stream;
# else
		xformer_prog = "gunzip";
# endif
		goto found_magic;
	}
	if (ENABLE_FEATURE_SEAMLESS_BZ2
	 && magic[0] == 'B' && magic[1] == 'Z'
	) {
# if BB_MMU
		xformer = unpack_bz2_stream;
# else
		xformer_prog = "bunzip2";
# endif
		goto found_magic;
	}
	if (ENABLE_FEATURE_SEAMLESS_XZ
	 && magic[0] == 0xfd && magic[1] == '7'
	) {
		/* .xz signature: 0xfd, '7', 'z', 'X', 'Z', 0x00 */
		/* More info at: http://tukaani.org/xz/xz-file-format.txt */
		offset = -6;
		xread(fd, magic + 2, 4);
		if (strcmp((char*)magic + 2, "zXZ") == 0) {
# if BB_MMU
			xformer = unpack_xz_stream;
# else
			xformer_prog = "unxz";
# endif
			xlseek(fd, offset, SEEK_CUR);
			goto found_magic;
		}
	}

	/* No known magic seen */
	if (fail_if_not_detected)
		bb_error_msg_and_die("no gzip"
			IF_FEATURE_SEAMLESS_BZ2("/bzip2")
			IF_FEATURE_SEAMLESS_XZ("/xz")
			" magic");
	xlseek(fd, offset, SEEK_CUR);
	return;

 found_magic:
# if !BB_MMU
	/* NOMMU version of open_transformer execs
	 * an external unzipper that wants
	 * file position at the start of the file */
	xlseek(fd, offset, SEEK_CUR);
# endif
	open_transformer(fd, xformer, xformer_prog);
}
#endif /* ZIPPED */

int FAST_FUNC open_zipped(const char *fname)
{
#if !ZIPPED
	return open(fname, O_RDONLY);
#else
	char *sfx;
	int fd;

	fd = open(fname, O_RDONLY);
	if (fd < 0)
		return fd;

	sfx = strrchr(fname, '.');
	if (sfx) {
		sfx++;
		if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0)
			/* .lzma has no header/signature, just trust it */
			open_transformer(fd, unpack_lzma_stream, "unlzma");
		else
		if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0)
		 || (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0)
		 || (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0)
		) {
			setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/);
		}
	}

	return fd;
#endif
}

void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
{
	int fd;
	char *image;

	fd = open_zipped(fname);
	if (fd < 0)
		return NULL;

	image = xmalloc_read(fd, maxsz_p);
	if (!image)
		bb_perror_msg("read error from '%s'", fname);
	close(fd);

	return image;
}