diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-09-07 22:51:42 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-09-07 22:51:42 +0200 |
commit | 6a9b3f7acfaa7365515f1eb70427d5ddd687c162 (patch) | |
tree | cf8f15c4e8f4a1acadc5b14557238396be4c9962 /coreutils | |
parent | 574b9c446da11baaf89551f09f951d6523eff731 (diff) |
shuf: add a TODO, code shrink
function old new delta
shuf_main 501 500 -1
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'coreutils')
-rw-r--r-- | coreutils/shuf.c | 24 |
1 files changed, 14 insertions, 10 deletions
```diff
diff --git a/coreutils/shuf.c b/coreutils/shuf.c
index 50483a25e..3def3d80f 100644
--- a/coreutils/shuf.c
+++ b/coreutils/shuf.c
@@ -44,21 +44,25 @@
  */
 static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
 {
-	unsigned i;
-	unsigned r;
-	char *tmp;
-
 	srand(monotonic_us());
 
-	for (i = numlines - 1; outlines > 0; i--, outlines--) {
-		r = rand();
+	while (outlines != 0) {
+		char *tmp;
+		unsigned r = rand();
 		/* RAND_MAX can be as small as 32767 */
-		if (i > RAND_MAX)
+		if (numlines > RAND_MAX)
 			r ^= rand() << 15;
-		r %= i + 1;
-		tmp = lines[i];
-		lines[i] = lines[r];
+		r %= numlines;
+//TODO: the above method is seriously non-uniform when numlines is very large.
+//For example, with numlines of 0xf0000000,
+//values of (r % numlines) in [0, 0x0fffffff] range
+//are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
+//whereas only one value, r=0xefffffff, maps to 0xefffffff.
+		numlines--;
+		tmp = lines[numlines];
+		lines[numlines] = lines[r];
 		lines[r] = tmp;
+		outlines--;
 	}
 }
 
```