summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2021-09-07 22:51:42 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2021-09-07 22:51:42 +0200
commit 6a9b3f7acfaa7365515f1eb70427d5ddd687c162 (patch)
tree cf8f15c4e8f4a1acadc5b14557238396be4c9962
parent 574b9c446da11baaf89551f09f951d6523eff731 (diff)
shuf: add a TODO, code shrink

function                                             old     new   delta
shuf_main                                            501     500      -1

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- coreutils/shuf.c 24
1 files changed, 14 insertions, 10 deletions
diff --git a/coreutils/shuf.c b/coreutils/shuf.c
index 50483a25e..3def3d80f 100644
--- a/coreutils/shuf.c
+++ b/coreutils/shuf.c
@@ -44,21 +44,25 @@
*/
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
{
- unsigned i;
- unsigned r;
- char *tmp;
-
srand(monotonic_us());
- for (i = numlines - 1; outlines > 0; i--, outlines--) {
- r = rand();
+ while (outlines != 0) {
+ char *tmp;
+ unsigned r = rand();
/* RAND_MAX can be as small as 32767 */
- if (i > RAND_MAX)
+ if (numlines > RAND_MAX)
r ^= rand() << 15;
- r %= i + 1;
- tmp = lines[i];
- lines[i] = lines[r];
+ r %= numlines;
+//TODO: the above method is seriously non-uniform when numlines is very large.
+//For example, with numlines of 0xf0000000,
+//values of (r % numlines) in [0, 0x0fffffff] range
+//are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
+//whereas only one value, r=0xefffffff, maps to 0xefffffff.
+ numlines--;
+ tmp = lines[numlines];
+ lines[numlines] = lines[r];
lines[r] = tmp;
+ outlines--;
}
}