summaryrefslogtreecommitdiff
path: root/misc
diff options
context:
space:
mode:
authorMartin Mares <mj@ucw.cz>1998-12-19 11:51:47 +0000
committerMartin Mares <mj@ucw.cz>1998-12-19 11:51:47 +0000
commit87b60bf7e8ad12b3efd3d6f37df0d029f50d2d91 (patch)
tree08e7758f9f14a3446286d42e78812860524de5a9 /misc
parent02933ddbbec94f1bb01c0b9e5198fe272c1f5025 (diff)
Added several tools for fib hashing function analysis. It turned out
we can use very simple function which is monotonic with respect to re-hashing: n ^= n >> 16; n ^= n << 10; h = (n >> (16 - o)) & ((1 << o) - 1); where o is table order. Statistical analysis for both backbone routing table and local OSPF routing tables gives values near theoretical optimum for uniform distribution (see ips.c for formulae). The trick is very simple: We always calculate a 16-bit hash value n and use o most significant bits (this gives us monotonity wrt. rehashing if we sort the chains by the value of n). The first shift/xor pair reduces the IP address to a 16-bit one, the second pair makes higher bits of the 16-bit value uniformly distributed even for tables containing lots of long prefixes (typical interior routing case with 24-bit or even longer prefixes).
Diffstat (limited to 'misc')
-rw-r--r--misc/Makefile7
-rwxr-xr-xmisc/cisco2list20
-rw-r--r--misc/ips.c94
-rwxr-xr-xmisc/stats9
4 files changed, 130 insertions, 0 deletions
diff --git a/misc/Makefile b/misc/Makefile
new file mode 100644
index 00000000..fe0307d5
--- /dev/null
+++ b/misc/Makefile
@@ -0,0 +1,7 @@
+all: ips
+
+ips: ips.c
+ gcc ips.c -o ips -lm -O2 -Wall
+
+clean:
+ rm -f ips
diff --git a/misc/cisco2list b/misc/cisco2list
new file mode 100755
index 00000000..6f5466fe
--- /dev/null
+++ b/misc/cisco2list
@@ -0,0 +1,20 @@
+#!/usr/bin/perl
+#
+# Convert Cisco routing table listing to list of prefixes
+#
+
+$loc = ($ARGV[0] eq "-l"); # Print only local prefixes
+
+while (<STDIN>) {
+ ($loc ? /^[OR]\s/ : /^B\s/) || next;
+ /^[ORB]( E[12])?\s+(\d+\.\d+\.\d+\.\d+)(\s|\/\d+\s)/ || die "Cannot parse $_";
+ $net = $2;
+ $len = $3;
+ if ($len =~ /^\s*$/) {
+ # Magic rule :)
+ $len = ($net =~ /\.0$/) ? 24 : 32;
+ }
+ $len =~ s/^\///;
+ $net =~ /(\d+)\.(\d+)\.(\d+)\.(\d+)/;
+ printf "%02x%02x%02x%02x/%d\n", $1, $2, $3, $4, $len;
+}
diff --git a/misc/ips.c b/misc/ips.c
new file mode 100644
index 00000000..ec7e673e
--- /dev/null
+++ b/misc/ips.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <math.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+int h[65536];
+
+/*
+ * Probability analysis of hashing function:
+ *
+ * Let n be number of items and k number of boxes. For uniform distribution
+ * we get:
+ *
+ * Expected value of "item i is in given box": Xi = 1/k
+ * Expected number of items in given box: a = EX = E(sum Xi) = sum E(Xi) = n/k
+ * Expected square value: E(X^2) = E((sum Xi)^2) = E((sum_i Xi^2) + (sum_i,j i<>j XiXj)) =
+ * = sum_i E(Xi^2) + sum_i,j i<>j E(XiXj) =
+ * = sum_i E(Xi) [Xi is binary] + sum_i,j i<>j E(XiXj) [those are independent] =
+ * = n/k + n*(n-1)/k^2
+ * Variance: var X = E(X^2) - (EX)^2 = n/k + n*(n-1)/k^2 - n^2/k^2 =
+ * = n/k - n/k^2 = a * (1-1/k)
+ * Probability of fixed box being zero: Pz = ((k-1)/k)^n = (1-1/k)^n = (1-1/k)^(ak) =
+ * = ((1-1/k)^k)^a which we can approximate by e^-a.
+ */
+
+unsigned int hf(unsigned int n)
+{
+#if 0
+ n = (n ^ (n >> 16)) & 0xffff;
+ n = (n ^ (n << 8)) & 0xffff;
+#elif 1
+ n = (n >> 16) ^ n;
+ n = (n ^ (n << 10)) & 0xffff;
+#elif 0
+ n = (n >> 16) ^ n;
+ n *= 259309;
+#elif 0
+ n ^= (n >> 20);
+ n ^= (n >> 10);
+ n ^= (n >> 5);
+#elif 0
+ n = (n * 259309) + ((n >> 16) * 123479);
+#else
+ return random();
+#endif
+ return n;
+}
+
+int
+main(int argc, char **argv)
+{
+ int cnt=0;
+ int i;
+
+ int bits = atol(argv[1]);
+ int z = 1 << bits;
+ int max = atol(argv[2]);
+
+ while (max--)
+ {
+ unsigned int i, e;
+ if (scanf("%x/%d", &i, &e) != 2)
+ if (feof(stdin))
+ break;
+ else
+ fprintf(stderr, "BUGGG\n");
+// i >>= (32-e);
+// i |= (i >> e);
+ cnt++;
+ h[(hf(i) >> 1*(16 - bits)) & (z-1)]++;
+ }
+// printf(">>> %d addresses\n", cnt);
+#if 0
+ for(i=0; i<z; i++)
+ printf("%d\t%d\n", i, h[i]);
+#else
+{
+ int min=cnt, max=0, zer=0;
+ double delta=0;
+ double avg = (double) cnt / z;
+ double exdelta = avg*(1-1/z);
+ double exzer = exp(-avg);
+ for(i=0; i<z; i++) {
+ if (h[i] < min) min=h[i];
+ if (h[i] > max) max=h[i];
+ delta += (h[i] - avg) * (h[i] - avg);
+ if (!h[i]) zer++;
+ }
+ printf("size=%5d, min=%d, max=%2d, delta=%-7.6g (%-7.6g), avg=%-5.3g, zero=%g%% (%g%%)\n", z, min, max, delta/z, exdelta, avg, zer/(double)z*100, exzer*100);
+}
+#endif
+
+ return 0;
+}
diff --git a/misc/stats b/misc/stats
new file mode 100755
index 00000000..57920ce4
--- /dev/null
+++ b/misc/stats
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+make ips
+echo "Global tables:"
+for a in 4 5 6 7 8 9 10 11 12 13 14 15 ; do
+ ./ips <global $a $((1<<($a+2)))
+ done
+echo "Local tables:"
+./ips <local 6 1000