Diffstat (limited to 'nest')

 nest/Makefile       |   13
 nest/a-path.c       |  905
 nest/a-path_test.c  |  221
 nest/a-set.c        |  565
 nest/a-set_test.c   |  263
 nest/attrs.h        |  224
 nest/bird.h         |    1
 nest/cli.c          |    8
 nest/cli.h          |    4
 nest/cmds.c         |   69
 nest/config.Y       |  102
 nest/iface.c        |    2
 nest/neighbor.c     |    2
 nest/proto.c        |  548
 nest/protocol.h     |   69
 nest/route.h        |  931
 nest/rt-attr.c      | 1020
 nest/rt-dev.c       |   25
 nest/rt-fib.c       |    6
 nest/rt-show.c      |  372
 nest/rt-table.c     | 2220
 nest/rt.h           |  589

 22 files changed, 3294 insertions(+), 4865 deletions(-)
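Most of this change removes the attribute-handling sources (a-path.c, a-set.c, their tests, and attrs.h) from nest/; since this view is limited to 'nest', their destination is not shown. The deleted as_path_valid() below walks a BGP AS_PATH encoded as a sequence of segments, each a type byte and a count byte followed by count ASNs of bs bytes (bs = 4 for 32-bit ASNs, per the file's BS macro). A minimal standalone sketch of that framing walk — the function name path_framing_ok() is hypothetical, and the original's error reporting and sets/confed policy checks are omitted:

#include <stdint.h>
#include <stddef.h>

#define AS_PATH_SET             1   /* segment types, as in the deleted nest/attrs.h */
#define AS_PATH_SEQUENCE        2
#define AS_PATH_CONFED_SEQUENCE 3
#define AS_PATH_CONFED_SET      4

/* Return 1 if the segment framing of an encoded AS_PATH is consistent. */
int path_framing_ok(const uint8_t *data, size_t len, size_t bs)
{
  while (len)
  {
    if (len < 2)
      return 0;                        /* truncated segment header */

    uint8_t type = data[0];
    size_t slen = 2 + bs * data[1];    /* header + ASN block */

    if ((type < AS_PATH_SET) || (type > AS_PATH_CONFED_SET))
      return 0;                        /* unknown segment type */

    if ((data[1] == 0) || (len < slen))
      return 0;                        /* empty or overflowing segment */

    data += slen;
    len -= slen;
  }

  return 1;
}

For a two-segment path "{1 2} 3" with 4-byte ASNs this accepts exactly 2 + 4*2 + 2 + 4*1 = 16 bytes and rejects any truncation of it.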
diff --git a/nest/Makefile b/nest/Makefile index 884d3950..39617350 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,8 +1,17 @@ -src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,dev_build) -tests_src := a-set_test.c a-path_test.c +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) + $(E)echo GEN $@ + $(Q)echo "#include \"lib/birdlib.h\"" > $@ + $(Q)$(patsubst %,echo 'void %(void);' >> $@;,$(PROTO_BUILD)) + $(Q)echo "void protos_build_gen(void) {" >> $@ + $(Q)$(patsubst %,echo ' %();'>>$@;,$(PROTO_BUILD)) + $(Q)echo "}" >> $@ + +tests_src := tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/nest/a-path.c deleted file mode 100644 index 2e34a3d1..00000000 --- a/nest/a-path.c +++ /dev/null @@ -1,905 +0,0 @@ -/* - * BIRD -- Path Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/unaligned.h" -#include "lib/string.h" -#include "filter/data.h" - -// static inline void put_as(byte *data, u32 as) { put_u32(data, as); } -// static inline u32 get_as(byte *data) { return get_u32(data); } - -#define put_as put_u32 -#define get_as get_u32 -#define BS 4 /* Default block size of ASN (autonomous system number) */ - -#define BAD(DSC, VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) - -int -as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen) -{ - byte *pos = data; - char *err_dsc = NULL; - uint err_val = 0; - - while (len) - { - if (len < 2) - BAD("segment framing error", 0); - - /* Process one AS path segment */ - uint type = pos[0]; - uint slen = 2 + bs * pos[1]; - - if (len < slen) - BAD("segment framing error", len); - - switch (type) - { - case AS_PATH_SET: - if (!sets) - BAD("AS_SET segment", type); - break; - - case AS_PATH_SEQUENCE: - break; - - case AS_PATH_CONFED_SEQUENCE: - if (!confed) - BAD("AS_CONFED_SEQUENCE segment", type); - break; - - case AS_PATH_CONFED_SET: - if (!sets || !confed) - BAD("AS_CONFED_SET segment", type); - break; - - default: - BAD("unknown segment", type); - } - - if (pos[1] == 0) - BAD("zero-length segment", type); - - pos += slen; - len -= slen; - } - - return 1; - -bad: - if (err) - if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) - err[0] = 0; - - return 0; -} - -int -as_path_16to32(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u32(dst, get_u16(src)); - src += 2; - dst += 4; - } - } - - return dst - dst0; -} - -int -as_path_32to16(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u16(dst, get_u32(src)); - src += 4; - dst += 2; - } - } - - return dst - dst0; -} - -int -as_path_contains_as4(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = 
pos + path->length; - uint i, n; - - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - if (get_as(pos) > 0xFFFF) - return 1; - - pos += BS; - } - } - - return 0; -} - -int -as_path_contains_confed(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint slen = 2 + BS * pos[1]; - - if ((type == AS_PATH_CONFED_SEQUENCE) || - (type == AS_PATH_CONFED_SET)) - return 1; - - pos += slen; - } - - return 0; -} - -struct adata * -as_path_strip_confed(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - const byte *src = path->data; - const byte *end = src + path->length; - byte *dst = res->data; - - while (src < end) - { - uint type = src[0]; - uint slen = 2 + BS * src[1]; - - /* Copy regular segments */ - if ((type == AS_PATH_SET) || (type == AS_PATH_SEQUENCE)) - { - memcpy(dst, src, slen); - dst += slen; - } - - src += slen; - } - - /* Fix the result length */ - res->length = dst - res->data; - - return res; -} - -struct adata * -as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as) -{ - struct adata *np; - const byte *pos = op->data; - uint len = op->length; - - if (len && (pos[0] == seq) && (pos[1] < 255)) - { - /* Starting with matching segment => just prepend the AS number */ - np = lp_alloc_adata(pool, len + BS); - np->data[0] = seq; - np->data[1] = pos[1] + 1; - put_as(np->data + 2, as); - - uint dlen = BS * pos[1]; - memcpy(np->data + 2 + BS, pos + 2, dlen); - ADVANCE(pos, len, 2 + dlen); - } - else - { - /* Create a new path segment */ - np = lp_alloc_adata(pool, len + 2 + BS); - np->data[0] = seq; - np->data[1] = 1; - put_as(np->data + 2, as); - } - - if (len) - { - byte *dst = np->data + 2 + BS * np->data[1]; - - memcpy(dst, pos, len); - } - - return np; -} - - -struct adata * -as_path_to_old(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - byte *pos = res->data; - byte *end = pos + res->length; - uint i, n; - u32 as; - - /* Copy the whole path */ - memcpy(res->data, path->data, path->length); - - /* Replace 32-bit AS numbers with AS_TRANS */ - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - as = get_as(pos); - if (as > 0xFFFF) - put_as(pos, AS_TRANS); - - pos += BS; - } - } - - return res; -} - -/* - * Cut the path to the length @num, measured to the usual path metric. Note that - * AS_CONFED_* segments have zero length and must be added if they are on edge. 
- */ -struct adata * -as_path_cut(struct linpool *pool, const struct adata *path, uint num) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_cut: Invalid path segment"); - } - - /* Cannot add whole segment, so try partial one and finish */ - if (num < n) - { - const byte *nend = pos; - if (num) - nend += 2 + BS * num; - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = nend - (const byte *) path->data; - memcpy(res->data, path->data, res->length); - - if (num) - { - byte *dpos = ((byte *) res->data) + (pos - (const byte *) path->data); - dpos[1] = num; - } - - return res; - } - - num -= n; - pos += 2 + BS * l; - } - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = path->length; - memcpy(res->data, path->data, res->length); - return res; -} - -/* - * Merge (concatenate) paths @p1 and @p2 and return the result. - * In contrast to other as_path_* functions, @p1 and @p2 may be reused. - */ -const struct adata * -as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2) -{ - if (p1->length == 0) - return p2; - - if (p2->length == 0) - return p1; - - struct adata *res = lp_alloc_adata(pool, p1->length + p2->length); - memcpy(res->data, p1->data, p1->length); - memcpy(res->data + p1->length, p2->data, p2->length); - - return res; -} - -void -as_path_format(const struct adata *path, byte *bb, uint size) -{ - buffer buf = { .start = bb, .pos = bb, .end = bb + size }, *b = &buf; - const byte *pos = path->data; - const byte *end = pos + path->length; - const char *ops, *cls; - - b->pos[0] = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: ops = "{"; cls = "}"; break; - case AS_PATH_SEQUENCE: ops = NULL; cls = NULL; break; - case AS_PATH_CONFED_SEQUENCE: ops = "("; cls = ")"; break; - case AS_PATH_CONFED_SET: ops = "({"; cls = "})"; break; - default: bug("Invalid path segment"); - } - - if (ops) - buffer_puts(b, ops); - - while (len--) - { - buffer_print(b, len ? 
"%u " : "%u", get_as(pos)); - pos += BS; - } - - if (cls) - buffer_puts(b, cls); - - if (pos < end) - buffer_puts(b, " "); - } - - /* Handle overflow */ - if (b->pos == b->end) - strcpy(b->end - 12, "..."); -} - -int -as_path_getlen(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - uint res = 0; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_getlen: Invalid path segment"); - } - - res += n; - pos += 2 + BS * l; - } - - return res; -} - -int -as_path_get_last(const struct adata *path, u32 *orig_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - int found = 0; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - found = 0; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - found = 1; - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - if (found) - *orig_as = val; - return found; -} - -u32 -as_path_get_last_nonaggregated(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - return val; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return val; -} - -int -as_path_get_first(const struct adata *path, u32 *last_as) -{ - const u8 *p = path->data; - - if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) - return 0; - - *last_as = get_as(p+2); - return 1; -} - -int -as_path_get_first_regular(const struct adata *path, u32 *last_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - return 0; - - case AS_PATH_SEQUENCE: - if (len == 0) - return 0; - - *last_as = get_as(pos); - return 1; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return 0; -} - -int -as_path_contains(const struct adata *path, u32 as, int min) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int num = 0; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for(i=0; i<n; i++) - { - if (get_as(p) == as) - if (++num == min) - return 1; - p += BS; - } - } - return 0; -} - -int -as_path_match_set(const struct adata *path, const struct f_tree *set) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for (i=0; i<n; i++) - { - struct f_val v = {T_INT, .val.i = get_as(p)}; - if (find_tree(set, &v)) - return 1; - p += BS; - } - } - - return 0; -} - -const struct adata * -as_path_filter(struct linpool *pool, const struct adata *path, const struct f_tree *set, u32 key, int pos) -{ - if (!path) - return NULL; - - int len = path->length; - const u8 *p = path->data; - const u8 *q = path->data + 
len; - u8 *d, *d2; - int i, bt, sn, dn; - u8 buf[len]; - - d = buf; - while (p<q) - { - /* Read block header (type and length) */ - bt = p[0]; - sn = p[1]; - dn = 0; - p += 2; - d2 = d + 2; - - for (i = 0; i < sn; i++) - { - u32 as = get_as(p); - int match; - - if (set) - { - struct f_val v = {T_INT, .val.i = as}; - match = !!find_tree(set, &v); - } - else - match = (as == key); - - if (match == pos) - { - put_as(d2, as); - d2 += BS; - dn++; - } - - p += BS; - } - - if (dn > 0) - { - /* Nonempty block, set block header and advance */ - d[0] = bt; - d[1] = dn; - d = d2; - } - } - - uint nl = d - buf; - if (nl == path->length) - return path; - - struct adata *res = lp_alloc(pool, sizeof(struct adata) + nl); - res->length = nl; - memcpy(res->data, buf, nl); - - return res; -} - - -struct pm_pos -{ - u8 set; - u8 mark; - union - { - const char *sp; - u32 asn; - } val; -}; - -static int -parse_path(const struct adata *path, struct pm_pos *pp) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - struct pm_pos *op = pp; - uint i; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - pp->set = 1; - pp->mark = 0; - pp->val.sp = pos - 1; - pp++; - - pos += BS * len; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - for (i = 0; i < len; i++) - { - pp->set = 0; - pp->mark = 0; - pp->val.asn = get_as(pos); - pp++; - - pos += BS; - } - break; - - default: - bug("Invalid path segment"); - } - } - - return pp - op; -} - -static int -pm_match_val(const struct pm_pos *pos, u32 asn, u32 asn2) -{ - u32 gas; - if (! pos->set) - return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - gas = get_as(p + i * BS); - - if ((gas >= asn) && (gas <= asn2)) - return 1; - } - - return 0; -} - -static int -pm_match_set(const struct pm_pos *pos, const struct f_tree *set) -{ - struct f_val asn = { .type = T_INT }; - - if (! pos->set) - { - asn.val.i = pos->val.asn; - return !!find_tree(set, &asn); - } - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - asn.val.i = get_as(p + i * BS); - if (find_tree(set, &asn)) - return 1; - } - - return 0; -} - -static inline int -pm_match(const struct pm_pos *pos, const struct f_path_mask_item *mask, u32 asn, u32 asn2) -{ - return ((mask->kind == PM_QUESTION) || - ((mask->kind != PM_ASN_SET) ? - pm_match_val(pos, asn, asn2) : - pm_match_set(pos, mask->set))); -} - -static void -pm_mark(struct pm_pos *pos, int *i, int plen, int *nl, int *nh) -{ - int j = *i; - - if (pos[j].set) - do { pos[j].mark = 1; j++; } - while ((j < plen) && pos[j].set); - else - j++; - - pos[j].mark = 1; - - /* Update low, high based on first and last marked pos */ - int l = pos[*i].set ? *i : j; - - *nl = (*nl < 0) ? l : MIN(*nl, l); - *nh = MAX(*nh, j); - *i = j; -} - -/* AS path matching is nontrivial. Because AS path can - * contain sets, it is not a plain wildcard matching. A set - * in an AS path is interpreted as it might represent any - * sequence of AS numbers from that set (possibly with - * repetitions). So it is also a kind of a pattern, - * more complicated than a path mask. - * - * The algorithm for AS path matching is a variant - * of nondeterministic finite state machine, where - * positions in AS path are states, and items in - * path mask are input for that finite state machine. 
- * During execution of the algorithm we maintain a set - * of marked states - a state is marked if it can be - * reached by any walk through NFSM with regard to - * currently processed part of input. When we process - * next part of mask, we advance each marked state. - * We start with marked first position, when we - * run out of marked positions, we reject. When - * we process the whole mask, we accept if final position - * (auxiliary position after last real position in AS path) - * is marked. - */ -int -as_path_match(const struct adata *path, const struct f_path_mask *mask) -{ - struct pm_pos pos[2048 + 1]; - int plen = parse_path(path, pos); - int l, h, i, nh, nl, last, loop; - u32 val = 0; - u32 val2 = 0; - - /* l and h are bound of interval of positions where - are marked states */ - - pos[plen].set = 0; - pos[plen].mark = 0; - - l = h = loop = 0; - pos[0].mark = 1; - - for (uint m=0; m < mask->len; m++) - { - /* We remove this mark to not step after pos[plen] */ - pos[plen].mark = 0; - - switch (mask->item[m].kind) - { - case PM_ASTERISK: - for (i = l; i <= plen; i++) - pos[i].mark = 1; - h = plen; - break; - - case PM_LOOP: - loop = 1; - break; - - case PM_ASN: /* Define single ASN as ASN..ASN - very narrow interval */ - val2 = val = mask->item[m].asn; - goto step; - case PM_ASN_EXPR: - bug("Expressions should be evaluated on AS path mask construction."); - case PM_ASN_RANGE: - val = mask->item[m].from; - val2 = mask->item[m].to; - goto step; - case PM_QUESTION: - case PM_ASN_SET: - step: - nh = nl = -1; - last = plen; - for (i = h; i >= l; i--) - if (pos[i].mark) - { - pos[i].mark = 0; - int j = i; - - match: - if (pm_match(pos + j, &mask->item[m], val, val2)) - { - pm_mark(pos, &j, plen, &nl, &nh); - if (loop && (j < last)) - goto match; - } - - last = i; - } - - if (nh < 0) - return 0; - - h = nh; - l = nl; - loop = 0; - break; - } - } - - return pos[plen].mark; -} diff --git a/nest/a-path_test.c b/nest/a-path_test.c deleted file mode 100644 index 2e6e4956..00000000 --- a/nest/a-path_test.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * BIRD -- Path Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" - -#define TESTS_NUM 30 -#define AS_PATH_LENGTH 1000 - -#if AS_PATH_LENGTH > AS_PATH_MAXLEN -#warning "AS_PATH_LENGTH should be <= AS_PATH_MAXLEN" -#endif - -static int -t_as_path_match(void) -{ - resource_init(); - - int round; - for (round = 0; round < TESTS_NUM; round++) - { - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - u32 first_prepended, last_prepended; - first_prepended = last_prepended = 0; - struct linpool *lp = lp_new_default(&root_pool); - - struct f_path_mask *mask = alloca(sizeof(struct f_path_mask) + AS_PATH_LENGTH * sizeof(struct f_path_mask_item)); - mask->len = AS_PATH_LENGTH; - for (int i = AS_PATH_LENGTH - 1; i >= 0; i--) - { - u32 val = bt_random(); - as_path = as_path_prepend(lp, as_path, val); - bt_debug("Prepending ASN: %10u \n", val); - - if (i == 0) - last_prepended = val; - if (i == AS_PATH_LENGTH-1) - first_prepended = val; - - mask->item[i].kind = PM_ASN; - mask->item[i].asn = val; - } - - bt_assert_msg(as_path_match(as_path, mask), "Mask should match with AS path"); - - u32 asn; - - bt_assert(as_path_get_first(as_path, &asn)); - bt_assert_msg(asn == last_prepended, "as_path_get_first() should return the last prepended ASN"); - - bt_assert(as_path_get_last(as_path, &asn)); - bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); - - rfree(lp); - } - - return 1; -} - -static int -t_path_format(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - uint i; - for (i = 4294967285; i <= 4294967294; i++) - { - as_path = as_path_prepend(lp, as_path, i); - bt_debug("Prepending ASN: %10u \n", i); - } - -#define BUFFER_SIZE 120 - byte buf[BUFFER_SIZE] = {}; - - as_path_format(&empty_as_path, buf, BUFFER_SIZE); - bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - - as_path_format(as_path, buf, BUFFER_SIZE); - bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - -#define SMALL_BUFFER_SIZE 25 - byte buf2[SMALL_BUFFER_SIZE] = {}; - as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); - bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); - - rfree(lp); - - return 1; -} - -static int -count_asn_in_array(const u32 *array, u32 asn) -{ - int counts_of_contains = 0; - int u; - for (u = 0; u < AS_PATH_LENGTH; u++) - if (array[u] == asn) - counts_of_contains++; - return counts_of_contains; -} - -static int -t_path_include(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - u32 as_nums[AS_PATH_LENGTH] = {}; - int i; - for (i = 0; i < AS_PATH_LENGTH; i++) - { - u32 val = bt_random(); - as_nums[i] = val; - as_path = as_path_prepend(lp, as_path, val); - } - - for (i = 0; i < AS_PATH_LENGTH; i++) - { - int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); - bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); - - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 0) != NULL); - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 1) != 
NULL); - } - - for (i = 0; i < 10000; i++) - { - u32 test_val = bt_random(); - int counts_of_contains = count_asn_in_array(as_nums, test_val); - int result = as_path_contains(as_path, test_val, (counts_of_contains == 0 ? 1 : counts_of_contains)); - - if (counts_of_contains) - bt_assert_msg(result, "As path should contain %d-times the number %u", counts_of_contains, test_val); - else - bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); - } - - rfree(lp); - - return 1; -} - -#if 0 -static int -t_as_path_converting(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); -#define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 - - int i; - for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) - as_path = as_path_prepend(lp, as_path, i); - - bt_debug("data length: %u \n", as_path->length); - - byte buffer[100] = {}; - int used_size = as_path_convert_to_new(as_path, buffer, AS_PATH_LENGTH_FOR_CONVERTING_TEST-1); - bt_debug("as_path_convert_to_new: len %d \n%s\n", used_size, buffer); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\039\030\030\030\030\030\030\030\039\030\030\030\030\030\030\030\038\030\030\030\030\030\030" - "\030\037\030\030\030\030\030\030\030\036\030\030\030\030", - 38)); - - bzero(buffer, sizeof(buffer)); - int new_used; - used_size = as_path_convert_to_old(as_path, buffer, &new_used); - bt_debug("as_path_convert_to_old: len %d, new_used: %d \n", used_size, new_used); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\0310\030\039\030\038\030\037\030\036\030\035\030\034\030\033\030\032\030\031\030\030", - 22)); - - return 1; -} -#endif - -void resource_sys_init(void); - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - resource_sys_init(); - - bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); - bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); - bt_test_suite(t_path_include, "Testing including a AS number in AS path"); - // bt_test_suite(t_as_path_converting, "Testing as_path_convert_to_*() output constancy"); - - return bt_exit_value(); -} diff --git a/nest/a-set.c b/nest/a-set.c deleted file mode 100644 index 1186eb56..00000000 --- a/nest/a-set.c +++ /dev/null @@ -1,565 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include <stdlib.h> - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/string.h" - -/** - * int_set_format - format an &set for printing - * @set: set attribute to be formatted - * @way: style of format (0 for router ID list, 1 for community list) - * @from: starting position in set - * @buf: destination buffer - * @size: size of buffer - * - * This function takes a set attribute and formats it. @way specifies - * the style of format (router ID / community). @from argument can be - * used to specify the first printed value for the purpose of printing - * untruncated sets even with smaller buffers. If the output fits in - * the buffer, 0 is returned, otherwise the position of the first not - * printed item is returned. 
This value can be used as @from argument - * in subsequent calls. If truncated output suffices, -1 can be - * instead used as @from, in that case " ..." is eventually added at - * the buffer to indicate truncation. - */ -int -int_set_format(const struct adata *set, int way, int from, byte *buf, uint size) -{ - u32 *z = (u32 *) set->data; - byte *end = buf + size - 24; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i++) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - if (way) - buf += bsprintf(buf, "(%d,%d)", z[i] >> 16, z[i] & 0xffff); - else - buf += bsprintf(buf, "%R", z[i]); - } - *buf = 0; - return 0; -} - -int -ec_format(byte *buf, u64 ec) -{ - u32 type, key, val; - char tbuf[16]; - const char *kind; - - type = ec >> 48; - kind = ec_subtype_str(type & 0xf0ff); - - if (!kind) { - bsprintf(tbuf, "unknown 0x%x", type); - kind = tbuf; - } - - switch (ec >> 56) - { - /* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ - case 0x00: - case 0x40: - key = (ec >> 32) & 0xFFFF; - val = ec; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* RFC 4360 3.2. IPv4 Address Specific Extended Community */ - case 0x01: - case 0x41: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %R, %u)", kind, key, val); - - /* RFC 5668 4-Octet AS Specific BGP Extended Community */ - case 0x02: - case 0x42: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* Generic format for unknown kinds of extended communities */ - default: - key = ec >> 32; - val = ec; - return bsprintf(buf, "(generic, 0x%x, 0x%x)", key, val); - } - -} - -int -ec_set_format(const struct adata *set, int from, byte *buf, uint size) -{ - u32 *z = int_set_get_data(set); - byte *end = buf + size - 64; - int from2 = MAX(from, 0); - int to = int_set_get_size(set); - int i; - - for (i = from2; i < to; i += 2) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - buf += ec_format(buf, ec_get(z, i)); - } - *buf = 0; - return 0; -} - -int -lc_format(byte *buf, lcomm lc) -{ - return bsprintf(buf, "(%u, %u, %u)", lc.asn, lc.ldp1, lc.ldp2); -} - -int -lc_set_format(const struct adata *set, int from, byte *buf, uint bufsize) -{ - u32 *d = (u32 *) set->data; - byte *end = buf + bufsize - 64; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i += 3) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, "..."); - else - buf[-1] = 0; - return i; - } - - buf += bsprintf(buf, "(%u, %u, %u)", d[i], d[i+1], d[i+2]); - *buf++ = ' '; - } - - if (i != from2) - buf--; - - *buf = 0; - return 0; -} - -int -int_set_contains(const struct adata *list, u32 val) -{ - if (!list) - return 0; - - u32 *l = (u32 *) list->data; - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (*l++ == val) - return 1; - - return 0; -} - -int -ec_set_contains(const struct adata *list, u64 val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - u32 eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (l[i] == eh && l[i+1] == el) - return 1; - - return 0; -} - -int -lc_set_contains(const struct adata *list, lcomm val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < 
len; i += 3) - if (lc_match(l, i, val)) - return 1; - - return 0; -} - -const struct adata * -int_set_prepend(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data + 4, list->data, list->length); - - * (u32 *) res->data = val; - - return res; -} - -const struct adata * -int_set_add(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data, list->data, list->length); - - * (u32 *) (res->data + len) = val; - - return res; -} - -const struct adata * -ec_set_add(struct linpool *pool, const struct adata *list, u64 val) -{ - if (ec_set_contains(list, val)) - return list; - - int olen = list ? list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + 8); - res->length = olen + 8; - - if (list) - memcpy(res->data, list->data, list->length); - - u32 *l = (u32 *) (res->data + olen); - l[0] = ec_hi(val); - l[1] = ec_lo(val); - - return res; -} - -const struct adata * -lc_set_add(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (lc_set_contains(list, val)) - return list; - - int olen = list ? list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + LCOMM_LENGTH); - res->length = olen + LCOMM_LENGTH; - - if (list) - memcpy(res->data, list->data, list->length); - - lc_put((u32 *) (res->data + olen), val); - - return res; -} - -const struct adata * -int_set_del(struct linpool *pool, const struct adata *list, u32 val) -{ - if (!int_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 4); - res->length = list->length - 4; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (l[i] != val) - *k++ = l[i]; - - return res; -} - -const struct adata * -ec_set_del(struct linpool *pool, const struct adata *list, u64 val) -{ - if (!ec_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 8); - res->length = list->length - 8; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - u32 eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (! (l[i] == eh && l[i+1] == el)) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - return res; -} - -const struct adata * -lc_set_del(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (!lc_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - LCOMM_LENGTH); - res->length = list->length - LCOMM_LENGTH; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i=0; i < len; i += 3) - if (! 
lc_match(l, i, val)) - k = lc_copy(k, l+i); - - return res; -} - -const struct adata * -int_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i++) - if (!int_set_contains(l1, l[i])) - *k++ = l[i]; - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 2) - if (!ec_set_contains(l1, ec_get(l, i))) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 3) - if (!lc_set_contains(l1, lc_get(l, i))) - k = lc_copy(k, l+i); - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - - -struct adata * -ec_set_del_nontrans(struct linpool *pool, const struct adata *set) -{ - adata *res = lp_alloc_adata(pool, set->length); - u32 *src = int_set_get_data(set); - u32 *dst = int_set_get_data(res); - int len = int_set_get_size(set); - int i; - - /* Remove non-transitive communities (EC_TBIT set) */ - for (i = 0; i < len; i += 2) - { - if (src[i] & EC_TBIT) - continue; - - *dst++ = src[i]; - *dst++ = src[i+1]; - } - - res->length = ((byte *) dst) - res->data; - - return res; -} - -static int -int_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; -} - -struct adata * -int_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 4, 4, int_set_cmp); - return dst; -} - - -static int -ec_set_cmp(const void *X, const void *Y) -{ - u64 x = ec_get(X, 0); - u64 y = ec_get(Y, 0); - return (x < y) ? -1 : (x > y) ? 1 : 0; -} - -struct adata * -ec_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 8, 8, ec_set_cmp); - return dst; -} - -void -ec_set_sort_x(struct adata *set) -{ - /* Sort in place */ - qsort(set->data, set->length / 8, 8, ec_set_cmp); -} - - -static int -lc_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - if (x[0] != y[0]) - return (x[0] > y[0]) ? 
1 : -1; - if (x[1] != y[1]) - return (x[1] > y[1]) ? 1 : -1; - if (x[2] != y[2]) - return (x[2] > y[2]) ? 1 : -1; - return 0; -} - -struct adata * -lc_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); - return dst; -} diff --git a/nest/a-set_test.c b/nest/a-set_test.c deleted file mode 100644 index efd1b67d..00000000 --- a/nest/a-set_test.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "lib/net.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" - -#define SET_SIZE 10 -static const struct adata *set_sequence; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_same; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */ -static const struct adata *set_random; - -#define BUFFER_SIZE 1000 -static byte buf[BUFFER_SIZE] = {}; - -#define SET_SIZE_FOR_FORMAT_OUTPUT 10 - -struct linpool *lp; - -enum set_type -{ - SET_TYPE_INT, - SET_TYPE_EC -}; - -static void -generate_set_sequence(enum set_type type, int len) -{ - struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - int i; - for (i = 0; i < len; i++) - { - if (type == SET_TYPE_INT) - { - set_sequence = int_set_add(lp, set_sequence, i); - set_sequence_same = int_set_add(lp, set_sequence_same, i); - set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = int_set_add(lp, set_random, bt_random()); - } - else if (type == SET_TYPE_EC) - { - set_sequence = ec_set_add(lp, set_sequence, i); - set_sequence_same = ec_set_add(lp, set_sequence_same, i); - set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); - } - else - bt_abort_msg("This should be unreachable"); - } -} - -/* - * SET INT TESTS - */ - -static int -t_set_int_contains(void) -{ - int i; - - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(int_set_contains(set_sequence, i)); - bt_assert(int_set_contains(set_sequence, -1) == 0); - bt_assert(int_set_contains(set_sequence, SET_SIZE) == 0); - - int *data = int_set_get_data(set_sequence); - for (i = 0; i < SET_SIZE; i++) - bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); - - rfree(lp); - return 1; -} - -static int -t_set_int_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *set_union; - set_union = int_set_union(lp, set_sequence, set_sequence_same); - bt_assert(int_set_get_size(set_union) == SET_SIZE); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - set_union = int_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_format(void) -{ - resource_init(); - 
generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT); - - bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = int_set_del(lp, deleting_sequence, i); - bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - int_set_get_size(deleting_sequence), - SET_SIZE-1-i); - } - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - -/* - * SET EC TESTS - */ - -static int -t_set_ec_contains(void) -{ - u32 i; - - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(ec_set_contains(set_sequence, i)); - bt_assert(ec_set_contains(set_sequence, -1) == 0); - bt_assert(ec_set_contains(set_sequence, SET_SIZE) == 0); - -// int *data = ec_set_get_data(set_sequence); -// for (i = 0; i < SET_SIZE; i++) -// bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); - - rfree(lp); - return 1; -} - -static int -t_set_ec_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *set_union; - set_union = ec_set_union(lp, set_sequence, set_sequence_same); - bt_assert(ec_set_get_size(set_union) == SET_SIZE); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - set_union = ec_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_ec_format(void) -{ - resource_init(); - - const struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - u64 i = 0; - set_sequence = ec_set_add(lp, set_sequence, i); - for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) - set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? 
((u64)EC_RO << 48) : ((u64)EC_RT << 48))); - - bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); - bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, - "ec_set_format() returns '%s'", buf); - - rfree(lp); - return 1; -} - -static int -t_set_ec_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = ec_set_del(lp, deleting_sequence, i); - bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - ec_set_get_size(deleting_sequence), SET_SIZE-1-i); - } - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - - -void resource_sys_init(void); - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - resource_sys_init(); - - bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); - bt_test_suite(t_set_int_format, "Testing sets of integers: format"); - bt_test_suite(t_set_int_union, "Testing sets of integers: union"); - bt_test_suite(t_set_int_delete, "Testing sets of integers: delete"); - - bt_test_suite(t_set_ec_contains, "Testing sets of Extended Community values: contains, get_data"); - bt_test_suite(t_set_ec_format, "Testing sets of Extended Community values: format"); - bt_test_suite(t_set_ec_union, "Testing sets of Extended Community values: union"); - bt_test_suite(t_set_ec_delete, "Testing sets of Extended Community values: delete"); - - return bt_exit_value(); -} diff --git a/nest/attrs.h b/nest/attrs.h deleted file mode 100644 index 50da817b..00000000 --- a/nest/attrs.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Attribute Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ATTRS_H_ -#define _BIRD_ATTRS_H_ - -#include <stdint.h> -#include "lib/unaligned.h" -#include "nest/route.h" - - -/* a-path.c */ - -#define AS_PATH_SET 1 /* Types of path segments */ -#define AS_PATH_SEQUENCE 2 -#define AS_PATH_CONFED_SEQUENCE 3 -#define AS_PATH_CONFED_SET 4 - -#define AS_PATH_MAXLEN 10000 - -#define AS_TRANS 23456 -/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF - * to 16bit slot (like in 16bit AS_PATH). 
See RFC 4893 for details - */ - -struct f_tree; - -int as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen); -int as_path_16to32(byte *dst, const byte *src, uint len); -int as_path_32to16(byte *dst, const byte *src, uint len); -int as_path_contains_as4(const struct adata *path); -int as_path_contains_confed(const struct adata *path); -struct adata *as_path_strip_confed(struct linpool *pool, const struct adata *op); -struct adata *as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as); -struct adata *as_path_to_old(struct linpool *pool, const struct adata *path); -struct adata *as_path_cut(struct linpool *pool, const struct adata *path, uint num); -const struct adata *as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2); -void as_path_format(const struct adata *path, byte *buf, uint size); -int as_path_getlen(const struct adata *path); -int as_path_getlen_int(const struct adata *path, int bs); -int as_path_get_first(const struct adata *path, u32 *orig_as); -int as_path_get_first_regular(const struct adata *path, u32 *last_as); -int as_path_get_last(const struct adata *path, u32 *last_as); -u32 as_path_get_last_nonaggregated(const struct adata *path); -int as_path_contains(const struct adata *path, u32 as, int min); -int as_path_match_set(const struct adata *path, const struct f_tree *set); -const struct adata *as_path_filter(struct linpool *pool, const struct adata *path, const struct f_tree *set, u32 key, int pos); - -static inline struct adata *as_path_prepend(struct linpool *pool, const struct adata *path, u32 as) -{ return as_path_prepend2(pool, path, AS_PATH_SEQUENCE, as); } - - -#define PM_ASN 0 -#define PM_QUESTION 1 -#define PM_ASTERISK 2 -#define PM_ASN_EXPR 3 -#define PM_ASN_RANGE 4 -#define PM_ASN_SET 5 -#define PM_LOOP 6 - -struct f_path_mask_item { - union { - u32 asn; /* PM_ASN */ - const struct f_line *expr; /* PM_ASN_EXPR */ - const struct f_tree *set; /* PM_ASN_SET */ - struct { /* PM_ASN_RANGE */ - u32 from; - u32 to; - }; - }; - int kind; -}; - -struct f_path_mask { - uint len; - struct f_path_mask_item item[0]; -}; - -int as_path_match(const struct adata *path, const struct f_path_mask *mask); - - -/* Counterparts to appropriate as_path_* functions */ - -static inline int -aggregator_16to32(byte *dst, const byte *src) -{ - put_u32(dst, get_u16(src)); - memcpy(dst+4, src+2, 4); - return 8; -} - -static inline int -aggregator_32to16(byte *dst, const byte *src) -{ - put_u16(dst, get_u32(src)); - memcpy(dst+2, src+4, 4); - return 6; -} - -static inline int -aggregator_contains_as4(const struct adata *a) -{ - return get_u32(a->data) > 0xFFFF; -} - -static inline struct adata * -aggregator_to_old(struct linpool *pool, const struct adata *a) -{ - struct adata *d = lp_alloc_adata(pool, 8); - put_u32(d->data, AS_TRANS); - memcpy(d->data + 4, a->data + 4, 4); - return d; -} - - -/* a-set.c */ - - -/* Extended Community subtypes (kinds) */ -enum ec_subtype { - EC_RT = 0x0002, - EC_RO = 0x0003, - EC_GENERIC = 0xFFFF, -}; - -static inline const char *ec_subtype_str(const enum ec_subtype ecs) { - switch (ecs) { - case EC_RT: return "rt"; - case EC_RO: return "ro"; - default: return NULL; - } -} - -/* Transitive bit (for first u32 half of EC) */ -#define EC_TBIT 0x40000000 - -#define ECOMM_LENGTH 8 - -static inline int int_set_get_size(const struct adata *list) -{ return list->length / 4; } - -static inline int ec_set_get_size(const struct adata *list) -{ return list->length / 8; } - -static 
inline int lc_set_get_size(const struct adata *list) -{ return list->length / 12; } - -static inline u32 *int_set_get_data(const struct adata *list) -{ return (u32 *) list->data; } - -static inline u32 ec_hi(u64 ec) { return ec >> 32; } -static inline u32 ec_lo(u64 ec) { return ec; } -static inline u64 ec_get(const u32 *l, int i) -{ return (((u64) l[i]) << 32) | l[i+1]; } - -/* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ -static inline u64 ec_as2(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0000) << 48) | (key << 32) | val; } - -/* RFC 5668 4-Octet AS Specific BGP Extended Community */ -static inline u64 ec_as4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0200) << 48) | (key << 16) | val; } - -/* RFC 4360 3.2. IPv4 Address Specific Extended Community */ -static inline u64 ec_ip4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0100) << 48) | (key << 16) | val; } - -static inline u64 ec_generic(u64 key, u64 val) -{ return (key << 32) | val; } - -/* Large community value */ -typedef struct lcomm { - u32 asn; - u32 ldp1; - u32 ldp2; -} lcomm; - -#define LCOMM_LENGTH 12 - -static inline lcomm lc_get(const u32 *l, int i) -{ return (lcomm) { l[i], l[i+1], l[i+2] }; } - -static inline void lc_put(u32 *l, lcomm v) -{ l[0] = v.asn; l[1] = v.ldp1; l[2] = v.ldp2; } - -static inline int lc_match(const u32 *l, int i, lcomm v) -{ return (l[i] == v.asn && l[i+1] == v.ldp1 && l[i+2] == v.ldp2); } - -static inline u32 *lc_copy(u32 *dst, const u32 *src) -{ memcpy(dst, src, LCOMM_LENGTH); return dst + 3; } - - -int int_set_format(const struct adata *set, int way, int from, byte *buf, uint size); -int ec_format(byte *buf, u64 ec); -int ec_set_format(const struct adata *set, int from, byte *buf, uint size); -int lc_format(byte *buf, lcomm lc); -int lc_set_format(const struct adata *set, int from, byte *buf, uint size); -int int_set_contains(const struct adata *list, u32 val); -int ec_set_contains(const struct adata *list, u64 val); -int lc_set_contains(const struct adata *list, lcomm val); -const struct adata *int_set_prepend(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *int_set_add(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_add(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_add(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_del(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_del(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_del(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); -const struct adata *ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); -const struct adata *lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); - -struct adata *ec_set_del_nontrans(struct linpool *pool, const struct adata *set); -struct adata *int_set_sort(struct linpool *pool, const struct adata *src); -struct adata *ec_set_sort(struct linpool *pool, const struct adata *src); -struct adata *lc_set_sort(struct linpool *pool, const struct adata *src); - -void ec_set_sort_x(struct adata *set); /* Sort in place */ - -#endif diff --git a/nest/bird.h b/nest/bird.h index 55712abe..931974a0 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -9,7 +9,6 @@ #ifndef 
_BIRD_BIRD_H_ #define _BIRD_BIRD_H_ -#include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" #include "lib/net.h" @@ -319,7 +319,6 @@ cli_new(void *priv) c->event->data = c; c->cont = cli_hello; c->parser_pool = lp_new_default(c->pool); - c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -409,11 +408,14 @@ void cli_free(cli *c) { cli_set_log_echo(c, 0, 0); + int defer = 0; if (c->cleanup) - c->cleanup(c); + defer = c->cleanup(c); if (c == cmd_reconfig_stored_cli) cmd_reconfig_stored_cli = NULL; - rfree(c->pool); + + if (!defer) + rfree(c->pool); } /** @@ -33,12 +33,12 @@ typedef struct cli { struct cli_out *tx_buf, *tx_pos, *tx_write; event *event; void (*cont)(struct cli *c); - void (*cleanup)(struct cli *c); + int (*cleanup)(struct cli *c); /* Return 0 if finished and cli may be freed immediately. + Otherwise return 1 and call rfree(c->pool) when appropriate. */ void *rover; /* Private to continuation routine */ int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ - struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/cmds.c b/nest/cmds.c index 18f39eb5..092be48a 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -8,7 +8,7 @@ #include "nest/bird.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "nest/cmds.h" @@ -51,47 +51,80 @@ cmd_show_symbols(struct sym_show_data *sd) cli_msg(1010, "%-8s\t%s", sd->sym->name, cf_symbol_class_name(sd->sym)); else { - HASH_WALK(config->sym_hash, next, sym) - { - if (!sym->scope->active) - continue; + for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next) + HASH_WALK(scope->hash, next, sym) + { + if (!sym->scope->active) + continue; - if (sd->type && (sym->class != sd->type)) - continue; + if (sd->type && (sym->class != sd->type)) + continue; - cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); - } - HASH_WALK_END; + cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); + } + HASH_WALK_END; cli_msg(0, ""); } } -static void -print_size(char *dsc, size_t val) +#define SIZE_SUFFIX " kMGT" +#define SIZE_FORMAT "% 4u.%1u % 1cB" +#define SIZE_ARGS(a) (a).val, (a).decimal, SIZE_SUFFIX[(a).magnitude] + +struct size_args { + u64 val:48; + u64 decimal:8; + u64 magnitude:8; +}; + +static struct size_args +get_size_args(u64 val) { - char *px = " kMG"; - int i = 0; - while ((val >= 10000) && (i < 3)) +#define VALDEC 10 /* One decimal place */ + val *= VALDEC; + + uint i = 0; + while ((val >= 10000 * VALDEC) && (i < 4)) { val = (val + 512) / 1024; i++; } - cli_msg(-1018, "%-17s %4u %cB", dsc, (unsigned) val, px[i]); + return (struct size_args) { + .val = (val / VALDEC), + .decimal = (val % VALDEC), + .magnitude = i, + }; +} + +static void +print_size(char *dsc, struct resmem vals) +{ + struct size_args effective = get_size_args(vals.effective); + struct size_args overhead = get_size_args(vals.overhead); + + cli_msg(-1018, "%-17s " SIZE_FORMAT " " SIZE_FORMAT, dsc, SIZE_ARGS(effective), SIZE_ARGS(overhead)); } extern pool *rt_table_pool; extern pool *rta_pool; +extern uint *pages_kept; void cmd_show_memory(void) { cli_msg(-1018, "BIRD memory usage"); + 
cli_msg(-1018, "%-17s Effective Overhead", ""); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); print_size("Protocols:", rmemsize(proto_pool)); - print_size("Total:", rmemsize(&root_pool)); + struct resmem total = rmemsize(&root_pool); +#ifdef HAVE_MMAP + print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); + total.overhead += page_size * *pages_kept; +#endif + print_size("Total:", total); cli_msg(0, ""); } @@ -101,7 +134,7 @@ cmd_eval(const struct f_line *expr) buffer buf; LOG_BUFFER_INIT(buf); - if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN) + if (f_eval_buf(expr, &buf) > F_RETURN) { cli_msg(8008, "runtime error"); return; diff --git a/nest/config.Y b/nest/config.Y index 6e7689ed..91147a29 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -17,6 +17,7 @@ CF_HDR CF_DEFINES +static struct rtable_config *this_table; static struct proto_config *this_proto; static struct channel_config *this_channel; static struct iface_patt *this_ipatt; @@ -117,13 +118,14 @@ CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) -CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) -CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -131,7 +133,7 @@ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) +CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) @@ -141,7 +143,7 @@ CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) %type <s> optproto %type <ra> r_args %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type tos password_algorithm %type <ps> proto_patt proto_patt2 %type <cc> channel_start 
diff --git a/nest/config.Y b/nest/config.Y
index 6e7689ed..91147a29 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -17,6 +17,7 @@ CF_HDR
 
 CF_DEFINES
 
+static struct rtable_config *this_table;
 static struct proto_config *this_proto;
 static struct channel_config *this_channel;
 static struct iface_patt *this_ipatt;
@@ -117,13 +118,14 @@ CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS)
 CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI)
 CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES)
 CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512)
-CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE)
-CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED)
-CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
+CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE)
+CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION)
+CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP)
 CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US)
-CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS)
+CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS)
 CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE)
 CF_KEYWORDS(CHECK, LINK)
+CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD)
 
 /* For r_args_channel */
 CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC)
@@ -131,7 +133,7 @@ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4,
 
 CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL)
 CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED)
-CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT)
+CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT)
 CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID)
 CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6)
 
@@ -141,7 +143,7 @@ CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6)
 %type <s> optproto
 %type <ra> r_args
 %type <sd> sym_args
-%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm
+%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type tos password_algorithm
 %type <ps> proto_patt proto_patt2
 %type <cc> channel_start proto_channel
 %type <cl> limit_spec
@@ -163,7 +165,7 @@ rtrid:
 
 idval:
    NUM { $$ = $1; }
- | '(' term ')' { $$ = f_eval_int(f_linearize($2)); }
+ | '(' term ')' { $$ = f_eval_int(f_linearize($2, 1)); }
 | IP4 { $$ = ip4_to_u32($1); }
 | CF_SYM_KNOWN {
     if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD))
@@ -206,16 +208,43 @@ CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP
 
 conf: table ;
 
+table: table_start table_sorted table_opt_list ;
+
+table_start: net_type TABLE symbol {
+   this_table = rt_new_table($3, $1);
+   }
+ ;
+
 table_sorted:
-   { $$ = 0; }
- | SORTED { $$ = 1; }
+   /* empty */
+ | SORTED { this_table->sorted = 1; }
 ;
 
-table: net_type TABLE symbol table_sorted {
-   struct rtable_config *cf;
-   cf = rt_new_table($3, $1);
-   cf->sorted = $4;
+table_opt:
+   SORTED bool { this_table->sorted = $2; }
+ | TRIE bool {
+     if (!net_val_match(this_table->addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR))
+       cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]);
+     this_table->trie_used = $2;
    }
+ | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; }
+ | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; }
+ | GC THRESHOLD expr { this_table->gc_threshold = $3; }
+ | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); }
+ | CORK THRESHOLD expr expr {
+     if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold.");
+     this_table->cork_threshold.low = $3;
+     this_table->cork_threshold.high = $4; }
 ;
 
+table_opts:
+   /* empty */
+ | table_opts table_opt ';'
+ ;
+
+table_opt_list:
+   /* empty */
+ | '{' table_opts '}'
 ;
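For orientation, the options block this grammar accepts might be written as follows in a configuration file. This is an illustrative sketch, not from the patch; the exact time and threshold values are arbitrary and option availability depends on the table's address family.

  # Illustrative configuration exercising the new table options grammar
  ipv4 table master4 {
          trie yes;
          min settle time 1 s;
          max settle time 20 s;
          gc threshold 1000;
          gc period 10 s;
          cork threshold 1024 8192;
  }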
@@ -283,12 +312,25 @@ channel_item_:
     this_channel->table = $2;
   }
 | IMPORT imexport { this_channel->in_filter = $2; }
+ | EXPORT IN net_any imexport {
+     if (this_channel->net_type && ($3->type != this_channel->net_type))
+       cf_error("Incompatible export prefilter type");
+     this_channel->out_subprefix = $3;
+     this_channel->out_filter = $4;
+   }
 | EXPORT imexport { this_channel->out_filter = $2; }
 | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; }
 | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; }
 | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; }
 | PREFERENCE expr { this_channel->preference = $2; check_u16($2); }
- | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; }
+ | IMPORT KEEP FILTERED bool {
+     if ($4)
+       this_channel->in_keep |= RIK_REJECTED;
+     else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER)
+       cf_error("Import keep filtered is implied by the import table.");
+     else
+       this_channel->in_keep &= ~RIK_REJECTED;
+   }
 | RPKI RELOAD bool { this_channel->rpki_reload = $3; }
 ;
 
@@ -617,20 +659,29 @@ r_args:
     $$ = cfg_allocz(sizeof(struct rt_show_data));
     init_list(&($$->tables));
     $$->filter = FILTER_ACCEPT;
-    $$->running_on_config = new_config->fallback;
+    $$->running_on_config = config;
+    $$->cli = this_cli;
   }
 | r_args net_any {
     $$ = $1;
     if ($$->addr) cf_error("Only one prefix expected");
     $$->addr = $2;
+    $$->addr_mode = TE_ADDR_EQUAL;
   }
 | r_args FOR r_args_for {
     $$ = $1;
     if ($$->addr) cf_error("Only one prefix expected");
-    $$->show_for = 1;
     $$->addr = $3;
+    $$->addr_mode = TE_ADDR_FOR;
+  }
+ | r_args IN net_any {
+    $$ = $1;
+    if ($$->addr) cf_error("Only one prefix expected");
+    if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument");
+    $$->addr = $3;
+    $$->addr_mode = TE_ADDR_IN;
   }
-| r_args TABLE CF_SYM_KNOWN {
+| r_args TABLE symbol_known {
    cf_assert_symbol($3, SYM_TABLE);
    $$ = $1;
    rt_show_add_table($$, $3->table->table);
@@ -644,13 +695,13 @@ r_args:
    $$->tables_defined_by = RSD_TDB_ALL;
  }
 | r_args IMPORT TABLE channel_arg {
-    if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name);
-    rt_show_add_table($$, $4->in_table->tab);
+    if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name);
+    rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4;
    $$->tables_defined_by = RSD_TDB_DIRECT;
  }
 | r_args EXPORT TABLE channel_arg {
    if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name);
-    rt_show_add_table($$, $4->out_table->tab);
+    rt_show_add_exporter($$, $4->out_table, "export");
    $$->tables_defined_by = RSD_TDB_DIRECT;
  }
 | r_args FILTER filter {
@@ -675,7 +726,7 @@ r_args:
    $$ = $1;
    $$->filtered = 1;
  }
- | r_args export_mode CF_SYM_KNOWN {
+ | r_args export_mode symbol_known {
    cf_assert_symbol($3, SYM_PROTO);
    struct proto_config *c = (struct proto_config *) $3->proto;
    $$ = $1;
@@ -692,7 +743,7 @@ r_args:
    $$->export_channel = $3;
    $$->tables_defined_by = RSD_TDB_INDIRECT;
  }
- | r_args PROTOCOL CF_SYM_KNOWN {
+ | r_args PROTOCOL symbol_known {
    cf_assert_symbol($3, SYM_PROTO);
    struct proto_config *c = (struct proto_config *) $3->proto;
    $$ = $1;
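The three addr_mode values set above correspond to three ways of asking for routes on the CLI; roughly as follows (illustrative invocations inferred from the grammar, assuming an IPv4 table):

  birdc> show route 192.0.2.0/24        # TE_ADDR_EQUAL: exactly this prefix
  birdc> show route for 192.0.2.1       # TE_ADDR_FOR: best prefix covering this address
  birdc> show route in 192.0.2.0/24     # TE_ADDR_IN: all subprefixes of this network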
@@ -820,7 +871,7 @@ CF_CLI(DUMP INTERFACES,,, [[Dump interface information]])
 CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]])
 { neigh_dump_all(); cli_msg(0, ""); } ;
 CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]])
-{ rta_dump_all(); cli_msg(0, ""); } ;
+{ ea_dump_all(); cli_msg(0, ""); } ;
 CF_CLI(DUMP ROUTES,,, [[Dump routes]])
 { rt_dump_all(); cli_msg(0, ""); } ;
 CF_CLI(DUMP TABLES,,, [[Dump table connections]])
@@ -831,7 +882,7 @@ CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]])
 { filters_dump_all(); cli_msg(0, ""); } ;
 
 CF_CLI(EVAL, term, <expr>, [[Evaluate an expression]])
-{ cmd_eval(f_linearize($2)); } ;
+{ cmd_eval(f_linearize($2, 1)); } ;
 
 CF_CLI_HELP(ECHO, ..., [[Control echoing of log messages]])
 CF_CLI(ECHO, echo_mask echo_size, (all | off | { debug|trace|info|remote|warning|error|auth [, ...] }) [<buffer-size>], [[Control echoing of log messages]])
 {
@@ -894,9 +945,6 @@ proto_patt2:
 | TEXT { $$.ptr = $1; $$.patt = 1; }
 ;
 
-dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ;
-
-
 CF_CODE
 
 CF_END
diff --git a/nest/iface.c b/nest/iface.c
index 83a633a3..682340c5 100644
--- a/nest/iface.c
+++ b/nest/iface.c
@@ -591,7 +591,7 @@ ifa_update(struct ifa *a)
       if (ipa_equal(b->brd, a->brd) &&
          ipa_equal(b->opposite, a->opposite) &&
          b->scope == a->scope &&
-         !((b->flags ^ a->flags) & IA_PEER))
+         !((b->flags ^ a->flags) & (IA_SECONDARY | IA_PEER | IA_HOST)))
        {
          b->flags |= IA_UPDATED;
          return b;
diff --git a/nest/neighbor.c b/nest/neighbor.c
index 1a31fb79..7cf9c85d 100644
--- a/nest/neighbor.c
+++ b/nest/neighbor.c
@@ -345,7 +345,7 @@ neigh_free(neighbor *n)
 {
   rem_node(&n->n);
   rem_node(&n->if_n);
-  sl_free(neigh_slab, n);
+  sl_free(n);
 }
 
 /**
diff --git a/nest/proto.c b/nest/proto.c
index 930fad1d..e64cef28 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -15,19 +15,17 @@
 #include "lib/event.h"
 #include "lib/timer.h"
 #include "lib/string.h"
-#include "lib/coro.h"
 #include "conf/conf.h"
-#include "nest/route.h"
+#include "nest/rt.h"
 #include "nest/iface.h"
 #include "nest/cli.h"
 #include "filter/filter.h"
 #include "filter/f-inst.h"
 
 pool *proto_pool;
-list proto_list;
+list STATIC_LIST_INIT(proto_list);
 
-static list protocol_list;
-struct protocol *class_to_protocol[PROTOCOL__MAX];
+static list STATIC_LIST_INIT(protocol_list);
 
 #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); })
 #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); })
@@ -48,7 +46,7 @@ static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" };
 
 extern struct protocol proto_unix_iface;
 
-static void channel_aux_request_refeed(struct channel_aux_table *cat);
+static void channel_request_reload(struct channel *c);
 static void proto_shutdown_loop(timer *);
 static void proto_rethink_goal(struct proto *p);
 static char *proto_state_name(struct proto *p);
@@ -110,9 +108,7 @@ channel_export_log_state_change(struct rt_export_request *req, u8 state)
   switch (state)
   {
     case TES_FEEDING:
-      if (c->out_table)
-        rt_refresh_begin(&c->out_table->push);
-      else if (c->proto->feed_begin)
+      if (c->proto->feed_begin)
        c->proto->feed_begin(c, !c->refeeding);
       break;
     case TES_READY:
@@ -202,8 +198,7 @@ proto_find_channel_by_name(struct proto *p, const char *n)
   return NULL;
 }
 
-rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old);
-rte * channel_in_preimport(struct rt_import_request *req, rte *new, rte *old);
+int channel_preimport(struct rt_import_request *req, rte *new, rte *old);
 
 void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe);
 void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe);
@@ -240,6 +235,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf)
 
   c->in_filter = cf->in_filter;
   c->out_filter = cf->out_filter;
+  c->out_subprefix = cf->out_subprefix;
 
   channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit);
   channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit);
@@ -250,7 +246,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf)
   c->preference = cf->preference;
   c->debug = cf->debug;
   c->merge_limit = cf->merge_limit;
-  c->in_keep_filtered = cf->in_keep_filtered;
+  c->in_keep = cf->in_keep;
   c->rpki_reload = cf->rpki_reload;
 
   c->channel_state = CS_DOWN;
@@ -320,10 +316,14 @@ static void
 channel_roa_in_changed(struct rt_subscription *s)
 {
   struct channel *c = s->data;
+  int active = !!c->reload_req.hook;
 
-  CD(c, "Reload triggered by RPKI change");
+  CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : "");
 
-  channel_request_reload(c);
+  if (!active)
+    channel_request_reload(c);
+  else
+    c->reload_pending = 1;
 }
 
 static void
@@ -401,7 +401,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir)
 #ifdef CONFIG_BGP
   /* No automatic reload for BGP channels without in_table / out_table */
   if (c->channel == &channel_bgp)
-    valid = dir ? !!c->in_table : !!c->out_table;
+    valid = dir ? ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table;
 #endif
 
   struct filter_iterator fit;
@@ -411,14 +411,8 @@ channel_roa_subscribe_filter(struct channel *c, int dir)
   {
     switch (fi->fi_code)
     {
-      case FI_ROA_CHECK_IMPLICIT:
-        tab = fi->i_FI_ROA_CHECK_IMPLICIT.rtc->table;
-        if (valid) channel_roa_subscribe(c, tab, dir);
-        found = 1;
-        break;
-
-      case FI_ROA_CHECK_EXPLICIT:
-        tab = fi->i_FI_ROA_CHECK_EXPLICIT.rtc->table;
+      case FI_ROA_CHECK:
+        tab = fi->i_FI_ROA_CHECK.rtc->table;
        if (valid) channel_roa_subscribe(c, tab, dir);
        found = 1;
        break;
@@ -483,8 +477,7 @@ channel_start_export(struct channel *c)
 {
   if (c->out_req.hook)
   {
-    c->restart_export = 1;
-    log(L_WARN "%s.%s: Fast channel export restart", c->proto->name, c->name);
+    log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name);
     return;
   }
 
@@ -496,6 +489,8 @@ channel_start_export(struct channel *c)
   c->out_req = (struct rt_export_request) {
     .name = rn,
     .list = proto_work_list(c->proto),
+    .addr = c->out_subprefix,
+    .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE,
     .trace_routes = c->debug | c->proto->debug,
     .dump_req = channel_dump_export_req,
     .log_state_change = channel_export_log_state_change,
@@ -527,7 +522,7 @@ channel_start_export(struct channel *c)
   }
 
   DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req);
-  rt_request_export(c->table, &c->out_req);
+  rt_request_export(&c->table->exporter, &c->out_req);
 }
 
 static void
@@ -536,7 +531,7 @@ channel_check_stopped(struct channel *c)
   switch (c->channel_state)
   {
     case CS_STOP:
-      if (c->out_req.hook || c->in_req.hook || c->out_table || c->in_table)
+      if (c->out_req.hook || c->in_req.hook)
        return;
 
       channel_set_state(c, CS_DOWN);
@@ -581,20 +576,14 @@ channel_export_stopped(struct rt_export_request *req)
   {
     c->refeeding = 1;
     c->refeed_pending = 0;
-    rt_request_export(c->table, req);
+    rt_request_export(&c->table->exporter, req);
     return;
   }
 
   mb_free(c->out_req.name);
   c->out_req.name = NULL;
 
-  if (c->restart_export)
-  {
-    c->restart_export = 0;
-    channel_start_export(c);
-  }
-  else
-    channel_check_stopped(c);
+  channel_check_stopped(c);
 }
 
 static void
@@ -616,330 +605,66 @@ channel_feed_end(struct channel *c)
     return;
   }
 
-  if (c->out_table)
-    rt_refresh_end(&c->out_table->push);
-  else if (c->proto->feed_end)
+  if (c->proto->feed_end)
     c->proto->feed_end(c);
 
   if (c->refeed_pending)
     rt_stop_export(req, channel_export_stopped);
-}
-
-#define CHANNEL_AUX_TABLE_DUMP_REQ(inout, imex, pgimex, pushget) static void \
-  channel_##inout##_##pushget##_dump_req(struct rt_##pgimex##_request *req) { \
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, pushget, req); \
-  debug("  Channel %s.%s " #imex " table " #pushget " request %p\n", cat->c->proto->name, cat->c->name, req); }
-
-CHANNEL_AUX_TABLE_DUMP_REQ(in, import, import, push)
-CHANNEL_AUX_TABLE_DUMP_REQ(in, import, export, get)
-CHANNEL_AUX_TABLE_DUMP_REQ(out, export, import, push)
-CHANNEL_AUX_TABLE_DUMP_REQ(out, export, export, get)
-
-#undef CHANNEL_AUX_TABLE_DUMP_REQ
-
-static uint channel_aux_imex(struct channel_aux_table *cat)
-{
-  if (cat->c->in_table == cat)
-    return 0;
-  else if (cat->c->out_table == cat)
-    return 1;
-  else
-    bug("Channel aux table must be in_table or out_table");
-}
-
-static void
-channel_aux_stopped(void *data)
-{
-  struct channel_aux_table *cat = data;
-  struct channel *c = cat->c;
-
-  if (channel_aux_imex(cat))
-    c->out_table = NULL;
-  else
-    c->in_table = NULL;
-
-  rfree(cat->tab->rp);
-
-  mb_free(cat);
-  return channel_check_stopped(c);
-}
-
-static void
-channel_aux_import_stopped(struct rt_import_request *req)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req);
-  ASSERT_DIE(cat->tab->delete_event);
-}
-
-static void
-channel_aux_export_stopped(struct rt_export_request *req)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  req->hook = NULL;
-
-  if (cat->refeed_pending && !cat->tab->delete_event)
-  {
-    cat->refeed_pending = 0;
-    rt_request_export(cat->tab, req);
-  }
-  else
-    ASSERT_DIE(cat->tab->delete_event);
-}
-
-static void
-channel_aux_stop(struct channel_aux_table *cat)
-{
-  rt_stop_import(&cat->push, channel_aux_import_stopped);
-  rt_stop_export(&cat->get, channel_aux_export_stopped);
-
-  cat->tab->delete_event = ev_new_init(cat->tab->rp, channel_aux_stopped, cat);
-
-  rt_unlock_table(cat->tab);
-}
-
-static void
-channel_push_log_state_change(struct rt_import_request *req, u8 state)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req);
-  const char *imex = channel_aux_imex(cat) ? "export" : "import";
-  CD(cat->c, "Channel %s table import state changed to %s", imex, rt_import_state_name(state));
-}
-
-static void
-channel_get_log_state_change(struct rt_export_request *req, u8 state)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  const char *imex = channel_aux_imex(cat) ? "export" : "import";
-  CD(cat->c, "Channel %s table export state changed to %s", imex, rt_export_state_name(state));
-
-  switch (state)
-  {
-    case TES_FEEDING:
-      if (imex && cat->c->proto->feed_begin)
-        cat->c->proto->feed_begin(cat->c, !cat->c->refeeding);
-      else if (!imex)
-        rt_refresh_begin(&cat->c->in_req);
-      break;
-
-    case TES_READY:
-      if (imex && cat->c->proto->feed_end)
-        cat->c->proto->feed_end(cat->c);
-      else if (!imex)
-        rt_refresh_end(&cat->c->in_req);
-
-      if (cat->refeed_pending)
-        rt_stop_export(&cat->get, channel_aux_export_stopped);
-
-      break;
-  }
-}
-
-void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src);
-
-static int
-channel_aux_export_one_any(struct rt_export_request *req, struct rt_pending_export *rpe, rte **new, rte **old)
-{
-  struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src;
-  *old = RTES_OR_NULL(rpe->old);
-  struct rte_storage *new_stored;
-
-  while (rpe)
-  {
-    new_stored = rpe->new;
-    rpe_mark_seen(req->hook, rpe);
-    rpe = rpe_next(rpe, src);
-  }
-
-  *new = RTES_CLONE(new_stored, *new);
-
-  return (*new || *old) && (&new_stored->rte != *old);
-}
-
-static int
-channel_aux_export_one_best(struct rt_export_request *req, struct rt_pending_export *rpe, rte **new, rte **old)
-{
-  *old = RTES_OR_NULL(rpe->old_best);
-  struct rte_storage *new_stored;
-
-  while (rpe)
-  {
-    new_stored = rpe->new_best;
-    rpe_mark_seen(req->hook, rpe);
-    rpe = rpe_next(rpe, NULL);
-  }
-
-  *new = RTES_CLONE(new_stored, *new);
-
-  return (*new || *old) && (&new_stored->rte != *old);
-}
-
-static void
-channel_in_export_one_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-
-  rte n0, *new = &n0, *old;
-  if (channel_aux_export_one_any(req, rpe, &new, &old))
-    rte_update_direct(cat->c, net, new, old ? old->src : new->src);
+  else
+    c->refeeding = 0;
 }
 
-static void
-channel_in_export_one_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+/* Called by protocol for reload from in_table */
+void
+channel_schedule_reload(struct channel *c)
 {
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-
-  rte n0, *new = &n0, *old;
-  if (channel_aux_export_one_best(req, rpe, &new, &old))
-    rte_update_direct(cat->c, net, new, old ? old->src : new->src);
-}
+  ASSERT(c->in_req.hook);
 
-static void
-channel_in_export_bulk_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  for (uint i=0; i<count; i++)
-  {
-    rte n0 = *feed[i];
-    rte_update_direct(cat->c, net, &n0, n0.src);
-  }
+  rt_request_export(&c->table->exporter, &c->reload_req);
 }
 
 static void
-channel_in_export_bulk_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
+channel_reload_stopped(struct rt_export_request *req)
 {
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  if (!count)
-    return;
+  struct channel *c = SKIP_BACK(struct channel, reload_req, req);
 
-  rte n0 = *feed[0];
-  rte_update_direct(cat->c, net, &n0, n0.src);
+  /* Restart reload */
+  if (c->reload_pending)
+    channel_request_reload(c);
 }
 
-void do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old);
-
 static void
-channel_out_export_one_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+channel_reload_log_state_change(struct rt_export_request *req, u8 state)
 {
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  rte n0, *new = &n0, *old;
-  if (channel_aux_export_one_any(req, rpe, &new, &old))
-    do_rt_notify_direct(cat->c, net, new, old);
+  if (state == TES_READY)
+    rt_stop_export(req, channel_reload_stopped);
 }
 
 static void
-channel_out_export_one_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
+channel_reload_dump_req(struct rt_export_request *req)
 {
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  rte n0, *new = &n0, *old;
-  if (channel_aux_export_one_best(req, rpe, &new, &old))
-    do_rt_notify_direct(cat->c, net, new, old);
+  struct channel *c = SKIP_BACK(struct channel, reload_req, req);
+  debug("  Channel %s.%s import reload request %p\n", c->proto->name, c->name, req);
 }
 
-static void
-channel_out_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
-{
-  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
-  if (cat->c->ra_mode != RA_ANY)
-    ASSERT_DIE(count <= 1);
-
-  for (uint i=0; i<count; i++)
-  {
-    rte n0 = *feed[i];
-    do_rt_notify_direct(cat->c, net, &n0, NULL);
-  }
-}
+void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count);
 
 /* Called by protocol to activate in_table */
 void
-channel_setup_in_table(struct channel *c, int best)
+channel_setup_in_table(struct channel *c)
 {
-  int nlen = sizeof("import") + strlen(c->name) + strlen(c->proto->name) + 3;
-
-  struct {
-    struct channel_aux_table cat;
-    struct rtable_config tab_cf;
-    char name[0];
-  } *cat = mb_allocz(c->proto->pool, sizeof(*cat) + nlen);
-
-  bsprintf(cat->name, "%s.%s.import", c->proto->name, c->name);
-
-  cat->tab_cf.name = cat->name;
-  cat->tab_cf.addr_type = c->net_type;
-
-  c->in_table = &cat->cat;
-  c->in_table->push = (struct rt_import_request) {
-    .name = cat->name,
-    .trace_routes = c->debug | c->proto->debug,
-    .dump_req = channel_in_push_dump_req,
-    .log_state_change = channel_push_log_state_change,
-    .preimport = channel_in_preimport,
-  };
-  c->in_table->get = (struct rt_export_request) {
-    .name = cat->name,
+  c->reload_req = (struct rt_export_request) {
+    .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name),
     .list = proto_work_list(c->proto),
     .trace_routes = c->debug | c->proto->debug,
-    .dump_req = channel_in_get_dump_req,
-    .log_state_change = channel_get_log_state_change,
-    .export_one = best ? channel_in_export_one_best : channel_in_export_one_any,
-    .export_bulk = best ? channel_in_export_bulk_best : channel_in_export_bulk_any,
+    .export_bulk = channel_reload_export_bulk,
+    .dump_req = channel_reload_dump_req,
+    .log_state_change = channel_reload_log_state_change,
   };
 
-  c->in_table->c = c;
-  c->in_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
-  rt_lock_table(c->in_table->tab);
-
-  rt_request_import(c->in_table->tab, &c->in_table->push);
-  rt_request_export(c->in_table->tab, &c->in_table->get);
+  c->in_keep |= RIK_PREFILTER;
 }
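For orientation, a protocol that previously used an auxiliary in_table now only flips the prefilter flag and asks for a reload when needed. A minimal sketch follows; my_proto_start, my_proto_filters_changed and the channel name "ipv4" are hypothetical, not from the patch:

  /* Illustrative sketch only: opting into kept prefilter routes at start,
   * then re-running the import filter over them later. */
  static int
  my_proto_start(struct proto *p)
  {
    struct channel *c = proto_find_channel_by_name(p, "ipv4");  /* assumed to exist */

    channel_setup_in_table(c);    /* sets RIK_PREFILTER; routes kept as received */
    return PS_UP;
  }

  static void
  my_proto_filters_changed(struct channel *c)
  {
    channel_schedule_reload(c);   /* feeds kept routes through the import filter again */
  }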
-/* Called by protocol to activate out_table */
-void
-channel_setup_out_table(struct channel *c)
-{
-  int nlen = sizeof("export") + strlen(c->name) + strlen(c->proto->name) + 3;
-
-  struct {
-    struct channel_aux_table cat;
-    struct rtable_config tab_cf;
-    char name[0];
-  } *cat = mb_allocz(c->proto->pool, sizeof(*cat) + nlen);
-
-  bsprintf(cat->name, "%s.%s.export", c->proto->name, c->name);
-
-  cat->tab_cf.name = cat->name;
-  cat->tab_cf.addr_type = c->net_type;
-
-  c->out_table = &cat->cat;
-  c->out_table->push = (struct rt_import_request) {
-    .name = cat->name,
-    .trace_routes = c->debug | c->proto->debug,
-    .dump_req = channel_out_push_dump_req,
-    .log_state_change = channel_push_log_state_change,
-  };
-  c->out_table->get = (struct rt_export_request) {
-    .name = cat->name,
-    .list = proto_work_list(c->proto),
-    .trace_routes = c->debug | c->proto->debug,
-    .dump_req = channel_out_get_dump_req,
-    .log_state_change = channel_get_log_state_change,
-    .export_one = (c->ra_mode == RA_ANY) ? channel_out_export_one_any : channel_out_export_one_best,
-    .export_bulk = channel_out_export_bulk,
-  };
-
-  c->out_table->c = c;
-  c->out_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
-  rt_lock_table(c->out_table->tab);
-
-  rt_request_import(c->out_table->tab, &c->out_table->push);
-  rt_request_export(c->out_table->tab, &c->out_table->get);
-}
-
-static void
-channel_aux_request_refeed(struct channel_aux_table *cat)
-{
-  cat->refeed_pending = 1;
-  rt_stop_export(&cat->get, channel_aux_export_stopped);
-}
 
 static void
 channel_do_start(struct channel *c)
@@ -965,12 +690,16 @@ channel_do_up(struct channel *c)
 static void
 channel_do_pause(struct channel *c)
 {
+  /* Need to abort feeding */
+  if (c->reload_req.hook)
+  {
+    c->reload_pending = 0;
+    rt_stop_export(&c->reload_req, channel_reload_stopped);
+  }
+
   /* Stop export */
   if (c->out_req.hook)
-  {
     rt_stop_export(&c->out_req, channel_export_stopped);
-    c->refeeding = 0;
-  }
 
   channel_roa_unsubscribe_all(c);
 
@@ -981,13 +710,6 @@ channel_do_pause(struct channel *c)
 static void
 channel_do_stop(struct channel *c)
 {
-  /* Drop auxiliary tables */
-  if (c->in_table)
-    channel_aux_stop(c->in_table);
-
-  if (c->out_table)
-    channel_aux_stop(c->out_table);
-
   /* Stop import */
   if (c->in_req.hook)
     rt_stop_import(&c->in_req, channel_import_stopped);
@@ -998,13 +720,12 @@ channel_do_stop(struct channel *c)
 
   CALL(c->channel->shutdown, c);
 
-  channel_roa_unsubscribe_all(c);
 }
 
 static void
 channel_do_down(struct channel *c)
 {
-  ASSERT(!c->out_req.hook && !c->in_req.hook && !c->out_table && !c->in_table);
+  ASSERT(!c->reload_req.hook);
 
   c->proto->active_channels--;
 
@@ -1012,11 +733,11 @@ channel_do_down(struct channel *c)
   memset(&c->import_stats, 0, sizeof(struct channel_import_stats));
   memset(&c->export_stats, 0, sizeof(struct channel_export_stats));
 
-  CALL(c->channel->cleanup, c);
+  c->out_table = NULL;
 
-  /* This have to be done in here, as channel pool is freed before channel_do_down() */
-  bmap_free(&c->export_map);
-  bmap_free(&c->export_reject_map);
+  /* The in_table and out_table are going to be freed by freeing their resource pools. */
+
+  CALL(c->channel->cleanup, c);
 
   /* Schedule protocol shutddown */
   if (proto_is_done(c->proto))
@@ -1046,7 +767,7 @@ channel_set_state(struct channel *c, uint state)
     break;
 
   case CS_UP:
-    ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE);
+    ASSERT(cs == CS_DOWN || cs == CS_START);
 
     if (cs == CS_DOWN)
       channel_do_start(c);
@@ -1096,8 +817,8 @@ channel_set_state(struct channel *c, uint state)
 * completed, it will switch back to ES_READY. This function can be called
 * even when feeding is already running, in that case it is restarted.
 */
-static void
-channel_request_table_feeding(struct channel *c)
+void
+channel_request_feeding(struct channel *c)
 {
   ASSERT(c->out_req.hook);
 
@@ -1105,23 +826,7 @@
   rt_stop_export(&c->out_req, channel_export_stopped);
 }
 
-void
-channel_request_feeding(struct channel *c)
-{
-  if (c->gr_wait || !c->proto->rt_notify)
-    return;
-
-  CD(c, "Refeed requested");
-
-  ASSERT_DIE(c->out_req.hook);
-
-  if (c->out_table)
-    channel_aux_request_refeed(c->out_table);
-  else
-    channel_request_table_feeding(c);
-}
-
-void
+static void
 channel_request_reload(struct channel *c)
 {
   ASSERT(c->in_req.hook);
@@ -1129,29 +834,14 @@
 
   CD(c, "Reload requested");
 
-  if (c->in_table)
-    channel_aux_request_refeed(c->in_table);
-  else
-    c->proto->reload_routes(c);
-}
-
-void
-channel_refresh_begin(struct channel *c)
-{
-  CD(c, "Channel route refresh begin");
-  if (c->in_table)
-    rt_refresh_begin(&c->in_table->push);
-  else
-    rt_refresh_begin(&c->in_req);
-}
+  c->proto->reload_routes(c);
 
-void
-channel_refresh_end(struct channel *c)
-{
-  if (c->in_table)
-    rt_refresh_end(&c->in_table->push);
-  else
-    rt_refresh_end(&c->in_req);
+  /*
+   * Should this be done before reload_routes() hook?
+   * Perhaps, but routes are updated asynchronously.
+   */
+  channel_reset_limit(c, &c->rx_limit, PLD_RX);
+  channel_reset_limit(c, &c->in_limit, PLD_IN);
 }
 
 const struct channel_class channel_basic = {
@@ -1237,7 +927,12 @@ int
 channel_reconfigure(struct channel *c, struct channel_config *cf)
 {
   /* FIXME: better handle these changes, also handle in_keep_filtered */
-  if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode)))
+  if ((c->table != cf->table->table) ||
+      (cf->ra_mode && (c->ra_mode != cf->ra_mode)) ||
+      (cf->in_keep != c->in_keep) ||
+      cf->out_subprefix && c->out_subprefix &&
+      !net_equal(cf->out_subprefix, c->out_subprefix) ||
+      (!cf->out_subprefix != !c->out_subprefix))
     return 0;
 
   /* Note that filter_same() requires arguments in (new, old) order */
@@ -1262,9 +957,9 @@ channel_reconfigure(struct channel *c, struct channel_config *cf)
   // c->ra_mode = cf->ra_mode;
   c->merge_limit = cf->merge_limit;
   c->preference = cf->preference;
+  c->out_req.addr = c->out_subprefix = cf->out_subprefix;
   c->debug = cf->debug;
   c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug;
-  c->in_keep_filtered = cf->in_keep_filtered;
   c->rpki_reload = cf->rpki_reload;
 
   /* Execute channel-specific reconfigure hook */
@@ -1309,7 +1004,7 @@ channel_reconfigure(struct channel *c, struct channel_config *cf)
     channel_request_reload(c);
 
   if (export_changed)
-    channel_request_table_feeding(c);
+    channel_request_feeding(c);
 
 done:
   CD(c, "Reconfigured");
@@ -1376,7 +1071,6 @@ proto_loop_stopped(void *ptr)
   birdloop_enter(&main_birdloop);
 
   p->loop = &main_birdloop;
-  p->event->list = NULL;
   proto_cleanup(p);
 
   birdloop_leave(&main_birdloop);
@@ -1459,21 +1153,13 @@ proto_start(struct proto *p)
   DBG("Kicking %s up\n", p->name);
   PD(p, "Starting");
 
-  int ns = strlen("Protocol ") + strlen(p->cf->name) + 1;
-  void *nb = mb_alloc(proto_pool, ns);
-  ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Protocol %s", p->cf->name));
-
-  p->pool = rp_new(proto_pool, nb);
+  p->pool = rp_newf(proto_pool, "Protocol %s", p->cf->name);
 
   if (graceful_restart_state == GRS_INIT)
     p->gr_recovery = 1;
 
   if (p->cf->loop_order != DOMAIN_ORDER(the_bird))
-    p->loop = birdloop_new(p->pool, p->cf->loop_order, nb);
-
-  p->event->list = proto_event_list(p);
-
-  mb_move(nb, p->pool);
+    p->loop = birdloop_new(p->pool, p->cf->loop_order, p->pool->name);
 
   PROTO_LOCKED_FROM_MAIN(p)
     proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP));
@@ -2043,7 +1729,7 @@ protos_dump_all(void)
   WALK_LIST(p, proto_list)
   {
 #define DPF(x)  (p->x ? " " #x : "")
-    debug("  protocol %s (%p) state %s with %d active channels flags: %s%s%s%s\n",
+    debug("  protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n",
          p->name, p, p_states[p->proto_state], p->active_channels,
          DPF(disabled), DPF(active), DPF(do_stop), DPF(reconfiguring));
 #undef DPF
@@ -2059,20 +1745,6 @@ protos_dump_all(void)
       debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state],
            c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-",
            c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-");
-      if (c->in_table)
-      {
-        debug("\tInput aux table:\n");
-        rt_dump_hooks(c->in_table->tab);
-        rt_dump(c->in_table->tab);
-        debug("\tEnd of input aux table.\n");
-      }
-      if (c->out_table)
-      {
-        debug("\tOutput aux table:\n");
-        rt_dump_hooks(c->in_table->tab);
-        rt_dump(c->in_table->tab);
-        debug("\tEnd of output aux table.\n");
-      }
     }
 
     if (p->proto->dump && (p->proto_state != PS_DOWN))
@@ -2092,14 +1764,13 @@ void
 proto_build(struct protocol *p)
 {
   add_tail(&protocol_list, &p->n);
-  ASSERT(p->class);
-  ASSERT(!class_to_protocol[p->class]);
-  class_to_protocol[p->class] = p;
 }
 
 /* FIXME: convert this call to some protocol hook */
 extern void bfd_init_all(void);
 
+void protos_build_gen(void);
+
 /**
  * protos_build - build a protocol list
  *
@@ -2112,44 +1783,7 @@ extern void bfd_init_all(void);
 void
 protos_build(void)
 {
-  init_list(&proto_list);
-  init_list(&protocol_list);
-
-  proto_build(&proto_device);
-#ifdef CONFIG_RADV
-  proto_build(&proto_radv);
-#endif
-#ifdef CONFIG_RIP
-  proto_build(&proto_rip);
-#endif
-#ifdef CONFIG_STATIC
-  proto_build(&proto_static);
-#endif
-#ifdef CONFIG_MRT
-  proto_build(&proto_mrt);
-#endif
-#ifdef CONFIG_OSPF
-  proto_build(&proto_ospf);
-#endif
-#ifdef CONFIG_PIPE
-  proto_build(&proto_pipe);
-#endif
-#ifdef CONFIG_BGP
-  proto_build(&proto_bgp);
-#endif
-#ifdef CONFIG_BFD
-  proto_build(&proto_bfd);
-  bfd_init_all();
-#endif
-#ifdef CONFIG_BABEL
-  proto_build(&proto_babel);
-#endif
-#ifdef CONFIG_RPKI
-  proto_build(&proto_rpki);
-#endif
-#ifdef CONFIG_PERF
-  proto_build(&proto_perf);
-#endif
+  protos_build_gen();
 
   proto_pool = rp_new(&root_pool, "Protocols");
   proto_shutdown_timer = tm_new(proto_pool);
@@ -2496,18 +2130,18 @@ channel_show_stats(struct channel *c)
   u32 in_routes = c->in_limit.count;
   u32 out_routes = c->out_limit.count;
 
-  if (c->in_keep_filtered)
+  if (c->in_keep)
     cli_msg(-1006, "    Routes:         %u imported, %u filtered, %u exported, %u preferred",
            in_routes, (rx_routes - in_routes), out_routes, SRI(pref));
   else
     cli_msg(-1006, "    Routes:         %u imported, %u exported, %u preferred",
            in_routes, out_routes, SRI(pref));
 
-  cli_msg(-1006, "    Route change stats:     received   rejected   filtered    ignored    limited   accepted");
-  cli_msg(-1006, "      Import updates:     %10u %10u %10u %10u %10u %10u",
+  cli_msg(-1006, "    Route change stats:     received   rejected   filtered    ignored   RX limit   IN limit   accepted");
+  cli_msg(-1006, "      Import updates:     %10u %10u %10u %10u %10u %10u %10u",
          SCI(updates_received), SCI(updates_invalid),
          SCI(updates_filtered), SRI(updates_ignored),
-         SCI(updates_limited_rx) + SCI(updates_limited_in),
+         SCI(updates_limited_rx), SCI(updates_limited_in),
          SRI(updates_accepted));
   cli_msg(-1006, "      Import withdraws:   %10u %10u        ---      %10u        ---   %10u",
         SCI(withdraws_received), SCI(withdraws_invalid),
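The open-coded list of proto_build() calls deleted above is replaced by protos_build_gen(), a function emitted at build time. Under a build with roughly the same protocol set, the generated translation unit might plausibly look like the following sketch; the per-protocol constructor names are assumptions, not taken from this patch:

  /* Illustrative reconstruction of a generated proto-build file. */
  #include "lib/birdlib.h"

  void dev_build(void);      /* hypothetical per-protocol constructors, */
  void static_build(void);   /* one per protocol compiled in            */

  void protos_build_gen(void)
  {
    dev_build();
    static_build();
    /* ... one call per configured protocol ... */
  }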
diff --git a/nest/protocol.h b/nest/protocol.h
index 440297a1..709d6415 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -12,7 +12,7 @@
 #include "lib/lists.h"
 #include "lib/resource.h"
 #include "lib/event.h"
-#include "nest/route.h"
+#include "nest/rt.h"
 #include "nest/limit.h"
 #include "conf/conf.h"
 
@@ -38,38 +38,20 @@ struct symbol;
  *     Routing Protocol
  */
 
-enum protocol_class {
-  PROTOCOL_NONE,
-  PROTOCOL_BABEL,
-  PROTOCOL_BFD,
-  PROTOCOL_BGP,
-  PROTOCOL_DEVICE,
-  PROTOCOL_DIRECT,
-  PROTOCOL_KERNEL,
-  PROTOCOL_OSPF,
-  PROTOCOL_MRT,
-  PROTOCOL_PERF,
-  PROTOCOL_PIPE,
-  PROTOCOL_RADV,
-  PROTOCOL_RIP,
-  PROTOCOL_RPKI,
-  PROTOCOL_STATIC,
-  PROTOCOL__MAX
-};
-
-extern struct protocol *class_to_protocol[PROTOCOL__MAX];
 
 struct protocol {
   node n;
   char *name;
   char *template;                       /* Template for automatic generation of names */
   int name_counter;                     /* Counter for automatic name generation */
-  enum protocol_class class;            /* Machine readable protocol class */
   uint preference;                      /* Default protocol preference */
   uint channel_mask;                    /* Mask of accepted channel types (NB_*) */
   uint proto_size;                      /* Size of protocol data structure */
   uint config_size;                     /* Size of protocol config data structure */
 
+  uint eattr_begin;                     /* First ID of registered eattrs */
+  uint eattr_end;                       /* End of eattr id zone */
+
   void (*preconfig)(struct protocol *, struct config *);        /* Just before configuring */
   void (*postconfig)(struct proto_config *);                    /* After configuring each instance */
   struct proto * (*init)(struct proto_config *);                /* Create new instance */
@@ -78,13 +60,13 @@ struct protocol {
   int (*start)(struct proto *);         /* Start the instance */
   int (*shutdown)(struct proto *);      /* Stop the instance */
   void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */
-  int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */
+//  int (*get_attr)(const struct eattr *, byte *buf, int buflen);       /* ASCIIfy dynamic attribute (returns GA_*) */
   void (*show_proto_info)(struct proto *);      /* Show protocol info (for `show protocols all' command) */
   void (*copy_config)(struct proto_config *, struct proto_config *);    /* Copy config from given protocol instance */
 };
 
-void protos_build(void);
-void proto_build(struct protocol *);
+void protos_build(void);                /* Called from sysdep to initialize protocols */
+void proto_build(struct protocol *);    /* Called from protocol to register itself */
 void protos_preconfig(struct config *);
 void protos_commit(struct config *new, struct config *old, int force_restart, int type);
 struct proto * proto_spawn(struct proto_config *cf, uint disabled);
@@ -152,7 +134,7 @@ struct proto {
   u32 debug;                            /* Debugging flags */
   u32 mrtdump;                          /* MRTDump flags */
   uint active_channels;                 /* Number of active channels */
-  uint active_coroutines;               /* Number of active coroutines */
+  uint active_loops;                    /* Number of active IO loops */
   byte net_type;                        /* Protocol network type (NET_*), 0 for undefined */
   byte disabled;                        /* Manually disabled */
   byte vrf_set;                         /* Related VRF instance (above) is defined */
@@ -212,7 +194,7 @@ struct proto {
   int (*rte_mergable)(struct rte *, struct rte *);
   void (*rte_insert)(struct network *, struct rte *);
   void (*rte_remove)(struct network *, struct rte *);
-  u32 (*rte_igp_metric)(struct rte *);
+  u32 (*rte_igp_metric)(const struct rte *);
 
   /* Hic sunt protocol-specific data */
 };
@@ -360,7 +342,7 @@ void proto_notify_state(struct proto *p, unsigned state);
 */
 static inline int
 proto_is_inactive(struct proto *p)
-{ return (p->active_channels == 0) && (p->active_coroutines == 0) && (p->sources.uc == 0); }
+{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); }
 
 
 /*
@@ -474,9 +456,10 @@ struct channel_config {
   struct proto_config *parent;          /* Where channel is defined (proto or template) */
   struct rtable_config *table;          /* Table we're attached to */
   const struct filter *in_filter, *out_filter; /* Attached filters */
+  const net_addr *out_subprefix;        /* Export only subprefixes of this net */
 
   struct channel_limit rx_limit;        /* Limit for receiving routes from protocol
-                                           (relevant when in_keep_filtered is active) */
+                                           (relevant when in_keep & RIK_REJECTED) */
   struct channel_limit in_limit;        /* Limit for importing routes from protocol */
   struct channel_limit out_limit;       /* Limit for exporting routes to protocol */
 
@@ -485,7 +468,7 @@ struct channel_config {
   u16 preference;                       /* Default route preference */
   u32 debug;                            /* Debugging flags (D_*) */
   u8 merge_limit;                       /* Maximal number of nexthops for RA_MERGED */
-  u8 in_keep_filtered;                  /* Routes rejected in import filter are kept */
+  u8 in_keep;                           /* Which states of routes to keep (RIK_*) */
   u8 rpki_reload;                       /* RPKI changes trigger channel reload */
 };
 
@@ -499,10 +482,11 @@ struct channel {
   struct rtable *table;
   const struct filter *in_filter;       /* Input filter */
   const struct filter *out_filter;      /* Output filter */
+  const net_addr *out_subprefix;        /* Export only subprefixes of this net */
   struct bmap export_map;               /* Keeps track which routes were really exported */
   struct bmap export_reject_map;        /* Keeps track which routes were rejected by export filter */
 
-  struct limit rx_limit;                /* Receive limit (for in_keep_filtered) */
+  struct limit rx_limit;                /* Receive limit (for in_keep & RIK_REJECTED) */
   struct limit in_limit;                /* Input limit */
   struct limit out_limit;               /* Output limit */
 
@@ -539,7 +523,7 @@ struct channel {
   u16 preference;                       /* Default route preference */
   u32 debug;                            /* Debugging flags (D_*) */
   u8 merge_limit;                       /* Maximal number of nexthops for RA_MERGED */
-  u8 in_keep_filtered;                  /* Routes rejected in import filter are kept */
+  u8 in_keep;                           /* Which states of routes to keep (RIK_*) */
   u8 disabled;
   u8 stale;                             /* Used in reconfiguration */
 
@@ -548,29 +532,22 @@ struct channel {
   u8 reloadable;                        /* Hook reload_routes() is allowed on the channel */
   u8 gr_lock;                           /* Graceful restart mechanism should wait for this channel */
   u8 gr_wait;                           /* Route export to channel is postponed until graceful restart */
-  u8 restart_export;                    /* Route export should restart as soon as it stops */
 
   btime last_state_change;              /* Time of last state transition */
 
-  struct channel_aux_table *in_table;   /* Internal table for received routes */
+  struct rt_export_request reload_req;  /* Feeder for import reload */
+
   u8 reload_pending;                    /* Reloading and another reload is scheduled */
   u8 refeed_pending;                    /* Refeeding and another refeed is scheduled */
   u8 rpki_reload;                       /* RPKI changes trigger channel reload */
 
-  struct channel_aux_table *out_table;  /* Internal table for exported routes */
+  struct rt_exporter *out_table;        /* Internal table for exported routes */
 
   list roa_subscriptions;               /* List of active ROA table subscriptions based on filters roa_check() */
 };
 
-struct channel_aux_table {
-  struct channel *c;
-  struct rt_import_request push;
-  struct rt_export_request get;
-  rtable *tab;
-  event *stop;
-  u8 refeed_pending;
-};
+#define RIK_REJECTED 1                  /* Routes rejected in import filter are kept */
+#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */
 
 /*
  *     Channel states
@@ -636,8 +613,7 @@ struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
 int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
 
 void channel_set_state(struct channel *c, uint state);
-void channel_setup_in_table(struct channel *c, int best);
-void channel_setup_out_table(struct channel *c);
+void channel_setup_in_table(struct channel *c);
 void channel_schedule_reload(struct channel *c);
 
 static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); }
@@ -645,9 +621,6 @@ static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP)
 static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); }
 
 void channel_request_feeding(struct channel *c);
-void channel_request_reload(struct channel *c);
-void channel_refresh_begin(struct channel *c);
-void channel_refresh_end(struct channel *c);
 void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
 void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto);
 int channel_reconfigure(struct channel *c, struct channel_config *cf);
diff --git a/nest/route.h b/nest/route.h
deleted file mode 100644
index a01eff1a..00000000
--- a/nest/route.h
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- *     BIRD Internet Routing Daemon -- Routing Table
- *
- *     (c) 1998--2000 Martin Mares <mj@ucw.cz>
- *     (c) 2019--2021 Maria Matejka <mq@jmq.cz>
- *
- *     Can be freely distributed and used under the terms of the GNU GPL.
- */
-
-#ifndef _BIRD_ROUTE_H_
-#define _BIRD_ROUTE_H_
-
-#include "lib/lists.h"
-#include "lib/event.h"
-#include "lib/bitmap.h"
-#include "lib/resource.h"
-#include "lib/net.h"
-#include "lib/hash.h"
-#include "lib/event.h"
-
-#include <stdatomic.h>
-
-struct ea_list;
-struct protocol;
-struct proto;
-struct channel;
-struct rte_src;
-struct symbol;
-struct timer;
-struct filter;
-struct cli;
-
-/*
- *     Generic data structure for storing network prefixes. Also used
- *     for the master routing table. Currently implemented as a hash
- *     table.
- *
- *     Available operations:
- *             - insertion of new entry
- *             - deletion of entry
- *             - searching for entry by network prefix
- *             - asynchronous retrieval of fib contents
- */
-
-struct fib_node {
-  struct fib_node *next;                /* Next in hash chain */
-  struct fib_iterator *readers;         /* List of readers of this node */
-  net_addr addr[0];
-};
-
-struct fib_iterator {                   /* See lib/slists.h for an explanation */
-  struct fib_iterator *prev, *next;     /* Must be synced with struct fib_node! */
-  byte efef;                            /* 0xff to distinguish between iterator and node */
-  byte pad[3];
-  struct fib_node *node;                /* Or NULL if freshly merged */
-  uint hash;
-};
-
-typedef void (*fib_init_fn)(void *);
-
-struct fib {
-  pool *fib_pool;                       /* Pool holding all our data */
-  slab *fib_slab;                       /* Slab holding all fib nodes */
-  struct fib_node **hash_table;         /* Node hash table */
-  uint hash_size;                       /* Number of hash table entries (a power of two) */
-  uint hash_order;                      /* Binary logarithm of hash_size */
-  uint hash_shift;                      /* 32 - hash_order */
-  uint addr_type;                       /* Type of address data stored in fib (NET_*) */
-  uint node_size;                       /* FIB node size, 0 for nonuniform */
-  uint node_offset;                     /* Offset of fib_node struct inside of user data */
-  uint entries;                         /* Number of entries */
-  uint entries_min, entries_max;        /* Entry count limits (else start rehashing) */
-  fib_init_fn init;                     /* Constructor */
-};
-
-static inline void * fib_node_to_user(struct fib *f, struct fib_node *e)
-{ return e ? (void *) ((char *) e - f->node_offset) : NULL; }
-
-static inline struct fib_node * fib_user_to_node(struct fib *f, void *e)
-{ return e ? (void *) ((char *) e + f->node_offset) : NULL; }
-
-void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init);
-void *fib_find(struct fib *, const net_addr *);        /* Find or return NULL if doesn't exist */
-void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */
-void *fib_get(struct fib *, const net_addr *);         /* Find or create new if nonexistent */
-void *fib_route(struct fib *, const net_addr *);       /* Longest-match routing lookup */
-void fib_delete(struct fib *, void *);  /* Remove fib entry */
-void fib_free(struct fib *);            /* Destroy the fib */
-void fib_check(struct fib *);           /* Consistency check for debugging */
-
-void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */
-struct fib_node *fit_get(struct fib *, struct fib_iterator *);
-void fit_put(struct fib_iterator *, struct fib_node *);
-void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos);
-void fit_put_end(struct fib_iterator *i);
-void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src);
-
-
-#define FIB_WALK(fib, type, z) do {                            \
-       struct fib_node *fn_, **ff_ = (fib)->hash_table;        \
-       uint count_ = (fib)->hash_size;                         \
-       type *z;                                                \
-       while (count_--)                                        \
-         for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next)
-
-#define FIB_WALK_END } while (0)
-
-#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib)
-
-#define FIB_ITERATE_START(fib, it, type, z) do {               \
-       struct fib_node *fn_ = fit_get(fib, it);                \
-       uint count_ = (fib)->hash_size;                         \
-       uint hpos_ = (it)->hash;                                \
-       type *z;                                                \
-       for(;;) {                                               \
-         if (!fn_)                                             \
-           {                                                   \
-              if (++hpos_ >= count_)                           \
-                break;                                         \
-              fn_ = (fib)->hash_table[hpos_];                  \
-              continue;                                        \
-           }                                                   \
-         z = fib_node_to_user(fib, fn_);
-
-#define FIB_ITERATE_END fn_ = fn_->next; } } while(0)
-
-#define FIB_ITERATE_PUT(it) fit_put(it, fn_)
-
-#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_)
-
-#define FIB_ITERATE_PUT_END(it) fit_put_end(it)
-
-#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it)
-
-#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src)
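The walk macros pair up as in this sketch, which mirrors the usage pattern implied by the removed header. Illustrative only; rtable and net are the types declared below, and count_nets is a hypothetical helper:

  /* Illustrative usage of the FIB walk macros: visit every network
   * in a table's fib without suspending. */
  static unsigned
  count_nets(rtable *tab)
  {
    unsigned n = 0;
    FIB_WALK(&tab->fib, net, nn)
    {
      n++;                     /* nn points at each struct network in turn */
    }
    FIB_WALK_END;
    return n;
  }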
-
-
-/*
- *     Master Routing Tables. Generally speaking, each of them contains a FIB
- *     with each entry pointing to a list of route entries representing routes
- *     to given network (with the selected one at the head).
- *
- *     Each of the RTE's contains variable data (the preference and protocol-dependent
- *     metrics) and a pointer to a route attribute block common for many routes).
- *
- *     It's guaranteed that there is at most one RTE for every (prefix,proto) pair.
- */
-
-struct rtable_config {
-  node n;
-  char *name;
-  struct config *config;
-  struct rtable *table;
-  struct proto_config *krt_attached;    /* Kernel syncer attached to this table */
-  uint addr_type;                       /* Type of address data stored in table (NET_*) */
-  int gc_max_ops;                       /* Maximum number of operations before GC is run */
-  int gc_min_time;                      /* Minimum time between two consecutive GC runs */
-  byte sorted;                          /* Routes of network are sorted according to rte_better() */
-  btime min_settle_time;                /* Minimum settle time for notifications */
-  btime max_settle_time;                /* Maximum settle time for notifications */
-  btime export_settle_time;             /* Delay before exports are announced */
-  uint cork_limit;                      /* Amount of routes to be pending on export to cork imports */
-};
-
-typedef struct rtable {
-  resource r;
-  node n;                               /* Node in list of all tables */
-  pool *rp;                             /* Resource pool to allocate everything from, including itself */
-  struct slab *rte_slab;                /* Slab to allocate route objects */
-  struct fib fib;
-  char *name;                           /* Name of this table */
-  uint addr_type;                       /* Type of address data stored in table (NET_*) */
-  int use_count;                        /* Number of protocols using this table */
-  u32 rt_count;                         /* Number of routes in the table */
-
-  list imports;                         /* Registered route importers */
-  list exports;                         /* Registered route exporters */
-
-  struct hmap id_map;
-  struct hostcache *hostcache;
-  struct rtable_config *config;         /* Configuration of this table */
-  struct event *prune_event;            /* Event to prune abandoned routes */
-  struct event *ec_event;               /* Event to prune finished exports */
-  struct event *hcu_event;              /* Event to update host cache */
-  struct event *nhu_event;              /* Event to update next hops */
-  struct event *delete_event;           /* Event to delete the table */
-  btime last_rt_change;                 /* Last time when route changed */
-  btime base_settle_time;               /* Start time of rtable settling interval */
-  btime gc_time;                        /* Time of last GC */
-  int gc_counter;                       /* Number of operations since last GC */
-  byte prune_state;                     /* Table prune state, 1 -> scheduled, 2-> running */
-  byte nhu_state;                       /* Next Hop Update state */
-
-  byte cork_active;                     /* Congestion control activated */
-
-  struct fib_iterator prune_fit;        /* Rtable prune FIB iterator */
-  struct fib_iterator nhu_fit;          /* Next Hop Update FIB iterator */
-  struct tbf rl_pipe;                   /* Rate limiting token buffer for pipe collisions */
-
-  list subscribers;                     /* Subscribers for notifications */
-  struct timer *settle_timer;           /* Settle time for notifications */
-
-  list pending_exports;                 /* List of packed struct rt_pending_export */
-  btime base_export_time;               /* When first pending export was announced */
-  struct timer *export_timer;
-
-  struct rt_pending_export *first_export;       /* First export to announce */
-  u64 next_export_seq;                  /* The next export will have this ID */
-} rtable;
-
-struct rt_subscription {
-  node n;
-  rtable *tab;
-  void (*hook)(struct rt_subscription *b);
-  void *data;
-};
-
-#define NHU_CLEAN 0
-#define NHU_SCHEDULED 1
-#define NHU_RUNNING 2
-#define NHU_DIRTY 3
-
-typedef struct network {
-  struct rte_storage *routes;           /* Available routes for this network */
-  struct rt_pending_export *last, *first;       /* Routes with unfinished exports */
-  struct fib_node n;                    /* FIB flags reserved for kernel syncer */
-} net;
-
-struct hostcache {
-  slab *slab;                           /* Slab holding all hostentries */
-  struct hostentry **hash_table;        /* Hash table for hostentries */
-  unsigned hash_order, hash_shift;
-  unsigned hash_max, hash_min;
-  unsigned hash_items;
-  linpool *lp;                          /* Linpool for trie */
-  struct f_trie *trie;                  /* Trie of prefixes that might affect hostentries */
-  list hostentries;                     /* List of all hostentries */
-  byte update_hostcache;
-};
-
-struct hostentry {
-  node ln;
-  ip_addr addr;                         /* IP address of host, part of key */
-  ip_addr link;                         /* (link-local) IP address of host, used as gw
                                           if host is directly attached */
-  struct rtable *tab;                   /* Dependent table, part of key */
-  struct hostentry *next;               /* Next in hash chain */
-  unsigned hash_key;                    /* Hash key */
-  unsigned uc;                          /* Use count */
-  struct rta *src;                      /* Source rta entry */
-  byte dest;                            /* Chosen route destination type (RTD_...) */
-  byte nexthop_linkable;                /* Nexthop list is completely non-device */
-  u32 igp_metric;                       /* Chosen route IGP metric */
-};
-
-typedef struct rte {
-  struct rta *attrs;                    /* Attributes of this route */
-  const net_addr *net;                  /* Network this RTE belongs to */
-  struct rte_src *src;                  /* Route source that created the route */
-  struct rt_import_hook *sender;        /* Import hook used to send the route to the routing table */
-  btime lastmod;                        /* Last modified (set by table) */
-  u32 id;                               /* Table specific route id */
-  byte flags;                           /* Table-specific flags */
-  byte pflags;                          /* Protocol-specific flags */
-  u8 generation;                        /* If this route import is based on other previously exported route,
                                           this value should be 1 + MAX(generation of the parent routes).
                                           Otherwise the route is independent and this value is zero. */
-  u8 stale_cycle;                       /* Auxiliary value for route refresh */
-} rte;
-
-struct rte_storage {
-  struct rte_storage *next;             /* Next in chain */
-  struct rte rte;                       /* Route data */
-};
-
-#define RTES_CLONE(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL)
-#define RTES_OR_NULL(r) ((r) ? &((r)->rte) : NULL)
-
-#define REF_FILTERED   2                /* Route is rejected by import filter */
-#define REF_USE_STALE  4                /* Do not reset route's stale_cycle to the actual value */
-
-/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
-static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); }
-
-/* Route just has REF_FILTERED flag */
-static inline int rte_is_filtered(const rte *r) { return !!(r->flags & REF_FILTERED); }
-
-
-/* Table-channel connections */
-
-struct rt_import_request {
-  struct rt_import_hook *hook;          /* The table part of importer */
-  char *name;
-  u8 trace_routes;
-
-  void (*dump_req)(struct rt_import_request *req);
-  void (*log_state_change)(struct rt_import_request *req, u8 state);
-  /* Preimport is called when the @new route is just-to-be inserted, replacing @old.
-   * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */
-  struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old);
-};
-
-struct rt_import_hook {
-  node n;
-  rtable *table;                        /* The connected table */
-  struct rt_import_request *req;        /* The requestor */
-
-  struct rt_import_stats {
-    /* Import - from protocol to core */
-    u32 pref;                           /* Number of routes selected as best in the (adjacent) routing table */
-    u32 updates_ignored;                /* Number of route updates rejected as already in route table */
-    u32 updates_accepted;               /* Number of route updates accepted and imported */
-    u32 withdraws_ignored;              /* Number of route withdraws rejected as already not in route table */
-    u32 withdraws_accepted;             /* Number of route withdraws accepted and processed */
-  } stats;
-
-  u64 flush_seq;                        /* Table export seq when the channel announced flushing */
-  btime last_state_change;              /* Time of last state transition */
-
-  u8 import_state;                      /* IS_* */
-  u8 stale_set;                         /* Set this stale_cycle to imported routes */
-  u8 stale_valid;                       /* Routes with this stale_cycle and bigger are considered valid */
-  u8 stale_pruned;                      /* Last prune finished when this value was set at stale_valid */
-  u8 stale_pruning;                     /* Last prune started when this value was set at stale_valid */
-
-  void (*stopped)(struct rt_import_request *);  /* Stored callback when import is stopped */
-};
-
-struct rt_pending_export {
-  struct rt_pending_export * _Atomic next;      /* Next export for the same destination */
-  struct rte_storage *new, *new_best, *old, *old_best;
-  u64 seq;                              /* Sequential ID (table-local) of the pending export */
-};
-
-struct rt_export_request {
-  struct rt_export_hook *hook;          /* Table part of the export */
-  char *name;
-  u8 trace_routes;
-
-  event_list *list;                     /* Where to schedule export events */
-
-  /* There are two methods of export. You can either request feeding every single change
-   * or feeding the whole route feed. In case of regular export, &export_one is preferred.
-   * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one.
-   * Thus, for RA_OPTIMAL, &export_one is only set,
-   * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set
-   * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes
-   */
-  void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe);
-  void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count);
-
-  void (*dump_req)(struct rt_export_request *req);
-  void (*log_state_change)(struct rt_export_request *req, u8);
-};
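The export_one/export_bulk split described in the comment above mirrors the handlers the channel code installs earlier in this patch. A minimal consumer might be wired up as follows; an illustrative sketch only, with hypothetical names, not from the patch:

  /* Illustrative sketch: a request taking single changes during regular
   * operation and bulk feeds during the initial scan. */
  static void
  my_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe)
  {
    /* one pending change for one network */
  }

  static void
  my_export_bulk(struct rt_export_request *req, const net_addr *net,
                 struct rt_pending_export *rpe, rte **feed, uint count)
  {
    /* the whole current feed for one network while feeding */
  }

  static struct rt_export_request my_req = {
    .name = "my-consumer",
    .export_one = my_export_one,
    .export_bulk = my_export_bulk,
  };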
-
-struct rt_export_hook {
-  node n;
-  rtable *table;                        /* The connected table */
-
-  pool *pool;
-
-  struct rt_export_request *req;        /* The requestor */
-
-  struct rt_export_stats {
-    /* Export - from core to protocol */
-    u32 updates_received;               /* Number of route updates received */
-    u32 withdraws_received;             /* Number of route withdraws received */
-  } stats;
-
-  struct fib_iterator feed_fit;         /* Routing table iterator used during feeding */
-
-  struct bmap seq_map;                  /* Keep track which exports were already procesed */
-
-  struct rt_pending_export * _Atomic last_export;/* Last export processed */
-  struct rt_pending_export *rpe_next;   /* Next pending export to process */
-
-  btime last_state_change;              /* Time of last state transition */
-
-  u8 refeed_pending;                    /* Refeeding and another refeed is scheduled */
-  _Atomic u8 export_state;              /* Route export state (TES_*, see below) */
-
-  struct event *event;                  /* Event running all the export operations */
-
-  void (*stopped)(struct rt_export_request *);  /* Stored callback when export is stopped */
-};
-
-extern struct event_cork rt_cork;
-
-#define TIS_DOWN       0
-#define TIS_UP         1
-#define TIS_STOP       2
-#define TIS_FLUSHING   3
-#define TIS_WAITING    4
-#define TIS_CLEARED    5
-#define TIS_MAX        6
-
-#define TES_DOWN       0
-#define TES_HUNGRY     1
-#define TES_FEEDING    2
-#define TES_READY      3
-#define TES_STOP       4
-#define TES_MAX        5
-
-void rt_request_import(rtable *tab, struct rt_import_request *req);
-void rt_request_export(rtable *tab, struct rt_export_request *req);
-
-void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
-void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));
-
-const char *rt_import_state_name(u8 state);
-const char *rt_export_state_name(u8 state);
-
-static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; }
-static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; }
-
-void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
-
-/* Get next rpe. If src is given, it must match. */
-struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src);
-
-/* Mark the pending export processed */
-void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
-
-/* Get pending export seen status */
-int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
-
-
-/* Types of route announcement, also used as flags */
-#define RA_UNDEF 0 /* Undefined RA type */
-#define RA_OPTIMAL 1 /* Announcement of optimal route change */
-#define RA_ACCEPTED 2 /* Announcement of first accepted route */
-#define RA_ANY 3 /* Announcement of any route change */
-#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
-
-/* Return value of preexport() callback */
-#define RIC_ACCEPT 1 /* Accepted by protocol */
-#define RIC_PROCESS 0 /* Process it through import filter */
-#define RIC_REJECT -1 /* Rejected by protocol */
-#define RIC_DROP -2 /* Silently dropped by protocol */
-
-#define rte_update channel_rte_import
-/**
- * rte_update - enter a new update to a routing table
- * @c: channel doing the update
- * @net: network address
- * @rte: a &rte representing the new route
- * @src: old route source identifier
- *
- * This function imports a new route to the appropriate table (via the channel).
- * Table keys are @net (obligatory) and @rte->attrs->src.
- * Both the @net and @rte pointers can be local.
- *
- * The route attributes (@rte->attrs) are obligatory. They can also be allocated
- * locally. However, if you use an already-cached attribute object, you must
- * call rta_clone() on that object yourself. (These semantics may change in the future.)
- *
- * If the route attributes are local, you may set @rte->attrs->src to NULL; then
- * the protocol's default route source will be supplied.
- *
- * When rte_update() gets a route, it automatically validates it. This includes
- * checking for validity of the given network and next hop addresses and also
- * checking for host-scope or link-scope routes. Then the import filters are
- * processed and if accepted, the route is passed to route table recalculation.
- *
- * The accepted routes are then inserted into the table, replacing the old route
- * for the same @net identified by @src. Then the route is announced
- * to all the channels connected to the table using the standard export mechanism.
- * Setting @rte to NULL makes this a withdraw; otherwise @rte->src must be the same
- * as @src.
- *
- * All memory used for temporary allocations is taken from a special linpool
- * @rte_update_pool and freed when rte_update() finishes.
- */
-void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src);
-
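
A sketch of the calling convention documented above; the protocol p, channel c, network net and attribute list attrs are assumed to be prepared elsewhere:

/* Announce (or replace) the route keyed by (net, src), then withdraw it.
 * The id passed to rt_get_source() is a protocol-chosen private ID. */
struct rte_src *src = rt_get_source(p, 0);

rte e0 = { .attrs = attrs, .src = src };
rte_update(c, &net, &e0, src);   /* import/replace */

rte_update(c, &net, NULL, src);  /* NULL rte withdraws the same key */
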
-extern list routing_tables;
-struct config;
-
-void rt_init(void);
-void rt_preconfig(struct config *);
-void rt_commit(struct config *new, struct config *old);
-void rt_lock_table(rtable *);
-void rt_unlock_table(rtable *);
-void rt_subscribe(rtable *tab, struct rt_subscription *s);
-void rt_unsubscribe(struct rt_subscription *s);
-rtable *rt_setup(pool *, struct rtable_config *);
-
-static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
-static inline net *net_find_valid(rtable *tab, const net_addr *addr)
-{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
-static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
-void *net_route(rtable *tab, const net_addr *n);
-int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
-int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
-rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
-
-void rt_refresh_begin(struct rt_import_request *);
-void rt_refresh_end(struct rt_import_request *);
-void rt_schedule_prune(rtable *t);
-void rte_dump(struct rte_storage *);
-void rte_free(struct rte_storage *, rtable *);
-struct rte_storage *rte_store(const rte *, net *net, rtable *);
-void rt_dump(rtable *);
-void rt_dump_all(void);
-void rt_dump_hooks(rtable *);
-void rt_dump_hooks_all(void);
-void rt_prune_sync(rtable *t, int all);
-struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
-
-/* Default limit for ECMP next hops, defined in sysdep code */
-extern const int rt_default_ecmp;
-
-struct rt_show_data_rtable {
- node n;
- rtable *table;
- struct channel *export_channel;
-};
-
-struct rt_show_data {
- net_addr *addr;
- list tables;
- struct rt_show_data_rtable *tab; /* Iterator over table list */
- struct rt_show_data_rtable *last_table; /* Last table in output */
- struct fib_iterator fit; /* Iterator over networks in table */
- int verbose, tables_defined_by;
- const struct filter *filter;
- struct proto *show_protocol;
- struct proto *export_protocol;
- struct channel *export_channel;
- struct config *running_on_config;
- struct krt_proto *kernel;
- struct rt_export_hook *kernel_export_hook;
- int export_mode, primary_only, filtered, stats, show_for;
-
- int table_open; /* Iteration (fit) is open */
- int net_counter, rt_counter, show_counter, table_counter;
- int net_counter_last, rt_counter_last, show_counter_last;
-};
-
-void rt_show(struct rt_show_data *);
-struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t);
-
-/* Value of table definition mode in struct rt_show_data */
-#define RSD_TDB_DEFAULT 0 /* no table specified */
-#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */
-#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */
-#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */
-
-#define RSD_TDB_SET 0x1 /* internal: show empty tables */
-#define RSD_TDB_NMN 0x2 /* internal: need matching net */
-
-/* Value of export_mode in struct rt_show_data */
-#define RSEM_NONE 0 /* Export mode not used */
-#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */
-#define RSEM_EXPORT 2 /* Routes accepted by export filter */
-#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */
-#define RSEM_EXPORTED 4 /* Routes marked in export map */
-
-/*
- * Route Attributes
- *
- * Beware: All standard BGP attributes must be represented here instead
- * of making them local to the route. This is needed to ensure proper
- * construction of BGP route attribute lists.
- */
-
-/* Nexthop structure */
-struct nexthop {
- ip_addr gw; /* Next hop */
- struct iface *iface; /* Outgoing interface */
- struct nexthop *next;
- byte flags;
- byte weight;
- byte labels_orig; /* Number of labels before hostentry was applied */
- byte labels; /* Number of all labels */
- u32 label[0];
-};
-
-#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */
-
-
-struct rte_src {
- struct rte_src *next; /* Hash chain */
- struct rte_owner *owner; /* Route source owner */
- u32 private_id; /* Private ID, assigned by the protocol */
- u32 global_id; /* Globally unique ID of the source */
- _Atomic u64 uc; /* Use count */
-};
-
-
-typedef struct rta {
- struct rta *next, **pprev; /* Hash chain */
- u32 uc; /* Use count */
- u32 hash_key; /* Hash over important fields */
- struct ea_list *eattrs; /* Extended Attribute chain */
- struct hostentry *hostentry; /* Hostentry for recursive next-hops */
- ip_addr from; /* Advertising router */
- u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */
- u16 cached:1; /* Are attributes cached? */
- u16 source:7; /* Route source (RTS_...) */
- u16 scope:4; /* Route scope (SCOPE_... -- see ip.h) */
- u16 dest:4; /* Route destination type (RTD_...) */
- word pref;
- struct nexthop nh; /* Next hop */
-} rta;
-
-#define RTS_STATIC 1 /* Normal static route */
-#define RTS_INHERIT 2 /* Route inherited from kernel */
-#define RTS_DEVICE 3 /* Device route */
-#define RTS_STATIC_DEVICE 4 /* Static device route */
-#define RTS_REDIRECT 5 /* Learned via redirect */
-#define RTS_RIP 6 /* RIP route */
-#define RTS_OSPF 7 /* OSPF route */
-#define RTS_OSPF_IA 8 /* OSPF inter-area route */
-#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */
-#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */
-#define RTS_BGP 11 /* BGP route */
-#define RTS_PIPE 12 /* Inter-table wormhole */
-#define RTS_BABEL 13 /* Babel route */
-#define RTS_RPKI 14 /* Route Origin Authorization */
-#define RTS_PERF 15 /* Perf checker */
-#define RTS_MAX 16
-
-#define RTD_NONE 0 /* Undefined next hop */
-#define RTD_UNICAST 1 /* Next hop is neighbor router */
-#define RTD_BLACKHOLE 2 /* Silently drop packets */
-#define RTD_UNREACHABLE 3 /* Reject as unreachable */
-#define RTD_PROHIBIT 4 /* Administratively prohibited */
-#define RTD_MAX 5
-
-#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
- protocol-specific metric is available */
-
-
-extern const char * rta_dest_names[RTD_MAX];
-
-static inline const char *rta_dest_name(uint n)
-{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; }
-
-/* Route has a regular, reachable nexthop (i.e. not RTD_UNREACHABLE and the like) */
-static inline int rte_is_reachable(rte *r)
-{ return r->attrs->dest == RTD_UNICAST; }
-
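
The label stack lives inline after struct nexthop (the u32 label[0] flexible array above), so allocations must be sized by label count; a sketch, with the pool and label values purely illustrative:

/* Mirrors nexthop_size(), declared further below in this header. */
uint labels = 2;
struct nexthop *nh = lp_allocz(pool, sizeof(struct nexthop) + labels * sizeof(u32));
nh->labels = labels;
nh->label[0] = 1000;
nh->label[1] = 2000;
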
-/*
- * Extended Route Attributes
- */
-
-typedef struct eattr {
- word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */
- byte flags; /* Protocol-dependent flags */
- byte type; /* Attribute type and several flags (EAF_...) */
- union {
- uintptr_t data;
- const struct adata *ptr; /* Attribute data elsewhere */
- } u;
-} eattr;
-
-
-#define EA_CODE(proto,id) (((proto) << 8) | (id))
-#define EA_ID(ea) ((ea) & 0xff)
-#define EA_PROTO(ea) ((ea) >> 8)
-#define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT)
-#define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT)
-#define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT)
-
-const char *ea_custom_name(uint ea);
-
-#define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0)
-
-#define EA_CODE_MASK 0xffff
-#define EA_CUSTOM_BIT 0x8000
-#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */
-#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */
-#define EA_BIT_GET(ea) ((ea) >> 24)
-
-#define EAF_TYPE_MASK 0x1f /* Mask with this to get type */
-#define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */
-#define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */
-#define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */
-#define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */
-#define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */
-#define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */
-#define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */
-#define EAF_TYPE_PTR 0x0d /* Pointer to an object */
-#define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community list */
-#define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */
-#define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */
-#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */
-#define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */
-#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */
-#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */
-
-typedef struct adata {
- uint length; /* Length of data */
- byte data[0];
-} adata;
-
-extern const adata null_adata; /* adata of length 0 */
-
-static inline struct adata *
-lp_alloc_adata(struct linpool *pool, uint len)
-{
- struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
- ad->length = len;
- return ad;
-}
-
-static inline int adata_same(const struct adata *a, const struct adata *b)
-{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); }
-
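
A worked example of the attribute-ID packing defined above (values illustrative):

/* Protocol attributes: upper byte selects the protocol, lower byte the ID. */
uint id = EA_CODE(PROTOCOL_NONE, 0);   /* == EA_GEN_IGP_METRIC */
uint proto = EA_PROTO(id);             /* 0 == PROTOCOL_NONE */
uint attr = EA_ID(id);                 /* 0 */

/* Custom attributes instead set the high bit of the 16-bit code. */
uint cid = EA_CUSTOM(42);
ASSERT(EA_IS_CUSTOM(cid) && (EA_CUSTOM_ID(cid) == 42));
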
-typedef struct ea_list {
- struct ea_list *next; /* In case we have an override list */
- byte flags; /* Flags: EALF_... */
- byte rfu;
- word count; /* Number of attributes */
- eattr attrs[0]; /* Attribute definitions themselves */
-} ea_list;
-
-#define EALF_SORTED 1 /* Attributes are sorted by code */
-#define EALF_BISECT 2 /* Use interval bisection for searching */
-#define EALF_CACHED 4 /* Attributes belonging to cached rta */
-
-struct rte_owner_class {
- void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */
- int (*rte_better)(struct rte *, struct rte *);
- int (*rte_mergable)(struct rte *, struct rte *);
- u32 (*rte_igp_metric)(struct rte *);
-};
-
-struct rte_owner {
- struct rte_owner_class *class;
- int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
- HASH(struct rte_src) hash;
- const char *name;
- u32 hash_key;
- u32 uc;
- event_list *list;
- event *prune;
- event *stop;
-};
-
-DEFINE_DOMAIN(attrs);
-extern DOMAIN(attrs) attrs_domain;
-
-#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain)
-#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain)
-
-#define RTE_SRC_PU_SHIFT 44
-#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT)
-
-struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
-#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id))
-static inline void rt_lock_source(struct rte_src *src)
-{
- u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel);
- ASSERT_DIE(uc > 0);
-}
-
-static inline void rt_unlock_source(struct rte_src *src)
-{
- u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel);
- u64 pending = uc >> RTE_SRC_PU_SHIFT;
- uc &= RTE_SRC_IN_PROGRESS - 1;
-
- ASSERT_DIE(uc > pending);
- if (uc == pending + 1)
- ev_send(src->owner->list, src->owner->prune);
-
- atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel);
-}
-
-void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
-void rt_destroy_sources(struct rte_owner *, event *);
-
-struct ea_walk_state {
- ea_list *eattrs; /* Current ea_list, initially set by caller */
- eattr *ea; /* Current eattr, initially NULL */
- u32 visited[4]; /* Bitfield, limiting max to 128 */
-};
-
-eattr *ea_find(ea_list *, unsigned ea);
-eattr *ea_walk(struct ea_walk_state *s, uint id, uint max);
-uintptr_t ea_get_int(ea_list *, unsigned ea, uintptr_t def);
-void ea_dump(ea_list *);
-void ea_sort(ea_list *); /* Sort entries in all sub-lists */
-unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */
-void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */
-int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */
-uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */
-ea_list *ea_append(ea_list *to, ea_list *what);
-void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max);
-
-#define ea_normalize(ea) do { \
- if (ea->next) { \
- ea_list *t = alloca(ea_scan(ea)); \
- ea_merge(ea, t); \
- ea = t; \
- } \
- ea_sort(ea); \
- if (ea->count == 0) \
- ea = NULL; \
-} while(0) \
-
-static inline eattr *
-ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val)
-{
- ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
- eattr *e = &a->attrs[0];
-
- a->flags = EALF_SORTED;
- a->count = 1;
- a->next = *to;
- *to = a;
-
- e->id = id;
- e->type = type;
- e->flags = flags;
-
- if (type & EAF_EMBEDDED)
- e->u.data = (u32) val;
- else
- e->u.ptr = (struct adata *) val;
-
- return e;
-}
-
-static inline void
-ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val)
-{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); }
-
-static inline void
-ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val)
-{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); }
-
-static inline void
-ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len)
-{
- struct adata *a = lp_alloc_adata(pool, len);
- memcpy(a->data, data, len);
- ea_set_attr(to, pool, id, flags, type, (uintptr_t) a);
-}
-
-
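
A sketch of how these setters chain one-attribute override lists and how the ea_normalize() macro above then flattens them; the pool and the values are illustrative:

ea_list *eal = NULL;
ea_set_attr_u32(&eal, pool, EA_GEN_IGP_METRIC, 0, EAF_TYPE_INT, 10);

byte blob[4] = { 1, 2, 3, 4 };
ea_set_attr_data(&eal, pool, EA_CUSTOM(1), 0, EAF_TYPE_OPAQUE, blob, sizeof blob);

/* Each call prepended a one-attribute ea_list; merge, sort and prune: */
ea_normalize(eal);
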
-#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
-
-static inline size_t nexthop_size(const struct nexthop *nh)
-{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; }
-int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */
-static inline int nexthop_same(struct nexthop *x, struct nexthop *y)
-{ return (x == y) || nexthop__same(x, y); }
-struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp);
-struct nexthop *nexthop_sort(struct nexthop *x);
-static inline void nexthop_link(struct rta *a, struct nexthop *from)
-{ memcpy(&a->nh, from, nexthop_size(from)); }
-void nexthop_insert(struct nexthop **n, struct nexthop *y);
-int nexthop_is_sorted(struct nexthop *x);
-
-void rta_init(void);
-static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; }
-#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
-rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */
-static inline int rta_is_cached(rta *r) { return r->cached; }
-static inline rta *rta_clone(rta *r) { r->uc++; return r; }
-void rta__free(rta *r);
-static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
-rta *rta_do_cow(rta *o, linpool *lp);
-static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; }
-static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; }
-void rta_dump(rta *);
-void rta_dump_all(void);
-void rta_show(struct cli *, rta *);
-
-u32 rt_get_igp_metric(rte *);
-struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
-void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls);
-
-static inline void
-rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls)
-{
- rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls);
-}
-
-/*
- * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills
- * rta->hostentry field. New hostentry has zero use count. Cached rta locks its
- * hostentry (increases its use count), uncached rta does not lock it. Hostentry
- * with zero use count is removed asynchronously during host cache update,
- * therefore it is safe to hold such a hostentry temporarily. Hostentry holds a
- * lock for a 'source' rta, mainly to share multipath nexthops.
- *
- * There is no need to hold a lock for hostentry->dep table, because that table
- * contains routes responsible for that hostentry, and therefore is non-empty if
- * given hostentry has non-zero use count. If the hostentry has zero use count,
- * the entry is removed before dep is referenced.
- *
- * The protocol responsible for routes with recursive next hops should hold a
- * lock for a 'source' table governing those routes (argument tab to
- * rta_set_recursive_next_hop()), because its routes reference hostentries
- * (through rta) related to the governing table. When all such routes are
- * removed, the rtas are immediately removed too, reaching zero uc. Then the 'source'
- * table lock can be released immediately, although hostentries may still
- * exist - they will be freed together with the 'source' table.
- */
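
A sketch of the usage pattern described above (e.g. for iBGP next hops); the channel c, the attribute structure a, the IGP table and the gateway address are assumed to exist:

/* Resolve gw through igp_table; routes in c->table then depend on the
 * hostentry, which keeps the resolution up to date. */
rta_set_recursive_next_hop(c->table, a, igp_table, gw, IPA_NONE, NULL);
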
-
-static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; }
-static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; }
-
-/*
- * Default protocol preferences
- */
-
-#define DEF_PREF_DIRECT 240 /* Directly connected */
-#define DEF_PREF_STATIC 200 /* Static route */
-#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */
-#define DEF_PREF_BABEL 130 /* Babel */
-#define DEF_PREF_RIP 120 /* RIP */
-#define DEF_PREF_BGP 100 /* BGP */
-#define DEF_PREF_RPKI 100 /* RPKI */
-#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */
-
-/*
- * Route Origin Authorization
- */
-
-#define ROA_UNKNOWN 0
-#define ROA_VALID 1
-#define ROA_INVALID 2
-
-#endif
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index f7e33d72..b3a4c7a1 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -45,11 +45,11 @@
 */
 #include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
 #include "nest/protocol.h"
 #include "nest/iface.h"
 #include "nest/cli.h"
-#include "nest/attrs.h"
+#include "lib/attrs.h"
 #include "lib/alloca.h"
 #include "lib/hash.h"
 #include "lib/idm.h"
@@ -57,9 +57,25 @@
 #include "lib/string.h"
 #include <stddef.h>
+#include <stdlib.h>
 const adata null_adata; /* adata of length 0 */
+struct ea_class ea_gen_igp_metric = {
+ .name = "igp_metric",
+ .type = T_INT,
+};
+
+struct ea_class ea_gen_preference = {
+ .name = "preference",
+ .type = T_INT,
+};
+
+struct ea_class ea_gen_from = {
+ .name = "from",
+ .type = T_IP,
+};
+
 const char * const rta_src_names[RTS_MAX] = {
 [RTS_STATIC] = "static",
 [RTS_INHERIT] = "inherit",
@@ -77,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = {
 [RTS_RPKI] = "RPKI",
 };
+static void
+ea_gen_source_format(const eattr *a, byte *buf, uint size)
+{
+ if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data])
+ bsnprintf(buf, size, "unknown");
+ else
+ bsnprintf(buf, size, "%s", rta_src_names[a->u.data]);
+}
+
+struct ea_class ea_gen_source = {
+ .name = "source",
+ .type = T_ENUM_RTS,
+ .readonly = 1,
+ .format = ea_gen_source_format,
+};
+
+struct ea_class ea_gen_nexthop = {
+ .name = "nexthop",
+ .type = T_NEXTHOP_LIST,
+};
+
+/*
+ * ea_set_hostentry() acquires hostentry from hostcache.
+ * New hostentry has zero use count. Cached rta locks its
+ * hostentry (increases its use count), uncached rta does not lock it.
+ * Hostentry with zero use count is removed asynchronously
+ * during host cache update, therefore it is safe to hold
+ * such a hostentry temporarily as long as you hold the table lock.
+ *
+ * There is no need to hold a lock for hostentry->dep table, because that table
+ * contains routes responsible for that hostentry, and therefore is non-empty if
+ * given hostentry has non-zero use count. If the hostentry has zero use count,
+ * the entry is removed before dep is referenced.
+ *
+ * The protocol responsible for routes with recursive next hops should hold a
+ * lock for a 'source' table governing those routes (argument tab),
+ * because its routes reference hostentries related to the governing table.
+ * When all such routes are
+ * removed, the rtas are immediately removed too, reaching zero uc. Then the 'source'
+ * table lock can be released immediately, although hostentries may still
+ * exist - they will be freed together with the 'source' table.
+ */
+
+static void
+ea_gen_hostentry_stored(const eattr *ea)
+{
+ struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr;
+ had->he->uc++;
+}
+
+static void
+ea_gen_hostentry_freed(const eattr *ea)
+{
+ struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr;
+ had->he->uc--;
+}
+
+struct ea_class ea_gen_hostentry = {
+ .name = "hostentry",
+ .type = T_HOSTENTRY,
+ .readonly = 1,
+ .stored = ea_gen_hostentry_stored,
+ .freed = ea_gen_hostentry_freed,
+};
+
 const char * rta_dest_names[RTD_MAX] = {
 [RTD_NONE] = "",
 [RTD_UNICAST] = "unicast",
@@ -85,12 +166,22 @@ const char * rta_dest_names[RTD_MAX] = {
 [RTD_PROHIBIT] = "prohibited",
 };
+struct ea_class ea_gen_flowspec_valid = {
+ .name = "flowspec_valid",
+ .type = T_ENUM_FLOWSPEC_VALID,
+ .readonly = 1,
+};
+
+const char * flowspec_valid_names[FLOWSPEC__MAX] = {
+ [FLOWSPEC_UNKNOWN] = "unknown",
+ [FLOWSPEC_VALID] = "",
+ [FLOWSPEC_INVALID] = "invalid",
+};
+
 DOMAIN(attrs) attrs_domain;
 pool *rta_pool;
-static slab *rta_slab_[4];
-static slab *nexthop_slab_[4];
 static slab *rte_src_slab;
 static struct idm src_ids;
@@ -106,11 +197,14 @@ static struct idm src_ids;
 #define RSH_REHASH rte_src_rehash
 #define RSH_PARAMS /2, *2, 1, 1, 8, 20
 #define RSH_INIT_ORDER 2
+static struct rte_src **rte_src_global;
+static uint rte_src_global_max = SRC_ID_INIT_SIZE;
 static void
 rte_src_init(void)
 {
 rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
+ rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max);
 idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
 }
@@ -151,18 +245,32 @@ rt_get_source_o(struct rte_owner *p, u32 id)
 log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now",
 p->name, src->private_id, src->global_id, p->uc);
+ if (src->global_id >= rte_src_global_max)
+ {
+ rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2));
+ memset(&rte_src_global[rte_src_global_max / 2], 0,
+ sizeof(struct rte_src *) * (rte_src_global_max / 2));
+ }
+
+ rte_src_global[src->global_id] = src;
 RTA_UNLOCK;
 return src;
 }
+struct rte_src *
+rt_find_source_global(u32 id)
+{
+ if (id >= rte_src_global_max)
+ return NULL;
+ else
+ return rte_src_global[id];
+}
+
 static inline void
 rt_done_sources(struct rte_owner *o)
 {
- if (o->stop->list)
- ev_send(o->stop->list, o->stop);
- else
- ev_send(o->list, o->stop);
+ ev_send(o->list, o->stop);
 }
 void
@@ -174,7 +282,7 @@ rt_prune_sources(void *data)
 {
 u64 uc;
 while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT)
- ;
+ synchronize_rcu();
 if (uc == 0)
 {
@@ -183,8 +291,9 @@ rt_prune_sources(void *data)
 HASH_DO_REMOVE(o->hash, RSH, sp);
 RTA_LOCK;
+ rte_src_global[src->global_id] = NULL;
 idm_free(&src_ids, src->global_id);
- sl_free(rte_src_slab, src);
+ sl_free(src);
 RTA_UNLOCK;
 }
 }
@@ -246,50 +355,10 @@ rt_destroy_sources(struct rte_owner *o, event *done)
 * Multipath Next Hop
 */
-static inline u32
-nexthop_hash(struct nexthop *x)
-{
- u32 h = 0;
- for (; x; x = x->next)
- {
- h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h
>> 9); - - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); - } - - return h; -} - -int -nexthop__same(struct nexthop *x, struct nexthop *y) -{ - for (; x && y; x = x->next, y = y->next) - { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; - - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } - - return x == y; -} - static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -314,23 +383,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -347,138 +409,227 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); + + if (!xvalid && !yvalid) + break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); if (cmp < 0) { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); } else if (cmp > 0) { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); } else { - ASSUME(x && y); - *n = rx ? x : (ry ? 
y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); } - n = &((*n)->next); + cur = NEXTHOP_NEXT(cur); } - *n = NULL; - return root; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -void -nexthop_insert(struct nexthop **n, struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - if (cmp < 0) - continue; - else if (cmp > 0) - break; - else - return; - } + if (cnt <= 1) + return nhad; - x->next = *n; - *n = x; -} + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); -struct nexthop * -nexthop_sort(struct nexthop *x) -{ - struct nexthop *s = NULL; + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); - /* Simple insert-sort */ - while (x) + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) +/* + * Extended Attributes + */ + +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) { - return nexthop_slab_[MIN(nh->labels, 3)]; + /* No more ea class references. Unregister the attribute. 
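
The new nexthop_merge() and nexthop_sort() above both rely on the nexthop_compare_node() ordering; the dedup-while-storing step can be shown on plain integers (illustrative only, the real code walks variable-size struct nexthop records):

static uint dedup_sorted(u32 *v, uint n)
{
  uint out = 0;
  for (uint i = 0; i < n; i++)
    if (!i || (v[i] != v[i-1]))  /* keep the first of each run of equals */
      v[out++] = v[i];
  return out;
}
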
*/ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + if (!cl->hidden) + ea_lex_unregister(cl); } -static struct nexthop * -nexthop_copy(struct nexthop *o) +static void +ea_class_ref_free(resource *r) { - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} - return first; +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); } +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + static void -nexthop_free(struct nexthop *o) +ea_class_init(void) { - struct nexthop *n; + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = EA_CLASS_INITIAL_MAX)); +} - while (o) - { - n = o->next; - sl_free(nexthop_slab(o), o); - o = n; - } +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; } +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); -/* - * Extended Attributes - */ + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + if (!def->hidden) + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} static inline eattr * ea__find(ea_list *e, unsigned id) @@ -523,12 +674,11 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. 
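
A sketch of how a protocol might define and register its own attribute class with the registry above (name and type illustrative):

static struct ea_class ea_my_metric = {
  .name = "my_metric",
  .type = T_INT,
};

/* Called once at startup, like rta_init() does for the generic classes;
 * the name must not collide with an already registered class. */
ea_register_init(&ea_my_metric);
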
*/ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); - if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF && - !(id & EA_ALLOW_UNDEF)) + if (a && a->undef && !(id & EA_ALLOW_UNDEF)) return NULL; return a; } @@ -595,7 +745,7 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) BIT32_SET(s->visited, n); - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (a->undef) continue; s->eattrs = e; @@ -609,25 +759,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -uintptr_t -ea_get_int(ea_list *e, unsigned id, uintptr_t def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -694,15 +825,18 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && (s[-1].undef || !e->next)) continue; /* Copy the newest version to destination */ *d = *s0; /* Preserve info whether it originated locally */ - d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); + d->originated = s[-1].originated; + + /* Not fresh any more, we prefer surstroemming */ + d->fresh = 0; /* Next destination */ d++; @@ -722,21 +856,18 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. */ -void +static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -746,8 +877,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -755,6 +886,8 @@ ea_scan(ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -773,21 +906,36 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; +} + +ea_list * +ea_normalize(ea_list *e, int overlay) +{ + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); + ea_sort(t); + + return t->count ? 
t : t->next; } /** @@ -805,7 +953,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -816,39 +965,46 @@ ea_same(ea_list *x, ea_list *y) if (a->id != b->id || a->flags != b->flags || a->type != b->type || + a->originated != b->originated || + a->fresh != b->fresh || + a->undef != b->undef || ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr))) return 0; } return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + if (!a->undef && !(a->type & EAF_EMBEDDED)) + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -858,30 +1014,56 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; } -static inline void -ea_free(ea_list *o) +static void +ea_list_ref(ea_list *l) { - if (o) + for(uint i=0; i<l->count; i++) { - ASSERT(!o->next); - mb_free(o); + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) +static void +ea_list_unref(ea_list *l) { - if (a->id == EA_GEN_IGP_METRIC) + for(uint i=0; i<l->count; i++) { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); } - return GA_UNKNOWN; + if (l->next) + ea_free(l->next); } void @@ -934,41 +1116,90 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void 
-ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } +void +ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) +{ + if (!NEXTHOP_IS_REACHABLE(nhad)) + return; + + NEXTHOP_WALK(nh, nhad) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (!NEXTHOP_ONE(nhad)) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } +} + +void +ea_show_hostentry(const struct adata *ad, byte *buf, uint size) +{ + const struct hostentry_adata *had = (const struct hostentry_adata *) ad; + + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + bsnprintf(buf, size, "via %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + bsnprintf(buf, size, "via %I table %s", had->he->addr, had->he->tab->name); +} + /** * ea_show - print an &eattr to CLI * @c: destination CLI @@ -980,79 +1211,80 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte * If the protocol defining the attribute provides its own * get_attr() hook, it's consulted first. */ -void +static void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? 
NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); + ASSERT_DIE(e->id < ea_class_max); + + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + if (e->undef || cls->hidden) + return; + else if (cls->format) + cls->format(e, buf, end - buf); else - status = get_generic_attr(e, &pos, end - pos); + switch (e->type) + { + case T_INT: + if ((cls == &ea_gen_igp_metric) && e->u.data >= IGP_METRIC_UNKNOWN) + return; - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: - ea_show_int_set(c, ad, 1, pos, buf, end); + case T_CLIST: + ea_show_int_set(c, cls->name, ad, 1, buf); return; - case EAF_TYPE_EC_SET: - ea_show_ec_set(c, ad, pos, buf, end); + case T_ECLIST: + ea_show_ec_set(c, cls->name, ad, buf); return; - case EAF_TYPE_LC_SET: - ea_show_lc_set(c, ad, pos, buf, end); + case T_LCLIST: + ea_show_lc_set(c, cls->name, ad, buf); return; - case EAF_TYPE_UNDEF: + case T_NEXTHOP_LIST: + ea_show_nexthop_list(c, (struct nexthop_adata *) e->u.ptr); + return; + case T_HOSTENTRY: + ea_show_hostentry(ad, pos, end - pos); + break; default: bsprintf(pos, "<type %02x>", e->type); - } + } + + cli_printf(c, -1012, "\t%s: %s", cls->name, buf); +} + +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); } - cli_printf(c, -1012, "\t%s", buf); } /** @@ -1074,19 +1306,26 @@ ea_dump(ea_list *e) } while (e) { - debug("[%c%c%c]", + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; + debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', - (e->flags & EALF_CACHED) ? 'C' : 'c'); + (e->flags & EALF_CACHED) ? 'C' : 'c', + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); - if (a->type & EAF_ORIGINATED) + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" 
[a->type]); + if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1116,10 +1355,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1163,12 +1405,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static rta **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1176,64 +1418,14 @@ rta_alloc_hash(void) rta_cache_mask = rta_cache_size - 1; } -static inline uint -rta_hash(rta *a) -{ - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); -#define BMIX(f) mem_hash_mix_num(&h, a->f); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - BMIX(source); - BMIX(scope); - BMIX(dest); - MIX(pref); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); -} - -static inline int -rta_same(rta *x, rta *y) -{ - return (x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; -} - -static rta * -rta_copy(rta *o) -{ - rta *r = sl_alloc(rta_slab(o)); - - memcpy(r, o, rta_size(o)); - r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); - return r; -} - static inline void -rta_insert(rta *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; - r->next = rta_hash_table[h]; - if (r->next) - r->next->pprev = &r->next; - r->pprev = &rta_hash_table[h]; + r->next_hash = rta_hash_table[h]; + if (r->next_hash) + r->next_hash->pprev_hash = &r->next_hash; + r->pprev_hash = &rta_hash_table[h]; rta_hash_table[h] = r; } @@ -1242,8 +1434,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - rta *r, *n; - rta **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1251,7 +1443,7 @@ rta_rehash(void) for(h=0; h<ohs; h++) for(r=oht[h]; r; r=n) { - n = r->next; + n = r->next_hash; rta_insert(r); } mb_free(oht); @@ -1270,99 +1462,48 @@ rta_rehash(void) * The extended attribute lists attached to the &rta are automatically * converted to the normalized form. 
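
The cache discipline implemented by ea_lookup() just below, sketched from the caller's side (the attribute list ea is assumed to be built locally):

ea_list *stored = ea_lookup(ea, 0);  /* normalize, hash, intern; uc = 1 */
ea_clone(stored);                    /* each extra holder bumps uc */
ea_free(stored);                     /* drop one reference */
ea_free(stored);                     /* last drop unlinks and frees */
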
*/ -rta * -rta_lookup(rta *o) +ea_list * +ea_lookup(ea_list *o, int overlay) { - rta *r; + struct ea_storage *r; uint h; - ASSERT(!o->cached); - if (o->eattrs) - ea_normalize(o->eattrs); + ASSERT(!ea_is_cached(o)); + o = ea_normalize(o, overlay); + h = ea_hash(o); + + for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) + if (r->hash_key == h && ea_same(r->l, o)) + return ea_clone(r->l); - h = rta_hash(o); - for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) - if (r->hash_key == h && rta_same(r, o)) - return rta_clone(r); + uint elen = ea_list_size(o); + r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage)); + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); - r = rta_copy(o); + r->l->flags |= EALF_CACHED; r->hash_key = h; - r->cached = 1; - rt_lock_hostentry(r->hostentry); + r->uc = 1; + rta_insert(r); if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + return r->l; } void -rta__free(rta *a) +ea__free(struct ea_storage *a) { - ASSERT(rta_cache_count && a->cached); + ASSERT(rta_cache_count); rta_cache_count--; - *a->pprev = a->next; - if (a->next) - a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - if (a->nh.next) - nexthop_free(a->nh.next); - ea_free(a->eattrs); - a->cached = 0; - sl_free(rta_slab(a), a); -} + *a->pprev_hash = a->next_hash; + if (a->next_hash) + a->next_hash->pprev_hash = a->pprev_hash; -rta * -rta_do_cow(rta *o, linpool *lp) -{ - rta *r = lp_alloc(lp, rta_size(o)); - memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } - rta_uncache(r); - return r; -} - -/** - * rta_dump - dump route attributes - * @a: attribute structure to dump - * - * This function takes a &rta and dumps its contents to the debug output. - */ -void -rta_dump(rta *a) -{ - static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("pref=%d uc=%d %s %s%s h=%04x", - a->pref, a->uc, rts[a->source], ip_scope_text(a->scope), - rtd[a->dest], a->hash_key); - if (!a->cached) - debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } - if (a->eattrs) - { - debug(" EA: "); - ea_dump(a->eattrs); - } + ea_list_unref(a->l); + mb_free(a); } /** @@ -1372,30 +1513,25 @@ rta_dump(rta *a) * to the debug output. 
*/ void -rta_dump_all(void) +ea_dump_all(void) { - rta *a; - uint h; - debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); - for(h=0; h<rta_cache_size; h++) - for(a=rta_hash_table[h]; a; a=a->next) + for (uint h=0; h < rta_cache_size; h++) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - rta_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); } void -rta_show(struct cli *c, rta *a) +ea_show_list(struct cli *c, ea_list *eal) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - - for(ea_list *eal = a->eattrs; eal; eal=eal->next) - for(int i=0; i<eal->count; i++) - ea_show(c, &eal->attrs[i]); + ea_list *n = ea_normalize(eal, 0); + for (int i =0; i < n->count; i++) + ea_show(c, &n->attrs[i]); } /** @@ -1411,18 +1547,20 @@ rta_init(void) rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + /* These attributes are required to be first for nice "show route" output */ + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + + /* Other generic route attributes */ + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_flowspec_valid); } /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index c1251675..4199e17c 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/rt-dev.h" #include "conf/conf.h" #include "lib/resource.h" @@ -80,16 +80,18 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rta a0 = { - .pref = c->preference, - .source = RTS_DEVICE, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh.iface = ad->iface, + ea_list *ea = NULL; + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, }; + ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); + rte e0 = { - .attrs = rta_lookup(&a0), + .attrs = ea, .src = src, }; @@ -186,7 +188,6 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_device = { .name = "Direct", .template = "direct%d", - .class = PROTOCOL_DIRECT, .preference = DEF_PREF_DIRECT, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct rt_dev_proto), @@ -196,3 +197,9 @@ struct protocol proto_device = { .reconfigure = dev_reconfigure, .copy_config = dev_copy_config }; + +void +dev_build(void) +{ + proto_build(&proto_device); +} diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 
a7f70371..801561da 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -55,7 +55,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/string.h" /* @@ -331,7 +331,7 @@ fib_get(struct fib *f, const net_addr *a) memset(b, 0, f->node_offset); if (f->init) - f->init(b); + f->init(f, b); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); @@ -475,7 +475,7 @@ fib_delete(struct fib *f, void *E) } if (f->fib_slab) - sl_free(f->fib_slab, E); + sl_free(E); else mb_free(E); diff --git a/nest/rt-show.c b/nest/rt-show.c index 8196903d..17400029 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -10,123 +10,92 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/cli.h" #include "nest/iface.h" #include "filter/filter.h" +#include "filter/data.h" #include "sysdep/unix/krt.h" static void -rt_show_table(struct cli *c, struct rt_show_data *d) +rt_show_table(struct rt_show_data *d) { + struct cli *c = d->cli; + /* No table blocks in 'show route count' */ if (d->stats == 2) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->name); d->last_table = d->tab; } -static inline struct krt_proto * -rt_show_get_kernel(struct rt_show_data *d) -{ - struct proto_config *krt = d->tab->table->config->krt_attached; - return krt ? (struct krt_proto *) krt->proto : NULL; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary) { byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; - rta *a = e->attrs; - int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; + ea_list *a = e->attrs; + int sync_error = d->tab->kernel ? krt_get_sync_error(d->tab->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a, &ea_gen_nexthop) : NULL; + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) - bsprintf(from, " from %I", a->from); + ip_addr a_from = ea_get_ip(a, &ea_gen_from, IPA_NONE); + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) + bsprintf(from, " from %I", a_from); else from[0] = 0; /* Need to normalize the extended attributes */ - if (d->verbose && !rta_is_cached(a) && a->eattrs) - ea_normalize(a->eattrs); + if (d->verbose && !rta_is_cached(a) && a) + a = ea_normalize(a, 0); get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL; if (get_route_info) get_route_info(e, info); else - bsprintf(info, " (%d)", a->pref); + bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) - rt_show_table(c, d); - - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); - - if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) - { - char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; - char *onlink = (nh->flags & RNF_ONLINK) ? 
" onlink" : ""; - char weight[16] = ""; - - if (nh->labels) - { - lsp += bsprintf(lsp, " mpls %d", nh->label[0]); - for (int i=1;i<nh->labels; i++) - lsp += bsprintf(lsp, "/%d", nh->label[i]); - } - *lsp = '\0'; + rt_show_table(d); - if (a->nh.next) - bsprintf(weight, " weight %d", nh->weight + 1); + eattr *heea; + struct hostentry_adata *had = NULL; + if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry))) + had = (struct hostentry_adata *) heea->u.ptr; - if (ipa_nonzero(nh->gw)) - cli_printf(c, -1007, "\tvia %I on %s%s%s%s", - nh->gw, nh->iface->name, mpls, onlink, weight); - else - cli_printf(c, -1007, "\tdev %s%s%s", - nh->iface->name, mpls, onlink, weight); - } + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), + e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) { - cli_printf(c, -1008, "\tInternal route ID: %uL %uG %uS", e->src->private_id, e->src->global_id, e->stale_cycle); - rta_show(c, a); + ea_show_list(c, a); + cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS id %u", + e->src->private_id, e->src->global_id, e->stale_cycle, e->id); + } + else if (dest == RTD_UNICAST) + ea_show_nexthop_list(c, nhad); + else if (had) + { + char hetext[256]; + ea_show_hostentry(&had->ad, hetext, sizeof hetext); + cli_printf(c, -1007, "\t%s", hetext); } -} - -static uint -rte_feed_count(net *n) -{ - uint count = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) - count++; - return count; -} - -static void -rte_feed_obtain(net *n, rte **feed, uint count) -{ - uint i = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) - { - ASSERT_DIE(i < count); - feed[i++] = &e->rte; - } - ASSERT_DIE(i == count); } static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count) { + struct cli *c = d->cli; byte ia[NET_MAX_TEXT_LENGTH+1]; struct channel *ec = d->tab->export_channel; @@ -135,13 +104,12 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ASSUME(!d->export_mode || ec); int first = 1; + int first_show = 1; int pass = 0; - bsnprintf(ia, sizeof(ia), "%N", n->n.addr); - - for (struct rte_storage *er = n->routes; er; er = er->next) + for (uint i = 0; i < count; i++) { - if (rte_is_filtered(&er->rte) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(feed[i]) != d->filtered)) continue; d->rt_counter++; @@ -151,7 +119,12 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (pass) continue; - struct rte e = er->rte; + struct rte e = *feed[i]; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ if (ec && !ec->out_req.hook) @@ -169,13 +142,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { /* Special case for merged export */ pass = 1; - uint count = rte_feed_count(n); - if (!count) - goto skip; - - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - rte *em = rt_export_merged(ec, feed, count, c->show_pool, 1); + rte *em = rt_export_merged(ec, feed, count, tmp_linpool, 1); if (em) e = *em; @@ -201,7 +168,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) * command may change the export filter 
and do not update routes. */ int do_export = (ic > 0) || - (f_run(ec->out_filter, &e, c->show_pool, FF_SILENT) <= F_ACCEPT); + (f_run(ec->out_filter, &e, FF_SILENT) <= F_ACCEPT); if (do_export != (d->export_mode == RSEM_EXPORT)) goto skip; @@ -214,122 +181,190 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (d->show_protocol && (&d->show_protocol->sources != e.src->owner)) goto skip; - if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) + if (f_run(d->filter, &e, 0) > F_ACCEPT) goto skip; if (d->stats < 2) - rt_show_rte(c, ia, &e, d, (n->routes == er)); + { + if (first_show) + net_format(n, ia, sizeof(ia)); + else + ia[0] = 0; + + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && !i); + first_show = 0; + } d->show_counter++; - ia[0] = 0; skip: - lp_flush(c->show_pool); - if (d->primary_only) break; } + + if ((d->show_counter - d->show_counter_last_flush) > 64) + { + d->show_counter_last_flush = d->show_counter; + cli_write_trigger(d->cli); + } } static void +rt_show_net_export_bulk(struct rt_export_request *req, const net_addr *n, + struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + return rt_show_net(d, n, feed, count); +} + +static void +rt_show_export_stopped_cleanup(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + + /* The hook is now invalid */ + req->hook = NULL; + + /* And free the CLI (deferred) */ + rfree(d->cli->pool); +} + +static int rt_show_cleanup(struct cli *c) { struct rt_show_data *d = c->rover; - struct rt_show_data_rtable *tab; - /* Unlink the iterator */ - if (d->table_open) - fit_get(&d->tab->table->fib, &d->fit); + /* Cancel the feed */ + if (d->req.hook) + { + rt_stop_export(&d->req, rt_show_export_stopped_cleanup); + return 1; + } + else + return 0; +} + +static void rt_show_export_stopped(struct rt_export_request *req); - /* Unlock referenced tables */ - WALK_LIST(tab, d->tables) - rt_unlock_table(tab->table); +static void +rt_show_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, rt_show_export_stopped); } static void -rt_show_cont(struct cli *c) +rt_show_dump_req(struct rt_export_request *req) { - struct rt_show_data *d = c->rover; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &d->tab->table->fib; - struct fib_iterator *it = &d->fit; + debug(" CLI Show Route Feed %p\n", req); +} + +static void +rt_show_done(struct rt_show_data *d) +{ + /* No more action */ + d->cli->cleanup = NULL; + d->cli->cont = NULL; + d->cli->rover = NULL; + + /* Write pending messages */ + cli_write_trigger(d->cli); +} + +static void +rt_show_cont(struct rt_show_data *d) +{ + struct cli *c = d->cli; if (d->running_on_config && (d->running_on_config != config)) { cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; + return rt_show_done(d); } - if (!d->table_open) - { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); - d->table_open = 1; - d->table_counter++; - d->kernel = rt_show_get_kernel(d); + d->req = (struct rt_export_request) { + .addr = d->addr, + .name = "CLI Show Route", + .list = &global_work_list, + .export_bulk = rt_show_net_export_bulk, + .dump_req = rt_show_dump_req, + .log_state_change = rt_show_log_state_change, + .addr_mode = d->addr_mode, + }; - d->show_counter_last = d->show_counter; - d->rt_counter_last = d->rt_counter; - d->net_counter_last = d->net_counter; + d->table_counter++; - if 
(d->tables_defined_by & RSD_TDB_SET) - rt_show_table(c, d); - } + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; - FIB_ITERATE_START(fib, it, net, n) - { - if (!max--) - { - FIB_ITERATE_PUT(it); - return; - } - rt_show_net(c, n, d); - } - FIB_ITERATE_END; + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(d); + + rt_request_export(d->tab->table, &d->req); +} + +static void +rt_show_export_stopped(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + + /* The hook is now invalid */ + req->hook = NULL; if (d->stats) { if (d->last_table != d->tab) - rt_show_table(c, d); + rt_show_table(d); - cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + cli_printf(d->cli, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, d->tab->table->name); + d->net_counter - d->net_counter_last, d->tab->name); } - d->kernel = NULL; - d->table_open = 0; d->tab = NODE_NEXT(d->tab); if (NODE_VALID(d->tab)) - return; + return rt_show_cont(d); + /* Printout total stats */ if (d->stats && (d->table_counter > 1)) { - if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + if (d->last_table) cli_printf(d->cli, -1007, ""); + cli_printf(d->cli, 14, "Total: %d of %d routes for %d networks in %d tables", d->show_counter, d->rt_counter, d->net_counter, d->table_counter); } + else if (!d->rt_counter && ((d->addr_mode == TE_ADDR_EQUAL) || (d->addr_mode == TE_ADDR_FOR))) + cli_printf(d->cli, 8001, "Network not found"); else - cli_printf(c, 0, ""); + cli_printf(d->cli, 0, ""); -done: - rt_show_cleanup(c); - c->cont = c->cleanup = NULL; + /* No more route showing */ + rt_show_done(d); } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, rtable *t) +rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name) { struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); tab->table = t; + tab->name = name; add_tail(&(d->tables), &(tab->n)); return tab; } +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, struct rtable *t) +{ + struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + + struct proto_config *krt = t->config->krt_attached; + if (krt) + rsdr->kernel = (struct krt_proto *) krt->proto; + + return rsdr; +} + static inline void rt_show_get_default_tables(struct rt_show_data *d) { @@ -384,16 +419,16 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->export_mode) { if (!tab->export_channel && d->export_channel && - (tab->table == d->export_channel->table)) + (tab->table == &d->export_channel->table->exporter)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); if (!tab->export_channel) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("No export channel for table %s", tab->table->name); + cf_error("No export channel for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -404,7 +439,7 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->addr && (tab->table->addr_type != d->addr->type)) { if (d->tables_defined_by & RSD_TDB_NMN) - 
cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + cf_error("Incompatible type of prefix/ip for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -416,48 +451,29 @@ rt_show_prepare_tables(struct rt_show_data *d) cf_error("No valid tables"); } +static void +rt_show_dummy_cont(struct cli *c UNUSED) +{ + /* Explicitly do nothing to prevent CLI from trying to parse another command. */ +} + void rt_show(struct rt_show_data *d) { - struct rt_show_data_rtable *tab; - net *n; - /* Filtered routes are neither exported nor have sensible ordering */ if (d->filtered && (d->export_mode || d->primary_only)) cf_error("Incompatible show route options"); rt_show_prepare_tables(d); - if (!d->addr) - { - WALK_LIST(tab, d->tables) - rt_lock_table(tab->table); - - /* There is at least one table */ - d->tab = HEAD(d->tables); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - WALK_LIST(tab, d->tables) - { - d->tab = tab; - d->kernel = rt_show_get_kernel(d); + if (EMPTY_LIST(d->tables)) + cf_error("No suitable tables found"); - if (d->show_for) - n = net_route(tab->table, d->addr); - else - n = net_find(tab->table, d->addr); + d->tab = HEAD(d->tables); - if (n) - rt_show_net(this_cli, n, d); - } + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + this_cli->cont = rt_show_dummy_cont; - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not found"); - } + rt_show_cont(d); } diff --git a/nest/rt-table.c b/nest/rt-table.c index c67f5bf8..b8f0e61d 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -26,12 +26,72 @@ * (see the route attribute module for a precise explanation) holding the * remaining route attributes which are expected to be shared by multiple * routes in order to conserve memory. + * + * There are several mechanisms that allow automatic update of routes in one + * routing table (dst) as a result of changes in another routing table (src). + * They handle issues of recursive next hop resolving, flowspec validation and + * RPKI validation. + * + * The first such mechanism is handling of recursive next hops. A route in the + * dst table has an indirect next hop address, which is resolved through a route + * in the src table (which may also be the same table) to get an immediate next + * hop. This is implemented using structure &hostcache attached to the src + * table, which contains &hostentry structures for each tracked next hop + * address. These structures are linked from recursive routes in dst tables, + * possibly multiple routes sharing one hostentry (as many routes may have the + * same indirect next hop). There is also a trie in the hostcache, which matches + * all prefixes that may influence resolving of tracked next hops. + * + * When a best route changes in the src table, the hostcache is notified using + * rt_notify_hostcache(), which immediately checks using the trie whether the + * change is relevant and if it is, then it schedules asynchronous hostcache + * recomputation. The recomputation is done by rt_update_hostcache() (called + * from rt_event() of src table), it walks through all hostentries and resolves + * them (by rt_update_hostentry()). It also updates the trie. If a change in + * hostentry resolution was found, then it schedules asynchronous nexthop + * recomputation of associated dst table. 
That is done by rt_next_hop_update() + (called from rt_event() of dst table), it iterates over all routes in the dst + table and re-examines their hostentries for changes. Note that in contrast to + the hostcache update, the next hop update can be interrupted by the main loop. These two + full-table walks (over hostcache and dst table) are necessary due to the absence + of direct lookups (route -> affected nexthop, nexthop -> its route). + * + * The second mechanism is for flowspec validation, where the validity of flowspec + * routes depends on resolving their network prefixes in IP routing tables. This + * is similar to the recursive next hop mechanism, but simpler as there are no + * intermediate hostcache and hostentries (because flows are less likely to + * share a common net prefix than routes are to share a common next hop). In the src table, + * there is a list of dst tables (list flowspec_links); this list is updated by + * flowspec channels (by rt_flowspec_link() and rt_flowspec_unlink() during + * channel start/stop). Each dst table has its own trie of prefixes that may + * influence validation of flowspec routes in it (flowspec_trie). + * + * When a best route changes in the src table, rt_flowspec_notify() immediately + * checks all dst tables from the list using their tries to see whether the + * change is relevant for them. If it is, then an asynchronous re-validation of + * flowspec routes in the dst table is scheduled. That is also done by + * rt_next_hop_update(), like the nexthop recomputation above. It iterates over all + * flowspec routes and re-validates them. It also recalculates the trie. + * + * Note that in contrast to the hostcache update, here the trie is recalculated + * during rt_next_hop_update(), which may be interleaved with IP route + * updates. The trie is flushed at the beginning of the recalculation, which means + * that such updates may use a partial trie to see if they are relevant. But it + * works anyway! Either the affected flowspec route was already re-validated and added to + * the trie, in which case the IP route change matches the trie and triggers a next round + * of re-validation, or it was not yet re-validated and added to the trie, but + * will be re-validated later in this round anyway. + * + * The third mechanism is used for RPKI re-validation of IP routes and it is the + * simplest. It is just a list of subscribers in the src table, which are notified + * after any change, but only once a settle time has passed. Also, in the RPKI case, + * the dst is not a table but a channel, which refeeds routes through a filter. 
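+ *
+ * As an editorial illustration (simplified, not part of the original
+ * comment), the first mechanism chains together like this:
+ *
+ *   best route change in src table
+ *     -> rt_notify_hostcache(src, net)    (trie check, schedule update)
+ *     -> rt_update_hostcache(src)         (rt_update_hostentry() on
+ *                                          each tracked next hop)
+ *     -> rt_schedule_nhu(dst)             (when a hostentry changed)
+ *     -> rt_next_hop_update(dst)          (rewrite routes using the
+ *                                          refreshed hostentries)
+ *
+ * All of these functions are declared or defined later in this file.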
*/ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -44,6 +104,11 @@ #include "lib/hash.h" #include "lib/string.h" #include "lib/alloca.h" +#include "lib/flowspec.h" + +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif #include <stdatomic.h> @@ -52,6 +117,9 @@ pool *rt_table_pool; static linpool *rte_update_pool; list routing_tables; +list deleted_routing_tables; + +struct rt_cork rt_cork; /* Data structures for export journal */ #define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) @@ -65,18 +133,31 @@ struct rt_export_block { static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); -static void rt_update_hostcache(void *tab); -static void rt_next_hop_update(void *tab); -static inline void rt_prune_table(void *tab); +static void rt_update_hostcache(rtable *tab); +static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); +static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); -static void rt_feed_channel(void *); - -static inline void rt_export_used(rtable *tab); -static void rt_export_cleanup(void *tab); +static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_kick_prune_timer(rtable *tab); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); + +static inline void rt_export_used(struct rt_exporter *); +static void rt_export_cleanup(rtable *tab); static inline void rte_update_lock(void); static inline void rte_update_unlock(void); +static int rte_same(rte *x, rte *y); + const char *rt_import_state_name_array[TIS_MAX] = { [TIS_DOWN] = "DOWN", [TIS_UP] = "UP", @@ -88,7 +169,6 @@ const char *rt_import_state_name_array[TIS_MAX] = { const char *rt_export_state_name_array[TES_MAX] = { [TES_DOWN] = "DOWN", - [TES_HUNGRY] = "HUNGRY", [TES_FEEDING] = "FEEDING", [TES_READY] = "READY", [TES_STOP] = "STOP" @@ -110,41 +190,184 @@ const char *rt_export_state_name(u8 state) return rt_export_state_name_array[state]; } -struct event_cork rt_cork; +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); + +static void +net_init_with_trie(struct fib *f, void *N) +{ + rtable *tab = SKIP_BACK(rtable, fib, f); + net *n = N; + + if (tab->trie) + trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + + if (tab->trie_new) + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); +} + +static inline net * +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) + { + net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, 
(net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} -/* Like fib_route(), but skips empty net entries */ static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen); + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } + } + + if (best) + return best; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) { + net_addr_ip4 n; + net_copy_ip4(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + return r; +} + +static inline net * +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +{ + net_addr_vpn4 n; + net_copy_vpn4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip4_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); } return r; } -static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) +static inline net * +net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) { + net_addr_ip6 n; + net_copy_ip6(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +{ + net_addr_vpn6 n; + net_copy_vpn6(&n, n0); - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip6_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); } return r; } static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) { - struct fib_node *fn; + net_addr_ip6_sadr n; + net_copy_ip6_sadr(&n, n0); while (1) { @@ -153,13 +376,13 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) /* We need to do dst first matching. Since sadr addresses are hashed on dst prefix only, find the hash table chain and go through it to find the - match with the smallest matching src prefix. 
*/ - for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) { net_addr_ip6_sadr *a = (void *) fn->addr; - if (net_equal_dst_ip6_sadr(n, a) && - net_in_net_src_ip6_sadr(n, a) && + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && (a->src_pxlen >= best_pxlen)) { best = fib_node_to_user(&t->fib, fn); @@ -170,38 +393,52 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) if (best) return best; - if (!n->dst_pxlen) + if (!n.dst_pxlen) break; - n->dst_pxlen--; - ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + n.dst_pxlen--; + ip6_clrbit(&n.dst_prefix, n.dst_pxlen); } return NULL; } -void * +net * net_route(rtable *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); - net_addr *n0 = alloca(n->length); - net_copy(n0, n); - switch (n->type) { case NET_IP4: + if (tab->trie) + return net_route_ip4_trie(tab, (net_addr_ip4 *) n); + else + return net_route_ip4_fib (tab, (net_addr_ip4 *) n); + case NET_VPN4: - case NET_ROA4: - return net_route_ip4(tab, (net_addr_ip4 *) n0); + if (tab->trie) + return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n); + else + return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n); case NET_IP6: + if (tab->trie) + return net_route_ip6_trie(tab, (net_addr_ip6 *) n); + else + return net_route_ip6_fib (tab, (net_addr_ip6 *) n); + case NET_VPN6: - case NET_ROA6: - return net_route_ip6(tab, (net_addr_ip6 *) n0); + if (tab->trie) + return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n); + else + return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n); case NET_IP6_SADR: - return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); + if (tab->trie) + return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n); + else + return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n); default: return NULL; @@ -210,7 +447,35 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0) + { + net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? 
ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -242,7 +507,35 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0) + { + net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -292,9 +585,19 @@ int net_roa_check(rtable *tab, const net_addr *n, u32 asn) { if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) - return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else return ROA_UNKNOWN; /* Should not happen */ } @@ -329,10 +632,10 @@ rte_store(const rte *r, net *net, rtable *tab) rt_lock_source(e->rte.src); - if (e->rte.attrs->cached) + if (ea_is_cached(e->rte.attrs)) e->rte.attrs = rta_clone(e->rte.attrs); else - e->rte.attrs = rta_lookup(e->rte.attrs); + e->rte.attrs = rta_lookup(e->rte.attrs, 1); return e; } @@ -346,11 +649,11 @@ rte_store(const rte *r, net *net, rtable *tab) */ void -rte_free(struct rte_storage *e, rtable *tab) +rte_free(struct rte_storage *e) { rt_unlock_source(e->rte.src); rta_free(e->rte.attrs); - sl_free(tab->rte_slab, e); + sl_free(e); } static int /* Actually better or at least as good as */ @@ -363,9 +666,12 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->attrs->pref > old->attrs->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->attrs->pref < old->attrs->pref) + if (np < op) return 0; if (new->src->owner->class != old->src->owner->class) { @@ -389,7 +695,7 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->attrs->pref != sec->attrs->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; if (pri->src->owner->class != sec->src->owner->class) @@ -404,11 +710,10 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s%s", + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", name, dir, msg, e->net, e->src->private_id, e->src->global_id, e->stale_cycle, e->id, - 
rta_dest_name(e->attrs->dest), - rte_is_filtered(e) ? " (filtered)" : ""); + rta_dest_name(rte_dest(e))); } static inline void @@ -447,8 +752,8 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) - count++; + count++; + return count; } @@ -457,16 +762,16 @@ rte_feed_obtain(net *n, struct rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; } + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; @@ -496,7 +801,7 @@ export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) } v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { @@ -522,17 +827,10 @@ reject_noset: return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt, int silent) -{ - return export_filter_(c, rt, rte_update_pool, silent); -} - -void do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old); - static void do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { + struct proto *p = c->proto; struct channel_export_stats *stats = &c->export_stats; if (c->refeeding && new) @@ -549,30 +847,16 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) if (!new && old) CHANNEL_LIMIT_POP(c, OUT); - /* Store route export state */ - if (old) - bmap_clear(&c->export_map, old->id); - if (new) - bmap_set(&c->export_map, new->id); - - /* Apply export table */ - if (c->out_table) - rte_import(&c->out_table->push, net, new, old ? 
old->src : new->src); + stats->updates_accepted++; else - do_rt_notify_direct(c, net, new, old); -} + stats->withdraws_accepted++; -void -do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old) -{ - struct proto *p = c->proto; - struct channel_export_stats *stats = &c->export_stats; + if (old) + bmap_clear(&c->export_map, old->id); if (new) - stats->updates_accepted++; - else - stats->withdraws_accepted++; + bmap_set(&c->export_map, new->id); if (p->debug & D_ROUTES) { @@ -590,6 +874,16 @@ do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte static void rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { + if (new && old && rte_same(new, old)) + { + if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) + { + bmap_set(&c->export_map, new->id); + bmap_clear(&c->export_map, old->id); + } + return; + } + if (new) new = export_filter(c, new, 0); @@ -681,20 +975,13 @@ done: do_rt_notify(c, n, new_best, old_best); } - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); -} - rte * rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { _Thread_local static rte rloc; // struct proto *p = c->proto; - struct nexthop *nhs = NULL; + struct nexthop_adata *nhs = NULL; rte *best0 = feed[0]; rte *best = NULL; @@ -706,7 +993,7 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool return NULL; rloc = *best0; - best = export_filter_(c, &rloc, pool, silent); + best = export_filter(c, &rloc, silent); if (!best) /* Best route doesn't pass the filter */ @@ -722,23 +1009,29 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool continue; rte tmp0 = *feed[i]; - rte *tmp = export_filter_(c, &tmp0, pool, 1); + rte *tmp = export_filter(c, &tmp0, 1); if (!tmp || !rte_is_reachable(tmp)) continue; - nhs = nexthop_merge_rta(nhs, tmp->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best->attrs = rta_cow(best->attrs, pool); - nexthop_link(best->attrs, nhs); - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + + ea_set_attr(&best->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); } return best; @@ -797,7 +1090,7 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_ { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte *old = RTES_OR_NULL(rpe->old_best); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); struct rte_storage *new_best = rpe->new_best; while (rpe) @@ -807,11 +1100,9 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_ rpe = rpe_next(rpe, NULL); } - if (&new_best->rte != old) - { - rte n0, *new = RTES_CLONE(new_best, &n0); - rt_notify_basic(c, net, new, old); - } + rte n0 = RTE_COPY_VALID(new_best); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? 
&n0 : NULL, o); } void @@ -819,22 +1110,28 @@ rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pend { struct channel *c = SKIP_BACK(struct channel, out_req, req); - struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src; - rte *old = RTES_OR_NULL(rpe->old); - struct rte_storage *new_any = rpe->new; + rte *n = RTE_VALID_OR_NULL(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); - while (rpe) + if (!n && !o) { channel_rpe_mark_seen(req, rpe); - new_any = rpe->new; - rpe = rpe_next(rpe, src); + return; } - if (&new_any->rte != old) + struct rte_src *src = n ? n->src : o->src; + struct rte_storage *new_latest = rpe->new; + + while (rpe) { - rte n0, *new = RTES_CLONE(new_any, &n0); - rt_notify_basic(c, net, new, old); + channel_rpe_mark_seen(req, rpe); + new_latest = rpe->new; + rpe = rpe_next(rpe, src); } + + rte n0 = RTE_COPY_VALID(new_latest); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); } void @@ -843,10 +1140,11 @@ rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pendin struct channel *c = SKIP_BACK(struct channel, out_req, req); for (uint i=0; i<count; i++) - { - rte n0 = *feed[i]; - rt_notify_basic(c, net, &n0, NULL); - } + if (rte_is_valid(feed[i])) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } } void @@ -880,10 +1178,32 @@ static void rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) { if (bmap_test(&hook->seq_map, rpe->seq)) - goto seen; + goto ignore; /* Seen already */ const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + switch (hook->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_FOR: + bug("Continuous export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", hook->req->addr_mode); + } + if (rpe->new) hook->stats.updates_received++; else @@ -906,13 +1226,13 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) else bug("Export request must always provide an export method"); -seen: +ignore: /* Get the next export if it exists */ hook->rpe_next = rt_next_export_fast(rpe); /* The last block may be available to free */ if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) - rt_export_used(hook->table); + CALL(hook->table->used, hook->table); /* Releasing this export for cleanup routine */ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); @@ -954,46 +1274,36 @@ static void rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { - if (!new_best || !rte_is_valid(&new_best->rte)) - new_best = NULL; - - if (!old_best || !rte_is_valid(&old_best->rte)) - old_best = NULL; - - if (!new || !rte_is_valid(&new->rte)) - new = NULL; - - if (old && !rte_is_valid(&old->rte)) - { - /* Filtered old route isn't announced, should be freed immediately. 
*/ - rte_free(old, tab); - old = NULL; - } + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) + if (new_best_valid) new_best->rte.sender->stats.pref++; - if (old_best) + if (old_best_valid) old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); + + if (!EMPTY_LIST(tab->flowspec_links)) + rt_flowspec_notify(tab, net); } rt_schedule_notify(tab); - if (EMPTY_LIST(tab->exports) && EMPTY_LIST(tab->pending_exports)) + if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) { /* No export hook and no pending exports to cleanup. We may free the route immediately. */ if (!old) return; hmap_clear(&tab->id_map, old->rte.id); - rte_free(old, tab); + rte_free(old); return; } @@ -1001,9 +1311,9 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; u32 end = 0; - if (!EMPTY_LIST(tab->pending_exports)) + if (!EMPTY_LIST(tab->exporter.pending)) { - rpeb = TAIL(tab->pending_exports); + rpeb = TAIL(tab->exporter.pending); end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); if (end >= RT_PENDING_EXPORT_ITEMS) { @@ -1017,9 +1327,9 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage if (!rpeb) { - rpeb = alloc_page(tab->rp); + rpeb = alloc_page(); *rpeb = (struct rt_export_block) {}; - add_tail(&tab->pending_exports, &rpeb->n); + add_tail(&tab->exporter.pending, &rpeb->n); } /* Fill the pending export */ @@ -1029,10 +1339,14 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage .new_best = new_best, .old = old, .old_best = old_best, - .seq = tab->next_export_seq++, + .seq = tab->exporter.next_seq++, }; - DBG("rte_announce: table=%s net=%N new=%p from %p old=%p from %p new_best=%p old_best=%p seq=%lu\n", tab->name, net->n.addr, new, new ? new->sender : NULL, old, old ? old->sender : NULL, new_best, old_best, rpe->seq); + DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", + tab->name, net->n.addr, + new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, + old, old ? old->rte.id : 0, old ? 
old->rte.sender->req->name : NULL, + new_best, old_best, rpe->seq); ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); @@ -1059,17 +1373,13 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage if (!net->first) net->first = rpe; - if (tab->first_export == NULL) - tab->first_export = rpe; + if (tab->exporter.first == NULL) + tab->exporter.first = rpe; - if ((tab->first_export->seq + tab->config->cork_limit <= tab->next_export_seq) && !tab->cork_active) - { - ev_cork(&rt_cork); - tab->cork_active = 1; - tm_start(tab->export_timer, 0); - } - else if (!tm_active(tab->export_timer)) - tm_start(tab->export_timer, tab->config->export_settle_time); + rt_check_cork_high(tab); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } static struct rt_pending_export * @@ -1098,7 +1408,7 @@ rt_next_export_fast(struct rt_pending_export *last) } static struct rt_pending_export * -rt_next_export(struct rt_export_hook *hook, rtable *tab) +rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) { /* As the table is locked, it is safe to reload the last export pointer */ struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); @@ -1109,7 +1419,7 @@ rt_next_export(struct rt_export_hook *hook, rtable *tab) /* No, therefore we must process the table's first pending export */ else - return tab->first_export; + return tab->first; } static inline void @@ -1124,7 +1434,7 @@ rt_announce_exports(timer *tm) rtable *tab = tm->data; struct rt_export_hook *c; node *n; - WALK_LIST2(c, n, tab->exports, n) + WALK_LIST2(c, n, tab->exporter.hooks, n) { if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) continue; @@ -1134,14 +1444,14 @@ rt_announce_exports(timer *tm) } static struct rt_pending_export * -rt_last_export(rtable *tab) +rt_last_export(struct rt_exporter *tab) { struct rt_pending_export *rpe = NULL; - if (!EMPTY_LIST(tab->pending_exports)) + if (!EMPTY_LIST(tab->pending)) { /* We'll continue processing exports from this export on */ - struct rt_export_block *reb = TAIL(tab->pending_exports); + struct rt_export_block *reb = TAIL(tab->pending); ASSERT_DIE(reb->end); rpe = &reb->export[reb->end - 1]; } @@ -1164,7 +1474,7 @@ rt_export_hook(void *_data) if (!c->rpe_next) { - rt_export_used(c->table); + CALL(c->table->used, c->table); return; } } @@ -1209,16 +1519,29 @@ rte_validate(struct channel *ch, rte *e) return 0; } - if (net_type_match(n, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n, e->attrs->dest, ch->proto->name); - return 0; - } + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } + + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", n, ch->proto->name); return 0; } @@ -1249,9 +1572,13 @@ rte_recalculate(struct rt_import_hook *c, 
net *net, rte *new, struct rte_src *sr rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - /* Set the stale cycle unless already set */ - if (new && !(new->flags & REF_USE_STALE)) - new->stale_cycle = c->stale_set; + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. */ + struct rte_storage *new_stored = NULL; + if (new) + new = &(new_stored = rte_store(new, net, table))->rte; /* Find and remove original route from the same protocol */ struct rte_storage **before_old = rte_find(net, src); @@ -1275,7 +1602,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); } - if (new && rte_same(old, new)) + if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ old->stale_cycle = new->stale_cycle; @@ -1285,6 +1612,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr stats->updates_ignored++; rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } + + /* We need to free the already stored route here before returning */ + rte_free(new_stored); + return; } *before_old = (*before_old)->next; @@ -1297,8 +1628,13 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr return; } - if (req->preimport) - new = req->preimport(req, new, old); + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored); + new_stored = NULL; + new = NULL; + } int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); @@ -1313,8 +1649,6 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (old_ok || new_ok) table->last_rt_change = current_time(); - struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ @@ -1410,32 +1744,26 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr hmap_set(&table->id_map, new_stored->rte.id); } - _Bool nb = (new_stored == net->routes); - _Bool ob = (old_best == old); - /* Log the route change */ - if (new_ok && old_ok) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; - rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, best_indicator[nb][ob]); + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } - else if (new_ok) - rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, - (!net->routes->next || !rte_is_ok(&net->routes->next->rte)) ? "added [sole]" : - nb ? "added [best]" : "added"); - else if (old_ok) - rt_rte_trace_in(D_ROUTES, req, old, - (!net->routes || !rte_is_ok(&net->routes->rte)) ? "removed [sole]" : - ob ? 
"removed [best]" : "removed"); /* Propagate the route change */ rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); if (!net->routes && - (table->gc_counter++ >= table->config->gc_max_ops) && - (table->gc_time + table->config->gc_min_time <= current_time())) - rt_schedule_prune(table); + (table->gc_counter++ >= table->config->gc_threshold)) + rt_kick_prune_timer(table); #if 0 /* Enable and reimplement these callbacks if anybody wants to use them */ @@ -1462,57 +1790,37 @@ rte_update_unlock(void) lp_flush(rte_update_pool); } -rte * +int channel_preimport(struct rt_import_request *req, rte *new, rte *old) { struct channel *c = SKIP_BACK(struct channel, in_req, req); - if (!c->in_table) - { - if (new && !old) - if (CHANNEL_LIMIT_PUSH(c, RX)) - return NULL; + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return 0; - if (!new && old) - CHANNEL_LIMIT_POP(c, RX); - } + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); int new_in = new && !rte_is_filtered(new); int old_in = old && !rte_is_filtered(old); if (new_in && !old_in) if (CHANNEL_LIMIT_PUSH(c, IN)) - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) { new->flags |= REF_FILTERED; - return new; + return 1; } else - return NULL; + return 0; if (!new_in && old_in) CHANNEL_LIMIT_POP(c, IN); - return new; -} - -rte * -channel_in_preimport(struct rt_import_request *req, rte *new, rte *old) -{ - struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); - - if (new && !old) - if (CHANNEL_LIMIT_PUSH(cat->c, RX)) - return NULL; - - if (!new && old) - CHANNEL_LIMIT_POP(cat->c, RX); - - return new; + return 1; } -void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); - void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { @@ -1521,16 +1829,13 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); - if (c->in_table) - rte_import(&c->in_table->push, n, new, src); - else - rte_update_direct(c, n, new, src); -} - -void -rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ const struct filter *filter = c->in_filter; struct channel_import_stats *stats = &c->import_stats; @@ -1542,29 +1847,49 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src int fr; stats->updates_received++; - if (!rte_validate(c, new)) - { - channel_rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->updates_invalid++; - new = NULL; - } - else if ((filter == FILTER_REJECT) || - ((fr = f_run(filter, new, rte_update_pool, 0)) > F_ACCEPT)) + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { stats->updates_filtered++; channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) new->flags |= REF_FILTERED; else new = NULL; } + + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) + { + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } + } else stats->withdraws_received++; rte_import(&c->in_req, n, new, src); + /* Now the route attributes are kept by the in-table 
cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + { + /* There may be some updates on top of the original attribute block */ + ea_list *a = new->attrs; + while (a->next) + a = a->next; + + ea_free(a); + } + rte_update_unlock(); } @@ -1582,6 +1907,9 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt nn = net_get(hook->table, n); new->net = nn->n.addr; new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } else if (!(nn = net_find(hook->table, n))) { @@ -1618,20 +1946,17 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte { net *n = net_find(t, a); - if (!n || !n->routes) + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) return 0; rte rt = n->routes->rte; - if (!rte_is_valid(&rt)) - return 0; - rte_update_lock(); /* Rest is stripped down export_filter() */ int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); rte_update_unlock(); @@ -1639,13 +1964,23 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte } static void +rt_table_export_done(struct rt_export_hook *hook) +{ + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + +static void rt_export_stopped(void *data) { struct rt_export_hook *hook = data; - rtable *tab = hook->table; + struct rt_exporter *tab = hook->table; /* Drop pending exports */ - rt_export_used(tab); + CALL(tab->used, tab); /* Unlist */ rem_node(&hook->n); @@ -1653,14 +1988,13 @@ rt_export_stopped(void *data) /* Report the channel as stopped. */ hook->stopped(hook->req); - /* Free the hook together with its coroutine. */ - rfree(hook->pool); - rt_unlock_table(tab); + /* Reporting the hook as finished. */ + CALL(tab->done, hook); - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + /* Free the hook. 
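+  * Editorial note: hook->pool also owns the hook structure itself, its
+  * event and its seq_map (see rt_table_export_start() and
+  * rt_request_export() below), so this single rfree() releases
+  * everything the export hook has allocated.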
*/ + rfree(hook->pool); } - static inline void rt_set_import_state(struct rt_import_hook *hook, u8 state) { @@ -1671,7 +2005,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state) hook->req->log_state_change(hook->req, state); } -static inline void +void rt_set_export_state(struct rt_export_hook *hook, u8 state) { hook->last_state_change = current_time(); @@ -1693,9 +2027,6 @@ rt_request_import(rtable *tab, struct rt_import_request *req) hook->req = req; hook->table = tab; - if (!hook->stale_set) - hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1; - rt_set_import_state(hook, TIS_UP); hook->n = (node) {}; @@ -1715,39 +2046,97 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r hook->stopped = stopped; } -void -rt_request_export(rtable *tab, struct rt_export_request *req) +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) { + rtable *tab = SKIP_BACK(rtable, exporter, re); rt_lock_table(tab); pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); hook->pool = p; - - hook->req = req; - hook->table = tab; /* stats zeroed by mb_allocz */ + switch (req->addr_mode) + { + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; - bmap_init(&hook->seq_map, p, 1024); + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; + + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; + + default: + bug("Requested an unknown export address mode"); + } + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); - rt_set_export_state(hook, TES_HUNGRY); + return hook; +} + +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +{ + struct rt_export_hook *hook = req->hook = re->start(re, req); + + hook->req = req; + hook->table = re; + + bmap_init(&hook->seq_map, hook->pool, 1024); struct rt_pending_export *rpe = rt_last_export(hook->table); DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? 
rpe->seq : 0); atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); hook->n = (node) {}; - add_tail(&tab->exports, &hook->n); + add_tail(&re->hooks, &hook->n); - FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); + rt_send_export_event(hook); +} - DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); - hook->event = ev_new_init(p, rt_feed_channel, hook); - rt_send_export_event(hook); + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) + return; - rt_set_export_state(hook, TES_FEEDING); + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; + } } void @@ -1756,22 +2145,22 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; - rtable *tab = hook->table; - - /* Stop feeding */ + /* Cancel the feeder event */ ev_postpone(hook->event); - if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) == TES_FEEDING) - fit_get(&tab->fib, &hook->feed_fit); + /* Stop feeding from the exporter */ + CALL(hook->table->stop, hook); + /* Reset the event as the stopped event */ hook->event->hook = rt_export_stopped; hook->stopped = stopped; - rt_send_export_event(hook); - + /* Update export state */ rt_set_export_state(hook, TES_STOP); -} + /* Run the stopped event */ + rt_send_export_event(hook); +} /** * rt_refresh_begin - start a refresh cycle @@ -1783,14 +2172,15 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r * routes to the routing table (by rte_update()). After that, all protocol * routes (more precisely routes with @c as @sender) not sent during the * refresh cycle but still in the table from the past are pruned. This is - * implemented by setting rte->stale_cycle to req->stale_set in rte_update() - * and then dropping all routes with old stale_cycle values in table prune loop. */ + * implemented by marking all related routes as stale by REF_STALE flag in + * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD + * flag in rt_refresh_end() and then removing such routes in the prune loop. 
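+ *
+ * A minimal usage sketch (hypothetical caller; c stands for a channel and
+ * c->in_req for its import request, as elsewhere in this file):
+ *
+ *   rt_refresh_begin(&c->in_req);
+ *   rte_update(c, n, &new, src);   ... repeated for every re-sent route
+ *   rt_refresh_end(&c->in_req);
+ *
+ * Routes of c not re-sent in between are dropped by the next prune.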
+ */ void rt_refresh_begin(struct rt_import_request *req) { struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); - ASSERT_DIE(hook->stale_set == hook->stale_valid); /* If the pruning routine is too slow */ @@ -1800,15 +2190,16 @@ rt_refresh_begin(struct rt_import_request *req) log(L_WARN "Route refresh flood in table %s", hook->table->name); FIB_WALK(&hook->table->fib, net, n) { - for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == req->hook) - e->rte.stale_cycle = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; } FIB_WALK_END; hook->stale_set = 1; hook->stale_valid = 0; hook->stale_pruned = 0; } + /* Setting a new value of the stale modifier */ else if (!++hook->stale_set) { /* Let's reserve the stale_cycle zero value for always-invalid routes */ @@ -1854,7 +2245,7 @@ rte_dump(struct rte_storage *e) { debug("%-1N ", e->rte.net); debug("PF=%02x ", e->rte.pflags); - rta_dump(e->rte.attrs); + ea_dump(e->rte.attrs); debug("\n"); } @@ -1867,7 +2258,7 @@ rte_dump(struct rte_storage *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>%s\n", t->name, t->delete_event ? " (deleted)" : ""); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif @@ -1893,14 +2284,17 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); } void rt_dump_hooks(rtable *tab) { - debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->delete_event ? " (deleted)" : ""); + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, ev_active(tab->hcu_event), tab->use_count, tab->rt_count); + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -1915,7 +2309,7 @@ rt_dump_hooks(rtable *tab) } struct rt_export_hook *eh; - WALK_LIST(eh, tab->exports) + WALK_LIST(eh, tab->exporter.hooks) { eh->req->dump_req(eh->req); debug(" Export hook %p requested by %p:" @@ -1935,13 +2329,26 @@ rt_dump_hooks_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); +} + +static inline void +rt_schedule_hcu(rtable *tab) +{ + if (tab->hcu_scheduled) + return; + + tab->hcu_scheduled = 1; + ev_schedule(tab->rt_event); } static inline void rt_schedule_nhu(rtable *tab) { if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->nhu_event); + ev_schedule(tab->rt_event); /* state change: * NHU_CLEAN -> NHU_SCHEDULED @@ -1954,21 +2361,105 @@ void rt_schedule_prune(rtable *tab) { if (tab->prune_state == 0) - ev_schedule(tab->prune_event); + ev_schedule(tab->rt_event); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } -void -rt_export_used(rtable *tab) +static void +rt_export_used(struct rt_exporter *e) { + rtable *tab = SKIP_BACK(rtable, exporter, e); + if (config->table_debug) log(L_TRACE "%s: Export cleanup requested", tab->name); - ev_schedule(tab->ec_event); + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); +} + +static void +rt_event(void *ptr) +{ + rtable *tab = ptr; + + rt_lock_table(tab); + + if (tab->export_used) + rt_export_cleanup(tab); + + if ( + tab->hcu_corked || + tab->nhu_corked || 
+ (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) + ) + { + if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) + log(L_TRACE "%s: Auxiliary routines corked", tab->name); + + tab->hcu_corked |= tab->hcu_scheduled; + tab->hcu_scheduled = 0; + + tab->nhu_corked |= tab->nhu_state; + tab->nhu_state = 0; + } + + if (tab->hcu_scheduled) + rt_update_hostcache(tab); + + if (tab->nhu_state) + rt_next_hop_update(tab); + + if (tab->prune_state) + rt_prune_table(tab); + + rt_unlock_table(tab); +} + +static void +rt_uncork_event(void *ptr) +{ + rtable *tab = ptr; + + tab->hcu_scheduled |= tab->hcu_corked; + tab->hcu_corked = 0; + + tab->nhu_state |= tab->nhu_corked; + tab->nhu_corked = 0; + + if (config->table_debug) + log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); + + ev_schedule(tab->rt_event); +} + +static void +rt_prune_timer(timer *t) +{ + rtable *tab = t->data; + + if (tab->gc_counter >= tab->config->gc_threshold) + rt_schedule_prune(tab); +} + +static void +rt_kick_prune_timer(rtable *tab) +{ + /* Return if prune is already scheduled */ + if (tm_active(tab->prune_timer) || (tab->prune_state & 1)) + return; + + /* Randomize GC period to +/- 50% */ + btime gc_period = tab->config->gc_period; + gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period); + tm_start(tab->prune_timer, gc_period); } + static inline btime rt_settled_time(rtable *tab) { @@ -2041,6 +2532,90 @@ rt_unsubscribe(struct rt_subscription *s) rt_unlock_table(s->tab); } +static struct rt_flowspec_link * +rt_flowspec_find_link(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + if ((ln->src == src) && (ln->dst == dst)) + return ln; + + return NULL; +} + +void +rt_flowspec_link(rtable *src, rtable *dst) +{ + ASSERT(rt_is_ip(src)); + ASSERT(rt_is_flow(dst)); + + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + if (!ln) + { + rt_lock_table(src); + rt_lock_table(dst); + + ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); + ln->src = src; + ln->dst = dst; + add_tail(&src->flowspec_links, &ln->n); + } + + ln->uc++; +} + +void +rt_flowspec_unlink(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + ASSERT(ln && (ln->uc > 0)); + + ln->uc--; + + if (!ln->uc) + { + rem_node(&ln->n); + mb_free(ln); + + rt_unlock_table(src); + rt_unlock_table(dst); + } +} + +static void +rt_flowspec_notify(rtable *src, net *net) +{ + /* Only IP tables are src links */ + ASSERT(rt_is_ip(src)); + + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + { + rtable *dst = ln->dst; + ASSERT(rt_is_flow(dst)); + + /* No need to inspect it further if recalculation is already active */ + if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) + continue; + + if (trie_match_net(dst->flowspec_trie, net->n.addr)) + rt_schedule_nhu(dst); + } +} + +static void +rt_flowspec_reset_trie(rtable *tab) +{ + linpool *lp = tab->flowspec_trie->lp; + int ipv4 = tab->flowspec_trie->ipv4; + + lp_flush(lp); + tab->flowspec_trie = f_new_trie(lp, 0); + tab->flowspec_trie->ipv4 = ipv4; +} + static void rt_free(resource *_r) { @@ -2048,10 +2623,12 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - ASSERT_DIE(r->rt_count == 0); - ASSERT_DIE(!r->cork_active); - ASSERT_DIE(EMPTY_LIST(r->imports)); - ASSERT_DIE(EMPTY_LIST(r->exports)); + + r->config->table = NULL; + rem_node(&r->n); + + if (r->hostcache) + rt_free_hostcache(r); /* Freed 
automagically by the resource pool fib_free(&r->fib); @@ -2082,12 +2659,7 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; @@ -2100,29 +2672,51 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + + init_list(&t->flowspec_links); + + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + .used = rt_export_used, + }; + + init_list(&t->exporter.hooks); + init_list(&t->exporter.pending); + init_list(&t->imports); - init_list(&t->exports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); - init_list(&t->pending_exports); init_list(&t->subscribers); - t->ec_event = ev_new_init(p, rt_export_cleanup, t); - t->prune_event = ev_new_init(p, rt_prune_table, t); - t->hcu_event = ev_new_init(p, rt_update_hostcache, t); - t->nhu_event = ev_new_init(p, rt_next_hop_update, t); - - t->nhu_event->cork = &rt_cork; - t->prune_event->cork = &rt_cork; - - t->export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->rt_event = ev_new_init(p, rt_event, t); + t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); - t->next_export_seq = 1; + t->exporter.next_seq = 1; + + t->cork_threshold = cf->cork_threshold; t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); + } + return t; } @@ -2139,9 +2733,12 @@ rt_init(void) rt_table_pool = rp_new(&root_pool, "Routing tables"); rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); - ev_init_cork(&rt_cork, "Route Table Cork"); + init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } + /** * rt_prune_table - prune a routing table * @@ -2157,11 +2754,10 @@ rt_init(void) * iteration. 
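Note that pruning is batched: one pass handles a bounded number of routes (the limit counter below); when the batch is used up, the iterator position is stored by FIB_ITERATE_PUT() and the table event is rescheduled, so a large table cannot monopolize the main loop.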
*/ static void -rt_prune_table(void *data) +rt_prune_table(rtable *tab) { - rtable *tab = data; struct fib_iterator *fit = &tab->prune_fit; - int limit = 512; + int limit = 2000; struct rt_import_hook *ih; node *n, *x; @@ -2190,27 +2786,36 @@ rt_prune_table(void *data) FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + + tab->gc_counter = 0; + tab->gc_time = current_time(); + + if (tab->prune_trie) + { + /* Init prefix trie pruning */ + tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0); + tab->trie_new->ipv4 = tab->trie->ipv4; + } } again: FIB_ITERATE_START(&tab->fib, fit, net, n) { rescan: + if (limit <= 0) + { + FIB_ITERATE_PUT(fit); + ev_schedule(tab->rt_event); + return; + } + for (struct rte_storage *e=n->routes; e; e=e->next) { struct rt_import_hook *s = e->rte.sender; - if ((s->import_state == TIS_FLUSHING) || (e->rte.stale_cycle < s->stale_valid) || (e->rte.stale_cycle > s->stale_set)) { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->prune_event); - return; - } - rte_discard(n, &e->rte); limit--; @@ -2224,6 +2829,12 @@ again: fib_delete(&tab->fib, n); goto again; } + + if (tab->trie_new) + { + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + limit--; + } } FIB_ITERATE_END; @@ -2231,52 +2842,78 @@ again: fib_check(&tab->fib); #endif - tab->gc_counter = 0; - tab->gc_time = current_time(); - /* state change 2->0, 3->1 */ tab->prune_state &= 1; + if (tab->trie_new) + { + /* Finish prefix trie pruning */ + + if (!tab->trie_lock_count) + { + rfree(tab->trie->lp); + } + else + { + ASSERT(!tab->trie_old); + tab->trie_old = tab->trie; + tab->trie_old_lock_count = tab->trie_lock_count; + tab->trie_lock_count = 0; + } + + tab->trie = tab->trie_new; + tab->trie_new = NULL; + tab->prune_trie = 0; + } + else + { + /* Schedule prefix trie pruning */ + if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; + tab->prune_trie = 1; + } + } + uint flushed_channels = 0; /* Close flushed channels */ WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_FLUSHING) { - ih->flush_seq = tab->next_export_seq; + ih->flush_seq = tab->exporter.next_seq; rt_set_import_state(ih, TIS_WAITING); flushed_channels++; } else if (ih->stale_pruning != ih->stale_pruned) { ih->stale_pruned = ih->stale_pruning; - if (ih->req->trace_routes & D_STATES) log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); } /* In some cases, we may want to directly proceed to export cleanup */ - if (EMPTY_LIST(tab->exports) && flushed_channels) + if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) rt_export_cleanup(tab); } static void -rt_export_cleanup(void *data) +rt_export_cleanup(rtable *tab) { - rtable *tab = data; + tab->export_used = 0; u64 min_seq = ~((u64) 0); struct rt_pending_export *last_export_to_free = NULL; - struct rt_pending_export *first_export = tab->first_export; + struct rt_pending_export *first = tab->exporter.first; struct rt_export_hook *eh; node *n; - WALK_LIST2(eh, n, tab->exports, n) + WALK_LIST2(eh, n, tab->exporter.hooks, n) { switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) { case TES_DOWN: - case TES_HUNGRY: continue; case TES_READY: @@ -2300,16 +2937,16 @@ rt_export_cleanup(void *data) } } - tab->first_export = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + tab->exporter.first = last_export_to_free ? 
rt_next_export_fast(last_export_to_free) : NULL; if (config->table_debug) - log(L_TRACE "%s: Export cleanup, old first_export seq %lu, new %lu, min_seq %ld", + log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", tab->name, - first_export ? first_export->seq : 0, - tab->first_export ? tab->first_export->seq : 0, + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, min_seq); - WALK_LIST2(eh, n, tab->exports, n) + WALK_LIST2(eh, n, tab->exporter.hooks, n) { if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) continue; @@ -2327,40 +2964,40 @@ rt_export_cleanup(void *data) } } - while (first_export && (first_export->seq <= min_seq)) + while (first && (first->seq <= min_seq)) { - ASSERT_DIE(first_export->new || first_export->old); + ASSERT_DIE(first->new || first->old); - const net_addr *n = first_export->new ? - first_export->new->rte.net : - first_export->old->rte.net; + const net_addr *n = first->new ? + first->new->rte.net : + first->old->rte.net; net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); - ASSERT_DIE(net->first == first_export); + ASSERT_DIE(net->first == first); - if (first_export == net->last) + if (first == net->last) /* The only export here */ net->last = net->first = NULL; else /* First is now the next one */ - net->first = atomic_load_explicit(&first_export->next, memory_order_relaxed); + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); /* For now, the old route may be finally freed */ - if (first_export->old) + if (first->old) { - rt_rte_trace_in(D_ROUTES, first_export->old->rte.sender->req, &first_export->old->rte, "freed"); - hmap_clear(&tab->id_map, first_export->old->rte.id); - rte_free(first_export->old, tab); + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); } #ifdef LOCAL_DEBUG - memset(first_export, 0xbd, sizeof(struct rt_pending_export)); + memset(first, 0xbd, sizeof(struct rt_pending_export)); #endif - struct rt_export_block *reb = HEAD(tab->pending_exports); - ASSERT_DIE(reb == PAGE_HEAD(first_export)); + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); - u32 pos = (first_export - &reb->export[0]); + u32 pos = (first - &reb->export[0]); u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); ASSERT_DIE(pos < end); @@ -2376,15 +3013,15 @@ rt_export_cleanup(void *data) memset(reb, 0xbe, page_size); #endif - free_page(tab->rp, reb); + free_page(reb); - if (EMPTY_LIST(tab->pending_exports)) + if (EMPTY_LIST(tab->exporter.pending)) { if (config->table_debug) log(L_TRACE "%s: Resetting export seq", tab->name); node *n; - WALK_LIST2(eh, n, tab->exports, n) + WALK_LIST2(eh, n, tab->exporter.hooks, n) { if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) continue; @@ -2393,23 +3030,25 @@ rt_export_cleanup(void *data) bmap_reset(&eh->seq_map, 1024); } - tab->next_export_seq = 1; + tab->exporter.next_seq = 1; } else { - reb = HEAD(tab->pending_exports); + reb = HEAD(tab->exporter.pending); next = &reb->export[0]; } } - first_export = next; + first = next; } + rt_check_cork_low(tab); + done:; struct rt_import_hook *ih; node *x; WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_WAITING) - if (!first_export || (first_export->seq >= ih->flush_seq)) + if (!first || (first->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; 
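/* Every export this import was waiting for has been processed and freed; report to the requestor and detach the hook. */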
ih->stopped(ih->req); @@ -2418,19 +3057,90 @@ done:; rt_unlock_table(tab); } + if (tab->export_used) + ev_schedule(tab->rt_event); + + + if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) + tm_stop(tab->exporter.export_timer); +} - if (EMPTY_LIST(tab->pending_exports) && tm_active(tab->export_timer)) - tm_stop(tab->export_timer); +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); +} - /* If reduced to at most one export block pending */ - if (tab->cork_active && - ((!tab->first_export) || (tab->first_export->seq + 128 > tab->next_export_seq))) +/** + * rt_lock_trie - lock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state + * structures. Therefore, asynchronous walks should lock the prefix trie using + * this function. That allows the prune loop to rebuild the trie, but postpones + * its freeing until all walks are done (unlocked by rt_unlock_trie()). + * + * Return a current trie that will be locked, the value should be passed back to + * rt_unlock_trie() for unlocking. + * + */ +struct f_trie * +rt_lock_trie(rtable *tab) +{ + ASSERT(tab->trie); + + tab->trie_lock_count++; + return tab->trie; +} + +/** + * rt_unlock_trie - unlock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * @trie: value returned by matching rt_lock_trie() + * + * Done for trie locked by rt_lock_trie() after walk over the trie is done. + * It may free the trie and schedule next trie pruning. + */ +void +rt_unlock_trie(rtable *tab, struct f_trie *trie) +{ + ASSERT(trie); + + if (trie == tab->trie) { - tab->cork_active = 0; - ev_uncork(&rt_cork); + /* Unlock the current prefix trie */ + ASSERT(tab->trie_lock_count); + tab->trie_lock_count--; } + else if (trie == tab->trie_old) + { + /* Unlock the old prefix trie */ + ASSERT(tab->trie_old_lock_count); + tab->trie_old_lock_count--; + + /* Free old prefix trie that is no longer needed */ + if (!tab->trie_old_lock_count) + { + rfree(tab->trie_old->lp); + tab->trie_old = NULL; + + /* Kick prefix trie pruning that was postponed */ + if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + tab->prune_trie = 1; + rt_schedule_prune(tab); + } + } + } + else + log(L_BUG "Invalid arg to rt_unlock_trie()"); } + void rt_preconfig(struct config *c) { @@ -2440,148 +3150,377 @@ rt_preconfig(struct config *c) rt_new_table(cf_get_symbol("master6"), NET_IP6); } +void +rt_postconfig(struct config *c) +{ + uint num_tables = list_length(&c->tables); + btime def_gc_period = 400 MS * num_tables; + def_gc_period = MAX(def_gc_period, 10 S); + def_gc_period = MIN(def_gc_period, 600 S); + + struct rtable_config *rc; + WALK_LIST(rc, c->tables) + if (rc->gc_period == (uint) -1) + rc->gc_period = (uint) def_gc_period; +} + /* * Some functions for handing internal next hop updates * triggered by rt_schedule_nhu(). 
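Recursive routes keep a hostentry attribute, stored by ea_set_hostentry() below together with an optional MPLS label stack. When the IGP route resolving the host changes, rta_apply_hostentry() rebuilds the dependent route's nexthop attribute by combining the hostentry's nexthop chain with the stored labels.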
*/ -static inline int -rta_next_hop_outdated(rta *a) +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - struct hostentry *he = a->hostentry; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - if (!he) - return 0; - - if (!he->src) - return a->dest != RTD_UNREACHABLE; + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); } -void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) + +static void +rta_apply_hostentry(ea_list **to, struct hostentry_adata *head) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; - if (a->dest != RTD_UNICAST) + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric); + + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(to, he->src, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? 
(nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) - { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); } -static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +static inline struct hostentry_adata * +rta_next_hop_outdated(ea_list *a) { - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); + /* First retrieve the hostentry */ + eattr *heea = ea_find(a, &ea_gen_hostentry); + if (!heea) + return NULL; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return 
NEXTHOP_IS_REACHABLE(nhad) ? head : NULL; - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src, &ea_gen_nexthop); - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->cached = 0; + return ( + (ea_get_int(a, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; +} + +static inline struct rte_storage * +rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +{ + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) + return NULL; rte e0 = *old; - e0.attrs = a; + rta_apply_hostentry(&e0.attrs, head); return rte_store(&e0, n, tab); } +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs, &ea_gen_hostentry); + if (!heea) + return; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + rta_apply_hostentry(&r->attrs, head); +} + +#ifdef CONFIG_BGP + +static inline int +net_flow_has_dst_prefix(const net_addr *n) +{ + ASSUME(net_is_flow(n)); + + if (n->pxlen) + return 1; + + if (n->type == NET_FLOW4) + { + const net_addr_flow4 *n4 = (void *) n; + return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX); + } + else + { + const net_addr_flow6 *n6 = (void *) n; + return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX); + } +} + +static inline int +rta_as_path_is_empty(ea_list *a) +{ + eattr *e = ea_find(a, "bgp_path"); + return !e || (as_path_getlen(e->u.ptr) == 0); +} + +static inline u32 +rta_get_first_asn(ea_list *a) +{ + eattr *e = ea_find(a, "bgp_path"); + u32 asn; + + return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; +} + +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *a, int interior) +{ + ASSERT(rt_is_ip(tab_ip)); + ASSERT(rt_is_flow(tab_flow)); + ASSERT(tab_ip->trie); + + /* RFC 8955 6. a) Flowspec has defined dst prefix */ + if (!net_flow_has_dst_prefix(n)) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ + if (interior && rta_as_path_is_empty(a)) + return FLOWSPEC_VALID; + + + /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ + + /* Find flowspec dst prefix */ + net_addr dst; + if (n->type == NET_FLOW4) + net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n)); + else + net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); + + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tab_ip, &dst); + const rte *rb = nb ? &nb->routes->rte : NULL; + + /* Register prefix to trie for tracking further changes */ + int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? 
nb->n.addr->pxlen : 0), max_pxlen); + + /* No best-match BGP route -> no flowspec */ + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; + + /* Find ORIGINATOR_ID values */ + u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); + + /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; + + + /* Find ASN of the best-match route, for use in next checks */ + u32 asn_b = rta_get_first_asn(rb->attrs); + if (!asn_b) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ + if (!interior && (rta_get_first_asn(a) != asn_b)) + return FLOWSPEC_INVALID; + + /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ + TRIE_WALK(tab_ip->trie, subnet, &dst) + { + net *nc = net_find_valid(tab_ip, &subnet); + if (!nc) + continue; + + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; + + if (rta_get_first_asn(rc->attrs) != asn_b) + return FLOWSPEC_INVALID; + } + TRIE_WALK_END; + + return FLOWSPEC_VALID; +} + +#endif /* CONFIG_BGP */ + +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +{ +#ifdef CONFIG_BGP + if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) + return NULL; + + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) + return NULL; + + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; + + rte new = *r; + ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); + + return rte_store(&new, n, tab); +#else + return NULL; +#endif +} + +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} + static inline int rt_next_hop_update_net(rtable *tab, net *n) { struct rte_storage *new; int count = 0; + int is_flow = net_is_flow(n->n.addr); struct rte_storage *old_best = n->routes; if (!old_best) return 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (rta_next_hop_outdated(e->rte.attrs)) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) count++; if (!count) @@ -2593,9 +3532,14 @@ rt_next_hop_update_net(rtable *tab, net *n) int pos = 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (rta_next_hop_outdated(e->rte.attrs)) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - struct rte_storage *new = rt_next_hop_update_rte(tab, n, &e->rte); + 
struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); + + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ @@ -2617,7 +3561,8 @@ rt_next_hop_update_net(rtable *tab, net *n) hmap_set(&tab->id_map, new->rte.id); } - ASSERT_DIE(pos == count); + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ struct rte_storage **new_best = NULL; @@ -2652,9 +3597,8 @@ rt_next_hop_update_net(rtable *tab, net *n) } static void -rt_next_hop_update(void *data) +rt_next_hop_update(rtable *tab) { - rtable *tab = data; struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; @@ -2665,6 +3609,9 @@ rt_next_hop_update(void *data) { FIB_ITERATE_INIT(fit, &tab->fib); tab->nhu_state = NHU_RUNNING; + + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2672,7 +3619,7 @@ rt_next_hop_update(void *data) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->nhu_event); + ev_schedule(tab->rt_event); return; } max_feed -= rt_next_hop_update_net(tab, n); @@ -2686,7 +3633,7 @@ rt_next_hop_update(void *data) tab->nhu_state &= 1; if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->nhu_event); + ev_schedule(tab->rt_event); } @@ -2704,12 +3651,12 @@ rt_new_table(struct symbol *s, uint addr_type) cf_define_symbol(s, SYM_TABLE, table, c); c->name = s->name; c->addr_type = addr_type; - c->gc_max_ops = 1000; - c->gc_min_time = 5; + c->gc_threshold = 1000; + c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; - c->cork_limit = 4 * page_size / sizeof(struct rt_pending_export); - c->config = new_config; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -2745,35 +3692,75 @@ rt_lock_table(rtable *r) void rt_unlock_table(rtable *r) { - if (!--r->use_count && r->delete_event) - /* Delete the routing table by freeing its pool */ - ev_schedule(r->delete_event); -} + if (!--r->use_count && r->deleted) + { + struct config *conf = r->deleted; + /* Delete the routing table by freeing its pool */ + rt_shutdown(r); + config_del_obstacle(conf); + } +} -static struct rtable_config * -rt_find_table_config(struct config *cf, char *name) +static void +rt_check_cork_low(rtable *tab) { - struct symbol *sym = cf_find_symbol(cf, name); - return (sym && (sym->class == SYM_TABLE)) ? 
sym->table : NULL; + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } } static void -rt_done(void *data) +rt_check_cork_high(rtable *tab) { - rtable *t = data; - struct rtable_config *tc = t->config; - struct config *c = tc->config; + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + +static int +rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +{ + if ((new->addr_type != old->addr_type) || + (new->sorted != old->sorted) || + (new->trie_used != old->trie_used)) + return 0; - tc->table = NULL; - rem_node(&t->n); + DBG("\t%s: same\n", new->name); + new->table = tab; + tab->name = new->name; + tab->config = new; - if (t->hostcache) - rt_free_hostcache(t); + tab->cork_threshold = new->cork_threshold; - rfree(t->rp); + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); - config_del_obstacle(c); + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + + return 1; +} + +static struct rtable_config * +rt_find_table_config(struct config *cf, char *name) +{ + struct symbol *sym = cf_find_symbol(cf, name); + return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL; } /** @@ -2798,28 +3785,19 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; - if (!ot->delete_event) - { - r = rt_find_table_config(new, o->name); - if (r && (r->addr_type == o->addr_type) && !new->shutdown) - { - DBG("\t%s: same\n", o->name); - r->table = ot; - ot->name = r->name; - ot->config = r; - if (o->sorted != r->sorted) - log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); - } - else - { - DBG("\t%s: deleted\n", o->name); - rt_lock_table(ot); - ot->delete_event = ev_new_init(ot->rp, rt_done, ot); - config_add_obstacle(old); - rt_unlock_table(ot); - } - } + rtable *tab = o->table; + if (tab->deleted) + continue; + + r = rt_find_table_config(new, o->name); + if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + continue; + + DBG("\t%s: deleted\n", o->name); + tab->deleted = old; + config_add_obstacle(old); + rt_lock_table(tab); + rt_unlock_table(tab); } } @@ -2833,8 +3811,18 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } +static void +rt_feed_done(struct rt_export_hook *c) +{ + c->event->hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); +} + /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@ -2843,7 +3831,7 @@ rt_commit(struct config *new, struct config *old) * order not to monopolize CPU time.) 
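When the walk finishes, rt_feed_done() switches the hook's event to the regular rt_export_hook handler and sets the export state to TES_READY, after which only incremental updates are delivered.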
*/ static void -rt_feed_channel(void *data) +rt_feed_by_fib(void *data) { struct rt_export_hook *c = data; @@ -2852,7 +3840,9 @@ rt_feed_channel(void *data) ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { @@ -2864,37 +3854,134 @@ rt_feed_channel(void *data) if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) return; - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); - max_feed -= count; - rte_update_unlock(); - } - } - else if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - max_feed--; - rte_update_unlock(); - } - - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; - c->event->hook = rt_export_hook; - rt_send_export_event(c); + rt_feed_done(c); +} - rt_set_export_state(c, TES_READY); +static void +rt_feed_by_trie(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; + + int max_feed = 256; + + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + + net_addr addr; + while (trie_walk_next(ws, &addr)) + { + net *n = net_find(tab, &addr); + if (!n) + continue; + + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; + + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; + } + + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; + + mb_free(c->walk_state); + c->walk_state = NULL; + + rt_feed_done(c); +} + +static void +rt_feed_equal(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); + + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); + + rt_feed_done(c); +} + +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); + + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); + + rt_feed_done(c); +} + +static uint +rt_feed_net(struct rt_export_hook *c, net *n) +{ + uint count = 0; + + if (c->req->export_bulk) + { + count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); + rte_update_unlock(); + } + } + + else if (n->routes) + { + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + rte_update_unlock(); + count = 1; + } + + for (struct rt_pending_export *rpe = 
n->first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_mark_seen(c, rpe); + + return count; +} + +/* + * Import table + */ + +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) + { + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; + + /* And reload the route */ + rte_update(c, net, &new, new.src); + } } @@ -2993,7 +4080,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -3010,7 +4097,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -3042,11 +4129,11 @@ rt_free_hostcache(rtable *tab) static void rt_notify_hostcache(rtable *tab, net *net) { - if (ev_active(tab->hcu_event)) + if (tab->hcu_scheduled) return; if (trie_match_net(tab->hostcache->trie, net->n.addr)) - ev_schedule(tab->hcu_event); + rt_schedule_hcu(tab); } static int @@ -3062,14 +4149,14 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs, "igp_metric"); if (ea) return ea->u.data; - if (rt->attrs->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; if (rt->src->owner->class->rte_igp_metric) @@ -3081,13 +4168,12 @@ rt_get_igp_metric(rte *rt) static int rt_update_hostentry(rtable *tab, struct hostentry *he) { - rta *old_src = he->src; + ea_list *old_src = he->src; int direct = 0; int pxlen = 0; /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -3097,11 +4183,13 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) if (n) { struct rte_storage *e = n->routes; - rta *a = e->rte.attrs; - word pref = a->pref; + ea_list *a = e->rte.attrs; + u32 pref = rt_get_preference(&e->rte); for (struct rte_storage *ee = n->routes; ee; ee = ee->next) - if ((ee->rte.attrs->pref >= pref) && ee->rte.attrs->hostentry) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3111,9 +4199,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) pxlen = n->n.addr->pxlen; - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + eattr *nhea = ea_find(a, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -3126,10 +4217,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; he->igp_metric = rt_get_igp_metric(&e->rte); } @@ -3143,9 +4232,8 @@ done: } static void -rt_update_hostcache(void *data) +rt_update_hostcache(rtable *tab) { - rtable 
*tab = data; struct hostcache *hc = tab->hostcache; struct hostentry *he; node *n, *x; @@ -3166,9 +4254,11 @@ rt_update_hostcache(void *data) if (rt_update_hostentry(tab, he)) rt_schedule_nhu(he->tab); } + + tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h new file mode 100644 index 00000000..66111dde --- /dev/null +++ b/nest/rt.h @@ -0,0 +1,589 @@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" +#include "lib/event.h" +#include "lib/rcu.h" + +#include <stdatomic.h> + +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +struct rt_cork_threshold { + u64 low, high; +}; + +/* + * Master Routing Tables. Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. + */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ + btime export_settle_time; /* Delay before exports are announced */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ +}; + +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ + + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); + void (*used)(struct rt_exporter *); + + list pending; /* List of packed struct rt_pending_export */ + struct timer *export_timer; + + struct rt_pending_export *first; /* First export to announce */ + u64 next_seq; /* The next export will have this ID */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int 
use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + struct rt_exporter exporter; /* Exporter API structure */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * obstacle from this routing table. + */ + struct event *rt_event; /* Routing table event */ + struct event *uncork_event; /* Called when uncork happens */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + uint gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte hcu_corked; /* Hostcache update is corked with this state */ + byte nhu_state; /* Next Hop Update state */ + byte nhu_corked; /* Next Hop Update is corked with this state */ + byte export_used; /* Pending Export pruning is scheduled */ + byte cork_active; /* Cork has been activated */ + struct rt_cork_threshold cork_threshold; /* Threshold for table cork */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + void (*hook)(struct rt_subscription *b); + void *data; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; +} rt_cork; + +static inline void rt_cork_acquire(void) +{ + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); +} + +static inline void rt_cork_release(void) +{ + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_schedule_work(&rt_cork.run); + } +} + +static inline int rt_cork_check(event *e) +{ + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; +} + + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *first, *last; + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + 
unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + const net_addr *addr; /* Network prefilter address */ + u8 trace_routes; + u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ + + event_list *list; /* Where to schedule export events */ + + /* There are two methods of export. 
+
+struct rt_export_hook {
+  node n;
+  struct rt_exporter *table; /* The connected table */
+
+  pool *pool;
+
+  struct rt_export_request *req; /* The requestor */
+
+  struct rt_export_stats {
+    /* Export - from core to protocol */
+    u32 updates_received; /* Number of route updates received */
+    u32 withdraws_received; /* Number of route withdraws received */
+  } stats;
+
+  union {
+    struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
+    struct {
+      struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
+      struct f_trie *walk_lock; /* Locked trie for walking */
+    };
+    u32 hash_iter; /* Iterator over hash */
+  };
+
+  struct bmap seq_map; /* Keeps track of which exports were already processed */
+
+  struct rt_pending_export * _Atomic last_export; /* Last export processed */
+  struct rt_pending_export *rpe_next; /* Next pending export to process */
+
+  btime last_state_change; /* Time of last state transition */
+
+  u8 refeed_pending; /* Refeeding and another refeed is scheduled */
+  _Atomic u8 export_state; /* Route export state (TES_*, see below) */
+  u8 feed_type; /* Which feeding method is used (TFT_*, see below) */
+
+  struct event *event; /* Event running all the export operations */
+
+  void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */
+};
+
+#define TIS_DOWN 0
+#define TIS_UP 1
+#define TIS_STOP 2
+#define TIS_FLUSHING 3
+#define TIS_WAITING 4
+#define TIS_CLEARED 5
+#define TIS_MAX 6
+
+#define TES_DOWN 0
+#define TES_FEEDING 2
+#define TES_READY 3
+#define TES_STOP 4
+#define TES_MAX 5
+
+/* Value of addr_mode */
+#define TE_ADDR_NONE 0 /* No address matching */
+#define TE_ADDR_EQUAL 1 /* Exact query - show route <addr> */
+#define TE_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */
+#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */
+
+#define TFT_FIB 1
+#define TFT_TRIE 2
+#define TFT_HASH 3
+
+void rt_request_import(rtable *tab, struct rt_import_request *req);
+void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req);
+
+void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req);
+
+void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
+void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));
+
+const char *rt_import_state_name(u8 state);
+const char *rt_export_state_name(u8 state);
+
+static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; }
+static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; }
+
+void rt_set_export_state(struct rt_export_hook *hook, u8 state);
+
+void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
+
+/* Get next rpe. If src is given, it must match. */
+struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src);
+
+/* Mark the pending export processed */
+void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+
+/* Get pending export seen status */
+int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe);
+
+/* Types of route announcement, also used as flags */
+#define RA_UNDEF 0 /* Undefined RA type */
+#define RA_OPTIMAL 1 /* Announcement of optimal route change */
+#define RA_ACCEPTED 2 /* Announcement of first accepted route */
+#define RA_ANY 3 /* Announcement of any route change */
+#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
+
+/* Return value of preexport() callback */
+#define RIC_ACCEPT 1 /* Accepted by protocol */
+#define RIC_PROCESS 0 /* Process it through import filter */
+#define RIC_REJECT -1 /* Rejected by protocol */
+#define RIC_DROP -2 /* Silently dropped by protocol */
+
+#define rte_update channel_rte_import
+/**
+ * rte_update - enter a new update to a routing table
+ * @c: channel doing the update
+ * @net: network address
+ * @rte: a &rte representing the new route
+ * @src: old route source identifier
+ *
+ * This function imports a new route to the appropriate table (via the channel).
+ * Table keys are @net (obligatory) and @rte->attrs->src.
+ * Both the @net and @rte pointers can be local.
+ *
+ * The route attributes (@rte->attrs) are obligatory. They can also be allocated
+ * locally. However, if you use an already-cached attribute object, you must
+ * call rta_clone() on that object yourself. (These semantics may change in the future.)
+ *
+ * If the route attributes are local, you may set @rte->attrs->src to NULL;
+ * the protocol's default route source will then be supplied.
+ *
+ * When rte_update() gets a route, it automatically validates it. This includes
+ * checking the validity of the given network and next hop addresses and also
+ * checking for host-scope or link-scope routes. Then the import filters are
+ * processed and, if the route is accepted, it is passed to route table recalculation.
+ *
+ * The accepted routes are then inserted into the table, replacing the old route
+ * for the same @net identified by @src. Then the route is announced
+ * to all the channels connected to the table using the standard export mechanism.
+ * Setting @rte to NULL makes this a withdraw; otherwise @rte->src must be the same
+ * as @src.
+ *
+ * All memory used for temporary allocations is taken from a special linpool
+ * @rte_update_pool and freed when rte_update() finishes.
+ */
+void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src);
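[Editorial note] A sketch of the calling convention documented above: announce by passing a route keyed by (net, src), withdraw by passing NULL. The my_* helpers are hypothetical; field names follow the doc comment, and the attributes are assumed to be locally allocated (a cached ea_list would need rta_clone() first).

static void
my_announce(struct channel *c, const net_addr *n, ea_list *a, struct rte_src *src)
{
  rte e0 = {
    .attrs = a,  /* local attributes, per the comment above */
    .src = src,  /* must match the @src key argument */
  };

  rte_update(c, n, &e0, src);  /* insert or replace the route for (n, src) */
}

static void
my_withdraw(struct channel *c, const net_addr *n, struct rte_src *src)
{
  rte_update(c, n, NULL, src); /* NULL @rte makes this a withdraw */
}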
+
+extern list routing_tables;
+struct config;
+
+void rt_init(void);
+void rt_preconfig(struct config *);
+void rt_postconfig(struct config *);
+void rt_commit(struct config *new, struct config *old);
+void rt_lock_table(rtable *);
+void rt_unlock_table(rtable *);
+struct f_trie *rt_lock_trie(rtable *tab);
+void rt_unlock_trie(rtable *tab, struct f_trie *trie);
+void rt_subscribe(rtable *tab, struct rt_subscription *s);
+void rt_unsubscribe(struct rt_subscription *s);
+void rt_flowspec_link(rtable *src, rtable *dst);
+void rt_flowspec_unlink(rtable *src, rtable *dst);
+rtable *rt_setup(pool *, struct rtable_config *);
+static inline void rt_shutdown(rtable *r) { rfree(r->rp); }
+
+static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
+static inline net *net_find_valid(rtable *tab, const net_addr *addr)
+{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
+static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
+net *net_route(rtable *tab, const net_addr *n);
+int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
+rte *rt_export_merged(struct channel *c, rte **feed, uint count, linpool *pool, int silent);
+void rt_refresh_begin(struct rt_import_request *);
+void rt_refresh_end(struct rt_import_request *);
+void rt_modify_stale(rtable *t, struct rt_import_request *);
+void rt_schedule_prune(rtable *t);
+void rte_dump(struct rte_storage *);
+void rte_free(struct rte_storage *);
+struct rte_storage *rte_store(const rte *, net *net, rtable *);
+void rt_dump(rtable *);
+void rt_dump_all(void);
+void rt_dump_hooks(rtable *);
+void rt_dump_hooks_all(void);
+int rt_reload_channel(struct channel *c);
+void rt_reload_channel_abort(struct channel *c);
+void rt_refeed_channel(struct channel *c);
+void rt_prune_sync(rtable *t, int all);
+struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
+
+static inline int rt_is_ip(rtable *tab)
+{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); }
+
+static inline int rt_is_vpn(rtable *tab)
+{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); }
+
+static inline int rt_is_roa(rtable *tab)
+{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); }
+
+static inline int rt_is_flow(rtable *tab)
+{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); }
+
+
+/* Default limit for ECMP next hops, defined in sysdep code */
+extern const int rt_default_ecmp;
+
+struct rt_show_data_rtable {
+  node n;
+  const char *name;
+  struct rt_exporter *table;
+  struct channel *export_channel;
+  struct channel *prefilter;
+  struct krt_proto *kernel;
+};
+
+struct rt_show_data {
+  struct cli *cli; /* Pointer back to the CLI */
+  net_addr *addr;
+  list tables;
+  struct rt_show_data_rtable *tab; /* Iterator over table list */
+  struct rt_show_data_rtable *last_table; /* Last table in output */
+  struct rt_export_request req; /* Export request in use */
+  int verbose, tables_defined_by;
+  const struct filter *filter;
+  struct proto *show_protocol;
+  struct proto *export_protocol;
+  struct channel *export_channel;
+  struct config *running_on_config;
+  struct rt_export_hook *kernel_export_hook;
+  int export_mode, addr_mode, primary_only, filtered, stats;
+
+  int net_counter, rt_counter, show_counter, table_counter;
+  int net_counter_last, rt_counter_last, show_counter_last;
+  int show_counter_last_flush;
+};
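[Editorial note] A small sketch combining the lookup inlines above with the RTE_* helpers declared earlier: reading the current best route for a prefix. Locking and synchronization concerns are deliberately omitted; my_lookup_best is hypothetical.

static rte *
my_lookup_best(rtable *tab, const net_addr *n)
{
  /* NULL if the network is absent or its best route is not valid */
  net *nn = net_find_valid(tab, n);

  /* nn->routes is the best route's storage; RTE_OR_NULL unwraps it */
  return nn ? RTE_OR_NULL(nn->routes) : NULL;
}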
+
+void rt_show(struct rt_show_data *);
+struct rt_show_data_rtable *rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name);
+struct rt_show_data_rtable *rt_show_add_table(struct rt_show_data *d, struct rtable *t);
+
+/* Value of table definition mode in struct rt_show_data */
+#define RSD_TDB_DEFAULT 0 /* no table specified */
+#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */
+#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */
+#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */
+
+#define RSD_TDB_SET 0x1 /* internal: show empty tables */
+#define RSD_TDB_NMN 0x2 /* internal: need matching net */
+
+/* Value of export_mode in struct rt_show_data */
+#define RSEM_NONE 0 /* Export mode not used */
+#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */
+#define RSEM_EXPORT 2 /* Routes accepted by export filter */
+#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */
+#define RSEM_EXPORTED 4 /* Routes marked in export map */
+
+/* Host entry: Resolve hook for recursive nexthops */
+extern struct ea_class ea_gen_hostentry;
+struct hostentry_adata {
+  adata ad;
+  struct hostentry *he;
+  u32 labels[0];
+};
+
+void
+ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]);
+
+void ea_show_hostentry(const struct adata *ad, byte *buf, uint size);
+void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad);
+
+/*
+ *	Default protocol preferences
+ */
+
+#define DEF_PREF_DIRECT 240 /* Directly connected */
+#define DEF_PREF_STATIC 200 /* Static route */
+#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */
+#define DEF_PREF_BABEL 130 /* Babel */
+#define DEF_PREF_RIP 120 /* RIP */
+#define DEF_PREF_BGP 100 /* BGP */
+#define DEF_PREF_RPKI 100 /* RPKI */
+#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */
+#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */
+
+/*
+ *	Route Origin Authorization
+ */
+
+#define ROA_UNKNOWN 0
+#define ROA_VALID 1
+#define ROA_INVALID 2
+
+int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
+
+#endif
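[Editorial note] Closing the header, a sketch of how the ROA constants and net_roa_check() above fit together: classifying an announced prefix against a ROA table. my_roa_classify is hypothetical; roa_tab, n and origin_asn are assumed to come from the caller.

static const char *
my_roa_classify(rtable *roa_tab, const net_addr *n, u32 origin_asn)
{
  switch (net_roa_check(roa_tab, n, origin_asn))
  {
  case ROA_VALID:   return "valid";    /* covered by a matching ROA */
  case ROA_INVALID: return "invalid";  /* covered, but ASN/length mismatch */
  default:          return "unknown";  /* ROA_UNKNOWN: no covering ROA */
  }
}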