diff options
Diffstat (limited to 'nest')
-rw-r--r-- | nest/Makefile | 9 | ||||
-rw-r--r-- | nest/a-path.c | 905 | ||||
-rw-r--r-- | nest/a-path_test.c | 218 | ||||
-rw-r--r-- | nest/a-set.c | 565 | ||||
-rw-r--r-- | nest/a-set_test.c | 259 | ||||
-rw-r--r-- | nest/attrs.h | 224 | ||||
-rw-r--r-- | nest/bird.h | 1 | ||||
-rw-r--r-- | nest/cmds.c | 69 | ||||
-rw-r--r-- | nest/config.Y | 75 | ||||
-rw-r--r-- | nest/iface.c | 2 | ||||
-rw-r--r-- | nest/limit.h | 49 | ||||
-rw-r--r-- | nest/neighbor.c | 2 | ||||
-rw-r--r-- | nest/proto.c | 763 | ||||
-rw-r--r-- | nest/protocol.h | 160 | ||||
-rw-r--r-- | nest/route.h | 727 | ||||
-rw-r--r-- | nest/rt-attr.c | 742 | ||||
-rw-r--r-- | nest/rt-dev.c | 23 | ||||
-rw-r--r-- | nest/rt-fib.c | 6 | ||||
-rw-r--r-- | nest/rt-show.c | 152 | ||||
-rw-r--r-- | nest/rt-table.c | 2450 | ||||
-rw-r--r-- | nest/rt.h | 486 |
21 files changed, 3456 insertions, 4431 deletions
diff --git a/nest/Makefile b/nest/Makefile index 884d3950..0350c3b6 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,8 +1,13 @@ -src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,dev_build) -tests_src := a-set_test.c a-path_test.c +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) + $(E)echo GEN $@ + $(Q)echo "#include \"lib/birdlib.h\"\n$(patsubst %,void %(void);\n,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %();\n,$(PROTO_BUILD))}" > $@ + +tests_src := tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/nest/a-path.c deleted file mode 100644 index 2e34a3d1..00000000 --- a/nest/a-path.c +++ /dev/null @@ -1,905 +0,0 @@ -/* - * BIRD -- Path Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/unaligned.h" -#include "lib/string.h" -#include "filter/data.h" - -// static inline void put_as(byte *data, u32 as) { put_u32(data, as); } -// static inline u32 get_as(byte *data) { return get_u32(data); } - -#define put_as put_u32 -#define get_as get_u32 -#define BS 4 /* Default block size of ASN (autonomous system number) */ - -#define BAD(DSC, VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) - -int -as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen) -{ - byte *pos = data; - char *err_dsc = NULL; - uint err_val = 0; - - while (len) - { - if (len < 2) - BAD("segment framing error", 0); - - /* Process one AS path segment */ - uint type = pos[0]; - uint slen = 2 + bs * pos[1]; - - if (len < slen) - BAD("segment framing error", len); - - switch (type) - { - case AS_PATH_SET: - if (!sets) - BAD("AS_SET segment", type); - break; - - case AS_PATH_SEQUENCE: - break; - - case AS_PATH_CONFED_SEQUENCE: - if (!confed) - BAD("AS_CONFED_SEQUENCE segment", type); - break; - - case AS_PATH_CONFED_SET: - if (!sets || !confed) - BAD("AS_CONFED_SET segment", type); - break; - - default: - BAD("unknown segment", type); - } - - if (pos[1] == 0) - BAD("zero-length segment", type); - - pos += slen; - len -= slen; - } - - return 1; - -bad: - if (err) - if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) - err[0] = 0; - - return 0; -} - -int -as_path_16to32(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u32(dst, get_u16(src)); - src += 2; - dst += 4; - } - } - - return dst - dst0; -} - -int -as_path_32to16(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < 
end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u16(dst, get_u32(src)); - src += 4; - dst += 2; - } - } - - return dst - dst0; -} - -int -as_path_contains_as4(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - uint i, n; - - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - if (get_as(pos) > 0xFFFF) - return 1; - - pos += BS; - } - } - - return 0; -} - -int -as_path_contains_confed(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint slen = 2 + BS * pos[1]; - - if ((type == AS_PATH_CONFED_SEQUENCE) || - (type == AS_PATH_CONFED_SET)) - return 1; - - pos += slen; - } - - return 0; -} - -struct adata * -as_path_strip_confed(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - const byte *src = path->data; - const byte *end = src + path->length; - byte *dst = res->data; - - while (src < end) - { - uint type = src[0]; - uint slen = 2 + BS * src[1]; - - /* Copy regular segments */ - if ((type == AS_PATH_SET) || (type == AS_PATH_SEQUENCE)) - { - memcpy(dst, src, slen); - dst += slen; - } - - src += slen; - } - - /* Fix the result length */ - res->length = dst - res->data; - - return res; -} - -struct adata * -as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as) -{ - struct adata *np; - const byte *pos = op->data; - uint len = op->length; - - if (len && (pos[0] == seq) && (pos[1] < 255)) - { - /* Starting with matching segment => just prepend the AS number */ - np = lp_alloc_adata(pool, len + BS); - np->data[0] = seq; - np->data[1] = pos[1] + 1; - put_as(np->data + 2, as); - - uint dlen = BS * pos[1]; - memcpy(np->data + 2 + BS, pos + 2, dlen); - ADVANCE(pos, len, 2 + dlen); - } - else - { - /* Create a new path segment */ - np = lp_alloc_adata(pool, len + 
2 + BS); - np->data[0] = seq; - np->data[1] = 1; - put_as(np->data + 2, as); - } - - if (len) - { - byte *dst = np->data + 2 + BS * np->data[1]; - - memcpy(dst, pos, len); - } - - return np; -} - - -struct adata * -as_path_to_old(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - byte *pos = res->data; - byte *end = pos + res->length; - uint i, n; - u32 as; - - /* Copy the whole path */ - memcpy(res->data, path->data, path->length); - - /* Replace 32-bit AS numbers with AS_TRANS */ - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - as = get_as(pos); - if (as > 0xFFFF) - put_as(pos, AS_TRANS); - - pos += BS; - } - } - - return res; -} - -/* - * Cut the path to the length @num, measured to the usual path metric. Note that - * AS_CONFED_* segments have zero length and must be added if they are on edge. - */ -struct adata * -as_path_cut(struct linpool *pool, const struct adata *path, uint num) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_cut: Invalid path segment"); - } - - /* Cannot add whole segment, so try partial one and finish */ - if (num < n) - { - const byte *nend = pos; - if (num) - nend += 2 + BS * num; - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = nend - (const byte *) path->data; - memcpy(res->data, path->data, res->length); - - if (num) - { - byte *dpos = ((byte *) res->data) + (pos - (const byte *) path->data); - dpos[1] = num; - } - - return res; - } - - num -= n; - pos += 2 + BS * l; - } - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = path->length; - memcpy(res->data, path->data, res->length); - 
return res; -} - -/* - * Merge (concatenate) paths @p1 and @p2 and return the result. - * In contrast to other as_path_* functions, @p1 and @p2 may be reused. - */ -const struct adata * -as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2) -{ - if (p1->length == 0) - return p2; - - if (p2->length == 0) - return p1; - - struct adata *res = lp_alloc_adata(pool, p1->length + p2->length); - memcpy(res->data, p1->data, p1->length); - memcpy(res->data + p1->length, p2->data, p2->length); - - return res; -} - -void -as_path_format(const struct adata *path, byte *bb, uint size) -{ - buffer buf = { .start = bb, .pos = bb, .end = bb + size }, *b = &buf; - const byte *pos = path->data; - const byte *end = pos + path->length; - const char *ops, *cls; - - b->pos[0] = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: ops = "{"; cls = "}"; break; - case AS_PATH_SEQUENCE: ops = NULL; cls = NULL; break; - case AS_PATH_CONFED_SEQUENCE: ops = "("; cls = ")"; break; - case AS_PATH_CONFED_SET: ops = "({"; cls = "})"; break; - default: bug("Invalid path segment"); - } - - if (ops) - buffer_puts(b, ops); - - while (len--) - { - buffer_print(b, len ? 
"%u " : "%u", get_as(pos)); - pos += BS; - } - - if (cls) - buffer_puts(b, cls); - - if (pos < end) - buffer_puts(b, " "); - } - - /* Handle overflow */ - if (b->pos == b->end) - strcpy(b->end - 12, "..."); -} - -int -as_path_getlen(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - uint res = 0; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_getlen: Invalid path segment"); - } - - res += n; - pos += 2 + BS * l; - } - - return res; -} - -int -as_path_get_last(const struct adata *path, u32 *orig_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - int found = 0; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - found = 0; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - found = 1; - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - if (found) - *orig_as = val; - return found; -} - -u32 -as_path_get_last_nonaggregated(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - return val; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return val; -} - -int -as_path_get_first(const struct adata *path, u32 *last_as) -{ - const u8 *p = path->data; - - if 
((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) - return 0; - - *last_as = get_as(p+2); - return 1; -} - -int -as_path_get_first_regular(const struct adata *path, u32 *last_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - return 0; - - case AS_PATH_SEQUENCE: - if (len == 0) - return 0; - - *last_as = get_as(pos); - return 1; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return 0; -} - -int -as_path_contains(const struct adata *path, u32 as, int min) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int num = 0; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for(i=0; i<n; i++) - { - if (get_as(p) == as) - if (++num == min) - return 1; - p += BS; - } - } - return 0; -} - -int -as_path_match_set(const struct adata *path, const struct f_tree *set) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for (i=0; i<n; i++) - { - struct f_val v = {T_INT, .val.i = get_as(p)}; - if (find_tree(set, &v)) - return 1; - p += BS; - } - } - - return 0; -} - -const struct adata * -as_path_filter(struct linpool *pool, const struct adata *path, const struct f_tree *set, u32 key, int pos) -{ - if (!path) - return NULL; - - int len = path->length; - const u8 *p = path->data; - const u8 *q = path->data + len; - u8 *d, *d2; - int i, bt, sn, dn; - u8 buf[len]; - - d = buf; - while (p<q) - { - /* Read block header (type and length) */ - bt = p[0]; - sn = p[1]; - dn = 0; - p += 2; - d2 = d + 2; - - for (i = 0; i < sn; i++) - { - u32 as = get_as(p); - int match; - - if (set) - { - struct f_val v = {T_INT, .val.i = as}; - match = !!find_tree(set, &v); - } - else - match = (as == key); - - if (match == pos) - { - put_as(d2, as); - d2 += 
BS; - dn++; - } - - p += BS; - } - - if (dn > 0) - { - /* Nonempty block, set block header and advance */ - d[0] = bt; - d[1] = dn; - d = d2; - } - } - - uint nl = d - buf; - if (nl == path->length) - return path; - - struct adata *res = lp_alloc(pool, sizeof(struct adata) + nl); - res->length = nl; - memcpy(res->data, buf, nl); - - return res; -} - - -struct pm_pos -{ - u8 set; - u8 mark; - union - { - const char *sp; - u32 asn; - } val; -}; - -static int -parse_path(const struct adata *path, struct pm_pos *pp) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - struct pm_pos *op = pp; - uint i; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - pp->set = 1; - pp->mark = 0; - pp->val.sp = pos - 1; - pp++; - - pos += BS * len; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - for (i = 0; i < len; i++) - { - pp->set = 0; - pp->mark = 0; - pp->val.asn = get_as(pos); - pp++; - - pos += BS; - } - break; - - default: - bug("Invalid path segment"); - } - } - - return pp - op; -} - -static int -pm_match_val(const struct pm_pos *pos, u32 asn, u32 asn2) -{ - u32 gas; - if (! pos->set) - return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - gas = get_as(p + i * BS); - - if ((gas >= asn) && (gas <= asn2)) - return 1; - } - - return 0; -} - -static int -pm_match_set(const struct pm_pos *pos, const struct f_tree *set) -{ - struct f_val asn = { .type = T_INT }; - - if (! 
pos->set) - { - asn.val.i = pos->val.asn; - return !!find_tree(set, &asn); - } - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - asn.val.i = get_as(p + i * BS); - if (find_tree(set, &asn)) - return 1; - } - - return 0; -} - -static inline int -pm_match(const struct pm_pos *pos, const struct f_path_mask_item *mask, u32 asn, u32 asn2) -{ - return ((mask->kind == PM_QUESTION) || - ((mask->kind != PM_ASN_SET) ? - pm_match_val(pos, asn, asn2) : - pm_match_set(pos, mask->set))); -} - -static void -pm_mark(struct pm_pos *pos, int *i, int plen, int *nl, int *nh) -{ - int j = *i; - - if (pos[j].set) - do { pos[j].mark = 1; j++; } - while ((j < plen) && pos[j].set); - else - j++; - - pos[j].mark = 1; - - /* Update low, high based on first and last marked pos */ - int l = pos[*i].set ? *i : j; - - *nl = (*nl < 0) ? l : MIN(*nl, l); - *nh = MAX(*nh, j); - *i = j; -} - -/* AS path matching is nontrivial. Because AS path can - * contain sets, it is not a plain wildcard matching. A set - * in an AS path is interpreted as it might represent any - * sequence of AS numbers from that set (possibly with - * repetitions). So it is also a kind of a pattern, - * more complicated than a path mask. - * - * The algorithm for AS path matching is a variant - * of nondeterministic finite state machine, where - * positions in AS path are states, and items in - * path mask are input for that finite state machine. - * During execution of the algorithm we maintain a set - * of marked states - a state is marked if it can be - * reached by any walk through NFSM with regard to - * currently processed part of input. When we process - * next part of mask, we advance each marked state. - * We start with marked first position, when we - * run out of marked positions, we reject. When - * we process the whole mask, we accept if final position - * (auxiliary position after last real position in AS path) - * is marked. 
- */ -int -as_path_match(const struct adata *path, const struct f_path_mask *mask) -{ - struct pm_pos pos[2048 + 1]; - int plen = parse_path(path, pos); - int l, h, i, nh, nl, last, loop; - u32 val = 0; - u32 val2 = 0; - - /* l and h are bound of interval of positions where - are marked states */ - - pos[plen].set = 0; - pos[plen].mark = 0; - - l = h = loop = 0; - pos[0].mark = 1; - - for (uint m=0; m < mask->len; m++) - { - /* We remove this mark to not step after pos[plen] */ - pos[plen].mark = 0; - - switch (mask->item[m].kind) - { - case PM_ASTERISK: - for (i = l; i <= plen; i++) - pos[i].mark = 1; - h = plen; - break; - - case PM_LOOP: - loop = 1; - break; - - case PM_ASN: /* Define single ASN as ASN..ASN - very narrow interval */ - val2 = val = mask->item[m].asn; - goto step; - case PM_ASN_EXPR: - bug("Expressions should be evaluated on AS path mask construction."); - case PM_ASN_RANGE: - val = mask->item[m].from; - val2 = mask->item[m].to; - goto step; - case PM_QUESTION: - case PM_ASN_SET: - step: - nh = nl = -1; - last = plen; - for (i = h; i >= l; i--) - if (pos[i].mark) - { - pos[i].mark = 0; - int j = i; - - match: - if (pm_match(pos + j, &mask->item[m], val, val2)) - { - pm_mark(pos, &j, plen, &nl, &nh); - if (loop && (j < last)) - goto match; - } - - last = i; - } - - if (nh < 0) - return 0; - - h = nh; - l = nl; - loop = 0; - break; - } - } - - return pos[plen].mark; -} diff --git a/nest/a-path_test.c b/nest/a-path_test.c deleted file mode 100644 index 9ed0a786..00000000 --- a/nest/a-path_test.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * BIRD -- Path Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" - -#define TESTS_NUM 30 -#define AS_PATH_LENGTH 1000 - -#if AS_PATH_LENGTH > AS_PATH_MAXLEN -#warning "AS_PATH_LENGTH should be <= AS_PATH_MAXLEN" -#endif - -static int -t_as_path_match(void) -{ - resource_init(); - - int round; - for (round = 0; round < TESTS_NUM; round++) - { - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - u32 first_prepended, last_prepended; - first_prepended = last_prepended = 0; - struct linpool *lp = lp_new_default(&root_pool); - - struct f_path_mask *mask = alloca(sizeof(struct f_path_mask) + AS_PATH_LENGTH * sizeof(struct f_path_mask_item)); - mask->len = AS_PATH_LENGTH; - for (int i = AS_PATH_LENGTH - 1; i >= 0; i--) - { - u32 val = bt_random(); - as_path = as_path_prepend(lp, as_path, val); - bt_debug("Prepending ASN: %10u \n", val); - - if (i == 0) - last_prepended = val; - if (i == AS_PATH_LENGTH-1) - first_prepended = val; - - mask->item[i].kind = PM_ASN; - mask->item[i].asn = val; - } - - bt_assert_msg(as_path_match(as_path, mask), "Mask should match with AS path"); - - u32 asn; - - bt_assert(as_path_get_first(as_path, &asn)); - bt_assert_msg(asn == last_prepended, "as_path_get_first() should return the last prepended ASN"); - - bt_assert(as_path_get_last(as_path, &asn)); - bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); - - rfree(lp); - } - - return 1; -} - -static int -t_path_format(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - uint i; - for (i = 4294967285; i <= 4294967294; i++) - { - as_path = as_path_prepend(lp, as_path, i); - bt_debug("Prepending ASN: %10u \n", i); - } - -#define BUFFER_SIZE 120 - byte buf[BUFFER_SIZE] = {}; - - as_path_format(&empty_as_path, buf, BUFFER_SIZE); - 
bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - - as_path_format(as_path, buf, BUFFER_SIZE); - bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - -#define SMALL_BUFFER_SIZE 25 - byte buf2[SMALL_BUFFER_SIZE] = {}; - as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); - bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); - - rfree(lp); - - return 1; -} - -static int -count_asn_in_array(const u32 *array, u32 asn) -{ - int counts_of_contains = 0; - int u; - for (u = 0; u < AS_PATH_LENGTH; u++) - if (array[u] == asn) - counts_of_contains++; - return counts_of_contains; -} - -static int -t_path_include(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - u32 as_nums[AS_PATH_LENGTH] = {}; - int i; - for (i = 0; i < AS_PATH_LENGTH; i++) - { - u32 val = bt_random(); - as_nums[i] = val; - as_path = as_path_prepend(lp, as_path, val); - } - - for (i = 0; i < AS_PATH_LENGTH; i++) - { - int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); - bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); - - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 0) != NULL); - bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 1) != NULL); - } - - for (i = 0; i < 10000; i++) - { - u32 test_val = bt_random(); - int counts_of_contains = count_asn_in_array(as_nums, test_val); - int result = as_path_contains(as_path, test_val, (counts_of_contains == 0 ? 
1 : counts_of_contains)); - - if (counts_of_contains) - bt_assert_msg(result, "As path should contain %d-times the number %u", counts_of_contains, test_val); - else - bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); - } - - rfree(lp); - - return 1; -} - -#if 0 -static int -t_as_path_converting(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); -#define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 - - int i; - for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) - as_path = as_path_prepend(lp, as_path, i); - - bt_debug("data length: %u \n", as_path->length); - - byte buffer[100] = {}; - int used_size = as_path_convert_to_new(as_path, buffer, AS_PATH_LENGTH_FOR_CONVERTING_TEST-1); - bt_debug("as_path_convert_to_new: len %d \n%s\n", used_size, buffer); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\039\030\030\030\030\030\030\030\039\030\030\030\030\030\030\030\038\030\030\030\030\030\030" - "\030\037\030\030\030\030\030\030\030\036\030\030\030\030", - 38)); - - bzero(buffer, sizeof(buffer)); - int new_used; - used_size = as_path_convert_to_old(as_path, buffer, &new_used); - bt_debug("as_path_convert_to_old: len %d, new_used: %d \n", used_size, new_used); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\0310\030\039\030\038\030\037\030\036\030\035\030\034\030\033\030\032\030\031\030\030", - 22)); - - return 1; -} -#endif - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - - bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); - bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); - bt_test_suite(t_path_include, "Testing including a AS number in AS path"); - // bt_test_suite(t_as_path_converting, "Testing 
as_path_convert_to_*() output constancy"); - - return bt_exit_value(); -} diff --git a/nest/a-set.c b/nest/a-set.c deleted file mode 100644 index 1186eb56..00000000 --- a/nest/a-set.c +++ /dev/null @@ -1,565 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include <stdlib.h> - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/string.h" - -/** - * int_set_format - format an &set for printing - * @set: set attribute to be formatted - * @way: style of format (0 for router ID list, 1 for community list) - * @from: starting position in set - * @buf: destination buffer - * @size: size of buffer - * - * This function takes a set attribute and formats it. @way specifies - * the style of format (router ID / community). @from argument can be - * used to specify the first printed value for the purpose of printing - * untruncated sets even with smaller buffers. If the output fits in - * the buffer, 0 is returned, otherwise the position of the first not - * printed item is returned. This value can be used as @from argument - * in subsequent calls. If truncated output suffices, -1 can be - * instead used as @from, in that case " ..." is eventually added at - * the buffer to indicate truncation. 
- */ -int -int_set_format(const struct adata *set, int way, int from, byte *buf, uint size) -{ - u32 *z = (u32 *) set->data; - byte *end = buf + size - 24; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i++) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - if (way) - buf += bsprintf(buf, "(%d,%d)", z[i] >> 16, z[i] & 0xffff); - else - buf += bsprintf(buf, "%R", z[i]); - } - *buf = 0; - return 0; -} - -int -ec_format(byte *buf, u64 ec) -{ - u32 type, key, val; - char tbuf[16]; - const char *kind; - - type = ec >> 48; - kind = ec_subtype_str(type & 0xf0ff); - - if (!kind) { - bsprintf(tbuf, "unknown 0x%x", type); - kind = tbuf; - } - - switch (ec >> 56) - { - /* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ - case 0x00: - case 0x40: - key = (ec >> 32) & 0xFFFF; - val = ec; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* RFC 4360 3.2. 
IPv4 Address Specific Extended Community */ - case 0x01: - case 0x41: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %R, %u)", kind, key, val); - - /* RFC 5668 4-Octet AS Specific BGP Extended Community */ - case 0x02: - case 0x42: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* Generic format for unknown kinds of extended communities */ - default: - key = ec >> 32; - val = ec; - return bsprintf(buf, "(generic, 0x%x, 0x%x)", key, val); - } - -} - -int -ec_set_format(const struct adata *set, int from, byte *buf, uint size) -{ - u32 *z = int_set_get_data(set); - byte *end = buf + size - 64; - int from2 = MAX(from, 0); - int to = int_set_get_size(set); - int i; - - for (i = from2; i < to; i += 2) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - buf += ec_format(buf, ec_get(z, i)); - } - *buf = 0; - return 0; -} - -int -lc_format(byte *buf, lcomm lc) -{ - return bsprintf(buf, "(%u, %u, %u)", lc.asn, lc.ldp1, lc.ldp2); -} - -int -lc_set_format(const struct adata *set, int from, byte *buf, uint bufsize) -{ - u32 *d = (u32 *) set->data; - byte *end = buf + bufsize - 64; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i += 3) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, "..."); - else - buf[-1] = 0; - return i; - } - - buf += bsprintf(buf, "(%u, %u, %u)", d[i], d[i+1], d[i+2]); - *buf++ = ' '; - } - - if (i != from2) - buf--; - - *buf = 0; - return 0; -} - -int -int_set_contains(const struct adata *list, u32 val) -{ - if (!list) - return 0; - - u32 *l = (u32 *) list->data; - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (*l++ == val) - return 1; - - return 0; -} - -int -ec_set_contains(const struct adata *list, u64 val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - u32 
eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (l[i] == eh && l[i+1] == el) - return 1; - - return 0; -} - -int -lc_set_contains(const struct adata *list, lcomm val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i += 3) - if (lc_match(l, i, val)) - return 1; - - return 0; -} - -const struct adata * -int_set_prepend(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data + 4, list->data, list->length); - - * (u32 *) res->data = val; - - return res; -} - -const struct adata * -int_set_add(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data, list->data, list->length); - - * (u32 *) (res->data + len) = val; - - return res; -} - -const struct adata * -ec_set_add(struct linpool *pool, const struct adata *list, u64 val) -{ - if (ec_set_contains(list, val)) - return list; - - int olen = list ? list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + 8); - res->length = olen + 8; - - if (list) - memcpy(res->data, list->data, list->length); - - u32 *l = (u32 *) (res->data + olen); - l[0] = ec_hi(val); - l[1] = ec_lo(val); - - return res; -} - -const struct adata * -lc_set_add(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (lc_set_contains(list, val)) - return list; - - int olen = list ? 
list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + LCOMM_LENGTH); - res->length = olen + LCOMM_LENGTH; - - if (list) - memcpy(res->data, list->data, list->length); - - lc_put((u32 *) (res->data + olen), val); - - return res; -} - -const struct adata * -int_set_del(struct linpool *pool, const struct adata *list, u32 val) -{ - if (!int_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 4); - res->length = list->length - 4; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (l[i] != val) - *k++ = l[i]; - - return res; -} - -const struct adata * -ec_set_del(struct linpool *pool, const struct adata *list, u64 val) -{ - if (!ec_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 8); - res->length = list->length - 8; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - u32 eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (! (l[i] == eh && l[i+1] == el)) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - return res; -} - -const struct adata * -lc_set_del(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (!lc_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - LCOMM_LENGTH); - res->length = list->length - LCOMM_LENGTH; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i=0; i < len; i += 3) - if (! 
lc_match(l, i, val)) - k = lc_copy(k, l+i); - - return res; -} - -const struct adata * -int_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i++) - if (!int_set_contains(l1, l[i])) - *k++ = l[i]; - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 2) - if (!ec_set_contains(l1, ec_get(l, i))) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 3) - if (!lc_set_contains(l1, lc_get(l, i))) - k = lc_copy(k, l+i); - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return 
res; -} - - -struct adata * -ec_set_del_nontrans(struct linpool *pool, const struct adata *set) -{ - adata *res = lp_alloc_adata(pool, set->length); - u32 *src = int_set_get_data(set); - u32 *dst = int_set_get_data(res); - int len = int_set_get_size(set); - int i; - - /* Remove non-transitive communities (EC_TBIT set) */ - for (i = 0; i < len; i += 2) - { - if (src[i] & EC_TBIT) - continue; - - *dst++ = src[i]; - *dst++ = src[i+1]; - } - - res->length = ((byte *) dst) - res->data; - - return res; -} - -static int -int_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; -} - -struct adata * -int_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 4, 4, int_set_cmp); - return dst; -} - - -static int -ec_set_cmp(const void *X, const void *Y) -{ - u64 x = ec_get(X, 0); - u64 y = ec_get(Y, 0); - return (x < y) ? -1 : (x > y) ? 1 : 0; -} - -struct adata * -ec_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 8, 8, ec_set_cmp); - return dst; -} - -void -ec_set_sort_x(struct adata *set) -{ - /* Sort in place */ - qsort(set->data, set->length / 8, 8, ec_set_cmp); -} - - -static int -lc_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - if (x[0] != y[0]) - return (x[0] > y[0]) ? 1 : -1; - if (x[1] != y[1]) - return (x[1] > y[1]) ? 1 : -1; - if (x[2] != y[2]) - return (x[2] > y[2]) ? 
1 : -1; - return 0; -} - -struct adata * -lc_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); - return dst; -} diff --git a/nest/a-set_test.c b/nest/a-set_test.c deleted file mode 100644 index 96b6a727..00000000 --- a/nest/a-set_test.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "lib/net.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" - -#define SET_SIZE 10 -static const struct adata *set_sequence; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_same; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */ -static const struct adata *set_random; - -#define BUFFER_SIZE 1000 -static byte buf[BUFFER_SIZE] = {}; - -#define SET_SIZE_FOR_FORMAT_OUTPUT 10 - -struct linpool *lp; - -enum set_type -{ - SET_TYPE_INT, - SET_TYPE_EC -}; - -static void -generate_set_sequence(enum set_type type, int len) -{ - struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - int i; - for (i = 0; i < len; i++) - { - if (type == SET_TYPE_INT) - { - set_sequence = int_set_add(lp, set_sequence, i); - set_sequence_same = int_set_add(lp, set_sequence_same, i); - set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = int_set_add(lp, set_random, bt_random()); - } - else if (type == SET_TYPE_EC) - { - set_sequence = ec_set_add(lp, set_sequence, i); - set_sequence_same = ec_set_add(lp, set_sequence_same, i); - set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + 
SET_SIZE); - set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); - } - else - bt_abort_msg("This should be unreachable"); - } -} - -/* - * SET INT TESTS - */ - -static int -t_set_int_contains(void) -{ - int i; - - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(int_set_contains(set_sequence, i)); - bt_assert(int_set_contains(set_sequence, -1) == 0); - bt_assert(int_set_contains(set_sequence, SET_SIZE) == 0); - - int *data = int_set_get_data(set_sequence); - for (i = 0; i < SET_SIZE; i++) - bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); - - rfree(lp); - return 1; -} - -static int -t_set_int_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *set_union; - set_union = int_set_union(lp, set_sequence, set_sequence_same); - bt_assert(int_set_get_size(set_union) == SET_SIZE); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - set_union = int_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_format(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT); - - bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 1, 0, buf, 
BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = int_set_del(lp, deleting_sequence, i); - bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - int_set_get_size(deleting_sequence), - SET_SIZE-1-i); - } - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - -/* - * SET EC TESTS - */ - -static int -t_set_ec_contains(void) -{ - u32 i; - - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(ec_set_contains(set_sequence, i)); - bt_assert(ec_set_contains(set_sequence, -1) == 0); - bt_assert(ec_set_contains(set_sequence, SET_SIZE) == 0); - -// int *data = ec_set_get_data(set_sequence); -// for (i = 0; i < SET_SIZE; i++) -// bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); - - rfree(lp); - return 1; -} - -static int -t_set_ec_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *set_union; - set_union = ec_set_union(lp, set_sequence, set_sequence_same); - bt_assert(ec_set_get_size(set_union) == SET_SIZE); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - set_union = ec_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_ec_format(void) -{ - 
resource_init(); - - const struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - u64 i = 0; - set_sequence = ec_set_add(lp, set_sequence, i); - for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) - set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); - - bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); - bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, - "ec_set_format() returns '%s'", buf); - - rfree(lp); - return 1; -} - -static int -t_set_ec_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = ec_set_del(lp, deleting_sequence, i); - bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - ec_set_get_size(deleting_sequence), SET_SIZE-1-i); - } - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - - bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); - bt_test_suite(t_set_int_format, "Testing sets of integers: format"); - bt_test_suite(t_set_int_union, "Testing sets of integers: union"); - bt_test_suite(t_set_int_delete, "Testing sets of integers: delete"); - - bt_test_suite(t_set_ec_contains, "Testing sets of Extended Community values: contains, get_data"); - bt_test_suite(t_set_ec_format, "Testing sets of Extended Community values: format"); - bt_test_suite(t_set_ec_union, "Testing sets of Extended Community values: union"); - bt_test_suite(t_set_ec_delete, "Testing sets of Extended Community values: delete"); - - return bt_exit_value(); -} diff 
--git a/nest/attrs.h b/nest/attrs.h deleted file mode 100644 index 50da817b..00000000 --- a/nest/attrs.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Attribute Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ATTRS_H_ -#define _BIRD_ATTRS_H_ - -#include <stdint.h> -#include "lib/unaligned.h" -#include "nest/route.h" - - -/* a-path.c */ - -#define AS_PATH_SET 1 /* Types of path segments */ -#define AS_PATH_SEQUENCE 2 -#define AS_PATH_CONFED_SEQUENCE 3 -#define AS_PATH_CONFED_SET 4 - -#define AS_PATH_MAXLEN 10000 - -#define AS_TRANS 23456 -/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF - * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details - */ - -struct f_tree; - -int as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen); -int as_path_16to32(byte *dst, const byte *src, uint len); -int as_path_32to16(byte *dst, const byte *src, uint len); -int as_path_contains_as4(const struct adata *path); -int as_path_contains_confed(const struct adata *path); -struct adata *as_path_strip_confed(struct linpool *pool, const struct adata *op); -struct adata *as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as); -struct adata *as_path_to_old(struct linpool *pool, const struct adata *path); -struct adata *as_path_cut(struct linpool *pool, const struct adata *path, uint num); -const struct adata *as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2); -void as_path_format(const struct adata *path, byte *buf, uint size); -int as_path_getlen(const struct adata *path); -int as_path_getlen_int(const struct adata *path, int bs); -int as_path_get_first(const struct adata *path, u32 *orig_as); -int as_path_get_first_regular(const struct adata *path, u32 *last_as); -int as_path_get_last(const struct adata *path, u32 *last_as); -u32 
as_path_get_last_nonaggregated(const struct adata *path); -int as_path_contains(const struct adata *path, u32 as, int min); -int as_path_match_set(const struct adata *path, const struct f_tree *set); -const struct adata *as_path_filter(struct linpool *pool, const struct adata *path, const struct f_tree *set, u32 key, int pos); - -static inline struct adata *as_path_prepend(struct linpool *pool, const struct adata *path, u32 as) -{ return as_path_prepend2(pool, path, AS_PATH_SEQUENCE, as); } - - -#define PM_ASN 0 -#define PM_QUESTION 1 -#define PM_ASTERISK 2 -#define PM_ASN_EXPR 3 -#define PM_ASN_RANGE 4 -#define PM_ASN_SET 5 -#define PM_LOOP 6 - -struct f_path_mask_item { - union { - u32 asn; /* PM_ASN */ - const struct f_line *expr; /* PM_ASN_EXPR */ - const struct f_tree *set; /* PM_ASN_SET */ - struct { /* PM_ASN_RANGE */ - u32 from; - u32 to; - }; - }; - int kind; -}; - -struct f_path_mask { - uint len; - struct f_path_mask_item item[0]; -}; - -int as_path_match(const struct adata *path, const struct f_path_mask *mask); - - -/* Counterparts to appropriate as_path_* functions */ - -static inline int -aggregator_16to32(byte *dst, const byte *src) -{ - put_u32(dst, get_u16(src)); - memcpy(dst+4, src+2, 4); - return 8; -} - -static inline int -aggregator_32to16(byte *dst, const byte *src) -{ - put_u16(dst, get_u32(src)); - memcpy(dst+2, src+4, 4); - return 6; -} - -static inline int -aggregator_contains_as4(const struct adata *a) -{ - return get_u32(a->data) > 0xFFFF; -} - -static inline struct adata * -aggregator_to_old(struct linpool *pool, const struct adata *a) -{ - struct adata *d = lp_alloc_adata(pool, 8); - put_u32(d->data, AS_TRANS); - memcpy(d->data + 4, a->data + 4, 4); - return d; -} - - -/* a-set.c */ - - -/* Extended Community subtypes (kinds) */ -enum ec_subtype { - EC_RT = 0x0002, - EC_RO = 0x0003, - EC_GENERIC = 0xFFFF, -}; - -static inline const char *ec_subtype_str(const enum ec_subtype ecs) { - switch (ecs) { - case EC_RT: return "rt"; - case 
EC_RO: return "ro"; - default: return NULL; - } -} - -/* Transitive bit (for first u32 half of EC) */ -#define EC_TBIT 0x40000000 - -#define ECOMM_LENGTH 8 - -static inline int int_set_get_size(const struct adata *list) -{ return list->length / 4; } - -static inline int ec_set_get_size(const struct adata *list) -{ return list->length / 8; } - -static inline int lc_set_get_size(const struct adata *list) -{ return list->length / 12; } - -static inline u32 *int_set_get_data(const struct adata *list) -{ return (u32 *) list->data; } - -static inline u32 ec_hi(u64 ec) { return ec >> 32; } -static inline u32 ec_lo(u64 ec) { return ec; } -static inline u64 ec_get(const u32 *l, int i) -{ return (((u64) l[i]) << 32) | l[i+1]; } - -/* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ -static inline u64 ec_as2(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0000) << 48) | (key << 32) | val; } - -/* RFC 5668 4-Octet AS Specific BGP Extended Community */ -static inline u64 ec_as4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0200) << 48) | (key << 16) | val; } - -/* RFC 4360 3.2. 
IPv4 Address Specific Extended Community */ -static inline u64 ec_ip4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0100) << 48) | (key << 16) | val; } - -static inline u64 ec_generic(u64 key, u64 val) -{ return (key << 32) | val; } - -/* Large community value */ -typedef struct lcomm { - u32 asn; - u32 ldp1; - u32 ldp2; -} lcomm; - -#define LCOMM_LENGTH 12 - -static inline lcomm lc_get(const u32 *l, int i) -{ return (lcomm) { l[i], l[i+1], l[i+2] }; } - -static inline void lc_put(u32 *l, lcomm v) -{ l[0] = v.asn; l[1] = v.ldp1; l[2] = v.ldp2; } - -static inline int lc_match(const u32 *l, int i, lcomm v) -{ return (l[i] == v.asn && l[i+1] == v.ldp1 && l[i+2] == v.ldp2); } - -static inline u32 *lc_copy(u32 *dst, const u32 *src) -{ memcpy(dst, src, LCOMM_LENGTH); return dst + 3; } - - -int int_set_format(const struct adata *set, int way, int from, byte *buf, uint size); -int ec_format(byte *buf, u64 ec); -int ec_set_format(const struct adata *set, int from, byte *buf, uint size); -int lc_format(byte *buf, lcomm lc); -int lc_set_format(const struct adata *set, int from, byte *buf, uint size); -int int_set_contains(const struct adata *list, u32 val); -int ec_set_contains(const struct adata *list, u64 val); -int lc_set_contains(const struct adata *list, lcomm val); -const struct adata *int_set_prepend(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *int_set_add(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_add(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_add(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_del(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_del(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_del(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_union(struct linpool *pool, 
const struct adata *l1, const struct adata *l2); -const struct adata *ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); -const struct adata *lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); - -struct adata *ec_set_del_nontrans(struct linpool *pool, const struct adata *set); -struct adata *int_set_sort(struct linpool *pool, const struct adata *src); -struct adata *ec_set_sort(struct linpool *pool, const struct adata *src); -struct adata *lc_set_sort(struct linpool *pool, const struct adata *src); - -void ec_set_sort_x(struct adata *set); /* Sort in place */ - -#endif diff --git a/nest/bird.h b/nest/bird.h index 55712abe..931974a0 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -9,7 +9,6 @@ #ifndef _BIRD_BIRD_H_ #define _BIRD_BIRD_H_ -#include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" #include "lib/net.h" diff --git a/nest/cmds.c b/nest/cmds.c index 18f39eb5..092be48a 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -8,7 +8,7 @@ #include "nest/bird.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "nest/cmds.h" @@ -51,47 +51,80 @@ cmd_show_symbols(struct sym_show_data *sd) cli_msg(1010, "%-8s\t%s", sd->sym->name, cf_symbol_class_name(sd->sym)); else { - HASH_WALK(config->sym_hash, next, sym) - { - if (!sym->scope->active) - continue; + for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next) + HASH_WALK(scope->hash, next, sym) + { + if (!sym->scope->active) + continue; - if (sd->type && (sym->class != sd->type)) - continue; + if (sd->type && (sym->class != sd->type)) + continue; - cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); - } - HASH_WALK_END; + cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); + } + HASH_WALK_END; cli_msg(0, ""); } } -static void -print_size(char *dsc, size_t val) +#define SIZE_SUFFIX " kMGT" +#define SIZE_FORMAT "% 
4u.%1u % 1cB" +#define SIZE_ARGS(a) (a).val, (a).decimal, SIZE_SUFFIX[(a).magnitude] + +struct size_args { + u64 val:48; + u64 decimal:8; + u64 magnitude:8; +}; + +static struct size_args +get_size_args(u64 val) { - char *px = " kMG"; - int i = 0; - while ((val >= 10000) && (i < 3)) +#define VALDEC 10 /* One decimal place */ + val *= VALDEC; + + uint i = 0; + while ((val >= 10000 * VALDEC) && (i < 4)) { val = (val + 512) / 1024; i++; } - cli_msg(-1018, "%-17s %4u %cB", dsc, (unsigned) val, px[i]); + return (struct size_args) { + .val = (val / VALDEC), + .decimal = (val % VALDEC), + .magnitude = i, + }; +} + +static void +print_size(char *dsc, struct resmem vals) +{ + struct size_args effective = get_size_args(vals.effective); + struct size_args overhead = get_size_args(vals.overhead); + + cli_msg(-1018, "%-17s " SIZE_FORMAT " " SIZE_FORMAT, dsc, SIZE_ARGS(effective), SIZE_ARGS(overhead)); } extern pool *rt_table_pool; extern pool *rta_pool; +extern uint *pages_kept; void cmd_show_memory(void) { cli_msg(-1018, "BIRD memory usage"); + cli_msg(-1018, "%-17s Effective Overhead", ""); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); print_size("Protocols:", rmemsize(proto_pool)); - print_size("Total:", rmemsize(&root_pool)); + struct resmem total = rmemsize(&root_pool); +#ifdef HAVE_MMAP + print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); + total.overhead += page_size * *pages_kept; +#endif + print_size("Total:", total); cli_msg(0, ""); } @@ -101,7 +134,7 @@ cmd_eval(const struct f_line *expr) buffer buf; LOG_BUFFER_INIT(buf); - if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN) + if (f_eval_buf(expr, &buf) > F_RETURN) { cli_msg(8008, "runtime error"); return; diff --git a/nest/config.Y b/nest/config.Y index ef3e27c0..7c68a09a 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -17,6 +17,7 @@ CF_HDR CF_DEFINES +static struct rtable_config *this_table; static struct 
proto_config *this_proto; static struct channel_config *this_channel; static struct iface_patt *this_ipatt; @@ -111,19 +112,20 @@ proto_postconfig(void) CF_DECLS -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) -CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) -CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) +CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -131,7 +133,7 @@ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, 
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) +CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) @@ -141,7 +143,7 @@ CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) %type <s> optproto %type <ra> r_args %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type tos password_algorithm %type <ps> proto_patt proto_patt2 %type <cc> channel_start proto_channel %type <cl> limit_spec @@ -206,16 +208,37 @@ CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP conf: table ; +table: table_start table_sorted table_opt_list ; + +table_start: net_type TABLE symbol { + this_table = rt_new_table($3, $1); + } + ; + table_sorted: - { $$ = 0; } - | SORTED { $$ = 1; } + /* empty */ + | SORTED { this_table->sorted = 1; } ; -table: net_type TABLE symbol table_sorted { - struct rtable_config *cf; - cf = rt_new_table($3, $1); - cf->sorted = $4; +table_opt: + SORTED bool { this_table->sorted = $2; } + | TRIE bool { + if (!net_val_match(this_table->addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR)) + cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); + this_table->trie_used = $2; } + | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } + | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } + ; + +table_opts: + /* empty */ + | table_opts table_opt ';' + ; + +table_opt_list: + /* empty */ + | '{' table_opts '}' ; @@ -348,6 +371,7 @@ debug_default: DEBUG 
PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } + | DEBUG PIPE bool { new_config->pipe_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -615,20 +639,28 @@ r_args: $$ = cfg_allocz(sizeof(struct rt_show_data)); init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; - $$->running_on_config = new_config->fallback; + $$->running_on_config = config; } | r_args net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; + $$->addr_mode = RSD_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); - $$->show_for = 1; $$->addr = $3; + $$->addr_mode = RSD_ADDR_FOR; + } + | r_args IN net_any { + $$ = $1; + if ($$->addr) cf_error("Only one prefix expected"); + if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); + $$->addr = $3; + $$->addr_mode = RSD_ADDR_IN; } - | r_args TABLE CF_SYM_KNOWN { +| r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); @@ -673,7 +705,7 @@ r_args: $$ = $1; $$->filtered = 1; } - | r_args export_mode CF_SYM_KNOWN { + | r_args export_mode symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -690,7 +722,7 @@ r_args: $$->export_channel = $3; $$->tables_defined_by = RSD_TDB_INDIRECT; } - | r_args PROTOCOL CF_SYM_KNOWN { + | r_args PROTOCOL symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -819,8 +851,10 @@ CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]]) { neigh_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]]) { rta_dump_all(); cli_msg(0, ""); } ; -CF_CLI(DUMP ROUTES,,, [[Dump routing table]]) +CF_CLI(DUMP ROUTES,,, [[Dump routes]]) { rt_dump_all(); cli_msg(0, ""); 
} ; +CF_CLI(DUMP TABLES,,, [[Dump table connections]]) +{ rt_dump_hooks_all(); cli_msg(0, ""); } ; CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]]) { protos_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) @@ -890,9 +924,6 @@ proto_patt2: | TEXT { $$.ptr = $1; $$.patt = 1; } ; -dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ; - - CF_CODE CF_END diff --git a/nest/iface.c b/nest/iface.c index 83a633a3..682340c5 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -591,7 +591,7 @@ ifa_update(struct ifa *a) if (ipa_equal(b->brd, a->brd) && ipa_equal(b->opposite, a->opposite) && b->scope == a->scope && - !((b->flags ^ a->flags) & IA_PEER)) + !((b->flags ^ a->flags) & (IA_SECONDARY | IA_PEER | IA_HOST))) { b->flags |= IA_UPDATED; return b; diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? 
l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/neighbor.c b/nest/neighbor.c index 1a31fb79..7cf9c85d 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -345,7 +345,7 @@ neigh_free(neighbor *n) { rem_node(&n->n); rem_node(&n->if_n); - sl_free(neigh_slab, n); + sl_free(n); } /** diff --git a/nest/proto.c b/nest/proto.c index 7cfb1555..77817888 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -16,17 +16,16 @@ #include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/iface.h" #include "nest/cli.h" #include "filter/filter.h" #include "filter/f-inst.h" pool *proto_pool; -list proto_list; +list STATIC_LIST_INIT(proto_list); -static list protocol_list; -struct protocol *class_to_protocol[PROTOCOL__MAX]; +static list STATIC_LIST_INIT(protocol_list); #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) #define PD(p, msg, args...) 
({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) @@ -43,8 +42,7 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; -static char *e_states[] = { "DOWN", "FEEDING", "READY" }; +static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; @@ -52,15 +50,17 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); +static void channel_feed_end(struct channel *c); +static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } static inline int channel_is_active(struct channel *c) -{ return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } +{ return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->proto->reload_routes && c->reloadable; } @@ -68,10 +68,46 @@ static inline int channel_reloadable(struct channel *c) static inline void channel_log_state_change(struct channel *c) { - if (c->export_state) - CD(c, "State changed to %s/%s", c_states[c->channel_state], e_states[c->export_state]); - else - CD(c, "State changed to %s", c_states[c->channel_state]); + CD(c, "State changed to %s", c_states[c->channel_state]); +} + +void 
+channel_import_log_state_change(struct rt_import_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + CD(c, "Channel import state changed to %s", rt_import_state_name(state)); +} + +void +channel_export_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + CD(c, "Channel export state changed to %s", rt_export_state_name(state)); + + switch (state) + { + case TES_FEEDING: + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + break; + case TES_READY: + channel_feed_end(c); + break; + } +} + +static void +channel_dump_import_req(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); +} + +static void +channel_dump_export_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void @@ -141,6 +177,15 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } +rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old); + +void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance @@ -165,12 +210,14 @@ proto_add_channel(struct proto *p, struct 
channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; + rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; @@ -181,7 +228,6 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; - c->export_state = ES_DOWN; c->last_state_change = current_time(); c->reloadable = 1; @@ -203,6 +249,7 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); + rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } @@ -223,7 +270,7 @@ proto_pause_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_START); + channel_set_state(c, CS_PAUSE); } static void @@ -232,7 +279,7 @@ proto_stop_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_FLUSHING); + channel_set_state(c, CS_STOP); } static void @@ -244,71 +291,6 @@ proto_remove_channels(struct proto *p) } static void -channel_schedule_feed(struct channel *c, int initial) -{ - // DBG("%s: Scheduling meal\n", p->name); - ASSERT(c->channel_state == CS_UP); - - c->export_state = ES_FEEDING; - c->refeeding = !initial; - - ev_schedule_work(c->feed_event); -} - -static void -channel_feed_loop(void *ptr) -{ - struct channel *c = ptr; - - if (c->export_state != ES_FEEDING) - return; - - /* Start feeding */ - if (!c->feed_active) - { - if (c->proto->feed_begin) - c->proto->feed_begin(c, !c->refeeding); - - c->refeed_pending = 0; - } - - // DBG("Feeding protocol %s 
continued\n", p->name); - if (!rt_feed_channel(c)) - { - ev_schedule_work(c->feed_event); - return; - } - - /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; - if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) - { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); - - /* Continue in feed - it will process routing table again from beginning */ - c->refeed_count = 0; - ev_schedule_work(c->feed_event); - return; - } - - // DBG("Feeding protocol %s finished\n", p->name); - c->export_state = ES_READY; - channel_log_state_change(c); - - if (c->proto->feed_end) - c->proto->feed_end(c); - - /* Restart feeding */ - if (c->refeed_pending) - channel_request_feeding(c); -} - - -static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; @@ -326,14 +308,12 @@ static void channel_roa_out_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = (c->export_state == ES_FEEDING); + CD(c, "Feeding triggered by RPKI change"); - CD(c, "Feeding triggered by RPKI change%s", active ? 
" - already active" : ""); + c->refeed_pending = 1; - if (!active) - channel_request_feeding(c); - else - c->refeed_pending = 1; + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); } /* Temporary code, subscriptions should be changed to resources */ @@ -409,14 +389,8 @@ channel_roa_subscribe_filter(struct channel *c, int dir) { switch (fi->fi_code) { - case FI_ROA_CHECK_IMPLICIT: - tab = fi->i_FI_ROA_CHECK_IMPLICIT.rtc->table; - if (valid) channel_roa_subscribe(c, tab, dir); - found = 1; - break; - - case FI_ROA_CHECK_EXPLICIT: - tab = fi->i_FI_ROA_CHECK_EXPLICIT.rtc->table; + case FI_ROA_CHECK: + tab = fi->i_FI_ROA_CHECK.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; @@ -445,32 +419,189 @@ channel_roa_unsubscribe_all(struct channel *c) } static void +channel_start_import(struct channel *c) +{ + if (c->in_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); + return; + } + + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->in_req = (struct rt_import_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_import_req, + .log_state_change = channel_import_log_state_change, + .preimport = channel_preimport, + .rte_modify = c->proto->rte_modify, + }; + + ASSERT(c->channel_state == CS_UP); + + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + + DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); + rt_request_import(c->table, &c->in_req); +} + +static void channel_start_export(struct channel *c) { + if (c->out_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + return; + } + ASSERT(c->channel_state 
== CS_UP); - ASSERT(c->export_state == ES_DOWN); + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->out_req = (struct rt_export_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_export_req, + .log_state_change = channel_export_log_state_change, + }; + + bmap_init(&c->export_map, c->proto->pool, 1024); + bmap_init(&c->export_reject_map, c->proto->pool, 1024); + + channel_reset_limit(c, &c->out_limit, PLD_OUT); - channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); + + switch (c->ra_mode) { + case RA_OPTIMAL: + c->out_req.export_one = rt_notify_optimal; + break; + case RA_ANY: + c->out_req.export_one = rt_notify_any; + c->out_req.export_bulk = rt_feed_any; + break; + case RA_ACCEPTED: + c->out_req.export_bulk = rt_notify_accepted; + break; + case RA_MERGED: + c->out_req.export_bulk = rt_notify_merged; + break; + default: + bug("Unknown route announcement mode"); + } + + DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); + rt_request_export(c->table, &c->out_req); } static void -channel_stop_export(struct channel *c) +channel_check_stopped(struct channel *c) { - /* Need to abort feeding */ - if (c->export_state == ES_FEEDING) - rt_feed_channel_abort(c); + switch (c->channel_state) + { + case CS_STOP: + if (c->out_req.hook || c->in_req.hook) + return; + + channel_set_state(c, CS_DOWN); + ev_schedule(c->proto->event); + + break; + case CS_PAUSE: + if (c->out_req.hook) + return; + + channel_set_state(c, CS_START); + break; + default: + bug("Stopped channel in a bad state: %d", c->channel_state); + } + + DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]); +} + +void +channel_import_stopped(struct rt_import_request *req) +{ + struct channel *c = 
SKIP_BACK(struct channel, in_req, req); + + req->hook = NULL; + + if (c->in_table) + rt_prune_sync(c->in_table, 1); + + mb_free(c->in_req.name); + c->in_req.name = NULL; + + channel_check_stopped(c); +} + +static void +channel_export_stopped(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + /* The hook has already stopped */ + req->hook = NULL; + + if (c->refeed_pending) + { + c->refeeding = 1; + c->refeed_pending = 0; + rt_request_export(c->table, req); + return; + } + + /* Free the routes from out_table */ + if (c->out_table) + rt_prune_sync(c->out_table, 1); - c->export_state = ES_DOWN; - c->stats.exp_routes = 0; - bmap_reset(&c->export_map, 1024); + mb_free(c->out_req.name); + c->out_req.name = NULL; + + channel_check_stopped(c); } +static void +channel_feed_end(struct channel *c) +{ + struct rt_export_request *req = &c->out_req; + + /* Reset export limit if the feed ended with acceptable number of exported routes */ + struct limit *l = &c->out_limit; + if (c->refeeding && + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= l->max) && + (l->count <= l->max)) + { + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->max); + + c->refeed_pending = 1; + rt_stop_export(req, channel_export_stopped); + return; + } + + if (c->proto->feed_end) + c->proto->feed_end(c); + + if (c->refeed_pending) + rt_stop_export(req, channel_export_stopped); + else + c->refeeding = 0; +} /* Called by protocol for reload from in_table */ void channel_schedule_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); rt_reload_channel_abort(c); ev_schedule_work(c->reload_event); @@ -496,23 +627,6 @@ channel_reload_loop(void *ptr) channel_request_reload(c); } -static void -channel_reset_import(struct channel *c) -{ - /* Need to abort feeding */ - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); - - rt_prune_sync(c->in_table, 1); -} - -static void 
-channel_reset_export(struct channel *c) -{ - /* Just free the routes */ - rt_prune_sync(c->out_table, 1); -} - /* Called by protocol to activate in_table */ void channel_setup_in_table(struct channel *c) @@ -544,20 +658,11 @@ channel_setup_out_table(struct channel *c) static void channel_do_start(struct channel *c) { - rt_lock_table(c->table); - add_tail(&c->table->channels, &c->table_node); c->proto->active_channels++; - c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); - - bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); - - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); - CALL(c->channel->start, c); + + channel_start_import(c); } static void @@ -572,9 +677,31 @@ channel_do_up(struct channel *c) } static void -channel_do_flush(struct channel *c) +channel_do_pause(struct channel *c) { - rt_schedule_prune(c->table); + /* Need to abort feeding */ + if (c->reload_event) + { + ev_postpone(c->reload_event); + rt_reload_channel_abort(c); + } + + /* Stop export */ + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); + + channel_roa_unsubscribe_all(c); + + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); +} + +static void +channel_do_stop(struct channel *c) +{ + /* Stop import */ + if (c->in_req.hook) + rt_stop_import(&c->in_req, channel_import_stopped); c->gr_wait = 0; if (c->gr_lock) @@ -583,28 +710,21 @@ channel_do_flush(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - bmap_free(&c->export_map); c->in_table = NULL; c->reload_event = NULL; c->out_table = NULL; - - channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->feed_active && !c->reload_active); + ASSERT(!c->reload_active); - rem_node(&c->table_node); - rt_unlock_table(c->table); c->proto->active_channels--; - if 
((c->stats.imp_routes + c->stats.filt_routes) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); c->in_table = NULL; c->reload_event = NULL; @@ -623,7 +743,6 @@ void channel_set_state(struct channel *c, uint state) { uint cs = c->channel_state; - uint es = c->export_state; DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); if (state == cs) @@ -635,20 +754,11 @@ channel_set_state(struct channel *c, uint state) switch (state) { case CS_START: - ASSERT(cs == CS_DOWN || cs == CS_UP); + ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); - if (es != ES_DOWN) - channel_stop_export(c); - - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); - - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); - break; case CS_UP: @@ -663,23 +773,24 @@ channel_set_state(struct channel *c, uint state) channel_do_up(c); break; - case CS_FLUSHING: - ASSERT(cs == CS_START || cs == CS_UP); + case CS_PAUSE: + ASSERT(cs == CS_UP); - if (es != ES_DOWN) - channel_stop_export(c); + if (cs == CS_UP) + channel_do_pause(c); + break; - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); + case CS_STOP: + ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE); - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); + if (cs == CS_UP) + channel_do_pause(c); - channel_do_flush(c); + channel_do_stop(c); break; case CS_DOWN: - ASSERT(cs == CS_FLUSHING); + ASSERT(cs == CS_STOP); channel_do_down(c); break; @@ -704,35 +815,16 @@ channel_set_state(struct channel *c, uint state) void channel_request_feeding(struct channel *c) { - ASSERT(c->channel_state == CS_UP); - - CD(c, "Feeding requested"); - - /* Do nothing if we 
are still waiting for feeding */ - if (c->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (c->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (!c->feed_active) - return; - - rt_feed_channel_abort(c); - } + ASSERT(c->out_req.hook); - /* Track number of exported routes during refeed */ - c->refeed_count = 0; - - channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ - channel_log_state_change(c); + c->refeed_pending = 1; + rt_stop_export(&c->out_req, channel_export_stopped); } static void channel_request_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); ASSERT(channel_reloadable(c)); CD(c, "Reload requested"); @@ -743,8 +835,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -847,19 +939,19 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; c->debug = cf->debug; + c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && 
!c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -971,8 +1063,8 @@ proto_event(void *ptr) if (proto_is_done(p)) { - if (p->proto->cleanup) - p->proto->cleanup(p); + rfree(p->pool); + p->pool = NULL; p->active = 0; proto_log_state_change(p); @@ -1524,7 +1616,7 @@ graceful_restart_done(timer *t UNUSED) WALK_LIST(c, p->channels) { /* Resume postponed export of routes */ - if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ @@ -1614,7 +1706,11 @@ protos_dump_all(void) struct proto *p; WALK_LIST(p, proto_list) { - debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); +#define DPF(x) (p->x ? " " #x : "") + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + p->name, p, p_states[p->proto_state], p->active_channels, + DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); +#undef DPF struct channel *c; WALK_LIST(c, p->channels) @@ -1624,6 +1720,9 @@ protos_dump_all(void) debug("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) debug("\tOutput filter: %s\n", filter_name(c->out_filter)); + debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], + c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", + c->out_req.hook ? 
rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -1643,14 +1742,13 @@ void proto_build(struct protocol *p) { add_tail(&protocol_list, &p->n); - ASSERT(p->class); - ASSERT(!class_to_protocol[p->class]); - class_to_protocol[p->class] = p; } /* FIXME: convert this call to some protocol hook */ extern void bfd_init_all(void); +void protos_build_gen(void); + /** * protos_build - build a protocol list * @@ -1663,44 +1761,7 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&proto_list); - init_list(&protocol_list); - - proto_build(&proto_device); -#ifdef CONFIG_RADV - proto_build(&proto_radv); -#endif -#ifdef CONFIG_RIP - proto_build(&proto_rip); -#endif -#ifdef CONFIG_STATIC - proto_build(&proto_static); -#endif -#ifdef CONFIG_MRT - proto_build(&proto_mrt); -#endif -#ifdef CONFIG_OSPF - proto_build(&proto_ospf); -#endif -#ifdef CONFIG_PIPE - proto_build(&proto_pipe); -#endif -#ifdef CONFIG_BGP - proto_build(&proto_bgp); -#endif -#ifdef CONFIG_BFD - proto_build(&proto_bfd); - bfd_init_all(); -#endif -#ifdef CONFIG_BABEL - proto_build(&proto_babel); -#endif -#ifdef CONFIG_RPKI - proto_build(&proto_rpki); -#endif -#ifdef CONFIG_PERF - proto_build(&proto_perf); -#endif + protos_build_gen(); proto_pool = rp_new(&root_pool, "Protocols"); proto_shutdown_timer = tm_new(proto_pool); @@ -1780,88 +1841,104 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: 
limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); +} - if (l->state == PLS_BLOCKED) +static void +channel_activate_limit(struct channel *c, struct limit *l, int dir) +{ + if (c->limit_active & (1 << dir)) return; - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); +} - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; +static int +channel_limit_warn(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; + channel_log_limit(c, l, dir); - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + return 0; } -static void -channel_verify_limits(struct channel *c) +static int +channel_limit_block(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + 
channel_activate_limit(c, l, dir); + + return 1; +} + +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) { - struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); + channel_activate_limit(c, l, dir); - l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); - l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); + return 1; } -static inline void -channel_reset_limit(struct channel_limit *l) +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) { - if (l->action) - l->state = PLS_INITIAL; + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? 
cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); +} + +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) +{ + limit_reset(l); + c->limit_active &= ~(1 << dir); } static inline void @@ -1913,8 +1990,6 @@ proto_do_down(struct proto *p) { p->down_code = 0; neigh_prune(); - rfree(p->pool); - p->pool = NULL; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) @@ -2009,38 +2084,58 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct channel_import_stats *ch_is = &c->import_stats; + struct channel_export_stats *ch_es = &c->export_stats; + struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; + struct rt_export_stats *rt_es = c->out_req.hook ? &c->out_req.hook->stats : NULL; + +#define SON(ie, item) ((ie) ? 
(ie)->item : 0) +#define SCI(item) SON(ch_is, item) +#define SCE(item) SON(ch_es, item) +#define SRI(item) SON(rt_is, item) +#define SRE(item) SON(rt_es, item) + + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; if (c->in_keep_filtered) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); - - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + in_routes, out_routes, SRI(pref)); + + cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + SCI(updates_received), SCI(updates_invalid), + SCI(updates_filtered), SRI(updates_ignored), + SCI(updates_limited_rx), SCI(updates_limited_in), + SRI(updates_accepted)); + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", + SCI(withdraws_received), SCI(withdraws_invalid), + SRI(withdraws_ignored), SRI(withdraws_accepted)); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", + 
SRE(updates_received), SCE(updates_rejected), + SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); + cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", + SRE(withdraws_received), SCE(withdraws_accepted)); + +#undef SRI +#undef SRE +#undef SCI +#undef SCE +#undef SON } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2048,6 +2143,8 @@ channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(c->out_req.hook))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); @@ -2058,9 +2155,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? 
" waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); diff --git a/nest/protocol.h b/nest/protocol.h index 80b4509b..aeb60ac6 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,7 +12,8 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" -#include "nest/route.h" +#include "nest/rt.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -37,38 +38,20 @@ struct symbol; * Routing Protocol */ -enum protocol_class { - PROTOCOL_NONE, - PROTOCOL_BABEL, - PROTOCOL_BFD, - PROTOCOL_BGP, - PROTOCOL_DEVICE, - PROTOCOL_DIRECT, - PROTOCOL_KERNEL, - PROTOCOL_OSPF, - PROTOCOL_MRT, - PROTOCOL_PERF, - PROTOCOL_PIPE, - PROTOCOL_RADV, - PROTOCOL_RIP, - PROTOCOL_RPKI, - PROTOCOL_STATIC, - PROTOCOL__MAX -}; - -extern struct protocol *class_to_protocol[PROTOCOL__MAX]; struct protocol { node n; char *name; char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ - enum protocol_class class; /* Machine readable protocol class */ uint preference; /* Default protocol preference */ uint channel_mask; /* Mask of accepted channel types (NB_*) */ uint proto_size; /* Size of protocol data structure */ uint config_size; /* Size of protocol config data structure */ + uint eattr_begin; /* First ID of registered eattrs */ + uint eattr_end; /* End of eattr id zone */ + void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct 
proto_config *); /* After configuring each instance */ struct proto * (*init)(struct proto_config *); /* Create new instance */ @@ -76,16 +59,15 @@ struct protocol { void (*dump)(struct proto *); /* Debugging dump */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ - void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ - int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ +// int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ }; -void protos_build(void); -void proto_build(struct protocol *); +void protos_build(void); /* Called from sysdep to initialize protocols */ +void proto_build(struct protocol *); /* Called from protocol to register itself */ void protos_preconfig(struct config *); void protos_commit(struct config *new, struct config *old, int force_restart, int type); struct proto * proto_spawn(struct proto_config *cf, uint disabled); @@ -132,31 +114,6 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { - /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 
imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ - - /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ -}; - struct proto { node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ @@ -235,7 +192,7 @@ struct proto { struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); - u32 (*rte_igp_metric)(struct rte *); + u32 (*rte_igp_metric)(const struct rte *); /* Hic sunt protocol-specific data */ }; @@ -275,7 +232,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel 
*c, uint mask); @@ -430,19 +387,30 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; + +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif + +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) + /* * Channels */ @@ -484,6 +452,7 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol (relevant when in_keep_filtered is active) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ @@ -500,7 +469,6 @@ struct channel_config { struct channel { node n; /* Node in proto->channels */ - node table_node; /* Node in table->channels */ const char *name; /* Channel name (may be NULL) */ const struct 
channel_class *channel; @@ -509,14 +477,39 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ - struct bmap export_map; /* Keeps track which routes passed export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ - - struct event *feed_event; /* Event responsible for feeding */ - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ + struct bmap export_map; /* Keeps track which routes were really exported */ + struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ + + struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ + + struct channel_import_stats { + /* Import - from protocol to core */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_limited_rx; /* Number of route updates exceeding the rx_limit */ + u32 updates_limited_in; /* Number of route updates exceeding the in_limit */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + } import_stats; + + struct channel_export_stats { + /* Export - from core to protocol */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + 
u32 updates_limited; /* Number of route updates exceeding the out_limit */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } export_stats; + + struct rt_import_request in_req; /* Table import connection */ + struct rt_export_request out_req; /* Table export connection */ + u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ @@ -529,10 +522,7 @@ struct channel { u8 stale; /* Used in reconfiguration */ u8 channel_state; - u8 export_state; /* Route export state (ES_*, see below) */ - u8 feed_active; - u8 flush_active; - u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 refeeding; /* Refeeding the channel. */ u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ @@ -580,34 +570,34 @@ struct channel { * restricted by that and is on volition of the protocol. Generally, channels * are opened in protocols' start() hooks when going to PS_UP. * - * CS_FLUSHING - The transitional state between initialized channel and closed + * CS_STOP - The transitional state between initialized channel and closed * channel. The channel is still initialized, but no route exchange is allowed. * Instead, the associated table is running flush loop to remove routes imported * through the channel. After that, the channel changes state to CS_DOWN and * is detached from the table (the table is unlocked and the channel is unlinked - * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * from it). Unlike other states, the CS_STOP state is not explicitly * entered or left by the protocol. A protocol may request to close a channel * (by calling channel_close()), which causes the channel to change state to - * CS_FLUSHING and later to CS_DOWN. 
Also note that channels are closed + * CS_STOP and later to CS_DOWN. Also note that channels are closed * automatically by the core when the protocol is going down. * + * CS_PAUSE - Almost the same as CS_STOP, just the table import is kept and + * the table export is stopped before transitioning to CS_START. + * * Allowed transitions: * * CS_DOWN -> CS_START / CS_UP - * CS_START -> CS_UP / CS_FLUSHING - * CS_UP -> CS_START / CS_FLUSHING - * CS_FLUSHING -> CS_DOWN (automatic) + * CS_START -> CS_UP / CS_STOP + * CS_UP -> CS_PAUSE / CS_STOP + * CS_PAUSE -> CS_START (automatic) + * CS_STOP -> CS_DOWN (automatic) */ #define CS_DOWN 0 #define CS_START 1 #define CS_UP 2 -#define CS_FLUSHING 3 - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - +#define CS_STOP 3 +#define CS_PAUSE 4 struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) @@ -625,7 +615,7 @@ void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } -static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); diff --git a/nest/route.h b/nest/route.h deleted file mode 100644 index ade14857..00000000 --- a/nest/route.h +++ /dev/null @@ -1,727 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Routing Table - * - * (c) 1998--2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#ifndef _BIRD_ROUTE_H_ -#define _BIRD_ROUTE_H_ - -#include "lib/lists.h" -#include "lib/bitmap.h" -#include "lib/resource.h" -#include "lib/net.h" - -struct ea_list; -struct protocol; -struct proto; -struct rte_src; -struct symbol; -struct timer; -struct filter; -struct cli; - -/* - * Generic data structure for storing network prefixes. Also used - * for the master routing table. Currently implemented as a hash - * table. - * - * Available operations: - * - insertion of new entry - * - deletion of entry - * - searching for entry by network prefix - * - asynchronous retrieval of fib contents - */ - -struct fib_node { - struct fib_node *next; /* Next in hash chain */ - struct fib_iterator *readers; /* List of readers of this node */ - net_addr addr[0]; -}; - -struct fib_iterator { /* See lib/slists.h for an explanation */ - struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ - byte efef; /* 0xff to distinguish between iterator and node */ - byte pad[3]; - struct fib_node *node; /* Or NULL if freshly merged */ - uint hash; -}; - -typedef void (*fib_init_fn)(void *); - -struct fib { - pool *fib_pool; /* Pool holding all our data */ - slab *fib_slab; /* Slab holding all fib nodes */ - struct fib_node **hash_table; /* Node hash table */ - uint hash_size; /* Number of hash table entries (a power of two) */ - uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 32 - hash_order */ - uint addr_type; /* Type of address data stored in fib (NET_*) */ - uint node_size; /* FIB node size, 0 for nonuniform */ - uint node_offset; /* Offset of fib_node struct inside of user data */ - uint entries; /* Number of entries */ - uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_fn init; /* Constructor */ -}; - -static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) -{ return e ? 
(void *) ((char *) e - f->node_offset) : NULL; } - -static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) -{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } - -void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); -void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ -void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ -void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ -void fib_delete(struct fib *, void *); /* Remove fib entry */ -void fib_free(struct fib *); /* Destroy the fib */ -void fib_check(struct fib *); /* Consistency check for debugging */ - -void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ -struct fib_node *fit_get(struct fib *, struct fib_iterator *); -void fit_put(struct fib_iterator *, struct fib_node *); -void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -void fit_put_end(struct fib_iterator *i); -void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); - - -#define FIB_WALK(fib, type, z) do { \ - struct fib_node *fn_, **ff_ = (fib)->hash_table; \ - uint count_ = (fib)->hash_size; \ - type *z; \ - while (count_--) \ - for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) - -#define FIB_WALK_END } while (0) - -#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) - -#define FIB_ITERATE_START(fib, it, type, z) do { \ - struct fib_node *fn_ = fit_get(fib, it); \ - uint count_ = (fib)->hash_size; \ - uint hpos_ = (it)->hash; \ - type *z; \ - for(;;) { \ - if (!fn_) \ - { \ - if (++hpos_ >= count_) \ - break; \ - fn_ = (fib)->hash_table[hpos_]; \ - continue; \ - } \ - z = fib_node_to_user(fib, fn_); - -#define FIB_ITERATE_END fn_ = 
fn_->next; } } while(0) - -#define FIB_ITERATE_PUT(it) fit_put(it, fn_) - -#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) - -#define FIB_ITERATE_PUT_END(it) fit_put_end(it) - -#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) - -#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) - - -/* - * Master Routing Tables. Generally speaking, each of them contains a FIB - * with each entry pointing to a list of route entries representing routes - * to given network (with the selected one at the head). - * - * Each of the RTE's contains variable data (the preference and protocol-dependent - * metrics) and a pointer to a route attribute block common for many routes). - * - * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. - */ - -struct rtable_config { - node n; - char *name; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ - pool *rp; /* Resource pool to allocate everything from, including itself */ - struct slab *rte_slab; /* Slab to allocate route objects */ - struct fib fib; - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ - int use_count; /* Number of protocols using this table */ - u32 rt_count; /* Number of routes in the table */ 
- - byte internal; /* Internal table of a protocol */ - - struct hmap id_map; - struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. - */ - struct event *rt_event; /* Routing table event */ - btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ - btime gc_time; /* Time of last GC */ - int gc_counter; /* Number of operations since last GC */ - byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte nhu_state; /* Next Hop Update state */ - struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ - struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ - - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ -} rtable; - -struct rt_subscription { - node n; - rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; -}; - -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - -typedef struct network { - struct rte_storage *routes; /* Available routes for this network */ - struct fib_node n; /* FIB flags reserved for kernel syncer */ -} net; - -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of 
host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - struct rta *src; /* Source rta entry */ - byte dest; /* Chosen route destination type (RTD_...) */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - -typedef struct rte { - struct rta *attrs; /* Attributes of this route */ - const net_addr *net; /* Network this RTE belongs to */ - struct rte_src *src; /* Route source that created the route */ - struct channel *sender; /* Channel used to send the route to the routing table */ - btime lastmod; /* Last modified (set by table) */ - u32 id; /* Table specific route id */ - byte flags; /* Table-specific flags */ - byte pflags; /* Protocol-specific flags */ -} rte; - -struct rte_storage { - struct rte_storage *next; /* Next in chain */ - struct rte rte; /* Route data */ -}; - -#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) -#define RTE_OR_NULL(r) ((r) ? 
&((r)->rte) : NULL) - -#define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ - -/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } - -/* Route just has REF_FILTERED flag */ -static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } - - -/* Types of route announcement, also used as flags */ -#define RA_UNDEF 0 /* Undefined RA type */ -#define RA_OPTIMAL 1 /* Announcement of optimal route change */ -#define RA_ACCEPTED 2 /* Announcement of first accepted route */ -#define RA_ANY 3 /* Announcement of any route change */ -#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ - -/* Return value of preexport() callback */ -#define RIC_ACCEPT 1 /* Accepted by protocol */ -#define RIC_PROCESS 0 /* Process it through import filter */ -#define RIC_REJECT -1 /* Rejected by protocol */ -#define RIC_DROP -2 /* Silently dropped by protocol */ - -/** - * rte_update - enter a new update to a routing table - * @c: channel doing the update - * @net: network address - * @rte: a &rte representing the new route - * @src: old route source identifier - * - * This function imports a new route to the appropriate table (via the channel). - * Table keys are @net (obligatory) and @rte->attrs->src. - * Both the @net and @rte pointers can be local. - * - * The route attributes (@rte->attrs) are obligatory. They can be also allocated - * locally. Anyway, if you use an already-cached attribute object, you shall - * call rta_clone() on that object yourself. (This semantics may change in future.) - * - * If the route attributes are local, you may set @rte->attrs->src to NULL, then - * the protocol's default route source will be supplied. 
- * - * When rte_update() gets a route, it automatically validates it. This includes - * checking for validity of the given network and next hop addresses and also - * checking for host-scope or link-scope routes. Then the import filters are - * processed and if accepted, the route is passed to route table recalculation. - * - * The accepted routes are then inserted into the table, replacing the old route - * for the same @net identified by @src. Then the route is announced - * to all the channels connected to the table using the standard export mechanism. - * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same - * as @src. - * - * All memory used for temporary allocations is taken from a special linpool - * @rte_update_pool and freed when rte_update() finishes. - */ -void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); - -extern list routing_tables; -struct config; - -void rt_init(void); -void rt_preconfig(struct config *); -void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); -rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } - -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? 
n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); -int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); -void rt_schedule_prune(rtable *t); -void rte_dump(struct rte_storage *); -void rte_free(struct rte_storage *, rtable *); -struct rte_storage *rte_store(const rte *, net *net, rtable *); -void rt_dump(rtable *); -void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); -void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, struct rte_storage **old_exported, int refeed); -struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - - -/* Default limit for ECMP next hops, defined in sysdep code */ -extern const int rt_default_ecmp; - -struct rt_show_data_rtable { - node n; - rtable *table; - struct channel *export_channel; -}; - -struct rt_show_data { - net_addr *addr; - list tables; - struct rt_show_data_rtable *tab; /* Iterator over table list */ - struct rt_show_data_rtable *last_table; /* Last table in output */ - struct fib_iterator fit; /* Iterator over networks in table */ - int verbose, tables_defined_by; - const struct filter *filter; - struct proto *show_protocol; - struct proto *export_protocol; - struct channel *export_channel; - struct config *running_on_config; - struct krt_proto *kernel; - int export_mode, primary_only, filtered, stats, show_for; - - int table_open; /* Iteration (fit) 
is open */ - int net_counter, rt_counter, show_counter, table_counter; - int net_counter_last, rt_counter_last, show_counter_last; -}; - -void rt_show(struct rt_show_data *); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); - -/* Value of table definition mode in struct rt_show_data */ -#define RSD_TDB_DEFAULT 0 /* no table specified */ -#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ -#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ -#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ - -#define RSD_TDB_SET 0x1 /* internal: show empty tables */ -#define RSD_TDB_NMN 0x2 /* internal: need matching net */ - -/* Value of export_mode in struct rt_show_data */ -#define RSEM_NONE 0 /* Export mode not used */ -#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ -#define RSEM_EXPORT 2 /* Routes accepted by export filter */ -#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ -#define RSEM_EXPORTED 4 /* Routes marked in export map */ - -/* - * Route Attributes - * - * Beware: All standard BGP attributes must be represented here instead - * of making them local to the route. This is needed to ensure proper - * construction of BGP route attribute lists. 
- */ - -/* Nexthop structure */ -struct nexthop { - ip_addr gw; /* Next hop */ - struct iface *iface; /* Outgoing interface */ - struct nexthop *next; - byte flags; - byte weight; - byte labels_orig; /* Number of labels before hostentry was applied */ - byte labels; /* Number of all labels */ - u32 label[0]; -}; - -#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ - - -struct rte_src { - struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ - u32 private_id; /* Private ID, assigned by the protocol */ - u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ -}; - - -typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* Hash over important fields */ - struct ea_list *eattrs; /* Extended Attribute chain */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - ip_addr from; /* Advertising router */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u16 cached:1; /* Are attributes cached? */ - u16 source:7; /* Route source (RTS_...) */ - u16 scope:4; /* Route scope (SCOPE_... -- see ip.h) */ - u16 dest:4; /* Route destination type (RTD_...) 
*/ - word pref; - struct nexthop nh; /* Next hop */ -} rta; - -#define RTS_STATIC 1 /* Normal static route */ -#define RTS_INHERIT 2 /* Route inherited from kernel */ -#define RTS_DEVICE 3 /* Device route */ -#define RTS_STATIC_DEVICE 4 /* Static device route */ -#define RTS_REDIRECT 5 /* Learned via redirect */ -#define RTS_RIP 6 /* RIP route */ -#define RTS_OSPF 7 /* OSPF route */ -#define RTS_OSPF_IA 8 /* OSPF inter-area route */ -#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ -#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ -#define RTS_BGP 11 /* BGP route */ -#define RTS_PIPE 12 /* Inter-table wormhole */ -#define RTS_BABEL 13 /* Babel route */ -#define RTS_RPKI 14 /* Route Origin Authorization */ -#define RTS_PERF 15 /* Perf checker */ -#define RTS_MAX 16 - -#define RTD_NONE 0 /* Undefined next hop */ -#define RTD_UNICAST 1 /* Next hop is neighbor router */ -#define RTD_BLACKHOLE 2 /* Silently drop packets */ -#define RTD_UNREACHABLE 3 /* Reject as unreachable */ -#define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MAX 5 - -#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other - protocol-specific metric is availabe */ - - -extern const char * rta_dest_names[RTD_MAX]; - -static inline const char *rta_dest_name(uint n) -{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } - -/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ -static inline int rte_is_reachable(rte *r) -{ return r->attrs->dest == RTD_UNICAST; } - - -/* - * Extended Route Attributes - */ - -typedef struct eattr { - word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ - byte flags; /* Protocol-dependent flags */ - byte type; /* Attribute type and several flags (EAF_...) 
*/ - union { - uintptr_t data; - const struct adata *ptr; /* Attribute data elsewhere */ - } u; -} eattr; - - -#define EA_CODE(proto,id) (((proto) << 8) | (id)) -#define EA_ID(ea) ((ea) & 0xff) -#define EA_PROTO(ea) ((ea) >> 8) -#define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) -#define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) -#define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) - -const char *ea_custom_name(uint ea); - -#define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0) - -#define EA_CODE_MASK 0xffff -#define EA_CUSTOM_BIT 0x8000 -#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ -#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ -#define EA_BIT_GET(ea) ((ea) >> 24) - -#define EAF_TYPE_MASK 0x1f /* Mask with this to get type */ -#define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */ -#define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */ -#define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */ -#define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */ -#define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ -#define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ -#define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ -#define EAF_TYPE_PTR 0x0d /* Pointer to an object */ -#define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community list */ -#define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ -#define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ -#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ -#define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ -#define EAF_FRESH 0x40 /* An uncached attribute (e.g. 
modified in export filter) */ - -typedef struct adata { - uint length; /* Length of data */ - byte data[0]; -} adata; - -extern const adata null_adata; /* adata of length 0 */ - -static inline struct adata * -lp_alloc_adata(struct linpool *pool, uint len) -{ - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; -} - -static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } - - -typedef struct ea_list { - struct ea_list *next; /* In case we have an override list */ - byte flags; /* Flags: EALF_... */ - byte rfu; - word count; /* Number of attributes */ - eattr attrs[0]; /* Attribute definitions themselves */ -} ea_list; - -#define EALF_SORTED 1 /* Attributes are sorted by code */ -#define EALF_BISECT 2 /* Use interval bisection for searching */ -#define EALF_CACHED 4 /* Attributes belonging to cached rta */ - -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); - -struct ea_walk_state { - ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ - eattr *ea; /* Current eattr, initially NULL */ - u32 visited[4]; /* Bitfield, limiting max to 128 */ -}; - -eattr *ea_find(ea_list *, unsigned ea); -eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); -uintptr_t ea_get_int(ea_list *, unsigned ea, uintptr_t def); -void ea_dump(ea_list *); -void ea_sort(ea_list *); /* Sort entries in all sub-lists */ -unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ -void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */ -int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ -uint ea_hash(ea_list *e); /* Calculate 
16-bit hash value */ -ea_list *ea_append(ea_list *to, ea_list *what); -void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); - -#define ea_normalize(ea) do { \ - if (ea->next) { \ - ea_list *t = alloca(ea_scan(ea)); \ - ea_merge(ea, t); \ - ea = t; \ - } \ - ea_sort(ea); \ - if (ea->count == 0) \ - ea = NULL; \ -} while(0) \ - -static inline eattr * -ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val) -{ - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - eattr *e = &a->attrs[0]; - - a->flags = EALF_SORTED; - a->count = 1; - a->next = *to; - *to = a; - - e->id = id; - e->type = type; - e->flags = flags; - - if (type & EAF_EMBEDDED) - e->u.data = (u32) val; - else - e->u.ptr = (struct adata *) val; - - return e; -} - -static inline void -ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - memcpy(a->data, data, len); - ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); -} - - -#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - -static inline size_t nexthop_size(const struct nexthop *nh) -{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } -int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ -static inline int nexthop_same(struct nexthop *x, struct nexthop *y) -{ return (x == y) || nexthop__same(x, y); } -struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool 
*lp); -struct nexthop *nexthop_sort(struct nexthop *x); -static inline void nexthop_link(struct rta *a, struct nexthop *from) -{ memcpy(&a->nh, from, nexthop_size(from)); } -void nexthop_insert(struct nexthop **n, struct nexthop *y); -int nexthop_is_sorted(struct nexthop *x); - -void rta_init(void); -static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } -#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) -rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ -static inline int rta_is_cached(rta *r) { return r->cached; } -static inline rta *rta_clone(rta *r) { r->uc++; return r; } -void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } -rta *rta_do_cow(rta *o, linpool *lp); -static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } -void rta_dump(rta *); -void rta_dump_all(void); -void rta_show(struct cli *, rta *); - -u32 rt_get_igp_metric(rte *); -struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); - -static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); -} - -/* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. 
- * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. - */ - -static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } -static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } - -/* - * Default protocol preferences - */ - -#define DEF_PREF_DIRECT 240 /* Directly connected */ -#define DEF_PREF_STATIC 200 /* Static route */ -#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ -#define DEF_PREF_BABEL 130 /* Babel */ -#define DEF_PREF_RIP 120 /* RIP */ -#define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_RPKI 100 /* RPKI */ -#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - -/* - * Route Origin Authorization - */ - -#define ROA_UNKNOWN 0 -#define ROA_VALID 1 -#define ROA_INVALID 2 - -#endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 1bece201..f548a575 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -45,11 +45,11 @@ */ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" #include "lib/idm.h" @@ -57,9 
+57,25 @@ #include "lib/string.h" #include <stddef.h> +#include <stdlib.h> const adata null_adata; /* adata of length 0 */ +struct ea_class ea_gen_igp_metric = { + .name = "igp_metric", + .type = T_INT, +}; + +struct ea_class ea_gen_preference = { + .name = "preference", + .type = T_INT, +}; + +struct ea_class ea_gen_from = { + .name = "from", + .type = T_IP, +}; + const char * const rta_src_names[RTS_MAX] = { [RTS_STATIC] = "static", [RTS_INHERIT] = "inherit", @@ -77,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = { [RTS_RPKI] = "RPKI", }; +static void +ea_gen_source_format(const eattr *a, byte *buf, uint size) +{ + if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data]) + bsnprintf(buf, size, "unknown"); + else + bsnprintf(buf, size, "%s", rta_src_names[a->u.data]); +} + +struct ea_class ea_gen_source = { + .name = "source", + .type = T_ENUM_RTS, + .readonly = 1, + .format = ea_gen_source_format, +}; + +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + +/* + * ea_set_hostentry() acquires hostentry from hostcache. + * New hostentry has zero use count. Cached rta locks its + * hostentry (increases its use count), uncached rta does not lock it. + * Hostentry with zero use count is removed asynchronously + * during host cache update, therefore it is safe to hold + * such hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for hostentry->dep table, because that table + * contains routes responsible for that hostentry, and therefore is non-empty if + * given hostentry has non-zero use count. If the hostentry has zero use count, + * the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for a 'source' table governing that routes (argument tab), + * because its routes reference hostentries related to the governing table. 
+ * When all such routes are + * removed, rtas are immediately removed achieving zero uc. Then the 'source' + * table lock could be immediately released, although hostentries may still + * exist - they will be freed together with the 'source' table. + */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, + .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, +}; + const char * rta_dest_names[RTD_MAX] = { [RTD_NONE] = "", [RTD_UNICAST] = "unicast", @@ -85,10 +166,21 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +struct ea_class ea_gen_flowspec_valid = { + .name = "flowspec_valid", + .type = T_ENUM_FLOWSPEC_VALID, + .readonly = 1, +}; + +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; +static slab *rta_slab; static slab *rte_src_slab; static struct idm src_ids; @@ -154,7 +246,7 @@ rt_prune_sources(void) { HASH_DO_REMOVE(src_hash, RSH, sp); idm_free(&src_ids, src->global_id); - sl_free(rte_src_slab, src); + sl_free(src); } } HASH_WALK_FILTER_END; @@ -167,50 +259,10 @@ rt_prune_sources(void) * Multipath Next Hop */ -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) - { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); - - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); - } - - return h; -} - -int -nexthop__same(struct nexthop *x, struct nexthop *y) -{ - for (; x && y; x = x->next, y = y->next) - { - if (!ipa_equal(x->gw, y->gw) || 
(x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; - - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } - - return x == y; -} - static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -235,23 +287,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -268,138 +313,225 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. 
*/ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); + + if (!xvalid && !yvalid) + break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); if (cmp < 0) { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); } else if (cmp > 0) { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); } else { - ASSUME(x && y); - *n = rx ? x : (ry ? 
y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); } - n = &((*n)->next); + cur = NEXTHOP_NEXT(cur); } - *n = NULL; - return root; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -void -nexthop_insert(struct nexthop **n, struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - if (cmp < 0) - continue; - else if (cmp > 0) - break; - else - return; - } + if (cnt <= 1) + return nhad; - x->next = *n; - *n = x; -} + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); -struct nexthop * -nexthop_sort(struct nexthop *x) -{ - struct nexthop *s = NULL; + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); - /* Simple insert-sort */ - while (x) + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, 
nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) +/* + * Extended Attributes + */ + +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) { - return nexthop_slab_[MIN(nh->labels, 3)]; + /* No more ea class references. Unregister the attribute. */ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + ea_lex_unregister(cl); } -static struct nexthop * -nexthop_copy(struct nexthop *o) +static void +ea_class_ref_free(resource *r) { - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} - return first; +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); } +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + static void -nexthop_free(struct nexthop *o) +ea_class_init(void) { - struct nexthop *n; + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = 
EA_CLASS_INITIAL_MAX)); +} - while (o) - { - n = o->next; - sl_free(nexthop_slab(o), o); - o = n; - } +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; } +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); -/* - * Extended Attributes - */ + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} static inline eattr * ea__find(ea_list *e, unsigned id) @@ -444,12 +576,11 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. 
*/ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); - if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF && - !(id & EA_ALLOW_UNDEF)) + if (a && a->undef && !(id & EA_ALLOW_UNDEF)) return NULL; return a; } @@ -516,7 +647,7 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) BIT32_SET(s->visited, n); - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (a->undef) continue; s->eattrs = e; @@ -530,25 +661,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -uintptr_t -ea_get_int(ea_list *e, unsigned id, uintptr_t def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -616,14 +728,17 @@ ea_do_prune(ea_list *e) /* Now s0 is the most recent version, s[-1] the oldest one */ /* Drop undefs */ - if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (s0->undef) continue; /* Copy the newest version to destination */ *d = *s0; /* Preserve info whether it originated locally */ - d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); + d->originated = s[-1].originated; + + /* Not fresh any more, we prefer surstroemming */ + d->fresh = 0; /* Next destination */ d++; @@ -643,7 +758,7 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. 
*/ -void +static void ea_sort(ea_list *e) { while (e) @@ -667,8 +782,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e) { unsigned cnt = 0; @@ -694,8 +809,8 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(const ea_list *e, ea_list *t) { eattr *d = t->attrs; @@ -711,6 +826,16 @@ ea_merge(ea_list *e, ea_list *t) } } +ea_list * +ea_normalize(const ea_list *e) +{ + ea_list *t = tmp_alloc(ea_scan(e)); + ea_merge(e, t); + ea_sort(t); + + return t->count ? t : NULL; +} + /** * ea_same - compare two &ea_list's * @x: attribute list @@ -737,39 +862,47 @@ ea_same(ea_list *x, ea_list *y) if (a->id != b->id || a->flags != b->flags || a->type != b->type || + a->originated != b->originated || + a->fresh != b->fresh || + a->undef != b->undef || ((a->type & EAF_EMBEDDED) ? 
a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr))) return 0; } return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + ASSERT_DIE(!o->next); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; if (!(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -779,30 +912,52 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; } -static inline void -ea_free(ea_list *o) +static void +ea_list_ref(ea_list *l) { - if (o) + for(uint i=0; i<l->count; i++) { - ASSERT(!o->next); - mb_free(o); + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) +static void +ea_list_unref(ea_list *l) { - if (a->id == EA_GEN_IGP_METRIC) + for(uint i=0; i<l->count; i++) { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + struct ea_class *cl = ea_class_global[a->id]; + 
ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); } +} - return GA_UNKNOWN; +static inline void +ea_free(ea_list *o) +{ + if (o) + { + ea_list_unref(o); + ASSERT(!o->next); + mb_free(o); + } } void @@ -904,78 +1059,75 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - else - status = get_generic_attr(e, &pos, end - pos); + ASSERT_DIE(e->id < ea_class_max); - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + pos += bsprintf(pos, "%s", cls->name); + + *pos++ = ':'; + *pos++ = ' '; + + if (e->undef) + bsprintf(pos, "undefined (should not happen)"); + else if (cls->format) + cls->format(e, buf, end - buf); + else + switch (e->type) + { + case T_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, 
end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: + case T_CLIST: ea_show_int_set(c, ad, 1, pos, buf, end); return; - case EAF_TYPE_EC_SET: + case T_ECLIST: ea_show_ec_set(c, ad, pos, buf, end); return; - case EAF_TYPE_LC_SET: + case T_LCLIST: ea_show_lc_set(c, ad, pos, buf, end); return; - case EAF_TYPE_UNDEF: default: bsprintf(pos, "<type %02x>", e->type); - } - } + } + cli_printf(c, -1012, "\t%s", buf); } +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } +} + /** * ea_dump - dump an extended attribute * @e: attribute to be dumped @@ -1002,12 +1154,17 @@ ea_dump(ea_list *e) for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); - if (a->type & EAF_ORIGINATED) + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" 
[a->type]); + if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1037,6 +1194,7 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + ASSERT_DIE(!e->next); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; @@ -1100,50 +1258,30 @@ rta_alloc_hash(void) static inline uint rta_hash(rta *a) { - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); -#define BMIX(f) mem_hash_mix_num(&h, a->f); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - BMIX(source); - BMIX(scope); - BMIX(dest); - MIX(pref); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); + return ea_hash(a->eattrs); } static inline int rta_same(rta *x, rta *y) { - return (x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 
3 : a->nh.labels]; + return ea_same(x->eattrs, y->eattrs); } static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab(o)); + rta *r = sl_alloc(rta_slab); memcpy(r, o, rta_size(o)); r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); + if (!r->eattrs) + return r; + + uint elen = ea_list_size(o->eattrs); + r->eattrs = mb_alloc(rta_pool, elen); + ea_list_copy(r->eattrs, o->eattrs, elen); + ea_list_ref(r->eattrs); + r->eattrs->flags |= EALF_CACHED; return r; } @@ -1199,7 +1337,7 @@ rta_lookup(rta *o) ASSERT(!o->cached); if (o->eattrs) - ea_normalize(o->eattrs); + o->eattrs = ea_normalize(o->eattrs); h = rta_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) @@ -1209,7 +1347,6 @@ rta_lookup(rta *o) r = rta_copy(o); r->hash_key = h; r->cached = 1; - rt_lock_hostentry(r->hostentry); rta_insert(r); if (++rta_cache_count > rta_cache_limit) @@ -1226,12 +1363,9 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - if (a->nh.next) - nexthop_free(a->nh.next); ea_free(a->eattrs); a->cached = 0; - sl_free(rta_slab(a), a); + sl_free(a); } rta * @@ -1239,12 +1373,6 @@ rta_do_cow(rta *o, linpool *lp) { rta *r = lp_alloc(lp, rta_size(o)); memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } r->cached = 0; r->uc = 0; return r; @@ -1259,27 +1387,10 @@ rta_do_cow(rta *o, linpool *lp) void rta_dump(rta *a) { - static char *rts[] = { "", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("pref=%d uc=%d %s %s%s h=%04x", - a->pref, a->uc, rts[a->source], ip_scope_text(a->scope), - 
rtd[a->dest], a->hash_key); + debug("uc=%d h=%04x", + a->uc, a->hash_key); if (!a->cached) debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } if (a->eattrs) { debug(" EA: "); @@ -1313,8 +1424,6 @@ rta_dump_all(void) void rta_show(struct cli *c, rta *a) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - for(ea_list *eal = a->eattrs; eal; eal=eal->next) for(int i=0; i<eal->count; i++) ea_show(c, &eal->attrs[i]); @@ -1331,18 +1440,19 @@ rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_slab = sl_new(rta_pool, sizeof(rta)); rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + ea_register_init(&ea_gen_flowspec_valid); } /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 5d1e57b3..fa224f9a 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include 
"nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/rt-dev.h" #include "conf/conf.h" #include "lib/resource.h" @@ -79,14 +79,16 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rta a0 = { - .pref = c->preference, - .source = RTS_DEVICE, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh.iface = ad->iface, + rta a0 = {}; + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, }; + ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, c->preference); + ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); + rte e0 = { .attrs = rta_lookup(&a0), .src = src, @@ -184,7 +186,6 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_device = { .name = "Direct", .template = "direct%d", - .class = PROTOCOL_DIRECT, .preference = DEF_PREF_DIRECT, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct rt_dev_proto), @@ -194,3 +195,9 @@ struct protocol proto_device = { .reconfigure = dev_reconfigure, .copy_config = dev_copy_config }; + +void +dev_build(void) +{ + proto_build(&proto_device); +} diff --git a/nest/rt-fib.c b/nest/rt-fib.c index a7f70371..801561da 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -55,7 +55,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/string.h" /* @@ -331,7 +331,7 @@ fib_get(struct fib *f, const net_addr *a) memset(b, 0, f->node_offset); if (f->init) - f->init(b); + f->init(f, b); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); @@ -475,7 +475,7 @@ fib_delete(struct fib *f, void *E) } if (f->fib_slab) - sl_free(f->fib_slab, E); + sl_free(E); else mb_free(E); diff --git a/nest/rt-show.c b/nest/rt-show.c index 
ae5000f5..cc5a9a10 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -10,11 +10,12 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/cli.h" #include "nest/iface.h" #include "filter/filter.h" +#include "filter/data.h" #include "sysdep/unix/krt.h" static void @@ -44,32 +45,38 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta *a = e->attrs; int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a->eattrs, &ea_gen_nexthop) : NULL; + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) - bsprintf(from, " from %I", a->from); + ip_addr a_from = ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE); + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) + bsprintf(from, " from %I", a_from); else from[0] = 0; /* Need to normalize the extended attributes */ if (d->verbose && !rta_is_cached(a) && a->eattrs) - ea_normalize(a->eattrs); + a->eattrs = ea_normalize(a->eattrs); get_route_info = e->src->proto->proto->get_route_info; if (get_route_info) get_route_info(e, info); else - bsprintf(info, " (%d)", a->pref); + bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) rt_show_table(c, d); - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? 
flowspec_valid_name(flowspec_valid) : rta_dest_name(dest), + e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); - if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) + if (dest == RTD_UNICAST) + NEXTHOP_WALK(nh, nhad) { char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; @@ -83,7 +90,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary } *lsp = '\0'; - if (a->nh.next) + if (!NEXTHOP_ONE(nhad)) bsprintf(weight, " weight %d", nh->weight + 1); if (ipa_nonzero(nh->gw)) @@ -98,6 +105,29 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta_show(c, a); } +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); +} + static void rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { @@ -109,10 +139,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ASSUME(!d->export_mode || ec); int first = 1; + int first_show = 1; int pass = 0; - bsnprintf(ia, sizeof(ia), "%N", n->n.addr); - for (struct rte_storage *er = n->routes; er; er = er->next) { if (rte_is_filtered(&er->rte) != d->filtered) @@ -128,7 +157,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) struct rte e = er->rte; /* Export channel is down, do not try to export routes to it */ - if (ec && (ec->export_state == ES_DOWN)) + if (ec && !ec->out_req.hook) goto skip; if (d->export_mode == RSEM_EXPORTED) @@ -143,7 +172,14 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { /* Special case for merged export */ pass = 1; - rte *em = 
rt_export_merged(ec, n, c->show_pool, 1); + uint count = rte_feed_count(n); + if (!count) + goto skip; + + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + rte *em = rt_export_merged(ec, feed, count, c->show_pool, 1); + if (em) e = *em; else @@ -168,7 +204,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) * command may change the export filter and do not update routes. */ int do_export = (ic > 0) || - (f_run(ec->out_filter, &e, c->show_pool, FF_SILENT) <= F_ACCEPT); + (f_run(ec->out_filter, &e, FF_SILENT) <= F_ACCEPT); if (do_export != (d->export_mode == RSEM_EXPORT)) goto skip; @@ -181,14 +217,21 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (d->show_protocol && (d->show_protocol != e.src->proto)) goto skip; - if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) + if (f_run(d->filter, &e, 0) > F_ACCEPT) goto skip; if (d->stats < 2) + { + if (first_show) + net_format(n->n.addr, ia, sizeof(ia)); + else + ia[0] = 0; + rt_show_rte(c, ia, &e, d, (n->routes == er)); + first_show = 0; + } d->show_counter++; - ia[0] = 0; skip: lp_flush(c->show_pool); @@ -205,9 +248,12 @@ rt_show_cleanup(struct cli *c) struct rt_show_data_rtable *tab; /* Unlink the iterator */ - if (d->table_open) + if (d->table_open && !d->trie_walk) fit_get(&d->tab->table->fib, &d->fit); + if (d->walk_lock) + rt_unlock_trie(d->tab->table, d->walk_lock); + /* Unlock referenced tables */ WALK_LIST(tab, d->tables) rt_unlock_table(tab->table); @@ -217,12 +263,13 @@ static void rt_show_cont(struct cli *c) { struct rt_show_data *d = c->rover; + struct rtable *tab = d->tab->table; #ifdef DEBUGGING unsigned max = 4; #else unsigned max = 64; #endif - struct fib *fib = &d->tab->table->fib; + struct fib *fib = &tab->fib; struct fib_iterator *it = &d->fit; if (d->running_on_config && (d->running_on_config != config)) @@ -233,7 +280,22 @@ rt_show_cont(struct cli *c) if (!d->table_open) { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + /* We use 
either trie-based walk or fib-based walk */ + d->trie_walk = tab->trie && + (d->addr_mode == RSD_ADDR_IN) && + net_val_match(tab->addr_type, NB_IP); + + if (d->trie_walk && !d->walk_state) + d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); + + if (d->trie_walk) + { + d->walk_lock = rt_lock_trie(tab); + trie_walk_init(d->walk_state, tab->trie, d->addr); + } + else + FIB_ITERATE_INIT(&d->fit, &tab->fib); + d->table_open = 1; d->table_counter++; d->kernel = rt_show_get_kernel(d); @@ -246,16 +308,44 @@ rt_show_cont(struct cli *c) rt_show_table(c, d); } - FIB_ITERATE_START(fib, it, net, n) + if (d->trie_walk) + { + /* Trie-based walk */ + net_addr addr; + while (trie_walk_next(d->walk_state, &addr)) + { + net *n = net_find(tab, &addr); + if (!n) + continue; + + rt_show_net(c, n, d); + + if (!--max) + return; + } + + rt_unlock_trie(tab, d->walk_lock); + d->walk_lock = NULL; + } + else { - if (!max--) + /* fib-based walk */ + FIB_ITERATE_START(fib, it, net, n) { - FIB_ITERATE_PUT(it); - return; + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; + + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + + next:; } - rt_show_net(c, n, d); + FIB_ITERATE_END; } - FIB_ITERATE_END; if (d->stats) { @@ -264,7 +354,7 @@ rt_show_cont(struct cli *c) cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, d->tab->table->name); + d->net_counter - d->net_counter_last, tab->name); } d->kernel = NULL; @@ -315,7 +405,7 @@ rt_show_get_default_tables(struct rt_show_data *d) { WALK_LIST(c, d->export_protocol->channels) { - if (c->export_state == ES_DOWN) + if (!c->out_req.hook) continue; tab = rt_show_add_table(d, c->table); @@ -395,7 +485,7 @@ rt_show(struct rt_show_data *d) rt_show_prepare_tables(d); - if (!d->addr) + if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) { 
WALK_LIST(tab, d->tables) rt_lock_table(tab->table); @@ -413,7 +503,7 @@ rt_show(struct rt_show_data *d) d->tab = tab; d->kernel = rt_show_get_kernel(d); - if (d->show_for) + if (d->addr_mode == RSD_ADDR_FOR) n = net_route(tab->table, d->addr); else n = net_find(tab->table, d->addr); diff --git a/nest/rt-table.c b/nest/rt-table.c index ee69d7c4..946f4021 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -26,12 +26,72 @@ * (see the route attribute module for a precise explanation) holding the * remaining route attributes which are expected to be shared by multiple * routes in order to conserve memory. + * + * There are several mechanisms that allow automatic update of routes in one + * routing table (dst) as a result of changes in another routing table (src). + * They handle issues of recursive next hop resolving, flowspec validation and + * RPKI validation. + * + * The first such mechanism is handling of recursive next hops. A route in the + * dst table has an indirect next hop address, which is resolved through a route + * in the src table (which may also be the same table) to get an immediate next + * hop. This is implemented using structure &hostcache attached to the src + * table, which contains &hostentry structures for each tracked next hop + * address. These structures are linked from recursive routes in dst tables, + * possibly multiple routes sharing one hostentry (as many routes may have the + * same indirect next hop). There is also a trie in the hostcache, which matches + * all prefixes that may influence resolving of tracked next hops. + * + * When a best route changes in the src table, the hostcache is notified using + * rt_notify_hostcache(), which immediately checks using the trie whether the + * change is relevant and if it is, then it schedules asynchronous hostcache + * recomputation. 
The recomputation is done by rt_update_hostcache() (called + * from rt_event() of src table), it walks through all hostentries and resolves + * them (by rt_update_hostentry()). It also updates the trie. If a change in + * hostentry resolution was found, then it schedules asynchronous nexthop + * recomputation of associated dst table. That is done by rt_next_hop_update() + * (called from rt_event() of dst table), it iterates over all routes in the dst + * table and re-examines their hostentries for changes. Note that in contrast to + * hostcache update, next hop update can be interrupted by main loop. These two + * full-table walks (over hostcache and dst table) are necessary due to absence + * of direct lookups (route -> affected nexthop, nexthop -> its route). + * + * The second mechanism is for flowspec validation, where validity of flowspec + * routes depends on resolving their network prefixes in IP routing tables. This + * is similar to the recursive next hop mechanism, but simpler as there are no + * intermediate hostcache and hostentries (because flows are less likely to + * share common net prefix than routes sharing a common next hop). In src table, + * there is a list of dst tables (list flowspec_links), this list is updated by + * flowspec channels (by rt_flowspec_link() and rt_flowspec_unlink() during + * channel start/stop). Each dst table has its own trie of prefixes that may + * influence validation of flowspec routes in it (flowspec_trie). + * + * When a best route changes in the src table, rt_flowspec_notify() immediately + * checks all dst tables from the list using their tries to see whether the + * change is relevant for them. If it is, then an asynchronous re-validation of + * flowspec routes in the dst table is scheduled. That is also done by function + * rt_next_hop_update(), like nexthop recomputation above. It iterates over all + * flowspec routes and re-validates them. It also recalculates the trie.
+ * + * Note that in contrast to the hostcache update, here the trie is recalculated + * during the rt_next_hop_update(), which may be interleaved with IP route + * updates. The trie is flushed at the beginning of recalculation, which means + * that such updates may use partial trie to see if they are relevant. But it + * works anyway! Either affected flowspec was already re-validated and added to + * the trie, then IP route change would match the trie and trigger a next round + * of re-validation, or it was not yet re-validated and added to the trie, but + * will be re-validated later in this round anyway. + * + * The third mechanism is used for RPKI re-validation of IP routes and it is the + * simplest. It is just a list of subscribers in src table, who are notified + * when any change happened, but only after a settle time. Also, in RPKI case + * the dst is not a table, but a channel, who refeeds routes through a filter. */ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -44,54 +104,241 @@ #include "lib/hash.h" #include "lib/string.h" #include "lib/alloca.h" +#include "lib/flowspec.h" + +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif pool *rt_table_pool; static linpool *rte_update_pool; list routing_tables; +list deleted_routing_tables; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); +static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_feed_channel(void *); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + 
[TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_HUNGRY] = "HUNGRY", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! INVALID !!"; + else + return rt_export_state_name_array[state]; +} + +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +static void +net_init_with_trie(struct fib *f, void *N) +{ + rtable *tab = SKIP_BACK(rtable, fib, f); + net *n = N; + + if (tab->trie) + trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + + if (tab->trie_new) + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); +} + +static inline net * +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) + { + net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn6_trie(rtable *t, const 
net_addr_vpn6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} -/* Like fib_route(), but skips empty net entries */ static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen); + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } + } + + if (best) + return best; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) { + net_addr_ip4 n; + net_copy_ip4(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +{ + net_addr_vpn4 n; + net_copy_vpn4(&n, n0); - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip4_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); } return r; } -static inline void * -net_route_ip6(rtable *t, net_addr_ip6 
*n) +static inline net * +net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) { + net_addr_ip6 n; + net_copy_ip6(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return r; +} + +static inline net * +net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +{ + net_addr_vpn6 n; + net_copy_vpn6(&n, n0); - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip6_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); } return r; } static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) { - struct fib_node *fn; + net_addr_ip6_sadr n; + net_copy_ip6_sadr(&n, n0); while (1) { @@ -100,13 +347,13 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) /* We need to do dst first matching. Since sadr addresses are hashed on dst prefix only, find the hash table chain and go through it to find the - match with the smallest matching src prefix. */ - for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) + match with the longest matching src prefix. 
*/ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) { net_addr_ip6_sadr *a = (void *) fn->addr; - if (net_equal_dst_ip6_sadr(n, a) && - net_in_net_src_ip6_sadr(n, a) && + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && (a->src_pxlen >= best_pxlen)) { best = fib_node_to_user(&t->fib, fn); @@ -117,38 +364,52 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) if (best) return best; - if (!n->dst_pxlen) + if (!n.dst_pxlen) break; - n->dst_pxlen--; - ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + n.dst_pxlen--; + ip6_clrbit(&n.dst_prefix, n.dst_pxlen); } return NULL; } -void * +net * net_route(rtable *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); - net_addr *n0 = alloca(n->length); - net_copy(n0, n); - switch (n->type) { case NET_IP4: + if (tab->trie) + return net_route_ip4_trie(tab, (net_addr_ip4 *) n); + else + return net_route_ip4_fib (tab, (net_addr_ip4 *) n); + case NET_VPN4: - case NET_ROA4: - return net_route_ip4(tab, (net_addr_ip4 *) n0); + if (tab->trie) + return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n); + else + return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n); case NET_IP6: + if (tab->trie) + return net_route_ip6_trie(tab, (net_addr_ip6 *) n); + else + return net_route_ip6_fib (tab, (net_addr_ip6 *) n); + case NET_VPN6: - case NET_ROA6: - return net_route_ip6(tab, (net_addr_ip6 *) n0); + if (tab->trie) + return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n); + else + return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n); case NET_IP6_SADR: - return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); + if (tab->trie) + return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n); + else + return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n); default: return NULL; @@ -157,7 +418,35 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 
asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0) + { + net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -189,7 +478,35 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0) + { + net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? 
ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -239,9 +556,19 @@ int net_roa_check(rtable *tab, const net_addr *n, u32 asn) { if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) - return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else return ROA_UNKNOWN; /* Should not happen */ } @@ -293,11 +620,11 @@ rte_store(const rte *r, net *net, rtable *tab) */ void -rte_free(struct rte_storage *e, rtable *tab) +rte_free(struct rte_storage *e) { rt_unlock_source(e->rte.src); rta_free(e->rte.attrs); - sl_free(tab->rte_slab, e); + sl_free(e); } static int /* Actually better or at least as good as */ @@ -310,9 +637,12 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->attrs->pref > old->attrs->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->attrs->pref < old->attrs->pref) + if (np < op) return 0; if (new->src->proto->proto != old->src->proto->proto) { @@ -336,7 +666,7 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->attrs->pref != sec->attrs->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; if (pri->src->proto->proto != sec->src->proto->proto) @@ -349,154 +679,186 @@ rte_mergable(rte *pri, rte *sec) } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) 
+rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %uL %uG %s", - c->proto->name, c->name ?: "?", dir, msg, e->net, e->src->private_id, e->src->global_id, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N %uL %uG %s", + name, dir, msg, e->net, e->src->private_id, e->src->global_id, + rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = 
c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - int v; + struct channel_export_stats *stats = &c->export_stats; - v = p->preexport ? p->preexport(c, rt) : 0; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; + + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } accept: + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt, int silent) -{ - return export_filter_(c, rt, rte_update_pool, silent); -} - static void -do_rt_notify(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) 
c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } + + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); /* Apply export table */ struct rte_storage *old_exported = NULL; if (c->out_table) { - if (!rte_update_out(c, net, new, old, &old_exported, refeed)) + if (!rte_update_out(c, net, new, old, &old_exported)) + { + channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); return; + } } if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old_exported ? 
&old_exported->rte : old); if (c->out_table && old_exported) - rte_free(old_exported, c->out_table); + rte_free(old_exported); } static void -rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; - - if (new) new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) @@ -505,176 +867,250 @@ rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old, int if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); + do_rt_notify(c, net, new, old); } -static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte nb0; - rte *new_best = NULL; - rte *old_best = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. 
- * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte nb0, *new_best = NULL; + const rte *old_best = NULL; - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else + for (uint i = 0; i < count; i++) { - for (struct rte_storage *r = net->routes; r && rte_is_valid(&r->rte); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->rte.id)) + /* is still best */ + if (!new_best) { - old_best = &r->rte; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (&r->rte == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; + } + + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? 
"ok" : "no"); } } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (struct rte_storage *r = net->routes; r && rte_is_valid(&r->rte); r = r->next) - if (new_best = export_filter(c, ((nb0 = r->rte), &nb0), 0)) + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; break; - } - else - { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, 0))) - new_best = new_changed; - else - return; - } + } - if (!new_best && !old_best) - return; + if (rpe == rpe_last) + break; + } + */ - do_rt_notify(c, net->n.addr, new_best, old_best, refeed); -} + /* Nothing to export */ + if (!new_best && !old_best) + { + DBG("rt_notify_accepted: nothing to export\n"); + goto done; + } + do_rt_notify(c, n, new_best, old_best); -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); +done: + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. 
*/ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } rte * -rt_export_merged(struct channel *c, net *net, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { + _Thread_local static rte rloc; + // struct proto *p = c->proto; - struct nexthop *nhs = NULL; - _Thread_local static rte rme; - struct rte_storage *best0 = net->routes; - rte *best; + struct nexthop_adata *nhs = NULL; + rte *best0 = feed[0]; + rte *best = NULL; - if (!best0 || !rte_is_valid(&best0->rte)) + if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, ((rme = best0->rte), &rme), pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; - if (!best || !rte_is_reachable(best)) + rloc = *best0; + best = export_filter(c, &rloc, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (struct rte_storage *rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best, &rt0->rte)) + if (!rte_mergable(best0, feed[i])) continue; - rte rnh = rt0->rte; - rte *rt = export_filter_(c, &rnh, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs->eattrs, &ea_gen_nexthop); + 
ASSERT_DIE(nhea); - if (nhs->next) - { - best->attrs = rta_cow(best->attrs, pool); - nexthop_link(best->attrs, nhs); - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + + best->attrs = rta_cow(best->attrs, pool); + ea_set_attr(&best->attrs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); } return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - /* We assume that all rte arguments are either NULL or rte_is_valid() */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; + break; + } + + if (rpe == rpe_last) + break; + } + */ /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, rte_update_pool, 
0); + rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL; - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); - if (!new_best && !old_best) - return; + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; + + if (rpe->new_best != rpe->old_best) + rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; - do_rt_notify(c, net->n.addr, new_best, old_best, refeed); + if (rpe->new != rpe->old) + rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. 
*/ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -690,13 +1126,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -711,19 +1140,19 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). 
*/ static void -rte_announce(rtable *tab, uint type, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { - if (!new || !rte_is_valid(&new->rte)) + if (!rte_is_valid(RTE_OR_NULL(new))) new = NULL; - if (!old || !rte_is_valid(&old->rte)) + if (!rte_is_valid(RTE_OR_NULL(old))) old = NULL; - if (!new_best || !rte_is_valid(&new_best->rte)) + if (!rte_is_valid(RTE_OR_NULL(new_best))) new_best = NULL; - if (!old_best || !rte_is_valid(&old_best->rte)) + if (!rte_is_valid(RTE_OR_NULL(old_best))) old_best = NULL; if (!new && !old && !new_best && !old_best) @@ -732,51 +1161,50 @@ rte_announce(rtable *tab, uint type, net *net, struct rte_storage *new, struct r if (new_best != old_best) { if (new_best) - new_best->rte.sender->stats.pref_routes++; + new_best->rte.sender->stats.pref++; if (old_best) - old_best->rte.sender->stats.pref_routes--; + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); + + if (!EMPTY_LIST(tab->flowspec_links)) + rt_flowspec_notify(tab, net); } rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best }; + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) { - if (c->export_state == ES_DOWN) - continue; + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } - if (type && (type != c->ra_mode)) + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + if (eh->export_state == TES_STOP) continue; - rte n0; - switch (c->ra_mode) - { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net->n.addr, RTE_COPY(new_best, &n0), RTE_OR_NULL(old_best), 0); - break; - - case RA_ANY: - if (new != old) - rt_notify_basic(c, net->n.addr, RTE_COPY(new, &n0), RTE_OR_NULL(old), 
0); - break; - - case RA_ACCEPTED: - rt_notify_accepted(c, net, RTE_OR_NULL(new), RTE_OR_NULL(old), 0); - break; + if (new) + eh->stats.updates_received++; + else + eh->stats.withdraws_received++; - case RA_MERGED: - rt_notify_merged(c, net, RTE_OR_NULL(new), RTE_OR_NULL(old), RTE_OR_NULL(new_best), RTE_OR_NULL(old_best), 0); - break; - } + if (eh->req->export_one) + eh->req->export_one(eh->req, net->n.addr, &rpe); + else if (eh->req->export_bulk) + eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count); + else + bug("Export request must always provide an export method"); } } static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; const net_addr *n = e->net; @@ -784,7 +1212,7 @@ rte_validate(rte *e) if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n, e->sender->proto->name); + n, ch->proto->name); return 0; } @@ -794,21 +1222,34 @@ rte_validate(rte *e) if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n, e->attrs->dest, e->sender->proto->name); - return 0; - } + eattr *nhea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } + + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs->eattrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - 
n, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } @@ -829,12 +1270,11 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; + struct rt_import_stats *stats = &c->stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; @@ -846,22 +1286,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) { old = &(old_stored = (*before_old))->rte; - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - return; - } + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. 
*/ + if (old->sender != c) + { + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + } if (new && rte_same(old, new)) { @@ -871,97 +1309,39 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } + } - return; - } - - *before_old = (*before_old)->next; - table->rt_count--; + *before_old = (*before_old)->next; + table->rt_count--; } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + if (req->preimport) + new = req->preimport(req, new, old); + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. 
We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - new = NULL; - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1:; struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ @@ -1064,23 +1444,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, &new_stored->rte, '>', new_stored == net->routes ? 
"added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (net->routes && rte_is_ok(&net->routes->rte)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new_stored, old_stored, + rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); if (!net->routes && @@ -1088,17 +1465,20 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) (table->gc_time + table->config->gc_min_time <= current_time())) rt_schedule_prune(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) p->rte_insert(net, &new_stored->rte); +#endif if (old) { if (!new_stored) hmap_clear(&table->id_map, old->id); - rte_free(old_stored, table); + rte_free(old_stored); } } @@ -1117,100 +1497,134 @@ rte_update_unlock(void) lp_flush(rte_update_pool); } -static int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +rte * +channel_preimport(struct rt_import_request *req, rte *new, rte *old) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); + + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); + + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep_filtered) + { + new->flags |= REF_FILTERED; + return new; + } + else + return NULL; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + 
return new; +} + +static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { + if (!c->in_req.hook) + return; + + ASSERT(c->channel_state == CS_UP); + if (c->in_table && !rte_update_in(c, n, new, src)) return; - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - net *nn; + return rte_update_direct(c, n, new, src); +} - ASSERT(c->channel_state == CS_UP); +static void +rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +{ + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; rte_update_lock(); if (new) { new->net = n; - new->sender = c; - stats->imp_updates_received++; - if (!rte_validate(new)) + int fr; + + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + + if (c->in_keep_filtered) + new->flags |= REF_FILTERED; + else + new = NULL; } - if (filter == FILTER_REJECT) + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } - if (! 
c->in_keep_filtered) - goto drop; + } + else + stats->withdraws_received++; - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) - { - int fr = f_run(filter, new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + rte_import(&c->in_req, n, new, src); - if (! c->in_keep_filtered) - goto drop; + rte_update_unlock(); +} - new->flags |= REF_FILTERED; - } - } +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; + net *nn; + if (new) + { /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); + nn = net_get(hook->table, n); new->net = nn->n.addr; + new->sender = hook; } - else + else if (!(nn = net_find(hook->table, n))) { - stats->imp_withdraws_received++; - - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + req->hook->stats.withdraws_ignored++; + return; } - recalc: /* And recalculate the best route */ - rte_recalculate(c, nn, new, src); - - rte_update_unlock(); - return; - - drop: - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; - - rte_update_unlock(); + rte_recalculate(hook, nn, new, src); } /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). 
new must be non-NULL */ static inline void -rte_announce_i(rtable *tab, uint type, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce_i(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); + rte_announce(tab, net, new, old, new_best, old_best); rte_update_unlock(); } @@ -1228,7 +1642,7 @@ rte_modify(net *net, rte *old) { rte_update_lock(); - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); + rte *new = old->sender->req->rte_modify(old, rte_update_pool); if (new != old) { if (new) @@ -1246,26 +1660,144 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte { net *n = net_find(t, a); - if (!n || !n->routes) + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) return 0; rte rt = n->routes->rte; - if (!rte_is_valid(&rt)) - return 0; - rte_update_lock(); /* Rest is stripped down export_filter() */ int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); rte_update_unlock(); return v > 0; } +static void +rt_export_stopped(void *data) +{ + struct rt_export_hook *hook = data; + rtable *tab = hook->table; + + /* Unlist */ + rem_node(&hook->n); + + /* Reporting the channel as stopped. */ + hook->stopped(hook->req); + + /* Freeing the hook together with its coroutine. 
*/ + rfree(hook->pool); + rt_unlock_table(tab); + + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + + +static inline void +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +static inline void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->export_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; + + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +void +rt_request_export(rtable *tab, struct rt_export_request *req) +{ + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->lp = lp_new_default(p); + + hook->req = req; + hook->table = tab; + + /* stats zeroed by mb_allocz */ + + rt_set_export_state(hook, TES_HUNGRY); + + hook->n = (node) {}; + add_tail(&tab->exports, &hook->n); + + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + rt_set_export_state(hook, 
TES_FEEDING); + + hook->event = ev_new_init(p, rt_feed_channel, hook); + ev_schedule_work(hook->event); +} + +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; + + rtable *tab = hook->table; + + /* Stop feeding */ + ev_postpone(hook->event); + + if (hook->export_state == TES_FEEDING) + fit_get(&tab->fib, &hook->feed_fit); + + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + rt_set_export_state(hook, TES_STOP); + ev_schedule(hook->event); +} /** * rt_refresh_begin - start a refresh cycle @@ -1282,12 +1814,12 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(rtable *t, struct rt_import_request *req) { FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == c) + if (e->rte.sender == req->hook) e->rte.flags |= REF_STALE; } FIB_WALK_END; @@ -1302,14 +1834,14 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. 
*/ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == c) && (e->rte.flags & REF_STALE)) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) { e->rte.flags |= REF_DISCARD; prune = 1; @@ -1322,14 +1854,14 @@ rt_refresh_end(rtable *t, struct channel *c) } void -rt_modify_stale(rtable *t, struct channel *c) +rt_modify_stale(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == c) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) { e->rte.flags |= REF_MODIFY; prune = 1; @@ -1365,7 +1897,7 @@ rte_dump(struct rte_storage *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif @@ -1391,6 +1923,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? 
" (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u stopped=%p\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, eh->export_state, eh->stopped); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -1508,6 +2088,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -1518,6 +2099,90 @@ rt_unsubscribe(struct rt_subscription *s) rt_unlock_table(s->tab); } +static struct rt_flowspec_link * +rt_flowspec_find_link(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + if ((ln->src == src) && (ln->dst == dst)) + return ln; + + return NULL; +} + +void +rt_flowspec_link(rtable *src, rtable *dst) +{ + ASSERT(rt_is_ip(src)); + ASSERT(rt_is_flow(dst)); + + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + if (!ln) + { + rt_lock_table(src); + rt_lock_table(dst); + + ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); + ln->src = src; + 
ln->dst = dst; + add_tail(&src->flowspec_links, &ln->n); + } + + ln->uc++; +} + +void +rt_flowspec_unlink(rtable *src, rtable *dst) +{ + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + + ASSERT(ln && (ln->uc > 0)); + + ln->uc--; + + if (!ln->uc) + { + rem_node(&ln->n); + mb_free(ln); + + rt_unlock_table(src); + rt_unlock_table(dst); + } +} + +static void +rt_flowspec_notify(rtable *src, net *net) +{ + /* Only IP tables are src links */ + ASSERT(rt_is_ip(src)); + + struct rt_flowspec_link *ln; + WALK_LIST(ln, src->flowspec_links) + { + rtable *dst = ln->dst; + ASSERT(rt_is_flow(dst)); + + /* No need to inspect it further if recalculation is already active */ + if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) + continue; + + if (trie_match_net(dst->flowspec_trie, net->n.addr)) + rt_schedule_nhu(dst); + } +} + +static void +rt_flowspec_reset_trie(rtable *tab) +{ + linpool *lp = tab->flowspec_trie->lp; + int ipv4 = tab->flowspec_trie->ipv4; + + lp_flush(lp); + tab->flowspec_trie = f_new_trie(lp, 0); + tab->flowspec_trie->ipv4 = ipv4; +} + static void rt_free(resource *_r) { @@ -1564,12 +2229,7 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; @@ -1582,9 +2242,20 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + + init_list(&t->flowspec_links); + if (!(t->internal = cf->internal)) { - init_list(&t->channels); + 
init_list(&t->imports); + init_list(&t->exports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); @@ -1592,6 +2263,14 @@ rt_setup(pool *pp, struct rtable_config *cf) t->rt_event = ev_new_init(p, rt_event, t); t->last_rt_change = t->gc_time = current_time(); + + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); + } } return t; @@ -1610,6 +2289,7 @@ rt_init(void) rt_table_pool = rp_new(&root_pool, "Routing tables"); rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); + init_list(&deleted_routing_tables); } @@ -1631,9 +2311,9 @@ static void rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; - int limit = 512; + int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -1647,29 +2327,36 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + + if (tab->prune_trie) + { + /* Init prefix trie pruning */ + tab->trie_new = f_new_trie(lp_new_default(tab->rp), 0); + tab->trie_new->ipv4 = tab->trie->ipv4; + } } again: FIB_ITERATE_START(&tab->fib, fit, net, n) { rescan: + if (limit <= 0) + { + FIB_ITERATE_PUT(fit); + ev_schedule(tab->rt_event); + return; + } + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->rte.sender->flush_active || (e->rte.flags & REF_DISCARD)) + if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - rte_discard(n, &e->rte); limit--; @@ -1678,13 +2365,6 @@ 
again: if (e->rte.flags & REF_MODIFY) { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - rte_modify(n, &e->rte); limit--; @@ -1698,6 +2378,12 @@ again: fib_delete(&tab->fib, n); goto again; } + + if (tab->trie_new) + { + trie_add_prefix(tab->trie_new, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); + limit--; + } } FIB_ITERATE_END; @@ -1711,23 +2397,117 @@ again: /* state change 2->0, 3->1 */ tab->prune_state &= 1; - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); + if (tab->trie_new) + { + /* Finish prefix trie pruning */ + + if (!tab->trie_lock_count) + { + rfree(tab->trie->lp); + } + else + { + ASSERT(!tab->trie_old); + tab->trie_old = tab->trie; + tab->trie_old_lock_count = tab->trie_lock_count; + tab->trie_lock_count = 0; + } + + tab->trie = tab->trie_new; + tab->trie_new = NULL; + tab->prune_trie = 0; + } + else + { + /* Schedule prefix trie pruning */ + if (tab->trie && !tab->trie_old && (tab->trie->prefix_count > (2 * tab->fib.entries))) + { + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; + tab->prune_trie = 1; + } + } - /* FIXME: This should be handled in a better way */ rt_prune_sources(); /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + rt_set_import_state(ih, TIS_CLEARED); + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } +} + +/** + * rt_lock_trie - lock a prefix trie of a routing table + * @tab: routing table with prefix trie to be locked + * + * The prune loop may rebuild the prefix trie and invalidate f_trie_walk_state + * structures. Therefore, asynchronous walks should lock the prefix trie using + * this function. That allows the prune loop to rebuild the trie, but postpones + * its freeing until all walks are done (unlocked by rt_unlock_trie()). 
+ * + * Return the current trie, which is now locked; the value should be passed back to + * rt_unlock_trie() for unlocking. + * + */ +struct f_trie * +rt_lock_trie(rtable *tab) +{ + ASSERT(tab->trie); + + tab->trie_lock_count++; + return tab->trie; +} + +/** + * rt_unlock_trie - unlock a prefix trie of a routing table + * @tab: routing table with prefix trie to be unlocked + * @trie: value returned by matching rt_lock_trie() + * + * Called for a trie locked by rt_lock_trie() after the walk over the trie is done. + * It may free the trie and schedule next trie pruning. + */ +void +rt_unlock_trie(rtable *tab, struct f_trie *trie) +{ + ASSERT(trie); + + if (trie == tab->trie) + { + /* Unlock the current prefix trie */ + ASSERT(tab->trie_lock_count); + tab->trie_lock_count--; + } + else if (trie == tab->trie_old) + { + /* Unlock the old prefix trie */ + ASSERT(tab->trie_old_lock_count); + tab->trie_old_lock_count--; + + /* Free old prefix trie that is no longer needed */ + if (!tab->trie_old_lock_count) + { + rfree(tab->trie_old->lp); + tab->trie_old = NULL; + + /* Kick prefix trie pruning that was postponed */ + if (tab->trie && (tab->trie->prefix_count > (2 * tab->fib.entries))) { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); + tab->prune_trie = 1; + rt_schedule_prune(tab); } - - return; + } + } + else + log(L_BUG "Invalid arg to rt_unlock_trie()"); } + void rt_preconfig(struct config *c) { @@ -1743,171 +2523,425 @@ rt_preconfig(struct config *c) * triggered by rt_schedule_nhu(). 
*/ -static inline int -rta_next_hop_outdated(rta *a) +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - struct hostentry *he = a->hostentry; - - if (!he) - return 0; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - if (!he->src) - return a->dest != RTD_UNREACHABLE; + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); } -void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) + +static void +rta_apply_hostentry(rta *a, struct hostentry_adata *head) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; + + ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, he->igp_metric); - if (a->dest != RTD_UNICAST) + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(&a->eattrs, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src->eattrs, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. 
*/ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(&a->eattrs, he->src->eattrs, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) - { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - 
nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(&a->eattrs, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); +} + +static inline struct hostentry_adata * +rta_next_hop_outdated(rta *a) +{ + /* First retrieve the hostentry */ + eattr *heea = ea_find(a->eattrs, &ea_gen_hostentry); + if (!heea) + return NULL; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a->eattrs, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return NEXTHOP_IS_REACHABLE(nhad) ? 
head : NULL; + + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src->eattrs, &ea_gen_nexthop); + + return ( + (ea_get_int(a->eattrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; } static inline struct rte_storage * rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); - - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) + return NULL; - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->cached = 0; + rta a = *old->attrs; + a.cached = 0; + rta_apply_hostentry(&a, head); rte e0 = *old; - e0.attrs = a; + e0.attrs = &a; return rte_store(&e0, n, tab); } +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs->eattrs, &ea_gen_hostentry); + if (!heea) + return; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + rta_apply_hostentry(r->attrs, head); +} + +#ifdef CONFIG_BGP + +static inline int +net_flow_has_dst_prefix(const net_addr *n) +{ + ASSUME(net_is_flow(n)); + + if (n->pxlen) + return 1; + + if (n->type == NET_FLOW4) + { + const net_addr_flow4 *n4 = (void *) n; + return (n4->length > sizeof(net_addr_flow4)) && (n4->data[0] == FLOW_TYPE_DST_PREFIX); + } + else + { + const net_addr_flow6 *n6 = (void *) n; + return (n6->length > sizeof(net_addr_flow6)) && (n6->data[0] == FLOW_TYPE_DST_PREFIX); + } +} + +static inline int +rta_as_path_is_empty(rta *a) +{ + eattr *e = ea_find(a->eattrs, "bgp_path"); + return !e || 
(as_path_getlen(e->u.ptr) == 0); +} + +static inline u32 +rta_get_first_asn(rta *a) +{ + eattr *e = ea_find(a->eattrs, "bgp_path"); + u32 asn; + + return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; +} + +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) +{ + ASSERT(rt_is_ip(tab_ip)); + ASSERT(rt_is_flow(tab_flow)); + ASSERT(tab_ip->trie); + + /* RFC 8955 6. a) Flowspec has defined dst prefix */ + if (!net_flow_has_dst_prefix(n)) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.1. Accept if AS_PATH is empty (interior sessions only) */ + if (interior && rta_as_path_is_empty(a)) + return FLOWSPEC_VALID; + + + /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ + + /* Find flowspec dst prefix */ + net_addr dst; + if (n->type == NET_FLOW4) + net_fill_ip4(&dst, net4_prefix(n), net4_pxlen(n)); + else + net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); + + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tab_ip, &dst); + const rte *rb = nb ? &nb->routes->rte : NULL; + + /* Register prefix to trie for tracking further changes */ + int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? 
nb->n.addr->pxlen : 0), max_pxlen); + + /* No best-match BGP route -> no flowspec */ + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; + + /* Find ORIGINATOR_ID values */ + u32 orig_a = ea_get_int(a->eattrs, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs->eattrs, "bgp_originator_id", 0); + + /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs->eattrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; + + + /* Find ASN of the best-match route, for use in next checks */ + u32 asn_b = rta_get_first_asn(rb->attrs); + if (!asn_b) + return FLOWSPEC_INVALID; + + /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ + if (!interior && (rta_get_first_asn(a) != asn_b)) + return FLOWSPEC_INVALID; + + /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ + TRIE_WALK(tab_ip->trie, subnet, &dst) + { + net *nc = net_find_valid(tab_ip, &subnet); + if (!nc) + continue; + + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; + + if (rta_get_first_asn(rc->attrs) != asn_b) + return FLOWSPEC_INVALID; + } + TRIE_WALK_END; + + return FLOWSPEC_VALID; +} + +#endif /* CONFIG_BGP */ + +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +{ +#ifdef CONFIG_BGP + if (rt_get_source_attr(r) != RTS_BGP) + return NULL; + + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) + return NULL; + + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; + + rta *a = alloca(RTA_MAX_SIZE); + *a = 
*r->attrs; + a->cached = 0; + + ea_set_attr_u32(&a->eattrs, &ea_gen_flowspec_valid, 0, valid); + + rte new; + memcpy(&new, r, sizeof(rte)); + new.attrs = a; + + return rte_store(&new, n, tab); +#else + return NULL; +#endif +} + +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs->eattrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs->eattrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} + static inline int rt_next_hop_update_net(rtable *tab, net *n) { - struct rte_storage **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - if (rta_next_hop_outdated(e->rte.attrs)) + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; + + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - new = rt_next_hop_update_rte(tab, n, &e->rte); - 
new->next = e->next; - *k = new; + struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ if (e->rte.src->proto->rte_recalculate) - e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, NULL); + e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); - if (e != old_best) - rte_free(e, tab); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; - e = new; - count++; + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; } - if (!count) - return 0; + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; @@ -1922,15 +2956,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->rte.sender, &new->rte, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); + } - if (free_old_best) - 
rte_free(old_best, tab); + for (int i=0; i<count; i++) + rte_free(updates[i].old); return count; } @@ -1948,6 +2984,9 @@ rt_next_hop_update(rtable *tab) { FIB_ITERATE_INIT(fit, &tab->fib); tab->nhu_state = NHU_RUNNING; + + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -2036,6 +3075,22 @@ rt_unlock_table(rtable *r) } } +static int +rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +{ + if ((new->addr_type != old->addr_type) || + (new->sorted != old->sorted) || + (new->trie_used != old->trie_used)) + return 0; + + DBG("\t%s: same\n", new->name); + new->table = tab; + tab->name = new->name; + tab->config = new; + + return 1; +} + static struct rtable_config * rt_find_table_config(struct config *cf, char *name) { @@ -2065,28 +3120,19 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; - if (!ot->deleted) - { - r = rt_find_table_config(new, o->name); - if (r && (r->addr_type == o->addr_type) && !new->shutdown) - { - DBG("\t%s: same\n", o->name); - r->table = ot; - ot->name = r->name; - ot->config = r; - if (o->sorted != r->sorted) - log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); - } - else - { - DBG("\t%s: deleted\n", o->name); - ot->deleted = old; - config_add_obstacle(old); - rt_lock_table(ot); - rt_unlock_table(ot); - } - } + rtable *tab = o->table; + if (tab->deleted) + continue; + + r = rt_find_table_config(new, o->name); + if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + continue; + + DBG("\t%s: deleted\n", o->name); + tab->deleted = old; + config_add_obstacle(old); + rt_lock_table(tab); + rt_unlock_table(tab); } } @@ -2100,22 +3146,6 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) -{ - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if 
(c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - { - rte e0 = *e; - rt_notify_basic(c, n->n.addr, &e0, &e0, c->refeeding); - } - rte_update_unlock(); -} - /** * rt_feed_channel - advertise all routes to a channel * @c: channel to be fed @@ -2125,79 +3155,55 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_channel(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); - - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + ASSERT(c->export_state == TES_FEEDING); FIB_ITERATE_START(&c->table->fib, fit, net, n) { - struct rte_storage *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + ev_schedule_work(c->event); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (e && rte_is_valid(&e->rte)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, &e->rte); - max_feed--; - } + if (c->export_state != TES_FEEDING) + goto done; - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(&e->rte)) - continue; - - do_feed_channel(c, n, &e->rte); - max_feed--; - } + if (c->req->export_bulk) + { + uint count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + struct rt_pending_export rpe = { .new_best = n->routes }; + c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); + max_feed -= count; + rte_update_unlock(); + } 
+ } + else if (n->routes && rte_is_valid(&n->routes->rte)) + { + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + max_feed--; + rte_update_unlock(); + } } FIB_ITERATE_END; done: - c->feed_active = 0; - return 1; -} - -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. - */ -void -rt_feed_channel_abort(struct channel *c) -{ - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } + rt_set_export_state(c, TES_READY); } @@ -2205,7 +3211,7 @@ rt_feed_channel_abort(struct channel *c) * Import table */ -static int +int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { struct rtable *tab = c->in_table; @@ -2238,6 +3244,9 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr goto drop_update; } + if (!new) + CHANNEL_LIMIT_POP(c, RX); + /* Move iterator if needed */ if (*pos == c->reload_next_rte) c->reload_next_rte = (*pos)->next; @@ -2245,10 +3254,21 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr /* Remove the old rte */ struct rte_storage *del = *pos; *pos = (*pos)->next; - rte_free(del, tab); + rte_free(del); tab->rt_count--; } - else if (!new) + else if (new) + { + if (CHANNEL_LIMIT_PUSH(c, RX)) + { + /* Required by rte_trace_in() */ + new->net = n; + + channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + goto drop_update; + } + } + else goto drop_withdraw; if (!new) @@ -2259,25 +3279,8 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; } - struct channel_limit *l = &c->rx_limit; - if (l->action && !*pos) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); - - if (l->state == PLS_BLOCKED) 
- { - /* Required by rte_trace_in() */ - new->net = n; - - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - /* Insert the new rte */ struct rte_storage *e = rte_store(new, net, tab); - e->rte.sender = c; e->rte.lastmod = current_time(); e->next = *pos; *pos = e; @@ -2285,8 +3288,8 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr return 1; drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; + c->import_stats.updates_received++; + c->in_req.hook->stats.updates_ignored++; if (!net->routes) fib_delete(&tab->fib, net); @@ -2294,8 +3297,8 @@ drop_update: return 0; drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; + c->import_stats.withdraws_received++; + c->in_req.hook->stats.withdraws_ignored++; return 0; } @@ -2325,7 +3328,7 @@ rt_reload_channel(struct channel *c) } rte r = e->rte; - rte_update(c, r.net, &r, r.src); + rte_update_direct(c, r.net, &r, r.src); } c->reload_next_rte = NULL; @@ -2375,7 +3378,7 @@ again: if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) { *ee = e->next; - rte_free(e, t); + rte_free(e); t->rt_count--; } else @@ -2398,7 +3401,7 @@ again: */ int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct rte_storage **old_exported, int refeed) +rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) { struct rtable *tab = c->out_table; struct rte_src *src; @@ -2415,7 +3418,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct src = old0->src; if (!net) - goto drop_withdraw; + goto drop; } /* Find the old rte */ @@ -2425,7 +3428,7 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct if (old = *pos) { if (new && rte_same(&(*pos)->rte, new)) - goto drop_update; + goto drop; /* Remove the old rte */ *pos = old->next; @@ -2436,7 +3439,7 @@ rte_update_out(struct channel 
*c, const net_addr *n, rte *new, rte *old0, struct if (!new) { if (!old) - goto drop_withdraw; + goto drop; if (!net->routes) fib_delete(&tab->fib, net); @@ -2452,13 +3455,36 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, struct tab->rt_count++; return 1; -drop_update: - return refeed; - -drop_withdraw: +drop: return 0; } +void +rt_refeed_channel(struct channel *c) +{ + if (!c->out_table) + { + channel_request_feeding(c); + return; + } + + ASSERT_DIE(c->ra_mode != RA_ANY); + + c->proto->feed_begin(c, 0); + + FIB_WALK(&c->out_table->fib, net, n) + { + if (!n->routes) + continue; + + rte e = n->routes->rte; + c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); + } + FIB_WALK_END; + + c->proto->feed_end(c); +} + /* * Hostcache @@ -2555,7 +3581,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -2572,7 +3598,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -2624,14 +3650,14 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs->eattrs, "igp_metric"); if (ea) return ea->u.data; - if (rt->attrs->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; if (rt->src->proto->rte_igp_metric) @@ -2649,7 +3675,6 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -2662,7 +3687,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) rta *a = e->rte.attrs; pxlen = 
n->n.addr->pxlen; - if (a->hostentry) + if (ea_find(a->eattrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -2670,9 +3695,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -2685,10 +3713,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; he->igp_metric = rt_get_igp_metric(&e->rte); } @@ -2728,7 +3754,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h new file mode 100644 index 00000000..eb868aa7 --- /dev/null +++ b/nest/rt.h @@ -0,0 +1,486 @@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" + +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +/* + * Master Routing Tables. 
Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. + */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int gc_max_ops; /* Maximum number of operations before GC is run */ + int gc_min_time; /* Minimum time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte internal; /* Internal table of a protocol */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + list exports; /* Registered route exporters */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * 
obstacle from this routing table. + */ + struct event *rt_event; /* Routing table event */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + int gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte nhu_state; /* Next Hop Update state */ + byte internal; /* This table is internal for some other object */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + void (*hook)(struct rt_subscription *b); + void *data; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for 
hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + struct rta *src; /* Source rta entry */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. 
*/ + struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); + struct rte *(*rte_modify)(struct rte *, struct linpool *); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rte_storage *new, *new_best, *old, *old_best; +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + u8 trace_routes; + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. 
+ * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + rtable *table; /* The connected table */ + + pool *pool; + linpool *lp; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + u8 export_state; /* Route export state (TES_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_HUNGRY 1 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request 
*)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + +/* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ + +/* Return value of preexport() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. 
This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + +extern list routing_tables; +struct config; + +void rt_init(void); +void rt_preconfig(struct config *); +void rt_commit(struct config *new, struct config *old); +void rt_lock_table(rtable *); +void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); +void rt_subscribe(rtable *tab, struct rt_subscription *s); +void rt_unsubscribe(struct rt_subscription *s); +void rt_flowspec_link(rtable *src, rtable *dst); +void rt_flowspec_unlink(rtable *src, rtable *dst); +rtable *rt_setup(pool *, struct rtable_config *); +static inline void rt_shutdown(rtable *r) { rfree(r->rp); } + +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? 
n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct rt_import_request *); +void rt_refresh_end(rtable *t, struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); +void rt_schedule_prune(rtable *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rt_dump(rtable *); +void rt_dump_all(void); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); +int rt_reload_channel(struct channel *c); +void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); +void rt_prune_sync(rtable *t, int all); +int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); + +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + + +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; + +struct rt_show_data_rtable { + node n; + rtable *table; + struct channel 
*export_channel; +}; + +struct rt_show_data { + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct fib_iterator fit; /* Iterator over networks in table */ + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + int verbose, tables_defined_by; + const struct filter *filter; + struct proto *show_protocol; + struct proto *export_protocol; + struct channel *export_channel; + struct config *running_on_config; + struct krt_proto *kernel; + struct rt_export_hook *kernel_export_hook; + int export_mode, addr_mode, primary_only, filtered, stats; + + int table_open; /* Iteration (fit) is open */ + int trie_walk; /* Current table is iterated using trie */ + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; +}; + +void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... 
*/ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ + +/* Value of addr_mode */ +#define RSD_ADDR_EQUAL 1 /* Exact query - show route <addr> */ +#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ +#define RSD_ADDR_IN 3 /* Interval query - show route in <addr> */ + +/* Value of export_mode in struct rt_show_data */ +#define RSEM_NONE 0 /* Export mode not used */ +#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ +#define RSEM_EXPORT 2 /* Routes accepted by export filter */ +#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ +#define RSEM_EXPORTED 4 /* Routes marked in export map */ + +/* Host entry: Resolve hook for recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; + +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); + +/* +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, u32 lnum, u32 labels[lnum]); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), lnum, labels); +} +*/ + +/* + * Default protocol preferences + */ + +#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_STATIC 200 /* Static route */ +#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ +#define DEF_PREF_RIP 120 /* RIP */ +#define DEF_PREF_BGP 100 /* BGP */ +#define DEF_PREF_RPKI 100 /* RPKI */ +#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ +#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */ + +/* + * Route Origin Authorization 
+ */ + +#define ROA_UNKNOWN 0 +#define ROA_VALID 1 +#define ROA_INVALID 2 + +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); + +#endif |