diff options
Diffstat (limited to 'nest')
-rw-r--r-- | nest/Makefile | 13 | ||||
-rw-r--r-- | nest/a-path.c | 936 | ||||
-rw-r--r-- | nest/a-path_test.c | 220 | ||||
-rw-r--r-- | nest/a-set.c | 743 | ||||
-rw-r--r-- | nest/a-set_test.c | 259 | ||||
-rw-r--r-- | nest/attrs.h | 235 | ||||
-rw-r--r-- | nest/bird.h | 1 | ||||
-rw-r--r-- | nest/cli.c | 8 | ||||
-rw-r--r-- | nest/cli.h | 4 | ||||
-rw-r--r-- | nest/cmds.c | 28 | ||||
-rw-r--r-- | nest/config.Y | 65 | ||||
-rw-r--r-- | nest/limit.h | 49 | ||||
-rw-r--r-- | nest/neighbor.c | 2 | ||||
-rw-r--r-- | nest/proto-hooks.c | 40 | ||||
-rw-r--r-- | nest/proto.c | 1005 | ||||
-rw-r--r-- | nest/protocol.h | 222 | ||||
-rw-r--r-- | nest/route.h | 792 | ||||
-rw-r--r-- | nest/rt-attr.c | 1146 | ||||
-rw-r--r-- | nest/rt-dev.c | 38 | ||||
-rw-r--r-- | nest/rt-fib.c | 4 | ||||
-rw-r--r-- | nest/rt-show.c | 396 | ||||
-rw-r--r-- | nest/rt-table.c | 3385 | ||||
-rw-r--r-- | nest/rt.h | 589 |
23 files changed, 4170 insertions, 6010 deletions
diff --git a/nest/Makefile b/nest/Makefile index 884d3950..39617350 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,8 +1,17 @@ -src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,dev_build) -tests_src := a-set_test.c a-path_test.c +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) + $(E)echo GEN $@ + $(Q)echo "#include \"lib/birdlib.h\"" > $@ + $(Q)$(patsubst %,echo 'void %(void);' >> $@;,$(PROTO_BUILD)) + $(Q)echo "void protos_build_gen(void) {" >> $@ + $(Q)$(patsubst %,echo ' %();'>>$@;,$(PROTO_BUILD)) + $(Q)echo "}" >> $@ + +tests_src := tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/nest/a-path.c deleted file mode 100644 index 6bb18285..00000000 --- a/nest/a-path.c +++ /dev/null @@ -1,936 +0,0 @@ -/* - * BIRD -- Path Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/unaligned.h" -#include "lib/string.h" -#include "filter/data.h" - -// static inline void put_as(byte *data, u32 as) { put_u32(data, as); } -// static inline u32 get_as(byte *data) { return get_u32(data); } - -#define put_as put_u32 -#define get_as get_u32 -#define BS 4 /* Default block size of ASN (autonomous system number) */ - -#define BAD(DSC, VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) - -int -as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen) -{ - byte *pos = data; - char *err_dsc = NULL; - uint err_val = 0; - - while (len) - { - if (len < 2) - BAD("segment framing error", 0); - - /* Process one AS path segment */ - uint type = pos[0]; - uint slen = 2 + bs * pos[1]; - - if (len < slen) - BAD("segment framing error", len); - - switch (type) - { - case AS_PATH_SET: - if (!sets) - BAD("AS_SET segment", type); - break; - - case AS_PATH_SEQUENCE: - break; - - case AS_PATH_CONFED_SEQUENCE: - if (!confed) - BAD("AS_CONFED_SEQUENCE segment", type); - break; - - case AS_PATH_CONFED_SET: - if (!sets || !confed) - BAD("AS_CONFED_SET segment", type); - break; - - default: - BAD("unknown segment", type); - } - - if (pos[1] == 0) - BAD("zero-length segment", type); - - pos += slen; - len -= slen; - } - - return 1; - -bad: - if (err) - if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) - err[0] = 0; - - return 0; -} - -int -as_path_16to32(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u32(dst, get_u16(src)); - src += 2; - dst += 4; - } - } - - return dst - dst0; -} - -int -as_path_32to16(byte *dst, const byte *src, uint len) -{ - byte *dst0 = dst; - const byte *end = src + len; - uint i, n; - - while (src < end) - { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; - - for (i = 0; i < n; i++) - { - put_u16(dst, get_u32(src)); - src += 4; - dst += 2; - } - } - - return dst - dst0; -} - -int -as_path_contains_as4(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - uint i, n; - - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - if (get_as(pos) > 0xFFFF) - return 1; - - pos += BS; - } - } - - return 0; -} - -int -as_path_contains_confed(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint slen = 2 + BS * pos[1]; - - if ((type == AS_PATH_CONFED_SEQUENCE) || - (type == AS_PATH_CONFED_SET)) - return 1; - - pos += slen; - } - - return 0; -} - -struct adata * -as_path_strip_confed(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - const byte *src = path->data; - const byte *end = src + path->length; - byte *dst = res->data; - - while (src < end) - { - uint type = src[0]; - uint slen = 2 + BS * src[1]; - - /* Copy regular segments */ - if ((type == AS_PATH_SET) || (type == AS_PATH_SEQUENCE)) - { - memcpy(dst, src, slen); - dst += slen; - } - - src += slen; - } - - /* Fix the result length */ - res->length = dst - res->data; - - return res; -} - -struct adata * -as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as) -{ - struct adata *np; - const byte *pos = op->data; - uint len = op->length; - - if (len && (pos[0] == seq) && (pos[1] < 255)) - { - /* Starting with matching segment => just prepend the AS number */ - np = lp_alloc_adata(pool, len + BS); - np->data[0] = seq; - np->data[1] = pos[1] + 1; - put_as(np->data + 2, as); - - uint dlen = BS * pos[1]; - memcpy(np->data + 2 + BS, pos + 2, dlen); - ADVANCE(pos, len, 2 + dlen); - } - else - { - /* Create a new path segment */ - np = lp_alloc_adata(pool, len + 2 + BS); - np->data[0] = seq; - np->data[1] = 1; - put_as(np->data + 2, as); - } - - if (len) - { - byte *dst = np->data + 2 + BS * np->data[1]; - - memcpy(dst, pos, len); - } - - return np; -} - - -struct adata * -as_path_to_old(struct linpool *pool, const struct adata *path) -{ - struct adata *res = lp_alloc_adata(pool, path->length); - byte *pos = res->data; - byte *end = pos + res->length; - uint i, n; - u32 as; - - /* Copy the whole path */ - memcpy(res->data, path->data, path->length); - - /* Replace 32-bit AS numbers with AS_TRANS */ - while (pos < end) - { - n = pos[1]; - pos += 2; - - for (i = 0; i < n; i++) - { - as = get_as(pos); - if (as > 0xFFFF) - put_as(pos, AS_TRANS); - - pos += BS; - } - } - - return res; -} - -/* - * Cut the path to the length @num, measured to the usual path metric. Note that - * AS_CONFED_* segments have zero length and must be added if they are on edge. - */ -struct adata * -as_path_cut(struct linpool *pool, const struct adata *path, uint num) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_cut: Invalid path segment"); - } - - /* Cannot add whole segment, so try partial one and finish */ - if (num < n) - { - const byte *nend = pos; - if (num) - nend += 2 + BS * num; - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = nend - (const byte *) path->data; - memcpy(res->data, path->data, res->length); - - if (num) - { - byte *dpos = ((byte *) res->data) + (pos - (const byte *) path->data); - dpos[1] = num; - } - - return res; - } - - num -= n; - pos += 2 + BS * l; - } - - struct adata *res = lp_alloc_adata(pool, path->length); - res->length = path->length; - memcpy(res->data, path->data, res->length); - return res; -} - -/* - * Merge (concatenate) paths @p1 and @p2 and return the result. - * In contrast to other as_path_* functions, @p1 and @p2 may be reused. - */ -const struct adata * -as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2) -{ - if (p1->length == 0) - return p2; - - if (p2->length == 0) - return p1; - - struct adata *res = lp_alloc_adata(pool, p1->length + p2->length); - memcpy(res->data, p1->data, p1->length); - memcpy(res->data + p1->length, p2->data, p2->length); - - return res; -} - -void -as_path_format(const struct adata *path, byte *bb, uint size) -{ - buffer buf = { .start = bb, .pos = bb, .end = bb + size }, *b = &buf; - const byte *pos = path->data; - const byte *end = pos + path->length; - const char *ops, *cls; - - b->pos[0] = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: ops = "{"; cls = "}"; break; - case AS_PATH_SEQUENCE: ops = NULL; cls = NULL; break; - case AS_PATH_CONFED_SEQUENCE: ops = "("; cls = ")"; break; - case AS_PATH_CONFED_SET: ops = "({"; cls = "})"; break; - default: bug("Invalid path segment"); - } - - if (ops) - buffer_puts(b, ops); - - while (len--) - { - buffer_print(b, len ? "%u " : "%u", get_as(pos)); - pos += BS; - } - - if (cls) - buffer_puts(b, cls); - - if (pos < end) - buffer_puts(b, " "); - } - - /* Handle overflow */ - if (b->pos == b->end) - strcpy(b->end - 12, "..."); -} - -int -as_path_getlen(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - uint res = 0; - - while (pos < end) - { - uint t = pos[0]; - uint l = pos[1]; - uint n = 0; - - switch (t) - { - case AS_PATH_SET: n = 1; break; - case AS_PATH_SEQUENCE: n = l; break; - case AS_PATH_CONFED_SEQUENCE: n = 0; break; - case AS_PATH_CONFED_SET: n = 0; break; - default: bug("as_path_getlen: Invalid path segment"); - } - - res += n; - pos += 2 + BS * l; - } - - return res; -} - -int -as_path_get_last(const struct adata *path, u32 *orig_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - int found = 0; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - found = 0; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - found = 1; - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - if (found) - *orig_as = val; - return found; -} - -u32 -as_path_get_last_nonaggregated(const struct adata *path) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - u32 val = 0; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - if (!len) - continue; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - return val; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - val = get_as(pos + BS * (len - 1)); - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return val; -} - -int -as_path_get_first(const struct adata *path, u32 *last_as) -{ - const u8 *p = path->data; - - if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) - return 0; - - *last_as = get_as(p+2); - return 1; -} - -int -as_path_get_first_regular(const struct adata *path, u32 *last_as) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - return 0; - - case AS_PATH_SEQUENCE: - if (len == 0) - return 0; - - *last_as = get_as(pos); - return 1; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - break; - - default: - bug("Invalid path segment"); - } - - pos += BS * len; - } - - return 0; -} - -int -as_path_contains(const struct adata *path, u32 as, int min) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int num = 0; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for(i=0; i<n; i++) - { - if (get_as(p) == as) - if (++num == min) - return 1; - p += BS; - } - } - return 0; -} - -int -as_path_match_set(const struct adata *path, const struct f_tree *set) -{ - const u8 *p = path->data; - const u8 *q = p+path->length; - int i, n; - - while (p<q) - { - n = p[1]; - p += 2; - for (i=0; i<n; i++) - { - struct f_val v = {T_INT, .val.i = get_as(p)}; - if (find_tree(set, &v)) - return 1; - p += BS; - } - } - - return 0; -} - -const struct adata * -as_path_filter(struct linpool *pool, const struct adata *path, const struct f_val *set, int pos) -{ - ASSERT((set->type == T_SET) || (set->type == T_INT)); - - if (!path) - return NULL; - - int len = path->length; - const u8 *p = path->data; - const u8 *q = path->data + len; - u8 *d, *d2; - int i, bt, sn, dn; - u8 buf[len]; - - d = buf; - while (p<q) - { - /* Read block header (type and length) */ - bt = p[0]; - sn = p[1]; - dn = 0; - p += 2; - d2 = d + 2; - - for (i = 0; i < sn; i++) - { - u32 as = get_as(p); - int match; - - if (set->type == T_SET) - { - struct f_val v = {T_INT, .val.i = as}; - match = !!find_tree(set->val.t, &v); - } - else /* T_INT */ - match = (as == set->val.i); - - if (match == pos) - { - put_as(d2, as); - d2 += BS; - dn++; - } - - p += BS; - } - - if (dn > 0) - { - /* Nonempty block, set block header and advance */ - d[0] = bt; - d[1] = dn; - d = d2; - } - } - - uint nl = d - buf; - if (nl == path->length) - return path; - - struct adata *res = lp_alloc(pool, sizeof(struct adata) + nl); - res->length = nl; - memcpy(res->data, buf, nl); - - return res; -} - -int -as_path_walk(const struct adata *path, uint *pos, uint *val) -{ - if (!path) - return 0; - - const u8 *p = path->data; - const u8 *q = p + path->length; - uint n, x = *pos; - - while (p < q) - { - n = p[1]; - p += 2; - - if (x < n) - { - *val = get_as(p + x * BS); - *pos += 1; - return 1; - } - - p += n * BS; - x -= n; - } - - return 0; -} - - -struct pm_pos -{ - u8 set; - u8 mark; - union - { - const char *sp; - u32 asn; - } val; -}; - -static int -parse_path(const struct adata *path, struct pm_pos *pp) -{ - const byte *pos = path->data; - const byte *end = pos + path->length; - struct pm_pos *op = pp; - uint i; - - while (pos < end) - { - uint type = pos[0]; - uint len = pos[1]; - pos += 2; - - switch (type) - { - case AS_PATH_SET: - case AS_PATH_CONFED_SET: - pp->set = 1; - pp->mark = 0; - pp->val.sp = pos - 1; - pp++; - - pos += BS * len; - break; - - case AS_PATH_SEQUENCE: - case AS_PATH_CONFED_SEQUENCE: - for (i = 0; i < len; i++) - { - pp->set = 0; - pp->mark = 0; - pp->val.asn = get_as(pos); - pp++; - - pos += BS; - } - break; - - default: - bug("Invalid path segment"); - } - } - - return pp - op; -} - -static int -pm_match_val(const struct pm_pos *pos, u32 asn, u32 asn2) -{ - u32 gas; - if (! pos->set) - return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - gas = get_as(p + i * BS); - - if ((gas >= asn) && (gas <= asn2)) - return 1; - } - - return 0; -} - -static int -pm_match_set(const struct pm_pos *pos, const struct f_tree *set) -{ - struct f_val asn = { .type = T_INT }; - - if (! pos->set) - { - asn.val.i = pos->val.asn; - return !!find_tree(set, &asn); - } - - const u8 *p = pos->val.sp; - int len = *p++; - int i; - - for (i = 0; i < len; i++) - { - asn.val.i = get_as(p + i * BS); - if (find_tree(set, &asn)) - return 1; - } - - return 0; -} - -static inline int -pm_match(const struct pm_pos *pos, const struct f_path_mask_item *mask, u32 asn, u32 asn2) -{ - return ((mask->kind == PM_QUESTION) || - ((mask->kind != PM_ASN_SET) ? - pm_match_val(pos, asn, asn2) : - pm_match_set(pos, mask->set))); -} - -static void -pm_mark(struct pm_pos *pos, int *i, int plen, int *nl, int *nh) -{ - int j = *i; - - if (pos[j].set) - do { pos[j].mark = 1; j++; } - while ((j < plen) && pos[j].set); - else - j++; - - pos[j].mark = 1; - - /* Update low, high based on first and last marked pos */ - int l = pos[*i].set ? *i : j; - - *nl = (*nl < 0) ? l : MIN(*nl, l); - *nh = MAX(*nh, j); - *i = j; -} - -/* AS path matching is nontrivial. Because AS path can - * contain sets, it is not a plain wildcard matching. A set - * in an AS path is interpreted as it might represent any - * sequence of AS numbers from that set (possibly with - * repetitions). So it is also a kind of a pattern, - * more complicated than a path mask. - * - * The algorithm for AS path matching is a variant - * of nondeterministic finite state machine, where - * positions in AS path are states, and items in - * path mask are input for that finite state machine. - * During execution of the algorithm we maintain a set - * of marked states - a state is marked if it can be - * reached by any walk through NFSM with regard to - * currently processed part of input. When we process - * next part of mask, we advance each marked state. - * We start with marked first position, when we - * run out of marked positions, we reject. When - * we process the whole mask, we accept if final position - * (auxiliary position after last real position in AS path) - * is marked. - */ -int -as_path_match(const struct adata *path, const struct f_path_mask *mask) -{ - struct pm_pos pos[2048 + 1]; - int plen = parse_path(path, pos); - int l, h, i, nh, nl, last, loop; - u32 val = 0; - u32 val2 = 0; - - /* l and h are bound of interval of positions where - are marked states */ - - pos[plen].set = 0; - pos[plen].mark = 0; - - l = h = loop = 0; - pos[0].mark = 1; - - for (uint m=0; m < mask->len; m++) - { - /* We remove this mark to not step after pos[plen] */ - pos[plen].mark = 0; - - switch (mask->item[m].kind) - { - case PM_ASTERISK: - for (i = l; i <= plen; i++) - pos[i].mark = 1; - h = plen; - break; - - case PM_LOOP: - loop = 1; - break; - - case PM_ASN: /* Define single ASN as ASN..ASN - very narrow interval */ - val2 = val = mask->item[m].asn; - goto step; - case PM_ASN_EXPR: - bug("Expressions should be evaluated on AS path mask construction."); - case PM_ASN_RANGE: - val = mask->item[m].from; - val2 = mask->item[m].to; - goto step; - case PM_QUESTION: - case PM_ASN_SET: - step: - nh = nl = -1; - last = plen; - for (i = h; i >= l; i--) - if (pos[i].mark) - { - pos[i].mark = 0; - int j = i; - - match: - if (pm_match(pos + j, &mask->item[m], val, val2)) - { - pm_mark(pos, &j, plen, &nl, &nh); - if (loop && (j < last)) - goto match; - } - - last = i; - } - - if (nh < 0) - return 0; - - h = nh; - l = nl; - loop = 0; - break; - } - } - - return pos[plen].mark; -} diff --git a/nest/a-path_test.c b/nest/a-path_test.c deleted file mode 100644 index 97924c00..00000000 --- a/nest/a-path_test.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * BIRD -- Path Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "filter/data.h" - -#define TESTS_NUM 30 -#define AS_PATH_LENGTH 1000 - -#if AS_PATH_LENGTH > AS_PATH_MAXLEN -#warning "AS_PATH_LENGTH should be <= AS_PATH_MAXLEN" -#endif - -static int -t_as_path_match(void) -{ - resource_init(); - - int round; - for (round = 0; round < TESTS_NUM; round++) - { - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - u32 first_prepended, last_prepended; - first_prepended = last_prepended = 0; - struct linpool *lp = lp_new_default(&root_pool); - - struct f_path_mask *mask = alloca(sizeof(struct f_path_mask) + AS_PATH_LENGTH * sizeof(struct f_path_mask_item)); - mask->len = AS_PATH_LENGTH; - for (int i = AS_PATH_LENGTH - 1; i >= 0; i--) - { - u32 val = bt_random(); - as_path = as_path_prepend(lp, as_path, val); - bt_debug("Prepending ASN: %10u \n", val); - - if (i == 0) - last_prepended = val; - if (i == AS_PATH_LENGTH-1) - first_prepended = val; - - mask->item[i].kind = PM_ASN; - mask->item[i].asn = val; - } - - bt_assert_msg(as_path_match(as_path, mask), "Mask should match with AS path"); - - u32 asn; - - bt_assert(as_path_get_first(as_path, &asn)); - bt_assert_msg(asn == last_prepended, "as_path_get_first() should return the last prepended ASN"); - - bt_assert(as_path_get_last(as_path, &asn)); - bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); - - rfree(lp); - } - - return 1; -} - -static int -t_path_format(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - uint i; - for (i = 4294967285; i <= 4294967294; i++) - { - as_path = as_path_prepend(lp, as_path, i); - bt_debug("Prepending ASN: %10u \n", i); - } - -#define BUFFER_SIZE 120 - byte buf[BUFFER_SIZE] = {}; - - as_path_format(&empty_as_path, buf, BUFFER_SIZE); - bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - - as_path_format(as_path, buf, BUFFER_SIZE); - bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); - -#define SMALL_BUFFER_SIZE 25 - byte buf2[SMALL_BUFFER_SIZE] = {}; - as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); - bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); - - rfree(lp); - - return 1; -} - -static int -count_asn_in_array(const u32 *array, u32 asn) -{ - int counts_of_contains = 0; - int u; - for (u = 0; u < AS_PATH_LENGTH; u++) - if (array[u] == asn) - counts_of_contains++; - return counts_of_contains; -} - -static int -t_path_include(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); - - u32 as_nums[AS_PATH_LENGTH] = {}; - int i; - for (i = 0; i < AS_PATH_LENGTH; i++) - { - u32 val = bt_random(); - as_nums[i] = val; - as_path = as_path_prepend(lp, as_path, val); - } - - for (i = 0; i < AS_PATH_LENGTH; i++) - { - int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); - bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); - - struct f_val v = { .type = T_INT, .val.i = as_nums[i] }; - bt_assert(as_path_filter(lp, as_path, &v, 0) != NULL); - bt_assert(as_path_filter(lp, as_path, &v, 1) != NULL); - } - - for (i = 0; i < 10000; i++) - { - u32 test_val = bt_random(); - int counts_of_contains = count_asn_in_array(as_nums, test_val); - int result = as_path_contains(as_path, test_val, (counts_of_contains == 0 ? 1 : counts_of_contains)); - - if (counts_of_contains) - bt_assert_msg(result, "As path should contain %d-times the number %u", counts_of_contains, test_val); - else - bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); - } - - rfree(lp); - - return 1; -} - -#if 0 -static int -t_as_path_converting(void) -{ - resource_init(); - - struct adata empty_as_path = {}; - struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); -#define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 - - int i; - for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) - as_path = as_path_prepend(lp, as_path, i); - - bt_debug("data length: %u \n", as_path->length); - - byte buffer[100] = {}; - int used_size = as_path_convert_to_new(as_path, buffer, AS_PATH_LENGTH_FOR_CONVERTING_TEST-1); - bt_debug("as_path_convert_to_new: len %d \n%s\n", used_size, buffer); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\039\030\030\030\030\030\030\030\039\030\030\030\030\030\030\030\038\030\030\030\030\030\030" - "\030\037\030\030\030\030\030\030\030\036\030\030\030\030", - 38)); - - bzero(buffer, sizeof(buffer)); - int new_used; - used_size = as_path_convert_to_old(as_path, buffer, &new_used); - bt_debug("as_path_convert_to_old: len %d, new_used: %d \n", used_size, new_used); - for (i = 0; i < used_size; i++) - { - bt_debug("\\03%d", buffer[i]); - } - bt_debug("\n"); - bt_assert(memcmp(buffer, - "\032\0310\030\039\030\038\030\037\030\036\030\035\030\034\030\033\030\032\030\031\030\030", - 22)); - - return 1; -} -#endif - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - - bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); - bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); - bt_test_suite(t_path_include, "Testing including a AS number in AS path"); - // bt_test_suite(t_as_path_converting, "Testing as_path_convert_to_*() output constancy"); - - return bt_exit_value(); -} diff --git a/nest/a-set.c b/nest/a-set.c deleted file mode 100644 index 40ed573c..00000000 --- a/nest/a-set.c +++ /dev/null @@ -1,743 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * (c) 2000 Pavel Machek <pavel@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include <stdlib.h> - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" -#include "lib/string.h" - -/** - * int_set_format - format an &set for printing - * @set: set attribute to be formatted - * @way: style of format (0 for router ID list, 1 for community list) - * @from: starting position in set - * @buf: destination buffer - * @size: size of buffer - * - * This function takes a set attribute and formats it. @way specifies - * the style of format (router ID / community). @from argument can be - * used to specify the first printed value for the purpose of printing - * untruncated sets even with smaller buffers. If the output fits in - * the buffer, 0 is returned, otherwise the position of the first not - * printed item is returned. This value can be used as @from argument - * in subsequent calls. If truncated output suffices, -1 can be - * instead used as @from, in that case " ..." is eventually added at - * the buffer to indicate truncation. - */ -int -int_set_format(const struct adata *set, int way, int from, byte *buf, uint size) -{ - u32 *z = (u32 *) set->data; - byte *end = buf + size - 24; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i++) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - if (way) - buf += bsprintf(buf, "(%d,%d)", z[i] >> 16, z[i] & 0xffff); - else - buf += bsprintf(buf, "%R", z[i]); - } - *buf = 0; - return 0; -} - -int -ec_format(byte *buf, u64 ec) -{ - u32 type, key, val; - char tbuf[16]; - const char *kind; - - type = ec >> 48; - kind = ec_subtype_str(type & 0xf0ff); - - if (!kind) { - bsprintf(tbuf, "unknown 0x%x", type); - kind = tbuf; - } - - switch (ec >> 56) - { - /* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ - case 0x00: - case 0x40: - key = (ec >> 32) & 0xFFFF; - val = ec; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* RFC 4360 3.2. IPv4 Address Specific Extended Community */ - case 0x01: - case 0x41: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %R, %u)", kind, key, val); - - /* RFC 5668 4-Octet AS Specific BGP Extended Community */ - case 0x02: - case 0x42: - key = ec >> 16; - val = ec & 0xFFFF; - return bsprintf(buf, "(%s, %u, %u)", kind, key, val); - - /* Generic format for unknown kinds of extended communities */ - default: - key = ec >> 32; - val = ec; - return bsprintf(buf, "(generic, 0x%x, 0x%x)", key, val); - } - -} - -int -ec_set_format(const struct adata *set, int from, byte *buf, uint size) -{ - u32 *z = int_set_get_data(set); - byte *end = buf + size - 64; - int from2 = MAX(from, 0); - int to = int_set_get_size(set); - int i; - - for (i = from2; i < to; i += 2) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, " ..."); - else - *buf = 0; - return i; - } - - if (i > from2) - *buf++ = ' '; - - buf += ec_format(buf, ec_get(z, i)); - } - *buf = 0; - return 0; -} - -int -lc_format(byte *buf, lcomm lc) -{ - return bsprintf(buf, "(%u, %u, %u)", lc.asn, lc.ldp1, lc.ldp2); -} - -int -lc_set_format(const struct adata *set, int from, byte *buf, uint bufsize) -{ - u32 *d = (u32 *) set->data; - byte *end = buf + bufsize - 64; - int from2 = MAX(from, 0); - int to = set->length / 4; - int i; - - for (i = from2; i < to; i += 3) - { - if (buf > end) - { - if (from < 0) - strcpy(buf, "..."); - else - buf[-1] = 0; - return i; - } - - buf += bsprintf(buf, "(%u, %u, %u)", d[i], d[i+1], d[i+2]); - *buf++ = ' '; - } - - if (i != from2) - buf--; - - *buf = 0; - return 0; -} - -int -int_set_contains(const struct adata *list, u32 val) -{ - if (!list) - return 0; - - u32 *l = (u32 *) list->data; - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (*l++ == val) - return 1; - - return 0; -} - -int -ec_set_contains(const struct adata *list, u64 val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - u32 eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (l[i] == eh && l[i+1] == el) - return 1; - - return 0; -} - -int -lc_set_contains(const struct adata *list, lcomm val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i += 3) - if (lc_match(l, i, val)) - return 1; - - return 0; -} - -const struct adata * -int_set_prepend(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data + 4, list->data, list->length); - - * (u32 *) res->data = val; - - return res; -} - -const struct adata * -int_set_add(struct linpool *pool, const struct adata *list, u32 val) -{ - struct adata *res; - int len; - - if (int_set_contains(list, val)) - return list; - - len = list ? list->length : 0; - res = lp_alloc(pool, sizeof(struct adata) + len + 4); - res->length = len + 4; - - if (list) - memcpy(res->data, list->data, list->length); - - * (u32 *) (res->data + len) = val; - - return res; -} - -const struct adata * -ec_set_add(struct linpool *pool, const struct adata *list, u64 val) -{ - if (ec_set_contains(list, val)) - return list; - - int olen = list ? list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + 8); - res->length = olen + 8; - - if (list) - memcpy(res->data, list->data, list->length); - - u32 *l = (u32 *) (res->data + olen); - l[0] = ec_hi(val); - l[1] = ec_lo(val); - - return res; -} - -const struct adata * -lc_set_add(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (lc_set_contains(list, val)) - return list; - - int olen = list ? list->length : 0; - struct adata *res = lp_alloc(pool, sizeof(struct adata) + olen + LCOMM_LENGTH); - res->length = olen + LCOMM_LENGTH; - - if (list) - memcpy(res->data, list->data, list->length); - - lc_put((u32 *) (res->data + olen), val); - - return res; -} - -const struct adata * -int_set_del(struct linpool *pool, const struct adata *list, u32 val) -{ - if (!int_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 4); - res->length = list->length - 4; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i = 0; i < len; i++) - if (l[i] != val) - *k++ = l[i]; - - return res; -} - -const struct adata * -ec_set_del(struct linpool *pool, const struct adata *list, u64 val) -{ - if (!ec_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - 8); - res->length = list->length - 8; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - u32 eh = ec_hi(val); - u32 el = ec_lo(val); - int i; - - for (i=0; i < len; i += 2) - if (! (l[i] == eh && l[i+1] == el)) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - return res; -} - -const struct adata * -lc_set_del(struct linpool *pool, const struct adata *list, lcomm val) -{ - if (!lc_set_contains(list, val)) - return list; - - struct adata *res; - res = lp_alloc(pool, sizeof(struct adata) + list->length - LCOMM_LENGTH); - res->length = list->length - LCOMM_LENGTH; - - u32 *l = int_set_get_data(list); - u32 *k = int_set_get_data(res); - int len = int_set_get_size(list); - int i; - - for (i=0; i < len; i += 3) - if (! lc_match(l, i, val)) - k = lc_copy(k, l+i); - - return res; -} - -const struct adata * -int_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i++) - if (!int_set_contains(l1, l[i])) - *k++ = l[i]; - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 2) - if (!ec_set_contains(l1, ec_get(l, i))) - { - *k++ = l[i]; - *k++ = l[i+1]; - } - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - -const struct adata * -lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2) -{ - if (!l1) - return l2; - if (!l2) - return l1; - - struct adata *res; - int len = int_set_get_size(l2); - u32 *l = int_set_get_data(l2); - u32 tmp[len]; - u32 *k = tmp; - int i; - - for (i = 0; i < len; i += 3) - if (!lc_set_contains(l1, lc_get(l, i))) - k = lc_copy(k, l+i); - - if (k == tmp) - return l1; - - len = (k - tmp) * 4; - res = lp_alloc(pool, sizeof(struct adata) + l1->length + len); - res->length = l1->length + len; - memcpy(res->data, l1->data, l1->length); - memcpy(res->data + l1->length, tmp, len); - return res; -} - - -struct adata * -ec_set_del_nontrans(struct linpool *pool, const struct adata *set) -{ - adata *res = lp_alloc_adata(pool, set->length); - u32 *src = int_set_get_data(set); - u32 *dst = int_set_get_data(res); - int len = int_set_get_size(set); - int i; - - /* Remove non-transitive communities (EC_TBIT set) */ - for (i = 0; i < len; i += 2) - { - if (src[i] & EC_TBIT) - continue; - - *dst++ = src[i]; - *dst++ = src[i+1]; - } - - res->length = ((byte *) dst) - res->data; - - return res; -} - -static int -int_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; -} - -struct adata * -int_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 4, 4, int_set_cmp); - return dst; -} - -int -int_set_min(const struct adata *list, u32 *val) -{ - if (!list) - return 0; - - u32 *l = (u32 *) list->data; - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - *val = *l++; - for (i = 1; i < len; i++, l++) - if (int_set_cmp(val, l) > 0) - *val = *l; - - return 1; -} - -int -int_set_max(const struct adata *list, u32 *val) -{ - if (!list) - return 0; - - u32 *l = (u32 *) list->data; - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - *val = *l++; - for (i = 1; i < len; i++, l++) - if (int_set_cmp(val, l) < 0) - *val = *l; - - return 1; -} - - -static int -ec_set_cmp(const void *X, const void *Y) -{ - u64 x = ec_get(X, 0); - u64 y = ec_get(Y, 0); - return (x < y) ? -1 : (x > y) ? 1 : 0; -} - -struct adata * -ec_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / 8, 8, ec_set_cmp); - return dst; -} - -void -ec_set_sort_x(struct adata *set) -{ - /* Sort in place */ - qsort(set->data, set->length / 8, 8, ec_set_cmp); -} - -int -ec_set_min(const struct adata *list, u64 *val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - u32 *res = l; l += 2; - for (i = 2; i < len; i += 2, l += 2) - if (ec_set_cmp(res, l) > 0) - res = l; - - *val = ec_generic(res[0], res[1]); - return 1; -} - -int -ec_set_max(const struct adata *list, u64 *val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - u32 *res = l; l += 2; - for (i = 2; i < len; i += 2, l += 2) - if (ec_set_cmp(res, l) < 0) - res = l; - - *val = ec_generic(res[0], res[1]); - return 1; -} - - -static int -lc_set_cmp(const void *X, const void *Y) -{ - const u32 *x = X, *y = Y; - if (x[0] != y[0]) - return (x[0] > y[0]) ? 1 : -1; - if (x[1] != y[1]) - return (x[1] > y[1]) ? 1 : -1; - if (x[2] != y[2]) - return (x[2] > y[2]) ? 1 : -1; - return 0; -} - -struct adata * -lc_set_sort(struct linpool *pool, const struct adata *src) -{ - struct adata *dst = lp_alloc_adata(pool, src->length); - memcpy(dst->data, src->data, src->length); - qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); - return dst; -} - -int -lc_set_min(const struct adata *list, lcomm *val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - u32 *res = l; l += 3; - for (i = 3; i < len; i += 3, l += 3) - if (lc_set_cmp(res, l) > 0) - res = l; - - *val = (lcomm) { res[0], res[1], res[2] }; - return 1; -} - -int -lc_set_max(const struct adata *list, lcomm *val) -{ - if (!list) - return 0; - - u32 *l = int_set_get_data(list); - int len = int_set_get_size(list); - int i; - - if (len < 1) - return 0; - - u32 *res = l; l += 3; - for (i = 3; i < len; i += 3, l += 3) - if (lc_set_cmp(res, l) < 0) - res = l; - - *val = (lcomm) { res[0], res[1], res[2] }; - return 1; -} - -int -int_set_walk(const struct adata *list, uint *pos, uint *val) -{ - if (!list) - return 0; - - if (*pos >= (uint) int_set_get_size(list)) - return 0; - - u32 *res = int_set_get_data(list) + *pos; - *val = *res; - *pos += 1; - - return 1; -} - -int -ec_set_walk(const struct adata *list, uint *pos, u64 *val) -{ - if (!list) - return 0; - - if (*pos >= (uint) int_set_get_size(list)) - return 0; - - u32 *res = int_set_get_data(list) + *pos; - *val = ec_generic(res[0], res[1]); - *pos += 2; - - return 1; -} - -int -lc_set_walk(const struct adata *list, uint *pos, lcomm *val) -{ - if (!list) - return 0; - - if (*pos >= (uint) int_set_get_size(list)) - return 0; - - u32 *res = int_set_get_data(list) + *pos; - *val = (lcomm) { res[0], res[1], res[2] }; - *pos += 3; - - return 1; -} diff --git a/nest/a-set_test.c b/nest/a-set_test.c deleted file mode 100644 index 96b6a727..00000000 --- a/nest/a-set_test.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * BIRD -- Set/Community-list Operations Tests - * - * (c) 2015 CZ.NIC z.s.p.o. - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include "test/birdtest.h" -#include "test/bt-utils.h" - -#include "lib/net.h" -#include "nest/route.h" -#include "nest/attrs.h" -#include "lib/resource.h" - -#define SET_SIZE 10 -static const struct adata *set_sequence; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_same; /* <0; SET_SIZE) */ -static const struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */ -static const struct adata *set_random; - -#define BUFFER_SIZE 1000 -static byte buf[BUFFER_SIZE] = {}; - -#define SET_SIZE_FOR_FORMAT_OUTPUT 10 - -struct linpool *lp; - -enum set_type -{ - SET_TYPE_INT, - SET_TYPE_EC -}; - -static void -generate_set_sequence(enum set_type type, int len) -{ - struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - int i; - for (i = 0; i < len; i++) - { - if (type == SET_TYPE_INT) - { - set_sequence = int_set_add(lp, set_sequence, i); - set_sequence_same = int_set_add(lp, set_sequence_same, i); - set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = int_set_add(lp, set_random, bt_random()); - } - else if (type == SET_TYPE_EC) - { - set_sequence = ec_set_add(lp, set_sequence, i); - set_sequence_same = ec_set_add(lp, set_sequence_same, i); - set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); - } - else - bt_abort_msg("This should be unreachable"); - } -} - -/* - * SET INT TESTS - */ - -static int -t_set_int_contains(void) -{ - int i; - - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(int_set_contains(set_sequence, i)); - bt_assert(int_set_contains(set_sequence, -1) == 0); - bt_assert(int_set_contains(set_sequence, SET_SIZE) == 0); - - int *data = int_set_get_data(set_sequence); - for (i = 0; i < SET_SIZE; i++) - bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); - - rfree(lp); - return 1; -} - -static int -t_set_int_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *set_union; - set_union = int_set_union(lp, set_sequence, set_sequence_same); - bt_assert(int_set_get_size(set_union) == SET_SIZE); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - set_union = int_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); - bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_format(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT); - - bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); - - bzero(buf, BUFFER_SIZE); - bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); - bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); - - rfree(lp); - return 1; -} - -static int -t_set_int_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_INT, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = int_set_del(lp, deleting_sequence, i); - bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - int_set_get_size(deleting_sequence), - SET_SIZE-1-i); - } - - bt_assert(int_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - -/* - * SET EC TESTS - */ - -static int -t_set_ec_contains(void) -{ - u32 i; - - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - for (i = 0; i < SET_SIZE; i++) - bt_assert(ec_set_contains(set_sequence, i)); - bt_assert(ec_set_contains(set_sequence, -1) == 0); - bt_assert(ec_set_contains(set_sequence, SET_SIZE) == 0); - -// int *data = ec_set_get_data(set_sequence); -// for (i = 0; i < SET_SIZE; i++) -// bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); - - rfree(lp); - return 1; -} - -static int -t_set_ec_union(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *set_union; - set_union = ec_set_union(lp, set_sequence, set_sequence_same); - bt_assert(ec_set_get_size(set_union) == SET_SIZE); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - set_union = ec_set_union(lp, set_sequence, set_sequence_higher); - bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); - bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - - rfree(lp); - return 1; -} - -static int -t_set_ec_format(void) -{ - resource_init(); - - const struct adata empty_as_path = {}; - set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); - - u64 i = 0; - set_sequence = ec_set_add(lp, set_sequence, i); - for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) - set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); - - bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); - bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, - "ec_set_format() returns '%s'", buf); - - rfree(lp); - return 1; -} - -static int -t_set_ec_delete(void) -{ - resource_init(); - generate_set_sequence(SET_TYPE_EC, SET_SIZE); - - const struct adata *deleting_sequence = set_sequence; - u32 i; - for (i = 0; i < SET_SIZE; i++) - { - deleting_sequence = ec_set_del(lp, deleting_sequence, i); - bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), - "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", - ec_set_get_size(deleting_sequence), SET_SIZE-1-i); - } - - bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); - - return 1; -} - -int -main(int argc, char *argv[]) -{ - bt_init(argc, argv); - - bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); - bt_test_suite(t_set_int_format, "Testing sets of integers: format"); - bt_test_suite(t_set_int_union, "Testing sets of integers: union"); - bt_test_suite(t_set_int_delete, "Testing sets of integers: delete"); - - bt_test_suite(t_set_ec_contains, "Testing sets of Extended Community values: contains, get_data"); - bt_test_suite(t_set_ec_format, "Testing sets of Extended Community values: format"); - bt_test_suite(t_set_ec_union, "Testing sets of Extended Community values: union"); - bt_test_suite(t_set_ec_delete, "Testing sets of Extended Community values: delete"); - - return bt_exit_value(); -} diff --git a/nest/attrs.h b/nest/attrs.h deleted file mode 100644 index 9412439b..00000000 --- a/nest/attrs.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Attribute Operations - * - * (c) 2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ATTRS_H_ -#define _BIRD_ATTRS_H_ - -#include <stdint.h> -#include "lib/unaligned.h" -#include "nest/route.h" - - -/* a-path.c */ - -#define AS_PATH_SET 1 /* Types of path segments */ -#define AS_PATH_SEQUENCE 2 -#define AS_PATH_CONFED_SEQUENCE 3 -#define AS_PATH_CONFED_SET 4 - -#define AS_PATH_MAXLEN 10000 - -#define AS_TRANS 23456 -/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF - * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details - */ - -struct f_val; -struct f_tree; - -int as_path_valid(byte *data, uint len, int bs, int sets, int confed, char *err, uint elen); -int as_path_16to32(byte *dst, const byte *src, uint len); -int as_path_32to16(byte *dst, const byte *src, uint len); -int as_path_contains_as4(const struct adata *path); -int as_path_contains_confed(const struct adata *path); -struct adata *as_path_strip_confed(struct linpool *pool, const struct adata *op); -struct adata *as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as); -struct adata *as_path_to_old(struct linpool *pool, const struct adata *path); -struct adata *as_path_cut(struct linpool *pool, const struct adata *path, uint num); -const struct adata *as_path_merge(struct linpool *pool, const struct adata *p1, const struct adata *p2); -void as_path_format(const struct adata *path, byte *buf, uint size); -int as_path_getlen(const struct adata *path); -int as_path_getlen_int(const struct adata *path, int bs); -int as_path_get_first(const struct adata *path, u32 *orig_as); -int as_path_get_first_regular(const struct adata *path, u32 *last_as); -int as_path_get_last(const struct adata *path, u32 *last_as); -u32 as_path_get_last_nonaggregated(const struct adata *path); -int as_path_contains(const struct adata *path, u32 as, int min); -int as_path_match_set(const struct adata *path, const struct f_tree *set); -const struct adata *as_path_filter(struct linpool *pool, const struct adata *path, const struct f_val *set, int pos); -int as_path_walk(const struct adata *path, uint *pos, uint *val); - -static inline struct adata *as_path_prepend(struct linpool *pool, const struct adata *path, u32 as) -{ return as_path_prepend2(pool, path, AS_PATH_SEQUENCE, as); } - - -#define PM_ASN 0 -#define PM_QUESTION 1 -#define PM_ASTERISK 2 -#define PM_ASN_EXPR 3 -#define PM_ASN_RANGE 4 -#define PM_ASN_SET 5 -#define PM_LOOP 6 - -struct f_path_mask_item { - union { - u32 asn; /* PM_ASN */ - const struct f_line *expr; /* PM_ASN_EXPR */ - const struct f_tree *set; /* PM_ASN_SET */ - struct { /* PM_ASN_RANGE */ - u32 from; - u32 to; - }; - }; - int kind; -}; - -struct f_path_mask { - uint len; - struct f_path_mask_item item[0]; -}; - -int as_path_match(const struct adata *path, const struct f_path_mask *mask); - - -/* Counterparts to appropriate as_path_* functions */ - -static inline int -aggregator_16to32(byte *dst, const byte *src) -{ - put_u32(dst, get_u16(src)); - memcpy(dst+4, src+2, 4); - return 8; -} - -static inline int -aggregator_32to16(byte *dst, const byte *src) -{ - put_u16(dst, get_u32(src)); - memcpy(dst+2, src+4, 4); - return 6; -} - -static inline int -aggregator_contains_as4(const struct adata *a) -{ - return get_u32(a->data) > 0xFFFF; -} - -static inline struct adata * -aggregator_to_old(struct linpool *pool, const struct adata *a) -{ - struct adata *d = lp_alloc_adata(pool, 8); - put_u32(d->data, AS_TRANS); - memcpy(d->data + 4, a->data + 4, 4); - return d; -} - - -/* a-set.c */ - - -/* Extended Community subtypes (kinds) */ -enum ec_subtype { - EC_RT = 0x0002, - EC_RO = 0x0003, - EC_GENERIC = 0xFFFF, -}; - -static inline const char *ec_subtype_str(const enum ec_subtype ecs) { - switch (ecs) { - case EC_RT: return "rt"; - case EC_RO: return "ro"; - default: return NULL; - } -} - -/* Transitive bit (for first u32 half of EC) */ -#define EC_TBIT 0x40000000 - -#define ECOMM_LENGTH 8 - -static inline int int_set_get_size(const struct adata *list) -{ return list->length / 4; } - -static inline int ec_set_get_size(const struct adata *list) -{ return list->length / 8; } - -static inline int lc_set_get_size(const struct adata *list) -{ return list->length / 12; } - -static inline u32 *int_set_get_data(const struct adata *list) -{ return (u32 *) list->data; } - -static inline u32 ec_hi(u64 ec) { return ec >> 32; } -static inline u32 ec_lo(u64 ec) { return ec; } -static inline u64 ec_get(const u32 *l, int i) -{ return (((u64) l[i]) << 32) | l[i+1]; } - -/* RFC 4360 3.1. Two-Octet AS Specific Extended Community */ -static inline u64 ec_as2(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0000) << 48) | (key << 32) | val; } - -/* RFC 5668 4-Octet AS Specific BGP Extended Community */ -static inline u64 ec_as4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0200) << 48) | (key << 16) | val; } - -/* RFC 4360 3.2. IPv4 Address Specific Extended Community */ -static inline u64 ec_ip4(enum ec_subtype kind, u64 key, u64 val) -{ return (((u64) kind | 0x0100) << 48) | (key << 16) | val; } - -static inline u64 ec_generic(u64 key, u64 val) -{ return (key << 32) | val; } - -/* Large community value */ -typedef struct lcomm { - u32 asn; - u32 ldp1; - u32 ldp2; -} lcomm; - -#define LCOMM_LENGTH 12 - -static inline lcomm lc_get(const u32 *l, int i) -{ return (lcomm) { l[i], l[i+1], l[i+2] }; } - -static inline void lc_put(u32 *l, lcomm v) -{ l[0] = v.asn; l[1] = v.ldp1; l[2] = v.ldp2; } - -static inline int lc_match(const u32 *l, int i, lcomm v) -{ return (l[i] == v.asn && l[i+1] == v.ldp1 && l[i+2] == v.ldp2); } - -static inline u32 *lc_copy(u32 *dst, const u32 *src) -{ memcpy(dst, src, LCOMM_LENGTH); return dst + 3; } - - -int int_set_format(const struct adata *set, int way, int from, byte *buf, uint size); -int ec_format(byte *buf, u64 ec); -int ec_set_format(const struct adata *set, int from, byte *buf, uint size); -int lc_format(byte *buf, lcomm lc); -int lc_set_format(const struct adata *set, int from, byte *buf, uint size); -int int_set_contains(const struct adata *list, u32 val); -int ec_set_contains(const struct adata *list, u64 val); -int lc_set_contains(const struct adata *list, lcomm val); -const struct adata *int_set_prepend(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *int_set_add(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_add(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_add(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_del(struct linpool *pool, const struct adata *list, u32 val); -const struct adata *ec_set_del(struct linpool *pool, const struct adata *list, u64 val); -const struct adata *lc_set_del(struct linpool *pool, const struct adata *list, lcomm val); -const struct adata *int_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); -const struct adata *ec_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); -const struct adata *lc_set_union(struct linpool *pool, const struct adata *l1, const struct adata *l2); - -struct adata *ec_set_del_nontrans(struct linpool *pool, const struct adata *set); -struct adata *int_set_sort(struct linpool *pool, const struct adata *src); -struct adata *ec_set_sort(struct linpool *pool, const struct adata *src); -struct adata *lc_set_sort(struct linpool *pool, const struct adata *src); -int int_set_min(const struct adata *list, u32 *val); -int ec_set_min(const struct adata *list, u64 *val); -int lc_set_min(const struct adata *list, lcomm *val); -int int_set_max(const struct adata *list, u32 *val); -int ec_set_max(const struct adata *list, u64 *val); -int lc_set_max(const struct adata *list, lcomm *val); -int int_set_walk(const struct adata *list, uint *pos, u32 *val); -int ec_set_walk(const struct adata *list, uint *pos, u64 *val); -int lc_set_walk(const struct adata *list, uint *pos, lcomm *val); - -void ec_set_sort_x(struct adata *set); /* Sort in place */ - -#endif diff --git a/nest/bird.h b/nest/bird.h index 55712abe..931974a0 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -9,7 +9,6 @@ #ifndef _BIRD_BIRD_H_ #define _BIRD_BIRD_H_ -#include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" #include "lib/net.h" @@ -319,7 +319,6 @@ cli_new(void *priv) c->event->data = c; c->cont = cli_hello; c->parser_pool = lp_new_default(c->pool); - c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -409,11 +408,14 @@ void cli_free(cli *c) { cli_set_log_echo(c, 0, 0); + int defer = 0; if (c->cleanup) - c->cleanup(c); + defer = c->cleanup(c); if (c == cmd_reconfig_stored_cli) cmd_reconfig_stored_cli = NULL; - rfree(c->pool); + + if (!defer) + rfree(c->pool); } /** @@ -33,12 +33,12 @@ typedef struct cli { struct cli_out *tx_buf, *tx_pos, *tx_write; event *event; void (*cont)(struct cli *c); - void (*cleanup)(struct cli *c); + int (*cleanup)(struct cli *c); /* Return 0 if finished and cli may be freed immediately. + Otherwise return 1 and call rfree(c->pool) when appropriate. */ void *rover; /* Private to continuation routine */ int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ - struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/cmds.c b/nest/cmds.c index 1a16f9c7..092be48a 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -8,7 +8,7 @@ #include "nest/bird.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "nest/cmds.h" @@ -51,17 +51,18 @@ cmd_show_symbols(struct sym_show_data *sd) cli_msg(1010, "%-8s\t%s", sd->sym->name, cf_symbol_class_name(sd->sym)); else { - HASH_WALK(config->sym_hash, next, sym) - { - if (!sym->scope->active) - continue; + for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next) + HASH_WALK(scope->hash, next, sym) + { + if (!sym->scope->active) + continue; - if (sd->type && (sym->class != sd->type)) - continue; + if (sd->type && (sym->class != sd->type)) + continue; - cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); - } - HASH_WALK_END; + cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); + } + HASH_WALK_END; cli_msg(0, ""); } @@ -108,6 +109,7 @@ print_size(char *dsc, struct resmem vals) extern pool *rt_table_pool; extern pool *rta_pool; +extern uint *pages_kept; void cmd_show_memory(void) @@ -119,8 +121,8 @@ cmd_show_memory(void) print_size("Protocols:", rmemsize(proto_pool)); struct resmem total = rmemsize(&root_pool); #ifdef HAVE_MMAP - print_size("Standby memory:", (struct resmem) { .overhead = get_page_size() * pages_kept }); - total.overhead += get_page_size() * pages_kept; + print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); + total.overhead += page_size * *pages_kept; #endif print_size("Total:", total); cli_msg(0, ""); @@ -132,7 +134,7 @@ cmd_eval(const struct f_line *expr) buffer buf; LOG_BUFFER_INIT(buf); - if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN) + if (f_eval_buf(expr, &buf) > F_RETURN) { cli_msg(8008, "runtime error"); return; diff --git a/nest/config.Y b/nest/config.Y index ac599c09..91147a29 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -112,28 +112,28 @@ proto_postconfig(void) CF_DECLS -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) -CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) -CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, +CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) +CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) @@ -231,6 +231,10 @@ table_opt: | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } + | CORK THRESHOLD expr expr { + if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); + this_table->cork_threshold.low = $3; + this_table->cork_threshold.high = $4; } ; table_opts: @@ -308,12 +312,25 @@ channel_item_: this_channel->table = $2; } | IMPORT imexport { this_channel->in_filter = $2; } + | EXPORT IN net_any imexport { + if (this_channel->net_type && ($3->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + this_channel->out_subprefix = $3; + this_channel->out_filter = $4; + } | EXPORT imexport { this_channel->out_filter = $2; } | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } - | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + | IMPORT KEEP FILTERED bool { + if ($4) + this_channel->in_keep |= RIK_REJECTED; + else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER) + cf_error("Import keep filtered is implied by the import table."); + else + this_channel->in_keep &= ~RIK_REJECTED; + } | RPKI RELOAD bool { this_channel->rpki_reload = $3; } ; @@ -373,6 +390,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } + | DEBUG TABLES bool { new_config->table_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -458,6 +476,7 @@ proto: dev_proto '}' ; dev_proto_start: proto_start DIRECT { this_proto = proto_config_new(&proto_device, $1); init_list(&DIRECT_CFG->iface_list); + this_proto->late_if_feed = 1; } ; @@ -640,28 +659,29 @@ r_args: $$ = cfg_allocz(sizeof(struct rt_show_data)); init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; - $$->running_on_config = new_config->fallback; + $$->running_on_config = config; + $$->cli = this_cli; } | r_args net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; - $$->addr_mode = RSD_ADDR_EQUAL; + $$->addr_mode = TE_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_FOR; + $$->addr_mode = TE_ADDR_FOR; } | r_args IN net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_IN; + $$->addr_mode = TE_ADDR_IN; } -| r_args TABLE CF_SYM_KNOWN { +| r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); @@ -675,13 +695,13 @@ r_args: $$->tables_defined_by = RSD_TDB_ALL; } | r_args IMPORT TABLE channel_arg { - if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); + rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->out_table); + rt_show_add_exporter($$, $4->out_table, "export"); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args FILTER filter { @@ -706,7 +726,7 @@ r_args: $$ = $1; $$->filtered = 1; } - | r_args export_mode CF_SYM_KNOWN { + | r_args export_mode symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -723,7 +743,7 @@ r_args: $$->export_channel = $3; $$->tables_defined_by = RSD_TDB_INDIRECT; } - | r_args PROTOCOL CF_SYM_KNOWN { + | r_args PROTOCOL symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -851,9 +871,11 @@ CF_CLI(DUMP INTERFACES,,, [[Dump interface information]]) CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]]) { neigh_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]]) -{ rta_dump_all(); cli_msg(0, ""); } ; -CF_CLI(DUMP ROUTES,,, [[Dump routing table]]) +{ ea_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP ROUTES,,, [[Dump routes]]) { rt_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP TABLES,,, [[Dump table connections]]) +{ rt_dump_hooks_all(); cli_msg(0, ""); } ; CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]]) { protos_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) @@ -923,9 +945,6 @@ proto_patt2: | TEXT { $$.ptr = $1; $$.patt = 1; } ; -dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ; - - CF_CODE CF_END diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/neighbor.c b/nest/neighbor.c index 1a31fb79..7cf9c85d 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -345,7 +345,7 @@ neigh_free(neighbor *n) { rem_node(&n->n); rem_node(&n->if_n); - sl_free(neigh_slab, n); + sl_free(n); } /** diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index bc88b4b4..716ce86c 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -76,16 +76,6 @@ void dump(struct proto *p) { DUMMY; } /** - * dump_attrs - dump protocol-dependent attributes - * @e: a route entry - * - * This hook dumps all attributes in the &rte which belong to this - * protocol to the debug output. - */ -void dump_attrs(rte *e) -{ DUMMY; } - -/** * start - request instance startup * @p: protocol instance * @@ -228,36 +218,6 @@ void neigh_notify(neighbor *neigh) { DUMMY; } /** - * make_tmp_attrs - convert embedded attributes to temporary ones - * @e: route entry - * @pool: linear pool to allocate attribute memory in - * - * This hook is called by the routing table functions if they need - * to convert the protocol attributes embedded directly in the &rte - * to temporary extended attributes in order to distribute them - * to other protocols or to filters. make_tmp_attrs() creates - * an &ea_list in the linear pool @pool, fills it with values of the - * temporary attributes and returns a pointer to it. - */ -ea_list *make_tmp_attrs(rte *e, struct linpool *pool) -{ DUMMY; } - -/** - * store_tmp_attrs - convert temporary attributes to embedded ones - * @e: route entry - * @attrs: temporary attributes to be converted - * - * This hook is an exact opposite of make_tmp_attrs() -- it takes - * a list of extended attributes and converts them to attributes - * embedded in the &rte corresponding to this protocol. - * - * You must be prepared for any of the attributes being missing - * from the list and use default values instead. - */ -void store_tmp_attrs(rte *e, ea_list *attrs) -{ DUMMY; } - -/** * preexport - pre-filtering decisions before route export * @p: protocol instance the route is going to be exported to * @e: the route in question diff --git a/nest/proto.c b/nest/proto.c index 31ee1fa1..853b1cf9 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -16,17 +16,16 @@ #include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/iface.h" #include "nest/cli.h" #include "filter/filter.h" #include "filter/f-inst.h" pool *proto_pool; -list proto_list; +list STATIC_LIST_INIT(proto_list); -static list protocol_list; -struct protocol *class_to_protocol[PROTOCOL__MAX]; +static list STATIC_LIST_INIT(protocol_list); #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) @@ -43,8 +42,7 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; -static char *e_states[] = { "DOWN", "FEEDING", "READY" }; +static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; @@ -52,15 +50,38 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); +static void channel_feed_end(struct channel *c); +static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) -{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } +{ return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } + +static inline event_list *proto_event_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_event_list : birdloop_event_list(p->loop); } + +static inline event_list *proto_work_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_work_list : birdloop_event_list(p->loop); } + +static inline void proto_send_event(struct proto *p) +{ ev_send(proto_event_list(p), p->event); } + +#define PROTO_ENTER_FROM_MAIN(p) ({ \ + ASSERT_DIE(birdloop_inside(&main_birdloop)); \ + struct birdloop *_loop = (p)->loop; \ + if (_loop != &main_birdloop) birdloop_enter(_loop); \ + _loop; \ + }) + +#define PROTO_LEAVE_FROM_MAIN(loop) ({ if (loop != &main_birdloop) birdloop_leave(loop); }) + +#define PROTO_LOCKED_FROM_MAIN(p) for (struct birdloop *_proto_loop = PROTO_ENTER_FROM_MAIN(p); _proto_loop; PROTO_LEAVE_FROM_MAIN(_proto_loop), (_proto_loop = NULL)) + static inline int channel_is_active(struct channel *c) -{ return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } +{ return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->proto->reload_routes && c->reloadable; } @@ -68,10 +89,46 @@ static inline int channel_reloadable(struct channel *c) static inline void channel_log_state_change(struct channel *c) { - if (c->export_state) - CD(c, "State changed to %s/%s", c_states[c->channel_state], e_states[c->export_state]); - else - CD(c, "State changed to %s", c_states[c->channel_state]); + CD(c, "State changed to %s", c_states[c->channel_state]); +} + +void +channel_import_log_state_change(struct rt_import_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + CD(c, "Channel import state changed to %s", rt_import_state_name(state)); +} + +void +channel_export_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + CD(c, "Channel export state changed to %s", rt_export_state_name(state)); + + switch (state) + { + case TES_FEEDING: + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + break; + case TES_READY: + channel_feed_end(c); + break; + } +} + +static void +channel_dump_import_req(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); +} + +static void +channel_dump_export_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void @@ -141,6 +198,15 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } +int channel_preimport(struct rt_import_request *req, rte *new, rte *old); + +void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance @@ -165,23 +231,25 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; + rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + c->out_subprefix = cf->out_subprefix; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; - c->export_state = ES_DOWN; c->last_state_change = current_time(); c->reloadable = 1; @@ -203,6 +271,7 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); + rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } @@ -223,7 +292,7 @@ proto_pause_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_START); + channel_set_state(c, CS_PAUSE); } static void @@ -232,7 +301,7 @@ proto_stop_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_FLUSHING); + channel_set_state(c, CS_STOP); } static void @@ -244,75 +313,10 @@ proto_remove_channels(struct proto *p) } static void -channel_schedule_feed(struct channel *c, int initial) -{ - // DBG("%s: Scheduling meal\n", p->name); - ASSERT(c->channel_state == CS_UP); - - c->export_state = ES_FEEDING; - c->refeeding = !initial; - - ev_schedule_work(c->feed_event); -} - -static void -channel_feed_loop(void *ptr) -{ - struct channel *c = ptr; - - if (c->export_state != ES_FEEDING) - return; - - /* Start feeding */ - if (!c->feed_active) - { - if (c->proto->feed_begin) - c->proto->feed_begin(c, !c->refeeding); - - c->refeed_pending = 0; - } - - // DBG("Feeding protocol %s continued\n", p->name); - if (!rt_feed_channel(c)) - { - ev_schedule_work(c->feed_event); - return; - } - - /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; - if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) - { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); - - /* Continue in feed - it will process routing table again from beginning */ - c->refeed_count = 0; - ev_schedule_work(c->feed_event); - return; - } - - // DBG("Feeding protocol %s finished\n", p->name); - c->export_state = ES_READY; - channel_log_state_change(c); - - if (c->proto->feed_end) - c->proto->feed_end(c); - - /* Restart feeding */ - if (c->refeed_pending) - channel_request_feeding(c); -} - - -static void -channel_roa_in_changed(struct rt_subscription *s) +channel_roa_in_changed(void *_data) { - struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); + struct channel *c = _data; + int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -323,17 +327,15 @@ channel_roa_in_changed(struct rt_subscription *s) } static void -channel_roa_out_changed(struct rt_subscription *s) +channel_roa_out_changed(void *_data) { - struct channel *c = s->data; - int active = (c->export_state == ES_FEEDING); + struct channel *c = _data; + CD(c, "Feeding triggered by RPKI change"); - CD(c, "Feeding triggered by RPKI change%s", active ? " - already active" : ""); + c->refeed_pending = 1; - if (!active) - channel_request_feeding(c); - else - c->refeed_pending = 1; + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); } /* Temporary code, subscriptions should be changed to resources */ @@ -345,14 +347,14 @@ struct roa_subscription { static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { - void (*hook)(struct rt_subscription *) = + void (*hook)(void *) = dir ? channel_roa_in_changed : channel_roa_out_changed; struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) - if ((s->s.tab == tab) && (s->s.hook == hook)) + if ((s->s.tab == tab) && (s->s.event->hook == hook)) return 1; return 0; @@ -366,9 +368,9 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir) return; struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); + s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c); + s->s.list = proto_work_list(c->proto); - s->s.hook = dir ? channel_roa_in_changed : channel_roa_out_changed; - s->s.data = c; rt_subscribe(tab, &s->s); add_tail(&c->roa_subscriptions, &s->roa_node); @@ -379,6 +381,7 @@ channel_roa_unsubscribe(struct roa_subscription *s) { rt_unsubscribe(&s->s); rem_node(&s->roa_node); + rfree(s->s.event); mb_free(s); } @@ -399,7 +402,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir) #ifdef CONFIG_BGP /* No automatic reload for BGP channels without in_table / out_table */ if (c->channel == &channel_bgp) - valid = dir ? !!c->in_table : !!c->out_table; + valid = dir ? ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table; #endif struct filter_iterator fit; @@ -409,14 +412,8 @@ channel_roa_subscribe_filter(struct channel *c, int dir) { switch (fi->fi_code) { - case FI_ROA_CHECK_IMPLICIT: - tab = fi->i_FI_ROA_CHECK_IMPLICIT.rtc->table; - if (valid) channel_roa_subscribe(c, tab, dir); - found = 1; - break; - - case FI_ROA_CHECK_EXPLICIT: - tab = fi->i_FI_ROA_CHECK_EXPLICIT.rtc->table; + case FI_ROA_CHECK: + tab = fi->i_FI_ROA_CHECK.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; @@ -445,119 +442,239 @@ channel_roa_unsubscribe_all(struct channel *c) } static void -channel_start_export(struct channel *c) +channel_start_import(struct channel *c) { + if (c->in_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); + return; + } + + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->in_req = (struct rt_import_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_import_req, + .log_state_change = channel_import_log_state_change, + .preimport = channel_preimport, + }; + ASSERT(c->channel_state == CS_UP); - ASSERT(c->export_state == ES_DOWN); - channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + + DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); + rt_request_import(c->table, &c->in_req); } static void -channel_stop_export(struct channel *c) +channel_start_export(struct channel *c) { - /* Need to abort feeding */ - if (c->export_state == ES_FEEDING) - rt_feed_channel_abort(c); + if (c->out_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + return; + } + + ASSERT(c->channel_state == CS_UP); + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->out_req = (struct rt_export_request) { + .name = rn, + .list = proto_work_list(c->proto), + .addr = c->out_subprefix, + .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_export_req, + .log_state_change = channel_export_log_state_change, + }; + + bmap_init(&c->export_map, c->proto->pool, 1024); + bmap_init(&c->export_reject_map, c->proto->pool, 1024); + + channel_reset_limit(c, &c->out_limit, PLD_OUT); + + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); + + switch (c->ra_mode) { + case RA_OPTIMAL: + c->out_req.export_one = rt_notify_optimal; + break; + case RA_ANY: + c->out_req.export_one = rt_notify_any; + c->out_req.export_bulk = rt_feed_any; + break; + case RA_ACCEPTED: + c->out_req.export_bulk = rt_notify_accepted; + break; + case RA_MERGED: + c->out_req.export_bulk = rt_notify_merged; + break; + default: + bug("Unknown route announcement mode"); + } - c->export_state = ES_DOWN; - c->stats.exp_routes = 0; - bmap_reset(&c->export_map, 1024); + DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); + rt_request_export(&c->table->exporter, &c->out_req); } +static void +channel_check_stopped(struct channel *c) +{ + switch (c->channel_state) + { + case CS_STOP: + if (c->out_req.hook || c->in_req.hook) + return; + + channel_set_state(c, CS_DOWN); + proto_send_event(c->proto); + + break; + case CS_PAUSE: + if (c->out_req.hook) + return; + + channel_set_state(c, CS_START); + break; + default: + bug("Stopped channel in a bad state: %d", c->channel_state); + } + + DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]); +} -/* Called by protocol for reload from in_table */ void -channel_schedule_reload(struct channel *c) +channel_import_stopped(struct rt_import_request *req) { - ASSERT(c->channel_state == CS_UP); + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + req->hook = NULL; - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + mb_free(c->in_req.name); + c->in_req.name = NULL; + + channel_check_stopped(c); } static void -channel_reload_loop(void *ptr) +channel_export_stopped(struct rt_export_request *req) { - struct channel *c = ptr; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; + /* The hook has already stopped */ + req->hook = NULL; - if (!rt_reload_channel(c)) + if (c->refeed_pending) { - ev_schedule_work(c->reload_event); + c->refeeding = 1; + c->refeed_pending = 0; + rt_request_export(&c->table->exporter, req); return; } - /* Restart reload */ - if (c->reload_pending) - channel_request_reload(c); + mb_free(c->out_req.name); + c->out_req.name = NULL; + + channel_check_stopped(c); } static void -channel_reset_import(struct channel *c) +channel_feed_end(struct channel *c) { - /* Need to abort feeding */ - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); + struct rt_export_request *req = &c->out_req; + + /* Reset export limit if the feed ended with acceptable number of exported routes */ + struct limit *l = &c->out_limit; + if (c->refeeding && + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= l->max) && + (l->count <= l->max)) + { + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->max); + + c->refeed_pending = 1; + rt_stop_export(req, channel_export_stopped); + return; + } - rt_prune_sync(c->in_table, 1); + if (c->proto->feed_end) + c->proto->feed_end(c); + + if (c->refeed_pending) + rt_stop_export(req, channel_export_stopped); + else + c->refeeding = 0; } -static void -channel_reset_export(struct channel *c) +/* Called by protocol for reload from in_table */ +void +channel_schedule_reload(struct channel *c) { - /* Just free the routes */ - rt_prune_sync(c->out_table, 1); + ASSERT(c->in_req.hook); + + rt_request_export(&c->table->exporter, &c->reload_req); } -/* Called by protocol to activate in_table */ -void -channel_setup_in_table(struct channel *c) +static void +channel_reload_stopped(struct rt_export_request *req) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - cf->name = "import"; - cf->addr_type = c->net_type; - cf->internal = 1; + /* Restart reload */ + if (c->reload_pending) + channel_request_reload(c); +} - c->in_table = rt_setup(c->proto->pool, cf); +static void +channel_reload_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, channel_reload_stopped); +} - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); +static void +channel_reload_dump_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + debug(" Channel %s.%s import reload request %p\n", c->proto->name, c->name, req); } -/* Called by protocol to activate out_table */ +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + +/* Called by protocol to activate in_table */ void -channel_setup_out_table(struct channel *c) +channel_setup_in_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - cf->name = "export"; - cf->addr_type = c->net_type; - cf->internal = 1; + c->reload_req = (struct rt_export_request) { + .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .list = proto_work_list(c->proto), + .trace_routes = c->debug | c->proto->debug, + .export_bulk = channel_reload_export_bulk, + .dump_req = channel_reload_dump_req, + .log_state_change = channel_reload_log_state_change, + }; - c->out_table = rt_setup(c->proto->pool, cf); + c->in_keep |= RIK_PREFILTER; } static void channel_do_start(struct channel *c) { - rt_lock_table(c->table); - add_tail(&c->table->channels, &c->table_node); c->proto->active_channels++; - c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); - - bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); - - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); - CALL(c->channel->start, c); + + channel_start_import(c); } static void @@ -572,9 +689,31 @@ channel_do_up(struct channel *c) } static void -channel_do_flush(struct channel *c) +channel_do_pause(struct channel *c) +{ + /* Need to abort feeding */ + if (c->reload_req.hook) + { + c->reload_pending = 0; + rt_stop_export(&c->reload_req, channel_reload_stopped); + } + + /* Stop export */ + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); + + channel_roa_unsubscribe_all(c); + + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); +} + +static void +channel_do_stop(struct channel *c) { - rt_schedule_prune(c->table); + /* Stop import */ + if (c->in_req.hook) + rt_stop_import(&c->in_req, channel_import_stopped); c->gr_wait = 0; if (c->gr_lock) @@ -582,32 +721,19 @@ channel_do_flush(struct channel *c) CALL(c->channel->shutdown, c); - /* This have to be done in here, as channel pool is freed before channel_do_down() */ - bmap_free(&c->export_map); - c->in_table = NULL; - c->reload_event = NULL; - c->out_table = NULL; - - channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->feed_active && !c->reload_active); + ASSERT(!c->reload_req.hook); - rem_node(&c->table_node); - rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->stats.imp_routes + c->stats.filt_routes) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; /* The in_table and out_table are going to be freed by freeing their resource pools. */ @@ -616,14 +742,13 @@ channel_do_down(struct channel *c) /* Schedule protocol shutddown */ if (proto_is_done(c->proto)) - ev_schedule(c->proto->event); + proto_send_event(c->proto); } void channel_set_state(struct channel *c, uint state) { uint cs = c->channel_state; - uint es = c->export_state; DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); if (state == cs) @@ -635,20 +760,11 @@ channel_set_state(struct channel *c, uint state) switch (state) { case CS_START: - ASSERT(cs == CS_DOWN || cs == CS_UP); + ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); - if (es != ES_DOWN) - channel_stop_export(c); - - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); - - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); - break; case CS_UP: @@ -663,23 +779,24 @@ channel_set_state(struct channel *c, uint state) channel_do_up(c); break; - case CS_FLUSHING: - ASSERT(cs == CS_START || cs == CS_UP); + case CS_PAUSE: + ASSERT(cs == CS_UP); - if (es != ES_DOWN) - channel_stop_export(c); + if (cs == CS_UP) + channel_do_pause(c); + break; - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); + case CS_STOP: + ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE); - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); + if (cs == CS_UP) + channel_do_pause(c); - channel_do_flush(c); + channel_do_stop(c); break; case CS_DOWN: - ASSERT(cs == CS_FLUSHING); + ASSERT(cs == CS_STOP); channel_do_down(c); break; @@ -704,35 +821,16 @@ channel_set_state(struct channel *c, uint state) void channel_request_feeding(struct channel *c) { - ASSERT(c->channel_state == CS_UP); - - CD(c, "Feeding requested"); - - /* Do nothing if we are still waiting for feeding */ - if (c->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (c->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (!c->feed_active) - return; - - rt_feed_channel_abort(c); - } + ASSERT(c->out_req.hook); - /* Track number of exported routes during refeed */ - c->refeed_count = 0; - - channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ - channel_log_state_change(c); + c->refeed_pending = 1; + rt_stop_export(&c->out_req, channel_export_stopped); } static void channel_request_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); ASSERT(channel_reloadable(c)); CD(c, "Reload requested"); @@ -743,8 +841,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -830,7 +928,12 @@ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* FIXME: better handle these changes, also handle in_keep_filtered */ - if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + if ((c->table != cf->table->table) || + (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || + (cf->in_keep != c->in_keep) || + cf->out_subprefix && c->out_subprefix && + !net_equal(cf->out_subprefix, c->out_subprefix) || + (!cf->out_subprefix != !c->out_subprefix)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ @@ -847,19 +950,19 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; + c->out_req.addr = c->out_subprefix = cf->out_subprefix; c->debug = cf->debug; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -950,17 +1053,34 @@ proto_configure_channel(struct proto *p, struct channel **pc, struct channel_con return 1; } +static void +proto_cleanup(struct proto *p) +{ + rfree(p->pool); + p->pool = NULL; + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); +} static void -proto_event(void *ptr) +proto_loop_stopped(void *ptr) { struct proto *p = ptr; - if (p->do_start) - { - if_feed_baby(p); - p->do_start = 0; - } + birdloop_enter(&main_birdloop); + + p->loop = &main_birdloop; + proto_cleanup(p); + + birdloop_leave(&main_birdloop); +} + +static void +proto_event(void *ptr) +{ + struct proto *p = ptr; if (p->do_stop) { @@ -970,14 +1090,10 @@ proto_event(void *ptr) } if (proto_is_done(p)) - { - if (p->proto->cleanup) - p->proto->cleanup(p); - - p->active = 0; - proto_log_state_change(p); - proto_rethink_goal(p); - } + if (p->loop != &main_birdloop) + birdloop_stop_self(p->loop, proto_loop_stopped, p); + else + proto_cleanup(p); } @@ -1018,6 +1134,7 @@ proto_init(struct proto_config *c, node *n) struct protocol *pr = c->protocol; struct proto *p = pr->init(c); + p->loop = &main_birdloop; p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; @@ -1034,11 +1151,19 @@ proto_init(struct proto_config *c, node *n) static void proto_start(struct proto *p) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + + p->pool = rp_newf(proto_pool, "Protocol %s", p->cf->name); if (graceful_restart_state == GRS_INIT) p->gr_recovery = 1; + + if (p->cf->loop_order != DOMAIN_ORDER(the_bird)) + p->loop = birdloop_new(p->pool, p->cf->loop_order, p->pool->name); + + PROTO_LOCKED_FROM_MAIN(p) + proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP)); } @@ -1074,6 +1199,7 @@ proto_config_new(struct protocol *pr, int class) cf->class = class; cf->debug = new_config->proto_default_debug; cf->mrtdump = new_config->proto_default_mrtdump; + cf->loop_order = DOMAIN_ORDER(the_bird); init_list(&cf->channels); @@ -1363,11 +1489,20 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty } static void -proto_rethink_goal(struct proto *p) +proto_shutdown(struct proto *p) { - struct protocol *q; - byte goal; + if (p->proto_state == PS_START || p->proto_state == PS_UP) + { + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (p->proto->shutdown ? p->proto->shutdown(p) : PS_DOWN)); + } +} +static void +proto_rethink_goal(struct proto *p) +{ if (p->reconfiguring && !p->active) { struct proto_config *nc = p->cf_new; @@ -1387,32 +1522,12 @@ proto_rethink_goal(struct proto *p) /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) - goal = PS_DOWN; - else - goal = PS_UP; - - q = p->proto; - if (goal == PS_UP) - { - if (!p->active) - { - /* Going up */ - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_start(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } - } - else { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - /* Going down */ - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); - } + PROTO_LOCKED_FROM_MAIN(p) + proto_shutdown(p); } + else if (!p->active) + proto_start(p); } struct proto * @@ -1524,7 +1639,7 @@ graceful_restart_done(timer *t UNUSED) WALK_LIST(c, p->channels) { /* Resume postponed export of routes */ - if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ @@ -1614,7 +1729,11 @@ protos_dump_all(void) struct proto *p; WALK_LIST(p, proto_list) { - debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); +#define DPF(x) (p->x ? " " #x : "") + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + p->name, p, p_states[p->proto_state], p->active_channels, + DPF(disabled), DPF(active), DPF(do_stop), DPF(reconfiguring)); +#undef DPF struct channel *c; WALK_LIST(c, p->channels) @@ -1624,6 +1743,9 @@ protos_dump_all(void) debug("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) debug("\tOutput filter: %s\n", filter_name(c->out_filter)); + debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], + c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", + c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -1643,14 +1765,13 @@ void proto_build(struct protocol *p) { add_tail(&protocol_list, &p->n); - ASSERT(p->class); - ASSERT(!class_to_protocol[p->class]); - class_to_protocol[p->class] = p; } /* FIXME: convert this call to some protocol hook */ extern void bfd_init_all(void); +void protos_build_gen(void); + /** * protos_build - build a protocol list * @@ -1663,44 +1784,7 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&proto_list); - init_list(&protocol_list); - - proto_build(&proto_device); -#ifdef CONFIG_RADV - proto_build(&proto_radv); -#endif -#ifdef CONFIG_RIP - proto_build(&proto_rip); -#endif -#ifdef CONFIG_STATIC - proto_build(&proto_static); -#endif -#ifdef CONFIG_MRT - proto_build(&proto_mrt); -#endif -#ifdef CONFIG_OSPF - proto_build(&proto_ospf); -#endif -#ifdef CONFIG_PIPE - proto_build(&proto_pipe); -#endif -#ifdef CONFIG_BGP - proto_build(&proto_bgp); -#endif -#ifdef CONFIG_BFD - proto_build(&proto_bfd); - bfd_init_all(); -#endif -#ifdef CONFIG_BABEL - proto_build(&proto_babel); -#endif -#ifdef CONFIG_RPKI - proto_build(&proto_rpki); -#endif -#ifdef CONFIG_PERF - proto_build(&proto_perf); -#endif + protos_build_gen(); proto_pool = rp_new(&root_pool, "Protocols"); proto_shutdown_timer = tm_new(proto_pool); @@ -1780,108 +1864,132 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); +} - if (l->state == PLS_BLOCKED) +static void +channel_activate_limit(struct channel *c, struct limit *l, int dir) +{ + if (c->limit_active & (1 << dir)) return; - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); +} - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; +static int +channel_limit_warn(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; + channel_log_limit(c, l, dir); - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + return 0; } -static void -channel_verify_limits(struct channel *c) +static int +channel_limit_block(struct limit *l, void *data) { - struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); + channel_activate_limit(c, l, dir); - l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); + return 1; +} - l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); + + return 1; } -static inline void -channel_reset_limit(struct channel_limit *l) +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) { - if (l->action) - l->state = PLS_INITIAL; + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); } +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) +{ + limit_reset(l); + c->limit_active &= ~(1 << dir); +} + +static struct rte_owner_class default_rte_owner_class; + static inline void proto_do_start(struct proto *p) { p->active = 1; - p->do_start = 1; - ev_schedule(p->event); + + rt_init_sources(&p->sources, p->name, proto_event_list(p)); + if (!p->sources.class) + p->sources.class = &default_rte_owner_class; + + if (!p->cf->late_if_feed) + if_feed_baby(p); } static void proto_do_up(struct proto *p) { if (!p->main_source) - { p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - } + // Locked automaticaly proto_start_channels(p); + + if (p->cf->late_if_feed) + if_feed_baby(p); } static inline void @@ -1896,9 +2004,6 @@ proto_do_stop(struct proto *p) p->down_sched = 0; p->gr_recovery = 0; - p->do_stop = 1; - ev_schedule(p->event); - if (p->main_source) { rt_unlock_source(p->main_source); @@ -1906,6 +2011,10 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + rt_destroy_sources(&p->sources, p->event); + + p->do_stop = 1; + proto_send_event(p); } static void @@ -1913,12 +2022,10 @@ proto_do_down(struct proto *p) { p->down_code = 0; neigh_prune(); - rfree(p->pool); - p->pool = NULL; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) - ev_schedule(p->event); + proto_send_event(p); } @@ -2009,38 +2116,58 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct channel_import_stats *ch_is = &c->import_stats; + struct channel_export_stats *ch_es = &c->export_stats; + struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; + struct rt_export_stats *rt_es = c->out_req.hook ? &c->out_req.hook->stats : NULL; - if (c->in_keep_filtered) +#define SON(ie, item) ((ie) ? (ie)->item : 0) +#define SCI(item) SON(ch_is, item) +#define SCE(item) SON(ch_es, item) +#define SRI(item) SON(rt_is, item) +#define SRE(item) SON(rt_es, item) + + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; + + if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); - - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + in_routes, out_routes, SRI(pref)); + + cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + SCI(updates_received), SCI(updates_invalid), + SCI(updates_filtered), SRI(updates_ignored), + SCI(updates_limited_rx), SCI(updates_limited_in), + SRI(updates_accepted)); + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", + SCI(withdraws_received), SCI(withdraws_invalid), + SRI(withdraws_ignored), SRI(withdraws_accepted)); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", + SRE(updates_received), SCE(updates_rejected), + SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); + cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", + SRE(withdraws_received), SCE(withdraws_accepted)); + +#undef SRI +#undef SRE +#undef SCI +#undef SCE +#undef SON } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2048,6 +2175,8 @@ channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(c->out_req.hook))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); @@ -2058,9 +2187,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? " waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); @@ -2135,7 +2264,7 @@ proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_DISABLE; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); cli_msg(-9, "%s: disabled", p->name); } @@ -2168,9 +2297,9 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_RESTART; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); p->disabled = 0; - proto_rethink_goal(p); + /* After the protocol shuts down, proto_rethink_goal() is run from proto_event. */ cli_msg(-12, "%s: restarted", p->name); } @@ -2243,8 +2372,15 @@ proto_apply_cmd_symbol(const struct symbol *s, void (* cmd)(struct proto *, uint return; } - cmd(s->proto->proto, arg, 0); - cli_msg(0, ""); + if (s->proto->proto) + { + struct proto *p = s->proto->proto; + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, 0); + cli_msg(0, ""); + } + else + cli_msg(9002, "%s does not exist", s->name); } static void @@ -2255,7 +2391,8 @@ proto_apply_cmd_patt(const char *patt, void (* cmd)(struct proto *, uintptr_t, i WALK_LIST(p, proto_list) if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, cnt++); if (!cnt) cli_msg(8003, "No protocols match"); diff --git a/nest/protocol.h b/nest/protocol.h index 3b3812a5..709d6415 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,7 +12,8 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" -#include "nest/route.h" +#include "nest/rt.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -37,56 +38,35 @@ struct symbol; * Routing Protocol */ -enum protocol_class { - PROTOCOL_NONE, - PROTOCOL_BABEL, - PROTOCOL_BFD, - PROTOCOL_BGP, - PROTOCOL_DEVICE, - PROTOCOL_DIRECT, - PROTOCOL_KERNEL, - PROTOCOL_OSPF, - PROTOCOL_MRT, - PROTOCOL_PERF, - PROTOCOL_PIPE, - PROTOCOL_RADV, - PROTOCOL_RIP, - PROTOCOL_RPKI, - PROTOCOL_STATIC, - PROTOCOL__MAX -}; - -extern struct protocol *class_to_protocol[PROTOCOL__MAX]; struct protocol { node n; char *name; char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ - enum protocol_class class; /* Machine readable protocol class */ uint preference; /* Default protocol preference */ uint channel_mask; /* Mask of accepted channel types (NB_*) */ uint proto_size; /* Size of protocol data structure */ uint config_size; /* Size of protocol config data structure */ + uint eattr_begin; /* First ID of registered eattrs */ + uint eattr_end; /* End of eattr id zone */ + void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct proto_config *); /* After configuring each instance */ struct proto * (*init)(struct proto_config *); /* Create new instance */ int (*reconfigure)(struct proto *, struct proto_config *); /* Try to reconfigure instance, returns success */ void (*dump)(struct proto *); /* Debugging dump */ - void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ - void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ - void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ - int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ +// int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ }; -void protos_build(void); -void proto_build(struct protocol *); +void protos_build(void); /* Called from sysdep to initialize protocols */ +void proto_build(struct protocol *); /* Called from protocol to register itself */ void protos_preconfig(struct config *); void protos_commit(struct config *new, struct config *old, int force_restart, int type); struct proto * proto_spawn(struct proto_config *cf, uint disabled); @@ -121,8 +101,10 @@ struct proto_config { u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ u8 vrf_set; /* Related VRF instance (below) is defined */ + u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ + uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */ list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ @@ -133,31 +115,6 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { - /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ - - /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ -}; - struct proto { node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ @@ -165,22 +122,24 @@ struct proto { struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ event *event; /* Protocol event */ + struct birdloop *loop; /* BIRDloop running this protocol */ list channels; /* List of channels to rtables (struct channel) */ struct channel *main_channel; /* Primary channel */ struct rte_src *main_source; /* Primary route source */ + struct rte_owner sources; /* Route source owner structure */ struct iface *vrf; /* Related VRF instance, NULL if global */ const char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ uint active_channels; /* Number of active channels */ + uint active_loops; /* Number of active IO loops */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ - byte do_start; /* Start actions are scheduled */ byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ @@ -198,12 +157,11 @@ struct proto { * ifa_notify Notify protocol about interface address changes. * rt_notify Notify protocol about routing table updates. * neigh_notify Notify protocol about neighbor cache events. - * make_tmp_attrs Add attributes to rta from from private attrs stored in rte. The route and rta MUST NOT be cached. - * store_tmp_attrs Store private attrs back to rte and undef added attributes. The route and rta MUST NOT be cached. - * preexport Called as the first step of the route exporting process. - * It can construct a new rte, add private attributes and - * decide whether the route shall be exported: 1=yes, -1=no, - * 0=process it through the export filter set by the user. + * preexport Called as the first step of the route exporting process. + * It can decide whether the route shall be exported: + * -1 = reject, + * 0 = continue to export filter + * 1 = accept immediately * reload_routes Request channel to reload all its routes to the core * (using rte_update()). Returns: 0=reload cannot be done, * 1= reload is scheduled and will happen (asynchronously). @@ -213,11 +171,9 @@ struct proto { void (*if_notify)(struct proto *, unsigned flags, struct iface *i); void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); - void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old); + void (*rt_notify)(struct proto *, struct channel *, const net_addr *net, struct rte *new, const struct rte *old); void (*neigh_notify)(struct neighbor *neigh); - void (*make_tmp_attrs)(struct rte *rt, struct linpool *pool); - void (*store_tmp_attrs)(struct rte *rt, struct linpool *pool); - int (*preexport)(struct channel *, struct rte **rt, struct linpool *pool); + int (*preexport)(struct channel *, struct rte *rt); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); void (*feed_end)(struct channel *); @@ -235,11 +191,10 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); - int (*rte_same)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte * (*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); + u32 (*rte_igp_metric)(const struct rte *); /* Hic sunt protocol-specific data */ }; @@ -279,7 +234,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel *c, uint mask); @@ -386,6 +341,8 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). */ +static inline int proto_is_inactive(struct proto *p) +{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); } /* @@ -434,18 +391,29 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; + +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) + + +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) /* * Channels @@ -469,7 +437,6 @@ struct channel_class { void (*dump)(struct proto *); /* Debugging dump */ - void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ @@ -489,8 +456,10 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ + (relevant when in_keep & RIK_REJECTED) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ @@ -499,13 +468,12 @@ struct channel_config { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 rpki_reload; /* RPKI changes trigger channel reload */ }; struct channel { node n; /* Node in proto->channels */ - node table_node; /* Node in table->channels */ const char *name; /* Channel name (may be NULL) */ const struct channel_class *channel; @@ -514,14 +482,40 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ - struct bmap export_map; /* Keeps track which routes passed export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ - - struct event *feed_event; /* Event responsible for feeding */ - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ + struct bmap export_map; /* Keeps track which routes were really exported */ + struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ + + struct limit rx_limit; /* Receive limit (for in_keep & RIK_REJECTED) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ + + struct channel_import_stats { + /* Import - from protocol to core */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_limited_rx; /* Number of route updates exceeding the rx_limit */ + u32 updates_limited_in; /* Number of route updates exceeding the in_limit */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + } import_stats; + + struct channel_export_stats { + /* Export - from core to protocol */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + u32 updates_limited; /* Number of route updates exceeding the out_limit */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } export_stats; + + struct rt_import_request in_req; /* Table import connection */ + struct rt_export_request out_req; /* Table export connection */ + u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ @@ -529,36 +523,31 @@ struct channel { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 disabled; u8 stale; /* Used in reconfiguration */ u8 channel_state; - u8 export_state; /* Route export state (ES_*, see below) */ - u8 feed_active; - u8 flush_active; - u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 refeeding; /* Refeeding the channel. */ u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct rt_export_request reload_req; /* Feeder for import reload */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 rpki_reload; /* RPKI changes trigger channel reload */ - struct rtable *out_table; /* Internal table for exported routes */ + struct rt_exporter *out_table; /* Internal table for exported routes */ list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ +#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */ /* * Channel states @@ -585,34 +574,34 @@ struct channel { * restricted by that and is on volition of the protocol. Generally, channels * are opened in protocols' start() hooks when going to PS_UP. * - * CS_FLUSHING - The transitional state between initialized channel and closed + * CS_STOP - The transitional state between initialized channel and closed * channel. The channel is still initialized, but no route exchange is allowed. * Instead, the associated table is running flush loop to remove routes imported * through the channel. After that, the channel changes state to CS_DOWN and * is detached from the table (the table is unlocked and the channel is unlinked - * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * from it). Unlike other states, the CS_STOP state is not explicitly * entered or left by the protocol. A protocol may request to close a channel * (by calling channel_close()), which causes the channel to change state to - * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * CS_STOP and later to CS_DOWN. Also note that channels are closed * automatically by the core when the protocol is going down. * + * CS_PAUSE - Almost the same as CS_STOP, just the table import is kept and + * the table export is stopped before transitioning to CS_START. + * * Allowed transitions: * * CS_DOWN -> CS_START / CS_UP - * CS_START -> CS_UP / CS_FLUSHING - * CS_UP -> CS_START / CS_FLUSHING - * CS_FLUSHING -> CS_DOWN (automatic) + * CS_START -> CS_UP / CS_STOP + * CS_UP -> CS_PAUSE / CS_STOP + * CS_PAUSE -> CS_START (automatic) + * CS_STOP -> CS_DOWN (automatic) */ #define CS_DOWN 0 #define CS_START 1 #define CS_UP 2 -#define CS_FLUSHING 3 - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - +#define CS_STOP 3 +#define CS_PAUSE 4 struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) @@ -625,30 +614,15 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_ void channel_set_state(struct channel *c, uint state); void channel_setup_in_table(struct channel *c); -void channel_setup_out_table(struct channel *c); void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } -static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); int channel_reconfigure(struct channel *c, struct channel_config *cf); - -/* Moved from route.h to avoid dependency conflicts */ -static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); } - -static inline void -rte_update3(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - - rte_update2(c, n, new, src); -} - - #endif diff --git a/nest/route.h b/nest/route.h deleted file mode 100644 index 1dacd92f..00000000 --- a/nest/route.h +++ /dev/null @@ -1,792 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Routing Table - * - * (c) 1998--2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ROUTE_H_ -#define _BIRD_ROUTE_H_ - -#include "lib/lists.h" -#include "lib/bitmap.h" -#include "lib/resource.h" -#include "lib/net.h" - -struct ea_list; -struct protocol; -struct proto; -struct rte_src; -struct symbol; -struct timer; -struct fib; -struct filter; -struct f_trie; -struct f_trie_walk_state; -struct cli; - -/* - * Generic data structure for storing network prefixes. Also used - * for the master routing table. Currently implemented as a hash - * table. - * - * Available operations: - * - insertion of new entry - * - deletion of entry - * - searching for entry by network prefix - * - asynchronous retrieval of fib contents - */ - -struct fib_node { - struct fib_node *next; /* Next in hash chain */ - struct fib_iterator *readers; /* List of readers of this node */ - net_addr addr[0]; -}; - -struct fib_iterator { /* See lib/slists.h for an explanation */ - struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ - byte efef; /* 0xff to distinguish between iterator and node */ - byte pad[3]; - struct fib_node *node; /* Or NULL if freshly merged */ - uint hash; -}; - -typedef void (*fib_init_fn)(struct fib *, void *); - -struct fib { - pool *fib_pool; /* Pool holding all our data */ - slab *fib_slab; /* Slab holding all fib nodes */ - struct fib_node **hash_table; /* Node hash table */ - uint hash_size; /* Number of hash table entries (a power of two) */ - uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 32 - hash_order */ - uint addr_type; /* Type of address data stored in fib (NET_*) */ - uint node_size; /* FIB node size, 0 for nonuniform */ - uint node_offset; /* Offset of fib_node struct inside of user data */ - uint entries; /* Number of entries */ - uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_fn init; /* Constructor */ -}; - -static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) -{ return e ? (void *) ((char *) e - f->node_offset) : NULL; } - -static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) -{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } - -void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); -void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ -void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ -void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ -void fib_delete(struct fib *, void *); /* Remove fib entry */ -void fib_free(struct fib *); /* Destroy the fib */ -void fib_check(struct fib *); /* Consistency check for debugging */ - -void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ -struct fib_node *fit_get(struct fib *, struct fib_iterator *); -void fit_put(struct fib_iterator *, struct fib_node *); -void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -void fit_put_end(struct fib_iterator *i); -void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); - - -#define FIB_WALK(fib, type, z) do { \ - struct fib_node *fn_, **ff_ = (fib)->hash_table; \ - uint count_ = (fib)->hash_size; \ - type *z; \ - while (count_--) \ - for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) - -#define FIB_WALK_END } while (0) - -#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) - -#define FIB_ITERATE_START(fib, it, type, z) do { \ - struct fib_node *fn_ = fit_get(fib, it); \ - uint count_ = (fib)->hash_size; \ - uint hpos_ = (it)->hash; \ - type *z; \ - for(;;) { \ - if (!fn_) \ - { \ - if (++hpos_ >= count_) \ - break; \ - fn_ = (fib)->hash_table[hpos_]; \ - continue; \ - } \ - z = fib_node_to_user(fib, fn_); - -#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) - -#define FIB_ITERATE_PUT(it) fit_put(it, fn_) - -#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) - -#define FIB_ITERATE_PUT_END(it) fit_put_end(it) - -#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) - -#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) - - -/* - * Master Routing Tables. Generally speaking, each of them contains a FIB - * with each entry pointing to a list of route entries representing routes - * to given network (with the selected one at the head). - * - * Each of the RTE's contains variable data (the preference and protocol-dependent - * metrics) and a pointer to a route attribute block common for many routes). - * - * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. - */ - -struct rtable_config { - node n; - char *name; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - uint gc_threshold; /* Maximum number of operations before GC is run */ - uint gc_period; /* Approximate time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ - byte trie_used; /* Rtable has attached trie */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ - pool *rp; /* Resource pool to allocate everything from, including itself */ - struct fib fib; - struct f_trie *trie; /* Trie of prefixes defined in fib */ - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ - int use_count; /* Number of protocols using this table */ - u32 rt_count; /* Number of routes in the table */ - - byte internal; /* Internal table of a protocol */ - - struct hmap id_map; - struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. - */ - struct event *rt_event; /* Routing table event */ - struct timer *prune_timer; /* Timer for periodic pruning / GC */ - btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ - btime gc_time; /* Time of last GC */ - uint gc_counter; /* Number of operations since last GC */ - byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte prune_trie; /* Prune prefix trie during next table prune */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte nhu_state; /* Next Hop Update state */ - struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ - struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ - struct f_trie *trie_new; /* New prefix trie defined during pruning */ - struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ - u32 trie_lock_count; /* Prefix trie locked by walks */ - u32 trie_old_lock_count; /* Old prefix trie locked by walks */ - - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ - list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ - struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ -} rtable; - -struct rt_subscription { - node n; - rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; -}; - -struct rt_flowspec_link { - node n; - rtable *src; - rtable *dst; - u32 uc; -}; - -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - -typedef struct network { - struct rte *routes; /* Available routes for this network */ - struct fib_node n; /* FIB flags reserved for kernel syncer */ -} net; - -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - struct rta *src; /* Source rta entry */ - byte dest; /* Chosen route destination type (RTD_...) */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - -typedef struct rte { - struct rte *next; - net *net; /* Network this RTE belongs to */ - struct channel *sender; /* Channel used to send the route to the routing table */ - struct rta *attrs; /* Attributes of this route */ - u32 id; /* Table specific route id */ - byte flags; /* Flags (REF_...) */ - byte pflags; /* Protocol-specific flags */ - word pref; /* Route preference */ - btime lastmod; /* Last modified */ - union { /* Protocol-dependent data (metrics etc.) */ -#ifdef CONFIG_RIP - struct { - struct iface *from; /* Incoming iface */ - u8 metric; /* RIP metric */ - u16 tag; /* External route tag */ - } rip; -#endif -#ifdef CONFIG_OSPF - struct { - u32 metric1, metric2; /* OSPF Type 1 and Type 2 metrics */ - u32 tag; /* External route tag */ - u32 router_id; /* Router that originated this route */ - } ospf; -#endif -#ifdef CONFIG_BGP - struct { - u8 suppressed; /* Used for deterministic MED comparison */ - s8 stale; /* Route is LLGR_STALE, -1 if unknown */ - struct rtable *base_table; /* Base table for Flowspec validation */ - } bgp; -#endif -#ifdef CONFIG_BABEL - struct { - u16 seqno; /* Babel seqno */ - u16 metric; /* Babel metric */ - u64 router_id; /* Babel router id */ - } babel; -#endif - struct { /* Routes generated by krt sync (both temporary and inherited ones) */ - s8 src; /* Alleged route source (see krt.h) */ - u8 proto; /* Kernel source protocol ID */ - u8 seen; /* Seen during last scan */ - u8 best; /* Best route in network, propagated to core */ - u32 metric; /* Kernel metric */ - } krt; - } u; -} rte; - -#define REF_COW 1 /* Copy this rte on write */ -#define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ - -/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } - -/* Route just has REF_FILTERED flag */ -static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } - - -/* Types of route announcement, also used as flags */ -#define RA_UNDEF 0 /* Undefined RA type */ -#define RA_OPTIMAL 1 /* Announcement of optimal route change */ -#define RA_ACCEPTED 2 /* Announcement of first accepted route */ -#define RA_ANY 3 /* Announcement of any route change */ -#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ - -/* Return value of preexport() callback */ -#define RIC_ACCEPT 1 /* Accepted by protocol */ -#define RIC_PROCESS 0 /* Process it through import filter */ -#define RIC_REJECT -1 /* Rejected by protocol */ -#define RIC_DROP -2 /* Silently dropped by protocol */ - -extern list routing_tables; -struct config; - -void rt_init(void); -void rt_preconfig(struct config *); -void rt_postconfig(struct config *); -void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -struct f_trie * rt_lock_trie(rtable *tab); -void rt_unlock_trie(rtable *tab, struct f_trie *trie); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); -void rt_flowspec_link(rtable *src, rtable *dst); -void rt_flowspec_unlink(rtable *src, rtable *dst); -rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } - -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -net *net_get(rtable *tab, const net_addr *addr); -net *net_route(rtable *tab, const net_addr *n); -int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -rte *rte_find(net *net, struct rte_src *src); -rte *rte_get_temp(struct rta *); -void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -/* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); -void rt_schedule_prune(rtable *t); -void rte_dump(rte *); -void rte_free(rte *); -rte *rte_do_cow(rte *); -static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; } -rte *rte_cow_rta(rte *r, linpool *lp); -void rte_init_tmp_attrs(struct rte *r, linpool *lp, uint max); -void rte_make_tmp_attr(struct rte *r, uint id, uint type, uintptr_t val); -void rte_make_tmp_attrs(struct rte **r, struct linpool *pool, struct rta **old_attrs); -uintptr_t rte_store_tmp_attr(struct rte *r, uint id); -void rt_dump(rtable *); -void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); -void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed); -struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - -static inline int rt_is_ip(rtable *tab) -{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } - -static inline int rt_is_vpn(rtable *tab) -{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } - -static inline int rt_is_roa(rtable *tab) -{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } - -static inline int rt_is_flow(rtable *tab) -{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } - - -/* Default limit for ECMP next hops, defined in sysdep code */ -extern const int rt_default_ecmp; - -struct rt_show_data_rtable { - node n; - rtable *table; - struct channel *export_channel; -}; - -struct rt_show_data { - net_addr *addr; - list tables; - struct rt_show_data_rtable *tab; /* Iterator over table list */ - struct rt_show_data_rtable *last_table; /* Last table in output */ - struct fib_iterator fit; /* Iterator over networks in table */ - struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ - struct f_trie *walk_lock; /* Locked trie for walking */ - int verbose, tables_defined_by; - const struct filter *filter; - struct proto *show_protocol; - struct proto *export_protocol; - struct channel *export_channel; - struct config *running_on_config; - struct krt_proto *kernel; - int export_mode, addr_mode, primary_only, filtered, stats; - - int table_open; /* Iteration (fit) is open */ - int trie_walk; /* Current table is iterated using trie */ - int net_counter, rt_counter, show_counter, table_counter; - int net_counter_last, rt_counter_last, show_counter_last; -}; - -void rt_show(struct rt_show_data *); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); - -/* Value of table definition mode in struct rt_show_data */ -#define RSD_TDB_DEFAULT 0 /* no table specified */ -#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ -#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ -#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ - -#define RSD_TDB_SET 0x1 /* internal: show empty tables */ -#define RSD_TDB_NMN 0x2 /* internal: need matching net */ - -/* Value of addr_mode */ -#define RSD_ADDR_EQUAL 1 /* Exact query - show route <addr> */ -#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ -#define RSD_ADDR_IN 3 /* Interval query - show route in <addr> */ - -/* Value of export_mode in struct rt_show_data */ -#define RSEM_NONE 0 /* Export mode not used */ -#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ -#define RSEM_EXPORT 2 /* Routes accepted by export filter */ -#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ -#define RSEM_EXPORTED 4 /* Routes marked in export map */ - -/* - * Route Attributes - * - * Beware: All standard BGP attributes must be represented here instead - * of making them local to the route. This is needed to ensure proper - * construction of BGP route attribute lists. - */ - -/* Nexthop structure */ -struct nexthop { - ip_addr gw; /* Next hop */ - struct iface *iface; /* Outgoing interface */ - struct nexthop *next; - byte flags; - byte weight; - byte labels_orig; /* Number of labels before hostentry was applied */ - byte labels; /* Number of all labels */ - u32 label[0]; -}; - -#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ - - -struct rte_src { - struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ - u32 private_id; /* Private ID, assigned by the protocol */ - u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ -}; - - -typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* Hash over important fields */ - struct ea_list *eattrs; /* Extended Attribute chain */ - struct rte_src *src; /* Route source that created the route */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - ip_addr from; /* Advertising router */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u8 source; /* Route source (RTS_...) */ - u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ - u8 dest; /* Route destination type (RTD_...) */ - u8 aflags; - struct nexthop nh; /* Next hop */ -} rta; - -#define RTS_DUMMY 0 /* Dummy route to be removed soon */ -#define RTS_STATIC 1 /* Normal static route */ -#define RTS_INHERIT 2 /* Route inherited from kernel */ -#define RTS_DEVICE 3 /* Device route */ -#define RTS_STATIC_DEVICE 4 /* Static device route */ -#define RTS_REDIRECT 5 /* Learned via redirect */ -#define RTS_RIP 6 /* RIP route */ -#define RTS_OSPF 7 /* OSPF route */ -#define RTS_OSPF_IA 8 /* OSPF inter-area route */ -#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ -#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ -#define RTS_BGP 11 /* BGP route */ -#define RTS_PIPE 12 /* Inter-table wormhole */ -#define RTS_BABEL 13 /* Babel route */ -#define RTS_RPKI 14 /* Route Origin Authorization */ -#define RTS_PERF 15 /* Perf checker */ -#define RTS_MAX 16 - -#define RTC_UNICAST 0 -#define RTC_BROADCAST 1 -#define RTC_MULTICAST 2 -#define RTC_ANYCAST 3 /* IPv6 Anycast */ - -#define RTD_NONE 0 /* Undefined next hop */ -#define RTD_UNICAST 1 /* Next hop is neighbor router */ -#define RTD_BLACKHOLE 2 /* Silently drop packets */ -#define RTD_UNREACHABLE 3 /* Reject as unreachable */ -#define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MAX 5 - -#define RTAF_CACHED 1 /* This is a cached rta */ - -#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other - protocol-specific metric is availabe */ - - -extern const char * rta_dest_names[RTD_MAX]; - -static inline const char *rta_dest_name(uint n) -{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } - -/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ -static inline int rte_is_reachable(rte *r) -{ return r->attrs->dest == RTD_UNICAST; } - - -/* - * Extended Route Attributes - */ - -typedef struct eattr { - word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ - byte flags; /* Protocol-dependent flags */ - byte type; /* Attribute type and several flags (EAF_...) */ - union { - u32 data; - const struct adata *ptr; /* Attribute data elsewhere */ - } u; -} eattr; - - -#define EA_CODE(proto,id) (((proto) << 8) | (id)) -#define EA_ID(ea) ((ea) & 0xff) -#define EA_PROTO(ea) ((ea) >> 8) -#define EA_ID_FLAG(ea) (1 << EA_ID(ea)) -#define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) -#define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) -#define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) - -const char *ea_custom_name(uint ea); - -#define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0) - -#define EA_CODE_MASK 0xffff -#define EA_CUSTOM_BIT 0x8000 -#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ -#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ -#define EA_BIT_GET(ea) ((ea) >> 24) - -#define EAF_TYPE_MASK 0x1f /* Mask with this to get type */ -#define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */ -#define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */ -#define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */ -#define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */ -#define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ -#define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ -#define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ -#define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community list */ -#define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ -#define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ -#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ -#define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ -#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */ - -typedef struct adata { - uint length; /* Length of data */ - byte data[0]; -} adata; - -extern const adata null_adata; /* adata of length 0 */ - -static inline struct adata * -lp_alloc_adata(struct linpool *pool, uint len) -{ - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; -} - -static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } - - -typedef struct ea_list { - struct ea_list *next; /* In case we have an override list */ - byte flags; /* Flags: EALF_... */ - byte rfu; - word count; /* Number of attributes */ - eattr attrs[0]; /* Attribute definitions themselves */ -} ea_list; - -#define EALF_SORTED 1 /* Attributes are sorted by code */ -#define EALF_BISECT 2 /* Use interval bisection for searching */ -#define EALF_CACHED 4 /* Attributes belonging to cached rta */ -#define EALF_TEMP 8 /* Temporary ea_list added by make_tmp_attrs hooks */ - -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); - -struct ea_walk_state { - ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ - eattr *ea; /* Current eattr, initially NULL */ - u32 visited[4]; /* Bitfield, limiting max to 128 */ -}; - -eattr *ea_find(ea_list *, unsigned ea); -eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); -int ea_get_int(ea_list *, unsigned ea, int def); -void ea_dump(ea_list *); -void ea_sort(ea_list *); /* Sort entries in all sub-lists */ -unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ -void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */ -int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ -uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ -ea_list *ea_append(ea_list *to, ea_list *what); -void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); - -#define ea_normalize(ea) do { \ - if (ea->next) { \ - ea_list *t = alloca(ea_scan(ea)); \ - ea_merge(ea, t); \ - ea = t; \ - } \ - ea_sort(ea); \ - if (ea->count == 0) \ - ea = NULL; \ -} while(0) \ - -static inline eattr * -ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val) -{ - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - eattr *e = &a->attrs[0]; - - a->flags = EALF_SORTED; - a->count = 1; - a->next = *to; - *to = a; - - e->id = id; - e->type = type; - e->flags = flags; - - if (type & EAF_EMBEDDED) - e->u.data = (u32) val; - else - e->u.ptr = (struct adata *) val; - - return e; -} - -static inline void -ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - memcpy(a->data, data, len); - ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); -} - - -#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - -static inline size_t nexthop_size(const struct nexthop *nh) -{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } -int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ -static inline int nexthop_same(struct nexthop *x, struct nexthop *y) -{ return (x == y) || nexthop__same(x, y); } -struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); -struct nexthop *nexthop_sort(struct nexthop *x); -static inline void nexthop_link(struct rta *a, struct nexthop *from) -{ memcpy(&a->nh, from, nexthop_size(from)); } -void nexthop_insert(struct nexthop **n, struct nexthop *y); -int nexthop_is_sorted(struct nexthop *x); - -void rta_init(void); -static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } -#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) -rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ -static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } -static inline rta *rta_clone(rta *r) { r->uc++; return r; } -void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } -rta *rta_do_cow(rta *o, linpool *lp); -static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } -void rta_dump(rta *); -void rta_dump_all(void); -void rta_show(struct cli *, rta *); - -u32 rt_get_igp_metric(rte *rt); -struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); - -static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); -} - -/* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. - * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. - */ - -static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } -static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } - -int rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior); - - -/* - * Default protocol preferences - */ - -#define DEF_PREF_DIRECT 240 /* Directly connected */ -#define DEF_PREF_STATIC 200 /* Static route */ -#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ -#define DEF_PREF_BABEL 130 /* Babel */ -#define DEF_PREF_RIP 120 /* RIP */ -#define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_RPKI 100 /* RPKI */ -#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - -/* - * Route Origin Authorization - */ - -#define ROA_UNKNOWN 0 -#define ROA_VALID 1 -#define ROA_INVALID 2 - -#endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index c630aa95..471209ee 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -45,11 +45,11 @@ */ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" #include "lib/idm.h" @@ -57,11 +57,26 @@ #include "lib/string.h" #include <stddef.h> +#include <stdlib.h> const adata null_adata; /* adata of length 0 */ +struct ea_class ea_gen_igp_metric = { + .name = "igp_metric", + .type = T_INT, +}; + +struct ea_class ea_gen_preference = { + .name = "preference", + .type = T_INT, +}; + +struct ea_class ea_gen_from = { + .name = "from", + .type = T_IP, +}; + const char * const rta_src_names[RTS_MAX] = { - [RTS_DUMMY] = "", [RTS_STATIC] = "static", [RTS_INHERIT] = "inherit", [RTS_DEVICE] = "device", @@ -78,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = { [RTS_RPKI] = "RPKI", }; +static void +ea_gen_source_format(const eattr *a, byte *buf, uint size) +{ + if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data]) + bsnprintf(buf, size, "unknown"); + else + bsnprintf(buf, size, "%s", rta_src_names[a->u.data]); +} + +struct ea_class ea_gen_source = { + .name = "source", + .type = T_ENUM_RTS, + .readonly = 1, + .format = ea_gen_source_format, +}; + +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + +/* + * ea_set_hostentry() acquires hostentry from hostcache. + * New hostentry has zero use count. Cached rta locks its + * hostentry (increases its use count), uncached rta does not lock it. + * Hostentry with zero use count is removed asynchronously + * during host cache update, therefore it is safe to hold + * such hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for hostentry->dep table, because that table + * contains routes responsible for that hostentry, and therefore is non-empty if + * given hostentry has non-zero use count. If the hostentry has zero use count, + * the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for a 'source' table governing that routes (argument tab), + * because its routes reference hostentries related to the governing table. + * When all such routes are + * removed, rtas are immediately removed achieving zero uc. Then the 'source' + * table lock could be immediately released, although hostentries may still + * exist - they will be freed together with the 'source' table. + */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, + .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, +}; + const char * rta_dest_names[RTD_MAX] = { [RTD_NONE] = "", [RTD_UNICAST] = "unicast", @@ -86,10 +166,22 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +struct ea_class ea_gen_flowspec_valid = { + .name = "flowspec_valid", + .type = T_ENUM_FLOWSPEC_VALID, + .readonly = 1, +}; + +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + +DOMAIN(attrs) attrs_domain; + pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; static slab *rte_src_slab; static struct idm src_ids; @@ -97,121 +189,176 @@ static struct idm src_ids; /* rte source hash */ -#define RSH_KEY(n) n->proto, n->private_id +#define RSH_KEY(n) n->private_id #define RSH_NEXT(n) n->next -#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2 -#define RSH_FN(p,n) p->hash_key ^ u32_hash(n) +#define RSH_EQ(n1,n2) n1 == n2 +#define RSH_FN(n) u32_hash(n) #define RSH_REHASH rte_src_rehash #define RSH_PARAMS /2, *2, 1, 1, 8, 20 -#define RSH_INIT_ORDER 6 - -static HASH(struct rte_src) src_hash; +#define RSH_INIT_ORDER 2 +static struct rte_src **rte_src_global; +static uint rte_src_global_max = SRC_ID_INIT_SIZE; static void rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); + rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); - - HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) -struct rte_src * -rt_find_source(struct proto *p, u32 id) +static struct rte_src * +rt_find_source(struct rte_owner *p, u32 id) { - return HASH_FIND(src_hash, RSH, p, id); + return HASH_FIND(p->hash, RSH, id); } struct rte_src * -rt_get_source(struct proto *p, u32 id) +rt_get_source_o(struct rte_owner *p, u32 id) { + if (p->stop) + bug("Stopping route owner asked for another source."); + struct rte_src *src = rt_find_source(p, id); if (src) + { + UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); return src; + } + RTA_LOCK; src = sl_allocz(rte_src_slab); - src->proto = p; + src->owner = p; src->private_id = id; src->global_id = idm_alloc(&src_ids); - src->uc = 0; - HASH_INSERT2(src_hash, RSH, rta_pool, src); + atomic_store_explicit(&src->uc, 1, memory_order_release); + p->uc++; + + HASH_INSERT2(p->hash, RSH, rta_pool, src); + if (config->table_debug) + log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now", + p->name, src->private_id, src->global_id, p->uc); + + if (src->global_id >= rte_src_global_max) + { + rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); + memset(&rte_src_global[rte_src_global_max / 2], 0, + sizeof(struct rte_src *) * (rte_src_global_max / 2)); + } + + rte_src_global[src->global_id] = src; + RTA_UNLOCK; return src; } +struct rte_src * +rt_find_source_global(u32 id) +{ + if (id >= rte_src_global_max) + return NULL; + else + return rte_src_global[id]; +} + +static inline void +rt_done_sources(struct rte_owner *o) +{ + ev_send(o->list, o->stop); +} + void -rt_prune_sources(void) +rt_prune_sources(void *data) { - HASH_WALK_FILTER(src_hash, next, src, sp) + struct rte_owner *o = data; + + HASH_WALK_FILTER(o->hash, next, src, sp) { - if (src->uc == 0) + u64 uc; + while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT) + synchronize_rcu(); + + if (uc == 0) { - HASH_DO_REMOVE(src_hash, RSH, sp); + o->uc--; + + HASH_DO_REMOVE(o->hash, RSH, sp); + + RTA_LOCK; + rte_src_global[src->global_id] = NULL; idm_free(&src_ids, src->global_id); - sl_free(rte_src_slab, src); + sl_free(src); + RTA_UNLOCK; } } HASH_WALK_FILTER_END; - HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool); -} - - -/* - * Multipath Next Hop - */ + RTA_LOCK; + HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool); -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) + if (o->stop && !o->uc) { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + rfree(o->prune); + RTA_UNLOCK; + + if (config->table_debug) + log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name); - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + rt_done_sources(o); } + else + RTA_UNLOCK; +} - return h; +void +rt_init_sources(struct rte_owner *o, const char *name, event_list *list) +{ + RTA_LOCK; + HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER); + o->hash_key = random_u32(); + o->uc = 0; + o->name = name; + o->prune = ev_new_init(rta_pool, rt_prune_sources, o); + o->stop = NULL; + o->list = list; + RTA_UNLOCK; } -int -nexthop__same(struct nexthop *x, struct nexthop *y) +void +rt_destroy_sources(struct rte_owner *o, event *done) { - for (; x && y; x = x->next, y = y->next) + o->stop = done; + + if (!o->uc) { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name); - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } + RTA_LOCK; + rfree(o->prune); + RTA_UNLOCK; - return x == y; + rt_done_sources(o); + } + else + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc); } +/* + * Multipath Next Hop + */ + static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -236,23 +383,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -269,138 +409,227 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); + + if (!xvalid && !yvalid) + break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); if (cmp < 0) { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); } else if (cmp > 0) { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); } else { - ASSUME(x && y); - *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); } - n = &((*n)->next); + cur = NEXTHOP_NEXT(cur); } - *n = NULL; - return root; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -void -nexthop_insert(struct nexthop **n, struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - if (cmp < 0) - continue; - else if (cmp > 0) - break; - else - return; - } + if (cnt <= 1) + return nhad; - x->next = *n; - *n = x; -} + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); -struct nexthop * -nexthop_sort(struct nexthop *x) -{ - struct nexthop *s = NULL; + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); - /* Simple insert-sort */ - while (x) + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) +/* + * Extended Attributes + */ + +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) { - return nexthop_slab_[MIN(nh->labels, 3)]; + /* No more ea class references. Unregister the attribute. */ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + if (!cl->hidden) + ea_lex_unregister(cl); } -static struct nexthop * -nexthop_copy(struct nexthop *o) +static void +ea_class_ref_free(resource *r) { - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} - return first; +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); } +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + static void -nexthop_free(struct nexthop *o) +ea_class_init(void) { - struct nexthop *n; + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = EA_CLASS_INITIAL_MAX)); +} - while (o) - { - n = o->next; - sl_free(nexthop_slab(o), o); - o = n; - } +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; } +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); -/* - * Extended Attributes - */ + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + if (!def->hidden) + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} static inline eattr * ea__find(ea_list *e, unsigned id) @@ -445,12 +674,11 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. */ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); - if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF && - !(id & EA_ALLOW_UNDEF)) + if (a && a->undef && !(id & EA_ALLOW_UNDEF)) return NULL; return a; } @@ -517,7 +745,7 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) BIT32_SET(s->visited, n); - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (a->undef) continue; s->eattrs = e; @@ -531,25 +759,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -int -ea_get_int(ea_list *e, unsigned id, int def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -616,15 +825,18 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && (s[-1].undef || !e->next)) continue; /* Copy the newest version to destination */ *d = *s0; /* Preserve info whether it originated locally */ - d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); + d->originated = s[-1].originated; + + /* Not fresh any more, we prefer surstroemming */ + d->fresh = 0; /* Next destination */ d++; @@ -644,21 +856,18 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. */ -void +static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -668,8 +877,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -677,6 +886,8 @@ ea_scan(ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -695,21 +906,36 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; +} + +ea_list * +ea_normalize(ea_list *e, int overlay) +{ + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); + ea_sort(t); + + return t->count ? t : t->next; } /** @@ -727,7 +953,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -738,39 +965,46 @@ ea_same(ea_list *x, ea_list *y) if (a->id != b->id || a->flags != b->flags || a->type != b->type || + a->originated != b->originated || + a->fresh != b->fresh || + a->undef != b->undef || ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr))) return 0; } return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + if (!a->undef && !(a->type & EAF_EMBEDDED)) + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -780,30 +1014,58 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; } -static inline void -ea_free(ea_list *o) +static void +ea_list_ref(ea_list *l) { - if (o) + for(uint i=0; i<l->count; i++) { - ASSERT(!o->next); - mb_free(o); + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) +static void ea_free_nested(ea_list *l); + +static void +ea_list_unref(ea_list *l) { - if (a->id == EA_GEN_IGP_METRIC) + for(uint i=0; i<l->count; i++) { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); } - return GA_UNKNOWN; + if (l->next) + ea_free_nested(l->next); } void @@ -856,41 +1118,90 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } +void +ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) +{ + if (!NEXTHOP_IS_REACHABLE(nhad)) + return; + + NEXTHOP_WALK(nh, nhad) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (!NEXTHOP_ONE(nhad)) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } +} + +void +ea_show_hostentry(const struct adata *ad, byte *buf, uint size) +{ + const struct hostentry_adata *had = (const struct hostentry_adata *) ad; + + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + bsnprintf(buf, size, "via %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + bsnprintf(buf, size, "via %I table %s", had->he->addr, had->he->tab->name); +} + /** * ea_show - print an &eattr to CLI * @c: destination CLI @@ -902,79 +1213,80 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte * If the protocol defining the attribute provides its own * get_attr() hook, it's consulted first. */ -void +static void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); + ASSERT_DIE(e->id < ea_class_max); + + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + if (e->undef || cls->hidden) + return; + else if (cls->format) + cls->format(e, buf, end - buf); else - status = get_generic_attr(e, &pos, end - pos); + switch (e->type) + { + case T_INT: + if ((cls == &ea_gen_igp_metric) && e->u.data >= IGP_METRIC_UNKNOWN) + return; - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: - ea_show_int_set(c, ad, 1, pos, buf, end); + case T_CLIST: + ea_show_int_set(c, cls->name, ad, 1, buf); + return; + case T_ECLIST: + ea_show_ec_set(c, cls->name, ad, buf); return; - case EAF_TYPE_EC_SET: - ea_show_ec_set(c, ad, pos, buf, end); + case T_LCLIST: + ea_show_lc_set(c, cls->name, ad, buf); return; - case EAF_TYPE_LC_SET: - ea_show_lc_set(c, ad, pos, buf, end); + case T_NEXTHOP_LIST: + ea_show_nexthop_list(c, (struct nexthop_adata *) e->u.ptr); return; - case EAF_TYPE_UNDEF: + case T_HOSTENTRY: + ea_show_hostentry(ad, pos, end - pos); + break; default: bsprintf(pos, "<type %02x>", e->type); - } + } + + cli_printf(c, -1012, "\t%s: %s", cls->name, buf); +} + +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); } - cli_printf(c, -1012, "\t%s", buf); } /** @@ -996,19 +1308,26 @@ ea_dump(ea_list *e) } while (e) { - debug("[%c%c%c]", + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; + debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', - (e->flags & EALF_CACHED) ? 'C' : 'c'); + (e->flags & EALF_CACHED) ? 'C' : 'c', + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); - if (a->type & EAF_ORIGINATED) + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" [a->type]); + if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1038,10 +1357,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1085,12 +1407,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static rta **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1098,64 +1420,14 @@ rta_alloc_hash(void) rta_cache_mask = rta_cache_size - 1; } -static inline uint -rta_hash(rta *a) -{ - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); - MIX(src); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - MIX(source); - MIX(scope); - MIX(dest); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); -} - -static inline int -rta_same(rta *x, rta *y) -{ - return (x->src == y->src && - x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; -} - -static rta * -rta_copy(rta *o) -{ - rta *r = sl_alloc(rta_slab(o)); - - memcpy(r, o, rta_size(o)); - r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); - return r; -} - static inline void -rta_insert(rta *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; - r->next = rta_hash_table[h]; - if (r->next) - r->next->pprev = &r->next; - r->pprev = &rta_hash_table[h]; + r->next_hash = rta_hash_table[h]; + if (r->next_hash) + r->next_hash->pprev_hash = &r->next_hash; + r->pprev_hash = &rta_hash_table[h]; rta_hash_table[h] = r; } @@ -1164,8 +1436,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - rta *r, *n; - rta **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1173,7 +1445,7 @@ rta_rehash(void) for(h=0; h<ohs; h++) for(r=oht[h]; r; r=n) { - n = r->next; + n = r->next_hash; rta_insert(r); } mb_free(oht); @@ -1192,102 +1464,75 @@ rta_rehash(void) * The extended attribute lists attached to the &rta are automatically * converted to the normalized form. */ -rta * -rta_lookup(rta *o) +ea_list * +ea_lookup(ea_list *o, int overlay) { - rta *r; + struct ea_storage *r; uint h; - ASSERT(!(o->aflags & RTAF_CACHED)); - if (o->eattrs) - ea_normalize(o->eattrs); + ASSERT(!ea_is_cached(o)); + o = ea_normalize(o, overlay); + h = ea_hash(o); + + RTA_LOCK; + + for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) + if (r->hash_key == h && ea_same(r->l, o)) + { + atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + RTA_UNLOCK; + return r->l; + } - h = rta_hash(o); - for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) - if (r->hash_key == h && rta_same(r, o)) - return rta_clone(r); + uint elen = ea_list_size(o); + r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage)); + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); - r = rta_copy(o); + r->l->flags |= EALF_CACHED; r->hash_key = h; - r->aflags = RTAF_CACHED; - rt_lock_source(r->src); - rt_lock_hostentry(r->hostentry); + r->uc = 1; + rta_insert(r); if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + RTA_UNLOCK; + return r->l; } -void -rta__free(rta *a) +static void +ea_free_locked(struct ea_storage *a) { - ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED)); + /* Somebody has cloned this rta inbetween. This sometimes happens. */ + if (atomic_load_explicit(&a->uc, memory_order_acquire)) + return; + + ASSERT(rta_cache_count); rta_cache_count--; - *a->pprev = a->next; - if (a->next) - a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - rt_unlock_source(a->src); - if (a->nh.next) - nexthop_free(a->nh.next); - ea_free(a->eattrs); - a->aflags = 0; /* Poison the entry */ - sl_free(rta_slab(a), a); + *a->pprev_hash = a->next_hash; + if (a->next_hash) + a->next_hash->pprev_hash = a->pprev_hash; + + ea_list_unref(a->l); + mb_free(a); } -rta * -rta_do_cow(rta *o, linpool *lp) +static void +ea_free_nested(struct ea_list *l) { - rta *r = lp_alloc(lp, rta_size(o)); - memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } - r->aflags = 0; - r->uc = 0; - return r; + struct ea_storage *r = ea_get_storage(l); + if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) + ea_free_locked(r); } -/** - * rta_dump - dump route attributes - * @a: attribute structure to dump - * - * This function takes a &rta and dumps its contents to the debug output. - */ void -rta_dump(rta *a) +ea__free(struct ea_storage *a) { - static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("p=%s uc=%d %s %s%s h=%04x", - a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), - rtd[a->dest], a->hash_key); - if (!(a->aflags & RTAF_CACHED)) - debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } - if (a->eattrs) - { - debug(" EA: "); - ea_dump(a->eattrs); - } + RTA_LOCK; + ea_free_locked(a); + RTA_UNLOCK; } /** @@ -1297,30 +1542,29 @@ rta_dump(rta *a) * to the debug output. */ void -rta_dump_all(void) +ea_dump_all(void) { - rta *a; - uint h; + RTA_LOCK; debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); - for(h=0; h<rta_cache_size; h++) - for(a=rta_hash_table[h]; a; a=a->next) + for (uint h=0; h < rta_cache_size; h++) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - rta_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); + + RTA_UNLOCK; } void -rta_show(struct cli *c, rta *a) +ea_show_list(struct cli *c, ea_list *eal) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - - for(ea_list *eal = a->eattrs; eal; eal=eal->next) - for(int i=0; i<eal->count; i++) - ea_show(c, &eal->attrs[i]); + ea_list *n = ea_normalize(eal, 0); + for (int i =0; i < n->count; i++) + ea_show(c, &n->attrs[i]); } /** @@ -1332,20 +1576,24 @@ rta_show(struct cli *c, rta *a) void rta_init(void) { - rta_pool = rp_new(&root_pool, "Attributes"); + attrs_domain = DOMAIN_NEW(attrs, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_pool = rp_new(&root_pool, "Attributes"); rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + /* These attributes are required to be first for nice "show route" output */ + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + + /* Other generic route attributes */ + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_flowspec_valid); } /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 61f025ce..4199e17c 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/rt-dev.h" #include "conf/conf.h" #include "lib/resource.h" @@ -67,13 +67,11 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rte_update2(c, net, NULL, src); + rte_update(c, net, NULL, src); + rt_unlock_source(src); } else if (flags & IF_CHANGE_UP) { - rta *a; - rte *e; - DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) @@ -82,18 +80,23 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rta a0 = { + ea_list *ea = NULL; + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, + }; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); + + rte e0 = { + .attrs = ea, .src = src, - .source = RTS_DEVICE, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh.iface = ad->iface, }; - a = rta_lookup(&a0); - e = rte_get_temp(a); - e->pflags = 0; - rte_update2(c, net, e, src); + rte_update(c, net, &e0, src); + rt_unlock_source(src); } } @@ -185,7 +188,6 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_device = { .name = "Direct", .template = "direct%d", - .class = PROTOCOL_DIRECT, .preference = DEF_PREF_DIRECT, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct rt_dev_proto), @@ -195,3 +197,9 @@ struct protocol proto_device = { .reconfigure = dev_reconfigure, .copy_config = dev_copy_config }; + +void +dev_build(void) +{ + proto_build(&proto_device); +} diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 1690a8f6..801561da 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -55,7 +55,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/string.h" /* @@ -475,7 +475,7 @@ fib_delete(struct fib *f, void *E) } if (f->fib_slab) - sl_free(f->fib_slab, E); + sl_free(E); else mb_free(E); diff --git a/nest/rt-show.c b/nest/rt-show.c index 7639e5f7..17400029 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -10,7 +10,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/cli.h" #include "nest/iface.h" @@ -19,90 +19,83 @@ #include "sysdep/unix/krt.h" static void -rt_show_table(struct cli *c, struct rt_show_data *d) +rt_show_table(struct rt_show_data *d) { + struct cli *c = d->cli; + /* No table blocks in 'show route count' */ if (d->stats == 2) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->name); d->last_table = d->tab; } -static inline struct krt_proto * -rt_show_get_kernel(struct rt_show_data *d) -{ - struct proto_config *krt = d->tab->table->config->krt_attached; - return krt ? (struct krt_proto *) krt->proto : NULL; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary) { byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; - rta *a = e->attrs; - int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; + ea_list *a = e->attrs; + int sync_error = d->tab->kernel ? krt_get_sync_error(d->tab->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a, &ea_gen_nexthop) : NULL; + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) - bsprintf(from, " from %I", a->from); + ip_addr a_from = ea_get_ip(a, &ea_gen_from, IPA_NONE); + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) + bsprintf(from, " from %I", a_from); else from[0] = 0; /* Need to normalize the extended attributes */ - if (d->verbose && !rta_is_cached(a) && a->eattrs) - ea_normalize(a->eattrs); + if (d->verbose && !rta_is_cached(a) && a) + a = ea_normalize(a, 0); - get_route_info = a->src->proto->proto->get_route_info; + get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL; if (get_route_info) get_route_info(e, info); else - bsprintf(info, " (%d)", e->pref); + bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) - rt_show_table(c, d); - - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); - - if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) - { - char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; - char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; - char weight[16] = ""; - - if (nh->labels) - { - lsp += bsprintf(lsp, " mpls %d", nh->label[0]); - for (int i=1;i<nh->labels; i++) - lsp += bsprintf(lsp, "/%d", nh->label[i]); - } - *lsp = '\0'; + rt_show_table(d); - if (a->nh.next) - bsprintf(weight, " weight %d", nh->weight + 1); + eattr *heea; + struct hostentry_adata *had = NULL; + if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry))) + had = (struct hostentry_adata *) heea->u.ptr; - if (ipa_nonzero(nh->gw)) - cli_printf(c, -1007, "\tvia %I on %s%s%s%s", - nh->gw, nh->iface->name, mpls, onlink, weight); - else - cli_printf(c, -1007, "\tdev %s%s%s", - nh->iface->name, mpls, onlink, weight); - } + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), + e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) - rta_show(c, a); + { + ea_show_list(c, a); + cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS id %u", + e->src->private_id, e->src->global_id, e->stale_cycle, e->id); + } + else if (dest == RTD_UNICAST) + ea_show_nexthop_list(c, nhad); + else if (had) + { + char hetext[256]; + ea_show_hostentry(&had->ad, hetext, sizeof hetext); + cli_printf(c, -1007, "\t%s", hetext); + } } static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count) { - rte *e, *ee; + struct cli *c = d->cli; byte ia[NET_MAX_TEXT_LENGTH+1]; struct channel *ec = d->tab->export_channel; @@ -114,9 +107,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) int first_show = 1; int pass = 0; - for (e = n->routes; e; e = e->next) + for (uint i = 0; i < count; i++) { - if (rte_is_filtered(e) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(feed[i]) != d->filtered)) continue; d->rt_counter++; @@ -126,16 +119,20 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (pass) continue; - ee = e; - rte_make_tmp_attrs(&e, c->show_pool, NULL); + struct rte e = *feed[i]; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ - if (ec && (ec->export_state == ES_DOWN)) + if (ec && !ec->out_req.hook) goto skip; if (d->export_mode == RSEM_EXPORTED) { - if (!bmap_test(&ec->export_map, ee->id)) + if (!bmap_test(&ec->export_map, e.id)) goto skip; // if (ec->ra_mode != RA_ANY) @@ -144,17 +141,18 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) { /* Special case for merged export */ - rte *rt_free; - e = rt_export_merged(ec, n, &rt_free, c->show_pool, 1); pass = 1; + rte *em = rt_export_merged(ec, feed, count, tmp_linpool, 1); - if (!e) - { e = ee; goto skip; } + if (em) + e = *em; + else + goto skip; } else if (d->export_mode) { struct proto *ep = ec->proto; - int ic = ep->preexport ? ep->preexport(ec, &e, c->show_pool) : 0; + int ic = ep->preexport ? ep->preexport(ec, &e) : 0; if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) pass = 1; @@ -170,7 +168,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) * command may change the export filter and do not update routes. */ int do_export = (ic > 0) || - (f_run(ec->out_filter, &e, c->show_pool, FF_SILENT) <= F_ACCEPT); + (f_run(ec->out_filter, &e, FF_SILENT) <= F_ACCEPT); if (do_export != (d->export_mode == RSEM_EXPORT)) goto skip; @@ -180,184 +178,193 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) } } - if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) + if (d->show_protocol && (&d->show_protocol->sources != e.src->owner)) goto skip; - if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) + if (f_run(d->filter, &e, 0) > F_ACCEPT) goto skip; if (d->stats < 2) { if (first_show) - net_format(n->n.addr, ia, sizeof(ia)); + net_format(n, ia, sizeof(ia)); else ia[0] = 0; - rt_show_rte(c, ia, e, d, (e->net->routes == ee)); + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && !i); first_show = 0; } d->show_counter++; skip: - if (e != ee) - { - rte_free(e); - e = ee; - } - lp_flush(c->show_pool); - if (d->primary_only) break; } + + if ((d->show_counter - d->show_counter_last_flush) > 64) + { + d->show_counter_last_flush = d->show_counter; + cli_write_trigger(d->cli); + } } static void -rt_show_cleanup(struct cli *c) +rt_show_net_export_bulk(struct rt_export_request *req, const net_addr *n, + struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rt_show_data *d = c->rover; - struct rt_show_data_rtable *tab; + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + return rt_show_net(d, n, feed, count); +} - /* Unlink the iterator */ - if (d->table_open && !d->trie_walk) - fit_get(&d->tab->table->fib, &d->fit); +static void +rt_show_export_stopped_cleanup(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); - if (d->walk_lock) - rt_unlock_trie(d->tab->table, d->walk_lock); + /* The hook is now invalid */ + req->hook = NULL; - /* Unlock referenced tables */ - WALK_LIST(tab, d->tables) - rt_unlock_table(tab->table); + /* And free the CLI (deferred) */ + rfree(d->cli->pool); } -static void -rt_show_cont(struct cli *c) +static int +rt_show_cleanup(struct cli *c) { struct rt_show_data *d = c->rover; - struct rtable *tab = d->tab->table; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &tab->fib; - struct fib_iterator *it = &d->fit; - if (d->running_on_config && (d->running_on_config != config)) + /* Cancel the feed */ + if (d->req.hook) { - cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; + rt_stop_export(&d->req, rt_show_export_stopped_cleanup); + return 1; } + else + return 0; +} - if (!d->table_open) - { - /* We use either trie-based walk or fib-based walk */ - d->trie_walk = tab->trie && - (d->addr_mode == RSD_ADDR_IN) && - net_val_match(tab->addr_type, NB_IP); +static void rt_show_export_stopped(struct rt_export_request *req); - if (d->trie_walk && !d->walk_state) - d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); +static void +rt_show_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, rt_show_export_stopped); +} - if (d->trie_walk) - { - d->walk_lock = rt_lock_trie(tab); - trie_walk_init(d->walk_state, tab->trie, d->addr); - } - else - FIB_ITERATE_INIT(&d->fit, &tab->fib); +static void +rt_show_dump_req(struct rt_export_request *req) +{ + debug(" CLI Show Route Feed %p\n", req); +} - d->table_open = 1; - d->table_counter++; - d->kernel = rt_show_get_kernel(d); +static void +rt_show_done(struct rt_show_data *d) +{ + /* No more action */ + d->cli->cleanup = NULL; + d->cli->cont = NULL; + d->cli->rover = NULL; - d->show_counter_last = d->show_counter; - d->rt_counter_last = d->rt_counter; - d->net_counter_last = d->net_counter; + /* Write pending messages */ + cli_write_trigger(d->cli); +} - if (d->tables_defined_by & RSD_TDB_SET) - rt_show_table(c, d); - } +static void +rt_show_cont(struct rt_show_data *d) +{ + struct cli *c = d->cli; - if (d->trie_walk) + if (d->running_on_config && (d->running_on_config != config)) { - /* Trie-based walk */ - net_addr addr; - while (trie_walk_next(d->walk_state, &addr)) - { - net *n = net_find(tab, &addr); - if (!n) - continue; + cli_printf(c, 8004, "Stopped due to reconfiguration"); + return rt_show_done(d); + } - rt_show_net(c, n, d); + d->req = (struct rt_export_request) { + .addr = d->addr, + .name = "CLI Show Route", + .list = &global_work_list, + .export_bulk = rt_show_net_export_bulk, + .dump_req = rt_show_dump_req, + .log_state_change = rt_show_log_state_change, + .addr_mode = d->addr_mode, + }; - if (!--max) - return; - } + d->table_counter++; - rt_unlock_trie(tab, d->walk_lock); - d->walk_lock = NULL; - } - else - { - /* fib-based walk */ - FIB_ITERATE_START(fib, it, net, n) - { - if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) - goto next; + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; - if (!max--) - { - FIB_ITERATE_PUT(it); - return; - } - rt_show_net(c, n, d); + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(d); - next:; - } - FIB_ITERATE_END; - } + rt_request_export(d->tab->table, &d->req); +} + +static void +rt_show_export_stopped(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + + /* The hook is now invalid */ + req->hook = NULL; if (d->stats) { if (d->last_table != d->tab) - rt_show_table(c, d); + rt_show_table(d); - cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + cli_printf(d->cli, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, tab->name); + d->net_counter - d->net_counter_last, d->tab->name); } - d->kernel = NULL; - d->table_open = 0; d->tab = NODE_NEXT(d->tab); if (NODE_VALID(d->tab)) - return; + return rt_show_cont(d); + /* Printout total stats */ if (d->stats && (d->table_counter > 1)) { - if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + if (d->last_table) cli_printf(d->cli, -1007, ""); + cli_printf(d->cli, 14, "Total: %d of %d routes for %d networks in %d tables", d->show_counter, d->rt_counter, d->net_counter, d->table_counter); } + else if (!d->rt_counter && ((d->addr_mode == TE_ADDR_EQUAL) || (d->addr_mode == TE_ADDR_FOR))) + cli_printf(d->cli, 8001, "Network not found"); else - cli_printf(c, 0, ""); + cli_printf(d->cli, 0, ""); -done: - rt_show_cleanup(c); - c->cont = c->cleanup = NULL; + /* No more route showing */ + rt_show_done(d); } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, rtable *t) +rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name) { struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); tab->table = t; + tab->name = name; add_tail(&(d->tables), &(tab->n)); return tab; } +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, struct rtable *t) +{ + struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + + struct proto_config *krt = t->config->krt_attached; + if (krt) + rsdr->kernel = (struct krt_proto *) krt->proto; + + return rsdr; +} + static inline void rt_show_get_default_tables(struct rt_show_data *d) { @@ -376,7 +383,7 @@ rt_show_get_default_tables(struct rt_show_data *d) { WALK_LIST(c, d->export_protocol->channels) { - if (c->export_state == ES_DOWN) + if (!c->out_req.hook) continue; tab = rt_show_add_table(d, c->table); @@ -393,7 +400,7 @@ rt_show_get_default_tables(struct rt_show_data *d) } for (int i=1; i<NET_MAX; i++) - if (config->def_tables[i]) + if (config->def_tables[i] && config->def_tables[i]->table) rt_show_add_table(d, config->def_tables[i]->table); } @@ -412,16 +419,16 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->export_mode) { if (!tab->export_channel && d->export_channel && - (tab->table == d->export_channel->table)) + (tab->table == &d->export_channel->table->exporter)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); if (!tab->export_channel) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("No export channel for table %s", tab->table->name); + cf_error("No export channel for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -432,7 +439,7 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->addr && (tab->table->addr_type != d->addr->type)) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + cf_error("Incompatible type of prefix/ip for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -444,48 +451,29 @@ rt_show_prepare_tables(struct rt_show_data *d) cf_error("No valid tables"); } +static void +rt_show_dummy_cont(struct cli *c UNUSED) +{ + /* Explicitly do nothing to prevent CLI from trying to parse another command. */ +} + void rt_show(struct rt_show_data *d) { - struct rt_show_data_rtable *tab; - net *n; - /* Filtered routes are neither exported nor have sensible ordering */ if (d->filtered && (d->export_mode || d->primary_only)) cf_error("Incompatible show route options"); rt_show_prepare_tables(d); - if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) - { - WALK_LIST(tab, d->tables) - rt_lock_table(tab->table); - - /* There is at least one table */ - d->tab = HEAD(d->tables); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - WALK_LIST(tab, d->tables) - { - d->tab = tab; - d->kernel = rt_show_get_kernel(d); + if (EMPTY_LIST(d->tables)) + cf_error("No suitable tables found"); - if (d->addr_mode == RSD_ADDR_FOR) - n = net_route(tab->table, d->addr); - else - n = net_find(tab->table, d->addr); + d->tab = HEAD(d->tables); - if (n) - rt_show_net(this_cli, n, d); - } + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + this_cli->cont = rt_show_dummy_cont; - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not found"); - } + rt_show_cont(d); } diff --git a/nest/rt-table.c b/nest/rt-table.c index a2fa5e77..3ade4237 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -91,7 +91,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -110,22 +110,83 @@ #include "proto/bgp/bgp.h" #endif -pool *rt_table_pool; +#include <stdatomic.h> -static slab *rte_slab; -static linpool *rte_update_pool; +pool *rt_table_pool; list routing_tables; +list deleted_routing_tables; + +struct rt_cork rt_cork; + +/* Data structures for export journal */ +#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + +struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; +}; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); static void rt_kick_prune_timer(rtable *tab); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); + +static inline void rt_export_used(struct rt_exporter *); +static void rt_export_cleanup(rtable *tab); + +static int rte_same(rte *x, rte *y); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! INVALID !!"; + else + return rt_export_state_name_array[state]; +} +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); static void net_init_with_trie(struct fib *f, void *N) @@ -395,7 +456,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -422,7 +483,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -455,7 +516,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -482,7 +543,7 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -541,253 +602,55 @@ net_roa_check(rtable *tab, const net_addr *n, u32 asn) * @net: network node * @src: route source * - * The rte_find() function returns a route for destination @net - * which is from route source @src. + * The rte_find() function returns a pointer to a route for destination @net + * which is from route source @src. List end pointer is returned if no route is found. */ -rte * +static struct rte_storage ** rte_find(net *net, struct rte_src *src) { - rte *e = net->routes; + struct rte_storage **e = &net->routes; - while (e && e->attrs->src != src) - e = e->next; - return e; -} - -/** - * rte_get_temp - get a temporary &rte - * @a: attributes to assign to the new route (a &rta; in case it's - * un-cached, rte_update() will create a cached copy automatically) - * - * Create a temporary &rte and bind it with the attributes @a. - * Also set route preference to the default preference set for - * the protocol. - */ -rte * -rte_get_temp(rta *a) -{ - rte *e = sl_alloc(rte_slab); + while ((*e) && (*e)->rte.src != src) + e = &(*e)->next; - e->attrs = a; - e->id = 0; - e->flags = 0; - e->pref = 0; return e; } -rte * -rte_do_cow(rte *r) -{ - rte *e = sl_alloc(rte_slab); - - memcpy(e, r, sizeof(rte)); - e->attrs = rta_clone(r->attrs); - e->flags = 0; - return e; -} - -/** - * rte_cow_rta - get a private writable copy of &rte with writable &rta - * @r: a route entry to be copied - * @lp: a linpool from which to allocate &rta - * - * rte_cow_rta() takes a &rte and prepares it and associated &rta for - * modification. There are three possibilities: First, both &rte and &rta are - * private copies, in that case they are returned unchanged. Second, &rte is - * private copy, but &rta is cached, in that case &rta is duplicated using - * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case - * both structures are duplicated by rte_do_cow() and rta_do_cow(). - * - * Note that in the second case, cached &rta loses one reference, while private - * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, - * nexthops, ...) with it. To work properly, original shared &rta should have - * another reference during the life of created private copy. - * - * Result: a pointer to the new writable &rte with writable &rta. - */ -rte * -rte_cow_rta(rte *r, linpool *lp) -{ - if (!rta_is_cached(r->attrs)) - return r; - - r = rte_cow(r); - rta *a = rta_do_cow(r->attrs, lp); - rta_free(r->attrs); - r->attrs = a; - return r; -} - -/** - * rte_init_tmp_attrs - initialize temporary ea_list for route - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @max: maximum number of added temporary attribus - * - * This function is supposed to be called from make_tmp_attrs() and - * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr() - * functions. It allocates &ea_list with length for @max items for temporary - * attributes and puts it on top of eattrs stack. - */ -void -rte_init_tmp_attrs(rte *r, linpool *lp, uint max) +struct rte_storage * +rte_store(const rte *r, net *net, rtable *tab) { - struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr)); + struct rte_storage *e = sl_alloc(tab->rte_slab); - e->next = r->attrs->eattrs; - e->flags = EALF_SORTED | EALF_TEMP; - e->count = 0; + e->rte = *r; + e->rte.net = net->n.addr; - r->attrs->eattrs = e; -} + rt_lock_source(e->rte.src); -/** - * rte_make_tmp_attr - make temporary eattr from private route fields - * @r: route entry to be modified - * @id: attribute ID - * @type: attribute type - * @val: attribute value (u32 or adata ptr) - * - * This function is supposed to be called from make_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to - * be defined (based on route pflags) and if so then it fills &eattr field in - * preallocated temporary &ea_list on top of route @r eattrs stack. - * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs(). - */ -void -rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val) -{ - if (r->pflags & EA_ID_FLAG(id)) - { - ea_list *e = r->attrs->eattrs; - eattr *a = &e->attrs[e->count++]; - a->id = id; - a->type = type; - a->flags = 0; + if (ea_is_cached(e->rte.attrs)) + e->rte.attrs = rta_clone(e->rte.attrs); + else + e->rte.attrs = rta_lookup(e->rte.attrs, 1); - if (type & EAF_EMBEDDED) - a->u.data = (u32) val; - else - a->u.ptr = (struct adata *) val; - } + return e; } /** - * rte_store_tmp_attr - store temporary eattr to private route fields - * @r: route entry to be modified - * @id: attribute ID - * - * This function is supposed to be called from store_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is defined in - * route @r eattrs stack, updates route pflags accordingly, undefines it by - * filling &eattr field in preallocated temporary &ea_list on top of the eattrs - * stack, and returns the value. Caller is supposed to store it in the - * appropriate private field. + * rte_free - delete a &rte + * @e: &struct rte_storage to be deleted + * @tab: the table which the rte belongs to * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs() + * rte_free() deletes the given &rte from the routing table it's linked to. */ -uintptr_t -rte_store_tmp_attr(rte *r, uint id) -{ - ea_list *e = r->attrs->eattrs; - eattr *a = ea_find(e->next, id); - - if (a) - { - e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF }; - r->pflags |= EA_ID_FLAG(id); - return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr; - } - else - { - r->pflags &= ~EA_ID_FLAG(id); - return 0; - } -} -/** - * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes - * @r: route entry to be modified (may be replaced if COW) - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta (may be NULL) - * - * This function expands privately stored protocol-dependent route attributes - * to a uniform &eattr / &ea_list representation. It is essentially a wrapper - * around protocol make_tmp_attrs() hook, which does some additional work like - * ensuring that route @r is writable. - * - * The route @r may be read-only (with %REF_COW flag), in that case rw copy is - * obtained by rte_cow() and @r is replaced. If @rte is originally rw, it may be - * directly modified (and it is never copied). - * - * If the @old_attrs ptr is supplied, the function obtains another reference of - * old cached &rta, that is necessary in some cases (see rte_cow_rta() for - * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free(). - * - * Generally, if caller ensures that @r is read-only (e.g. in route export) then - * it may ignore @old_attrs (and set it to NULL), but must handle replacement of - * @r. If caller ensures that @r is writable (e.g. in route import) then it may - * ignore replacement of @r, but it must handle @old_attrs. - */ void -rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs) -{ - void (*make_tmp_attrs)(rte *r, linpool *lp); - make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs; - - if (!make_tmp_attrs) - return; - - /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */ - if (old_attrs) - *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL; - - *r = rte_cow_rta(*r, lp); - make_tmp_attrs(*r, lp); -} - -/** - * rte_store_tmp_attrs - store temporary route attributes back to private route fields - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta - * - * This function stores temporary route attributes that were expanded by - * rte_make_tmp_attrs() back to private route fields and also undefines them. - * It is essentially a wrapper around protocol store_tmp_attrs() hook, which - * does some additional work like shortcut if there is no change and cleanup - * of @old_attrs reference obtained by rte_make_tmp_attrs(). - */ -static void -rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs) +rte_free(struct rte_storage *e) { - void (*store_tmp_attrs)(rte *rt, linpool *lp); - store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs; - - if (!store_tmp_attrs) - return; - - ASSERT(!rta_is_cached(r->attrs)); - - /* If there is no new ea_list, we just skip the temporary ea_list */ - ea_list *ea = r->attrs->eattrs; - if (ea && (ea->flags & EALF_TEMP)) - r->attrs->eattrs = ea->next; - else - store_tmp_attrs(r, lp); - - /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */ - r->attrs = rta_lookup(r->attrs); - rta_free(old_attrs); + rt_unlock_source(e->rte.src); + rta_free(e->rte.attrs); + sl_free(e); } - static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -798,20 +661,23 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->pref > old->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->pref < old->pref) + if (np < op) return 0; - if (new->attrs->src->proto->proto != old->attrs->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. */ - return new->attrs->src->proto->proto > old->attrs->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->attrs->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -824,180 +690,197 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->pref != sec->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; - if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->attrs->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %s", - c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, + rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + count++; + + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - rte *rt; - int v; + struct channel_export_stats *stats = &c->export_stats; - rt = rt0; - *rt_free = NULL; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; - v = p->preexport ? p->preexport(c, &rt, pool) : 0; + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } - rte_make_tmp_attrs(&rt, pool, NULL); - v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } -#ifdef CONFIG_PIPE - /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ - if (p->proto == &proto_pipe) - rte_store_tmp_attrs(rt, pool, NULL); -#endif - accept: - if (rt != rt0) - *rt_free = rt; + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ - if (rt != rt0) - rte_free(rt); return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent) -{ - return export_filter_(c, rt0, rt_free, rte_update_pool, silent); -} - static void -do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } - /* Apply export table */ - if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed)) - return; + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old); } static void -rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - // struct proto *p = c->proto; - rte *new_free = NULL; - - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + if (new && old && rte_same(new, old)) + { + if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) + { + bmap_set(&c->export_map, new->id); + bmap_clear(&c->export_map, old->id); + } + return; + } if (new) - new = export_filter(c, new, &new_free, 0); + new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) old = NULL; @@ -1005,197 +888,351 @@ rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + do_rt_notify(c, net, new, old); } static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe) { - // struct proto *p = c->proto; - rte *new_best = NULL; - rte *old_best = NULL; - rte *new_free = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. - * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rpe_mark_seen(req->hook, rpe); + if (rpe->old) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + rte nb0, *new_best = NULL; + const rte *old_best = NULL; + + for (uint i = 0; i < count; i++) { - for (rte *r = net->routes; rte_is_valid(r); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->id)) + /* is still best */ + if (!new_best) { - old_best = r; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (r == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; } - } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (rte *r = net->routes; rte_is_valid(r); r = r->next) - if (new_best = export_filter(c, r, &new_free, 0)) - break; + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? "ok" : "no"); + } } - else + +done: + /* Check obsolete routes for previously exported */ + while (rpe) { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0))) - new_best = new_changed; - else - return; + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); } - if (!new_best && !old_best) - return; - - do_rt_notify(c, net, new_best, old_best, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); -} - - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); + /* Nothing to export */ + if (new_best || old_best) + do_rt_notify(c, n, new_best, old_best); + else + DBG("rt_notify_accepted: nothing to export\n"); } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { - // struct proto *p = c->proto; - struct nexthop *nhs = NULL; - rte *best0, *best, *rt0, *rt, *tmp; + _Thread_local static rte rloc; - best0 = net->routes; - *rt_free = NULL; + // struct proto *p = c->proto; + struct nexthop_adata *nhs = NULL; + rte *best0 = feed[0]; + rte *best = NULL; if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, best0, rt_free, pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; - if (!best || !rte_is_reachable(best)) + rloc = *best0; + best = export_filter(c, &rloc, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best0, rt0)) + if (!rte_mergable(best0, feed[i])) continue; - rt = export_filter_(c, rt0, &tmp, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (tmp) - rte_free(tmp); + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best = rte_cow_rta(best, pool); - nexthop_link(best->attrs, nhs); - } - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); - if (best != best0) - *rt_free = best; + ea_set_attr(&best->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); + } return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_free = NULL; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* We assume that all rte arguments are either NULL or rte_is_valid() */ - - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); + } /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, tmp_linpool, 0) : NULL; + + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); +} + +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; + + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); + } - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + rte n0 = RTE_COPY_VALID(new_best); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (!new_best && !old_best) + rte *n = RTE_VALID_OR_NULL(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); + + if (!n && !o) + { + channel_rpe_mark_seen(req, rpe); return; + } - do_rt_notify(c, net, new_best, old_best, refeed); + struct rte_src *src = n ? n->src : o->src; + struct rte_storage *new_latest = rpe->new; - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_latest = rpe->new; + rpe = rpe_next(rpe, src); + } + + rte n0 = RTE_COPY_VALID(new_latest); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + if (rte_is_valid(feed[i])) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} + +void +rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + bmap_set(&hook->seq_map, rpe->seq); +} + +struct rt_pending_export * +rpe_next(struct rt_pending_export *rpe, struct rte_src *src) +{ + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; } +static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); +static void +rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + if (bmap_test(&hook->seq_map, rpe->seq)) + goto ignore; /* Seen already */ + + const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + + switch (hook->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_FOR: + bug("Continuos export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", hook->req->addr_mode); + } + + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) + { + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + hook->req->export_bulk(hook->req, n, rpe, feed, count); + } + else + bug("Export request must always provide an export method"); + +ignore: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) + CALL(hook->table->used, hook->table); + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -1211,13 +1248,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -1232,30 +1262,21 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(new)) - new = NULL; - - if (!rte_is_valid(old)) - old = NULL; + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); - if (!rte_is_valid(new_best)) - new_best = NULL; - - if (!rte_is_valid(old_best)) - old_best = NULL; - - if (!new && !old && !new_best && !old_best) + if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) - new_best->sender->stats.pref_routes++; - if (old_best) - old_best->sender->stats.pref_routes--; + if (new_best_valid) + new_best->rte.sender->stats.pref++; + if (old_best_valid) + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); @@ -1266,103 +1287,255 @@ rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) { - if (c->export_state == ES_DOWN) - continue; + /* No export hook and no pending exports to cleanup. We may free the route immediately. */ + if (!old) + return; - if (type && (type != c->ra_mode)) - continue; + hmap_clear(&tab->id_map, old->rte.id); + rte_free(old); + return; + } + + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; - switch (c->ra_mode) + if (!EMPTY_LIST(tab->exporter.pending)) + { + rpeb = TAIL(tab->exporter.pending); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net, new_best, old_best, 0); - break; + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; - case RA_ANY: - if (new != old) - rt_notify_basic(c, net, new, old, 0); - break; + rpeb = NULL; + end = 0; + } + } - case RA_ACCEPTED: - rt_notify_accepted(c, net, new, old, 0); - break; + if (!rpeb) + { + rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; + add_tail(&tab->exporter.pending, &rpeb->n); + } - case RA_MERGED: - rt_notify_merged(c, net, new, old, new_best, old_best, 0); - break; + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, + .seq = tab->exporter.next_seq++, + }; + + DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", + tab->name, net->n.addr, + new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, + old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL, + new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) + { + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + + if (tab->exporter.first == NULL) + tab->exporter.first = rpe; + + rt_check_cork_high(tab); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); +} + +static struct rt_pending_export * +rt_next_export_fast(struct rt_pending_export *last) +{ + /* Get the whole export block and find our position in there. */ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) + { + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. */ + return NULL; +} + +static struct rt_pending_export * +rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +{ + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else + return tab->first; +} + +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + +static void +rt_announce_exports(timer *tm) +{ + rtable *tab = tm->data; + + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; + + rt_send_export_event(c); + } +} + +static struct rt_pending_export * +rt_last_export(struct rt_exporter *tab) +{ + struct rt_pending_export *rpe = NULL; + + if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } + + return rpe; +} + +#define RT_EXPORT_BULK 1024 + +static void +rt_export_hook(void *_data) +{ + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { + CALL(c->table->used, c->table); + return; } } + + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; + } + + rt_send_export_event(c); } + static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; - net *n = e->net; + const net_addr *n = e->net; - if (!net_validate(n->n.addr)) + if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } /* FIXME: better handling different nettypes */ - c = !net_is_flow(n->n.addr) ? - net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - /* Exception for flowspec that failed validation */ - if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE)) - return 1; + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n->n.addr, e->attrs->dest, e->sender->proto->name); - return 0; - } + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n->n.addr, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } return 1; } -/** - * rte_free - delete a &rte - * @e: &rte to be deleted - * - * rte_free() deletes the given &rte from the routing table it's linked to. - */ -void -rte_free(rte *e) -{ - if (rta_is_cached(e->attrs)) - rta_free(e->attrs); - sl_free(rte_slab, e); -} - -static inline void -rte_free_quick(rte *e) -{ - rta_free(e->attrs); - sl_free(rte_slab, e); -} - static int rte_same(rte *x, rte *y) { @@ -1370,175 +1543,116 @@ rte_same(rte *x, rte *y) return x->attrs == y->attrs && x->pflags == y->pflags && - x->pref == y->pref && - (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) && + x->src == y->src && rte_is_filtered(x) == rte_is_filtered(y); } static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; - rte *before_old = NULL; - rte *old_best = net->routes; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - rte **k; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. */ + struct rte_storage *new_stored = NULL; + if (new) + new = &(new_stored = rte_store(new, net, table))->rte; + + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); + + if (*before_old) { - if (old->attrs->src == src) + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); - if (new && rte_same(old, new)) + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); + } + + if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } - rte_free_quick(new); + /* We need to free the already stored route here before returning */ + rte_free(new_stored); return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; - } - - /* Save the last accessed position */ - rte **pos = k; + } - if (!old) - before_old = NULL; + *before_old = (*before_old)->next; + table->rt_count--; + } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored); + new_stored = NULL; + new = NULL; + } + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1: - - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ - if (new) + if (new_stored) { - if (before_old && !rte_better(new, before_old)) - k = &before_old->next; + struct rte_storage **k; + if ((before_old != &net->routes) && !rte_better(new, &SKIP_BACK(struct rte_storage, next, before_old)->rte)) + k = before_old; else k = &net->routes; for (; *k; k=&(*k)->next) - if (rte_better(new, *k)) + if (rte_better(new, &(*k)->rte)) break; - new->next = *k; - *k = new; + new_stored->next = *k; + *k = new_stored; table->rt_count++; } @@ -1548,16 +1662,17 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; - if (new && rte_better(new, old_best)) + if (new_stored && rte_better(&new_stored->rte, old_best)) { /* The first case - the new route is cleary optimal, we link it at the first position */ - new->next = net->routes; - net->routes = new; + new_stored->next = net->routes; + net->routes = new_stored; table->rt_count++; } @@ -1571,10 +1686,10 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) do_recalculate: /* Add the new route to the list */ - if (new) + if (new_stored) { - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } @@ -1582,335 +1697,417 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* Find a new optimal route (if there is any) */ if (net->routes) { - rte **bp = &net->routes; - for (k=&(*bp)->next; *k; k=&(*k)->next) - if (rte_better(*k, *bp)) + struct rte_storage **bp = &net->routes; + for (struct rte_storage **k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(&(*k)->rte, &(*bp)->rte)) bp = k; /* And relink it */ - rte *best = *bp; + struct rte_storage *best = *bp; *bp = best->next; best->next = net->routes; net->routes = best; } } - else if (new) + else if (new_stored) { /* The third case - the new route is not better than the old best route (therefore old_best != NULL) and the old best route was not removed (therefore old_best == net->routes). We just link the new route to the old/last position. */ - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - if (new) + if (new_stored) { - new->lastmod = current_time(); - - if (!old) - { - new->id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new->id); - } - else - new->id = old->id; + new_stored->rte.lastmod = current_time(); + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (rte_is_ok(net->routes)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); + rte_announce(table, net, new_stored, old_stored, + net->routes, old_best_stored); if (!net->routes && (table->gc_counter++ >= table->config->gc_threshold)) rt_kick_prune_timer(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) - p->rte_insert(net, new); - - if (old) - { - if (!new) - hmap_clear(&table->id_map, old->id); + p->rte_insert(net, &new_stored->rte); +#endif - rte_free_quick(old); - } } -static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */ - -static inline void -rte_update_lock(void) +int +channel_preimport(struct rt_import_request *req, rte *new, rte *old) { - rte_update_nest_cnt++; -} + struct channel *c = SKIP_BACK(struct channel, in_req, req); -static inline void -rte_update_unlock(void) -{ - if (!--rte_update_nest_cnt) - lp_flush(rte_update_pool); -} + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return 0; -static inline void -rte_hide_dummy_routes(net *net, rte **dummy) -{ - if (net->routes && net->routes->attrs->source == RTS_DUMMY) - { - *dummy = net->routes; - net->routes = (*dummy)->next; - } -} + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); -static inline void -rte_unhide_dummy_routes(net *net, rte **dummy) -{ - if (*dummy) - { - (*dummy)->next = net->routes; - net->routes = *dummy; - } -} + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); -/** - * rte_update - enter a new update to a routing table - * @table: table to be updated - * @c: channel doing the update - * @net: network node - * @p: protocol submitting the update - * @src: protocol originating the update - * @new: a &rte representing the new route or %NULL for route removal. - * - * This function is called by the routing protocols whenever they discover - * a new route or wish to update/remove an existing route. The right announcement - * sequence is to build route attributes first (either un-cached with @aflags set - * to zero or a cached one using rta_lookup(); in this case please note that - * you need to increase the use count of the attributes yourself by calling - * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all - * the appropriate data and finally submit the new &rte by calling rte_update(). - * - * @src specifies the protocol that originally created the route and the meaning - * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the - * same value as @new->attrs->proto. @p specifies the protocol that called - * rte_update(). In most cases it is the same protocol as @src. rte_update() - * stores @p in @new->sender; - * - * When rte_update() gets any route, it automatically validates it (checks, - * whether the network and next hop address are valid IP addresses and also - * whether a normal routing protocol doesn't try to smuggle a host or link - * scope route to the table), converts all protocol dependent attributes stored - * in the &rte to temporary extended attributes, consults import filters of the - * protocol to see if the route should be accepted and/or its attributes modified, - * stores the temporary attributes back to the &rte. - * - * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @src - * for network @n, replace it by the new one (or removing it if @new is %NULL), - * recalculate the optimal route for this destination and finally broadcast - * the change (if any) to all routing protocols by calling rte_announce(). - * - * All memory used for attribute lists and other temporary allocations is taken - * from a special linear pool @rte_update_pool and freed when rte_update() - * finishes. - */ + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep & RIK_REJECTED) + { + new->flags |= REF_FILTERED; + return 1; + } + else + return 0; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return 1; +} void -rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - rte *dummy = NULL; - net *nn; + if (!c->in_req.hook) + return; ASSERT(c->channel_state == CS_UP); - rte_update_lock(); + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); + + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; + if (new) { - /* Create a temporary table node */ - nn = alloca(sizeof(net) + n->length); - memset(nn, 0, sizeof(net) + n->length); - net_copy(nn->n.addr, n); + new->net = n; - new->net = nn; - new->sender = c; + int fr; - if (!new->pref) - new->pref = c->preference; - - stats->imp_updates_received++; - if (!rte_validate(new)) + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + + if (c->in_keep & RIK_REJECTED) + new->flags |= REF_FILTERED; + else + new = NULL; } - if (filter == FILTER_REJECT) + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } - if (! c->in_keep_filtered) - goto drop; + } + else + stats->withdraws_received++; - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) - { - rta *old_attrs = NULL; - rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs); + rte_import(&c->in_req, n, new, src); - int fr = f_run(filter, &new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + { + /* There may be some updates on top of the original attribute block */ + ea_list *a = new->attrs; + while (a->next) + a = a->next; - if (! c->in_keep_filtered) - { - rta_free(old_attrs); - goto drop; - } + ea_free(a); + } - new->flags |= REF_FILTERED; - } +} - rte_store_tmp_attrs(new, rte_update_pool, old_attrs); - } - if (!rta_is_cached(new->attrs)) /* Need to copy attributes */ - new->attrs = rta_lookup(new->attrs); - new->flags |= REF_COW; +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; + net *nn; + if (new) + { /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn; + nn = net_get(hook->table, n); + new->net = nn->n.addr; + new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } - else + else if (!(nn = net_find(hook->table, n))) { - stats->imp_withdraws_received++; - - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + req->hook->stats.withdraws_ignored++; + return; } - recalc: /* And recalculate the best route */ - rte_hide_dummy_routes(nn, &dummy); - rte_recalculate(c, nn, new, src); - rte_unhide_dummy_routes(nn, &dummy); + rte_recalculate(hook, nn, new, src); +} + +/* Check rtable for best route to given net whether it would be exported do p */ +int +rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +{ + net *n = net_find(t, a); + + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) + return 0; - rte_update_unlock(); - return; + rte rt = n->routes->rte; - drop: - rte_free(new); - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; + /* Rest is stripped down export_filter() */ + int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; + if (v == RIC_PROCESS) + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); - rte_update_unlock(); + return v > 0; } -/* Independent call to rte_announce(), used from next hop - recalculation, outside of rte_update(). new must be non-NULL */ -static inline void -rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +static void +rt_table_export_done(struct rt_export_hook *hook) { - rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); - rte_update_unlock(); + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } -static inline void -rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */ +static void +rt_export_stopped(void *data) { - rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, old->attrs->src); - rte_update_unlock(); + struct rt_export_hook *hook = data; + struct rt_exporter *tab = hook->table; + + /* Drop pending exports */ + CALL(tab->used, tab); + + /* Unlist */ + rem_node(&hook->n); + + /* Report the channel as stopped. */ + hook->stopped(hook->req); + + /* Reporting the hook as finished. */ + CALL(tab->done, hook); + + /* Free the hook. */ + rfree(hook->pool); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ static inline void -rte_modify(rte *old) +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + atomic_store_explicit(&hook->export_state, state, memory_order_release); + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) { - rte_update_lock(); + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); - if (new != old) + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + rtable *tab = SKIP_BACK(rtable, exporter, re); + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + + /* stats zeroed by mb_allocz */ + switch (req->addr_mode) { - if (new) - { - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - new->flags = (old->flags & ~REF_MODIFY) | REF_COW; - } + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; + + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; + + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; - rte_recalculate(old->sender, old->net, new, old->attrs->src); + default: + bug("Requested an unknown export address mode"); } - rte_update_unlock(); + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + return hook; } -/* Check rtable for best route to given net whether it would be exported do p */ -int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) { - struct proto *p = c->proto; - net *n = net_find(t, a); - rte *rt = n ? n->routes : NULL; + struct rt_export_hook *hook = req->hook = re->start(re, req); - if (!rte_is_valid(rt)) - return 0; + hook->req = req; + hook->table = re; - rte_update_lock(); + bmap_init(&hook->seq_map, hook->pool, 1024); - /* Rest is stripped down export_filter() */ - int v = p->preexport ? p->preexport(c, &rt, rte_update_pool) : 0; - if (v == RIC_PROCESS) + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + + hook->n = (node) {}; + add_tail(&re->hooks, &hook->n); + + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); + rt_send_export_event(hook); +} + +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) + return; + + switch (hook->req->addr_mode) { - rte_make_tmp_attrs(&rt, rte_update_pool, NULL); - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; } +} - /* Discard temporary rte */ - if (rt != n->routes) - rte_free(rt); +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; - rte_update_unlock(); + /* Cancel the feeder event */ + ev_postpone(hook->event); - return v > 0; -} + /* Stop feeding from the exporter */ + CALL(hook->table->stop, hook); + /* Reset the event as the stopped event */ + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + /* Update export state */ + rt_set_export_state(hook, TES_STOP); + + /* Run the stopped event */ + rt_send_export_event(hook); +} /** * rt_refresh_begin - start a refresh cycle @@ -1927,16 +2124,38 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if (e->sender == c) - e->flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + /* Setting a new value of the stale modifier */ + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1948,45 +2167,18 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; - - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE)) - { - e->flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); - if (prune) - rt_schedule_prune(t); -} + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); -void -rt_modify_stale(rtable *t, struct channel *c) -{ - int prune = 0; + rt_schedule_prune(hook->table); - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED)) - { - e->flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } /** @@ -1996,14 +2188,11 @@ rt_modify_stale(rtable *t, struct channel *c) * This functions dumps contents of a &rte to debug output. */ void -rte_dump(rte *e) -{ - net *n = e->net; - debug("%-1N ", n->n.addr); - debug("PF=%02x pref=%d ", e->pflags, e->pref); - rta_dump(e->attrs); - if (e->attrs->src->proto->proto->dump_attrs) - e->attrs->src->proto->proto->dump_attrs(e); +rte_dump(struct rte_storage *e) +{ + debug("%-1N ", e->rte.net); + debug("PF=%02x ", e->rte.pflags); + ea_dump(e->rte.attrs); debug("\n"); } @@ -2016,14 +2205,13 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif FIB_WALK(&t->fib, net, n) { - rte *e; - for(e=n->routes; e; e=e->next) + for(struct rte_storage *e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; @@ -2043,6 +2231,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exporter.hooks) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -2078,6 +2314,20 @@ rt_schedule_prune(rtable *tab) tab->prune_state |= 1; } +static void +rt_export_used(struct rt_exporter *e) +{ + rtable *tab = SKIP_BACK(rtable, exporter, e); + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); + + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); +} static void rt_event(void *ptr) @@ -2086,6 +2336,25 @@ rt_event(void *ptr) rt_lock_table(tab); + if (tab->export_used) + rt_export_cleanup(tab); + + if ( + tab->hcu_corked || + tab->nhu_corked || + (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) + ) + { + if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) + log(L_TRACE "%s: Auxiliary routines corked", tab->name); + + tab->hcu_corked |= tab->hcu_scheduled; + tab->hcu_scheduled = 0; + + tab->nhu_corked |= tab->nhu_state; + tab->nhu_state = 0; + } + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -2098,6 +2367,22 @@ rt_event(void *ptr) rt_unlock_table(tab); } +static void +rt_uncork_event(void *ptr) +{ + rtable *tab = ptr; + + tab->hcu_scheduled |= tab->hcu_corked; + tab->hcu_corked = 0; + + tab->nhu_state |= tab->nhu_corked; + tab->nhu_corked = 0; + + if (config->table_debug) + log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); + + ev_schedule(tab->rt_event); +} static void rt_prune_timer(timer *t) @@ -2151,7 +2436,7 @@ rt_settle_timer(timer *t) struct rt_subscription *s; WALK_LIST(s, tab->subscribers) - s->hook(s); + ev_send(s->list, s->event); } static void @@ -2183,6 +2468,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -2285,9 +2571,6 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - if (r->internal) - return; - r->config->table = NULL; rem_node(&r->n); @@ -2323,16 +2606,13 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; + t->rte_slab = sl_new(p, sizeof(struct rte_storage)); + t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; @@ -2347,24 +2627,41 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->channels); init_list(&t->flowspec_links); + + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + .used = rt_export_used, + }; + + init_list(&t->exporter.hooks); + init_list(&t->exporter.pending); + + init_list(&t->imports); + + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); + init_list(&t->subscribers); - if (!(t->internal = cf->internal)) - { - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + t->rt_event = ev_new_init(p, rt_event, t); + t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); + t->exporter.next_seq = 1; - t->rt_event = ev_new_init(p, rt_event, t); - t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->last_rt_change = t->gc_time = current_time(); + t->cork_threshold = cf->cork_threshold; - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } return t; @@ -2381,9 +2678,10 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); - rte_update_pool = lp_new_default(rt_table_pool); - rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); + init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } @@ -2407,7 +2705,7 @@ rt_prune_table(rtable *tab) struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -2421,9 +2719,16 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2442,8 +2747,6 @@ rt_prune_table(rtable *tab) again: FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e; - rescan: if (limit <= 0) { @@ -2452,26 +2755,21 @@ again: return; } - for (e=n->routes; e; e=e->next) + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->sender->flush_active || (e->flags & REF_DISCARD)) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { - rte_discard(e); - limit--; - - goto rescan; - } - - if (e->flags & REF_MODIFY) - { - rte_modify(e); + rte_recalculate(e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2524,21 +2822,203 @@ again: } } - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); - - /* FIXME: This should be handled in a better way */ - rt_prune_sources(); + uint flushed_channels = 0; /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + rt_export_cleanup(tab); +} + +static void +rt_export_cleanup(rtable *tab) +{ + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first = tab->exporter.first; + + struct rt_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + tab->name, + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first && (first->seq <= min_seq)) + { + ASSERT_DIE(first->new || first->old); + + const net_addr *n = first->new ? + first->new->rte.net : + first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first); + + if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ + if (first->old) + { + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); + } + +#ifdef LOCAL_DEBUG + memset(first, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); + + u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->exporter.pending)) { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + + tab->exporter.next_seq = 1; + } + else + { + reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; } + } + + first = next; + } + + rt_check_cork_low(tab); + +done:; + struct rt_import_hook *ih; node *x; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } + + if (tab->export_used) + ev_schedule(tab->rt_event); + - return; + if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) + tm_stop(tab->exporter.export_timer); +} + +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); } /** @@ -2637,134 +3117,168 @@ rt_postconfig(struct config *c) */ void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); + + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); + + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); +} - if (a->dest != RTD_UNICAST) + +static void +rta_apply_hostentry(ea_list **to, struct hostentry_adata *head) +{ + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; + + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric); + + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(to, he->src, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) - { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); } -static inline int -rta_next_hop_outdated(rta *a) +static inline struct hostentry_adata * +rta_next_hop_outdated(ea_list *a) { - struct hostentry *he = a->hostentry; + /* First retrieve the hostentry */ + eattr *heea = ea_find(a, &ea_gen_hostentry); + if (!heea) + return NULL; - if (!he) - return 0; + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - if (!he->src) - return a->dest != RTD_UNREACHABLE; + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return NEXTHOP_IS_REACHABLE(nhad) ? head : NULL; - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src, &ea_gen_nexthop); + + return ( + (ea_get_int(a, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; } -static inline rte * -rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) +static inline struct rte_storage * +rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - if (!rta_next_hop_outdated(old->attrs)) + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); + rte e0 = *old; + rta_apply_hostentry(&e0.attrs, head); - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + return rte_store(&e0, n, tab); +} - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->aflags = 0; +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs, &ea_gen_hostentry); + if (!heea) + return; - rte *e = sl_alloc(rte_slab); - memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(a); + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - return e; + rta_apply_hostentry(&r->attrs, head); } - #ifdef CONFIG_BGP static inline int @@ -2788,23 +3302,23 @@ net_flow_has_dst_prefix(const net_addr *n) } static inline int -rta_as_path_is_empty(rta *a) +rta_as_path_is_empty(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); return !e || (as_path_getlen(e->u.ptr) == 0); } static inline u32 -rta_get_first_asn(rta *a) +rta_get_first_asn(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); u32 asn; return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; } -int -rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *a, int interior) { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); @@ -2812,11 +3326,11 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ if (interior && rta_as_path_is_empty(a)) - return 1; + return FLOWSPEC_VALID; /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ @@ -2830,33 +3344,36 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* Find best-match BGP unicast route for flowspec dst prefix */ net *nb = net_route(tab_ip, &dst); - rte *rb = nb ? nb->routes : NULL; + const rte *rb = nb ? &nb->routes->rte : NULL; /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rb->attrs->source != RTS_BGP)) - return 0; + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ - u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); - u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); + u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ - if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from))) - return 0; + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ u32 asn_b = rta_get_first_asn(rb->attrs); if (!asn_b) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ if (!interior && (rta_get_first_asn(a) != asn_b)) - return 0; + return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ TRIE_WALK(tab_ip->trie, subnet, &dst) @@ -2865,99 +3382,139 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i if (!nc) continue; - rte *rc = nc->routes; - if (rc->attrs->source != RTS_BGP) - return 0; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; if (rta_get_first_asn(rc->attrs) != asn_b) - return 0; + return FLOWSPEC_INVALID; } TRIE_WALK_END; - return 1; + return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static rte * -rt_flowspec_update_rte(rtable *tab, rte *r) +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) { #ifdef CONFIG_BGP - if ((r->attrs->source != RTS_BGP) || !r->u.bgp.base_table) + if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) return NULL; - const net_addr *n = r->net->n.addr; - struct bgp_proto *p = (void *) r->attrs->src->proto; - int valid = rt_flowspec_check(r->u.bgp.base_table, tab, n, r->attrs, p->is_interior); - int dest = valid ? RTD_NONE : RTD_UNREACHABLE; - - if (dest == r->attrs->dest) + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, r->attrs, rta_size(r->attrs)); - a->dest = dest; - a->aflags = 0; + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; - rte *new = sl_alloc(rte_slab); - memcpy(new, r, sizeof(rte)); - new->attrs = rta_lookup(a); + rte new = *r; + ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); - return new; + return rte_store(&new, n, tab); #else return NULL; #endif } +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} static inline int rt_next_hop_update_net(rtable *tab, net *n) { - rte **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - { - if (!net_is_flow(n->n.addr)) - new = rt_next_hop_update_rte(tab, e); - else - new = rt_flowspec_update_rte(tab, e); + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; - if (new) + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - *k = new; + struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); - rte_trace_in(D_ROUTES, new->sender, new, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->attrs->src->proto->rte_recalculate) - e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL); - - if (e != old_best) - rte_free_quick(e); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; - - e = new; - count++; + if (e->rte.src->owner->rte_recalculate) + e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; + + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; + + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); } - } - if (!count) - return 0; + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { - if (!new_best || rte_better(e, *new_best)) + if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; } @@ -2970,15 +3527,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free_quick(old_best); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + } return count; } @@ -3042,6 +3601,8 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -3087,6 +3648,36 @@ rt_unlock_table(rtable *r) } } +static void +rt_check_cork_low(rtable *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } +} + +static void +rt_check_cork_high(rtable *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + static int rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) { @@ -3100,6 +3691,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old tab->name = new->name; tab->config = new; + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -3158,21 +3757,18 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) +static void +rt_feed_done(struct rt_export_hook *c) { - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - rt_notify_basic(c, n, e, e, c->refeeding); - rte_update_unlock(); + c->event->hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); } /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@ -3180,370 +3776,154 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_by_fib(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + rt_send_export_event(c); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (rte_is_valid(e)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, e); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(e)) - continue; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; - do_feed_channel(c, n, e); - max_feed--; - } + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; -done: - c->feed_active = 0; - return 1; + rt_feed_done(c); } -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. - */ -void -rt_feed_channel_abort(struct channel *c) +static void +rt_feed_by_trie(void *data) { - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } -} + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; -/* - * Import table - */ + int max_feed = 256; -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - rte *old, **pos; - net *net; + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (new) + net_addr addr; + while (trie_walk_next(ws, &addr)) { - net = net_get(tab, n); - - if (!new->pref) - new->pref = c->preference; + net *n = net_find(tab, &addr); + if (!n) + continue; - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; - if (!net) - goto drop_withdraw; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; } - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if (old->attrs->src == src) - { - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - /* Move iterator if needed */ - if (old == c->reload_next_rte) - c->reload_next_rte = old->next; - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; - - break; - } - - if (!new) - { - if (!old) - goto drop_withdraw; + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; - if (!net->routes) - fib_delete(&tab->fib, net); + mb_free(c->walk_state); + c->walk_state = NULL; - return 1; - } + rt_feed_done(c); +} - struct channel_limit *l = &c->rx_limit; - if (l->action && !old) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); +static void +rt_feed_equal(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = net; + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; + rt_feed_done(c); +} -drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; - rte_free(new); +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (!net->routes) - fib_delete(&tab->fib, net); + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - return 0; + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); -drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; - return 0; + rt_feed_done(c); } -int -rt_reload_channel(struct channel *c) +static uint +rt_feed_net(struct rt_export_hook *c, net *n) { - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + uint count = 0; - if (!c->reload_active) + if (c->req->export_bulk) { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (rte *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } - - rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) + count = rte_feed_count(n); + if (count) { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); } - FIB_ITERATE_END; } - while (c->reload_next_rte); - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) + else if (n->routes) { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + count = 1; } -} - -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - FIB_ITERATE_INIT(&fit, &t->fib); + for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_mark_seen(c, rpe); -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - rte *e, **ee = &n->routes; - - while (e = *ee) - { - if (all || (e->flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free_quick(e); - t->rt_count--; - } - else - ee = &e->next; - } - - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; + return count; } - /* - * Export table + * Import table */ -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed) +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rtable *tab = c->out_table; - struct rte_src *src; - rte *old, **pos; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->attrs->src; - - rte_store_tmp_attrs(new, rte_update_pool, NULL); - - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); - src = old0->attrs->src; + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if ((c->ra_mode != RA_ANY) || (old->attrs->src == src)) + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) { - if (new && rte_same(old, new)) - { - /* REF_STALE / REF_DISCARD not used in export table */ - /* - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - */ - - goto drop_update; - } - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - break; + /* And reload the route */ + rte_update(c, net, &new, new.src); } - - if (!new) - { - if (!old) - goto drop_withdraw; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - return refeed; - -drop_withdraw: - return 0; } @@ -3642,7 +4022,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -3659,7 +4039,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -3711,56 +4091,31 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs, "igp_metric"); if (ea) return ea->u.data; - rta *a = rt->attrs; - -#ifdef CONFIG_OSPF - if ((a->source == RTS_OSPF) || - (a->source == RTS_OSPF_IA) || - (a->source == RTS_OSPF_EXT1)) - return rt->u.ospf.metric1; -#endif - -#ifdef CONFIG_RIP - if (a->source == RTS_RIP) - return rt->u.rip.metric; -#endif - -#ifdef CONFIG_BGP - if (a->source == RTS_BGP) - { - u64 metric = bgp_total_aigp_metric(rt); - return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); - } -#endif - -#ifdef CONFIG_BABEL - if (a->source == RTS_BABEL) - return rt->u.babel.metric; -#endif - - if (a->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); + return IGP_METRIC_UNKNOWN; } static int rt_update_hostentry(rtable *tab, struct hostentry *he) { - rta *old_src = he->src; + ea_list *old_src = he->src; int direct = 0; int pxlen = 0; /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -3769,11 +4124,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) net *n = net_route(tab, &he_addr); if (n) { - rte *e = n->routes; - rta *a = e->attrs; - pxlen = n->n.addr->pxlen; - - if (a->hostentry) + struct rte_storage *e = n->routes; + ea_list *a = e->rte.attrs; + u32 pref = rt_get_preference(&e->rte); + + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3781,9 +4139,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + pxlen = n->n.addr->pxlen; + + eattr *nhea = ea_find(a, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -3796,12 +4159,10 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; - he->igp_metric = rt_get_igp_metric(e); + he->igp_metric = rt_get_igp_metric(&e->rte); } done: @@ -3839,7 +4200,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h new file mode 100644 index 00000000..1a6b7a93 --- /dev/null +++ b/nest/rt.h @@ -0,0 +1,589 @@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" +#include "lib/event.h" +#include "lib/rcu.h" + +#include <stdatomic.h> + +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +struct rt_cork_threshold { + u64 low, high; +}; + +/* + * Master Routing Tables. Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. + */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ + btime export_settle_time; /* Delay before exports are announced */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ +}; + +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ + + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); + void (*used)(struct rt_exporter *); + + list pending; /* List of packed struct rt_pending_export */ + struct timer *export_timer; + + struct rt_pending_export *first; /* First export to announce */ + u64 next_seq; /* The next export will have this ID */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + struct rt_exporter exporter; /* Exporter API structure */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * obstacle from this routing table. + */ + struct event *rt_event; /* Routing table event */ + struct event *uncork_event; /* Called when uncork happens */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + uint gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte hcu_corked; /* Hostcache update is corked with this state */ + byte nhu_state; /* Next Hop Update state */ + byte nhu_corked; /* Next Hop Update is corked with this state */ + byte export_used; /* Pending Export pruning is scheduled */ + byte cork_active; /* Cork has been activated */ + struct rt_cork_threshold cork_threshold; /* Threshold for table cork */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + event *event; + event_list *list; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; +} rt_cork; + +static inline void rt_cork_acquire(void) +{ + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); +} + +static inline void rt_cork_release(void) +{ + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_schedule_work(&rt_cork.run); + } +} + +static inline int rt_cork_check(event *e) +{ + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; +} + + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *first, *last; + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + const net_addr *addr; /* Network prefilter address */ + u8 trace_routes; + u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ + + event_list *list; /* Where to schedule export events */ + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + struct rt_exporter *table; /* The connected table */ + + pool *pool; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + union { + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + }; + u32 hash_iter; /* Iterator over hash */ + }; + + struct bmap seq_map; /* Keep track which exports were already procesed */ + + struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *rpe_next; /* Next pending export to process */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +/* Value of addr_mode */ +#define TE_ADDR_NONE 0 /* No address matching */ +#define TE_ADDR_EQUAL 1 /* Exact query - show route <addr> */ +#define TE_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ +#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */ + + +#define TFT_FIB 1 +#define TFT_TRIE 2 +#define TFT_HASH 3 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rt_set_export_state(struct rt_export_hook *hook, u8 state); + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + +/* Get next rpe. If src is given, it must match. */ +struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); + +/* Mark the pending export processed */ +void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Get pending export seen status */ +int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ + +/* Return value of preexport() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + +extern list routing_tables; +struct config; + +void rt_init(void); +void rt_preconfig(struct config *); +void rt_postconfig(struct config *); +void rt_commit(struct config *new, struct config *old); +void rt_lock_table(rtable *); +void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); +void rt_subscribe(rtable *tab, struct rt_subscription *s); +void rt_unsubscribe(struct rt_subscription *s); +void rt_flowspec_link(rtable *src, rtable *dst); +void rt_flowspec_unlink(rtable *src, rtable *dst); +rtable *rt_setup(pool *, struct rtable_config *); +static inline void rt_shutdown(rtable *r) { rfree(r->rp); } + +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); +void rt_schedule_prune(rtable *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rt_dump(rtable *); +void rt_dump_all(void); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); +int rt_reload_channel(struct channel *c); +void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); +void rt_prune_sync(rtable *t, int all); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); + +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + + +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; + +struct rt_show_data_rtable { + node n; + const char *name; + struct rt_exporter *table; + struct channel *export_channel; + struct channel *prefilter; + struct krt_proto *kernel; +}; + +struct rt_show_data { + struct cli *cli; /* Pointer back to the CLI */ + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct rt_export_request req; /* Export request in use */ + int verbose, tables_defined_by; + const struct filter *filter; + struct proto *show_protocol; + struct proto *export_protocol; + struct channel *export_channel; + struct config *running_on_config; + struct rt_export_hook *kernel_export_hook; + int export_mode, addr_mode, primary_only, filtered, stats; + + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; + int show_counter_last_flush; +}; + +void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ + +/* Value of export_mode in struct rt_show_data */ +#define RSEM_NONE 0 /* Export mode not used */ +#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ +#define RSEM_EXPORT 2 /* Routes accepted by export filter */ +#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ +#define RSEM_EXPORTED 4 /* Routes marked in export map */ + +/* Host entry: Resolve hook for recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; + +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); + +void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); +void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); + +/* + * Default protocol preferences + */ + +#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_STATIC 200 /* Static route */ +#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ +#define DEF_PREF_RIP 120 /* RIP */ +#define DEF_PREF_BGP 100 /* BGP */ +#define DEF_PREF_RPKI 100 /* RPKI */ +#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ +#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */ + +/* + * Route Origin Authorization + */ + +#define ROA_UNKNOWN 0 +#define ROA_VALID 1 +#define ROA_INVALID 2 + +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); + +#endif |