diff options
Diffstat (limited to 'nest')
-rw-r--r-- | nest/Makefile | 12 | ||||
-rw-r--r-- | nest/a-path.c | 712 | ||||
-rw-r--r-- | nest/a-path_test.c | 220 | ||||
-rw-r--r-- | nest/a-set.c | 91 | ||||
-rw-r--r-- | nest/a-set_test.c | 260 | ||||
-rw-r--r-- | nest/attrs.h | 79 | ||||
-rw-r--r-- | nest/bfd.h | 2 | ||||
-rw-r--r-- | nest/bird.h | 1 | ||||
-rw-r--r-- | nest/cli.c | 7 | ||||
-rw-r--r-- | nest/cli.h | 1 | ||||
-rw-r--r-- | nest/cmds.c | 9 | ||||
-rw-r--r-- | nest/config.Y | 426 | ||||
-rw-r--r-- | nest/iface.c | 282 | ||||
-rw-r--r-- | nest/iface.h | 38 | ||||
-rw-r--r-- | nest/mrtdump.h | 1 | ||||
-rw-r--r-- | nest/neighbor.c | 141 | ||||
-rw-r--r-- | nest/password.c | 10 | ||||
-rw-r--r-- | nest/password.h | 4 | ||||
-rw-r--r-- | nest/proto-hooks.c | 4 | ||||
-rw-r--r-- | nest/proto.c | 1996 | ||||
-rw-r--r-- | nest/proto.sgml | 17 | ||||
-rw-r--r-- | nest/protocol.h | 297 | ||||
-rw-r--r-- | nest/route.h | 381 | ||||
-rw-r--r-- | nest/rt-attr.c | 293 | ||||
-rw-r--r-- | nest/rt-dev.c | 105 | ||||
-rw-r--r-- | nest/rt-dev.h | 6 | ||||
-rw-r--r-- | nest/rt-fib.c | 304 | ||||
-rw-r--r-- | nest/rt-roa.c | 440 | ||||
-rw-r--r-- | nest/rt-show.c | 421 | ||||
-rw-r--r-- | nest/rt-table.c | 1329 |
30 files changed, 4650 insertions, 3239 deletions
diff --git a/nest/Makefile b/nest/Makefile index e6928668..884d3950 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,6 +1,8 @@ -source=rt-table.c rt-fib.c rt-attr.c rt-roa.c proto.c iface.c rt-dev.c password.c cli.c locks.c cmds.c neighbor.c \ - a-path.c a-set.c -root-rel=../ -dir-name=nest +src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../Rules +tests_src := a-set_test.c a-path_test.c +tests_targets := $(tests_targets) $(tests-target-files) +tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/nest/a-path.c index 0272c6d7..6bad9747 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -20,263 +20,528 @@ #define put_as put_u32 #define get_as get_u32 -#define BS 4 +#define BS 4 /* Default block size of ASN (autonomous system number) */ -struct adata * -as_path_prepend(struct linpool *pool, struct adata *olda, u32 as) +#define BAD(DSC, VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; }) + +int +as_path_valid(byte *data, uint len, int bs, int confed, char *err, uint elen) { - struct adata *newa; + byte *pos = data; + char *err_dsc = NULL; + uint err_val = 0; + + while (len) + { + if (len < 2) + BAD("segment framing error", 0); + + /* Process one AS path segment */ + uint type = pos[0]; + uint slen = 2 + bs * pos[1]; + + if (len < slen) + BAD("segment framing error", len); - if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255) - /* Starting with sequence => just prepend the AS number */ + switch (type) { - int nl = olda->length + BS; - newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - newa->data[0] = AS_PATH_SEQUENCE; - newa->data[1] = olda->data[1] + 1; - memcpy(newa->data + BS + 2, olda->data + 2, olda->length - 2); + case AS_PATH_SET: + case AS_PATH_SEQUENCE: + break; + + case AS_PATH_CONFED_SEQUENCE: + case AS_PATH_CONFED_SET: + if (!confed) + 
BAD("AS_CONFED* segment", type); + break; + + default: + BAD("unknown segment", type); } - else /* Create new path segment */ + + if (pos[1] == 0) + BAD("zero-length segment", type); + + pos += slen; + len -= slen; + } + + return 1; + +bad: + if (err) + if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0) + err[0] = 0; + + return 0; +} + +int +as_path_16to32(byte *dst, byte *src, uint len) +{ + byte *dst0 = dst; + byte *end = src + len; + uint i, n; + + while (src < end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; + + for (i = 0; i < n; i++) { - int nl = olda->length + BS + 2; - newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - newa->data[0] = AS_PATH_SEQUENCE; - newa->data[1] = 1; - memcpy(newa->data + BS + 2, olda->data, olda->length); + put_u32(dst, get_u16(src)); + src += 2; + dst += 4; } - put_as(newa->data + 2, as); - return newa; + } + + return dst - dst0; } int -as_path_convert_to_old(struct adata *path, byte *dst, int *new_used) +as_path_32to16(byte *dst, byte *src, uint len) { - byte *src = path->data; - byte *src_end = src + path->length; - byte *dst_start = dst; - u32 as; - int i, n; - *new_used = 0; + byte *dst0 = dst; + byte *end = src + len; + uint i, n; + + while (src < end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; - while (src < src_end) + for (i = 0; i < n; i++) { - n = src[1]; - *dst++ = *src++; - *dst++ = *src++; + put_u16(dst, get_u32(src)); + src += 4; + dst += 2; + } + } - for(i=0; i<n; i++) - { - as = get_u32(src); - if (as > 0xFFFF) - { - as = AS_TRANS; - *new_used = 1; - } - put_u16(dst, as); - src += 4; - dst += 2; - } + return dst - dst0; +} + +int +as_path_contains_as4(const struct adata *path) +{ + const byte *pos = path->data; + const byte *end = pos + path->length; + uint i, n; + + while (pos < end) + { + n = pos[1]; + pos += 2; + + for (i = 0; i < n; i++) + { + if (get_as(pos) > 0xFFFF) + return 1; + + pos += BS; } + } - return dst - dst_start; + 
return 0; } int -as_path_convert_to_new(struct adata *path, byte *dst, int req_as) +as_path_contains_confed(const struct adata *path) { - byte *src = path->data; - byte *src_end = src + path->length; - byte *dst_start = dst; - u32 as; - int i, t, n; + const byte *pos = path->data; + const byte *end = pos + path->length; + + while (pos < end) + { + uint type = pos[0]; + uint slen = 2 + BS * pos[1]; + + if ((type == AS_PATH_CONFED_SEQUENCE) || + (type == AS_PATH_CONFED_SET)) + return 1; + + pos += slen; + } + return 0; +} + +struct adata * +as_path_strip_confed(struct linpool *pool, const struct adata *path) +{ + struct adata *res = lp_alloc_adata(pool, path->length); + const byte *src = path->data; + const byte *end = src + path->length; + byte *dst = res->data; + + while (src < end) + { + uint type = src[0]; + uint slen = 2 + BS * src[1]; - while ((src < src_end) && (req_as > 0)) + /* Copy regular segments */ + if ((type == AS_PATH_SET) || (type == AS_PATH_SEQUENCE)) { - t = *src++; - n = *src++; + memcpy(dst, src, slen); + dst += slen; + } - if (t == AS_PATH_SEQUENCE) - { - if (n > req_as) - n = req_as; + src += slen; + } - req_as -= n; - } - else // t == AS_PATH_SET - req_as--; + /* Fix the result length */ + res->length = dst - res->data; + + return res; +} - *dst++ = t; - *dst++ = n; +struct adata * +as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as) +{ + struct adata *np; + const byte *pos = op->data; + uint len = op->length; - for(i=0; i<n; i++) - { - as = get_u16(src); - put_u32(dst, as); - src += 2; - dst += 4; - } + if (len && (pos[0] == seq) && (pos[1] < 255)) + { + /* Starting with matching segment => just prepend the AS number */ + np = lp_alloc_adata(pool, len + BS); + np->data[0] = seq; + np->data[1] = pos[1] + 1; + put_as(np->data + 2, as); + + uint dlen = BS * pos[1]; + memcpy(np->data + 2 + BS, pos + 2, dlen); + ADVANCE(pos, len, 2 + dlen); + } + else + { + /* Create a new path segment */ + np = lp_alloc_adata(pool, len 
+ 2 + BS); + np->data[0] = seq; + np->data[1] = 1; + put_as(np->data + 2, as); + } + + if (len) + { + byte *dst = np->data + 2 + BS * np->data[1]; + + memcpy(dst, pos, len); + } + + return np; +} + + +struct adata * +as_path_to_old(struct linpool *pool, const struct adata *path) +{ + struct adata *res = lp_alloc_adata(pool, path->length); + byte *pos = res->data; + byte *end = pos + res->length; + uint i, n; + u32 as; + + /* Copy the whole path */ + memcpy(res->data, path->data, path->length); + + /* Replace 32-bit AS numbers with AS_TRANS */ + while (pos < end) + { + n = pos[1]; + pos += 2; + + for (i = 0; i < n; i++) + { + as = get_as(pos); + if (as > 0xFFFF) + put_as(pos, AS_TRANS); + + pos += BS; } + } - return dst - dst_start; + return res; } +/* + * Cut the path to the length @num, measured to the usual path metric. Note that + * AS_CONFED_* segments have zero length and must be added if they are on edge. + * In contrast to other as_path_* functions, @path is modified in place. + */ void -as_path_format(struct adata *path, byte *buf, uint size) +as_path_cut(struct adata *path, uint num) { - byte *p = path->data; - byte *e = p + path->length; - byte *end = buf + size - 16; - int sp = 1; - int l, isset; + byte *pos = path->data; + byte *end = pos + path->length; - while (p < e) + while (pos < end) + { + uint t = pos[0]; + uint l = pos[1]; + uint n = 0; + + switch (t) { - if (buf > end) - { - strcpy(buf, " ..."); - return; - } - isset = (*p++ == AS_PATH_SET); - l = *p++; - if (isset) - { - if (!sp) - *buf++ = ' '; - *buf++ = '{'; - sp = 0; - } - while (l-- && buf <= end) - { - if (!sp) - *buf++ = ' '; - buf += bsprintf(buf, "%u", get_as(p)); - p += BS; - sp = 0; - } - if (isset) - { - *buf++ = ' '; - *buf++ = '}'; - sp = 0; - } + case AS_PATH_SET: n = 1; break; + case AS_PATH_SEQUENCE: n = l; break; + case AS_PATH_CONFED_SEQUENCE: n = 0; break; + case AS_PATH_CONFED_SET: n = 0; break; + default: bug("as_path_cut: Invalid path segment"); } - *buf = 0; + + /* 
Cannot add whole segment, so try partial one and finish */ + if (num < n) + { + if (num) + { + pos[1] = num; + pos += 2 + BS * num; + } + + break; + } + + num -= n; + pos += 2 + BS * l; + } + + path->length = pos - path->data; } -int -as_path_getlen(struct adata *path) +/* + * Merge (concatenate) paths @p1 and @p2 and return the result. + * In contrast to other as_path_* functions, @p1 and @p2 may be reused. + */ +struct adata * +as_path_merge(struct linpool *pool, struct adata *p1, struct adata *p2) { - return as_path_getlen_int(path, BS); + if (p1->length == 0) + return p2; + + if (p2->length == 0) + return p1; + + struct adata *res = lp_alloc_adata(pool, p1->length + p2->length); + memcpy(res->data, p1->data, p1->length); + memcpy(res->data + p1->length, p2->data, p2->length); + + return res; +} + +void +as_path_format(const struct adata *path, byte *bb, uint size) +{ + buffer buf = { .start = bb, .pos = bb, .end = bb + size }, *b = &buf; + const byte *pos = path->data; + const byte *end = pos + path->length; + const char *ops, *cls; + + b->pos[0] = 0; + + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + switch (type) + { + case AS_PATH_SET: ops = "{"; cls = "}"; break; + case AS_PATH_SEQUENCE: ops = NULL; cls = NULL; break; + case AS_PATH_CONFED_SEQUENCE: ops = "("; cls = ")"; break; + case AS_PATH_CONFED_SET: ops = "({"; cls = "})"; break; + default: bug("Invalid path segment"); + } + + if (ops) + buffer_puts(b, ops); + + while (len--) + { + buffer_print(b, len ? 
"%u " : "%u", get_as(pos)); + pos += BS; + } + + if (cls) + buffer_puts(b, cls); + + if (pos < end) + buffer_puts(b, " "); + } + + /* Handle overflow */ + if (b->pos == b->end) + strcpy(b->end - 12, "..."); } int -as_path_getlen_int(struct adata *path, int bs) +as_path_getlen(const struct adata *path) { - int res = 0; - u8 *p = path->data; - u8 *q = p+path->length; - int len; + const byte *pos = path->data; + const byte *end = pos + path->length; + uint res = 0; - while (p<q) + while (pos < end) + { + uint t = pos[0]; + uint l = pos[1]; + uint n = 0; + + switch (t) { - switch (*p++) - { - case AS_PATH_SET: len = *p++; res++; p += bs * len; break; - case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break; - default: bug("as_path_getlen: Invalid path segment"); - } + case AS_PATH_SET: n = 1; break; + case AS_PATH_SEQUENCE: n = l; break; + case AS_PATH_CONFED_SEQUENCE: n = 0; break; + case AS_PATH_CONFED_SET: n = 0; break; + default: bug("as_path_getlen: Invalid path segment"); } + + res += n; + pos += 2 + BS * l; + } + return res; } int -as_path_get_last(struct adata *path, u32 *orig_as) +as_path_get_last(const struct adata *path, u32 *orig_as) { + const byte *pos = path->data; + const byte *end = pos + path->length; int found = 0; - u32 res = 0; - u8 *p = path->data; - u8 *q = p+path->length; - int len; + u32 val = 0; - while (p<q) + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + if (!len) + continue; + + switch (type) { - switch (*p++) - { - case AS_PATH_SET: - if (len = *p++) - { - found = 0; - p += BS * len; - } - break; - case AS_PATH_SEQUENCE: - if (len = *p++) - { - found = 1; - res = get_as(p + BS * (len - 1)); - p += BS * len; - } - break; - default: bug("Invalid path segment"); - } + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + found = 0; + break; + + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + val = get_as(pos + BS * (len - 1)); + found = 1; + break; + + default: + bug("Invalid path segment"); } 
+ pos += BS * len; + } + if (found) - *orig_as = res; + *orig_as = val; return found; } u32 -as_path_get_last_nonaggregated(struct adata *path) +as_path_get_last_nonaggregated(const struct adata *path) { - u8 *p = path->data; - u8 *q = p+path->length; - u32 res = 0; - int len; + const byte *pos = path->data; + const byte *end = pos + path->length; + u32 val = 0; - while (p<q) + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + if (!len) + continue; + + switch (type) { - switch (*p++) - { - case AS_PATH_SET: - return res; + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + return val; - case AS_PATH_SEQUENCE: - if (len = *p++) - res = get_as(p + BS * (len - 1)); - p += BS * len; - break; + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + val = get_as(pos + BS * (len - 1)); + break; - default: bug("Invalid path segment"); - } + default: + bug("Invalid path segment"); } - return res; -} + pos += BS * len; + } + return val; +} int -as_path_get_first(struct adata *path, u32 *last_as) +as_path_get_first(const struct adata *path, u32 *last_as) { - u8 *p = path->data; + const u8 *p = path->data; if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) return 0; - else + + *last_as = get_as(p+2); + return 1; +} + +int +as_path_get_first_regular(const struct adata *path, u32 *last_as) +{ + const byte *pos = path->data; + const byte *end = pos + path->length; + + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; + + switch (type) { - *last_as = get_as(p+2); + case AS_PATH_SET: + return 0; + + case AS_PATH_SEQUENCE: + if (len == 0) + return 0; + + *last_as = get_as(pos); return 1; + + case AS_PATH_CONFED_SEQUENCE: + case AS_PATH_CONFED_SET: + break; + + default: + bug("Invalid path segment"); } + + pos += BS * len; + } + + return 0; } int -as_path_contains(struct adata *path, u32 as, int min) +as_path_contains(const struct adata *path, u32 as, int min) { - u8 *p = path->data; - u8 *q = p+path->length; 
+ const u8 *p = path->data; + const u8 *q = p+path->length; int num = 0; int i, n; @@ -296,10 +561,10 @@ as_path_contains(struct adata *path, u32 as, int min) } int -as_path_match_set(struct adata *path, struct f_tree *set) +as_path_match_set(const struct adata *path, struct f_tree *set) { - u8 *p = path->data; - u8 *q = p+path->length; + const u8 *p = path->data; + const u8 *q = p+path->length; int i, n; while (p<q) @@ -325,8 +590,8 @@ as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 return NULL; int len = path->length; - u8 *p = path->data; - u8 *q = path->data + len; + const u8 *p = path->data; + const u8 *q = path->data + len; u8 *d, *d2; int i, bt, sn, dn; u8 buf[len]; @@ -388,51 +653,57 @@ struct pm_pos u8 mark; union { - char *sp; + const char *sp; u32 asn; } val; }; static int -parse_path(struct adata *path, struct pm_pos *pos) +parse_path(const struct adata *path, struct pm_pos *pp) { - u8 *p = path->data; - u8 *q = p + path->length; - struct pm_pos *opos = pos; - int i, len; + const byte *pos = path->data; + const byte *end = pos + path->length; + struct pm_pos *op = pp; + uint i; + while (pos < end) + { + uint type = pos[0]; + uint len = pos[1]; + pos += 2; - while (p < q) - switch (*p++) + switch (type) + { + case AS_PATH_SET: + case AS_PATH_CONFED_SET: + pp->set = 1; + pp->mark = 0; + pp->val.sp = pos - 1; + pp++; + + pos += BS * len; + break; + + case AS_PATH_SEQUENCE: + case AS_PATH_CONFED_SEQUENCE: + for (i = 0; i < len; i++) { - case AS_PATH_SET: - pos->set = 1; - pos->mark = 0; - pos->val.sp = p; - len = *p; - p += 1 + BS * len; - pos++; - break; - - case AS_PATH_SEQUENCE: - len = *p++; - for (i = 0; i < len; i++) - { - pos->set = 0; - pos->mark = 0; - pos->val.asn = get_as(p); - p += BS; - pos++; - } - break; - - default: - bug("as_path_match: Invalid path component"); + pp->set = 0; + pp->mark = 0; + pp->val.asn = get_as(pos); + pp++; + + pos += BS; } - - return pos - opos; -} + break; + + default: + bug("Invalid 
path segment"); + } + } + return pp - op; +} static int pm_match(struct pm_pos *pos, u32 asn, u32 asn2) @@ -441,7 +712,7 @@ pm_match(struct pm_pos *pos, u32 asn, u32 asn2) if (! pos->set) return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); - u8 *p = pos->val.sp; + const u8 *p = pos->val.sp; int len = *p++; int i; @@ -463,7 +734,7 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) if (pos[i].set) pos[i].mark = 1; - + for (j = i + 1; (j < plen) && pos[j].set && (! pos[j].mark); j++) pos[j].mark = 1; pos[j].mark = 1; @@ -478,7 +749,7 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) } /* AS path matching is nontrivial. Because AS path can - * contain sets, it is not a plain wildcard matching. A set + * contain sets, it is not a plain wildcard matching. A set * in an AS path is interpreted as it might represent any * sequence of AS numbers from that set (possibly with * repetitions). So it is also a kind of a pattern, @@ -499,9 +770,8 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) * (auxiliary position after last real position in AS path) * is marked. */ - int -as_path_match(struct adata *path, struct f_path_mask *mask) +as_path_match(const struct adata *path, struct f_path_mask *mask) { struct pm_pos pos[2048 + 1]; int plen = parse_path(path, pos); @@ -517,7 +787,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) l = h = 0; pos[0].mark = 1; - + while (mask) { /* We remove this mark to not step after pos[plen] */ @@ -539,7 +809,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) case PM_ASN_RANGE: val = mask->val; val2 = mask->val2; - goto step; + goto step; case PM_QUESTION: step: nh = nl = -1; diff --git a/nest/a-path_test.c b/nest/a-path_test.c new file mode 100644 index 00000000..a71b48ba --- /dev/null +++ b/nest/a-path_test.c @@ -0,0 +1,220 @@ +/* + * BIRD -- Path Operations Tests + * + * (c) 2015 CZ.NIC z.s.p.o. 
+ * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "nest/route.h" +#include "nest/attrs.h" +#include "lib/resource.h" + +#define TESTS_NUM 30 +#define AS_PATH_LENGTH 1000 + +#if AS_PATH_LENGTH > AS_PATH_MAXLEN +#warning "AS_PATH_LENGTH should be <= AS_PATH_MAXLEN" +#endif + +static int +t_as_path_match(void) +{ + resource_init(); + + int round; + for (round = 0; round < TESTS_NUM; round++) + { + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + u32 first_prepended, last_prepended; + first_prepended = last_prepended = 0; + struct linpool *lp = lp_new_default(&root_pool); + + struct f_path_mask mask[AS_PATH_LENGTH] = {}; + int i; + for (i = 0; i < AS_PATH_LENGTH; i++) + { + u32 val = bt_random(); + as_path = as_path_prepend(lp, as_path, val); + bt_debug("Prepending ASN: %10u \n", val); + + if (i == 0) + first_prepended = val; + if (i == AS_PATH_LENGTH-1) + last_prepended = val; + + mask[i].kind = PM_ASN; + mask[i].val = val; + if (i) + mask[i].next = &mask[i-1]; + } + + bt_assert_msg(as_path_match(as_path, &mask[AS_PATH_LENGTH-1]), "Mask should match with AS path"); + + u32 asn; + + bt_assert(as_path_get_first(as_path, &asn)); + bt_assert_msg(asn == last_prepended, "as_path_get_first() should return the last prepended ASN"); + + bt_assert(as_path_get_last(as_path, &asn)); + bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); + + rfree(lp); + } + + return 1; +} + +static int +t_path_format(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); + + uint i; + for (i = 4294967285; i <= 4294967294; i++) + { + as_path = as_path_prepend(lp, as_path, i); + bt_debug("Prepending ASN: %10u \n", i); + } + +#define BUFFER_SIZE 120 + byte buf[BUFFER_SIZE] = {}; + + as_path_format(&empty_as_path, buf, 
BUFFER_SIZE); + bt_assert_msg(strcmp(buf, "") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); + + as_path_format(as_path, buf, BUFFER_SIZE); + bt_assert_msg(strcmp(buf, "4294967294 4294967293 4294967292 4294967291 4294967290 4294967289 4294967288 4294967287 4294967286 4294967285") == 0, "Buffer(%zu): '%s'", strlen(buf), buf); + +#define SMALL_BUFFER_SIZE 25 + byte buf2[SMALL_BUFFER_SIZE] = {}; + as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); + bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); + + rfree(lp); + + return 1; +} + +static int +count_asn_in_array(const u32 *array, u32 asn) +{ + int counts_of_contains = 0; + int u; + for (u = 0; u < AS_PATH_LENGTH; u++) + if (array[u] == asn) + counts_of_contains++; + return counts_of_contains; +} + +static int +t_path_include(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); + + u32 as_nums[AS_PATH_LENGTH] = {}; + int i; + for (i = 0; i < AS_PATH_LENGTH; i++) + { + u32 val = bt_random(); + as_nums[i] = val; + as_path = as_path_prepend(lp, as_path, val); + } + + for (i = 0; i < AS_PATH_LENGTH; i++) + { + int counts_of_contains = count_asn_in_array(as_nums, as_nums[i]); + bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); + + bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 0) != NULL); + bt_assert(as_path_filter(lp, as_path, NULL, as_nums[i], 1) != NULL); + } + + for (i = 0; i < 10000; i++) + { + u32 test_val = bt_random(); + int counts_of_contains = count_asn_in_array(as_nums, test_val); + int result = as_path_contains(as_path, test_val, (counts_of_contains == 0 ? 
1 : counts_of_contains)); + + if (counts_of_contains) + bt_assert_msg(result, "As path should contain %d-times the number %u", counts_of_contains, test_val); + else + bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); + } + + rfree(lp); + + return 1; +} + +#if 0 +static int +t_as_path_converting(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + struct adata *as_path = &empty_as_path; + struct linpool *lp = lp_new_default(&root_pool); +#define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 + + int i; + for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) + as_path = as_path_prepend(lp, as_path, i); + + bt_debug("data length: %u \n", as_path->length); + + byte buffer[100] = {}; + int used_size = as_path_convert_to_new(as_path, buffer, AS_PATH_LENGTH_FOR_CONVERTING_TEST-1); + bt_debug("as_path_convert_to_new: len %d \n%s\n", used_size, buffer); + for (i = 0; i < used_size; i++) + { + bt_debug("\\03%d", buffer[i]); + } + bt_debug("\n"); + bt_assert(memcmp(buffer, + "\032\039\030\030\030\030\030\030\030\039\030\030\030\030\030\030\030\038\030\030\030\030\030\030" + "\030\037\030\030\030\030\030\030\030\036\030\030\030\030", + 38)); + + bzero(buffer, sizeof(buffer)); + int new_used; + used_size = as_path_convert_to_old(as_path, buffer, &new_used); + bt_debug("as_path_convert_to_old: len %d, new_used: %d \n", used_size, new_used); + for (i = 0; i < used_size; i++) + { + bt_debug("\\03%d", buffer[i]); + } + bt_debug("\n"); + bt_assert(memcmp(buffer, + "\032\0310\030\039\030\038\030\037\030\036\030\035\030\034\030\033\030\032\030\031\030\030", + 22)); + + return 1; +} +#endif + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); + bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); + bt_test_suite(t_path_include, "Testing including a AS number in AS path"); + // bt_test_suite(t_as_path_converting, "Testing 
as_path_convert_to_*() output constancy"); + + return bt_exit_value(); +} diff --git a/nest/a-set.c b/nest/a-set.c index a6c07f45..048e522d 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -7,6 +7,8 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#include <stdlib.h> + #include "nest/bird.h" #include "nest/route.h" #include "nest/attrs.h" @@ -474,3 +476,92 @@ lc_set_union(struct linpool *pool, struct adata *l1, struct adata *l2) memcpy(res->data + l1->length, tmp, len); return res; } + + +struct adata * +ec_set_del_nontrans(struct linpool *pool, struct adata *set) +{ + adata *res = lp_alloc_adata(pool, set->length); + u32 *src = int_set_get_data(set); + u32 *dst = int_set_get_data(res); + int len = int_set_get_size(set); + int i; + + /* Remove non-transitive communities (EC_TBIT set) */ + for (i = 0; i < len; i += 2) + { + if (src[i] & EC_TBIT) + continue; + + *dst++ = src[i]; + *dst++ = src[i+1]; + } + + res->length = ((byte *) dst) - res->data; + + return res; +} + +static int +int_set_cmp(const void *X, const void *Y) +{ + const u32 *x = X, *y = Y; + return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; +} + +struct adata * +int_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / 4, 4, int_set_cmp); + return dst; +} + + +static int +ec_set_cmp(const void *X, const void *Y) +{ + u64 x = ec_get(X, 0); + u64 y = ec_get(Y, 0); + return (x < y) ? -1 : (x > y) ? 
1 : 0; +} + +struct adata * +ec_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / 8, 8, ec_set_cmp); + return dst; +} + +void +ec_set_sort_x(struct adata *set) +{ + /* Sort in place */ + qsort(set->data, set->length / 8, 8, ec_set_cmp); +} + + +static int +lc_set_cmp(const void *X, const void *Y) +{ + const u32 *x = X, *y = Y; + if (x[0] != y[0]) + return (x[0] > y[0]) ? 1 : -1; + if (x[1] != y[1]) + return (x[1] > y[1]) ? 1 : -1; + if (x[2] != y[2]) + return (x[2] > y[2]) ? 1 : -1; + return 0; +} + +struct adata * +lc_set_sort(struct linpool *pool, struct adata *src) +{ + struct adata *dst = lp_alloc_adata(pool, src->length); + memcpy(dst->data, src->data, src->length); + qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); + return dst; +} diff --git a/nest/a-set_test.c b/nest/a-set_test.c new file mode 100644 index 00000000..a5081f9f --- /dev/null +++ b/nest/a-set_test.c @@ -0,0 +1,260 @@ +/* + * BIRD -- Set/Community-list Operations Tests + * + * (c) 2015 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#include "test/birdtest.h" +#include "test/bt-utils.h" + +#include "lib/net.h" +#include "nest/route.h" +#include "nest/attrs.h" +#include "lib/resource.h" + +#define SET_SIZE 10 +static struct adata *set_sequence; /* <0; SET_SIZE) */ +static struct adata *set_sequence_same; /* <0; SET_SIZE) */ +static struct adata *set_sequence_higher; /* <SET_SIZE; 2*SET_SIZE) */ +static struct adata *set_random; + +#define BUFFER_SIZE 1000 +static byte buf[BUFFER_SIZE] = {}; + +#define SET_SIZE_FOR_FORMAT_OUTPUT 10 + +struct linpool *lp; + +enum set_type +{ + SET_TYPE_INT, + SET_TYPE_EC +}; + +static void +generate_set_sequence(enum set_type type) +{ + struct adata empty_as_path = {}; + set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; + lp = lp_new_default(&root_pool); + + int i; + for (i = 0; i < SET_SIZE; i++) + { + if (type == SET_TYPE_INT) + { + set_sequence = int_set_add(lp, set_sequence, i); + set_sequence_same = int_set_add(lp, set_sequence_same, i); + set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); + set_random = int_set_add(lp, set_random, bt_random()); + } + else if (type == SET_TYPE_EC) + { + set_sequence = ec_set_add(lp, set_sequence, i); + set_sequence_same = ec_set_add(lp, set_sequence_same, i); + set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); + set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); + } + else + bt_abort_msg("This should be unreachable"); + } +} + +/* + * SET INT TESTS + */ + +static int +t_set_int_contains(void) +{ + int i; + + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + bt_assert(int_set_get_size(set_sequence) == SET_SIZE); + + for (i = 0; i < SET_SIZE; i++) + bt_assert(int_set_contains(set_sequence, i)); + bt_assert(int_set_contains(set_sequence, -1) == 0); + bt_assert(int_set_contains(set_sequence, SET_SIZE) == 0); + + int *data = int_set_get_data(set_sequence); + for (i = 0; i < SET_SIZE; i++) + 
bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); + + rfree(lp); + return 1; +} + +static int +t_set_int_union(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + struct adata *set_union; + set_union = int_set_union(lp, set_sequence, set_sequence_same); + bt_assert(int_set_get_size(set_union) == SET_SIZE); + bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); + + set_union = int_set_union(lp, set_sequence, set_sequence_higher); + bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); + bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); + + rfree(lp); + return 1; +} + +static int +t_set_int_format(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + set_sequence->length = 4 * SET_SIZE_FOR_FORMAT_OUTPUT; /* dirty */ + bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "0.0.0.0 0.0.0.1 0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); + + bzero(buf, BUFFER_SIZE); + bt_assert(int_set_format(set_sequence, 0, 2, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "0.0.0.2 0.0.0.3 0.0.0.4 0.0.0.5 0.0.0.6 0.0.0.7 0.0.0.8 0.0.0.9") == 0); + + bzero(buf, BUFFER_SIZE); + bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); + bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); + + rfree(lp); + return 1; +} + +static int +t_set_int_delete(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_INT); + + struct adata *deleting_sequence = set_sequence; + u32 i; + for (i = 0; i < SET_SIZE; i++) + { + deleting_sequence = int_set_del(lp, deleting_sequence, i); + bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), + "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", + int_set_get_size(deleting_sequence), + SET_SIZE-1-i); + } + + 
bt_assert(int_set_get_size(set_sequence) == SET_SIZE); + + return 1; +} + +/* + * SET EC TESTS + */ + +static int +t_set_ec_contains(void) +{ + u32 i; + + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); + + for (i = 0; i < SET_SIZE; i++) + bt_assert(ec_set_contains(set_sequence, i)); + bt_assert(ec_set_contains(set_sequence, -1) == 0); + bt_assert(ec_set_contains(set_sequence, SET_SIZE) == 0); + +// int *data = ec_set_get_data(set_sequence); +// for (i = 0; i < SET_SIZE; i++) +// bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); + + rfree(lp); + return 1; +} + +static int +t_set_ec_union(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + struct adata *set_union; + set_union = ec_set_union(lp, set_sequence, set_sequence_same); + bt_assert(ec_set_get_size(set_union) == SET_SIZE); + bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); + + set_union = ec_set_union(lp, set_sequence, set_sequence_higher); + bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); + bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); + + rfree(lp); + return 1; +} + +static int +t_set_ec_format(void) +{ + resource_init(); + + struct adata empty_as_path = {}; + set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; + lp = lp_new_default(&root_pool); + + u64 i = 0; + set_sequence = ec_set_add(lp, set_sequence, i); + for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) + set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? 
((u64)EC_RO << 48) : ((u64)EC_RT << 48))); + + bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); + bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, + "ec_set_format() returns '%s'", buf); + + rfree(lp); + return 1; +} + +static int +t_set_ec_delete(void) +{ + resource_init(); + generate_set_sequence(SET_TYPE_EC); + + struct adata *deleting_sequence = set_sequence; + u32 i; + for (i = 0; i < SET_SIZE; i++) + { + deleting_sequence = ec_set_del(lp, deleting_sequence, i); + bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), + "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", + ec_set_get_size(deleting_sequence), SET_SIZE-1-i); + } + + bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); + + return 1; +} + +int +main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); + bt_test_suite(t_set_int_format, "Testing sets of integers: format"); + bt_test_suite(t_set_int_union, "Testing sets of integers: union"); + bt_test_suite(t_set_int_delete, "Testing sets of integers: delete"); + + bt_test_suite(t_set_ec_contains, "Testing sets of Extended Community values: contains, get_data"); + bt_test_suite(t_set_ec_format, "Testing sets of Extended Community values: format"); + bt_test_suite(t_set_ec_union, "Testing sets of Extended Community values: union"); + bt_test_suite(t_set_ec_delete, "Testing sets of Extended Community values: delete"); + + return bt_exit_value(); +} diff --git a/nest/attrs.h b/nest/attrs.h index a34e64d3..102f378a 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -10,6 +10,9 @@ #define _BIRD_ATTRS_H_ #include <stdint.h> +#include "lib/unaligned.h" +#include "nest/route.h" + /* a-path.c */ @@ -27,19 +30,30 @@ struct f_tree; -struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as); -int 
as_path_convert_to_old(struct adata *path, byte *dst, int *new_used); -int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); -void as_path_format(struct adata *path, byte *buf, uint size); -int as_path_getlen(struct adata *path); -int as_path_getlen_int(struct adata *path, int bs); -int as_path_get_first(struct adata *path, u32 *orig_as); -int as_path_get_last(struct adata *path, u32 *last_as); -u32 as_path_get_last_nonaggregated(struct adata *path); -int as_path_contains(struct adata *path, u32 as, int min); -int as_path_match_set(struct adata *path, struct f_tree *set); +int as_path_valid(byte *data, uint len, int bs, int confed, char *err, uint elen); +int as_path_16to32(byte *dst, byte *src, uint len); +int as_path_32to16(byte *dst, byte *src, uint len); +int as_path_contains_as4(const struct adata *path); +int as_path_contains_confed(const struct adata *path); +struct adata *as_path_strip_confed(struct linpool *pool, const struct adata *op); +struct adata *as_path_prepend2(struct linpool *pool, const struct adata *op, int seq, u32 as); +struct adata *as_path_to_old(struct linpool *pool, const struct adata *path); +void as_path_cut(struct adata *path, uint num); +struct adata *as_path_merge(struct linpool *pool, struct adata *p1, struct adata *p2); +void as_path_format(const struct adata *path, byte *buf, uint size); +int as_path_getlen(const struct adata *path); +int as_path_getlen_int(const struct adata *path, int bs); +int as_path_get_first(const struct adata *path, u32 *orig_as); +int as_path_get_first_regular(const struct adata *path, u32 *last_as); +int as_path_get_last(const struct adata *path, u32 *last_as); +u32 as_path_get_last_nonaggregated(const struct adata *path); +int as_path_contains(const struct adata *path, u32 as, int min); +int as_path_match_set(const struct adata *path, struct f_tree *set); struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos); +static inline struct 
adata *as_path_prepend(struct linpool *pool, const struct adata *path, u32 as) +{ return as_path_prepend2(pool, path, AS_PATH_SEQUENCE, as); } + #define PM_ASN 0 #define PM_QUESTION 1 @@ -54,7 +68,42 @@ struct f_path_mask { uintptr_t val2; }; -int as_path_match(struct adata *path, struct f_path_mask *mask); +int as_path_match(const struct adata *path, struct f_path_mask *mask); + + +/* Counterparts to appropriate as_path_* functions */ + +static inline int +aggregator_16to32(byte *dst, byte *src) +{ + put_u32(dst, get_u16(src)); + memcpy(dst+4, src+2, 4); + return 8; +} + +static inline int +aggregator_32to16(byte *dst, byte *src) +{ + put_u16(dst, get_u32(src)); + memcpy(dst+2, src+4, 4); + return 6; +} + +static inline int +aggregator_contains_as4(struct adata *a) +{ + return get_u32(a->data) > 0xFFFF; +} + +static inline struct adata * +aggregator_to_old(struct linpool *pool, struct adata *a) +{ + struct adata *d = lp_alloc_adata(pool, 8); + put_u32(d->data, 0xFFFF); + memcpy(d->data + 4, a->data + 4, 4); + return d; +} + /* a-set.c */ @@ -143,5 +192,11 @@ struct adata *int_set_union(struct linpool *pool, struct adata *l1, struct adata struct adata *ec_set_union(struct linpool *pool, struct adata *l1, struct adata *l2); struct adata *lc_set_union(struct linpool *pool, struct adata *l1, struct adata *l2); +struct adata *ec_set_del_nontrans(struct linpool *pool, struct adata *set); +struct adata *int_set_sort(struct linpool *pool, struct adata *src); +struct adata *ec_set_sort(struct linpool *pool, struct adata *src); +struct adata *lc_set_sort(struct linpool *pool, struct adata *src); + +void ec_set_sort_x(struct adata *set); /* Sort in place */ #endif @@ -46,7 +46,7 @@ static inline void cf_check_bfd(int use UNUSED) { } #else -static inline struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, void (*hook)(struct bfd_request *), void *data) { return NULL; } +static inline struct bfd_request * bfd_request_session(pool 
*p UNUSED, ip_addr addr UNUSED, ip_addr local UNUSED, struct iface *iface UNUSED, void (*hook)(struct bfd_request *) UNUSED, void *data UNUSED) { return NULL; } static inline void cf_check_bfd(int use) { if (use) cf_error("BFD not available"); } diff --git a/nest/bird.h b/nest/bird.h index 3c7d749b..55712abe 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -12,5 +12,6 @@ #include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" +#include "lib/net.h" #endif @@ -60,7 +60,7 @@ * the new one. When the consumer processes everything in the buffer * queue, it calls cli_written(), tha frees all buffers (except the * first one) and schedules cli.event . - * + * */ #include "nest/bird.h" @@ -136,7 +136,7 @@ cli_printf(cli *c, int code, char *msg, ...) } else if (cd == CLI_ASYNC_CODE) { - size = 1; buf[0] = '+'; + size = 1; buf[0] = '+'; errcode = cd; } else @@ -316,7 +316,8 @@ cli_new(void *priv) c->event->hook = cli_event; c->event->data = c; c->cont = cli_hello; - c->parser_pool = lp_new(c->pool, 4096); + c->parser_pool = lp_new_default(c->pool); + c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -38,6 +38,7 @@ typedef struct cli { int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ + struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/cmds.c b/nest/cmds.c index 0bc9b9d1..ca601ef2 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -25,12 +25,12 @@ cmd_show_status(void) byte tim[TM_DATETIME_BUFFER_SIZE]; cli_msg(-1000, "BIRD " BIRD_VERSION); - tm_format_datetime(tim, &config->tf_base, now); + tm_format_time(tim, &config->tf_base, current_time()); cli_msg(-1011, "Router ID is %R", config->router_id); 
cli_msg(-1011, "Current server time is %s", tim); - tm_format_datetime(tim, &config->tf_base, boot_time); + tm_format_time(tim, &config->tf_base, boot_time); cli_msg(-1011, "Last reboot on %s", tim); - tm_format_datetime(tim, &config->tf_base, config->load_time); + tm_format_time(tim, &config->tf_base, config->load_time); cli_msg(-1011, "Last reconfiguration on %s", tim); graceful_restart_show_status(); @@ -82,8 +82,6 @@ print_size(char *dsc, size_t val) extern pool *rt_table_pool; extern pool *rta_pool; -extern pool *roa_pool; -extern pool *proto_pool; void cmd_show_memory(void) @@ -91,7 +89,6 @@ cmd_show_memory(void) cli_msg(-1018, "BIRD memory usage"); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); - print_size("ROA tables:", rmemsize(roa_pool)); print_size("Protocols:", rmemsize(proto_pool)); print_size("Total:", rmemsize(&root_pool)); cli_msg(0, ""); diff --git a/nest/config.Y b/nest/config.Y index 358c7745..ab09a10c 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -18,9 +18,10 @@ CF_HDR CF_DEFINES static struct proto_config *this_proto; +static struct channel_config *this_channel; static struct iface_patt *this_ipatt; static struct iface_patt_node *this_ipn; -static struct roa_table_config *this_roa_table; +/* static struct roa_table_config *this_roa_table; */ static list *this_p_list; static struct password_item *this_p_item; static int password_id; @@ -31,7 +32,7 @@ iface_patt_check(void) struct iface_patt_node *pn; WALK_LIST(pn, this_ipatt->ipn_list) - if (!pn->pattern || pn->pxlen) + if (!pn->pattern || pn->prefix.type) cf_error("Interface name/mask expected, not IP prefix"); } @@ -50,25 +51,38 @@ get_passwords(void) return rv; } +static void +proto_postconfig(void) +{ + CALL(this_proto->protocol->postconfig, this_proto); + this_channel = NULL; + this_proto = NULL; +} + + #define DIRECT_CFG ((struct rt_dev_config *) this_proto) CF_DECLS CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, 
DISABLED, DEBUG, ALL, OFF, DIRECT) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, TABLE, STATES, ROUTES, FILTERS) +CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, NOEXPORT, GENERATE, ROA) -CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) +CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, GENERATE) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +/* For r_args_channel */ +CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) + CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST) -CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH) +CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <i32> idval @@ -76,12 +90,14 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <r> rtable %type <s> optsym %type <ra> r_args -%type <ro> roa_args -%type <rot> roa_table_arg %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag 
export_mode roa_mode limit_action tab_sorted tos password_algorithm +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm %type <ps> proto_patt proto_patt2 -%type <g> limit_spec +%type <cc> channel_start proto_channel +%type <cl> limit_spec +%type <net> r_args_for_val +%type <net_ptr> r_args_for +%type <t> r_args_channel CF_GRAMMAR @@ -97,86 +113,60 @@ rtrid: idval: NUM { $$ = $1; } | '(' term ')' { $$ = f_eval_int($2); } - | RTRID - | IPA { -#ifndef IPV6 - $$ = ipa_to_u32($1); -#else - cf_error("Router IDs must be entered as hexadecimal numbers or IPv4 addresses in IPv6 version"); -#endif - } + | IP4 { $$ = ip4_to_u32($1); } | SYM { if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD)) $$ = SYM_VAL($1).i; -#ifndef IPV6 - else if ($1->class == (SYM_CONSTANT | T_IP)) - $$ = ipa_to_u32(SYM_VAL($1).px.ip); -#endif + else if (($1->class == (SYM_CONSTANT | T_IP)) && ipa_is_ip4(SYM_VAL($1).ip)) + $$ = ipa_to_u32(SYM_VAL($1).ip); else cf_error("Number or IPv4 address constant expected"); } ; +CF_ADDTO(conf, gr_opts) -CF_ADDTO(conf, listen) +gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ; -listen: LISTEN BGP listen_opts ';' ; -listen_opts: - /* Nothing */ - | listen_opts listen_opt - ; +/* Network types (for tables, channels) */ -listen_opt: - ADDRESS ipa { new_config->listen_bgp_addr = $2; } - | PORT expr { new_config->listen_bgp_port = $2; } - | V6ONLY { new_config->listen_bgp_flags = 0; } - | DUAL { new_config->listen_bgp_flags = 1; } +net_type: + IPV4 { $$ = NET_IP4; } + | IPV6 { $$ = NET_IP6; } + | IPV6 SADR { $$ = NET_IP6_SADR; } + | VPN4 { $$ = NET_VPN4; } + | VPN6 { $$ = NET_VPN6; } + | ROA4 { $$ = NET_ROA4; } + | ROA6 { $$ = NET_ROA6; } + | FLOW4{ $$ = NET_FLOW4; } + | FLOW6{ $$ = NET_FLOW6; } + | MPLS { $$ = NET_MPLS; } ; - -CF_ADDTO(conf, gr_opts) - -gr_opts: GRACEFUL RESTART WAIT 
expr ';' { new_config->gr_wait = $4; } ; +CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP6_SADR) /* Creation of routing tables */ -tab_sorted: - { $$ = 0; } +CF_ADDTO(conf, table) + +table_sorted: + { $$ = 0; } | SORTED { $$ = 1; } ; -CF_ADDTO(conf, newtab) - -newtab: TABLE SYM tab_sorted { +table: net_type TABLE SYM table_sorted { struct rtable_config *cf; - cf = rt_new_table($2); - cf->sorted = $3; - } - ; - -CF_ADDTO(conf, roa_table) - -roa_table_start: ROA TABLE SYM { - this_roa_table = roa_new_table_config($3); -}; - -roa_table_opts: - /* empty */ - | roa_table_opts ROA prefix MAX NUM AS NUM ';' { - roa_add_item_config(this_roa_table, $3.addr, $3.len, $5, $7); + cf = rt_new_table($3, $1); + cf->sorted = $4; } ; -roa_table: - roa_table_start - | roa_table_start '{' roa_table_opts '}' - ; /* Definition of protocols */ -CF_ADDTO(conf, proto) +CF_ADDTO(conf, proto { proto_postconfig(); }) proto_start: PROTOCOL { $$ = SYM_PROTO; } @@ -214,23 +204,61 @@ proto_name: proto_item: /* EMPTY */ - | PREFERENCE expr { - if ($2 < 0 || $2 > 0xFFFF) cf_error("Invalid preference"); - this_proto->preference = $2; - } | DISABLED bool { this_proto->disabled = $2; } | DEBUG debug_mask { this_proto->debug = $2; } | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } - | IMPORT imexport { this_proto->in_filter = $2; } - | EXPORT imexport { this_proto->out_filter = $2; } - | RECEIVE LIMIT limit_spec { this_proto->rx_limit = $3; } - | IMPORT LIMIT limit_spec { this_proto->in_limit = $3; } - | EXPORT LIMIT limit_spec { this_proto->out_limit = $3; } - | IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; } - | VRF text { this_proto->vrf = if_get_by_name($2); } - | TABLE rtable { this_proto->table = $2; } | ROUTER ID idval { this_proto->router_id = $3; } | DESCRIPTION text { this_proto->dsc = $2; } + | VRF text { this_proto->vrf = if_get_by_name($2); } + ; + + +channel_start: net_type +{ + $$ = this_channel = channel_config_get(NULL, 
net_label[$1], $1, this_proto); +}; + +channel_item: + TABLE rtable { + if (this_channel->net_type && ($2->addr_type != this_channel->net_type)) + cf_error("Incompatible table type"); + this_channel->table = $2; + } + | IMPORT imexport { this_channel->in_filter = $2; } + | EXPORT imexport { this_channel->out_filter = $2; } + | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } + | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } + | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } + | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } + | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + ; + +channel_opts: + /* empty */ + | channel_opts channel_item ';' + ; + +channel_opt_list: + /* empty */ + | '{' channel_opts '}' + ; + +channel_end: +{ + if (!this_channel->table) + cf_error("Routing table not specified"); + + this_channel = NULL; +}; + +proto_channel: channel_start channel_opt_list channel_end; + + +rtable: + SYM { + if ($1->class != SYM_TABLE) cf_error("Table expected"); + $$ = $1->def; + } ; imexport: @@ -249,21 +277,10 @@ limit_action: ; limit_spec: - expr limit_action { - struct proto_limit *l = cfg_allocz(sizeof(struct proto_limit)); - l->limit = $1; - l->action = $2; - $$ = l; - } - | OFF { $$ = NULL; } + expr limit_action { $$ = (struct channel_limit){ .limit = $1, $$.action = $2 }; } + | OFF { $$ = (struct channel_limit){}; } ; -rtable: - SYM { - if ($1->class != SYM_TABLE) cf_error("Table name expected"); - $$ = $1->def; - } - ; CF_ADDTO(conf, debug_default) @@ -274,6 +291,31 @@ debug_default: /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ +CF_ADDTO(conf, timeformat_base) + +timeformat_which: + ROUTE { $$ = &new_config->tf_route; } + | PROTOCOL { $$ = &new_config->tf_proto; } + | BASE { $$ = &new_config->tf_base; } + | LOG { $$ = &new_config->tf_log; } + ; + +timeformat_spec: + timeformat_which TEXT { *$1 = (struct timeformat){$2, NULL, 0}; } + | timeformat_which TEXT expr TEXT { *$1 = 
(struct timeformat){$2, $4, (s64) $3 S_}; } + | timeformat_which ISO SHORT { *$1 = TM_ISO_SHORT_S; } + | timeformat_which ISO SHORT MS { *$1 = TM_ISO_SHORT_MS; } + | timeformat_which ISO SHORT US { *$1 = TM_ISO_SHORT_US; } + | timeformat_which ISO LONG { *$1 = TM_ISO_LONG_S; } + | timeformat_which ISO LONG MS { *$1 = TM_ISO_LONG_MS; } + | timeformat_which ISO LONG US { *$1 = TM_ISO_LONG_US; } + ; + +timeformat_base: + TIMEFORMAT timeformat_spec ';' + ; + + /* Interface patterns */ iface_patt_node_init: @@ -285,9 +327,8 @@ iface_patt_node_init: ; iface_patt_node_body: - TEXT { this_ipn->pattern = $1; this_ipn->prefix = IPA_NONE; this_ipn->pxlen = 0; } - | prefix_or_ipa { this_ipn->pattern = NULL; this_ipn->prefix = $1.addr; this_ipn->pxlen = $1.len; } - | TEXT prefix_or_ipa { this_ipn->pattern = $1; this_ipn->prefix = $2.addr; this_ipn->pxlen = $2.len; } + TEXT { this_ipn->pattern = $1; /* this_ipn->prefix stays zero */ } + | opttext net_or_ipa { this_ipn->pattern = $1; this_ipn->prefix = $2; } ; iface_negate: @@ -320,8 +361,8 @@ iface_patt: ; tos: - CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); } - | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); } + CLASS expr { $$ = $2 & 0xfc; if ($2 > 255) cf_error("TX class must be in range 0-255"); } + | DSCP expr { $$ = ($2 & 0x3f) << 2; if ($2 > 63) cf_error("TX DSCP must be in range 0-63"); } ; /* Direct device route protocol */ @@ -337,6 +378,7 @@ dev_proto_start: proto_start DIRECT { dev_proto: dev_proto_start proto_name '{' | dev_proto proto_item ';' + | dev_proto proto_channel ';' | dev_proto dev_iface_patt ';' | dev_proto CHECK LINK bool ';' { DIRECT_CFG->check_link = $4; } ; @@ -413,9 +455,9 @@ password_item: password_item_begin: PASSWORD text { if (!this_p_list) { - this_p_list = cfg_alloc(sizeof(list)); - init_list(this_p_list); - password_id = 1; + this_p_list = cfg_alloc(sizeof(list)); + 
init_list(this_p_list); + password_id = 1; } this_p_item = cfg_alloc(sizeof (struct password_item)); this_p_item->password = $2; @@ -432,12 +474,12 @@ password_item_begin: password_item_params: /* empty */ { } - | GENERATE FROM datetime ';' password_item_params { this_p_item->genfrom = $3; } - | GENERATE TO datetime ';' password_item_params { this_p_item->gento = $3; } - | ACCEPT FROM datetime ';' password_item_params { this_p_item->accfrom = $3; } - | ACCEPT TO datetime ';' password_item_params { this_p_item->accto = $3; } - | FROM datetime ';' password_item_params { this_p_item->genfrom = this_p_item->accfrom = $2; } - | TO datetime ';' password_item_params { this_p_item->gento = this_p_item->accto = $2; } + | GENERATE FROM time ';' password_item_params { this_p_item->genfrom = $3; } + | GENERATE TO time ';' password_item_params { this_p_item->gento = $3; } + | ACCEPT FROM time ';' password_item_params { this_p_item->accfrom = $3; } + | ACCEPT TO time ';' password_item_params { this_p_item->accto = $3; } + | FROM time ';' password_item_params { this_p_item->genfrom = this_p_item->accfrom = $2; } + | TO time ';' password_item_params { this_p_item->gento = this_p_item->accto = $2; } | ID expr ';' password_item_params { this_p_item->id = $2; if ($2 <= 0) cf_error("Password ID has to be greated than zero."); } | ALGORITHM password_algorithm ';' password_item_params { this_p_item->alg = $2; } ; @@ -488,26 +530,32 @@ CF_CLI(SHOW ROUTE, r_args, [[[<prefix>|for <prefix>|for <ip>] [table <t>] [filte r_args: /* empty */ { $$ = cfg_allocz(sizeof(struct rt_show_data)); - $$->pxlen = 256; + init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; } - | r_args prefix { + | r_args net_any { $$ = $1; - if ($$->pxlen != 256) cf_error("Only one prefix expected"); - $$->prefix = $2.addr; - $$->pxlen = $2.len; + if ($$->addr) cf_error("Only one prefix expected"); + $$->addr = $2; } - | r_args FOR prefix_or_ipa { + | r_args FOR r_args_for { $$ = $1; - if ($$->pxlen != 256) 
cf_error("Only one prefix expected"); - $$->prefix = $3.addr; - $$->pxlen = $3.len; + if ($$->addr) cf_error("Only one prefix expected"); $$->show_for = 1; + $$->addr = $3; } | r_args TABLE SYM { $$ = $1; if ($3->class != SYM_TABLE) cf_error("%s is not a table", $3->name); - $$->table = ((struct rtable_config *)$3->def)->table; + rt_show_add_table($$, ((struct rtable_config *)$3->def)->table); + $$->tables_defined_by = RSD_TDB_DIRECT; + } + | r_args TABLE ALL { + struct rtable_config *t; + $$ = $1; + WALK_LIST(t, config->tables) + rt_show_add_table($$, t->table); + $$->tables_defined_by = RSD_TDB_ALL; } | r_args FILTER filter { $$ = $1; @@ -534,11 +582,23 @@ r_args: | r_args export_mode SYM { struct proto_config *c = (struct proto_config *) $3->def; $$ = $1; - if ($$->export_mode) cf_error("Protocol specified twice"); + if ($$->export_mode) cf_error("Export specified twice"); if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); $$->export_mode = $2; $$->export_protocol = c->proto; $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; + } + | r_args export_mode SYM '.' 
r_args_channel { + struct proto_config *c = (struct proto_config *) $3->def; + $$ = $1; + if ($$->export_mode) cf_error("Export specified twice"); + if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); + $$->export_mode = $2; + $$->export_channel = proto_find_channel_by_name(c->proto, $5); + if (!$$->export_channel) cf_error("Export channel not found"); + $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; } | r_args PROTOCOL SYM { struct proto_config *c = (struct proto_config *) $3->def; @@ -547,6 +607,7 @@ r_args: if ($3->class != SYM_PROTO || !c->proto) cf_error("%s is not a protocol", $3->name); $$->show_protocol = c->proto; $$->running_on_config = c->proto->cf->global; + $$->tables_defined_by = RSD_TDB_INDIRECT; } | r_args STATS { $$ = $1; @@ -558,52 +619,77 @@ r_args: } ; +r_args_for: + r_args_for_val { + $$ = cfg_alloc($1.length); + net_copy($$, &$1); + } + | net_vpn4_ + | net_vpn6_ + | net_ip6_sadr_ + | VPN_RD IP4 { + $$ = cfg_alloc(sizeof(net_addr_vpn4)); + net_fill_vpn4($$, $2, IP4_MAX_PREFIX_LENGTH, $1); + } + | VPN_RD IP6 { + $$ = cfg_alloc(sizeof(net_addr_vpn6)); + net_fill_vpn6($$, $2, IP6_MAX_PREFIX_LENGTH, $1); + } + | IP6 FROM IP6 { + $$ = cfg_alloc(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr($$, $1, IP6_MAX_PREFIX_LENGTH, $3, IP6_MAX_PREFIX_LENGTH); + } + | SYM { + if ($1->class == (SYM_CONSTANT | T_IP)) + { + $$ = cfg_alloc(ipa_is_ip4(SYM_VAL($1).ip) ? 
sizeof(net_addr_ip4) : sizeof(net_addr_ip6)); + net_fill_ip_host($$, SYM_VAL($1).ip); + } + else if (($1->class == (SYM_CONSTANT | T_NET)) && net_type_match(SYM_VAL($1).net, NB_IP | NB_VPN)) + $$ = (net_addr *) SYM_VAL($1).net; /* Avoid const warning */ + else + cf_error("IP address or network expected"); + } + ; + +r_args_for_val: + net_ip4_ + | net_ip6_ + | IP4 { net_fill_ip4(&($$), $1, IP4_MAX_PREFIX_LENGTH); } + | IP6 { net_fill_ip6(&($$), $1, IP6_MAX_PREFIX_LENGTH); } + export_mode: PREEXPORT { $$ = RSEM_PREEXPORT; } | EXPORT { $$ = RSEM_EXPORT; } | NOEXPORT { $$ = RSEM_NOEXPORT; } ; - -CF_CLI_HELP(SHOW ROA, ..., [[Show ROA table]]) -CF_CLI(SHOW ROA, roa_args, [<prefix> | in <prefix> | for <prefix>] [as <num>] [table <t>], [[Show ROA table]]) -{ roa_show($3); } ; - -roa_args: - /* empty */ { - $$ = cfg_allocz(sizeof(struct roa_show_data)); - $$->mode = ROA_SHOW_ALL; - $$->table = roa_table_default; - if (roa_table_default == NULL) - cf_error("No ROA table defined"); - } - | roa_args roa_mode prefix { - $$ = $1; - if ($$->mode != ROA_SHOW_ALL) cf_error("Only one prefix expected"); - $$->prefix = $3.addr; - $$->pxlen = $3.len; - $$->mode = $2; - } - | roa_args AS NUM { - $$ = $1; - $$->asn = $3; - } - | roa_args TABLE SYM { - $$ = $1; - if ($3->class != SYM_ROA) cf_error("%s is not a ROA table", $3->name); - $$->table = ((struct roa_table_config *)$3->def)->table; - } - ; - -roa_mode: - { $$ = ROA_SHOW_PX; } - | IN { $$ = ROA_SHOW_IN; } - | FOR { $$ = ROA_SHOW_FOR; } +/* This is ugly hack */ +r_args_channel: + IPV4 { $$ = "ipv4"; } + | IPV4_MC { $$ = "ipv4-mc"; } + | IPV4_MPLS { $$ = "ipv4-mpls"; } + | IPV6 { $$ = "ipv6"; } + | IPV6_MC { $$ = "ipv6-mc"; } + | IPV6_MPLS { $$ = "ipv6-mpls"; } + | IPV6_SADR { $$ = "ipv6-sadr"; } + | VPN4 { $$ = "vpn4"; } + | VPN4_MC { $$ = "vpn4-mc"; } + | VPN4_MPLS { $$ = "vpn4-mpls"; } + | VPN6 { $$ = "vpn6"; } + | VPN6_MC { $$ = "vpn6-mc"; } + | VPN6_MPLS { $$ = "vpn6-mpls"; } + | ROA4 { $$ = "roa4"; } + | ROA6 { $$ = "roa6"; } 
+ | FLOW4 { $$ = "flow4"; } + | FLOW6 { $$ = "flow6"; } + | MPLS { $$ = "mpls"; } + | PRI { $$ = "pri"; } + | SEC { $$ = "sec"; } ; - CF_CLI_HELP(SHOW SYMBOLS, ..., [[Show all known symbolic names]]) -CF_CLI(SHOW SYMBOLS, sym_args, [table|filter|function|protocol|template|roa|<symbol>], [[Show all known symbolic names]]) +CF_CLI(SHOW SYMBOLS, sym_args, [table|filter|function|protocol|template|<symbol>], [[Show all known symbolic names]]) { cmd_show_symbols($3); } ; sym_args: @@ -615,46 +701,10 @@ sym_args: | sym_args FILTER { $$ = $1; $$->type = SYM_FILTER; } | sym_args PROTOCOL { $$ = $1; $$->type = SYM_PROTO; } | sym_args TEMPLATE { $$ = $1; $$->type = SYM_TEMPLATE; } - | sym_args ROA { $$ = $1; $$->type = SYM_ROA; } | sym_args SYM { $$ = $1; $$->sym = $2; } ; -roa_table_arg: - /* empty */ { - if (roa_table_default == NULL) - cf_error("No ROA table defined"); - $$ = roa_table_default; - } - | TABLE SYM { - if ($2->class != SYM_ROA) - cf_error("%s is not a ROA table", $2->name); - $$ = ((struct roa_table_config *)$2->def)->table; - } - ; - -CF_CLI_HELP(ADD, roa ..., [[Add ROA record]]) -CF_CLI(ADD ROA, prefix MAX NUM AS NUM roa_table_arg, <prefix> max <num> as <num> [table <name>], [[Add ROA record]]) -{ - if (! cli_access_restricted()) - { roa_add_item($8, $3.addr, $3.len, $5, $7, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - -CF_CLI_HELP(DELETE, roa ..., [[Delete ROA record]]) -CF_CLI(DELETE ROA, prefix MAX NUM AS NUM roa_table_arg, <prefix> max <num> as <num> [table <name>], [[Delete ROA record]]) -{ - if (! cli_access_restricted()) - { roa_delete_item($8, $3.addr, $3.len, $5, $7, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - -CF_CLI_HELP(FLUSH, roa [table <name>], [[Removes all dynamic ROA records]]) -CF_CLI(FLUSH ROA, roa_table_arg, [table <name>], [[Removes all dynamic ROA records]]) -{ - if (! 
cli_access_restricted()) - { roa_flush($3, ROA_SRC_DYNAMIC); cli_msg(0, ""); } -}; - - CF_CLI_HELP(DUMP, ..., [[Dump debugging information]]) CF_CLI(DUMP RESOURCES,,, [[Dump all allocated resource]]) { rdump(&root_pool); cli_msg(0, ""); } ; @@ -696,11 +746,11 @@ echo_size: } ; -CF_CLI(DISABLE, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Disable protocol]]) +CF_CLI(DISABLE, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Disable protocol]]) { proto_apply_cmd($2, proto_cmd_disable, 1, (uintptr_t) $3); } ; -CF_CLI(ENABLE, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Enable protocol]]) +CF_CLI(ENABLE, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Enable protocol]]) { proto_apply_cmd($2, proto_cmd_enable, 1, (uintptr_t) $3); } ; -CF_CLI(RESTART, proto_patt text_or_none, (<protocol> | \"<pattern>\" | all) [message], [[Restart protocol]]) +CF_CLI(RESTART, proto_patt opttext, (<protocol> | \"<pattern>\" | all) [message], [[Restart protocol]]) { proto_apply_cmd($2, proto_cmd_restart, 1, (uintptr_t) $3); } ; CF_CLI(RELOAD, proto_patt, <protocol> | \"<pattern>\" | all, [[Reload protocol]]) { proto_apply_cmd($2, proto_cmd_reload, 1, CMD_RELOAD); } ; diff --git a/nest/iface.c b/nest/iface.c index 3dd45065..a633f748 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -32,11 +32,14 @@ #include "lib/resource.h" #include "lib/string.h" #include "conf/conf.h" +#include "sysdep/unix/krt.h" static pool *if_pool; list iface_list; +static void if_recalc_preferred(struct iface *i); + /** * ifa_dump - dump interface address * @a: interface address descriptor @@ -46,10 +49,11 @@ list iface_list; void ifa_dump(struct ifa *a) { - debug("\t%I, net %I/%-2d bc %I -> %I%s%s%s\n", a->ip, a->prefix, a->pxlen, a->brd, a->opposite, - (a->flags & IF_UP) ? "" : " DOWN", - (a->flags & IA_PRIMARY) ? "" : " SEC", - (a->flags & IA_PEER) ? 
"PEER" : ""); + debug("\t%I, net %N bc %I -> %I%s%s%s%s\n", a->ip, &a->prefix, a->brd, a->opposite, + (a->flags & IA_PRIMARY) ? " PRIMARY" : "", + (a->flags & IA_SECONDARY) ? " SEC" : "", + (a->flags & IA_HOST) ? " HOST" : "", + (a->flags & IA_PEER) ? " PEER" : ""); } /** @@ -89,7 +93,8 @@ if_dump(struct iface *i) WALK_LIST(a, i->addrs) { ifa_dump(a); - ASSERT((a != i->addr) == !(a->flags & IA_PRIMARY)); + ASSERT(!!(a->flags & IA_PRIMARY) == + ((a == i->addr4) || (a == i->addr6) || (a == i->llv6))); } } @@ -140,12 +145,13 @@ if_copy(struct iface *to, struct iface *from) static inline void ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) { - if (p->ifa_notify && (!p->vrf || p->vrf == a->iface->master)) + if (p->ifa_notify && + (p->proto_state != PS_DOWN) && + (!p->vrf || p->vrf == a->iface->master)) { if (p->debug & D_IFACES) - log(L_TRACE "%s <%s address %I/%d on interface %s %s", - p->name, (a->flags & IA_PRIMARY) ? " primary" : "", - a->prefix, a->pxlen, a->iface->name, + log(L_TRACE "%s < address %N on interface %s %s", + p->name, &a->prefix, a->iface->name, (c & IF_CHANGE_UP) ? "added" : "removed"); p->ifa_notify(p, c, a); } @@ -158,7 +164,7 @@ ifa_notify_change_(unsigned c, struct ifa *a) DBG("IFA change notification (%x) for %s:%I\n", c, a->iface->name, a->ip); - WALK_LIST(p, active_proto_list) + WALK_LIST(p, proto_list) ifa_send_notify(p, c, a); } @@ -177,7 +183,9 @@ ifa_notify_change(unsigned c, struct ifa *a) static inline void if_send_notify(struct proto *p, unsigned c, struct iface *i) { - if (p->if_notify && (!p->vrf || p->vrf == i->master)) + if (p->if_notify && + (p->proto_state != PS_DOWN) && + (!p->vrf || p->vrf == i->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < interface %s %s", p->name, i->name, @@ -185,6 +193,7 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) (c & IF_CHANGE_DOWN) ? "goes down" : (c & IF_CHANGE_MTU) ? "changes MTU" : (c & IF_CHANGE_LINK) ? "changes link" : + (c & IF_CHANGE_PREFERRED) ? 
"changes preferred address" : (c & IF_CHANGE_CREATE) ? "created" : "sends unknown event"); p->if_notify(p, c, i); @@ -213,20 +222,14 @@ if_notify_change(unsigned c, struct iface *i) if (c & IF_CHANGE_DOWN) WALK_LIST(a, i->addrs) - { - a->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - ifa_notify_change_(IF_CHANGE_DOWN, a); - } + ifa_notify_change_(IF_CHANGE_DOWN, a); - WALK_LIST(p, active_proto_list) + WALK_LIST(p, proto_list) if_send_notify(p, c, i); if (c & IF_CHANGE_UP) WALK_LIST(a, i->addrs) - { - a->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - ifa_notify_change_(IF_CHANGE_UP, a); - } + ifa_notify_change_(IF_CHANGE_UP, a); if (c & IF_CHANGE_UP) neigh_if_up(i); @@ -235,25 +238,25 @@ if_notify_change(unsigned c, struct iface *i) neigh_if_link(i); } -static unsigned -if_recalc_flags(struct iface *i, unsigned flags) +static uint +if_recalc_flags(struct iface *i UNUSED, uint flags) { - if ((flags & (IF_SHUTDOWN | IF_TMP_DOWN)) || - !(flags & IF_ADMIN_UP) || - !i->addr || - (i->master_index && !i->master)) - flags &= ~IF_UP; - else + if ((flags & IF_ADMIN_UP) && + !(flags & (IF_SHUTDOWN | IF_TMP_DOWN)) && + !(i->master_index && !i->master)) flags |= IF_UP; + else + flags &= ~IF_UP; + return flags; } static void -if_change_flags(struct iface *i, unsigned flags) +if_change_flags(struct iface *i, uint flags) { - unsigned of = i->flags; - + uint of = i->flags; i->flags = if_recalc_flags(i, flags); + if ((i->flags ^ of) & IF_UP) if_notify_change((i->flags & IF_UP) ? 
IF_CHANGE_UP : IF_CHANGE_DOWN, i); } @@ -301,7 +304,6 @@ if_update(struct iface *new) WALK_LIST(i, iface_list) if (!strcmp(new->name, i->name)) { - new->addr = i->addr; new->flags = if_recalc_flags(new, new->flags); c = if_what_changed(i, new); if (c & IF_CHANGE_TOO_MUCH) /* Changed a lot, convert it to down/up */ @@ -309,10 +311,13 @@ if_update(struct iface *new) DBG("Interface %s changed too much -- forcing down/up transition\n", i->name); if_change_flags(i, i->flags | IF_TMP_DOWN); rem_node(&i->n); - new->addr = i->addr; + new->addr4 = i->addr4; + new->addr6 = i->addr6; + new->llv6 = i->llv6; + new->sysdep = i->sysdep; memcpy(&new->addrs, &i->addrs, sizeof(i->addrs)); memcpy(i, new, sizeof(*i)); - i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ + i->flags &= ~IF_UP; /* IF_TMP_DOWN will be added later */ goto newif; } @@ -343,13 +348,16 @@ if_start_update(void) { i->flags &= ~IF_UPDATED; WALK_LIST(a, i->addrs) - a->flags &= ~IF_UPDATED; + a->flags &= ~IA_UPDATED; } } void if_end_partial_update(struct iface *i) { + if (i->flags & IF_NEEDS_RECALC) + if_recalc_preferred(i); + if (i->flags & IF_TMP_DOWN) if_change_flags(i, i->flags & ~IF_TMP_DOWN); } @@ -367,7 +375,7 @@ if_end_update(void) else { WALK_LIST_DELSAFE(a, b, i->addrs) - if (!(a->flags & IF_UPDATED)) + if (!(a->flags & IA_UPDATED)) ifa_delete(a); if_end_partial_update(i); } @@ -464,47 +472,105 @@ if_get_by_name(char *name) return i; } -struct ifa *kif_choose_primary(struct iface *i); +static inline void +if_set_preferred(struct ifa **pos, struct ifa *new) +{ + if (*pos) + (*pos)->flags &= ~IA_PRIMARY; + if (new) + new->flags |= IA_PRIMARY; + + *pos = new; +} -static int -ifa_recalc_primary(struct iface *i) +static void +if_recalc_preferred(struct iface *i) { - struct ifa *a = kif_choose_primary(i); + /* + * Preferred address selection priority: + * 1) Address configured in Device protocol + * 2) Sysdep IPv4 address (BSD) + * 3) Old preferred address + * 4) First address in list + */ - if (a == 
i->addr) - return 0; + struct kif_iface_config *ic = kif_get_iface_config(i); + struct ifa *a4 = i->addr4, *a6 = i->addr6, *ll = i->llv6; + ip_addr pref_v4 = ic->pref_v4; + uint change = 0; + + if (kif_update_sysdep_addr(i)) + change |= IF_CHANGE_SYSDEP; - if (i->addr) - i->addr->flags &= ~IA_PRIMARY; + /* BSD sysdep address */ + if (ipa_zero(pref_v4) && ip4_nonzero(i->sysdep)) + pref_v4 = ipa_from_ip4(i->sysdep); - if (a) + struct ifa *a; + WALK_LIST(a, i->addrs) { - a->flags |= IA_PRIMARY; - rem_node(&a->n); - add_head(&i->addrs, &a->n); + /* Secondary address is never selected */ + if (a->flags & IA_SECONDARY) + continue; + + if (ipa_is_ip4(a->ip)) { + if (!a4 || ipa_equal(a->ip, pref_v4)) + a4 = a; + } else if (!ipa_is_link_local(a->ip)) { + if (!a6 || ipa_equal(a->ip, ic->pref_v6)) + a6 = a; + } else { + if (!ll || ipa_equal(a->ip, ic->pref_ll)) + ll = a; + } } - i->addr = a; - return 1; + if (a4 != i->addr4) + { + if_set_preferred(&i->addr4, a4); + change |= IF_CHANGE_ADDR4; + } + + if (a6 != i->addr6) + { + if_set_preferred(&i->addr6, a6); + change |= IF_CHANGE_ADDR6; + } + + if (ll != i->llv6) + { + if_set_preferred(&i->llv6, ll); + change |= IF_CHANGE_LLV6; + } + + i->flags &= ~IF_NEEDS_RECALC; + + /* + * FIXME: There should be proper notification instead of iface restart: + * if_notify_change(change, i) + */ + if (change) + if_change_flags(i, i->flags | IF_TMP_DOWN); } void -ifa_recalc_all_primary_addresses(void) +if_recalc_all_preferred_addresses(void) { struct iface *i; WALK_LIST(i, iface_list) - { - if (ifa_recalc_primary(i)) - if_change_flags(i, i->flags | IF_TMP_DOWN); - } + { + if_recalc_preferred(i); + + if (i->flags & IF_TMP_DOWN) + if_change_flags(i, i->flags & ~IF_TMP_DOWN); + } } static inline int ifa_same(struct ifa *a, struct ifa *b) { - return ipa_equal(a->ip, b->ip) && ipa_equal(a->prefix, b->prefix) && - a->pxlen == b->pxlen; + return ipa_equal(a->ip, b->ip) && net_equal(&a->prefix, &b->prefix); } @@ -530,25 +596,23 @@ ifa_update(struct 
ifa *a) b->scope == a->scope && !((b->flags ^ a->flags) & IA_PEER)) { - b->flags |= IF_UPDATED; + b->flags |= IA_UPDATED; return b; } ifa_delete(b); break; } -#ifndef IPV6 - if ((i->flags & IF_BROADCAST) && !ipa_nonzero(a->brd)) - log(L_ERR "Missing broadcast address for interface %s", i->name); -#endif + if ((a->prefix.type == NET_IP4) && (i->flags & IF_BROADCAST) && ipa_zero(a->brd)) + log(L_WARN "Missing broadcast address for interface %s", i->name); b = mb_alloc(if_pool, sizeof(struct ifa)); memcpy(b, a, sizeof(struct ifa)); add_tail(&i->addrs, &b->n); - b->flags = (i->flags & ~IA_FLAGS) | (a->flags & IA_FLAGS); - if (ifa_recalc_primary(i)) - if_change_flags(i, i->flags | IF_TMP_DOWN); - if (b->flags & IF_UP) + b->flags |= IA_UPDATED; + + i->flags |= IF_NEEDS_RECALC; + if (i->flags & IF_UP) ifa_notify_change(IF_CHANGE_CREATE | IF_CHANGE_UP, b); return b; } @@ -571,25 +635,32 @@ ifa_delete(struct ifa *a) if (ifa_same(b, a)) { rem_node(&b->n); - if (b->flags & IF_UP) - { - b->flags &= ~IF_UP; - ifa_notify_change(IF_CHANGE_DOWN, b); - } + if (b->flags & IA_PRIMARY) { - if_change_flags(i, i->flags | IF_TMP_DOWN); - ifa_recalc_primary(i); + /* + * We unlink deleted preferred address and mark for recalculation. + * FIXME: This could break if we make iface scan non-atomic, as + * protocols still could use the freed address until they get + * if_notify from preferred route recalculation. 
+ */ + if (b == i->addr4) i->addr4 = NULL; + if (b == i->addr6) i->addr6 = NULL; + if (b == i->llv6) i->llv6 = NULL; + i->flags |= IF_NEEDS_RECALC; } + + if (i->flags & IF_UP) + ifa_notify_change(IF_CHANGE_DOWN, b); + mb_free(b); return; } } u32 -if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) +if_choose_router_id(struct iface_patt *mask, u32 old_id) { -#ifndef IPV6 struct iface *i; struct ifa *a, *b; @@ -602,6 +673,9 @@ if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) WALK_LIST(a, i->addrs) { + if (a->prefix.type != NET_IP4) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -626,10 +700,6 @@ if_choose_router_id(struct iface_patt *mask UNUSED6, u32 old_id UNUSED6) log(L_INFO "Chosen router ID %R according to interface %s", id, b->iface->name); return id; - -#else - return 0; -#endif } /** @@ -672,17 +742,17 @@ iface_patt_match(struct iface_patt *ifp, struct iface *i, struct ifa *a) continue; } - if (p->pxlen == 0) + if (p->prefix.pxlen == 0) return pos; if (!a) continue; - if (ipa_in_net(a->ip, p->prefix, p->pxlen)) + if (ipa_in_netX(a->ip, &p->prefix)) return pos; if ((a->flags & IA_PEER) && - ipa_in_net(a->opposite, p->prefix, p->pxlen)) + ipa_in_netX(a->opposite, &p->prefix)) return pos; continue; @@ -716,8 +786,7 @@ iface_plists_equal(struct iface_patt *pa, struct iface_patt *pb) (!x->pattern && y->pattern) || /* This nasty lines where written by me... :-( Feela */ (!y->pattern && x->pattern) || ((x->pattern != y->pattern) && strcmp(x->pattern, y->pattern)) || - !ipa_equal(x->prefix, y->prefix) || - (x->pxlen != y->pxlen)) + !net_equal(&x->prefix, &y->prefix)) return 0; x = (void *) x->n.next; y = (void *) y->n.next; @@ -750,16 +819,17 @@ iface_patts_equal(list *a, list *b, int (*comp)(struct iface_patt *, struct ifac static void if_show_addr(struct ifa *a) { - byte opp[STD_ADDRESS_P_LENGTH + 16]; + byte *flg, opp[IPA_MAX_TEXT_LENGTH + 16]; + + flg = (a->flags & IA_PRIMARY) ? 
"Preferred, " : (a->flags & IA_SECONDARY) ? "Secondary, " : ""; if (ipa_nonzero(a->opposite)) - bsprintf(opp, ", opposite %I", a->opposite); + bsprintf(opp, "opposite %I, ", a->opposite); else opp[0] = 0; - cli_msg(-1003, "\t%I/%d (%s%s, scope %s)", - a->ip, a->pxlen, - (a->flags & IA_PRIMARY) ? "Primary" : (a->flags & IA_SECONDARY) ? "Secondary" : "Unselected", - opp, ip_scope_text(a->scope)); + + cli_msg(-1003, "\t%I/%d (%s%sscope %s)", + a->ip, a->prefix.pxlen, flg, opp, ip_scope_text(a->scope)); } void @@ -780,7 +850,7 @@ if_show(void) else if (i->master_index) bsprintf(mbuf, " master=#%u", i->master_index); - cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "DOWN", i->index, mbuf); + cli_msg(-1001, "%s %s (index=%d%s)", i->name, (i->flags & IF_UP) ? "up" : "down", i->index, mbuf); if (!(i->flags & IF_MULTIACCESS)) type = "PtP"; else @@ -794,10 +864,13 @@ if_show(void) (i->flags & IF_LOOPBACK) ? " Loopback" : "", (i->flags & IF_IGNORE) ? " Ignored" : "", i->mtu); - if (i->addr) - if_show_addr(i->addr); + WALK_LIST(a, i->addrs) - if (a != i->addr) + if (a->prefix.type == NET_IP4) + if_show_addr(a); + + WALK_LIST(a, i->addrs) + if (a->prefix.type == NET_IP6) if_show_addr(a); } cli_msg(0, ""); @@ -807,16 +880,25 @@ void if_show_summary(void) { struct iface *i; - byte addr[STD_ADDRESS_P_LENGTH + 16]; - cli_msg(-2005, "interface state address"); + cli_msg(-2005, "%-10s %-6s %-18s %s", "Interface", "State", "IPv4 address", "IPv6 address"); WALK_LIST(i, iface_list) { - if (i->addr) - bsprintf(addr, "%I/%d", i->addr->ip, i->addr->pxlen); + byte a4[IPA_MAX_TEXT_LENGTH + 17]; + byte a6[IPA_MAX_TEXT_LENGTH + 17]; + + if (i->addr4) + bsprintf(a4, "%I/%d", i->addr4->ip, i->addr4->prefix.pxlen); else - addr[0] = 0; - cli_msg(-1005, "%-9s %-5s %s", i->name, (i->flags & IF_UP) ? 
"up" : "DOWN", addr); + a4[0] = 0; + + if (i->addr6) + bsprintf(a6, "%I/%d", i->addr6->ip, i->addr6->prefix.pxlen); + else + a6[0] = 0; + + cli_msg(-1005, "%-10s %-6s %-18s %s", + i->name, (i->flags & IF_UP) ? "up" : "down", a4, a6); } cli_msg(0, ""); } diff --git a/nest/iface.h b/nest/iface.h index b8e69838..ab3f8f35 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -10,6 +10,7 @@ #define _BIRD_IFACE_H_ #include "lib/lists.h" +#include "lib/ip.h" extern list iface_list; @@ -19,9 +20,8 @@ struct pool; struct ifa { /* Interface address */ node n; struct iface *iface; /* Interface this address belongs to */ + net_addr prefix; /* Network prefix */ ip_addr ip; /* IP address of this host */ - ip_addr prefix; /* Network prefix */ - unsigned pxlen; /* Prefix length */ ip_addr brd; /* Broadcast address */ ip_addr opposite; /* Opposite end of a point-to-point link */ unsigned scope; /* Interface address scope */ @@ -35,13 +35,16 @@ struct iface { unsigned mtu; unsigned index; /* OS-dependent interface index */ unsigned master_index; /* Interface index of master iface */ - list addrs; /* Addresses assigned to this interface */ - struct ifa *addr; /* Primary address */ struct iface *master; /* Master iface (e.g. 
for VRF) */ + list addrs; /* Addresses assigned to this interface */ + struct ifa *addr4; /* Primary address for IPv4 */ + struct ifa *addr6; /* Primary address for IPv6 */ + struct ifa *llv6; /* Primary link-local address for IPv6 */ + ip4_addr sysdep; /* Arbitrary IPv4 address for internal sysdep use */ list neighbors; /* All neighbors on this interface */ }; -#define IF_UP 1 /* IF_ADMIN_UP and IP address known */ +#define IF_UP 1 /* Currently just IF_ADMIN_UP */ #define IF_MULTIACCESS 2 #define IF_BROADCAST 4 #define IF_MULTICAST 8 @@ -72,7 +75,10 @@ struct iface { #define IF_JUST_CREATED 0x10000000 /* Send creation event as soon as possible */ #define IF_TMP_DOWN 0x20000000 /* Temporary shutdown due to interface reconfiguration */ -#define IF_UPDATED 0x40000000 /* Touched in last scan */ +#define IF_UPDATED 0x40000000 /* Iface touched in last scan */ +#define IF_NEEDS_RECALC 0x80000000 /* Preferred address recalculation is needed */ + +#define IA_UPDATED IF_UPDATED /* Address touched in last scan */ /* Interface change events */ @@ -81,8 +87,14 @@ struct iface { #define IF_CHANGE_MTU 4 #define IF_CHANGE_CREATE 8 /* Seen this interface for the first time */ #define IF_CHANGE_LINK 0x10 +#define IF_CHANGE_ADDR4 0x100 /* Change of iface->addr4 */ +#define IF_CHANGE_ADDR6 0x200 /* ... 
*/ +#define IF_CHANGE_LLV6 0x400 +#define IF_CHANGE_SYSDEP 0x800 #define IF_CHANGE_TOO_MUCH 0x40000000 /* Used internally */ +#define IF_CHANGE_PREFERRED (IF_CHANGE_ADDR4 | IF_CHANGE_ADDR6 | IF_CHANGE_LLV6) + void if_init(void); void if_dump(struct iface *); void if_dump_all(void); @@ -101,7 +113,7 @@ void if_feed_baby(struct proto *); struct iface *if_find_by_index(unsigned); struct iface *if_find_by_name(char *); struct iface *if_get_by_name(char *); -void ifa_recalc_all_primary_addresses(void); +void if_recalc_all_preferred_addresses(void); /* The Neighbor Cache */ @@ -120,12 +132,15 @@ typedef struct neighbor { SCOPE_HOST when it's our own address */ } neighbor; -#define NEF_STICKY 1 -#define NEF_ONLINK 2 -#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_STICKY 1 +#define NEF_ONLINK 2 +#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_IFACE 8 /* Neighbors bound to iface */ + neighbor *neigh_find(struct proto *, ip_addr *, unsigned flags); neighbor *neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags); +neighbor *neigh_find_iface(struct proto *p, struct iface *ifa); static inline int neigh_connected_to(struct proto *p, ip_addr *a, struct iface *i) { @@ -150,8 +165,7 @@ struct iface_patt_node { node n; int positive; byte *pattern; - ip_addr prefix; - int pxlen; + net_addr prefix; }; struct iface_patt { diff --git a/nest/mrtdump.h b/nest/mrtdump.h index 73932553..28b3bdfd 100644 --- a/nest/mrtdump.h +++ b/nest/mrtdump.h @@ -28,4 +28,3 @@ void mrt_dump_message(struct proto *p, u16 type, u16 subtype, byte *buf, u32 len); #endif - diff --git a/nest/neighbor.c b/nest/neighbor.c index f8159d35..4f93e29e 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -46,14 +46,15 @@ #include "lib/resource.h" #define NEIGH_HASH_SIZE 256 +#define NEIGH_HASH_OFFSET 24 static slab *neigh_slab; -static list sticky_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; +static list 
sticky_neigh_list, iface_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; static inline uint neigh_hash(struct proto *p, ip_addr *a) { - return (p->hash_key ^ ipa_hash(*a)) & (NEIGH_HASH_SIZE-1); + return (p->hash_key ^ ipa_hash(*a)) >> NEIGH_HASH_OFFSET; } static int @@ -80,17 +81,17 @@ if_connected(ip_addr *a, struct iface *i, struct ifa **ap) } else { - if (ipa_in_net(*a, b->prefix, b->pxlen)) + if (ipa_in_netX(*a, &b->prefix)) { -#ifndef IPV6 - if ((b->pxlen < (BITS_PER_IP_ADDRESS - 1)) && - (ipa_equal(*a, b->prefix) || /* Network address */ + /* Do not allow IPv4 network and broadcast addresses */ + if (ipa_is_ip4(*a) && + (net_pxlen(&b->prefix) < (IP4_MAX_PREFIX_LENGTH - 1)) && + (ipa_equal(*a, net_prefix(&b->prefix)) || /* Network address */ ipa_equal(*a, b->brd))) /* Broadcast */ { *ap = NULL; return -1; } -#endif return b->scope; } @@ -155,7 +156,7 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) WALK_LIST(i, iface_list) if ((!p->vrf || p->vrf == i->master) && ((scope = if_connected(a, i, &addr)) >= 0)) - { + { ifa = i; break; } @@ -167,6 +168,8 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return NULL; n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + n->addr = *a; if (scope >= 0) { @@ -188,6 +191,35 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return n; } +neighbor * +neigh_find_iface(struct proto *p, struct iface *ifa) +{ + neighbor *n; + node *nn; + + /* We keep neighbors with NEF_IFACE foremost in ifa->neighbors list */ + WALK_LIST2(n, nn, ifa->neighbors, if_n) + { + if (! (n->flags & NEF_IFACE)) + break; + + if (n->proto == p) + return n; + } + + n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + + add_tail(&iface_neigh_list, &n->n); + add_head(&ifa->neighbors, &n->if_n); + n->iface = ifa; + n->proto = p; + n->flags = NEF_IFACE; + n->scope = (ifa->flags & IF_UP) ? 
SCOPE_HOST : -1; + + return n; +} + /** * neigh_dump - dump specified neighbor entry. * @n: the entry to dump @@ -206,6 +238,8 @@ neigh_dump(neighbor *n) debug("%s %p %08x scope %s", n->proto->name, n->data, n->aux, ip_scope_text(n->scope)); if (n->flags & NEF_STICKY) debug(" STICKY"); + if (n->flags & NEF_IFACE) + debug(" IFACE"); debug("\n"); } @@ -224,6 +258,8 @@ neigh_dump_all(void) debug("Known neighbors:\n"); WALK_LIST(n, sticky_neigh_list) neigh_dump(n); + WALK_LIST(n, iface_neigh_list) + neigh_dump(n); for(i=0; i<NEIGH_HASH_SIZE; i++) WALK_LIST(n, neigh_hash_table[i]) neigh_dump(n); @@ -233,14 +269,19 @@ neigh_dump_all(void) static void neigh_up(neighbor *n, struct iface *i, int scope, struct ifa *a) { + DBG("Waking up sticky neighbor %I\n", n->addr); n->iface = i; n->ifa = a; n->scope = scope; - add_tail(&i->neighbors, &n->if_n); - rem_node(&n->n); - add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); - DBG("Waking up sticky neighbor %I\n", n->addr); - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) + + if (! (n->flags & NEF_IFACE)) + { + add_tail(&i->neighbors, &n->if_n); + rem_node(&n->n); + add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); + } + + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); } @@ -248,14 +289,20 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name); - rem_node(&n->if_n); - if (! (n->flags & NEF_BIND)) + if (! (n->flags & (NEF_BIND | NEF_IFACE))) n->iface = NULL; n->ifa = NULL; n->scope = -1; - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) + + if (! 
(n->flags & NEF_IFACE)) + { + rem_node(&n->if_n); + rem_node(&n->n); + } + + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); - rem_node(&n->n); + if (n->flags & NEF_STICKY) { add_tail(&sticky_neigh_list, &n->n); @@ -273,7 +320,8 @@ neigh_down(neighbor *n) return; } } - else + + if (! (n->flags & (NEF_STICKY | NEF_IFACE))) sl_free(neigh_slab, n); } @@ -291,10 +339,17 @@ void neigh_if_up(struct iface *i) { struct ifa *a; - neighbor *n, *next; + neighbor *n; + node *x, *y; int scope; - WALK_LIST_DELSAFE(n, next, sticky_neigh_list) + /* Wake up all iface neighbors */ + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if ((n->scope < 0) && (n->flags & NEF_IFACE)) + neigh_up(n, i, SCOPE_HOST, NULL); + + /* Wake up appropriate sticky neighbors */ + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) if ((!n->iface || n->iface == i) && ((scope = if_connected(&n->addr, i, &a)) >= 0)) neigh_up(n, i, scope, a); @@ -312,10 +367,11 @@ neigh_if_up(struct iface *i) void neigh_if_down(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - neigh_down(SKIP_BACK(neighbor, if_n, x)); + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + neigh_down(n); } /** @@ -329,14 +385,12 @@ neigh_if_down(struct iface *i) void neigh_if_link(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - neighbor *n = SKIP_BACK(neighbor, if_n, x); - if (n->proto->neigh_notify && n->proto->core_state != FS_FLUSHING) - n->proto->neigh_notify(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) + n->proto->neigh_notify(n); } /** @@ -353,19 +407,21 @@ void neigh_ifa_update(struct ifa *a) { struct iface *i = a->iface; + struct ifa *aa; node *x, *y; - + neighbor *n; + int scope; + /* Remove all neighbors whose scope has changed */ - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - struct ifa *aa; - neighbor *n = SKIP_BACK(neighbor, if_n, x); 
- if (if_connected(&n->addr, i, &aa) != n->scope) - neigh_down(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->ifa && (if_connected(&n->addr, i, &aa) != n->scope)) + neigh_down(n); /* Wake up all sticky neighbors that are reachable now */ - neigh_if_up(i); + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) + if ((!n->iface || n->iface == i) && + ((scope = if_connected(&n->addr, i, &aa)) >= 0)) + neigh_up(n, i, scope, aa); } static inline void @@ -374,7 +430,7 @@ neigh_prune_one(neighbor *n) if (n->proto->proto_state != PS_DOWN) return; rem_node(&n->n); - if (n->scope >= 0) + if (n->if_n.next) rem_node(&n->if_n); sl_free(neigh_slab, n); } @@ -399,6 +455,8 @@ neigh_prune(void) neigh_prune_one(n); WALK_LIST_DELSAFE(n, m, sticky_neigh_list) neigh_prune_one(n); + WALK_LIST_DELSAFE(n, m, iface_neigh_list) + neigh_prune_one(n); } /** @@ -411,10 +469,11 @@ neigh_prune(void) void neigh_init(pool *if_pool) { - int i; - neigh_slab = sl_new(if_pool, sizeof(neighbor)); + init_list(&sticky_neigh_list); - for(i=0; i<NEIGH_HASH_SIZE; i++) + init_list(&iface_neigh_list); + + for(int i = 0; i < NEIGH_HASH_SIZE; i++) init_list(&neigh_hash_table[i]); } diff --git a/nest/password.c b/nest/password.c index e4813741..6f87af21 100644 --- a/nest/password.c +++ b/nest/password.c @@ -10,6 +10,7 @@ #include "nest/bird.h" #include "nest/password.h" #include "lib/string.h" +#include "lib/timer.h" #include "lib/mac.h" struct password_item *last_password_item = NULL; @@ -19,12 +20,13 @@ password_find(list *l, int first_fit) { struct password_item *pi; struct password_item *pf = NULL; + btime now_ = current_real_time(); if (l) { WALK_LIST(pi, *l) { - if ((pi->genfrom < now_real) && (pi->gento > now_real)) + if ((pi->genfrom < now_) && (pi->gento > now_)) { if (first_fit) return pi; @@ -41,12 +43,13 @@ struct password_item * password_find_by_id(list *l, uint id) { struct password_item *pi; + btime now_ = current_real_time(); if (!l) return NULL; WALK_LIST(pi, *l) - if ((pi->id == id) 
&& (pi->accfrom <= now_real) && (now_real < pi->accto)) + if ((pi->id == id) && (pi->accfrom <= now_) && (now_ < pi->accto)) return pi; return NULL; @@ -56,12 +59,13 @@ struct password_item * password_find_by_value(list *l, char *pass, uint size) { struct password_item *pi; + btime now_ = current_real_time(); if (!l) return NULL; WALK_LIST(pi, *l) - if (password_verify(pi, pass, size) && (pi->accfrom <= now_real) && (now_real < pi->accto)) + if (password_verify(pi, pass, size) && (pi->accfrom <= now_) && (now_ < pi->accto)) return pi; return NULL; diff --git a/nest/password.h b/nest/password.h index f21483c4..c4017848 100644 --- a/nest/password.h +++ b/nest/password.h @@ -10,15 +10,13 @@ #ifndef PASSWORD_H #define PASSWORD_H -#include "lib/timer.h" - struct password_item { node n; char *password; /* Key data, null terminated */ uint length; /* Key length, without null */ uint id; /* Key ID */ uint alg; /* MAC algorithm */ - bird_clock_t accfrom, accto, genfrom, gento; + btime accfrom, accto, genfrom, gento; }; extern struct password_item *last_password_item; diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index 7395b45e..71cddd64 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -190,7 +190,7 @@ void ifa_notify(struct proto *p, unsigned flags, struct ifa *a) /** * rt_notify - notify instance about routing table change * @p: protocol instance - * @table: a routing table + * @channel: notifying channel * @net: a network entry * @new: new route for the network * @old: old route for the network @@ -281,7 +281,7 @@ int import_control(struct proto *p, rte **e, ea_list **attrs, struct linpool *po /** * rte_recalculate - prepare routes for comparison - * @table: a routing table + * @table: a routing table * @net: a network entry * @new: new route for the network * @old: old route for the network diff --git a/nest/proto.c b/nest/proto.c index 552d53ae..15d6f4de 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -13,6 +13,7 @@ #include "lib/resource.h" #include 
"lib/lists.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" #include "nest/route.h" @@ -21,19 +22,12 @@ #include "filter/filter.h" pool *proto_pool; +list proto_list; static list protocol_list; -static list proto_list; #define PD(pr, msg, args...) do { if (pr->debug & D_STATES) { log(L_TRACE "%s: " msg, pr->name , ## args); } } while(0) -list active_proto_list; -static list inactive_proto_list; -static list initial_proto_list; -static list flush_proto_list; -static struct proto *initial_device_proto; - -static event *proto_flush_event; static timer *proto_shutdown_timer; static timer *gr_wait_timer; @@ -46,199 +40,680 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "HUNGRY", "???", "HAPPY", "FLUSHING" }; +static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; + +extern struct protocol proto_unix_iface; -static void proto_flush_loop(void *); -static void proto_shutdown_loop(struct timer *); +static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); -static void proto_want_export_up(struct proto *p); -static void proto_fell_down(struct proto *p); static char *proto_state_name(struct proto *p); +static void channel_verify_limits(struct channel *c); +static inline void channel_reset_limit(struct channel_limit *l); -static void -proto_relink(struct proto *p) -{ - list *l = NULL; - switch (p->core_state) - { - case FS_HUNGRY: - l = &inactive_proto_list; - break; - case FS_HAPPY: - l = &active_proto_list; - break; - case FS_FLUSHING: - l = &flush_proto_list; - break; - default: - ASSERT(0); - } +static inline int proto_is_done(struct proto *p) +{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } - rem_node(&p->n); - add_tail(l, &p->n); -} +static inline int channel_is_active(struct channel *c) +{ return (c->channel_state == CS_START) || 
(c->channel_state == CS_UP); } static void proto_log_state_change(struct proto *p) { if (p->debug & D_STATES) + { + char *name = proto_state_name(p); + if (name != p->last_state_name_announced) { - char *name = proto_state_name(p); - if (name != p->last_state_name_announced) - { - p->last_state_name_announced = name; - PD(p, "State changed to %s", proto_state_name(p)); - } + p->last_state_name_announced = name; + PD(p, "State changed to %s", proto_state_name(p)); } + } else p->last_state_name_announced = NULL; } +struct channel_config * +proto_cf_find_channel(struct proto_config *pc, uint net_type) +{ + struct channel_config *cc; + + WALK_LIST(cc, pc->channels) + if (cc->net_type == net_type) + return cc; + + return NULL; +} + /** - * proto_new - create a new protocol instance - * @c: protocol configuration - * @size: size of protocol data structure (each protocol instance is represented by - * a structure starting with generic part [struct &proto] and continued - * with data specific to the protocol) + * proto_find_channel_by_table - find channel connected to a routing table + * @p: protocol instance + * @t: routing table * - * When a new configuration has been read in, the core code starts - * initializing all the protocol instances configured by calling their - * init() hooks with the corresponding instance configuration. The initialization - * code of the protocol is expected to create a new instance according to the - * configuration by calling this function and then modifying the default settings - * to values wanted by the protocol. 
+ * Returns pointer to channel or NULL */ -void * -proto_new(struct proto_config *c, unsigned size) +struct channel * +proto_find_channel_by_table(struct proto *p, struct rtable *t) { - struct protocol *pr = c->protocol; - struct proto *p = mb_allocz(proto_pool, size); - - p->cf = c; - p->debug = c->debug; - p->mrtdump = c->mrtdump; - p->name = c->name; - p->preference = c->preference; - p->disabled = c->disabled; - p->proto = pr; - p->table = c->table->table; - p->hash_key = random_u32(); - c->proto = p; - return p; + struct channel *c; + + WALK_LIST(c, p->channels) + if (c->table == t) + return c; + + return NULL; } -static void -proto_init_instance(struct proto *p) +/** + * proto_find_channel_by_name - find channel by its name + * @p: protocol instance + * @n: channel name + * + * Returns pointer to channel or NULL + */ +struct channel * +proto_find_channel_by_name(struct proto *p, const char *n) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); - p->attn = ev_new(p->pool); - p->attn->data = p; + struct channel *c; - if (graceful_restart_state == GRS_INIT) - p->gr_recovery = 1; + WALK_LIST(c, p->channels) + if (!strcmp(c->name, n)) + return c; - if (! p->proto->multitable) - rt_lock_table(p->table); + return NULL; } -extern pool *rt_table_pool; /** - * proto_add_announce_hook - connect protocol to a routing table + * proto_add_channel - connect protocol to a routing table * @p: protocol instance - * @t: routing table to connect to - * @stats: per-table protocol statistics + * @cf: channel configuration * - * This function creates a connection between the protocol instance @p and the - * routing table @t, making the protocol hear all changes in the table. 
+ * This function creates a channel between the protocol instance @p and the + * routing table specified in the configuration @cf, making the protocol hear + * all changes in the table and allowing the protocol to update routes in the + * table. * - * The announce hook is linked in the protocol ahook list. Announce hooks are - * allocated from the routing table resource pool and when protocol accepts - * routes also in the table ahook list. The are linked to the table ahook list - * and unlinked from it depending on export_state (in proto_want_export_up() and - * proto_want_export_down()) and they are automatically freed after the protocol - * is flushed (in proto_fell_down()). - * - * Unless you want to listen to multiple routing tables (as the Pipe protocol - * does), you needn't to worry about this function since the connection to the - * protocol's primary routing table is initialized automatically by the core - * code. + * The channel is linked in the protocol channel list and when active also in + * the table channel list. Channels are allocated from the global resource pool + * (@proto_pool) and they are automatically freed when the protocol is removed. 
*/ -struct announce_hook * -proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats) + +struct channel * +proto_add_channel(struct proto *p, struct channel_config *cf) +{ + struct channel *c = mb_allocz(proto_pool, cf->channel->channel_size); + + c->name = cf->name; + c->channel = cf->channel; + c->proto = p; + c->table = cf->table->table; + + c->in_filter = cf->in_filter; + c->out_filter = cf->out_filter; + c->rx_limit = cf->rx_limit; + c->in_limit = cf->in_limit; + c->out_limit = cf->out_limit; + + c->net_type = cf->net_type; + c->ra_mode = cf->ra_mode; + c->preference = cf->preference; + c->merge_limit = cf->merge_limit; + c->in_keep_filtered = cf->in_keep_filtered; + + c->channel_state = CS_DOWN; + c->export_state = ES_DOWN; + c->last_state_change = current_time(); + c->reloadable = 1; + + CALL(c->channel->init, c, cf); + + add_tail(&p->channels, &c->n); + + PD(p, "Channel %s connected to table %s", c->name, c->table->name); + + return c; +} + +void +proto_remove_channel(struct proto *p, struct channel *c) +{ + ASSERT(c->channel_state == CS_DOWN); + + PD(p, "Channel %s removed", c->name); + + rem_node(&c->n); + mb_free(c); +} + + +static void +proto_start_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled) + channel_set_state(c, CS_UP); +} + +static void +proto_pause_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled && channel_is_active(c)) + channel_set_state(c, CS_START); +} + +static void +proto_stop_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST(c, p->channels) + if (!c->disabled && channel_is_active(c)) + channel_set_state(c, CS_FLUSHING); +} + +static void +proto_remove_channels(struct proto *p) +{ + struct channel *c; + WALK_LIST_FIRST(c, p->channels) + proto_remove_channel(p, c); +} + +static void +channel_schedule_feed(struct channel *c, int initial) +{ + // DBG("%s: Scheduling meal\n", p->name); + 
ASSERT(c->channel_state == CS_UP); + + c->export_state = ES_FEEDING; + c->refeeding = !initial; + + ev_schedule(c->feed_event); +} + +static void +channel_feed_loop(void *ptr) +{ + struct channel *c = ptr; + + if (c->export_state != ES_FEEDING) + return; + + if (!c->feed_active) + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + + // DBG("Feeding protocol %s continued\n", p->name); + if (!rt_feed_channel(c)) + { + ev_schedule(c->feed_event); + return; + } + + // DBG("Feeding protocol %s finished\n", p->name); + c->export_state = ES_READY; + // proto_log_state_change(p); + + if (c->proto->feed_end) + c->proto->feed_end(c); +} + + +static void +channel_start_export(struct channel *c) +{ + ASSERT(c->channel_state == CS_UP); + ASSERT(c->export_state == ES_DOWN); + + channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ +} + +static void +channel_stop_export(struct channel *c) +{ + /* Need to abort feeding */ + if (c->export_state == ES_FEEDING) + rt_feed_channel_abort(c); + + c->export_state = ES_DOWN; + c->stats.exp_routes = 0; +} + +static void +channel_do_start(struct channel *c) +{ + rt_lock_table(c->table); + add_tail(&c->table->channels, &c->table_node); + c->proto->active_channels++; + + c->feed_event = ev_new(c->proto->pool); + c->feed_event->data = c; + c->feed_event->hook = channel_feed_loop; + + channel_reset_limit(&c->rx_limit); + channel_reset_limit(&c->in_limit); + channel_reset_limit(&c->out_limit); + + CALL(c->channel->start, c); +} + +static void +channel_do_flush(struct channel *c) +{ + rt_schedule_prune(c->table); + + c->gr_wait = 0; + if (c->gr_lock) + channel_graceful_restart_unlock(c); + + CALL(c->channel->shutdown, c); +} + +static void +channel_do_down(struct channel *c) { - struct announce_hook *h; + rem_node(&c->table_node); + rt_unlock_table(c->table); + c->proto->active_channels--; - DBG("Connecting protocol %s to table %s\n", p->name, t->name); - PD(p, "Connected to table %s", t->name); + if ((c->stats.imp_routes + 
c->stats.filt_routes) != 0) + log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - h = mb_allocz(rt_table_pool, sizeof(struct announce_hook)); - h->table = t; - h->proto = p; - h->stats = stats; + memset(&c->stats, 0, sizeof(struct proto_stats)); - h->next = p->ahooks; - p->ahooks = h; + CALL(c->channel->cleanup, c); - if (p->rt_notify && (p->export_state != ES_DOWN)) - add_tail(&t->hooks, &h->n); - return h; + /* Schedule protocol shutddown */ + if (proto_is_done(c->proto)) + ev_schedule(c->proto->event); +} + +void +channel_set_state(struct channel *c, uint state) +{ + uint cs = c->channel_state; + uint es = c->export_state; + + DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); + if (state == cs) + return; + + c->channel_state = state; + c->last_state_change = current_time(); + + switch (state) + { + case CS_START: + ASSERT(cs == CS_DOWN || cs == CS_UP); + + if (cs == CS_DOWN) + channel_do_start(c); + + if (es != ES_DOWN) + channel_stop_export(c); + + break; + + case CS_UP: + ASSERT(cs == CS_DOWN || cs == CS_START); + + if (cs == CS_DOWN) + channel_do_start(c); + + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c); + + break; + + case CS_FLUSHING: + ASSERT(cs == CS_START || cs == CS_UP); + + if (es != ES_DOWN) + channel_stop_export(c); + + channel_do_flush(c); + break; + + case CS_DOWN: + ASSERT(cs == CS_FLUSHING); + + channel_do_down(c); + break; + + default: + ASSERT(0); + } + // XXXX proto_log_state_change(c); } /** - * proto_find_announce_hook - find announce hooks - * @p: protocol instance - * @t: routing table + * channel_request_feeding - request feeding routes to the channel + * @c: given channel * - * Returns pointer to announce hook or NULL + * Sometimes it is needed to send again all routes to the channel. This is + * called feeding and can be requested by this function. 
This would cause + * channel export state transition to ES_FEEDING (during feeding) and when + * completed, it will switch back to ES_READY. This function can be called + * even when feeding is already running, in that case it is restarted. */ -struct announce_hook * -proto_find_announce_hook(struct proto *p, struct rtable *t) +void +channel_request_feeding(struct channel *c) { - struct announce_hook *a; + ASSERT(c->channel_state == CS_UP); - for (a = p->ahooks; a; a = a->next) - if (a->table == t) - return a; + /* Do nothing if we are still waiting for feeding */ + if (c->export_state == ES_DOWN) + return; - return NULL; + /* If we are already feeding, we want to restart it */ + if (c->export_state == ES_FEEDING) + { + /* Unless feeding is in initial state */ + if (!c->feed_active) + return; + + rt_feed_channel_abort(c); + } + + channel_reset_limit(&c->out_limit); + + /* Hack: reset exp_routes during refeed, and do not decrease it later */ + c->stats.exp_routes = 0; + + channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ + // proto_log_state_change(c); +} + +static inline int +channel_reloadable(struct channel *c) +{ + return c->proto->reload_routes && c->reloadable; } static void -proto_link_ahooks(struct proto *p) +channel_request_reload(struct channel *c) { - struct announce_hook *h; + ASSERT(c->channel_state == CS_UP); + ASSERT(channel_reloadable(c)); + + c->proto->reload_routes(c); - if (p->rt_notify) - for(h=p->ahooks; h; h=h->next) - add_tail(&h->table->hooks, &h->n); + /* + * Should this be done before reload_routes() hook? + * Perhaps, but routes are updated asynchronously. 
+ */ + channel_reset_limit(&c->rx_limit); + channel_reset_limit(&c->in_limit); } -static void -proto_unlink_ahooks(struct proto *p) +const struct channel_class channel_basic = { + .channel_size = sizeof(struct channel), + .config_size = sizeof(struct channel_config) +}; + +void * +channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto) +{ + struct channel_config *cf = NULL; + struct rtable_config *tab = NULL; + + if (net_type) + { + if (!net_val_match(net_type, proto->protocol->channel_mask)) + cf_error("Unsupported channel type"); + + if (proto->net_type && (net_type != proto->net_type)) + cf_error("Different channel type"); + + tab = new_config->def_tables[net_type]; + } + + if (!cc) + cc = &channel_basic; + + cf = cfg_allocz(cc->config_size); + cf->name = name; + cf->channel = cc; + cf->parent = proto; + cf->table = tab; + cf->out_filter = FILTER_REJECT; + + cf->net_type = net_type; + cf->ra_mode = RA_OPTIMAL; + cf->preference = proto->protocol->preference; + + add_tail(&proto->channels, &cf->n); + + return cf; +} + +void * +channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto) +{ + struct channel_config *cf; + + /* We are using name as token, so no strcmp() */ + WALK_LIST(cf, proto->channels) + if (cf->name == name) + { + /* Allow to redefine channel only if inherited from template */ + if (cf->parent == proto) + cf_error("Multiple %s channels", name); + + cf->parent = proto; + return cf; + } + + return channel_config_new(cc, name, net_type, proto); +} + +struct channel_config * +channel_copy_config(struct channel_config *src, struct proto_config *proto) { - struct announce_hook *h; + struct channel_config *dst = cfg_alloc(src->channel->config_size); - if (p->rt_notify) - for(h=p->ahooks; h; h=h->next) - rem_node(&h->n); + memcpy(dst, src, src->channel->config_size); + add_tail(&proto->channels, &dst->n); + CALL(src->channel->copy_config, dst, src); 
+ + return dst; } + +static int reconfigure_type; /* Hack to propagate type info to channel_reconfigure() */ + +int +channel_reconfigure(struct channel *c, struct channel_config *cf) +{ + /* FIXME: better handle these changes, also handle in_keep_filtered */ + if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + return 0; + + /* Note that filter_same() requires arguments in (new, old) order */ + int import_changed = !filter_same(cf->in_filter, c->in_filter); + int export_changed = !filter_same(cf->out_filter, c->out_filter); + + if (c->preference != cf->preference) + import_changed = 1; + + if (c->merge_limit != cf->merge_limit) + export_changed = 1; + + /* Reconfigure channel fields */ + c->in_filter = cf->in_filter; + c->out_filter = cf->out_filter; + c->rx_limit = cf->rx_limit; + c->in_limit = cf->in_limit; + c->out_limit = cf->out_limit; + + // c->ra_mode = cf->ra_mode; + c->merge_limit = cf->merge_limit; + c->preference = cf->preference; + c->in_keep_filtered = cf->in_keep_filtered; + + channel_verify_limits(c); + + /* Execute channel-specific reconfigure hook */ + if (c->channel->reconfigure && !c->channel->reconfigure(c, cf)) + return 0; + + /* If the channel is not open, it has no routes and we cannot reload it anyways */ + if (c->channel_state != CS_UP) + return 1; + + if (reconfigure_type == RECONFIG_SOFT) + { + if (import_changed) + log(L_INFO "Channel %s.%s changed import", c->proto->name, c->name); + + if (export_changed) + log(L_INFO "Channel %s.%s changed export", c->proto->name, c->name); + + return 1; + } + + /* Route reload may be not supported */ + if (import_changed && !channel_reloadable(c)) + return 0; + + if (import_changed || export_changed) + log(L_INFO "Reloading channel %s.%s", c->proto->name, c->name); + + if (import_changed) + channel_request_reload(c); + + if (export_changed) + channel_request_feeding(c); + + return 1; +} + + +int +proto_configure_channel(struct proto *p, struct channel **pc, struct 
channel_config *cf) +{ + struct channel *c = *pc; + + if (!c && cf) + { + *pc = proto_add_channel(p, cf); + } + else if (c && !cf) + { + if (c->channel_state != CS_DOWN) + { + log(L_INFO "Cannot remove channel %s.%s", c->proto->name, c->name); + return 0; + } + + proto_remove_channel(p, c); + *pc = NULL; + } + else if (c && cf) + { + if (!channel_reconfigure(c, cf)) + { + log(L_INFO "Cannot reconfigure channel %s.%s", c->proto->name, c->name); + return 0; + } + } + + return 1; +} + + static void -proto_free_ahooks(struct proto *p) +proto_event(void *ptr) { - struct announce_hook *h, *hn; + struct proto *p = ptr; + + if (p->do_start) + { + if_feed_baby(p); + p->do_start = 0; + } - for(h = p->ahooks; h; h = hn) + if (p->do_stop) { - hn = h->next; - mb_free(h); + if (p->proto == &proto_unix_iface) + if_flush_ifaces(p); + p->do_stop = 0; } - p->ahooks = NULL; - p->main_ahook = NULL; + if (proto_is_done(p)) + { + if (p->proto->cleanup) + p->proto->cleanup(p); + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); + } +} + + +/** + * proto_new - create a new protocol instance + * @c: protocol configuration + * + * When a new configuration has been read in, the core code starts + * initializing all the protocol instances configured by calling their + * init() hooks with the corresponding instance configuration. The initialization + * code of the protocol is expected to create a new instance according to the + * configuration by calling this function and then modifying the default settings + * to values wanted by the protocol. 
+ */ +void * +proto_new(struct proto_config *cf) +{ + struct proto *p = mb_allocz(proto_pool, cf->protocol->proto_size); + + p->cf = cf; + p->debug = cf->debug; + p->mrtdump = cf->mrtdump; + p->name = cf->name; + p->proto = cf->protocol; + p->net_type = cf->net_type; + p->disabled = cf->disabled; + p->hash_key = random_u32(); + cf->proto = p; + + init_list(&p->channels); + + return p; +} + +static struct proto * +proto_init(struct proto_config *c, node *n) +{ + struct protocol *pr = c->protocol; + struct proto *p = pr->init(c); + + p->proto_state = PS_DOWN; + p->last_state_change = current_time(); + p->vrf = c->vrf; + insert_node(&p->n, n); + + p->event = ev_new(proto_pool); + p->event->hook = proto_event; + p->event->data = p; + + PD(p, "Initializing%s", p->disabled ? " [disabled]" : ""); + + return p; +} + +static void +proto_start(struct proto *p) +{ + /* Here we cannot use p->cf->name since it won't survive reconfiguration */ + p->pool = rp_new(proto_pool, p->proto->name); + + if (graceful_restart_state == GRS_INIT) + p->gr_recovery = 1; } @@ -263,22 +738,24 @@ proto_free_ahooks(struct proto *p) void * proto_config_new(struct protocol *pr, int class) { - struct proto_config *c = cfg_allocz(pr->config_size); + struct proto_config *cf = cfg_allocz(pr->config_size); if (class == SYM_PROTO) - add_tail(&new_config->protos, &c->n); - c->global = new_config; - c->protocol = pr; - c->name = pr->name; - c->preference = pr->preference; - c->class = class; - c->out_filter = FILTER_REJECT; - c->table = c->global->master_rtc; - c->debug = new_config->proto_default_debug; - c->mrtdump = new_config->proto_default_mrtdump; - return c; + add_tail(&new_config->protos, &cf->n); + + cf->global = new_config; + cf->protocol = pr; + cf->name = pr->name; + cf->class = class; + cf->debug = new_config->proto_default_debug; + cf->mrtdump = new_config->proto_default_mrtdump; + + init_list(&cf->channels); + + return cf; } + /** * proto_copy_config - copy a protocol configuration * @dest: 
destination protocol configuration @@ -293,6 +770,7 @@ proto_config_new(struct protocol *pr, int class) void proto_copy_config(struct proto_config *dest, struct proto_config *src) { + struct channel_config *cc; node old_node; int old_class; char *old_name; @@ -305,7 +783,7 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src) DBG("Copying configuration from %s to %s\n", src->name, dest->name); - /* + /* * Copy struct proto_config here. Keep original node, class and name. * protocol-specific config copy is handled by protocol copy_config() hook */ @@ -314,12 +792,17 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src) old_class = dest->class; old_name = dest->name; - memcpy(dest, src, sizeof(struct proto_config)); + memcpy(dest, src, src->protocol->config_size); dest->n = old_node; dest->class = old_class; dest->name = old_name; + init_list(&dest->channels); + + WALK_LIST(cc, src->channels) + channel_copy_config(cc, dest); + /* FIXME: allow for undefined copy_config */ dest->protocol->copy_config(dest, src); } @@ -339,67 +822,15 @@ protos_preconfig(struct config *c) init_list(&c->protos); DBG("Protocol preconfig:"); WALK_LIST(p, protocol_list) - { - DBG(" %s", p->name); - p->name_counter = 0; - if (p->preconfig) - p->preconfig(p, c); - } - DBG("\n"); -} - -/** - * protos_postconfig - post-configuration processing - * @c: new configuration - * - * This function calls the postconfig() hooks of all protocol - * instances specified in configuration @c. The hooks are not - * called for protocol templates. 
- */ -void -protos_postconfig(struct config *c) -{ - struct proto_config *x; - struct protocol *p; - - DBG("Protocol postconfig:"); - WALK_LIST(x, c->protos) - { - DBG(" %s", x->name); - - p = x->protocol; - if (p->postconfig) - p->postconfig(x); - } + { + DBG(" %s", p->name); + p->name_counter = 0; + if (p->preconfig) + p->preconfig(p, c); + } DBG("\n"); } -extern struct protocol proto_unix_iface; - -static struct proto * -proto_init(struct proto_config *c) -{ - struct protocol *p = c->protocol; - struct proto *q = p->init(c); - - q->proto_state = PS_DOWN; - q->core_state = FS_HUNGRY; - q->export_state = ES_DOWN; - q->last_state_change = now; - q->vrf = c->vrf; - - add_tail(&initial_proto_list, &q->n); - - if (p == &proto_unix_iface) - initial_device_proto = q; - - add_tail(&proto_list, &q->glob_node); - PD(q, "Initializing%s", q->disabled ? " [disabled]" : ""); - return q; -} - -int proto_reconfig_type; /* Hack to propagate type info to pipe reconfigure hook */ - static int proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config *nc, int type) { @@ -409,75 +840,23 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config /* If there is a too big change in core attributes, ... */ if ((nc->protocol != oc->protocol) || + (nc->net_type != oc->net_type) || (nc->disabled != p->disabled) || - (nc->vrf != oc->vrf) || - (nc->table->table != oc->table->table)) + (nc->vrf != oc->vrf)) return 0; + p->name = nc->name; p->debug = nc->debug; p->mrtdump = nc->mrtdump; - proto_reconfig_type = type; + reconfigure_type = type; /* Execute protocol specific reconfigure hook */ - if (! 
(p->proto->reconfigure && p->proto->reconfigure(p, nc))) + if (!p->proto->reconfigure || !p->proto->reconfigure(p, nc)) return 0; DBG("\t%s: same\n", oc->name); PD(p, "Reconfigured"); p->cf = nc; - p->name = nc->name; - p->preference = nc->preference; - - - /* Multitable protocols handle rest in their reconfigure hooks */ - if (p->proto->multitable) - return 1; - - /* Update filters and limits in the main announce hook - Note that this also resets limit state */ - if (p->main_ahook) - { - struct announce_hook *ah = p->main_ahook; - ah->in_filter = nc->in_filter; - ah->out_filter = nc->out_filter; - ah->rx_limit = nc->rx_limit; - ah->in_limit = nc->in_limit; - ah->out_limit = nc->out_limit; - ah->in_keep_filtered = nc->in_keep_filtered; - proto_verify_limits(ah); - } - - /* Update routes when filters changed. If the protocol in not UP, - it has no routes and we can ignore such changes */ - if ((p->proto_state != PS_UP) || (type == RECONFIG_SOFT)) - return 1; - - int import_changed = ! filter_same(nc->in_filter, oc->in_filter); - int export_changed = ! filter_same(nc->out_filter, oc->out_filter); - - /* We treat a change in preferences by reimporting routes */ - if (nc->preference != oc->preference) - import_changed = 1; - - if (import_changed || export_changed) - log(L_INFO "Reloading protocol %s", p->name); - - /* If import filter changed, call reload hook */ - if (import_changed && ! (p->reload_routes && p->reload_routes(p))) - { - /* Now, the protocol is reconfigured. But route reload failed - and we have to do regular protocol restart. 
*/ - log(L_INFO "Restarting protocol %s", p->name); - p->disabled = 1; - p->down_code = PDC_CF_RESTART; - proto_rethink_goal(p); - p->disabled = 0; - proto_rethink_goal(p); - return 1; - } - - if (export_changed) - proto_request_feeding(p); return 1; } @@ -514,85 +893,94 @@ void protos_commit(struct config *new, struct config *old, int force_reconfig, int type) { struct proto_config *oc, *nc; - struct proto *p, *n; struct symbol *sym; + struct proto *p; + node *n; + DBG("protos_commit:\n"); if (old) + { + WALK_LIST(oc, old->protos) { - WALK_LIST(oc, old->protos) - { - p = oc->proto; - sym = cf_find_symbol(new, oc->name); - if (sym && sym->class == SYM_PROTO && !new->shutdown) - { - /* Found match, let's check if we can smoothly switch to new configuration */ - /* No need to check description */ - nc = sym->def; - nc->proto = p; - - /* We will try to reconfigure protocol p */ - if (! force_reconfig && proto_reconfigure(p, oc, nc, type)) - continue; - - /* Unsuccessful, we will restart it */ - if (!p->disabled && !nc->disabled) - log(L_INFO "Restarting protocol %s", p->name); - else if (p->disabled && !nc->disabled) - log(L_INFO "Enabling protocol %s", p->name); - else if (!p->disabled && nc->disabled) - log(L_INFO "Disabling protocol %s", p->name); - - p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART; - p->cf_new = nc; - } - else if (!new->shutdown) - { - log(L_INFO "Removing protocol %s", p->name); - p->down_code = PDC_CF_REMOVE; - p->cf_new = NULL; - } - else /* global shutdown */ - { - p->down_code = PDC_CMD_SHUTDOWN; - p->cf_new = NULL; - } - - p->reconfiguring = 1; - config_add_obstacle(old); - proto_rethink_goal(p); - } + p = oc->proto; + sym = cf_find_symbol(new, oc->name); + if (sym && sym->class == SYM_PROTO && !new->shutdown) + { + /* Found match, let's check if we can smoothly switch to new configuration */ + /* No need to check description */ + nc = sym->def; + nc->proto = p; + + /* We will try to reconfigure protocol p */ + if (! 
force_reconfig && proto_reconfigure(p, oc, nc, type)) + continue; + + /* Unsuccessful, we will restart it */ + if (!p->disabled && !nc->disabled) + log(L_INFO "Restarting protocol %s", p->name); + else if (p->disabled && !nc->disabled) + log(L_INFO "Enabling protocol %s", p->name); + else if (!p->disabled && nc->disabled) + log(L_INFO "Disabling protocol %s", p->name); + + p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART; + p->cf_new = nc; + } + else if (!new->shutdown) + { + log(L_INFO "Removing protocol %s", p->name); + p->down_code = PDC_CF_REMOVE; + p->cf_new = NULL; + } + else /* global shutdown */ + { + p->down_code = PDC_CMD_SHUTDOWN; + p->cf_new = NULL; + } + + p->reconfiguring = 1; + config_add_obstacle(old); + proto_rethink_goal(p); } + } + struct proto *first_dev_proto = NULL; + + n = NODE &(proto_list.head); WALK_LIST(nc, new->protos) if (!nc->proto) - { - if (old) /* Not a first-time configuration */ - log(L_INFO "Adding protocol %s", nc->name); - proto_init(nc); - } - DBG("\tdone\n"); + { + /* Not a first-time configuration */ + if (old) + log(L_INFO "Adding protocol %s", nc->name); + + p = proto_init(nc, n); + n = NODE p; + + if (p->proto == &proto_unix_iface) + first_dev_proto = p; + } + else + n = NODE nc->proto; DBG("Protocol start\n"); /* Start device protocol first */ - if (initial_device_proto) - { - proto_rethink_goal(initial_device_proto); - initial_device_proto = NULL; - } + if (first_dev_proto) + proto_rethink_goal(first_dev_proto); /* Determine router ID for the first time - it has to be here and not in global_commit() because it is postponed after start of device protocol */ if (!config->router_id) - { - config->router_id = if_choose_router_id(config->router_id_from, 0); - if (!config->router_id) - die("Cannot determine router ID, please configure it manually"); - } + { + config->router_id = if_choose_router_id(config->router_id_from, 0); + if (!config->router_id) + die("Cannot determine router ID, please configure it 
manually"); + } - /* Start all other protocols */ - WALK_LIST_DELSAFE(p, n, initial_proto_list) + /* Start all new protocols */ + WALK_LIST_DELSAFE(p, n, proto_list) proto_rethink_goal(p); } @@ -602,20 +990,22 @@ proto_rethink_goal(struct proto *p) struct protocol *q; byte goal; - if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN) - { - struct proto_config *nc = p->cf_new; - DBG("%s has shut down for reconfiguration\n", p->name); - p->cf->proto = NULL; - config_del_obstacle(p->cf->global); - rem_node(&p->n); - rem_node(&p->glob_node); - mb_free(p->message); - mb_free(p); - if (!nc) - return; - p = proto_init(nc); - } + if (p->reconfiguring && !p->active) + { + struct proto_config *nc = p->cf_new; + node *n = p->n.prev; + DBG("%s has shut down for reconfiguration\n", p->name); + p->cf->proto = NULL; + config_del_obstacle(p->cf->global); + proto_remove_channels(p); + rem_node(&p->n); + rfree(p->event); + mb_free(p->message); + mb_free(p); + if (!nc) + return; + p = proto_init(nc, n); + } /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) @@ -624,25 +1014,27 @@ proto_rethink_goal(struct proto *p) goal = PS_UP; q = p->proto; - if (goal == PS_UP) /* Going up */ + if (goal == PS_UP) + { + if (!p->active) { - if (p->proto_state == PS_DOWN && p->core_state == FS_HUNGRY) - { - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_init_instance(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } + /* Going up */ + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + proto_start(p); + proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); } - else /* Going down */ + } + else + { + if (p->proto_state == PS_START || p->proto_state == PS_UP) { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? 
q->shutdown(p) : PS_DOWN)); - } + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); } + } } @@ -664,20 +1056,21 @@ proto_rethink_goal(struct proto *p) * When graceful restart recovery need is detected during initialization, then * enabled protocols are marked with @gr_recovery flag before start. Such * protocols then decide how to proceed with graceful restart, participation is - * voluntary. Protocols could lock the recovery by proto_graceful_restart_lock() - * (stored in @gr_lock flag), which means that they want to postpone the end of - * the recovery until they converge and then unlock it. They also could set - * @gr_wait before advancing to %PS_UP, which means that the core should defer - * route export to that protocol until the end of the recovery. This should be - * done by protocols that expect their neigbors to keep the proper routes - * (kernel table, BGP sessions with BGP graceful restart capability). + * voluntary. Protocols could lock the recovery for each channel by function + * channel_graceful_restart_lock() (state stored in @gr_lock flag), which means + * that they want to postpone the end of the recovery until they converge and + * then unlock it. They also could set @gr_wait before advancing to %PS_UP, + * which means that the core should defer route export to that channel until + * the end of the recovery. This should be done by protocols that expect their + * neigbors to keep the proper routes (kernel table, BGP sessions with BGP + * graceful restart capability). * * The graceful restart recovery is finished when either all graceful restart * locks are unlocked or when graceful restart wait timer fires. 
* */ -static void graceful_restart_done(struct timer *t); +static void graceful_restart_done(timer *t); /** * graceful_restart_recovery - request initial graceful restart recovery @@ -708,15 +1101,14 @@ graceful_restart_init(void) log(L_INFO "Graceful restart started"); if (!graceful_restart_locks) - { - graceful_restart_done(NULL); - return; - } + { + graceful_restart_done(NULL); + return; + } graceful_restart_state = GRS_ACTIVE; - gr_wait_timer = tm_new(proto_pool); - gr_wait_timer->hook = graceful_restart_done; - tm_start(gr_wait_timer, config->gr_wait); + gr_wait_timer = tm_new_init(proto_pool, graceful_restart_done, NULL, 0, 0); + tm_start(gr_wait_timer, config->gr_wait S); } /** @@ -730,32 +1122,32 @@ graceful_restart_init(void) * restart wait timer fires (but there are still some locks). */ static void -graceful_restart_done(struct timer *t UNUSED) +graceful_restart_done(timer *t UNUSED) { - struct proto *p; - node *n; - log(L_INFO "Graceful restart done"); graceful_restart_state = GRS_DONE; - WALK_LIST2(p, n, proto_list, glob_node) - { - if (!p->gr_recovery) - continue; + struct proto *p; + WALK_LIST(p, proto_list) + { + if (!p->gr_recovery) + continue; + struct channel *c; + WALK_LIST(c, p->channels) + { /* Resume postponed export of routes */ - if ((p->proto_state == PS_UP) && p->gr_wait) - { - proto_want_export_up(p); - proto_log_state_change(p); - } + if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + channel_start_export(c); /* Cleanup */ - p->gr_recovery = 0; - p->gr_wait = 0; - p->gr_lock = 0; + c->gr_wait = 0; + c->gr_lock = 0; } + p->gr_recovery = 0; + } + graceful_restart_locks = 0; } @@ -766,17 +1158,17 @@ graceful_restart_show_status(void) return; cli_msg(-24, "Graceful restart recovery in progress"); - cli_msg(-24, " Waiting for %d protocols to recover", graceful_restart_locks); - cli_msg(-24, " Wait timer is %d/%d", tm_remains(gr_wait_timer), config->gr_wait); + cli_msg(-24, " Waiting for %d channels to recover", 
graceful_restart_locks); + cli_msg(-24, " Wait timer is %t/%u", tm_remains(gr_wait_timer), config->gr_wait); } /** - * proto_graceful_restart_lock - lock graceful restart by protocol - * @p: protocol instance + * channel_graceful_restart_lock - lock graceful restart by channel + * @p: channel instance * * This function allows a protocol to postpone the end of graceful restart * recovery until it converges. The lock is removed when the protocol calls - * proto_graceful_restart_unlock() or when the protocol is stopped. + * channel_graceful_restart_unlock() or when the channel is closed. * * The function have to be called during the initial phase of graceful restart * recovery and only for protocols that are part of graceful restart (i.e. their @@ -784,32 +1176,32 @@ graceful_restart_show_status(void) * hooks. */ void -proto_graceful_restart_lock(struct proto *p) +channel_graceful_restart_lock(struct channel *c) { ASSERT(graceful_restart_state == GRS_INIT); - ASSERT(p->gr_recovery); + ASSERT(c->proto->gr_recovery); - if (p->gr_lock) + if (c->gr_lock) return; - p->gr_lock = 1; + c->gr_lock = 1; graceful_restart_locks++; } /** - * proto_graceful_restart_unlock - unlock graceful restart by protocol - * @p: protocol instance + * channel_graceful_restart_unlock - unlock graceful restart by channel + * @p: channel instance * - * This function unlocks a lock from proto_graceful_restart_lock(). It is also + * This function unlocks a lock from channel_graceful_restart_lock(). It is also * automatically called when the lock holding protocol went down. 
*/ void -proto_graceful_restart_unlock(struct proto *p) +channel_graceful_restart_unlock(struct channel *c) { - if (!p->gr_lock) + if (!c->gr_lock) return; - p->gr_lock = 0; + c->gr_lock = 0; graceful_restart_locks--; if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks) @@ -830,34 +1222,26 @@ proto_graceful_restart_unlock(struct proto *p) void protos_dump_all(void) { - struct proto *p; - struct announce_hook *a; - debug("Protocols:\n"); - WALK_LIST(p, active_proto_list) + struct proto *p; + WALK_LIST(p, proto_list) + { + debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); + + struct channel *c; + WALK_LIST(c, p->channels) { - debug(" protocol %s state %s/%s\n", p->name, - p_states[p->proto_state], c_states[p->core_state]); - for (a = p->ahooks; a; a = a->next) - { - debug("\tTABLE %s\n", a->table->name); - if (a->in_filter) - debug("\tInput filter: %s\n", filter_name(a->in_filter)); - if (a->out_filter != FILTER_REJECT) - debug("\tOutput filter: %s\n", filter_name(a->out_filter)); - } - if (p->disabled) - debug("\tDISABLED\n"); - else if (p->proto->dump) - p->proto->dump(p); + debug("\tTABLE %s\n", c->table->name); + if (c->in_filter) + debug("\tInput filter: %s\n", filter_name(c->in_filter)); + if (c->out_filter) + debug("\tOutput filter: %s\n", filter_name(c->out_filter)); } - WALK_LIST(p, inactive_proto_list) - debug(" inactive %s: state %s/%s\n", p->name, p_states[p->proto_state], c_states[p->core_state]); - WALK_LIST(p, initial_proto_list) - debug(" initial %s\n", p->name); - WALK_LIST(p, flush_proto_list) - debug(" flushing %s\n", p->name); + + if (p->proto->dump && (p->proto_state != PS_DOWN)) + p->proto->dump(p); + } } /** @@ -894,12 +1278,9 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&protocol_list); init_list(&proto_list); - init_list(&active_proto_list); - init_list(&inactive_proto_list); - init_list(&initial_proto_list); - init_list(&flush_proto_list); + init_list(&protocol_list); + 
proto_build(&proto_device); #ifdef CONFIG_RADV proto_build(&proto_radv); @@ -926,160 +1307,37 @@ protos_build(void) #ifdef CONFIG_BABEL proto_build(&proto_babel); #endif +#ifdef CONFIG_RPKI + proto_build(&proto_rpki); +#endif proto_pool = rp_new(&root_pool, "Protocols"); - proto_flush_event = ev_new(proto_pool); - proto_flush_event->hook = proto_flush_loop; proto_shutdown_timer = tm_new(proto_pool); proto_shutdown_timer->hook = proto_shutdown_loop; } -static void -proto_feed_more(void *P) -{ - struct proto *p = P; - - if (p->export_state != ES_FEEDING) - return; - - DBG("Feeding protocol %s continued\n", p->name); - if (rt_feed_baby(p)) - { - DBG("Feeding protocol %s finished\n", p->name); - p->export_state = ES_READY; - proto_log_state_change(p); - - if (p->feed_end) - p->feed_end(p); - } - else - { - p->attn->hook = proto_feed_more; - ev_schedule(p->attn); /* Will continue later... */ - } -} - -static void -proto_feed_initial(void *P) -{ - struct proto *p = P; - - if (p->export_state != ES_FEEDING) - return; - - DBG("Feeding protocol %s\n", p->name); - - if_feed_baby(p); - proto_feed_more(P); -} - -static void -proto_schedule_feed(struct proto *p, int initial) -{ - DBG("%s: Scheduling meal\n", p->name); - - p->export_state = ES_FEEDING; - p->refeeding = !initial; - - p->attn->hook = initial ? proto_feed_initial : proto_feed_more; - ev_schedule(p->attn); - - if (p->feed_begin) - p->feed_begin(p, initial); -} - -/* - * Flushing loop is responsible for flushing routes and protocols - * after they went down. It runs in proto_flush_event. At the start of - * one round, protocols waiting to flush are marked in - * proto_schedule_flush_loop(). At the end of the round (when routing - * table flush is complete), marked protocols are flushed and a next - * round may start. 
- */ - -static int flush_loop_state; /* 1 -> running */ - -static void -proto_schedule_flush_loop(void) -{ - struct proto *p; - struct announce_hook *h; - - if (flush_loop_state) - return; - flush_loop_state = 1; - - WALK_LIST(p, flush_proto_list) - { - p->flushing = 1; - for (h=p->ahooks; h; h=h->next) - rt_mark_for_prune(h->table); - } - - ev_schedule(proto_flush_event); -} - -static void -proto_flush_loop(void *unused UNUSED) -{ - struct proto *p; - - if (! rt_prune_loop()) - { - /* Rtable pruning is not finished */ - ev_schedule(proto_flush_event); - return; - } - - rt_prune_sources(); - - again: - WALK_LIST(p, flush_proto_list) - if (p->flushing) - { - /* This will flush interfaces in the same manner - like rt_prune_all() flushes routes */ - if (p->proto == &proto_unix_iface) - if_flush_ifaces(p); - - DBG("Flushing protocol %s\n", p->name); - p->flushing = 0; - p->core_state = FS_HUNGRY; - proto_relink(p); - proto_log_state_change(p); - if (p->proto_state == PS_DOWN) - proto_fell_down(p); - goto again; - } - - /* This round finished, perhaps there will be another one */ - flush_loop_state = 0; - if (!EMPTY_LIST(flush_proto_list)) - proto_schedule_flush_loop(); -} - /* Temporary hack to propagate restart to BGP */ int proto_restart; static void -proto_shutdown_loop(struct timer *t UNUSED) +proto_shutdown_loop(timer *t UNUSED) { struct proto *p, *p_next; - WALK_LIST_DELSAFE(p, p_next, active_proto_list) + WALK_LIST_DELSAFE(p, p_next, proto_list) if (p->down_sched) - { - proto_restart = (p->down_sched == PDS_RESTART); + { + proto_restart = (p->down_sched == PDS_RESTART); - p->disabled = 1; + p->disabled = 1; + proto_rethink_goal(p); + if (proto_restart) + { + p->disabled = 0; proto_rethink_goal(p); - if (proto_restart) - { - p->disabled = 0; - proto_rethink_goal(p); - } } + } } static inline void @@ -1094,7 +1352,7 @@ proto_schedule_down(struct proto *p, byte restart, byte code) p->down_sched = restart ? 
PDS_RESTART : PDS_DISABLE; p->down_code = code; - tm_start_max(proto_shutdown_timer, restart ? 2 : 0); + tm_start_max(proto_shutdown_timer, restart ? 250 MS : 0); } /** @@ -1131,50 +1389,8 @@ proto_set_message(struct proto *p, char *msg, int len) } -/** - * proto_request_feeding - request feeding routes to the protocol - * @p: given protocol - * - * Sometimes it is needed to send again all routes to the - * protocol. This is called feeding and can be requested by this - * function. This would cause protocol export state transition - * to ES_FEEDING (during feeding) and when completed, it will - * switch back to ES_READY. This function can be called even - * when feeding is already running, in that case it is restarted. - */ -void -proto_request_feeding(struct proto *p) -{ - ASSERT(p->proto_state == PS_UP); - - /* Do nothing if we are still waiting for feeding */ - if (p->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (p->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (p->attn->hook == proto_feed_initial) - return; - - rt_feed_baby_abort(p); - } - - /* FIXME: This should be changed for better support of multitable protos */ - struct announce_hook *ah; - for (ah = p->ahooks; ah; ah = ah->next) - proto_reset_limit(ah->out_limit); - - /* Hack: reset exp_routes during refeed, and do not decrease it later */ - p->stats.exp_routes = 0; - - proto_schedule_feed(p, 0); - proto_log_state_change(p); -} - static const char * -proto_limit_name(struct proto_limit *l) +channel_limit_name(struct channel_limit *l) { const char *actions[] = { [PLA_WARN] = "warn", @@ -1187,22 +1403,22 @@ proto_limit_name(struct proto_limit *l) } /** - * proto_notify_limit: notify about limit hit and take appropriate action - * @ah: announce hook + * channel_notify_limit: notify about limit hit and take appropriate action + * @c: channel * @l: limit being hit * @dir: limit direction (PLD_*) - * @rt_count: the number of 
routes + * @rt_count: the number of routes * * The function is called by the route processing core when limit @l * is breached. It activates the limit and tooks appropriate action * according to @l->action. */ void -proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count) +channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = ah->proto; + struct proto *p = c->proto; if (l->state == PLS_BLOCKED) return; @@ -1210,148 +1426,112 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 /* For warning action, we want the log message every time we hit the limit */ if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, proto_limit_name(l)); + p->name, dir_name[dir], l->limit, channel_limit_name(l)); switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; - - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; - - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + { + case PLA_WARN: + l->state = PLS_ACTIVE; + break; + + case PLA_BLOCK: + l->state = PLS_BLOCKED; + break; + + case PLA_RESTART: + case PLA_DISABLE: + l->state = PLS_BLOCKED; + if (p->proto_state == PS_UP) + proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); + break; + } } -void -proto_verify_limits(struct announce_hook *ah) +static void +channel_verify_limits(struct channel *c) { - struct proto_limit *l; - struct proto_stats *stats = ah->stats; - u32 all_routes = stats->imp_routes + stats->filt_routes; + struct channel_limit *l; + u32 all_routes = c->stats.imp_routes + 
c->stats.filt_routes; - l = ah->rx_limit; - if (l && (all_routes > l->limit)) - proto_notify_limit(ah, l, PLD_RX, all_routes); + l = &c->rx_limit; + if (l->action && (all_routes > l->limit)) + channel_notify_limit(c, l, PLD_RX, all_routes); - l = ah->in_limit; - if (l && (stats->imp_routes > l->limit)) - proto_notify_limit(ah, l, PLD_IN, stats->imp_routes); + l = &c->in_limit; + if (l->action && (c->stats.imp_routes > l->limit)) + channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); - l = ah->out_limit; - if (l && (stats->exp_routes > l->limit)) - proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes); + l = &c->out_limit; + if (l->action && (c->stats.exp_routes > l->limit)) + channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); } - -static void -proto_want_core_up(struct proto *p) +static inline void +channel_reset_limit(struct channel_limit *l) { - ASSERT(p->core_state == FS_HUNGRY); - - if (!p->proto->multitable) - { - p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - - /* Connect protocol to routing table */ - p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats); - p->main_ahook->in_filter = p->cf->in_filter; - p->main_ahook->out_filter = p->cf->out_filter; - p->main_ahook->rx_limit = p->cf->rx_limit; - p->main_ahook->in_limit = p->cf->in_limit; - p->main_ahook->out_limit = p->cf->out_limit; - p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered; - - proto_reset_limit(p->main_ahook->rx_limit); - proto_reset_limit(p->main_ahook->in_limit); - proto_reset_limit(p->main_ahook->out_limit); - } - - p->core_state = FS_HAPPY; - proto_relink(p); + if (l->action) + l->state = PLS_INITIAL; } -static void -proto_want_export_up(struct proto *p) +static inline void +proto_do_start(struct proto *p) { - ASSERT(p->core_state == FS_HAPPY); - ASSERT(p->export_state == ES_DOWN); - - proto_link_ahooks(p); - proto_schedule_feed(p, 1); /* Sets ES_FEEDING */ + p->active = 1; + p->do_start = 1; + ev_schedule(p->event); } static void 
-proto_want_export_down(struct proto *p) +proto_do_up(struct proto *p) { - ASSERT(p->export_state != ES_DOWN); - - /* Need to abort feeding */ - if (p->export_state == ES_FEEDING) - rt_feed_baby_abort(p); + if (!p->main_source) + { + p->main_source = rt_get_source(p, 0); + rt_lock_source(p->main_source); + } - p->export_state = ES_DOWN; - p->stats.exp_routes = 0; - proto_unlink_ahooks(p); + proto_start_channels(p); } -static void -proto_want_core_down(struct proto *p) +static inline void +proto_do_pause(struct proto *p) { - ASSERT(p->core_state == FS_HAPPY); - ASSERT(p->export_state == ES_DOWN); - - p->core_state = FS_FLUSHING; - proto_relink(p); - proto_schedule_flush_loop(); - - if (!p->proto->multitable) - { - rt_unlock_source(p->main_source); - p->main_source = NULL; - } + proto_pause_channels(p); } static void -proto_falling_down(struct proto *p) +proto_do_stop(struct proto *p) { + p->down_sched = 0; p->gr_recovery = 0; - p->gr_wait = 0; - if (p->gr_lock) - proto_graceful_restart_unlock(p); -} - -static void -proto_fell_down(struct proto *p) -{ - DBG("Protocol %s down\n", p->name); - - u32 all_routes = p->stats.imp_routes + p->stats.filt_routes; - if (all_routes != 0) - log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes); - bzero(&p->stats, sizeof(struct proto_stats)); - proto_free_ahooks(p); + p->do_stop = 1; + ev_schedule(p->event); - if (! 
p->proto->multitable) - rt_unlock_table(p->table); + if (p->main_source) + { + rt_unlock_source(p->main_source); + p->main_source = NULL; + } - if (p->proto->cleanup) - p->proto->cleanup(p); + proto_stop_channels(p); +} - proto_rethink_goal(p); +static void +proto_do_down(struct proto *p) +{ + p->down_code = 0; + neigh_prune(); + rfree(p->pool); + p->pool = NULL; + + /* Shutdown is finished in the protocol event */ + if (proto_is_done(p)) + ev_schedule(p->event); } + /** * proto_notify_state - notify core about protocol state change * @p: protocol the state of which has changed @@ -1367,78 +1547,53 @@ proto_fell_down(struct proto *p) * it should be used at tail positions of protocol callbacks. */ void -proto_notify_state(struct proto *p, unsigned ps) +proto_notify_state(struct proto *p, uint state) { - unsigned ops = p->proto_state; - unsigned cs = p->core_state; - unsigned es = p->export_state; + uint ps = p->proto_state; - DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]); - if (ops == ps) + DBG("%s reporting state transition %s -> %s\n", p->name, p_states[ps], p_states[state]); + if (state == ps) return; - p->proto_state = ps; - p->last_state_change = now; + p->proto_state = state; + p->last_state_change = current_time(); - switch (ps) - { - case PS_START: - ASSERT(ops == PS_DOWN || ops == PS_UP); - ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY); - - if (es != ES_DOWN) - proto_want_export_down(p); - break; - - case PS_UP: - ASSERT(ops == PS_DOWN || ops == PS_START); - ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY); - ASSERT(es == ES_DOWN); - - if (cs == FS_HUNGRY) - proto_want_core_up(p); - if (!p->gr_wait) - proto_want_export_up(p); - break; - - case PS_STOP: - ASSERT(ops == PS_START || ops == PS_UP); - - p->down_sched = 0; - - if (es != ES_DOWN) - proto_want_export_down(p); - if (cs == FS_HAPPY) - proto_want_core_down(p); - proto_falling_down(p); - break; - - case PS_DOWN: - p->down_code = 0; - p->down_sched = 0; - 
- if (es != ES_DOWN) - proto_want_export_down(p); - if (cs == FS_HAPPY) - proto_want_core_down(p); - if (ops != PS_STOP) - proto_falling_down(p); - - neigh_prune(); // FIXME convert neighbors to resource? - rfree(p->pool); - p->pool = NULL; - - if (cs == FS_HUNGRY) /* Shutdown finished */ - { - proto_log_state_change(p); - proto_fell_down(p); - return; /* The protocol might have ceased to exist */ - } - break; - - default: - bug("%s: Invalid state %d", p->name, ps); - } + switch (state) + { + case PS_START: + ASSERT(ps == PS_DOWN || ps == PS_UP); + + if (ps == PS_DOWN) + proto_do_start(p); + else + proto_do_pause(p); + break; + + case PS_UP: + ASSERT(ps == PS_DOWN || ps == PS_START); + + if (ps == PS_DOWN) + proto_do_start(p); + + proto_do_up(p); + break; + + case PS_STOP: + ASSERT(ps == PS_START || ps == PS_UP); + + proto_do_stop(p); + break; + + case PS_DOWN: + if (ps != PS_STOP) + proto_do_stop(p); + + proto_do_down(p); + break; + + default: + bug("%s: Invalid state %d", p->name, ps); + } proto_log_state_change(p); } @@ -1450,84 +1605,74 @@ proto_notify_state(struct proto *p, unsigned ps) static char * proto_state_name(struct proto *p) { -#define P(x,y) ((x << 4) | y) - switch (P(p->proto_state, p->core_state)) - { - case P(PS_DOWN, FS_HUNGRY): return "down"; - case P(PS_START, FS_HUNGRY): - case P(PS_START, FS_HAPPY): return "start"; - case P(PS_UP, FS_HAPPY): - switch (p->export_state) - { - case ES_DOWN: return "wait"; - case ES_FEEDING: return "feed"; - case ES_READY: return "up"; - default: return "???"; - } - case P(PS_STOP, FS_HUNGRY): - case P(PS_STOP, FS_FLUSHING): return "stop"; - case P(PS_DOWN, FS_FLUSHING): return "flush"; - default: return "???"; - } -#undef P + switch (p->proto_state) + { + case PS_DOWN: return p->active ? 
"flush" : "down"; + case PS_START: return "start"; + case PS_UP: return "up"; + case PS_STOP: return "stop"; + default: return "???"; + } } static void -proto_show_stats(struct proto_stats *s, int in_keep_filtered) +channel_show_stats(struct channel *c) { - if (in_keep_filtered) - cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + struct proto_stats *s = &c->stats; + + if (c->in_keep_filtered) + cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported", + s->imp_routes, s->filt_routes, s->exp_routes); else - cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); + cli_msg(-1006, " Routes: %u imported, %u exported", + s->imp_routes, s->exp_routes); - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", + cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", s->imp_updates_received, s->imp_updates_invalid, s->imp_updates_filtered, s->imp_updates_ignored, s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", s->imp_withdraws_received, s->imp_withdraws_invalid, s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", s->exp_updates_received, s->exp_updates_rejected, s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", + cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", s->exp_withdraws_received, s->exp_withdraws_accepted); } void -proto_show_limit(struct proto_limit *l, const char *dsc) +channel_show_limit(struct channel_limit *l, const char *dsc) { 
- if (!l) + if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", proto_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name(l)); } void -proto_show_basic_info(struct proto *p) +channel_show_info(struct channel *c) { - if (p->vrf) - cli_msg(-1006, " VRF: %s", p->vrf->name); - - cli_msg(-1006, " Preference: %d", p->preference); - cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter)); - cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter)); + cli_msg(-1006, " Channel %s", c->name); + cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Table: %s", c->table->name); + cli_msg(-1006, " Preference: %d", c->preference); + cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); + cli_msg(-1006, " Output filter: %s", filter_name(c->out_filter)); if (graceful_restart_state == GRS_ACTIVE) - cli_msg(-1006, " GR recovery: %s%s", - p->gr_lock ? " pending" : "", - p->gr_wait ? " waiting" : ""); + cli_msg(-1006, " GR recovery: %s%s", + c->gr_lock ? " pending" : "", + c->gr_wait ? 
" waiting" : ""); - proto_show_limit(p->cf->rx_limit, "Receive limit:"); - proto_show_limit(p->cf->in_limit, "Import limit:"); - proto_show_limit(p->cf->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:"); + channel_show_limit(&c->in_limit, "Import limit:"); + channel_show_limit(&c->out_limit, "Export limit:"); - if (p->proto_state != PS_DOWN) - proto_show_stats(&p->stats, p->cf->in_keep_filtered); + if (c->channel_state != CS_DOWN) + channel_show_stats(c); } void @@ -1537,47 +1682,53 @@ proto_cmd_show(struct proto *p, uintptr_t verbose, int cnt) /* First protocol - show header */ if (!cnt) - cli_msg(-2002, "name proto table state since info"); + cli_msg(-2002, "%-10s %-10s %-10s %-6s %-12s %s", + "Name", "Proto", "Table", "State", "Since", "Info"); buf[0] = 0; if (p->proto->get_status) p->proto->get_status(p, buf); - tm_format_datetime(tbuf, &config->tf_proto, p->last_state_change); - cli_msg(-1002, "%-8s %-8s %-8s %-5s %-10s %s", + tm_format_time(tbuf, &config->tf_proto, p->last_state_change); + cli_msg(-1002, "%-10s %-10s %-10s %-6s %-12s %s", p->name, p->proto->name, - p->table->name, + p->main_channel ? 
p->main_channel->table->name : "---", proto_state_name(p), tbuf, buf); + if (verbose) + { + if (p->cf->dsc) + cli_msg(-1006, " Description: %s", p->cf->dsc); + if (p->message) + cli_msg(-1006, " Message: %s", p->message); + if (p->cf->router_id) + cli_msg(-1006, " Router ID: %R", p->cf->router_id); + if (p->vrf) + cli_msg(-1006, " VRF: %s", p->vrf->name); + + if (p->proto->show_proto_info) + p->proto->show_proto_info(p); + else { - if (p->cf->dsc) - cli_msg(-1006, " Description: %s", p->cf->dsc); - - if (p->message) - cli_msg(-1006, " Message: %s", p->message); - - if (p->cf->router_id) - cli_msg(-1006, " Router ID: %R", p->cf->router_id); - - if (p->proto->show_proto_info) - p->proto->show_proto_info(p); - else - proto_show_basic_info(p); - - cli_msg(-1006, ""); + struct channel *c; + WALK_LIST(c, p->channels) + channel_show_info(c); } + + cli_msg(-1006, ""); + } } void proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) - { - cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } log(L_INFO "Disabling protocol %s", p->name); p->disabled = 1; @@ -1591,10 +1742,10 @@ void proto_cmd_enable(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (!p->disabled) - { - cli_msg(-10, "%s: already enabled", p->name); - return; - } + { + cli_msg(-10, "%s: already enabled", p->name); + return; + } log(L_INFO "Enabling protocol %s", p->name); p->disabled = 0; @@ -1607,10 +1758,10 @@ void proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) { if (p->disabled) - { - cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } log(L_INFO "Restarting protocol %s", p->name); p->disabled = 1; @@ -1625,41 +1776,38 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) void proto_cmd_reload(struct proto *p, uintptr_t dir, int cnt UNUSED) { + struct channel *c; + if (p->disabled) - { - 
cli_msg(-8, "%s: already disabled", p->name); - return; - } + { + cli_msg(-8, "%s: already disabled", p->name); + return; + } /* If the protocol in not UP, it has no routes */ if (p->proto_state != PS_UP) return; + /* All channels must support reload */ + if (dir != CMD_RELOAD_OUT) + WALK_LIST(c, p->channels) + if (!channel_reloadable(c)) + { + cli_msg(-8006, "%s: reload failed", p->name); + return; + } + log(L_INFO "Reloading protocol %s", p->name); /* re-importing routes */ if (dir != CMD_RELOAD_OUT) - { - if (! (p->reload_routes && p->reload_routes(p))) - { - cli_msg(-8006, "%s: reload failed", p->name); - return; - } - - /* - * Should be done before reload_routes() hook? - * Perhaps, but these hooks work asynchronously. - */ - if (!p->proto->multitable) - { - proto_reset_limit(p->main_ahook->rx_limit); - proto_reset_limit(p->main_ahook->in_limit); - } - } + WALK_LIST(c, p->channels) + channel_request_reload(c); /* re-exporting routes */ if (dir != CMD_RELOAD_IN) - proto_request_feeding(p); + WALK_LIST(c, p->channels) + channel_request_feeding(c); cli_msg(-15, "%s: reloading", p->name); } @@ -1680,10 +1828,10 @@ static void proto_apply_cmd_symbol(struct symbol *s, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { if (s->class != SYM_PROTO) - { - cli_msg(9002, "%s is not a protocol", s->name); - return; - } + { + cli_msg(9002, "%s is not a protocol", s->name); + return; + } cmd(((struct proto_config *)s->def)->proto, arg, 0); cli_msg(0, ""); @@ -1692,16 +1840,12 @@ proto_apply_cmd_symbol(struct symbol *s, void (* cmd)(struct proto *, uintptr_t, static void proto_apply_cmd_patt(char *patt, void (* cmd)(struct proto *, uintptr_t, int), uintptr_t arg) { + struct proto *p; int cnt = 0; - node *nn; - WALK_LIST(nn, proto_list) - { - struct proto *p = SKIP_BACK(struct proto, glob_node, nn); - - if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); - } + WALK_LIST(p, proto_list) + if (!patt || patmatch(patt, p->name)) + cmd(p, arg, cnt++); if (!cnt) 
cli_msg(8003, "No protocols match"); @@ -1728,25 +1872,27 @@ proto_get_named(struct symbol *sym, struct protocol *pr) struct proto *p, *q; if (sym) - { - if (sym->class != SYM_PROTO) - cf_error("%s: Not a protocol", sym->name); - p = ((struct proto_config *)sym->def)->proto; - if (!p || p->proto != pr) - cf_error("%s: Not a %s protocol", sym->name, pr->name); - } + { + if (sym->class != SYM_PROTO) + cf_error("%s: Not a protocol", sym->name); + + p = ((struct proto_config *) sym->def)->proto; + if (!p || p->proto != pr) + cf_error("%s: Not a %s protocol", sym->name, pr->name); + } else - { - p = NULL; - WALK_LIST(q, active_proto_list) - if (q->proto == pr) - { - if (p) - cf_error("There are multiple %s protocols running", pr->name); - p = q; - } - if (!p) - cf_error("There is no %s protocol running", pr->name); - } + { + p = NULL; + WALK_LIST(q, proto_list) + if ((q->proto == pr) && (q->proto_state != PS_DOWN)) + { + if (p) + cf_error("There are multiple %s protocols running", pr->name); + p = q; + } + if (!p) + cf_error("There is no %s protocol running", pr->name); + } + return p; } diff --git a/nest/proto.sgml b/nest/proto.sgml index 1d4c31a7..53da78b8 100644 --- a/nest/proto.sgml +++ b/nest/proto.sgml @@ -69,23 +69,6 @@ its state by calling the <func/proto_notify_state/ function. <p>At any time, the core code can ask the protocol to shut itself down by calling its stop() hook. -<p>The <em/core state machine/ takes care of the core view of protocol state. -The states are traversed according to changes of the protocol state machine, but -sometimes the transitions are delayed if the core needs to finish some actions -(for example sending of new routes to the protocol) before proceeding to the -new state. There are the following core states: - -<descrip> - <tag/FS_HUNGRY/ The protocol is down, it doesn't have any routes and - doesn't want them. 
- <tag/FS_FEEDING/ The protocol has reached the <tt/PS_UP/ state, but - we are still busy sending the initial set of routes to it. - <tag/FS_HAPPY/ The protocol is up and has complete routing information. - <tag/FS_FLUSHING/ The protocol is shutting down (it's in either <tt/PS_STOP/ - or <tt/PS_DOWN/ state) and we're flushing all of its routes from the - routing tables. -</descrip> - <sect1>Functions of the protocol module <p>The protocol module provides the following functions: diff --git a/nest/protocol.h b/nest/protocol.h index 5aca9a4e..8a22d76b 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -11,7 +11,7 @@ #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" +#include "lib/event.h" #include "nest/route.h" #include "conf/conf.h" @@ -23,13 +23,16 @@ struct neighbor; struct rta; struct network; struct proto_config; +struct channel_limit; +struct channel_config; struct config; struct proto; -struct event; +struct channel; struct ea_list; struct eattr; struct symbol; + /* * Routing Protocol */ @@ -40,9 +43,10 @@ struct protocol { char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ int attr_class; /* Attribute class known to this protocol */ - int multitable; /* Protocol handles all announce hooks itself */ uint preference; /* Default protocol preference */ - uint config_size; /* Size of protocol config */ + uint channel_mask; /* Mask of accepted channel types (NB_*) */ + uint proto_size; /* Size of protocol data structure */ + uint config_size; /* Size of protocol config data structure */ void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct proto_config *); /* After configuring each instance */ @@ -63,7 +67,6 @@ struct protocol { void protos_build(void); void proto_build(struct protocol *); void protos_preconfig(struct config *); -void protos_postconfig(struct config *); void protos_commit(struct config *new, 
struct config *old, int force_restart, int type); void protos_dump_all(void); @@ -77,7 +80,7 @@ void protos_dump_all(void); extern struct protocol proto_device, proto_radv, proto_rip, proto_static, - proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel; + proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel, proto_rpki; /* * Routing Protocol Instance @@ -91,17 +94,13 @@ struct proto_config { char *name; char *dsc; int class; /* SYM_PROTO or SYM_TEMPLATE */ + u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ + u8 disabled; /* Protocol enabled/disabled by default */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ - unsigned preference, disabled; /* Generic parameters */ - int in_keep_filtered; /* Routes rejected in import filter are kept */ u32 router_id; /* Protocol specific router ID */ + + list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ - struct rtable_config *table; /* Table we're attached to */ - struct filter *in_filter, *out_filter; /* Attached filters */ - struct proto_limit *rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ - struct proto_limit *in_limit; /* Limit for importing routes from protocol */ - struct proto_limit *out_limit; /* Limit for exporting routes to protocol */ /* Check proto_reconfigure() and proto_copy_config() after changing struct proto_config */ @@ -113,7 +112,6 @@ struct proto_stats { /* Import - from protocol to core */ u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes that are preferred, sum over all routing tables */ u32 imp_updates_received; /* Number of route updates received */ u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ u32 
imp_updates_filtered; /* Number of route updates rejected by filters */ @@ -135,37 +133,36 @@ struct proto_stats { }; struct proto { - node n; /* Node in *_proto_list */ - node glob_node; /* Node in global proto_list */ + node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ struct proto_config *cf; /* Configuration data */ struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ - struct event *attn; /* "Pay attention" event */ + event *event; /* Protocol event */ + + list channels; /* List of channels to rtables (struct channel) */ + struct channel *main_channel; /* Primary channel */ + struct rte_src *main_source; /* Primary route source */ + struct iface *vrf; /* Related VRF instance, NULL if global */ char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ - unsigned preference; /* Default route preference */ - byte accept_ra_types; /* Which types of route announcements are accepted (RA_OPTIMAL or RA_ANY) */ + uint active_channels; /* Number of active channels */ + byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte proto_state; /* Protocol state machine (PS_*, see below) */ - byte core_state; /* Core state machine (FS_*, see below) */ - byte export_state; /* Route export state (ES_*, see below) */ + byte active; /* From PS_START to cleanup after PS_STOP */ + byte do_start; /* Start actions are scheduled */ + byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ - byte refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ - byte flushing; /* Protocol is flushed in current flush loop round */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ - byte gr_lock; /* Graceful restart mechanism should wait for this proto 
*/ - byte gr_wait; /* Route export to protocol is postponed until graceful restart */ byte down_sched; /* Shutdown is scheduled for later (PDS_*) */ byte down_code; /* Reason for shutdown (PDC_* codes) */ - byte merge_limit; /* Maximal number of nexthops for RA_MERGED */ u32 hash_key; /* Random key used for hashing of neighbors */ - bird_clock_t last_state_change; /* Time of last state transition */ + btime last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ char *message; /* State-change message, allocated from proto_pool */ - struct proto_stats stats; /* Current protocol statistics */ /* * General protocol hooks: @@ -180,23 +177,23 @@ struct proto { * It can construct a new rte, add private attributes and * decide whether the route shall be imported: 1=yes, -1=no, * 0=process it through the import filter set by the user. - * reload_routes Request protocol to reload all its routes to the core + * reload_routes Request channel to reload all its routes to the core * (using rte_update()). Returns: 0=reload cannot be done, * 1= reload is scheduled and will happen (asynchronously). - * feed_begin Notify protocol about beginning of route feeding. - * feed_end Notify protocol about finish of route feeding. + * feed_begin Notify channel about beginning of route feeding. + * feed_end Notify channel about finish of route feeding. 
*/ void (*if_notify)(struct proto *, unsigned flags, struct iface *i); void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); - void (*rt_notify)(struct proto *, struct rtable *table, struct network *net, struct rte *new, struct rte *old, struct ea_list *attrs); + void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old, struct ea_list *attrs); void (*neigh_notify)(struct neighbor *neigh); struct ea_list *(*make_tmp_attrs)(struct rte *rt, struct linpool *pool); void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs); int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool); - int (*reload_routes)(struct proto *); - void (*feed_begin)(struct proto *, int initial); - void (*feed_end)(struct proto *); + void (*reload_routes)(struct channel *); + void (*feed_begin)(struct channel *, int initial); + void (*feed_end)(struct channel *); /* * Routing entry hooks (called only for routes belonging to this protocol): @@ -216,15 +213,6 @@ struct proto { void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); - struct iface *vrf; /* Related VRF instance, NULL if global */ - struct rtable *table; /* Our primary routing table */ - struct rte_src *main_source; /* Primary route source */ - struct announce_hook *main_ahook; /* Primary announcement hook */ - struct announce_hook *ahooks; /* Announcement hooks for this protocol */ - - struct fib_iterator *feed_iterator; /* Routing table iterator used during protocol feeding */ - struct announce_hook *feed_ahook; /* Announce hook we currently feed */ - /* Hic sunt protocol-specific data */ }; @@ -248,26 +236,21 @@ struct proto_spec { #define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */ -void *proto_new(struct proto_config *, unsigned size); +void *proto_new(struct proto_config *); void *proto_config_new(struct protocol *, int class); void proto_copy_config(struct 
proto_config *dest, struct proto_config *src); void proto_set_message(struct proto *p, char *msg, int len); -void proto_request_feeding(struct proto *p); - -static inline void -proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size) -{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); } void graceful_restart_recovery(void); void graceful_restart_init(void); void graceful_restart_show_status(void); -void proto_graceful_restart_lock(struct proto *p); -void proto_graceful_restart_unlock(struct proto *p); +void channel_graceful_restart_lock(struct channel *c); +void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void proto_show_limit(struct proto_limit *l, const char *dsc); -void proto_show_basic_info(struct proto *p); +void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_info(struct channel *c); void proto_cmd_show(struct proto *, uintptr_t, int); void proto_cmd_disable(struct proto *, uintptr_t, int); @@ -299,9 +282,10 @@ rte_make_tmp_attrs(struct rte *rt, struct linpool *pool) } /* Moved from route.h to avoid dependency conflicts */ -static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } +static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); } -extern list active_proto_list; +extern pool *proto_pool; +extern list proto_list; /* * Each protocol instance runs two different state machines: @@ -353,7 +337,7 @@ void proto_notify_state(struct proto *p, unsigned state); * * HUNGRY ----> FEEDING * ^ | - * | V + * | V * FLUSHING <---- HAPPY * * States: HUNGRY Protocol either administratively down (i.e., @@ -377,16 +361,6 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). 
*/ -#define FS_HUNGRY 0 -#define FS_FEEDING 1 /* obsolete */ -#define FS_HAPPY 2 -#define FS_FLUSHING 3 - - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - /* @@ -429,6 +403,7 @@ extern struct proto_config *cf_dev_proto; #define PLD_OUT 2 /* Export limit */ #define PLD_MAX 3 +#define PLA_NONE 0 /* No limit */ #define PLA_WARN 1 /* Issue log warning */ #define PLA_BLOCK 2 /* Block new routes */ #define PLA_RESTART 4 /* Force protocol restart */ @@ -438,42 +413,182 @@ extern struct proto_config *cf_dev_proto; #define PLS_ACTIVE 1 /* Limit was hit */ #define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ -struct proto_limit { +struct channel_limit { u32 limit; /* Maximum number of prefixes */ - byte action; /* Action to take (PLA_*) */ - byte state; /* State of limit (PLS_*) */ + u8 action; /* Action to take (PLA_*) */ + u8 state; /* State of limit (PLS_*) */ }; -void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count); -void proto_verify_limits(struct announce_hook *ah); - -static inline void -proto_reset_limit(struct proto_limit *l) -{ - if (l) - l->state = PLS_INITIAL; -} +void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); /* - * Route Announcement Hook + * Channels */ -struct announce_hook { +struct channel_class { + uint channel_size; /* Size of channel data structure */ + uint config_size; /* Size of channel config data structure */ + + void (*init)(struct channel *, struct channel_config *); /* Create new instance */ + int (*reconfigure)(struct channel *, struct channel_config *); /* Try to reconfigure instance, returns success */ + int (*start)(struct channel *); /* Start the instance */ + void (*shutdown)(struct channel *); /* Stop the instance */ + void (*cleanup)(struct channel *); /* Channel finished flush */ + + void (*copy_config)(struct channel_config *, struct channel_config *); /* Copy config from given channel instance */ +#if 0 + XXXX; + 
void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ + void (*postconfig)(struct proto_config *); /* After configuring each instance */ + + + void (*dump)(struct proto *); /* Debugging dump */ + void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ + + void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ + void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */ + int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ + void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ + +#endif +}; + +extern struct channel_class channel_bgp; + +struct channel_config { node n; - struct rtable *table; + const char *name; + const struct channel_class *channel; + + struct proto_config *parent; /* Where channel is defined (proto or template) */ + struct rtable_config *table; /* Table we're attached to */ + struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol + (relevant when in_keep_filtered is active) */ + struct channel_limit in_limit; /* Limit for importing routes from protocol */ + struct channel_limit out_limit; /* Limit for exporting routes to protocol */ + + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ + u8 ra_mode; /* Mode of received route advertisements (RA_*) */ + u16 preference; /* Default route preference */ + u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ + u8 in_keep_filtered; /* Routes rejected in import filter are kept */ +}; + +struct channel { + node n; /* Node in proto->channels */ + node table_node; /* Node in table->channels */ + + const char *name; /* Channel name (may be NULL) */ + const struct channel_class *channel; struct proto *proto; + + struct rtable *table; 
struct filter *in_filter; /* Input filter */ struct filter *out_filter; /* Output filter */ - struct proto_limit *rx_limit; /* Receive limit (for in_keep_filtered) */ - struct proto_limit *in_limit; /* Input limit */ - struct proto_limit *out_limit; /* Output limit */ - struct proto_stats *stats; /* Per-table protocol statistics */ - struct announce_hook *next; /* Next hook for the same protocol */ - int in_keep_filtered; /* Routes rejected in import filter are kept */ + struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct channel_limit in_limit; /* Input limit */ + struct channel_limit out_limit; /* Output limit */ + + struct event *feed_event; /* Event responsible for feeding */ + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct proto_stats stats; /* Per-channel protocol statistics */ + + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ + u8 ra_mode; /* Mode of received route advertisements (RA_*) */ + u16 preference; /* Default route preference */ + u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ + u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 disabled; + u8 stale; /* Used in reconfiguration */ + + u8 channel_state; + u8 export_state; /* Route export state (ES_*, see below) */ + u8 feed_active; + u8 flush_active; + u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 reloadable; /* Hook reload_routes() is allowed on the channel */ + u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ + u8 gr_wait; /* Route export to channel is postponed until graceful restart */ + + btime last_state_change; /* Time of last state transition */ }; -struct announce_hook *proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats); -struct announce_hook *proto_find_announce_hook(struct proto *p, struct rtable *t); + +/* + * Channel states + * + * CS_DOWN - The initial and 
the final state of a channel. There is no route + * exchange between the protocol and the table. Channel is not counted as + * active. Channel keeps a ptr to the table, but do not lock the table and is + * not linked in the table. Generally, new closed channels are created in + * protocols' init() hooks. The protocol is expected to explicitly activate its + * channels (by calling channel_init() or channel_open()). + * + * CS_START - The channel as a connection between the protocol and the table is + * initialized (counted as active by the protocol, linked in the table and keeps + * the table locked), but there is no current route exchange. There still may be + * routes associated with the channel in the routing table if the channel falls + * to CS_START from CS_UP. Generally, channels are initialized in protocols' + * start() hooks when going to PS_START. + * + * CS_UP - The channel is initialized and the route exchange is allowed. Note + * that even in CS_UP state, route export may still be down (ES_DOWN) by the + * core decision (e.g. waiting for table convergence after graceful restart). + * I.e., the protocol decides to open the channel but the core decides to start + * route export. Route import (caused by rte_update() from the protocol) is not + * restricted by that and is on volition of the protocol. Generally, channels + * are opened in protocols' start() hooks when going to PS_UP. + * + * CS_FLUSHING - The transitional state between initialized channel and closed + * channel. The channel is still initialized, but no route exchange is allowed. + * Instead, the associated table is running flush loop to remove routes imported + * through the channel. After that, the channel changes state to CS_DOWN and + * is detached from the table (the table is unlocked and the channel is unlinked + * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * entered or left by the protocol. 
A protocol may request to close a channel + * (by calling channel_close()), which causes the channel to change state to + * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * automatically by the core when the protocol is going down. + * + * Allowed transitions: + * + * CS_DOWN -> CS_START / CS_UP + * CS_START -> CS_UP / CS_FLUSHING + * CS_UP -> CS_START / CS_FLUSHING + * CS_FLUSHING -> CS_DOWN (automatic) + */ + +#define CS_DOWN 0 +#define CS_START 1 +#define CS_UP 2 +#define CS_FLUSHING 3 + +#define ES_DOWN 0 +#define ES_FEEDING 1 +#define ES_READY 2 + + +struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); +static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) +{ struct channel_config *cc = HEAD(pc->channels); return NODE_VALID(cc) ? cc : NULL; } + +struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); +struct channel *proto_find_channel_by_name(struct proto *p, const char *n); +struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); +int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf); + +void channel_set_state(struct channel *c, uint state); + +static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } +static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } + +void channel_request_feeding(struct channel *c); +void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); +void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); +int channel_reconfigure(struct channel *c, struct channel_config *cf); + #endif diff --git a/nest/route.h b/nest/route.h index 0834da45..79127519 100644 --- a/nest/route.h +++ b/nest/route.h @@ -11,7 
+11,7 @@ #include "lib/lists.h" #include "lib/resource.h" -#include "lib/timer.h" +#include "lib/net.h" struct ea_list; struct protocol; @@ -36,11 +36,8 @@ struct cli; struct fib_node { struct fib_node *next; /* Next in hash chain */ struct fib_iterator *readers; /* List of readers of this node */ - byte pxlen; - byte flags; /* User-defined */ - byte x0, x1; /* User-defined */ - u32 uid; /* Unique ID based on hash */ - ip_addr prefix; /* In host order */ + byte flags; /* User-defined, will be removed */ + net_addr addr[0]; }; struct fib_iterator { /* See lib/slists.h for an explanation */ @@ -51,7 +48,7 @@ struct fib_iterator { /* See lib/slists.h for an explanation */ uint hash; }; -typedef void (*fib_init_func)(struct fib_node *); +typedef void (*fib_init_fn)(void *); struct fib { pool *fib_pool; /* Pool holding all our data */ @@ -59,16 +56,26 @@ struct fib { struct fib_node **hash_table; /* Node hash table */ uint hash_size; /* Number of hash table entries (a power of two) */ uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 16 - hash_log */ + uint hash_shift; /* 32 - hash_order */ + uint addr_type; /* Type of address data stored in fib (NET_*) */ + uint node_size; /* FIB node size, 0 for nonuniform */ + uint node_offset; /* Offset of fib_node struct inside of user data */ uint entries; /* Number of entries */ uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_func init; /* Constructor */ + fib_init_fn init; /* Constructor */ }; -void fib_init(struct fib *, pool *, unsigned node_size, unsigned hash_order, fib_init_func init); -void *fib_find(struct fib *, ip_addr *, int); /* Find or return NULL if doesn't exist */ -void *fib_get(struct fib *, ip_addr *, int); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, ip_addr, int); /* Longest-match routing lookup */ +static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) +{ return e ? 
(void *) ((char *) e - f->node_offset) : NULL; } + +static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) +{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } + +void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); +void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ +void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ +void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ +void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ void fib_delete(struct fib *, void *); /* Remove fib entry */ void fib_free(struct fib *); /* Destroy the fib */ void fib_check(struct fib *); /* Consistency check for debugging */ @@ -79,34 +86,37 @@ void fit_put(struct fib_iterator *, struct fib_node *); void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -#define FIB_WALK(fib, z) do { \ - struct fib_node *z, **ff = (fib)->hash_table; \ - uint count = (fib)->hash_size; \ - while (count--) \ - for(z = *ff++; z; z=z->next) +#define FIB_WALK(fib, type, z) do { \ + struct fib_node *fn_, **ff_ = (fib)->hash_table; \ + uint count_ = (fib)->hash_size; \ + type *z; \ + while (count_--) \ + for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) #define FIB_WALK_END } while (0) #define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) -#define FIB_ITERATE_START(fib, it, z) do { \ - struct fib_node *z = fit_get(fib, it); \ - uint count = (fib)->hash_size; \ - uint hpos = (it)->hash; \ +#define FIB_ITERATE_START(fib, it, type, z) do { \ + struct fib_node *fn_ = fit_get(fib, it); \ + uint count_ = (fib)->hash_size; \ + uint hpos_ = (it)->hash; \ + type *z; \ for(;;) { \ - if (!z) \ - { \ - if (++hpos >= count) \ + if (!fn_) \ + { \ + if (++hpos_ >= count_) \ break; \ - z = (fib)->hash_table[hpos]; \ + fn_ = 
(fib)->hash_table[hpos_]; \ continue; \ - } + } \ + z = fib_node_to_user(fib, fn_); -#define FIB_ITERATE_END(z) z = z->next; } } while(0) +#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) -#define FIB_ITERATE_PUT(it, z) fit_put(it, z) +#define FIB_ITERATE_PUT(it) fit_put(it, fn_) -#define FIB_ITERATE_PUT_NEXT(it, fib, z) fit_put_next(fib, it, z, hpos) +#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) #define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) @@ -127,6 +137,7 @@ struct rtable_config { char *name; struct rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ int gc_max_ops; /* Maximum number of operations before GC is run */ int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ @@ -136,7 +147,8 @@ typedef struct rtable { node n; /* Node in list of all tables */ struct fib fib; char *name; /* Name of this table */ - list hooks; /* List of announcement hooks */ + list channels; /* List of attached channels (struct channel) */ + uint addr_type; /* Type of address data stored in table (NET_*) */ int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ struct hostcache *hostcache; @@ -146,9 +158,8 @@ typedef struct rtable { * obstacle from this routing table. 
*/ struct event *rt_event; /* Routing table event */ + btime gc_time; /* Time of last GC */ int gc_counter; /* Number of operations since last GC */ - bird_clock_t gc_time; /* Time of last GC */ - byte gc_scheduled; /* GC is scheduled */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ @@ -156,13 +167,14 @@ typedef struct rtable { struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; -#define RPS_NONE 0 -#define RPS_SCHEDULED 1 -#define RPS_RUNNING 2 +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 typedef struct network { - struct fib_node n; /* FIB flags reserved for kernel syncer */ struct rte *routes; /* Available routes for this network */ + struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; struct hostcache { @@ -187,20 +199,20 @@ struct hostentry { unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ struct rta *src; /* Source rta entry */ - ip_addr gw; /* Chosen next hop */ byte dest; /* Chosen route destination type (RTD_...) */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ u32 igp_metric; /* Chosen route IGP metric */ }; typedef struct rte { struct rte *next; net *net; /* Network this RTE belongs to */ - struct announce_hook *sender; /* Announce hook used to send the route to the routing table */ + struct channel *sender; /* Channel used to send the route to the routing table */ struct rta *attrs; /* Attributes of this route */ byte flags; /* Flags (REF_...) */ byte pflags; /* Protocol-specific flags */ word pref; /* Route preference */ - bird_clock_t lastmod; /* Last modified */ + btime lastmod; /* Last modified */ union { /* Protocol-dependent data (metrics etc.) 
*/ #ifdef CONFIG_RIP struct { @@ -223,6 +235,7 @@ typedef struct rte { #endif #ifdef CONFIG_BABEL struct { + u16 seqno; /* Babel seqno */ u16 metric; /* Babel metric */ u64 router_id; /* Babel router id */ } babel; @@ -250,6 +263,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); /* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ #define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ANY 3 /* Announcement of any route change */ @@ -268,17 +282,22 @@ void rt_preconfig(struct config *); void rt_commit(struct config *new, struct config *old); void rt_lock_table(rtable *); void rt_unlock_table(rtable *); -void rt_setup(pool *, rtable *, char *, struct rtable_config *); -static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_find(&tab->fib, &addr, len); } -static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_get(&tab->fib, &addr, len); } +void rt_setup(pool *, rtable *, struct rtable_config *); +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? 
n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +void *net_route(rtable *tab, const net_addr *n); +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); -void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src); +void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); /* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); -rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct announce_hook *ah); -void rt_refresh_end(rtable *t, struct announce_hook *ah); +int rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter); +rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct channel *c); +void rt_refresh_end(rtable *t, struct channel *c); +void rt_schedule_prune(rtable *t); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); @@ -286,35 +305,49 @@ static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? 
rte_do_cow(r rte *rte_cow_rta(rte *r, linpool *lp); void rt_dump(rtable *); void rt_dump_all(void); -int rt_feed_baby(struct proto *p); -void rt_feed_baby_abort(struct proto *p); -int rt_prune_loop(void); -struct rtable_config *rt_new_table(struct symbol *s); +int rt_feed_channel(struct channel *c); +void rt_feed_channel_abort(struct channel *c); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); -static inline void -rt_mark_for_prune(rtable *tab) -{ - if (tab->prune_state == RPS_RUNNING) - fit_get(&tab->fib, &tab->prune_fit); +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; - tab->prune_state = RPS_SCHEDULED; -} +struct rt_show_data_rtable { + node n; + rtable *table; + struct channel *export_channel; +}; struct rt_show_data { - ip_addr prefix; - unsigned pxlen; - rtable *table; + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct fib_iterator fit; /* Iterator over networks in table */ + int verbose, tables_defined_by; struct filter *filter; - int verbose; - struct fib_iterator fit; struct proto *show_protocol; struct proto *export_protocol; - int export_mode, primary_only, filtered; + struct channel *export_channel; struct config *running_on_config; - int net_counter, rt_counter, show_counter; - int stats, show_for; + int export_mode, primary_only, filtered, stats, show_for; + + int table_open; /* Iteration (fit) is open */ + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; }; + void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... 
*/ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ /* Value of export_mode in struct rt_show_data */ #define RSEM_NONE 0 /* Export mode not used */ @@ -330,14 +363,21 @@ void rt_show(struct rt_show_data *); * construction of BGP route attribute lists. */ -/* Multipath next-hop */ -struct mpnh { +/* Nexthop structure */ +struct nexthop { ip_addr gw; /* Next hop */ struct iface *iface; /* Outgoing interface */ - struct mpnh *next; + struct nexthop *next; + byte flags; byte weight; + byte labels_orig; /* Number of labels before hostentry was applied */ + byte labels; /* Number of all labels */ + u32 label[0]; }; +#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ + + struct rte_src { struct rte_src *next; /* Hash chain */ struct proto *proto; /* Protocol the source is based on */ @@ -349,22 +389,18 @@ struct rte_src { typedef struct rta { struct rta *next, **pprev; /* Hash chain */ + u32 uc; /* Use count */ + u32 hash_key; /* Hash over important fields */ + struct ea_list *eattrs; /* Extended Attribute chain */ struct rte_src *src; /* Route source that created the route */ - unsigned uc; /* Use count */ - byte source; /* Route source (RTS_...) */ - byte scope; /* Route scope (SCOPE_... -- see ip.h) */ - byte cast; /* Casting type (RTC_...) */ - byte dest; /* Route destination type (RTD_...) */ - byte flags; /* Route flags (RTF_...), now unused */ - byte aflags; /* Attribute cache flags (RTAF_...) 
*/ - u16 hash_key; /* Hash over important fields */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - ip_addr gw; /* Next hop */ - ip_addr from; /* Advertising router */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - struct iface *iface; /* Outgoing interface */ - struct mpnh *nexthops; /* Next-hops for multipath routes */ - struct ea_list *eattrs; /* Extended Attribute chain */ + ip_addr from; /* Advertising router */ + u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ + u8 source; /* Route source (RTS_...) */ + u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ + u8 dest; /* Route destination type (RTD_...) */ + u8 aflags; + struct nexthop nh; /* Next hop */ } rta; #define RTS_DUMMY 0 /* Dummy route to be removed soon */ @@ -381,19 +417,20 @@ typedef struct rta { #define RTS_BGP 11 /* BGP route */ #define RTS_PIPE 12 /* Inter-table wormhole */ #define RTS_BABEL 13 /* Babel route */ +#define RTS_RPKI 14 /* Route Origin Authorization */ + #define RTC_UNICAST 0 #define RTC_BROADCAST 1 #define RTC_MULTICAST 2 #define RTC_ANYCAST 3 /* IPv6 Anycast */ -#define RTD_ROUTER 0 /* Next hop is neighbor router */ -#define RTD_DEVICE 1 /* Points to device */ +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* Next hop is neighbor router */ #define RTD_BLACKHOLE 2 /* Silently drop packets */ #define RTD_UNREACHABLE 3 /* Reject as unreachable */ #define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */ -#define RTD_NONE 6 /* Invalid RTD */ +#define RTD_MAX 5 /* Flags for net->n.flags, used by kernel syncer */ #define KRF_INSTALLED 0x80 /* This route should be installed in the kernel */ @@ -405,9 +442,14 @@ typedef struct rta { protocol-specific metric is availabe */ +const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? 
rta_dest_names[n] : "???"; } + /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ static inline int rte_is_reachable(rte *r) -{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } +{ return r->attrs->dest == RTD_UNICAST; } /* @@ -456,13 +498,22 @@ typedef struct eattr { #define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ #define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ #define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x40 /* The attribute has originated locally */ +#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ +#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */ #define EAF_TEMP 0x80 /* A temporary attribute (the one stored in the tmp attr list) */ -struct adata { +typedef struct adata { uint length; /* Length of data */ byte data[0]; -}; +} adata; + +static inline struct adata * +lp_alloc_adata(struct linpool *pool, uint len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} static inline int adata_same(struct adata *a, struct adata *b) { return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } @@ -504,14 +555,62 @@ uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); -int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ -static inline int mpnh_same(struct mpnh *x, struct mpnh *y) -{ return (x == y) || mpnh__same(x, y); } -struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); -void mpnh_insert(struct mpnh **n, struct mpnh *y); -int mpnh_is_sorted(struct mpnh *x); +static inline eattr * +ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint 
flags, uint type, uintptr_t val) +{ + ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); + eattr *e = &a->attrs[0]; + + a->flags = EALF_SORTED; + a->count = 1; + a->next = *to; + *to = a; + + e->id = id; + e->type = type; + e->flags = flags; + + if (type & EAF_EMBEDDED) + e->u.data = (u32) val; + else + e->u.ptr = (struct adata *) val; + + return e; +} + +static inline void +ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) +{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } + +static inline void +ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) +{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } + +static inline void +ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) +{ + struct adata *a = lp_alloc_adata(pool, len); + memcpy(a->data, data, len); + ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); +} + + +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) + +static inline size_t nexthop_size(const struct nexthop *nh) +{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } +int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ +static inline int nexthop_same(struct nexthop *x, struct nexthop *y) +{ return (x == y) || nexthop__same(x, y); } +struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); +static inline void nexthop_link(struct rta *a, struct nexthop *from) +{ memcpy(&a->nh, from, nexthop_size(from)); } +void nexthop_insert(struct nexthop **n, struct nexthop *y); +int nexthop_is_sorted(struct nexthop *x); void rta_init(void); +static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } +#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, 
uc++ */ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } @@ -522,7 +621,15 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll); + +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); + +static inline void +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) +{ + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); +} /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills @@ -556,93 +663,21 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; * Default protocol preferences */ -#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_DIRECT 240 /* Directly connected */ #define DEF_PREF_STATIC 200 /* Static route */ #define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ #define DEF_PREF_BABEL 130 /* Babel */ #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_RPKI 100 /* RPKI */ #define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - /* * Route Origin Authorization */ -struct roa_item { - u32 asn; - byte maxlen; - byte src; - struct roa_item *next; -}; - -struct roa_node { - struct fib_node n; - struct roa_item *items; - // u32 cached_asn; -}; - -struct roa_table { - node n; /* Node in roa_table_list */ - struct fib fib; - char *name; /* Name of this ROA table */ - struct roa_table_config *cf; /* Configuration of this ROA table */ -}; - -struct roa_item_config { - ip_addr prefix; - byte 
pxlen, maxlen; - u32 asn; - struct roa_item_config *next; -}; - -struct roa_table_config { - node n; /* Node in config->rpa_tables */ - char *name; /* Name of this ROA table */ - struct roa_table *table; - - struct roa_item_config *roa_items; /* Preconfigured ROA items */ - - // char *filename; - // int gc_max_ops; /* Maximum number of operations before GC is run */ - // int gc_min_time; /* Minimum time between two consecutive GC runs */ -}; - -struct roa_show_data { - struct fib_iterator fit; - struct roa_table *table; - ip_addr prefix; - byte pxlen; - byte mode; /* ROA_SHOW_* values */ - u32 asn; /* Filter ASN, 0 -> all */ -}; - #define ROA_UNKNOWN 0 #define ROA_VALID 1 #define ROA_INVALID 2 -#define ROA_SRC_ANY 0 -#define ROA_SRC_CONFIG 1 -#define ROA_SRC_DYNAMIC 2 - -#define ROA_SHOW_ALL 0 -#define ROA_SHOW_PX 1 -#define ROA_SHOW_IN 2 -#define ROA_SHOW_FOR 3 - -extern struct roa_table *roa_table_default; - -void roa_add_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src); -void roa_delete_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src); -void roa_flush(struct roa_table *t, byte src); -byte roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn); -struct roa_table_config * roa_new_table_config(struct symbol *s); -void roa_add_item_config(struct roa_table_config *rtc, ip_addr prefix, byte pxlen, byte maxlen, u32 asn); -void roa_init(void); -void roa_preconfig(struct config *c); -void roa_commit(struct config *new, struct config *old); -void roa_show(struct roa_show_data *d); - - #endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index edf27d44..881687de 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -52,18 +52,27 @@ #include "nest/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" +#include "lib/idm.h" #include "lib/resource.h" #include "lib/string.h" +#include <stddef.h> + +const char * rta_dest_names[RTD_MAX] = { + [RTD_NONE] = "", + [RTD_UNICAST] = "unicast", + 
[RTD_BLACKHOLE] = "blackhole", + [RTD_UNREACHABLE] = "unreachable", + [RTD_PROHIBIT] = "prohibited", +}; + pool *rta_pool; -static slab *rta_slab; -static slab *mpnh_slab; +static slab *rta_slab_[4]; +static slab *nexthop_slab_[4]; static slab *rte_src_slab; -/* rte source ID bitmap */ -static u32 *src_ids; -static u32 src_id_size, src_id_used, src_id_pos; +static struct idm src_ids; #define SRC_ID_INIT_SIZE 4 /* rte source hash */ @@ -87,64 +96,11 @@ rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); - src_id_pos = 0; - src_id_size = SRC_ID_INIT_SIZE; - src_ids = mb_allocz(rta_pool, src_id_size * sizeof(u32)); - - /* ID 0 is reserved */ - src_ids[0] = 1; - src_id_used = 1; + idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } -static inline int u32_cto(uint x) { return ffs(~x) - 1; } - -static inline u32 -rte_src_alloc_id(void) -{ - uint i, j; - for (i = src_id_pos; i < src_id_size; i++) - if (src_ids[i] != 0xffffffff) - goto found; - - /* If we are at least 7/8 full, expand */ - if (src_id_used > (src_id_size * 28)) - { - src_id_size *= 2; - src_ids = mb_realloc(src_ids, src_id_size * sizeof(u32)); - bzero(src_ids + i, (src_id_size - i) * sizeof(u32)); - goto found; - } - - for (i = 0; i < src_id_pos; i++) - if (src_ids[i] != 0xffffffff) - goto found; - - ASSERT(0); - - found: - ASSERT(i < 0x8000000); - - src_id_pos = i; - j = u32_cto(src_ids[i]); - - src_ids[i] |= (1 << j); - src_id_used++; - return 32 * i + j; -} - -static inline void -rte_src_free_id(u32 id) -{ - int i = id / 32; - int j = id % 32; - - ASSERT((i < src_id_size) && (src_ids[i] & (1 << j))); - src_ids[i] &= ~(1 << j); - src_id_used--; -} - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) @@ -165,7 +121,7 @@ rt_get_source(struct proto *p, u32 id) src = sl_alloc(rte_src_slab); src->proto = p; src->private_id = id; - src->global_id = rte_src_alloc_id(); + src->global_id = idm_alloc(&src_ids); src->uc = 0; HASH_INSERT2(src_hash, RSH, 
rta_pool, src); @@ -181,7 +137,7 @@ rt_prune_sources(void) if (src->uc == 0) { HASH_DO_REMOVE(src_hash, RSH, sp); - rte_src_free_id(src->global_id); + idm_free(&src_ids, src->global_id); sl_free(rte_src_slab, src); } } @@ -195,28 +151,41 @@ rt_prune_sources(void) * Multipath Next Hop */ -static inline uint -mpnh_hash(struct mpnh *x) +static inline u32 +nexthop_hash(struct nexthop *x) { - uint h = 0; + u32 h = 0; for (; x; x = x->next) - h ^= ipa_hash(x->gw); + { + h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + + for (int i = 0; i < x->labels; i++) + h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + } return h; } int -mpnh__same(struct mpnh *x, struct mpnh *y) +nexthop__same(struct nexthop *x, struct nexthop *y) { for (; x && y; x = x->next, y = y->next) - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight)) + { + if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || + (x->flags != y->flags) || (x->weight != y->weight) || + (x->labels != y->labels)) return 0; + for (int i = 0; i < x->labels; i++) + if (x->label[i] != y->label[i]) + return 0; + } + return x == y; } static int -mpnh_compare_node(struct mpnh *x, struct mpnh *y) +nexthop_compare_node(struct nexthop *x, struct nexthop *y) { int r; @@ -226,6 +195,8 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (!y) return -1; + /* Should we also compare flags ? 
*/ + r = ((int) y->weight) - ((int) x->weight); if (r) return r; @@ -234,22 +205,33 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (r) return r; + r = ((int) y->labels) - ((int) x->labels); + if (r) + return r; + + for (int i = 0; i < y->labels; i++) + { + r = ((int) y->label[i]) - ((int) x->label[i]); + if (r) + return r; + } + return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct mpnh * -mpnh_copy_node(const struct mpnh *src, linpool *lp) +static inline struct nexthop * +nexthop_copy_node(const struct nexthop *src, linpool *lp) { - struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh)); - n->gw = src->gw; - n->iface = src->iface; + struct nexthop *n = lp_alloc(lp, nexthop_size(src)); + + memcpy(n, src, nexthop_size(src)); n->next = NULL; - n->weight = src->weight; + return n; } /** - * mpnh_merge - merge nexthop lists + * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 * @rx: reusability of list @x @@ -257,7 +239,7 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * @max: max number of nexthops * @lp: linpool for allocating nexthops * - * The mpnh_merge() function takes two nexthop lists @x and @y and merges them, + * The nexthop_merge() function takes two nexthop lists @x and @y and merges them, * eliminating possible duplicates. The input lists must be sorted and the * result is sorted too. The number of nexthops in result is limited by @max. * New nodes are allocated from linpool @lp. @@ -270,28 +252,28 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. 
*/ -struct mpnh * -mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) +struct nexthop * +nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) { - struct mpnh *root = NULL; - struct mpnh **n = &root; + struct nexthop *root = NULL; + struct nexthop **n = &root; while ((x || y) && max--) { - int cmp = mpnh_compare_node(x, y); + int cmp = nexthop_compare_node(x, y); if (cmp < 0) { - *n = rx ? x : mpnh_copy_node(x, lp); + *n = rx ? x : nexthop_copy_node(x, lp); x = x->next; } else if (cmp > 0) { - *n = ry ? y : mpnh_copy_node(y, lp); + *n = ry ? y : nexthop_copy_node(y, lp); y = y->next; } else { - *n = rx ? x : (ry ? y : mpnh_copy_node(x, lp)); + *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); x = x->next; y = y->next; } @@ -303,11 +285,11 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) } void -mpnh_insert(struct mpnh **n, struct mpnh *x) +nexthop_insert(struct nexthop **n, struct nexthop *x) { for (; *n; n = &((*n)->next)) { - int cmp = mpnh_compare_node(*n, x); + int cmp = nexthop_compare_node(*n, x); if (cmp < 0) continue; @@ -322,28 +304,37 @@ mpnh_insert(struct mpnh **n, struct mpnh *x) } int -mpnh_is_sorted(struct mpnh *x) +nexthop_is_sorted(struct nexthop *x) { for (; x && x->next; x = x->next) - if (mpnh_compare_node(x, x->next) >= 0) + if (nexthop_compare_node(x, x->next) >= 0) return 0; return 1; } -static struct mpnh * -mpnh_copy(struct mpnh *o) +static inline slab * +nexthop_slab(struct nexthop *nh) +{ + return nexthop_slab_[MIN(nh->labels, 3)]; +} + +static struct nexthop * +nexthop_copy(struct nexthop *o) { - struct mpnh *first = NULL; - struct mpnh **last = &first; + struct nexthop *first = NULL; + struct nexthop **last = &first; for (; o; o = o->next) { - struct mpnh *n = sl_alloc(mpnh_slab); + struct nexthop *n = sl_alloc(nexthop_slab(o)); n->gw = o->gw; n->iface = o->iface; n->next = NULL; n->weight = o->weight; + n->labels = o->labels; + for (int 
i=0; i<o->labels; i++) + n->label[i] = o->label[i]; *last = n; last = &(n->next); @@ -353,14 +344,14 @@ mpnh_copy(struct mpnh *o) } static void -mpnh_free(struct mpnh *o) +nexthop_free(struct nexthop *o) { - struct mpnh *n; + struct nexthop *n; while (o) { n = o->next; - sl_free(mpnh_slab, o); + sl_free(nexthop_slab(o), o); o = n; } } @@ -580,7 +571,7 @@ ea_do_prune(ea_list *e) if ((s0->type & EAF_TYPE_MASK) != EAF_TYPE_UNDEF) { *d = *s0; - d->type = (d->type & ~EAF_ORIGINATED) | (s[-1].type & EAF_ORIGINATED); + d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); d++; i++; } @@ -972,7 +963,8 @@ ea_dump(ea_list *e) inline uint ea_hash(ea_list *e) { - u32 h = 0; + const u64 mul = 0x68576150f3d6847; + u64 h = 0xafcef24eda8b29; int i; if (e) /* Assuming chain of length 1 */ @@ -980,29 +972,18 @@ ea_hash(ea_list *e) for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; - h ^= a->id; + h ^= a->id; h *= mul; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else { struct adata *d = a->u.ptr; - int size = d->length; - byte *z = d->data; - while (size >= 4) - { - h ^= *(u32 *)z; - z += 4; - size -= 4; - } - while (size--) - h = (h >> 24) ^ (h << 8) ^ *z++; + h ^= mem_hash(d->data, d->length); } + h *= mul; } - h ^= h >> 16; - h ^= h >> 6; - h &= 0xffff; } - return h; + return (h >> 32) ^ (h & 0xffffffff); } /** @@ -1051,8 +1032,19 @@ rta_alloc_hash(void) static inline uint rta_hash(rta *a) { - return (((uint) (uintptr_t) a->src) ^ ipa_hash(a->gw) ^ - mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff; + u64 h; + mem_hash_init(&h); +#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); + MIX(src); + MIX(hostentry); + MIX(from); + MIX(igp_metric); + MIX(source); + MIX(scope); + MIX(dest); +#undef MIX + + return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); } static inline int @@ -1061,26 +1053,28 @@ rta_same(rta *x, rta *y) return (x->src == y->src && x->source == y->source && x->scope == y->scope && - x->cast == y->cast 
&& x->dest == y->dest && - x->flags == y->flags && x->igp_metric == y->igp_metric && - ipa_equal(x->gw, y->gw) && ipa_equal(x->from, y->from) && - x->iface == y->iface && x->hostentry == y->hostentry && - mpnh_same(x->nexthops, y->nexthops) && + nexthop_same(&(x->nh), &(y->nh)) && ea_same(x->eattrs, y->eattrs)); } +static inline slab * +rta_slab(rta *a) +{ + return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; +} + static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab); + rta *r = sl_alloc(rta_slab(o)); - memcpy(r, o, sizeof(rta)); + memcpy(r, o, rta_size(o)); r->uc = 1; - r->nexthops = mpnh_copy(o->nexthops); + r->nh.next = nexthop_copy(o->nh.next); r->eattrs = ea_list_copy(o->eattrs); return r; } @@ -1173,19 +1167,26 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - a->aflags = 0; /* Poison the entry */ rt_unlock_hostentry(a->hostentry); rt_unlock_source(a->src); - mpnh_free(a->nexthops); + if (a->nh.next) + nexthop_free(a->nh.next); ea_free(a->eattrs); - sl_free(rta_slab, a); + a->aflags = 0; /* Poison the entry */ + sl_free(rta_slab(a), a); } rta * rta_do_cow(rta *o, linpool *lp) { - rta *r = lp_alloc(lp, sizeof(rta)); - memcpy(r, o, sizeof(rta)); + rta *r = lp_alloc(lp, rta_size(o)); + memcpy(r, o, rta_size(o)); + for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) + { + *nhn = lp_alloc(lp, nexthop_size(nho)); + memcpy(*nhn, nho, nexthop_size(nho)); + nhn = &((*nhn)->next); + } r->aflags = 0; r->uc = 0; return r; @@ -1203,20 +1204,24 @@ rta_dump(rta *a) static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtc[] = { "", " BC", " MC", " AC" }; + "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - debug("p=%s uc=%d %s %s%s%s h=%04x", - 
a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast], + debug("p=%s uc=%d %s %s%s h=%04x", + a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtd[a->dest], a->hash_key); if (!(a->aflags & RTAF_CACHED)) debug(" !CACHED"); debug(" <-%I", a->from); - if (a->dest == RTD_ROUTER) - debug(" ->%I", a->gw); - if (a->dest == RTD_DEVICE || a->dest == RTD_ROUTER) - debug(" [%s]", a->iface ? a->iface->name : "???" ); + if (a->dest == RTD_UNICAST) + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } if (a->eattrs) { debug(" EA: "); @@ -1252,10 +1257,9 @@ rta_show(struct cli *c, rta *a, ea_list *eal) { static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect", "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" }; - static char *cast_names[] = { "unicast", "broadcast", "multicast", "anycast" }; int i; - cli_printf(c, -1008, "\tType: %s %s %s", src_names[a->source], cast_names[a->cast], ip_scope_text(a->scope)); + cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope)); if (!eal) eal = a->eattrs; for(; eal; eal=eal->next) @@ -1273,8 +1277,17 @@ void rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab = sl_new(rta_pool, sizeof(rta)); - mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh)); + + rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); + rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); + rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); + rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + + nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); + nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); + nexthop_slab_[2] = 
sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); + nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_alloc_hash(); rte_src_init(); } diff --git a/nest/rt-dev.c b/nest/rt-dev.c index ed6c06af..718c4578 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -24,14 +24,17 @@ #include "lib/resource.h" #include "lib/string.h" + static void -dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) +dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) { - struct rt_dev_config *P = (void *) p->cf; + struct rt_dev_proto *p = (void *) P; + struct rt_dev_config *cf = (void *) P->cf; + struct channel *c; - if (!EMPTY_LIST(P->iface_list) && - !iface_patt_find(&P->iface_list, ad->iface, ad->iface->addr)) - /* Empty list is automagically treated as "*" */ + if (!EMPTY_LIST(cf->iface_list) && + !iface_patt_find(&cf->iface_list, ad->iface, ad)) + /* Empty list is automatically treated as "*" */ return; if (ad->flags & IA_SECONDARY) @@ -40,51 +43,49 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) if (ad->scope <= SCOPE_LINK) return; - if (c & IF_CHANGE_DOWN) - { - net *n; + if (ad->prefix.type == NET_IP4) + c = p->ip4_channel; + else if (ad->prefix.type == NET_IP6) + c = p->ip6_channel; + else + return; + + if (!c) + return; + if (flags & IF_CHANGE_DOWN) + { DBG("dev_if_notify: %s:%I going down\n", ad->iface->name, ad->ip); - n = net_find(p->table, ad->prefix, ad->pxlen); - if (!n) - { - DBG("dev_if_notify: device shutdown: prefix not found\n"); - return; - } /* Use iface ID as local source ID */ - struct rte_src *src = rt_get_source(p, ad->iface->index); - rte_update2(p->main_ahook, n, NULL, src); + struct rte_src *src = rt_get_source(P, ad->iface->index); + rte_update2(c, &ad->prefix, NULL, src); } - else if (c & IF_CHANGE_UP) + else if (flags & IF_CHANGE_UP) { rta *a; - net *n; rte *e; DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); - if (P->check_link && !(ad->iface->flags & 
IF_LINK_UP)) + if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) return; /* Use iface ID as local source ID */ - struct rte_src *src = rt_get_source(p, ad->iface->index); + struct rte_src *src = rt_get_source(P, ad->iface->index); rta a0 = { .src = src, .source = RTS_DEVICE, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = RTD_DEVICE, - .iface = ad->iface + .dest = RTD_UNICAST, + .nh.iface = ad->iface, }; a = rta_lookup(&a0); - n = net_get(p->table, ad->prefix, ad->pxlen); e = rte_get_temp(a); - e->net = n; e->pflags = 0; - rte_update2(p->main_ahook, n, e, src); + rte_update2(c, &ad->prefix, e, src); } } @@ -108,30 +109,44 @@ dev_if_notify(struct proto *p, uint c, struct iface *iface) static struct proto * -dev_init(struct proto_config *c) +dev_init(struct proto_config *CF) { - struct proto *p = proto_new(c, sizeof(struct proto)); + struct proto *P = proto_new(CF); + struct rt_dev_proto *p = (void *) P; + // struct rt_dev_config *cf = (void *) CF; + + proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)); + proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); + + P->if_notify = dev_if_notify; + P->ifa_notify = dev_ifa_notify; - p->if_notify = dev_if_notify; - p->ifa_notify = dev_ifa_notify; - return p; + return P; } static int -dev_reconfigure(struct proto *p, struct proto_config *new) +dev_reconfigure(struct proto *P, struct proto_config *CF) { - struct rt_dev_config *o = (struct rt_dev_config *) p->cf; - struct rt_dev_config *n = (struct rt_dev_config *) new; + struct rt_dev_proto *p = (void *) P; + struct rt_dev_config *o = (void *) P->cf; + struct rt_dev_config *n = (void *) CF; + + if (!iface_patts_equal(&o->iface_list, &n->iface_list, NULL) || + (o->check_link != n->check_link)) + return 0; + + return + proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) && + proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); - return 
iface_patts_equal(&o->iface_list, &n->iface_list, NULL) && - (o->check_link == n->check_link); + return 1; } static void dev_copy_config(struct proto_config *dest, struct proto_config *src) { - struct rt_dev_config *d = (struct rt_dev_config *) dest; - struct rt_dev_config *s = (struct rt_dev_config *) src; + struct rt_dev_config *d = (void *) dest; + struct rt_dev_config *s = (void *) src; /* * We copy iface_list as ifaces can be shared by more direct protocols. @@ -144,11 +159,13 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) } struct protocol proto_device = { - .name = "Direct", - .template = "direct%d", - .preference = DEF_PREF_DIRECT, + .name = "Direct", + .template = "direct%d", + .preference = DEF_PREF_DIRECT, + .channel_mask = NB_IP, + .proto_size = sizeof(struct rt_dev_proto), .config_size = sizeof(struct rt_dev_config), - .init = dev_init, - .reconfigure = dev_reconfigure, - .copy_config = dev_copy_config + .init = dev_init, + .reconfigure = dev_reconfigure, + .copy_config = dev_copy_config }; diff --git a/nest/rt-dev.h b/nest/rt-dev.h index 191b9a02..20b88a64 100644 --- a/nest/rt-dev.h +++ b/nest/rt-dev.h @@ -15,4 +15,10 @@ struct rt_dev_config { int check_link; }; +struct rt_dev_proto { + struct proto p; + struct channel *ip4_channel; + struct channel *ip6_channel; +}; + #endif diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 9af333c9..18ccbfc3 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -61,16 +61,17 @@ #define HASH_DEF_ORDER 10 #define HASH_HI_MARK *4 #define HASH_HI_STEP 2 -#define HASH_HI_MAX 16 /* Must be at most 16 */ +#define HASH_HI_MAX 16 #define HASH_LO_MARK /5 #define HASH_LO_STEP 2 #define HASH_LO_MIN 10 + static void fib_ht_alloc(struct fib *f) { f->hash_size = 1 << f->hash_order; - f->hash_shift = 16 - f->hash_order; + f->hash_shift = 32 - f->hash_order; if (f->hash_order > HASH_HI_MAX - HASH_HI_STEP) f->entries_max = ~0; else @@ -90,16 +91,8 @@ fib_ht_free(struct fib_node **h) mb_free(h); } -static inline 
unsigned -fib_hash(struct fib *f, ip_addr *a) -{ - return ipa_hash(*a) >> f->hash_shift; -} -static void -fib_dummy_init(struct fib_node *dummy UNUSED) -{ -} +static inline u32 fib_hash(struct fib *f, const net_addr *a); /** * fib_init - initialize a new FIB @@ -114,18 +107,23 @@ fib_dummy_init(struct fib_node *dummy UNUSED) * This function initializes a newly allocated FIB and prepares it for use. */ void -fib_init(struct fib *f, pool *p, unsigned node_size, unsigned hash_order, fib_init_func init) +fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init) { + uint addr_length = net_addr_length[addr_type]; + if (!hash_order) hash_order = HASH_DEF_ORDER; f->fib_pool = p; - f->fib_slab = sl_new(p, node_size); + f->fib_slab = addr_length ? sl_new(p, node_size + addr_length) : NULL; + f->addr_type = addr_type; + f->node_size = node_size; + f->node_offset = node_offset; f->hash_order = hash_order; fib_ht_alloc(f); bzero(f->hash_table, f->hash_size * sizeof(struct fib_node *)); f->entries = 0; f->entries_min = 0; - f->init = init ? 
: fib_dummy_init; + f->init = init; } static void @@ -151,7 +149,7 @@ fib_rehash(struct fib *f, int step) while (e = x) { x = e->next; - nh = fib_hash(f, &e->prefix); + nh = fib_hash(f, e->addr); while (nh > ni) { *t = NULL; @@ -171,127 +169,201 @@ fib_rehash(struct fib *f, int step) fib_ht_free(m); } +#define CAST(t) (const net_addr_##t *) +#define CAST2(t) (net_addr_##t *) + +#define FIB_HASH(f,a,t) (net_hash_##t(CAST(t) a) >> f->hash_shift) + +#define FIB_FIND(f,a,t) \ + ({ \ + struct fib_node *e = f->hash_table[FIB_HASH(f, a, t)]; \ + while (e && !net_equal_##t(CAST(t) e->addr, CAST(t) a)) \ + e = e->next; \ + fib_node_to_user(f, e); \ + }) + +#define FIB_INSERT(f,a,e,t) \ + ({ \ + u32 h = net_hash_##t(CAST(t) a); \ + struct fib_node **ee = f->hash_table + (h >> f->hash_shift); \ + struct fib_node *g; \ + \ + while ((g = *ee) && (net_hash_##t(CAST(t) g->addr) < h)) \ + ee = &g->next; \ + \ + net_copy_##t(CAST2(t) e->addr, CAST(t) a); \ + e->next = *ee; \ + *ee = e; \ + }) + + +static inline u32 +fib_hash(struct fib *f, const net_addr *a) +{ + /* Same as FIB_HASH() */ + return net_hash(a) >> f->hash_shift; +} + +void * +fib_get_chain(struct fib *f, const net_addr *a) +{ + ASSERT(f->addr_type == a->type); + + struct fib_node *e = f->hash_table[fib_hash(f, a)]; + return e; +} + /** * fib_find - search for FIB node by prefix * @f: FIB to search in - * @a: pointer to IP address of the prefix - * @len: prefix length + * @n: network address * * Search for a FIB node corresponding to the given prefix, return * a pointer to it or %NULL if no such node exists. 
*/ void * -fib_find(struct fib *f, ip_addr *a, int len) +fib_find(struct fib *f, const net_addr *a) { - struct fib_node *e = f->hash_table[fib_hash(f, a)]; - - while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) - e = e->next; - return e; + ASSERT(f->addr_type == a->type); + + switch (f->addr_type) + { + case NET_IP4: return FIB_FIND(f, a, ip4); + case NET_IP6: return FIB_FIND(f, a, ip6); + case NET_VPN4: return FIB_FIND(f, a, vpn4); + case NET_VPN6: return FIB_FIND(f, a, vpn6); + case NET_ROA4: return FIB_FIND(f, a, roa4); + case NET_ROA6: return FIB_FIND(f, a, roa6); + case NET_FLOW4: return FIB_FIND(f, a, flow4); + case NET_FLOW6: return FIB_FIND(f, a, flow6); + case NET_IP6_SADR: return FIB_FIND(f, a, ip6_sadr); + case NET_MPLS: return FIB_FIND(f, a, mpls); + default: bug("invalid type"); + } } -/* -int -fib_histogram(struct fib *f) +static void +fib_insert(struct fib *f, const net_addr *a, struct fib_node *e) { - log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries); - - int i, j; - struct fib_node *e; - - for (i = 0; i < f->hash_size; i++) - { - j = 0; - for (e = f->hash_table[i]; e != NULL; e = e->next) - j++; - if (j > 0) - log(L_WARN "Histogram line %d: %d", i, j); - } - - log(L_WARN "Histogram dump end"); + ASSERT(f->addr_type == a->type); + + switch (f->addr_type) + { + case NET_IP4: FIB_INSERT(f, a, e, ip4); return; + case NET_IP6: FIB_INSERT(f, a, e, ip6); return; + case NET_VPN4: FIB_INSERT(f, a, e, vpn4); return; + case NET_VPN6: FIB_INSERT(f, a, e, vpn6); return; + case NET_ROA4: FIB_INSERT(f, a, e, roa4); return; + case NET_ROA6: FIB_INSERT(f, a, e, roa6); return; + case NET_FLOW4: FIB_INSERT(f, a, e, flow4); return; + case NET_FLOW6: FIB_INSERT(f, a, e, flow6); return; + case NET_IP6_SADR: FIB_INSERT(f, a, e, ip6_sadr); return; + case NET_MPLS: FIB_INSERT(f, a, e, mpls); return; + default: bug("invalid type"); + } } -*/ + /** * fib_get - find or create a FIB node * @f: FIB to work with - * @a: pointer to IP address of the 
prefix - * @len: prefix length + * @n: network address * * Search for a FIB node corresponding to the given prefix and * return a pointer to it. If no such node exists, create it. */ void * -fib_get(struct fib *f, ip_addr *a, int len) +fib_get(struct fib *f, const net_addr *a) { - uint h = ipa_hash(*a); - struct fib_node **ee = f->hash_table + (h >> f->hash_shift); - struct fib_node *g, *e = *ee; - u32 uid = h << 16; - - while (e && (e->pxlen != len || !ipa_equal(*a, e->prefix))) - e = e->next; - if (e) - return e; -#ifdef DEBUGGING - if (len < 0 || len > BITS_PER_IP_ADDRESS || !ip_is_prefix(*a,len)) - bug("fib_get() called for invalid address"); -#endif + void *b = fib_find(f, a); + if (b) + return b; - while ((g = *ee) && g->uid < uid) - ee = &g->next; - while ((g = *ee) && g->uid == uid) - { - ee = &g->next; - uid++; - } + if (f->fib_slab) + b = sl_alloc(f->fib_slab); + else + b = mb_alloc(f->fib_pool, f->node_size + a->length); - if ((uid >> 16) != h) - log(L_ERR "FIB hash table chains are too long"); + struct fib_node *e = fib_user_to_node(f, b); + e->readers = NULL; + e->flags = 0; + fib_insert(f, a, e); - // log (L_WARN "FIB_GET %I %x %x", *a, h, uid); + memset(b, 0, f->node_offset); + if (f->init) + f->init(b); - e = sl_alloc(f->fib_slab); - e->prefix = *a; - e->pxlen = len; - e->next = *ee; - e->uid = uid; - *ee = e; - e->readers = NULL; - f->init(e); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); - return e; + return b; +} + +static inline void * +fib_route_ip4(struct fib *f, net_addr_ip4 *n) +{ + void *r; + + while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0)) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +fib_route_ip6(struct fib *f, net_addr_ip6 *n) +{ + void *r; + + while (!(r = fib_find(f, (net_addr *) n)) && (n->pxlen > 0)) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + } + + return r; } /** * fib_route - CIDR routing lookup * @f: FIB to search in - * @a: pointer 
to IP address of the prefix - * @len: prefix length + * @n: network address * * Search for a FIB node with longest prefix matching the given * network, that is a node which a CIDR router would use for routing * that network. */ void * -fib_route(struct fib *f, ip_addr a, int len) +fib_route(struct fib *f, const net_addr *n) { - ip_addr a0; - void *t; - - while (len >= 0) - { - a0 = ipa_and(a, ipa_mkmask(len)); - t = fib_find(f, &a0, len); - if (t) - return t; - len--; - } - return NULL; + ASSERT(f->addr_type == n->type); + + net_addr *n0 = alloca(n->length); + net_copy(n0, n); + + switch (n->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + case NET_FLOW4: + return fib_route_ip4(f, (net_addr_ip4 *) n0); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + case NET_FLOW6: + return fib_route_ip6(f, (net_addr_ip6 *) n0); + + default: + return NULL; + } } + static inline void fib_merge_readers(struct fib_iterator *i, struct fib_node *to) { @@ -338,8 +410,8 @@ fib_merge_readers(struct fib_iterator *i, struct fib_node *to) void fib_delete(struct fib *f, void *E) { - struct fib_node *e = E; - uint h = fib_hash(f, &e->prefix); + struct fib_node *e = fib_user_to_node(f, E); + uint h = fib_hash(f, e->addr); struct fib_node **ee = f->hash_table + h; struct fib_iterator *it; @@ -361,7 +433,12 @@ fib_delete(struct fib *f, void *E) } fib_merge_readers(it, l); } - sl_free(f->fib_slab, e); + + if (f->fib_slab) + sl_free(f->fib_slab, E); + else + mb_free(E); + if (f->entries-- < f->entries_min) fib_rehash(f, -HASH_LO_STEP); return; @@ -431,7 +508,7 @@ fit_get(struct fib *f, struct fib_iterator *i) if (k = i->next) k->prev = j; j->next = k; - i->hash = fib_hash(f, &n->prefix); + i->hash = fib_hash(f, n->addr); return n; } @@ -479,21 +556,17 @@ found: void fib_check(struct fib *f) { - uint i, ec, lo, nulls; + uint i, ec, nulls; ec = 0; for(i=0; i<f->hash_size; i++) { struct fib_node *n; - lo = 0; for(n=f->hash_table[i]; n; n=n->next) { struct fib_iterator *j, *j0; - uint 
h0 = ipa_hash(n->prefix); - if (h0 < lo) - bug("fib_check: discord in hash chains"); - lo = h0; - if ((h0 >> f->hash_shift) != i) + uint h0 = fib_hash(f, n->addr); + if (h0 != i) bug("fib_check: mishashed %x->%x (order %d)", h0, i, f->hash_order); j0 = (struct fib_iterator *) n; nulls = 0; @@ -514,8 +587,31 @@ fib_check(struct fib *f) } if (ec != f->entries) bug("fib_check: invalid entry count (%d != %d)", ec, f->entries); + return; } +/* +int +fib_histogram(struct fib *f) +{ + log(L_WARN "Histogram dump start %d %d", f->hash_size, f->entries); + + int i, j; + struct fib_node *e; + + for (i = 0; i < f->hash_size; i++) + { + j = 0; + for (e = f->hash_table[i]; e != NULL; e = e->next) + j++; + if (j > 0) + log(L_WARN "Histogram line %d: %d", i, j); + } + + log(L_WARN "Histogram dump end"); +} +*/ + #endif #ifdef TEST @@ -535,7 +631,7 @@ void dump(char *m) struct fib_iterator *j; for(n=f.hash_table[i]; n; n=n->next) { - debug("%04x %04x %p %I/%2d", i, ipa_hash(n->prefix), n, n->prefix, n->pxlen); + debug("%04x %08x %p %N", i, ipa_hash(n->prefix), n, n->addr); for(j=n->readers; j; j=j->next) debug(" %p[%p]", j, j->node); debug("\n"); diff --git a/nest/rt-roa.c b/nest/rt-roa.c deleted file mode 100644 index bf457e30..00000000 --- a/nest/rt-roa.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * BIRD -- Route Origin Authorization - * - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#undef LOCAL_DEBUG - -#include "nest/bird.h" -#include "nest/route.h" -#include "nest/cli.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/string.h" -#include "conf/conf.h" - - -pool *roa_pool; -static slab *roa_slab; /* Slab of struct roa_item */ -static list roa_table_list; /* List of struct roa_table */ -struct roa_table *roa_table_default; /* The first ROA table in the config */ - -static inline int -src_match(struct roa_item *it, byte src) -{ return !src || it->src == src; } - -/** - * roa_add_item - add a ROA entry - * @t: ROA table - * @prefix: prefix of the ROA entry - * @pxlen: prefix length of the ROA entry - * @maxlen: max length field of the ROA entry - * @asn: AS number field of the ROA entry - * @src: source of the ROA entry (ROA_SRC_*) - * - * The function adds a new ROA entry to the ROA table. If the same ROA - * is already in the table, nothing is added. @src field is used to - * distinguish different sources of ROAs. - */ -void -roa_add_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src) -{ - struct roa_node *n = fib_get(&t->fib, &prefix, pxlen); - - // if ((n->items == NULL) && (n->n.x0 != ROA_INVALID)) - // t->cached_items--; - - struct roa_item *it; - for (it = n->items; it; it = it->next) - if ((it->maxlen == maxlen) && (it->asn == asn) && src_match(it, src)) - return; - - it = sl_alloc(roa_slab); - it->asn = asn; - it->maxlen = maxlen; - it->src = src; - it->next = n->items; - n->items = it; -} - -/** - * roa_delete_item - delete a ROA entry - * @t: ROA table - * @prefix: prefix of the ROA entry - * @pxlen: prefix length of the ROA entry - * @maxlen: max length field of the ROA entry - * @asn: AS number field of the ROA entry - * @src: source of the ROA entry (ROA_SRC_*) - * - * The function removes a specified ROA entry from the ROA table and - * frees it. If @src field is not ROA_SRC_ANY, only entries from - * that source are considered. 
- */ -void -roa_delete_item(struct roa_table *t, ip_addr prefix, byte pxlen, byte maxlen, u32 asn, byte src) -{ - struct roa_node *n = fib_find(&t->fib, &prefix, pxlen); - - if (!n) - return; - - struct roa_item *it, **itp; - for (itp = &n->items; it = *itp; itp = &it->next) - if ((it->maxlen == maxlen) && (it->asn == asn) && src_match(it, src)) - break; - - if (!it) - return; - - *itp = it->next; - sl_free(roa_slab, it); - - // if ((n->items == NULL) && (n->n.x0 != ROA_INVALID)) - // t->cached_items++; -} - - -/** - * roa_flush - flush a ROA table - * @t: ROA table - * @src: source of ROA entries (ROA_SRC_*) - * - * The function removes and frees ROA entries from the ROA table. If - * @src is ROA_SRC_ANY, all entries in the table are removed, - * otherwise only all entries from that source are removed. - */ -void -roa_flush(struct roa_table *t, byte src) -{ - struct roa_item *it, **itp; - struct roa_node *n; - - FIB_WALK(&t->fib, fn) - { - n = (struct roa_node *) fn; - - itp = &n->items; - while (it = *itp) - if (src_match(it, src)) - { - *itp = it->next; - sl_free(roa_slab, it); - } - else - itp = &it->next; - } - FIB_WALK_END; - - // TODO add cleanup of roa_nodes -} - - - -/* -byte -roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn) -{ - struct roa_node *n = fib_find(&t->fib, &prefix, pxlen); - - if (n && n->n.x0 == ROA_UNKNOWN) - return ROA_UNKNOWN; - - if (n && n->n.x0 == ROA_VALID && asn == n->cached_asn) - return ROA_VALID; - - byte rv = roa_match(t, n, prefix, pxlen, asn); - - if (rv != ROA_INVALID) - { - if (!n) - { - if (t->cached_items >= t->cached_items_max) - n = fib_get(&t->fib, &prefix, pxlen); - t->cached_items++; - } - - n->cached_asn = asn; - n->n.x0 = rv; - } - - return rv; -} -*/ - -/** - * roa_check - check validity of route origination in a ROA table - * @t: ROA table - * @prefix: network prefix to check - * @pxlen: length of network prefix - * @asn: AS number of network prefix - * - * Implements RFC 6483 route validation for 
the given network - * prefix. The procedure is to find all candidate ROAs - ROAs whose - * prefixes cover the give network prefix. If there is no candidate - * ROA, return ROA_UNKNOWN. If there is a candidate ROA with matching - * ASN and maxlen field greater than or equal to the given prefix - * length, return ROA_VALID. Otherwise return ROA_INVALID. If caller - * cannot determine origin AS, 0 could be used (in that case ROA_VALID - * cannot happen). - */ -byte -roa_check(struct roa_table *t, ip_addr prefix, byte pxlen, u32 asn) -{ - struct roa_node *n; - ip_addr px; - byte anything = 0; - - int len; - for (len = pxlen; len >= 0; len--) - { - px = ipa_and(prefix, ipa_mkmask(len)); - n = fib_find(&t->fib, &px, len); - - if (!n) - continue; - - struct roa_item *it; - for (it = n->items; it; it = it->next) - { - anything = 1; - if ((it->maxlen >= pxlen) && (it->asn == asn) && asn) - return ROA_VALID; - } - } - - return anything ? ROA_INVALID : ROA_UNKNOWN; -} - -static void -roa_node_init(struct fib_node *fn) -{ - struct roa_node *n = (struct roa_node *) fn; - n->items = NULL; -} - -static inline void -roa_populate(struct roa_table *t) -{ - struct roa_item_config *ric; - for (ric = t->cf->roa_items; ric; ric = ric->next) - roa_add_item(t, ric->prefix, ric->pxlen, ric->maxlen, ric->asn, ROA_SRC_CONFIG); -} - -static void -roa_new_table(struct roa_table_config *cf) -{ - struct roa_table *t; - - t = mb_allocz(roa_pool, sizeof(struct roa_table)); - fib_init(&t->fib, roa_pool, sizeof(struct roa_node), 0, roa_node_init); - t->name = cf->name; - t->cf = cf; - - cf->table = t; - add_tail(&roa_table_list, &t->n); - - roa_populate(t); -} - -struct roa_table_config * -roa_new_table_config(struct symbol *s) -{ - struct roa_table_config *rtc = cfg_allocz(sizeof(struct roa_table_config)); - - cf_define_symbol(s, SYM_ROA, rtc); - rtc->name = s->name; - add_tail(&new_config->roa_tables, &rtc->n); - return rtc; -} - -/** - * roa_add_item_config - add a static ROA entry to a ROA table 
configuration - * - * Arguments are self-explanatory. The first is the ROA table config, rest - * are specifying the ROA entry. - */ -void -roa_add_item_config(struct roa_table_config *rtc, ip_addr prefix, byte pxlen, byte maxlen, u32 asn) -{ - struct roa_item_config *ric = cfg_allocz(sizeof(struct roa_item_config)); - - ric->prefix = prefix; - ric->pxlen = pxlen; - ric->maxlen = maxlen; - ric->asn = asn; - ric->next = rtc->roa_items; - rtc->roa_items = ric; -} - -/** - * roa_init - initialize ROA tables - * - * This function is called during BIRD startup. It initializes - * the ROA table module. - */ -void -roa_init(void) -{ - roa_pool = rp_new(&root_pool, "ROA tables"); - roa_slab = sl_new(roa_pool, sizeof(struct roa_item)); - init_list(&roa_table_list); -} - -void -roa_preconfig(struct config *c) -{ - init_list(&c->roa_tables); -} - - -/** - * roa_commit - commit new ROA table configuration - * @new: new configuration - * @old: original configuration or %NULL if it's boot time config - * - * Scan differences between @old and @new configuration and modify the - * ROA tables according to these changes. If @new defines a previously - * unknown table, create it, if it omits a table existing in @old, - * delete it (there are no references, only indirect through struct - * roa_table_config). If it exists in both configurations, update the - * configured ROA entries. 
- */ -void -roa_commit(struct config *new, struct config *old) -{ - struct roa_table_config *cf; - struct roa_table *t, *tx; - - if (old) - WALK_LIST_DELSAFE(t, tx, roa_table_list) - { - struct symbol *sym = cf_find_symbol(new, t->name); - if (sym && sym->class == SYM_ROA) - { - /* Found old table in new config */ - cf = sym->def; - cf->table = t; - t->name = cf->name; - t->cf = cf; - - /* Reconfigure it */ - roa_flush(t, ROA_SRC_CONFIG); - roa_populate(t); - } - else - { - t->cf->table = NULL; - - /* Free it now */ - roa_flush(t, ROA_SRC_ANY); - rem_node(&t->n); - fib_free(&t->fib); - mb_free(t); - } - } - - /* Add new tables */ - WALK_LIST(cf, new->roa_tables) - if (! cf->table) - roa_new_table(cf); - - roa_table_default = EMPTY_LIST(new->roa_tables) ? NULL : - ((struct roa_table_config *) HEAD(new->roa_tables))->table; -} - - - -static void -roa_show_node(struct cli *c, struct roa_node *rn, int len, u32 asn) -{ - struct roa_item *ri; - - for (ri = rn->items; ri; ri = ri->next) - if ((ri->maxlen >= len) && (!asn || (ri->asn == asn))) - cli_printf(c, -1019, "%I/%d max %d as %u", rn->n.prefix, rn->n.pxlen, ri->maxlen, ri->asn); -} - -static void -roa_show_cont(struct cli *c) -{ - struct roa_show_data *d = c->rover; - struct fib *fib = &d->table->fib; - struct fib_iterator *it = &d->fit; - struct roa_node *rn; - unsigned max = 32; - - FIB_ITERATE_START(fib, it, f) - { - rn = (struct roa_node *) f; - - if (!max--) - { - FIB_ITERATE_PUT(it, f); - return; - } - - if ((d->mode == ROA_SHOW_ALL) || - net_in_net(rn->n.prefix, rn->n.pxlen, d->prefix, d->pxlen)) - roa_show_node(c, rn, 0, d->asn); - } - FIB_ITERATE_END(f); - - cli_printf(c, 0, ""); - c->cont = c->cleanup = NULL; -} - -static void -roa_show_cleanup(struct cli *c) -{ - struct roa_show_data *d = c->rover; - - /* Unlink the iterator */ - fit_get(&d->table->fib, &d->fit); -} - -void -roa_show(struct roa_show_data *d) -{ - struct roa_node *rn; - ip_addr px; - int len; - - switch (d->mode) - { - case ROA_SHOW_ALL: - 
case ROA_SHOW_IN: - FIB_ITERATE_INIT(&d->fit, &d->table->fib); - this_cli->cont = roa_show_cont; - this_cli->cleanup = roa_show_cleanup; - this_cli->rover = d; - break; - - case ROA_SHOW_PX: - rn = fib_find(&d->table->fib, &d->prefix, d->pxlen); - if (rn) - { - roa_show_node(this_cli, rn, 0, d->asn); - cli_msg(0, ""); - } - else - cli_msg(-8001, "Network not in table"); - break; - - case ROA_SHOW_FOR: - for (len = d->pxlen; len >= 0; len--) - { - px = ipa_and(d->prefix, ipa_mkmask(len)); - rn = fib_find(&d->table->fib, &px, len); - - if (!rn) - continue; - - roa_show_node(this_cli, rn, 0, d->asn); - } - cli_msg(0, ""); - break; - } -} diff --git a/nest/rt-show.c b/nest/rt-show.c new file mode 100644 index 00000000..1f1b73d2 --- /dev/null +++ b/nest/rt-show.c @@ -0,0 +1,421 @@ +/* + * BIRD -- Route Display Routines + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2017 Jan Moskyto Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#undef LOCAL_DEBUG + +#include "nest/bird.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "nest/cli.h" +#include "nest/iface.h" +#include "filter/filter.h" + +static void +rt_show_table(struct cli *c, struct rt_show_data *d) +{ + /* No table blocks in 'show route count' */ + if (d->stats == 2) + return; + + if (d->last_table) cli_printf(c, -1007, ""); + cli_printf(c, -1007, "Table %s:", d->tab->table->name); + d->last_table = d->tab; +} + +static void +rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) +{ + byte from[IPA_MAX_TEXT_LENGTH+8]; + byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; + rta *a = e->attrs; + int primary = (e->net->routes == e); + int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); + void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); + struct nexthop *nh; + + tm_format_time(tm, &config->tf_route, e->lastmod); + if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) + bsprintf(from, " from 
%I", a->from); + else + from[0] = 0; + + get_route_info = a->src->proto->proto->get_route_info; + if (get_route_info || d->verbose) + { + /* Need to normalize the extended attributes */ + ea_list *t = tmpa; + t = ea_append(t, a->eattrs); + tmpa = alloca(ea_scan(t)); + ea_merge(t, tmpa); + ea_sort(tmpa); + } + if (get_route_info) + get_route_info(e, info, tmpa); + else + bsprintf(info, " (%d)", e->pref); + + if (d->last_table != d->tab) + rt_show_table(c, d); + + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), + a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + + if (a->dest == RTD_UNICAST) + for (nh = &(a->nh); nh; nh = nh->next) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (a->nh.next) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tdev %s%s%s%s", /* one %s per argument: iface, mpls, onlink, weight */ + nh->iface->name, mpls, onlink, weight); + } + + if (d->verbose) + rta_show(c, a, tmpa); +} + +static void +rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +{ + rte *e, *ee; + byte ia[NET_MAX_TEXT_LENGTH+1]; + struct ea_list *tmpa; + struct channel *ec = d->tab->export_channel; + int first = 1; + int pass = 0; + + bsnprintf(ia, sizeof(ia), "%N", n->n.addr); + + for (e = n->routes; e; e = e->next) + { + if (rte_is_filtered(e) != d->filtered) + continue; + + d->rt_counter++; + d->net_counter += first; + first = 0; + + if (pass) + continue; + + ee = e; + tmpa = rte_make_tmp_attrs(e, c->show_pool); + + /* Export channel is down, do not try to export routes to it */ + if (ec && (ec->export_state == ES_DOWN)) + goto skip; + + /* 
Special case for merged export */ + if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) + { + rte *rt_free; + e = rt_export_merged(ec, n, &rt_free, &tmpa, c->show_pool, 1); + pass = 1; + + if (!e) + { e = ee; goto skip; } + } + else if (d->export_mode) + { + struct proto *ep = ec->proto; + int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, c->show_pool) : 0; + + if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) + pass = 1; + + if (ic < 0) + goto skip; + + if (d->export_mode > RSEM_PREEXPORT) + { + /* + * FIXME - This shows what should be exported according to current + * filters, but not what was really exported. 'configure soft' + * command may change the export filter and do not update routes. + */ + int do_export = (ic > 0) || + (f_run(ec->out_filter, &e, &tmpa, c->show_pool, + FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT); + + if (do_export != (d->export_mode == RSEM_EXPORT)) + goto skip; + + if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_ACCEPTED)) + pass = 1; + } + } + + if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) + goto skip; + + if (f_run(d->filter, &e, &tmpa, c->show_pool, FF_FORCE_TMPATTR) > F_ACCEPT) + goto skip; + + if (d->stats < 2) + rt_show_rte(c, ia, e, d, tmpa); + + d->show_counter++; + ia[0] = 0; + + skip: + if (e != ee) + { + rte_free(e); + e = ee; + } + lp_flush(c->show_pool); + + if (d->primary_only) + break; + } +} + +static void +rt_show_cleanup(struct cli *c) +{ + struct rt_show_data *d = c->rover; + struct rt_show_data_rtable *tab; + + /* Unlink the iterator */ + if (d->table_open) + fit_get(&d->tab->table->fib, &d->fit); + + /* Unlock referenced tables */ + WALK_LIST(tab, d->tables) + rt_unlock_table(tab->table); +} + +static void +rt_show_cont(struct cli *c) +{ + struct rt_show_data *d = c->rover; +#ifdef DEBUGGING + unsigned max = 4; +#else + unsigned max = 64; +#endif + struct fib *fib = &d->tab->table->fib; + struct fib_iterator *it = &d->fit; + + if 
(d->running_on_config && (d->running_on_config != config)) + { + cli_printf(c, 8004, "Stopped due to reconfiguration"); + goto done; + } + + if (!d->table_open) + { + FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + d->table_open = 1; + d->table_counter++; + + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; + + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(c, d); + } + + FIB_ITERATE_START(fib, it, net, n) + { + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + } + FIB_ITERATE_END; + + if (d->stats) + { + if (d->last_table != d->tab) + rt_show_table(c, d); + + cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, + d->net_counter - d->net_counter_last, d->tab->table->name); + } + + d->table_open = 0; + d->tab = NODE_NEXT(d->tab); + + if (NODE_VALID(d->tab)) + return; + + if (d->stats && (d->table_counter > 1)) + { + if (d->last_table) cli_printf(c, -1007, ""); + cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + d->show_counter, d->rt_counter, d->net_counter, d->table_counter); + } + else + cli_printf(c, 0, ""); + +done: + rt_show_cleanup(c); + c->cont = c->cleanup = NULL; +} + +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, rtable *t) +{ + struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); + tab->table = t; + add_tail(&(d->tables), &(tab->n)); + return tab; +} + +static inline void +rt_show_get_default_tables(struct rt_show_data *d) +{ + struct channel *c; + struct rt_show_data_rtable *tab; + + if (d->export_channel) + { + c = d->export_channel; + tab = rt_show_add_table(d, c->table); + tab->export_channel = c; + return; + } + + if (d->export_protocol) + { + WALK_LIST(c, d->export_protocol->channels) + { + if (c->export_state == ES_DOWN) + continue; + + tab = rt_show_add_table(d, 
c->table); + tab->export_channel = c; + } + return; + } + + if (d->show_protocol) + { + WALK_LIST(c, d->show_protocol->channels) + rt_show_add_table(d, c->table); + return; + } + + for (int i=1; i<NET_MAX; i++) + if (config->def_tables[i]) + rt_show_add_table(d, config->def_tables[i]->table); +} + +static inline void +rt_show_prepare_tables(struct rt_show_data *d) +{ + struct rt_show_data_rtable *tab, *tabx; + + /* Add implicit tables if no table is specified */ + if (EMPTY_LIST(d->tables)) + rt_show_get_default_tables(d); + + WALK_LIST_DELSAFE(tab, tabx, d->tables) + { + /* Ensure there is defined export_channel for each table */ + if (d->export_mode) + { + if (!tab->export_channel && d->export_channel && + (tab->table == d->export_channel->table)) + tab->export_channel = d->export_channel; + + if (!tab->export_channel && d->export_protocol) + tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + + if (!tab->export_channel) + { + if (d->tables_defined_by & RSD_TDB_NMN) + cf_error("No export channel for table %s", tab->table->name); + + rem_node(&(tab->n)); + continue; + } + } + + /* Ensure specified network is compatible with each table */ + if (d->addr && (tab->table->addr_type != d->addr->type)) + { + if (d->tables_defined_by & RSD_TDB_NMN) + cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + + rem_node(&(tab->n)); + continue; + } + } + + /* Ensure there is at least one table */ + if (EMPTY_LIST(d->tables)) + cf_error("No valid tables"); +} + +void +rt_show(struct rt_show_data *d) +{ + struct rt_show_data_rtable *tab; + net *n; + + /* Filtered routes are neither exported nor have sensible ordering */ + if (d->filtered && (d->export_mode || d->primary_only)) + cf_error("Incompatible show route options"); + + rt_show_prepare_tables(d); + + if (!d->addr) + { + WALK_LIST(tab, d->tables) + rt_lock_table(tab->table); + + /* There is at least one table */ + d->tab = HEAD(d->tables); + this_cli->cont = 
rt_show_cont; + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + } + else + { + WALK_LIST(tab, d->tables) + { + d->tab = tab; + + if (d->show_for) + n = net_route(tab->table, d->addr); + else + n = net_find(tab->table, d->addr); + + if (n) + rt_show_net(this_cli, n, d); + } + + if (d->rt_counter) + cli_msg(0, ""); + else + cli_msg(8001, "Network not found"); + } +} diff --git a/nest/rt-table.c b/nest/rt-table.c index 28fe5baa..686d0e84 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -33,7 +33,6 @@ #include "nest/bird.h" #include "nest/route.h" #include "nest/protocol.h" -#include "nest/cli.h" #include "nest/iface.h" #include "lib/resource.h" #include "lib/event.h" @@ -50,41 +49,198 @@ static linpool *rte_update_pool; static list routing_tables; -static byte *rt_format_via(rte *e); static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); -static inline int rt_prune_table(rtable *tab); -static inline void rt_schedule_gc(rtable *tab); -static inline void rt_schedule_prune(rtable *tab); +static inline void rt_prune_table(rtable *tab); /* Like fib_route(), but skips empty net entries */ -static net * -net_route(rtable *tab, ip_addr a, int len) +static inline void * +net_route_ip4(rtable *t, net_addr_ip4 *n) { - ip_addr a0; - net *n; + net *r; - while (len >= 0) + while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + { + n->pxlen--; + ip4_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +net_route_ip6(rtable *t, net_addr_ip6 *n) +{ + net *r; + + while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) + { + n->pxlen--; + ip6_clrbit(&n->prefix, n->pxlen); + } + + return r; +} + +static inline void * +net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +{ + struct fib_node *fn; + + while (1) + { + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first 
matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix (greatest src_pxlen). */ + for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) { - a0 = ipa_and(a, ipa_mkmask(len)); - n = fib_find(&tab->fib, &a0, len); - if (n && rte_is_valid(n->routes)) - return n; - len--; + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(n, a) && + net_in_net_src_ip6_sadr(n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } } + + if (best) + return best; + + if (!n->dst_pxlen) + break; + + n->dst_pxlen--; + ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + } + return NULL; } -static void -rte_init(struct fib_node *N) +void * +net_route(rtable *tab, const net_addr *n) { - net *n = (net *) N; + ASSERT(tab->addr_type == n->type); + + net_addr *n0 = alloca(n->length); + net_copy(n0, n); + + switch (n->type) + { + case NET_IP4: + case NET_VPN4: + case NET_ROA4: + return net_route_ip4(tab, (net_addr_ip4 *) n0); + + case NET_IP6: + case NET_VPN6: + case NET_ROA6: + return net_route_ip6(tab, (net_addr_ip6 *) n0); + + case NET_IP6_SADR: + return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); - N->flags = 0; - n->routes = NULL; + default: + return NULL; + } +} + + +static int +net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); + struct fib_node *fn; + int anything = 0; + + while (1) + { + for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + + if (n.pxlen == 0) + break; + + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return 
anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); + struct fib_node *fn; + int anything = 0; + + while (1) + { + for (fn = fib_get_chain(&tab->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + + if (n.pxlen == 0) + break; + + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +/** + * net_roa_check - check validity of route origination in a ROA table + * @tab: ROA table + * @n: network prefix to check + * @asn: AS number of network prefix + * + * Implements RFC 6483 route validation for the given network prefix. The + * procedure is to find all candidate ROAs - ROAs whose prefixes cover the given + * network prefix. If there is no candidate ROA, return ROA_UNKNOWN. If there is + * a candidate ROA with matching ASN and maxlen field greater than or equal to + * the given prefix length, return ROA_VALID. Otherwise, return ROA_INVALID. If + * caller cannot determine origin AS, 0 could be used (in that case ROA_VALID + * cannot happen). Table @tab must have type NET_ROA4 or NET_ROA6, network @n + * must have type NET_IP4 or NET_IP6, respectively. 
+ */ +int +net_roa_check(rtable *tab, const net_addr *n, u32 asn) +{ + if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) + return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + else + return ROA_UNKNOWN; /* Should not happen */ } /** @@ -121,7 +277,7 @@ rte_get_temp(rta *a) e->attrs = a; e->flags = 0; - e->pref = a->src->proto->preference; + e->pref = 0; return e; } @@ -219,7 +375,7 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, rt_format_via(e)); + log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest)); } static inline void @@ -237,11 +393,11 @@ rte_trace_out(uint flag, struct proto *p, rte *e, char *msg) } static rte * -export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) +export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - struct proto *p = ah->proto; - struct filter *filter = ah->out_filter; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct filter *filter = c->out_filter; + struct proto_stats *stats = &c->stats; ea_list *tmpb = NULL; rte *rt; int v; @@ -298,16 +454,16 @@ export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa } static inline rte * -export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) +export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) { - return export_filter_(ah, rt0, rt_free, tmpa, rte_update_pool, silent); + return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent); } static void -do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, 
int refeed) +do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct proto_stats *stats = &c->stats; /* @@ -337,11 +493,11 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm * also non-new updates (contrary to import blocking). */ - struct proto_limit *l = ah->out_limit; - if (l && new) + struct channel_limit *l = &c->out_limit; + if (l->action && new) { if ((!old || refeed) && (stats->exp_routes >= l->limit)) - proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes); + channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); if (l->state == PLS_BLOCKED) { @@ -378,25 +534,24 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm rte_trace_out(D_ROUTES, p, old, "removed"); } if (!new) - p->rt_notify(p, ah->table, net, NULL, old, NULL); + p->rt_notify(p, c, net, NULL, old, NULL); else if (tmpa) { ea_list *t = tmpa; while (t->next) t = t->next; t->next = new->attrs->eattrs; - p->rt_notify(p, ah->table, net, new, old, tmpa); + p->rt_notify(p, c, net, new, old, tmpa); t->next = NULL; } else - p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs); + p->rt_notify(p, c, net, new, old, new->attrs->eattrs); } static void -rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed) +rt_notify_basic(struct channel *c, net *net, rte *new0, rte *old0, int refeed) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; rte *new = new0; rte *old = old0; @@ -405,9 +560,9 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re ea_list *tmpa = NULL; if (new) - stats->exp_updates_received++; + c->stats.exp_updates_received++; else - stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* * This is a tricky part - we don't know whether route 'old' was @@ 
-430,10 +585,10 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re */ if (new) - new = export_filter(ah, new, &new_free, &tmpa, 0); + new = export_filter(c, new, &new_free, &tmpa, 0); if (old && !refeed) - old = export_filter(ah, old, &old_free, NULL, 1); + old = export_filter(c, old, &old_free, NULL, 1); if (!new && !old) { @@ -450,13 +605,13 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re #ifdef CONFIG_PIPE if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto)) - p->rt_notify(p, ah->table, net, NULL, old0, NULL); + p->rt_notify(p, c, net, NULL, old0, NULL); #endif return; } - do_rt_notify(ah, net, new, old, tmpa, refeed); + do_rt_notify(c, net, new, old, tmpa, refeed); /* Discard temporary rte's */ if (new_free) @@ -466,10 +621,9 @@ rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int re } static void -rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed) +rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed) { - // struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + // struct proto *p = c->proto; rte *r; rte *new_best = NULL; @@ -487,14 +641,14 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol was not valid, caller must use NULL for both old_changed and before_old. 
*/ if (new_changed) - stats->exp_updates_received++; + c->stats.exp_updates_received++; else - stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* First, find the new_best route - first accepted by filters */ for (r=net->routes; rte_is_valid(r); r=r->next) { - if (new_best = export_filter(ah, r, &new_free, &tmpa, 0)) + if (new_best = export_filter(c, r, &new_free, &tmpa, 0)) break; /* Note if we walked around the position of old_changed route */ @@ -502,9 +656,9 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol old_meet = 1; } - /* + /* * Second, handle the feed case. That means we do not care for - * old_best. It is NULL for feed, and the new_best for refeed. + * old_best. It is NULL for feed, and the new_best for refeed. * For refeed, there is a hack similar to one in rt_notify_basic() * to ensure withdraws in case of changed filters */ @@ -545,7 +699,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol /* First case */ if (old_meet) - if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + if (old_best = export_filter(c, old_changed, &old_free, NULL, 1)) goto found; /* Second case */ @@ -563,18 +717,18 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol /* Fourth case */ for (r=r->next; rte_is_valid(r); r=r->next) { - if (old_best = export_filter(ah, r, &old_free, NULL, 1)) + if (old_best = export_filter(c, r, &old_free, NULL, 1)) goto found; if (r == before_old) - if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1)) + if (old_best = export_filter(c, old_changed, &old_free, NULL, 1)) goto found; } /* Implicitly, old_best is NULL and new_best is non-NULL */ found: - do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2)); + do_rt_notify(c, net, new_best, old_best, tmpa, (feed == 2)); /* Discard temporary rte's */ if (new_free) @@ -584,19 +738,17 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte 
*new_changed, rte *ol } -static struct mpnh * -mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max) +static struct nexthop * +nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) { - struct mpnh nh = { .gw = a->gw, .iface = a->iface }; - struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; - return mpnh_merge(nhs, nh2, 1, 0, max, pool); + return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); } rte * -rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) +rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - // struct proto *p = ah->proto; - struct mpnh *nhs = NULL; + // struct proto *p = c->proto; + struct nexthop *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; best0 = net->routes; @@ -605,7 +757,7 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (!rte_is_valid(best0)) return NULL; - best = export_filter_(ah, best0, rt_free, tmpa, pool, silent); + best = export_filter_(c, best0, rt_free, tmpa, pool, silent); if (!best || !rte_is_reachable(best)) return best; @@ -615,13 +767,13 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (!rte_mergable(best0, rt0)) continue; - rt = export_filter_(ah, rt0, &tmp, NULL, pool, 1); + rt = export_filter_(c, rt0, &tmp, NULL, pool, 1); if (!rt) continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, pool, ah->proto->merge_limit); + nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@ -629,13 +781,12 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, pool, ah->proto->merge_limit); + nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { best = rte_cow_rta(best, pool); - best->attrs->dest = RTD_MULTIPATH; - best->attrs->nexthops = nhs; + 
nexthop_link(best->attrs, nhs); } } @@ -647,10 +798,10 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm static void -rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, +rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, rte *new_best, rte*old_best, int refeed) { - // struct proto *p = ah->proto; + // struct proto *p = c->proto; rte *new_best_free = NULL; rte *old_best_free = NULL; @@ -668,31 +819,31 @@ rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_ if ((new_best == old_best) && !refeed) { new_changed = rte_mergable(new_best, new_changed) ? - export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL; + export_filter(c, new_changed, &new_changed_free, NULL, 1) : NULL; old_changed = rte_mergable(old_best, old_changed) ? - export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL; + export_filter(c, old_changed, &old_changed_free, NULL, 1) : NULL; if (!new_changed && !old_changed) return; } if (new_best) - ah->stats->exp_updates_received++; + c->stats.exp_updates_received++; else - ah->stats->exp_withdraws_received++; + c->stats.exp_withdraws_received++; /* Prepare new merged route */ if (new_best) - new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, rte_update_pool, 0); + new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0); /* Prepare old merged route (without proper merged next hops) */ /* There are some issues with running filter on old route - see rt_notify_basic() */ if (old_best && !refeed) - old_best = export_filter(ah, old_best, &old_best_free, NULL, 1); + old_best = export_filter(c, old_best, &old_best_free, NULL, 1); if (new_best || old_best) - do_rt_notify(ah, net, new_best, old_best, tmpa, refeed); + do_rt_notify(c, net, new_best, old_best, tmpa, refeed); /* Discard temporary rte's */ if (new_best_free) @@ -716,7 +867,7 @@ rt_notify_merged(struct announce_hook *ah, net 
*net, rte *new_changed, rte *old_ * @new_best: the new best route for the same network * @old_best: the previous best route for the same network * @before_old: The previous route before @old for the same network. - * If @before_old is NULL @old was the first. + * If @before_old is NULL @old was the first. * * This function gets a routing table update and announces it * to all protocols that acccepts given type of route announcement @@ -757,28 +908,22 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, if (!old && !new) return; - if (type == RA_OPTIMAL) - { - if (new) - new->attrs->src->proto->stats.pref_routes++; - if (old) - old->attrs->src->proto->stats.pref_routes--; - - if (tab->hostcache) - rt_notify_hostcache(tab, net); - } + if ((type == RA_OPTIMAL) && tab->hostcache) + rt_notify_hostcache(tab, net); - struct announce_hook *a; - WALK_LIST(a, tab->hooks) + struct channel *c; node *n; + WALK_LIST2(c, n, tab->channels, table_node) { - ASSERT(a->proto->export_state != ES_DOWN); - if (a->proto->accept_ra_types == type) + if (c->export_state == ES_DOWN) + continue; + + if (c->ra_mode == type) if (type == RA_ACCEPTED) - rt_notify_accepted(a, net, new, old, before_old, 0); + rt_notify_accepted(c, net, new, old, before_old, 0); else if (type == RA_MERGED) - rt_notify_merged(a, net, new, old, new_best, old_best, 0); + rt_notify_merged(c, net, new, old, new_best, old_best, 0); else - rt_notify_basic(a, net, new, old, 0); + rt_notify_basic(c, net, new, old, 0); } } @@ -788,27 +933,36 @@ rte_validate(rte *e) int c; net *n = e->net; - if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen)) - { - log(L_WARN "Ignoring bogus prefix %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + if (!net_validate(n->n.addr)) + { + log(L_WARN "Ignoring bogus prefix %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } - c = ipa_classify_net(n->n.prefix); + /* FIXME: better 
handling different nettypes */ + c = !net_is_flow(n->n.addr) ? + net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - { - log(L_WARN "Ignoring bogus route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + { + log(L_WARN "Ignoring bogus route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } - if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) - { - log(L_WARN "Ignoring unsorted multipath route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + { + log(L_WARN "Ignoring route %N with invalid dest %d received via %s", + n->n.addr, e->attrs->dest, e->sender->proto->name); + return 0; + } + + if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } return 1; } @@ -848,11 +1002,11 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = ah->proto; - struct rtable *table = ah->table; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct rtable *table = c->table; + struct proto_stats *stats = &c->stats; static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; rte *before_old = NULL; rte *old_best = net->routes; @@ -877,8 +1031,8 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr { if (new) { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s", - net->n.prefix, net->n.pxlen, table->name); + log_rl(&rl_pipe, L_ERR "Pipe collision detected 
when sending %N to table %s", + net->n.addr, table->name); rte_free_quick(new); } return; @@ -916,13 +1070,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct proto_limit *l = ah->rx_limit; - if (l && !old && new) + struct channel_limit *l = &c->rx_limit; + if (l->action && !old && new) { u32 all_routes = stats->imp_routes + stats->filt_routes; if (all_routes >= l->limit) - proto_notify_limit(ah, l, PLD_RX, all_routes); + channel_notify_limit(c, l, PLD_RX, all_routes); if (l->state == PLS_BLOCKED) { @@ -936,11 +1090,11 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr } } - l = ah->in_limit; - if (l && !old_ok && new_ok) + l = &c->in_limit; + if (l->action && !old_ok && new_ok) { if (stats->imp_routes >= l->limit) - proto_notify_limit(ah, l, PLD_IN, stats->imp_routes); + channel_notify_limit(c, l, PLD_IN, stats->imp_routes); if (l->state == PLS_BLOCKED) { @@ -954,13 +1108,13 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr stats->imp_updates_ignored++; rte_trace_in(D_FILTERS, p, new, "ignored [limit]"); - if (ah->in_keep_filtered) + if (c->in_keep_filtered) new->flags |= REF_FILTERED; else { rte_free_quick(new); new = NULL; } /* Note that old && !new could be possible when - ah->in_keep_filtered changed in the recent past. */ + c->in_keep_filtered changed in the recent past. 
*/ if (!old && !new) return; @@ -1064,7 +1218,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr } if (new) - new->lastmod = now; + new->lastmod = current_time(); /* Log the route change */ if (p->debug & D_ROUTES) @@ -1092,8 +1246,8 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && - (table->gc_time + table->config->gc_min_time <= now)) - rt_schedule_gc(table); + (table->gc_time + table->config->gc_min_time <= current_time())) + rt_schedule_prune(table); if (old_ok && p->rte_remove) p->rte_remove(net, old); @@ -1142,7 +1296,7 @@ rte_unhide_dummy_routes(net *net, rte **dummy) /** * rte_update - enter a new update to a routing table * @table: table to be updated - * @ah: pointer to table announce hook + * @c: channel doing the update * @net: network node * @p: protocol submitting the update * @src: protocol originating the update @@ -1182,18 +1336,27 @@ rte_unhide_dummy_routes(net *net, rte **dummy) */ void -rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) +rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; - struct filter *filter = ah->in_filter; + struct proto *p = c->proto; + struct proto_stats *stats = &c->stats; + struct filter *filter = c->in_filter; ea_list *tmpa = NULL; rte *dummy = NULL; + net *nn; + + ASSERT(c->channel_state == CS_UP); rte_update_lock(); if (new) { - new->sender = ah; + nn = net_get(c->table, n); + + new->net = nn; + new->sender = c; + + if (!new->pref) + new->pref = c->preference; stats->imp_updates_received++; if (!rte_validate(new)) @@ -1208,7 +1371,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_filtered) + if (! 
c->in_keep_filtered) goto drop; /* new is a private copy, i could modify it */ @@ -1226,7 +1389,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_filtered) + if (! c->in_keep_filtered) goto drop; new->flags |= REF_FILTERED; @@ -1243,7 +1406,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) { stats->imp_withdraws_received++; - if (!net || !src) + if (!(nn = net_find(c->table, n)) || !src) { stats->imp_withdraws_ignored++; rte_update_unlock(); @@ -1252,9 +1415,9 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) } recalc: - rte_hide_dummy_routes(net, &dummy); - rte_recalculate(ah, net, new, src); - rte_unhide_dummy_routes(net, &dummy); + rte_hide_dummy_routes(nn, &dummy); + rte_recalculate(c, nn, new, src); + rte_unhide_dummy_routes(nn, &dummy); rte_update_unlock(); return; @@ -1266,7 +1429,7 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). new must be non-NULL */ -static inline void +static inline void rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *new_best, rte *old_best) { @@ -1285,9 +1448,9 @@ rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collec /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter) +rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter) { - net *n = net_find(t, prefix, pxlen); + net *n = net_find(t, a); rte *rt = n ? 
n->routes : NULL; if (!rte_is_valid(rt)) @@ -1315,28 +1478,25 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter /** * rt_refresh_begin - start a refresh cycle * @t: related routing table - * @ah: related announce hook + * @c related channel * * This function starts a refresh cycle for given routing table and announce * hook. The refresh cycle is a sequence where the protocol sends all its valid * routes to the routing table (by rte_update()). After that, all protocol - * routes (more precisely routes with @ah as @sender) not sent during the + * routes (more precisely routes with @c as @sender) not sent during the * refresh cycle but still in the table from the past are pruned. This is * implemented by marking all related routes as stale by REF_STALE flag in * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct announce_hook *ah) +rt_refresh_begin(rtable *t, struct channel *c) { - net *n; - rte *e; - - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for (e = n->routes; e; e = e->next) - if (e->sender == ah) + if (e->sender == c) e->flags |= REF_STALE; } FIB_WALK_END; @@ -1345,23 +1505,21 @@ rt_refresh_begin(rtable *t, struct announce_hook *ah) /** * rt_refresh_end - end a refresh cycle * @t: related routing table - * @ah: related announce hook + * @c: related channel * - * This function starts a refresh cycle for given routing table and announce + * This function ends a refresh cycle for given routing table and announce * hook. See rt_refresh_begin() for description of refresh cycles. 
*/ void -rt_refresh_end(rtable *t, struct announce_hook *ah) +rt_refresh_end(rtable *t, struct channel *c) { int prune = 0; - net *n; - rte *e; - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for (e = n->routes; e; e = e->next) - if ((e->sender == ah) && (e->flags & REF_STALE)) + if ((e->sender == c) && (e->flags & REF_STALE)) { e->flags |= REF_DISCARD; prune = 1; @@ -1384,8 +1542,8 @@ void rte_dump(rte *e) { net *n = e->net; - debug("%-1I/%2d ", n->n.prefix, n->n.pxlen); - debug("KF=%02x PF=%02x pref=%d lm=%d ", n->n.flags, e->pflags, e->pref, now-e->lastmod); + debug("%-1N ", n->n.addr); + debug("KF=%02x PF=%02x pref=%d ", n->n.flags, e->pflags, e->pref); rta_dump(e->attrs); if (e->attrs->src->proto->proto->dump_attrs) e->attrs->src->proto->proto->dump_attrs(e); @@ -1401,23 +1559,17 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - rte *e; - net *n; - struct announce_hook *a; - debug("Dump of routing table <%s>\n", t->name); #ifdef DEBUGGING fib_check(&t->fib); #endif - FIB_WALK(&t->fib, fn) + FIB_WALK(&t->fib, net, n) { - n = (net *) fn; + rte *e; for(e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; - WALK_LIST(a, t->hooks) - debug("\tAnnounces routes to protocol %s\n", a->proto->name); debug("\n"); } @@ -1436,23 +1588,6 @@ rt_dump_all(void) } static inline void -rt_schedule_prune(rtable *tab) -{ - rt_mark_for_prune(tab); - ev_schedule(tab->rt_event); -} - -static inline void -rt_schedule_gc(rtable *tab) -{ - if (tab->gc_scheduled) - return; - - tab->gc_scheduled = 1; - ev_schedule(tab->rt_event); -} - -static inline void rt_schedule_hcu(rtable *tab) { if (tab->hcu_scheduled) @@ -1465,51 +1600,34 @@ rt_schedule_hcu(rtable *tab) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) ev_schedule(tab->rt_event); - /* state change 0->1, 2->3 */ - tab->nhu_state |= 1; + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + tab->nhu_state |= 
NHU_SCHEDULED; } - -static void -rt_prune_nets(rtable *tab) +void +rt_schedule_prune(rtable *tab) { - struct fib_iterator fit; - int ncnt = 0, ndel = 0; - -#ifdef DEBUGGING - fib_check(&tab->fib); -#endif - - FIB_ITERATE_INIT(&fit, &tab->fib); -again: - FIB_ITERATE_START(&tab->fib, &fit, f) - { - net *n = (net *) f; - ncnt++; - if (!n->routes) /* Orphaned FIB entry */ - { - FIB_ITERATE_PUT(&fit, f); - fib_delete(&tab->fib, f); - ndel++; - goto again; - } - } - FIB_ITERATE_END(f); - DBG("Pruned %d of %d networks\n", ndel, ncnt); + if (tab->prune_state == 0) + ev_schedule(tab->rt_event); - tab->gc_counter = 0; - tab->gc_time = now; - tab->gc_scheduled = 0; + /* state change 0->1, 2->3 */ + tab->prune_state |= 1; } + static void rt_event(void *ptr) { rtable *tab = ptr; + rt_lock_table(tab); + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -1517,35 +1635,25 @@ rt_event(void *ptr) rt_next_hop_update(tab); if (tab->prune_state) - if (!rt_prune_table(tab)) - { - /* Table prune unfinished */ - ev_schedule(tab->rt_event); - return; - } + rt_prune_table(tab); - if (tab->gc_scheduled) - { - rt_prune_nets(tab); - rt_prune_sources(); // FIXME this should be moved to independent event - } + rt_unlock_table(tab); } void -rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf) +rt_setup(pool *p, rtable *t, struct rtable_config *cf) { bzero(t, sizeof(*t)); - fib_init(&t->fib, p, sizeof(net), 0, rte_init); - t->name = name; + t->name = cf->name; t->config = cf; - init_list(&t->hooks); - if (cf) - { - t->rt_event = ev_new(p); - t->rt_event->hook = rt_event; - t->rt_event->data = t; - t->gc_time = now; - } + t->addr_type = cf->addr_type; + fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + init_list(&t->channels); + + t->rt_event = ev_new(p); + t->rt_event->hook = rt_event; + t->rt_event->data = t; + t->gc_time = current_time(); } /** @@ -1559,121 +1667,123 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); 
- rte_update_pool = lp_new(rt_table_pool, 4080); + rte_update_pool = lp_new_default(rt_table_pool); rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); } -static int -rt_prune_step(rtable *tab, int *limit) +/** + * rt_prune_table - prune a routing table + * + * The prune loop scans routing tables and removes routes belonging to flushing + * protocols, discarded routes and also stale network entries. It is called from + * rt_event(). The event is rescheduled if the current iteration do not finish + * the table. The pruning is directed by the prune state (@prune_state), + * specifying whether the prune cycle is scheduled or running, and there + * is also a persistent pruning iterator (@prune_fit). + * + * The prune loop is used also for channel flushing. For this purpose, the + * channels to flush are marked before the iteration and notified after the + * iteration. + */ +static void +rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; + int limit = 512; + + struct channel *c; + node *n, *x; DBG("Pruning route table %s\n", tab->name); #ifdef DEBUGGING fib_check(&tab->fib); #endif - if (tab->prune_state == RPS_NONE) - return 1; + if (tab->prune_state == 0) + return; - if (tab->prune_state == RPS_SCHEDULED) - { - FIB_ITERATE_INIT(fit, &tab->fib); - tab->prune_state = RPS_RUNNING; - } + if (tab->prune_state == 1) + { + /* Mark channels to flush */ + WALK_LIST2(c, n, tab->channels, table_node) + if (c->channel_state == CS_FLUSHING) + c->flush_active = 1; + + FIB_ITERATE_INIT(fit, &tab->fib); + tab->prune_state = 2; + } again: - FIB_ITERATE_START(&tab->fib, fit, fn) + FIB_ITERATE_START(&tab->fib, fit, net, n) { - net *n = (net *) fn; rte *e; rescan: for (e=n->routes; e; e=e->next) - if (e->sender->proto->flushing || (e->flags & REF_DISCARD)) + if (e->sender->flush_active || (e->flags & REF_DISCARD)) { - if (*limit <= 0) + if (limit <= 0) { - FIB_ITERATE_PUT(fit, fn); - return 0; + FIB_ITERATE_PUT(fit); + 
ev_schedule(tab->rt_event); + return; } rte_discard(e); - (*limit)--; + limit--; goto rescan; } + if (!n->routes) /* Orphaned FIB entry */ { - FIB_ITERATE_PUT(fit, fn); - fib_delete(&tab->fib, fn); + FIB_ITERATE_PUT(fit); + fib_delete(&tab->fib, n); goto again; } } - FIB_ITERATE_END(fn); + FIB_ITERATE_END; #ifdef DEBUGGING fib_check(&tab->fib); #endif - tab->prune_state = RPS_NONE; - return 1; -} + tab->gc_counter = 0; + tab->gc_time = current_time(); -/** - * rt_prune_table - prune a routing table - * @tab: a routing table for pruning - * - * This function scans the routing table @tab and removes routes belonging to - * flushing protocols, discarded routes and also stale network entries, in a - * similar fashion like rt_prune_loop(). Returns 1 when all such routes are - * pruned. Contrary to rt_prune_loop(), this function is not a part of the - * protocol flushing loop, but it is called from rt_event() for just one routing - * table. - * - * Note that rt_prune_table() and rt_prune_loop() share (for each table) the - * prune state (@prune_state) and also the pruning iterator (@prune_fit). - */ -static inline int -rt_prune_table(rtable *tab) -{ - int limit = 512; - return rt_prune_step(tab, &limit); -} + /* state change 2->0, 3->1 */ + tab->prune_state &= 1; -/** - * rt_prune_loop - prune routing tables - * - * The prune loop scans routing tables and removes routes belonging to flushing - * protocols, discarded routes and also stale network entries. Returns 1 when - * all such routes are pruned. It is a part of the protocol flushing loop. - */ -int -rt_prune_loop(void) -{ - int limit = 512; - rtable *t; + if (tab->prune_state > 0) + ev_schedule(tab->rt_event); - WALK_LIST(t, routing_tables) - if (! 
rt_prune_step(t, &limit)) - return 0; + /* FIXME: This should be handled in a better way */ + rt_prune_sources(); - return 1; + /* Close flushed channels */ + WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) + if (c->flush_active) + { + c->flush_active = 0; + channel_set_state(c, CS_DOWN); + } + + return; } void rt_preconfig(struct config *c) { - struct symbol *s = cf_get_symbol("master"); - init_list(&c->tables); - c->master_rtc = rt_new_table(s); + + rt_new_table(cf_get_symbol("master4"), NET_IP4); + rt_new_table(cf_get_symbol("master6"), NET_IP6); } -/* +/* * Some functions for handing internal next hop updates * triggered by rt_schedule_nhu(). */ @@ -1689,33 +1799,105 @@ rta_next_hop_outdated(rta *a) if (!he->src) return a->dest != RTD_UNREACHABLE; - return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) || - (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - !mpnh_same(a->nexthops, he->src->nexthops); + return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || + (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } -static inline void -rta_apply_hostentry(rta *a, struct hostentry *he) +void +rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; - a->iface = he->src ? he->src->iface : NULL; - a->gw = he->gw; a->dest = he->dest; a->igp_metric = he->igp_metric; - a->nexthops = he->src ? he->src->nexthops : NULL; + + if (a->dest != RTD_UNICAST) + { + /* No nexthop */ +no_nexthop: + a->nh = (struct nexthop) {}; + if (mls) + { /* Store the label stack for later changes */ + a->nh.labels_orig = a->nh.labels = mls->len; + memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); + } + return; + } + + if (((!mls) || (!mls->len)) && he->nexthop_linkable) + { /* Just link the nexthop chain, no label append happens. 
*/ + memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + return; + } + + struct nexthop *nhp = NULL, *nhr = NULL; + int skip_nexthop = 0; + + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + { + if (skip_nexthop) + skip_nexthop--; + else + { + nhr = nhp; + nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + } + + nhp->iface = nh->iface; + nhp->weight = nh->weight; + if (mls) + { + nhp->labels = nh->labels + mls->len; + nhp->labels_orig = mls->len; + if (nhp->labels <= MPLS_MAX_LABEL_STACK) + { + memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ + memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ + } + else + { + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); + skip_nexthop++; + continue; + } + } + if (ipa_nonzero(nh->gw)) + { + nhp->gw = nh->gw; /* Router nexthop */ + nhp->flags |= (nh->flags & RNF_ONLINK); + } + else if (ipa_nonzero(he->link)) + nhp->gw = he->link; /* Device nexthop with link-local address known */ + else + nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + } + + if (skip_nexthop) + if (nhr) + nhr->next = NULL; + else + { + a->dest = RTD_UNREACHABLE; + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + goto no_nexthop; + } } static inline rte * rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) { - rta a; - memcpy(&a, old->attrs, sizeof(rta)); - rta_apply_hostentry(&a, old->attrs->hostentry); - a.aflags = 0; + rta *a = alloca(RTA_MAX_SIZE); + memcpy(a, old->attrs, rta_size(old->attrs)); + + mpls_label_stack mls = { .len = a->nh.labels_orig }; + memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + + rta_apply_hostentry(a, old->attrs->hostentry, &mls); + a->aflags = 0; rte *e = sl_alloc(rte_slab); memcpy(e, old, sizeof(rte)); - 
e->attrs = rta_lookup(&a); + e->attrs = rta_lookup(a); return e; } @@ -1796,49 +1978,61 @@ rt_next_hop_update(rtable *tab) struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) return; - if (tab->nhu_state == 1) + if (tab->nhu_state == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = 2; + tab->nhu_state = NHU_RUNNING; } - FIB_ITERATE_START(&tab->fib, fit, fn) + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { - FIB_ITERATE_PUT(fit, fn); + FIB_ITERATE_PUT(fit); ev_schedule(tab->rt_event); return; } - max_feed -= rt_next_hop_update_net(tab, (net *) fn); + max_feed -= rt_next_hop_update_net(tab, n); } - FIB_ITERATE_END(fn); + FIB_ITERATE_END; - /* state change 2->0, 3->1 */ + /* State change: + * NHU_DIRTY -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_CLEAN + */ tab->nhu_state &= 1; - if (tab->nhu_state > 0) + if (tab->nhu_state != NHU_CLEAN) ev_schedule(tab->rt_event); } struct rtable_config * -rt_new_table(struct symbol *s) +rt_new_table(struct symbol *s, uint addr_type) { /* Hack that allows to 'redefine' the master table */ - if ((s->class == SYM_TABLE) && (s->def == new_config->master_rtc)) + if ((s->class == SYM_TABLE) && + (s->def == new_config->def_tables[addr_type]) && + ((addr_type == NET_IP4) || (addr_type == NET_IP6))) return s->def; struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config)); cf_define_symbol(s, SYM_TABLE, c); c->name = s->name; - add_tail(&new_config->tables, &c->n); + c->addr_type = addr_type; c->gc_max_ops = 1000; c->gc_min_time = 5; + + add_tail(&new_config->tables, &c->n); + + /* First table of each type is kept as default */ + if (! 
new_config->def_tables[addr_type]) + new_config->def_tables[addr_type] = c; + return c; } @@ -1935,7 +2129,7 @@ rt_commit(struct config *new, struct config *old) { rtable *t = mb_alloc(rt_table_pool, sizeof(struct rtable)); DBG("\t%s: created\n", r->name); - rt_setup(rt_table_pool, t, r->name, r); + rt_setup(rt_table_pool, t, r); add_tail(&routing_tables, &t->n); r->table = t; } @@ -1943,119 +2137,104 @@ rt_commit(struct config *new, struct config *old) } static inline void -do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) +do_feed_channel(struct channel *c, net *n, rte *e) { rte_update_lock(); - if (type == RA_ACCEPTED) - rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1); - else if (type == RA_MERGED) - rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding); - else - rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding); + if (c->ra_mode == RA_ACCEPTED) + rt_notify_accepted(c, n, e, NULL, NULL, c->refeeding ? 2 : 1); + else if (c->ra_mode == RA_MERGED) + rt_notify_merged(c, n, NULL, NULL, e, c->refeeding ? e : NULL, c->refeeding); + else /* RA_BASIC */ + rt_notify_basic(c, n, e, c->refeeding ? e : NULL, c->refeeding); rte_update_unlock(); } /** - * rt_feed_baby - advertise routes to a new protocol - * @p: protocol to be fed + * rt_feed_channel - advertise all routes to a channel + * @c: channel to be fed * - * This function performs one pass of advertisement of routes to a newly - * initialized protocol. It's called by the protocol code as long as it - * has something to do. (We avoid transferring all the routes in single - * pass in order not to monopolize CPU time.) + * This function performs one pass of advertisement of routes to a channel that + * is in the ES_FEEDING state. It is called by the protocol code as long as it + * has something to do. (We avoid transferring all the routes in single pass in + * order not to monopolize CPU time.) 
*/ int -rt_feed_baby(struct proto *p) +rt_feed_channel(struct channel *c) { - struct announce_hook *h; - struct fib_iterator *fit; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - if (!p->feed_ahook) /* Need to initialize first */ + ASSERT(c->export_state == ES_FEEDING); + + if (!c->feed_active) { - if (!p->ahooks) - return 1; - DBG("Announcing routes to new protocol %s\n", p->name); - p->feed_ahook = p->ahooks; - fit = p->feed_iterator = mb_alloc(p->pool, sizeof(struct fib_iterator)); - goto next_hook; + FIB_ITERATE_INIT(fit, &c->table->fib); + c->feed_active = 1; } - fit = p->feed_iterator; -again: - h = p->feed_ahook; - FIB_ITERATE_START(&h->table->fib, fit, fn) + FIB_ITERATE_START(&c->table->fib, fit, net, n) { - net *n = (net *) fn; rte *e = n->routes; if (max_feed <= 0) { - FIB_ITERATE_PUT(fit, fn); + FIB_ITERATE_PUT(fit); return 0; } - /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */ + /* FIXME: perhaps we should change feed for RA_ACCEPTED to not use 'new' */ - if ((p->accept_ra_types == RA_OPTIMAL) || - (p->accept_ra_types == RA_ACCEPTED) || - (p->accept_ra_types == RA_MERGED)) + if ((c->ra_mode == RA_OPTIMAL) || + (c->ra_mode == RA_ACCEPTED) || + (c->ra_mode == RA_MERGED)) if (rte_is_valid(e)) { - if (p->export_state != ES_FEEDING) - return 1; /* In the meantime, the protocol fell down. */ + /* In the meantime, the protocol may fell down */ + if (c->export_state != ES_FEEDING) + goto done; - do_feed_baby(p, p->accept_ra_types, h, n, e); + do_feed_channel(c, n, e); max_feed--; } - if (p->accept_ra_types == RA_ANY) + if (c->ra_mode == RA_ANY) for(e = n->routes; e; e = e->next) { - if (p->export_state != ES_FEEDING) - return 1; /* In the meantime, the protocol fell down. 
*/ + /* In the meantime, the protocol may fell down */ + if (c->export_state != ES_FEEDING) + goto done; if (!rte_is_valid(e)) continue; - do_feed_baby(p, RA_ANY, h, n, e); + do_feed_channel(c, n, e); max_feed--; } } - FIB_ITERATE_END(fn); - p->feed_ahook = h->next; - if (!p->feed_ahook) - { - mb_free(p->feed_iterator); - p->feed_iterator = NULL; - return 1; - } + FIB_ITERATE_END; -next_hook: - h = p->feed_ahook; - FIB_ITERATE_INIT(fit, &h->table->fib); - goto again; +done: + c->feed_active = 0; + return 1; } /** * rt_feed_baby_abort - abort protocol feeding - * @p: protocol + * @c: channel * - * This function is called by the protocol code when the protocol - * stops or ceases to exist before the last iteration of rt_feed_baby() - * has finished. + * This function is called by the protocol code when the protocol stops or + * ceases to exist during the feeding. */ void -rt_feed_baby_abort(struct proto *p) +rt_feed_channel_abort(struct channel *c) { - if (p->feed_ahook) + if (c->feed_active) { - /* Unlink the iterator and exit */ - fit_get(&p->feed_ahook->table->fib, p->feed_iterator); - p->feed_ahook = NULL; + /* Unlink the iterator */ + fit_get(&c->table->fib, &c->feed_fit); + c->feed_active = 0; } } - static inline unsigned ptr_hash(void *ptr) { @@ -2063,10 +2242,10 @@ ptr_hash(void *ptr) return p ^ (p << 8) ^ (p >> 16); } -static inline unsigned +static inline u32 hc_hash(ip_addr a, rtable *dep) { - return (ipa_hash(a) ^ ptr_hash(dep)) & 0xffff; + return ipa_hash(a) ^ ptr_hash(dep); } static inline void @@ -2100,7 +2279,7 @@ hc_alloc_table(struct hostcache *hc, unsigned order) { uint hsize = 1 << order; hc->hash_order = order; - hc->hash_shift = 16 - order; + hc->hash_shift = 32 - order; hc->hash_max = (order >= HC_HI_ORDER) ? ~0U : (hsize HC_HI_MARK); hc->hash_min = (order <= HC_LO_ORDER) ? 
0U : (hsize HC_LO_MARK); @@ -2130,12 +2309,12 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig { struct hostentry *he = sl_alloc(hc->slab); - he->addr = a; - he->link = ll; - he->tab = dep; - he->hash_key = k; - he->uc = 0; - he->src = NULL; + *he = (struct hostentry) { + .addr = a, + .link = ll, + .tab = dep, + .hash_key = k, + }; add_tail(&hc->hostentries, &he->ln); hc_insert(hc, he); @@ -2171,7 +2350,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, HC_DEF_ORDER); hc->slab = sl_new(rt_table_pool, sizeof(struct hostentry)); - hc->lp = lp_new(rt_table_pool, 1008); + hc->lp = lp_new(rt_table_pool, LP_GOOD_SIZE(1024)); hc->trie = f_new_trie(hc->lp, sizeof(struct f_trie_node)); tab->hostcache = hc; @@ -2201,12 +2380,10 @@ rt_free_hostcache(rtable *tab) static void rt_notify_hostcache(rtable *tab, net *net) { - struct hostcache *hc = tab->hostcache; - if (tab->hcu_scheduled) return; - if (trie_match_prefix(hc->trie, net->n.prefix, net->n.pxlen)) + if (trie_match_net(tab->hostcache->trie, net->n.addr)) rt_schedule_hcu(tab); } @@ -2222,7 +2399,7 @@ if_local_addr(ip_addr a, struct iface *i) return 0; } -static u32 +static u32 rt_get_igp_metric(rte *rt) { eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); @@ -2244,8 +2421,7 @@ rt_get_igp_metric(rte *rt) return rt->u.rip.metric; #endif - /* Device routes */ - if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH)) + if (a->source == RTS_DEVICE) return 0; return IGP_METRIC_UNKNOWN; @@ -2255,57 +2431,58 @@ static int rt_update_hostentry(rtable *tab, struct hostentry *he) { rta *old_src = he->src; + int direct = 0; int pxlen = 0; - /* Reset the hostentry */ + /* Reset the hostentry */ he->src = NULL; - he->gw = IPA_NONE; he->dest = RTD_UNREACHABLE; + he->nexthop_linkable = 0; he->igp_metric = 0; - net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH); + net_addr he_addr; + net_fill_ip_host(&he_addr, he->addr); + net *n = net_route(tab, &he_addr); if (n) { rte *e = n->routes; 
rta *a = e->attrs; - pxlen = n->n.pxlen; + pxlen = n->n.addr->pxlen; if (a->hostentry) { /* Recursive route should not depend on another recursive route */ - log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d", - he->addr, n->n.prefix, pxlen); + log(L_WARN "Next hop address %I resolvable through recursive route for %N", + he->addr, n->n.addr); goto done; } - if (a->dest == RTD_DEVICE) - { - if (if_local_addr(he->addr, a->iface)) - { - /* The host address is a local address, this is not valid */ - log(L_WARN "Next hop address %I is a local address of iface %s", - he->addr, a->iface->name); - goto done; - } - - /* The host is directly reachable, use link as a gateway */ - he->gw = he->link; - he->dest = RTD_ROUTER; - } - else + if (a->dest == RTD_UNICAST) { - /* The host is reachable through some route entry */ - he->gw = a->gw; - he->dest = a->dest; + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + if (ipa_zero(nh->gw)) + { + if (if_local_addr(he->addr, nh->iface)) + { + /* The host address is a local address, this is not valid */ + log(L_WARN "Next hop address %I is a local address of iface %s", + he->addr, nh->iface->name); + goto done; + } + + direct++; + } } he->src = rta_clone(a); + he->dest = a->dest; + he->nexthop_linkable = !direct; he->igp_metric = rt_get_igp_metric(e); } - done: +done: /* Add a prefix range to the trie */ - trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH); + trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen); rta_free(old_src); return old_src != he->src; @@ -2338,7 +2515,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -static struct hostentry * +struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -2346,277 +2523,17 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) if (!tab->hostcache) rt_init_hostcache(tab); - uint k = hc_hash(a, dep); + u32 k = 
hc_hash(a, dep); struct hostcache *hc = tab->hostcache; for (he = hc->hash_table[k >> hc->hash_shift]; he != NULL; he = he->next) if (ipa_equal(he->addr, a) && (he->tab == dep)) return he; - he = hc_new_hostentry(hc, a, ll, dep, k); + he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k); rt_update_hostentry(tab, he); return he; } -void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_addr *ll) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, *gw, *ll, dep)); -} - - -/* - * CLI commands - */ - -static byte * -rt_format_via(rte *e) -{ - rta *a = e->attrs; - - /* Max text length w/o IP addr and interface name is 16 */ - static byte via[STD_ADDRESS_P_LENGTH+sizeof(a->iface->name)+16]; - - switch (a->dest) - { - case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; - case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - default: bsprintf(via, "???"); - } - return via; -} - -static void -rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) -{ - byte from[STD_ADDRESS_P_LENGTH+8]; - byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; - rta *a = e->attrs; - int primary = (e->net->routes == e); - int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); - void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); - struct mpnh *nh; - - tm_format_datetime(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw)) - bsprintf(from, " from %I", a->from); - else - from[0] = 0; - - get_route_info = a->src->proto->proto->get_route_info; - if (get_route_info || d->verbose) - { - /* Need to normalize the extended attributes */ - ea_list *t = tmpa; - t = ea_append(t, a->eattrs); - tmpa = alloca(ea_scan(t)); - 
ea_merge(t, tmpa); - ea_sort(tmpa); - } - if (get_route_info) - get_route_info(e, info, tmpa); - else - bsprintf(info, " (%d)", e->pref); - cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name, - tm, from, primary ? (sync_error ? " !" : " *") : "", info); - for (nh = a->nexthops; nh; nh = nh->next) - cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1); - if (d->verbose) - rta_show(c, a, tmpa); -} - -static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) -{ - rte *e, *ee; - byte ia[STD_ADDRESS_P_LENGTH+8]; - struct ea_list *tmpa; - struct announce_hook *a = NULL; - int first = 1; - int pass = 0; - - bsprintf(ia, "%I/%d", n->n.prefix, n->n.pxlen); - - if (d->export_mode) - { - if (! d->export_protocol->rt_notify) - return; - - a = proto_find_announce_hook(d->export_protocol, d->table); - if (!a) - return; - } - - for (e = n->routes; e; e = e->next) - { - if (rte_is_filtered(e) != d->filtered) - continue; - - d->rt_counter++; - d->net_counter += first; - first = 0; - - if (pass) - continue; - - ee = e; - rte_update_lock(); /* We use the update buffer for filtering */ - tmpa = rte_make_tmp_attrs(e, rte_update_pool); - - /* Special case for merged export */ - if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) - { - rte *rt_free; - e = rt_export_merged(a, n, &rt_free, &tmpa, rte_update_pool, 1); - pass = 1; - - if (!e) - { e = ee; goto skip; } - } - else if (d->export_mode) - { - struct proto *ep = d->export_protocol; - int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0; - - if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED) - pass = 1; - - if (ic < 0) - goto skip; - - if (d->export_mode > RSEM_PREEXPORT) - { - /* - * FIXME - This shows what should be exported according to current - * filters, but not what was really exported. 
'configure soft' - * command may change the export filter and do not update routes. - */ - int do_export = (ic > 0) || - (f_run(a->out_filter, &e, &tmpa, rte_update_pool, - FF_FORCE_TMPATTR | FF_SILENT) <= F_ACCEPT); - - if (do_export != (d->export_mode == RSEM_EXPORT)) - goto skip; - - if ((d->export_mode == RSEM_EXPORT) && (ep->accept_ra_types == RA_ACCEPTED)) - pass = 1; - } - } - - if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) - goto skip; - - if (f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT) - goto skip; - - d->show_counter++; - if (d->stats < 2) - rt_show_rte(c, ia, e, d, tmpa); - ia[0] = 0; - - skip: - if (e != ee) - { - rte_free(e); - e = ee; - } - rte_update_unlock(); - - if (d->primary_only) - break; - } -} - -static void -rt_show_cont(struct cli *c) -{ - struct rt_show_data *d = c->rover; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &d->table->fib; - struct fib_iterator *it = &d->fit; - - FIB_ITERATE_START(fib, it, f) - { - net *n = (net *) f; - if (d->running_on_config && d->running_on_config != config) - { - cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; - } - if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN)) - { - cli_printf(c, 8005, "Protocol is down"); - goto done; - } - if (!max--) - { - FIB_ITERATE_PUT(it, f); - return; - } - rt_show_net(c, n, d); - } - FIB_ITERATE_END(f); - if (d->stats) - cli_printf(c, 14, "%d of %d routes for %d networks", d->show_counter, d->rt_counter, d->net_counter); - else - cli_printf(c, 0, ""); -done: - c->cont = c->cleanup = NULL; -} - -static void -rt_show_cleanup(struct cli *c) -{ - struct rt_show_data *d = c->rover; - - /* Unlink the iterator */ - fit_get(&d->table->fib, &d->fit); -} - -void -rt_show(struct rt_show_data *d) -{ - net *n; - - /* Default is either a master table or a table related to a respective protocol */ - if (!d->table && d->export_protocol) d->table = 
d->export_protocol->table; - if (!d->table && d->show_protocol) d->table = d->show_protocol->table; - if (!d->table) d->table = config->master_rtc->table; - - /* Filtered routes are neither exported nor have sensible ordering */ - if (d->filtered && (d->export_mode || d->primary_only)) - cli_msg(0, ""); - - if (d->pxlen == 256) - { - FIB_ITERATE_INIT(&d->fit, &d->table->fib); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - if (d->show_for) - n = net_route(d->table, d->prefix, d->pxlen); - else - n = net_find(d->table, d->prefix, d->pxlen); - - if (n) - rt_show_net(this_cli, n, d); - - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not in table"); - } -} /* * Documentation for functions declared inline in route.h @@ -2627,26 +2544,24 @@ rt_show(struct rt_show_data *d) * net_find - find a network entry * @tab: a routing table * @addr: address of the network - * @len: length of the network prefix * * net_find() looks up the given network in routing table @tab and * returns a pointer to its &net entry or %NULL if no such network * exists. */ -static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) +static inline net *net_find(rtable *tab, net_addr *addr) { DUMMY; } /** * net_get - obtain a network entry * @tab: a routing table * @addr: address of the network - * @len: length of the network prefix * * net_get() looks up the given network in routing table @tab and * returns a pointer to its &net entry. If no such entry exists, it's * created. */ -static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) +static inline net *net_get(rtable *tab, net_addr *addr) { DUMMY; } /** |