diff options
author | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-09-18 17:29:00 +0200 |
---|---|---|
committer | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2018-09-18 17:50:45 +0200 |
commit | c0fc3e67185c1e0ff2d083572c6ad3983ba4ef25 (patch) | |
tree | 9fe17603f4fec18b7f9fbd8d8efc1f13a9bef8ee /proto | |
parent | afa14f1868f2c753efdc81ce8e2c2d44e6bdd80e (diff) |
The MRT protocol
The new MRT protocol is responsible for periodic RIB table dumps in the
MRT format (RFC 6396). Also, the existing code for BGP4MP MRT dumps is
refactored and split between the BGP and MRT protocols; it will be more
integrated into MRT in the future.
Example:
protocol mrt {
table "*";
filename "%N_%F_%T.mrt";
period 60;
}
It is partially based on the old MRT code from Pavel Tvrdik.
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/attrs.c | 7 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 2 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 2 | ||||
-rw-r--r-- | proto/bgp/packets.c | 96 | ||||
-rw-r--r-- | proto/mrt/Makefile | 5 | ||||
-rw-r--r-- | proto/mrt/config.Y | 67 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 882 | ||||
-rw-r--r-- | proto/mrt/mrt.h | 156 | ||||
-rw-r--r-- | proto/mrt/progdoc | 1 |
9 files changed, 1147 insertions, 71 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 3b88791d..05fcfe72 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -471,7 +471,7 @@ bgp_get_attr_len(eattr *a) /** * bgp_encode_attrs - encode BGP attributes - * @p: BGP instance + * @p: BGP instance (or NULL) * @w: buffer * @attrs: a list of extended attributes * @remains: remaining space in the buffer @@ -485,6 +485,7 @@ uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) { uint i, code, type, flags; + int as4_session = p ? p->as4_session : 1; byte *start = w; int len, rv; @@ -504,7 +505,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH * as optional AS4_PATH attribute. */ - if ((code == BA_AS_PATH) && (! p->as4_session)) + if ((code == BA_AS_PATH) && !as4_session) { len = a->u.ptr->length; @@ -546,7 +547,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) } /* The same issue with AGGREGATOR attribute */ - if ((code == BA_AGGREGATOR) && (! 
p->as4_session)) + if ((code == BA_AGGREGATOR) && !as4_session) { int new_used; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index a4b37691..d4b056be 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -352,7 +352,7 @@ static inline void bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state) { if (conn->bgp->p.mrtdump & MD_STATES) - mrt_dump_bgp_state_change(conn, conn->state, new_state); + bgp_dump_state_change(conn, conn->state, new_state); conn->state = new_state; } diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2ff61834..d76f7f22 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -288,7 +288,7 @@ inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, /* packets.c */ -void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new); +void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new); void bgp_schedule_packet(struct bgp_conn *conn, int type); void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index a39670ef..2248b9f9 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -13,7 +13,7 @@ #include "nest/protocol.h" #include "nest/route.h" #include "nest/attrs.h" -#include "nest/mrtdump.h" +#include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" #include "lib/socket.h" @@ -38,81 +38,45 @@ static byte fsm_err_subcode[BS_MAX] = { [BS_ESTABLISHED] = 3 }; -/* - * MRT Dump format is not semantically specified. - * We will use these values in appropriate fields: - * - * Local AS, Remote AS - configured AS numbers for given BGP instance. - * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection) - * - * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state - * changes) and MESSAGE (for received BGP messages). 
- * - * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant - * only when AS4 session is established and even in that case MESSAGE - * does not use AS4 variant for initial OPEN message. This strange - * behavior is here for compatibility with Quagga and Bgpdump, - */ - -static byte * -mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) +static void +init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) { struct bgp_proto *p = conn->bgp; - - if (as4) - { - put_u32(buf+0, p->remote_as); - put_u32(buf+4, p->local_as); - buf+=8; - } - else - { - put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); - put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS); - buf+=4; - } - - put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); - put_u16(buf+2, BGP_AF); - buf+=4; - buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE); - buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE); - - return buf; + int p_ok = conn->state >= BS_OPENCONFIRM; + + memset(d, 0, sizeof(struct mrt_bgp_data)); + d->peer_as = p->remote_as; + d->local_as = p->local_as; + d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; + d->af = BGP_AF; + d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; + d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; + d->as4 = p_ok ? p->as4_session : 0; + d->add_path = p_ok ? p->add_path_rx : 0; } static void -mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len) { - byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ - byte *bp = buf + MRTDUMP_HDR_LENGTH; - int as4 = conn->bgp->as4_session; - - bp = mrt_put_bgp4_hdr(bp, conn, as4); - memcpy(bp, pkt, len); - bp += len; - mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? 
BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE, - buf, bp-buf); -} + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); -static inline u16 -convert_state(unsigned state) -{ - /* Convert state from our BS_* values to values used in MRTDump */ - return (state == BS_CLOSE) ? 1 : state + 1; + d.message = pkt; + d.msg_len = len; + + mrt_dump_bgp_message(&d); } void -mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new) +bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new) { - byte buf[128]; - byte *bp = buf + MRTDUMP_HDR_LENGTH; - - bp = mrt_put_bgp4_hdr(bp, conn, 1); - put_u16(bp+0, convert_state(old)); - put_u16(bp+2, convert_state(new)); - bp += 4; - mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf); + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); + + d.old_state = old; + d.new_state = new; + + mrt_dump_bgp_state_change(&d); } static byte * @@ -1764,7 +1728,7 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) DBG("BGP: Got packet %02x (%d bytes)\n", type, len); if (conn->bgp->p.mrtdump & MD_MESSAGES) - mrt_dump_bgp_packet(conn, pkt, len); + bgp_dump_message(conn, pkt, len); switch (type) { diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile new file mode 100644 index 00000000..aebb0f74 --- /dev/null +++ b/proto/mrt/Makefile @@ -0,0 +1,5 @@ +source=mrt.c +root-rel=../../ +dir-name=proto/mrt + +include ../../Rules diff --git a/proto/mrt/config.Y b/proto/mrt/config.Y new file mode 100644 index 00000000..b6fecd1e --- /dev/null +++ b/proto/mrt/config.Y @@ -0,0 +1,67 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +CF_HDR + +#include "proto/mrt/mrt.h" + +CF_DEFINES + +#define MRT_CFG ((struct mrt_config *) this_proto) + +CF_DECLS + +CF_KEYWORDS(MRT, TABLE, FILTER, FILENAME, PERIOD, ALWAYS, ADD, PATH, DUMP, TO) + +%type <md> mrt_dump_args + +CF_GRAMMAR + +CF_ADDTO(proto, mrt_proto) + +mrt_proto_start: proto_start MRT +{ + this_proto = proto_config_new(&proto_mrt, $1); +}; + +mrt_proto_item: + proto_item + | TABLE TEXT { MRT_CFG->table_expr = $2; } + | FILTER filter { MRT_CFG->filter = $2; } + | where_filter { MRT_CFG->filter = $1; } + | FILENAME text { MRT_CFG->filename = $2; } + | PERIOD expr { MRT_CFG->period = $2; } + | ALWAYS ADD PATH bool { MRT_CFG->always_add_path = $4; } + ; + +mrt_proto_opts: + /* empty */ + | mrt_proto_opts mrt_proto_item ';' + ; + +mrt_proto: + mrt_proto_start proto_name '{' mrt_proto_opts '}' { mrt_check_config(this_proto); }; + +CF_CLI_HELP(MRT DUMP, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>] , [[Save MRT Table Dump into a file]]) +CF_CLI(MRT DUMP, mrt_dump_args, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>], [[Save mrt table dump v2 of table name <t> right now]]) +{ mrt_dump_cmd($3); } ; + +mrt_dump_args: + /* empty */ { $$ = cfg_allocz(sizeof(struct mrt_dump_data)); } + | mrt_dump_args TABLE rtable { $$ = $1; $$->table_ptr = $3->table; } + | mrt_dump_args TABLE TEXT { $$ = $1; $$->table_expr = $3; } + | mrt_dump_args FILTER filter { $$ = $1; $$->filter = $3; } + | mrt_dump_args where_filter { $$ = $1; $$->filter = $2; } + | mrt_dump_args TO text { $$ = $1; $$->filename = $3; } + ; + + +CF_CODE + +CF_END diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c new file mode 100644 index 00000000..5c457b6c --- /dev/null +++ b/proto/mrt/mrt.c @@ -0,0 +1,882 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. 
+ * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Multi-Threaded Routing Toolkit (MRT) protocol + * + * The MRT protocol is implemented in just one file: |mrt.c|. It consists of + * several parts: Generic functions for preparing MRT messages in a buffer, + * functions for MRT table dump (called from timer or CLI), functions for MRT + * BGP4MP dump (called from BGP), and the usual protocol glue. For the MRT table + * dump, the key structure is struct mrt_table_dump_state, which contains all + * necessary data and is created when the MRT dump cycle is started for the + * duration of the MRT dump. The BGP4MP dump is currently not bound to an MRT + * protocol instance and uses the config->mrtdump_file fd. + * + * The protocol is simple, it just periodically scans routing tables and exports them + * to a file. It does not use the regular update mechanism, but a direct access + * in order to handle iteration through multiple routing tables. The table dump + * needs to dump all peers first and then use indexes to address the peers; we + * use a hash table (@peer_hash) to find peer index based on BGP protocol key + * attributes. + * + * One thing worth documenting is the locking. During processing, the currently + * processed table (@table field in the state structure) is locked and also the + * explicitly named table is locked (@table_ptr field in the state structure) if + * specified. Between dumps no table is locked. Also the current config is + * locked (by config_add_obstacle()) during table dumps as some data (strings, + * filters) are shared from the config and the running table dump may be + * interrupted by reconfiguration. 
+ * + * Supported standards: + * - RFC 6396 - MRT format standard + * - RFC 8050 - ADD_PATH extension + */ + +#include <unistd.h> +#include <limits.h> +#include <errno.h> + +#include "mrt.h" + +#include "nest/cli.h" +#include "filter/filter.h" +#include "proto/bgp/bgp.h" +#include "sysdep/unix/unix.h" + + +#ifdef PATH_MAX +#define BIRD_PATH_MAX PATH_MAX +#else +#define BIRD_PATH_MAX 4096 +#endif + +#define mrt_log(s, msg, args...) \ + ({ \ + if (s->cli) \ + cli_printf(s->cli, -8009, msg, ## args); \ + if (s->proto) \ + log(L_ERR "%s: " msg, s->proto->p.name, ## args); \ + }) + + +/* + * MRT buffer code + */ + +static void +mrt_buffer_init(buffer *b, pool *pool, size_t n) +{ + b->start = mb_alloc(pool, n); + b->pos = b->start; + b->end = b->start + n; +} + +static void +mrt_buffer_grow(buffer *b, size_t n) +{ + size_t used = b->pos - b->start; + size_t size = b->end - b->start; + size_t req = used + n; + + while (size < req) + size = size * 3 / 2; + + b->start = mb_realloc(b->start, size); + b->pos = b->start + used; + b->end = b->start + size; +} + +static inline void +mrt_buffer_need(buffer *b, size_t n) +{ + if (b->pos + n > b->end) + mrt_buffer_grow(b, n); +} + +static inline uint +mrt_buffer_pos(buffer *b) +{ + return b->pos - b->start; +} + +static inline void +mrt_buffer_flush(buffer *b) +{ + b->pos = b->start; +} + +#define MRT_DEFINE_TYPE(S, T) \ + static inline void mrt_put_##S##_(buffer *b, T x) \ + { \ + put_##S(b->pos, x); \ + b->pos += sizeof(T); \ + } \ + \ + static inline void mrt_put_##S(buffer *b, T x) \ + { \ + mrt_buffer_need(b, sizeof(T)); \ + put_##S(b->pos, x); \ + b->pos += sizeof(T); \ + } + +MRT_DEFINE_TYPE(u8, u8) +MRT_DEFINE_TYPE(u16, u16) +MRT_DEFINE_TYPE(u32, u32) +MRT_DEFINE_TYPE(u64, u64) +MRT_DEFINE_TYPE(ip4, ip4_addr) +MRT_DEFINE_TYPE(ip6, ip6_addr) + +static inline void +mrt_put_ipa(buffer *b, ip_addr x) +{ + if (ipa_is_ip4(x)) + mrt_put_ip4(b, ipa_to_ip4(x)); + else + mrt_put_ip6(b, ipa_to_ip6(x)); +} + +static inline void 
+mrt_put_data(buffer *b, const void *src, size_t n) +{ + if (!n) + return; + + mrt_buffer_need(b, n); + memcpy(b->pos, src, n); + b->pos += n; +} + +static void +mrt_init_message(buffer *b, u16 type, u16 subtype) +{ + /* Reset buffer */ + mrt_buffer_flush(b); + mrt_buffer_need(b, MRT_HDR_LENGTH); + + /* Prepare header */ + mrt_put_u32_(b, now_real); + mrt_put_u16_(b, type); + mrt_put_u16_(b, subtype); + + /* Message length, will be fixed later */ + mrt_put_u32_(b, 0); +} + +static void +mrt_dump_message(buffer *b, int fd) +{ + uint len = mrt_buffer_pos(b); + + /* Fix message length */ + ASSERT(len >= MRT_HDR_LENGTH); + put_u32(b->start + 8, len - MRT_HDR_LENGTH); + + if (fd < 0) + return; + + if (write(fd, b->start, len) < 0) + log(L_ERR "Write to MRT file failed: %m"); /* TODO: name of file */ +} + +static int +bstrsub(char *dst, size_t n, const char *src, const char *key, const char *val) +{ + const char *last, *next; + char *pos = dst; + size_t step, klen = strlen(key), vlen = strlen(val); + + for (last = src; next = strstr(last, key); last = next + klen) + { + step = next - last; + if (n <= step + vlen) + return 0; + + memcpy(pos, last, step); + ADVANCE(pos, n, step); + + memcpy(pos, val, vlen); + ADVANCE(pos, n, vlen); + } + + step = strlen(last); + if (n <= step) + return 0; + + memcpy(pos, last, step); + ADVANCE(pos, n, step); + + pos[0] = 0; + return 1; +} + +static inline rtable * +mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) +{ + /* Handle explicit table, return it in the first pass */ + if (tab_ptr) + return !tab ? tab_ptr : NULL; + + /* Walk routing_tables list, starting after tab (if non-NULL) */ + for (tab = !tab ? 
HEAD(routing_tables) : NODE_NEXT(tab); + NODE_VALID(tab); + tab = NODE_NEXT(tab)) + if (patmatch(pattern, tab->name)) + return tab; + + return NULL; +} + +static rtable * +mrt_next_table(struct mrt_table_dump_state *s) +{ + rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); + + if (s->table) + rt_unlock_table(s->table); + + s->table = tab; + + if (s->table) + rt_lock_table(s->table); + + return s->table; +} + +static int +mrt_open_file(struct mrt_table_dump_state *s) +{ + char fmt1[BIRD_PATH_MAX]; + char name[BIRD_PATH_MAX]; + + if (!bstrsub(fmt1, sizeof(fmt1), s->filename, "%N", s->table->name) || + !tm_format_real_time(name, sizeof(name), fmt1, now_real)) + { + mrt_log(s, "Invalid filename '%s'", s->filename); + return 0; + } + + s->file = rf_open(s->pool, name, "a"); + if (!s->file) + { + mrt_log(s, "Unable to open MRT file '%s': %m", name); + return 0; + } + + s->fd = rf_fileno(s->file); + s->time_offset = now_real - now; + + return 1; +} + +static void +mrt_close_file(struct mrt_table_dump_state *s) +{ + rfree(s->file); + s->file = NULL; + s->fd = -1; +} + + +/* + * MRT Table Dump: Peer Index Table + */ + +#define PEER_KEY(n) n->peer_id, n->peer_as, n->peer_ip +#define PEER_NEXT(n) n->next +#define PEER_EQ(id1,as1,ip1,id2,as2,ip2) \ + id1 == id2 && as1 == as2 && ipa_equal(ip1, ip2) +#define PEER_FN(id,as,ip) ipa_hash(ip) + +static void +mrt_peer_table_header(struct mrt_table_dump_state *s, u32 router_id, const char *name) +{ + buffer *b = &s->buf; + + /* Collector BGP ID */ + mrt_put_u32(b, router_id); + + /* View Name */ + uint name_length = name ? 
strlen(name) : 0; + name_length = MIN(name_length, 65535); + mrt_put_u16(b, name_length); + mrt_put_data(b, name, name_length); + + /* Peer Count, will be fixed later */ + s->peer_count = 0; + s->peer_count_offset = mrt_buffer_pos(b); + mrt_put_u16(b, 0); + + HASH_INIT(s->peer_hash, s->pool, 10); +} + +static void +mrt_peer_table_entry(struct mrt_table_dump_state *s, u32 peer_id, u32 peer_as, ip_addr peer_ip) +{ + buffer *b = &s->buf; + + uint type = MRT_PEER_TYPE_32BIT_ASN; + if (ipa_is_ip6(peer_ip)) + type |= MRT_PEER_TYPE_IPV6; + + /* Dump peer to buffer */ + mrt_put_u8(b, type); + mrt_put_u32(b, peer_id); + mrt_put_ipa(b, peer_ip); + mrt_put_u32(b, peer_as); + + /* Add peer to hash table */ + struct mrt_peer_entry *n = lp_allocz(s->peer_lp, sizeof(struct mrt_peer_entry)); + n->peer_id = peer_id; + n->peer_as = peer_as; + n->peer_ip = peer_ip; + n->index = s->peer_count++; + + HASH_INSERT(s->peer_hash, PEER, n); +} + +static void +mrt_peer_table_dump(struct mrt_table_dump_state *s) +{ + mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, MRT_PEER_INDEX_TABLE); + mrt_peer_table_header(s, config->router_id, s->table->name); + + /* 0 is fake peer for non-BGP routes */ + mrt_peer_table_entry(s, 0, 0, IPA_NONE); + +#ifdef CONFIG_BGP + struct proto *P; + WALK_LIST(P, active_proto_list) + if (P->proto == &proto_bgp) + { + struct bgp_proto *p = (void *) P; + mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->cf->remote_ip); + } +#endif + + /* Fix Peer Count */ + put_u16(s->buf.start + s->peer_count_offset, s->peer_count); + + mrt_dump_message(&s->buf, s->fd); +} + +static void +mrt_peer_table_flush(struct mrt_table_dump_state *s) +{ + lp_flush(s->peer_lp); + HASH_FREE(s->peer_hash); +} + + +/* + * MRT Table Dump: RIB Table + */ + +static void +mrt_rib_table_header(struct mrt_table_dump_state *s, net *n) +{ + buffer *b = &s->buf; + + /* Sequence Number */ + mrt_put_u32(b, s->seqnum); + + /* Network Prefix */ + ip_addr a = n->n.prefix; + ipa_hton(a); + + mrt_put_u8(b, 
n->n.pxlen); + mrt_put_data(b, &a, BYTES(n->n.pxlen)); + + /* Entry Count, will be fixed later */ + s->entry_count = 0; + s->entry_count_offset = mrt_buffer_pos(b); + mrt_put_u16(b, 0); +} + +static void +mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r, struct ea_list *tmpa) +{ + buffer *b = &s->buf; + uint peer = 0; + +#ifdef CONFIG_BGP + /* Find peer index */ + if (r->attrs->src->proto->proto == &proto_bgp) + { + struct bgp_proto *p = (void *) r->attrs->src->proto; + struct mrt_peer_entry *n = + HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->cf->remote_ip); + + peer = n ? n->index : 0; + } +#endif + + /* Peer Index and Originated Time */ + mrt_put_u16(b, peer); + mrt_put_u32(b, r->lastmod + s->time_offset); + + /* Path Identifier */ + if (s->add_path) + mrt_put_u32(b, r->attrs->src->private_id); + + /* Route Attributes */ + mrt_put_u16(b, 0); + +#ifdef CONFIG_BGP + if (r->attrs->eattrs || tmpa) + { + struct ea_list *eattrs = r->attrs->eattrs; + + if (!rta_is_cached(r->attrs) || tmpa) + { + /* Attributes must be merged and sorted for bgp_encode_attrs() */ + tmpa = ea_append(tmpa, eattrs); + eattrs = alloca(ea_scan(tmpa)); + ea_merge(tmpa, eattrs); + ea_sort(eattrs); + } + + mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); + int alen = bgp_encode_attrs(NULL, b->pos, eattrs, MRT_ATTR_BUFFER_SIZE); + + if (alen < 0) + { + mrt_log(s, "Attribute list too long for %I/%d", + r->net->n.prefix, r->net->n.pxlen); + alen = 0; + } + + put_u16(b->pos - 2, alen); + b->pos += alen; + } +#endif + + s->entry_count++; +} + +static void +mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) +{ + rte *rt, *rt0; + int subtype; + + s->add_path = add_path; + +#ifndef IPV6 + subtype = !add_path ? MRT_RIB_IPV4_UNICAST : MRT_RIB_IPV4_UNICAST_ADDPATH; +#else + subtype = !add_path ? 
MRT_RIB_IPV6_UNICAST : MRT_RIB_IPV6_UNICAST_ADDPATH; +#endif + + mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); + mrt_rib_table_header(s, n); + + for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) + { + if (rte_is_filtered(rt)) + continue; + + /* Skip routes that should be reported in the other phase */ + if (!s->always_add_path && (!rt->attrs->src->private_id != !s->add_path)) + { + s->want_add_path = 1; + continue; + } + + struct ea_list *tmp_attrs = rte_make_tmp_attrs(rt, s->linpool); + + if (f_run(s->filter, &rt, &tmp_attrs, s->linpool, 0) <= F_ACCEPT) + mrt_rib_table_entry(s, rt, tmp_attrs); + + if (rt != rt0) + rte_free(rt); + + lp_flush(s->linpool); + } + + /* Fix Entry Count */ + put_u16(s->buf.start + s->entry_count_offset, s->entry_count); + + /* Update max counter */ + s->max -= 1 + s->entry_count; + + /* Skip empty entries */ + if (!s->entry_count) + return; + + s->seqnum++; + mrt_dump_message(&s->buf, s->fd); +} + + +/* + * MRT Table Dump: main logic + */ + +static struct mrt_table_dump_state * +mrt_table_dump_init(pool *pp) +{ + pool *pool = rp_new(pp, "MRT Table Dump"); + struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); + + s->pool = pool; + s->linpool = lp_new(pool, 4080); + s->peer_lp = lp_new(pool, 4080); + mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); + + /* We lock the current config as we may reference it indirectly by filter */ + s->config = config; + config_add_obstacle(s->config); + + s->fd = -1; + + return s; +} + +static void +mrt_table_dump_free(struct mrt_table_dump_state *s) +{ + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); + + if (s->table) + rt_unlock_table(s->table); + + if (s->table_ptr) + rt_unlock_table(s->table_ptr); + + config_del_obstacle(s->config); + + rfree(s->pool); +} + + +static int +mrt_table_dump_step(struct mrt_table_dump_state *s) +{ + s->max = 2048; + + if (s->table_open) + goto step; + + while (mrt_next_table(s)) + { + if 
(!mrt_open_file(s)) + continue; + + mrt_peer_table_dump(s); + + FIB_ITERATE_INIT(&s->fit, &s->table->fib); + s->table_open = 1; + + step: + FIB_ITERATE_START(&s->table->fib, &s->fit, fn) + { + if (s->max < 0) + { + FIB_ITERATE_PUT(&s->fit, fn); + return 0; + } + + /* With Always ADD_PATH option, we jump directly to second phase */ + s->want_add_path = s->always_add_path; + + if (s->want_add_path == 0) + mrt_rib_table_dump(s, (net *) fn, 0); + + if (s->want_add_path == 1) + mrt_rib_table_dump(s, (net *) fn, 1); + } + FIB_ITERATE_END(fn); + s->table_open = 0; + + mrt_close_file(s); + mrt_peer_table_flush(s); + } + + return 1; +} + +static void +mrt_timer(timer *t) +{ + struct mrt_proto *p = t->data; + struct mrt_config *cf = (void *) (p->p.cf); + + if (p->table_dump) + { + log(L_WARN "%s: Earlier RIB table dump still not finished, skipping next one", p->p.name); + return; + } + + TRACE(D_EVENTS, "RIB table dump started"); + + struct mrt_table_dump_state *s = mrt_table_dump_init(p->p.pool); + + s->proto = p; + s->table_expr = cf->table_expr; + s->table_ptr = cf->table_cf ? 
cf->table_cf->table : NULL; + s->filter = cf->filter; + s->filename = cf->filename; + s->always_add_path = cf->always_add_path; + + if (s->table_ptr) + rt_lock_table(s->table_ptr); + + p->table_dump = s; + ev_schedule(p->event); +} + +static void +mrt_event(void *P) +{ + struct mrt_proto *p = P; + + if (!p->table_dump) + return; + + if (!mrt_table_dump_step(p->table_dump)) + { + ev_schedule(p->event); + return; + } + + mrt_table_dump_free(p->table_dump); + p->table_dump = NULL; + + TRACE(D_EVENTS, "RIB table dump done"); + + if (p->p.proto_state == PS_STOP) + proto_notify_state(&p->p, PS_DOWN); +} + + +/* + * MRT Table Dump: CLI command + */ + +static void +mrt_dump_cont(struct cli *c) +{ + if (!mrt_table_dump_step(c->rover)) + return; + + cli_printf(c, 0, ""); + c->cont = c->cleanup = NULL; +} + +static void +mrt_dump_cleanup(struct cli *c) +{ + mrt_table_dump_free(c->rover); + c->rover = NULL; +} + +void +mrt_dump_cmd(struct mrt_dump_data *d) +{ + if (cli_access_restricted()) + return; + + if (!d->table_expr && !d->table_ptr) + cf_error("Table not specified"); + + if (!d->filename) + cf_error("File not specified"); + + struct mrt_table_dump_state *s = mrt_table_dump_init(this_cli->pool); + + s->cli = this_cli; + s->table_expr = d->table_expr; + s->table_ptr = d->table_ptr; + s->filter = d->filter; + s->filename = d->filename; + + if (s->table_ptr) + rt_lock_table(s->table_ptr); + + this_cli->cont = mrt_dump_cont; + this_cli->cleanup = mrt_dump_cleanup; + this_cli->rover = s; +} + + +/* + * MRT BGP4MP dump + */ + +static buffer * +mrt_bgp_buffer(void) +{ + /* Static buffer for BGP4MP dump, TODO: change to use MRT protocol */ + static buffer b; + + if (!b.start) + mrt_buffer_init(&b, &root_pool, 1024); + + return &b; +} + +static void +mrt_bgp_header(buffer *b, struct mrt_bgp_data *d) +{ + if (d->as4) + { + mrt_put_u32(b, d->peer_as); + mrt_put_u32(b, d->local_as); + } + else + { + mrt_put_u16(b, (d->peer_as <= 0xFFFF) ? 
d->peer_as : AS_TRANS); + mrt_put_u16(b, (d->local_as <= 0xFFFF) ? d->local_as : AS_TRANS); + } + + mrt_put_u16(b, (d->index <= 0xFFFF) ? d->index : 0); + mrt_put_u16(b, d->af); + + if (d->af == BGP_AF_IPV4) + { + mrt_put_ip4(b, ipa_to_ip4(d->peer_ip)); + mrt_put_ip4(b, ipa_to_ip4(d->local_ip)); + } + else + { + mrt_put_ip6(b, ipa_to_ip6(d->peer_ip)); + mrt_put_ip6(b, ipa_to_ip6(d->local_ip)); + } +} + +void +mrt_dump_bgp_message(struct mrt_bgp_data *d) +{ + const u16 subtypes[] = { + MRT_BGP4MP_MESSAGE, MRT_BGP4MP_MESSAGE_AS4, + MRT_BGP4MP_MESSAGE_LOCAL, MRT_BGP4MP_MESSAGE_AS4_LOCAL, + MRT_BGP4MP_MESSAGE_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_ADDPATH, + MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH, + }; + + buffer *b = mrt_bgp_buffer(); + mrt_init_message(b, MRT_BGP4MP, subtypes[d->as4 + 4*d->add_path]); + mrt_bgp_header(b, d); + mrt_put_data(b, d->message, d->msg_len); + mrt_dump_message(b, config->mrtdump_file); +} + +void +mrt_dump_bgp_state_change(struct mrt_bgp_data *d) +{ + /* Convert state from our BS_* values to values used in MRTDump */ + const u16 states[BS_MAX] = {1, 2, 3, 4, 5, 6, 1}; + + if (states[d->old_state] == states[d->new_state]) + return; + + /* Always use AS4 mode for STATE_CHANGE */ + d->as4 = 1; + + buffer *b = mrt_bgp_buffer(); + mrt_init_message(b, MRT_BGP4MP, MRT_BGP4MP_STATE_CHANGE_AS4); + mrt_bgp_header(b, d); + mrt_put_u16(b, states[d->old_state]); + mrt_put_u16(b, states[d->new_state]); + mrt_dump_message(b, config->mrtdump_file); +} + + +/* + * MRT protocol glue + */ + +void +mrt_check_config(struct proto_config *C) +{ + struct mrt_config *cf = (void *) C; + + /* c.table must be always defined, but it is relevant only if table_expr is not set */ + if (!cf->table_expr) + cf->table_cf = cf->c.table; + + if (!cf->table_expr && !cf->table_cf) + cf_error("Table not specified"); + + if (!cf->filename) + cf_error("File not specified"); + + if (!cf->period) + cf_error("Period not specified"); +} + +static struct proto * 
+mrt_init(struct proto_config *C) +{ + struct proto *P = proto_new(C, sizeof(struct mrt_proto)); + + return P; +} + +static int +mrt_start(struct proto *P) +{ + struct mrt_proto *p = (void *) P; + struct mrt_config *cf = (void *) (P->cf); + + p->timer = tm_new_set(P->pool, mrt_timer, p, 0, cf->period); + p->event = ev_new_set(P->pool, mrt_event, p); + + tm_start(p->timer, cf->period); + + return PS_UP; +} + +static int +mrt_shutdown(struct proto *P) +{ + struct mrt_proto *p = (void *) P; + + return p->table_dump ? PS_STOP : PS_DOWN; +} + +static int +mrt_reconfigure(struct proto *P, struct proto_config *CF) +{ + struct mrt_proto *p = (void *) P; + struct mrt_config *old = (void *) (P->cf); + struct mrt_config *new = (void *) CF; + + if (new->period != old->period) + { + TRACE(D_EVENTS, "Changing period from %d to %d s", old->period, new->period); + + bird_clock_t new_time = p->timer->expires - old->period + new->period; + tm_start(p->timer, (new_time > now) ? (new_time - now) : 0); + p->timer->recurrent = new->period; + } + + return 1; +} + +static void +mrt_copy_config(struct proto_config *dest, struct proto_config *src) +{ + /* Just a shallow copy, not many items here */ + proto_copy_rest(dest, src, sizeof(struct mrt_config)); +} + + +struct protocol proto_mrt = { + .name = "MRT", + .template = "mrt%d", + .config_size = sizeof(struct mrt_config), + .init = mrt_init, + .start = mrt_start, + .shutdown = mrt_shutdown, + .reconfigure = mrt_reconfigure, + .copy_config = mrt_copy_config, +}; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h new file mode 100644 index 00000000..3f13a859 --- /dev/null +++ b/proto/mrt/mrt.h @@ -0,0 +1,156 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRD_MRT_H_ +#define _BIRD_MRT_H_ + +#include "nest/bird.h" +#include "nest/protocol.h" +#include "lib/lists.h" +#include "nest/route.h" +#include "lib/event.h" +#include "lib/hash.h" + + +struct mrt_config { + struct proto_config c; + + struct rtable_config *table_cf; + const char *table_expr; + struct filter *filter; + const char *filename; + uint period; + int always_add_path; +}; + +struct mrt_proto { + struct proto p; + timer *timer; + event *event; + + struct mrt_target *file; + struct mrt_table_dump_state *table_dump; +}; + +struct mrt_dump_data { + const char *table_expr; + struct rtable *table_ptr; + struct filter *filter; + char *filename; +}; + +struct mrt_peer_entry { + u32 index; + u32 peer_id; + u32 peer_as; + ip_addr peer_ip; + struct mrt_peer_entry *next; +}; + +struct mrt_table_dump_state { + struct mrt_proto *proto; /* Protocol for regular MRT dumps (or NULL) */ + struct cli *cli; /* CLI for irregular MRT dumps (or NULL) */ + struct config *config; /* Config valid during start of dump, locked */ + + /* Configuration information */ + const char *table_expr; /* Wildcard for table name (or NULL) */ + struct rtable *table_ptr; /* Explicit table (or NULL) */ + struct filter *filter; /* Optional filter */ + const char *filename; /* Filename pattern */ + int always_add_path; /* Always use *_ADDPATH message subtypes */ + + /* Allocated by mrt_table_dump_init() */ + pool *pool; /* Pool for table dump */ + linpool *linpool; /* Temporary linear pool */ + linpool *peer_lp; /* Linear pool for peer entries in peer_hash */ + buffer buf; /* Buffer for MRT messages */ + + HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ + + struct rtable *table; /* Processed table, NULL initially */ + struct fib_iterator fit; /* Iterator in processed table */ + int table_open; /* Whether iterator is linked */ + + int add_path; /* Current message subtype is *_ADDPATH */ + int want_add_path; /* Want *_ADDPATH message later */ + int max; 
/* Decreasing counter of dumped routes */ + u32 seqnum; /* MRT message sequence number */ + bird_clock_t time_offset; /* Time offset between monotonic and real time */ + + u16 peer_count; /* Number of peers */ + u32 peer_count_offset; /* Buffer offset to store peer_count later */ + u16 entry_count; /* Number of RIB Entries */ + u32 entry_count_offset; /* Buffer offset to store entry_count later */ + + struct rfile *file; /* tracking for mrt table dump file */ + int fd; +}; + +struct mrt_bgp_data { + uint peer_as; + uint local_as; + uint index; + uint af; + ip_addr peer_ip; + ip_addr local_ip; + byte *message; + uint msg_len; + uint old_state; + uint new_state; + u8 as4; + u8 add_path; +}; + + +#define MRT_HDR_LENGTH 12 /* MRT Timestamp + MRT Type + MRT Subtype + MRT Load Length */ +#define MRT_PEER_TYPE_32BIT_ASN 2 /* MRT Table Dump: Peer Index Table: Peer Type: Use 32bit ASN */ +#define MRT_PEER_TYPE_IPV6 1 /* MRT Table Dump: Peer Index Table: Peer Type: Use IPv6 IP Address */ + +#define MRT_ATTR_BUFFER_SIZE 65536 + +/* MRT Types */ +#define MRT_TABLE_DUMP_V2 13 +#define MRT_BGP4MP 16 + +/* MRT Table Dump v2 Subtypes */ +#define MRT_PEER_INDEX_TABLE 1 +#define MRT_RIB_IPV4_UNICAST 2 +#define MRT_RIB_IPV4_MULTICAST 3 +#define MRT_RIB_IPV6_UNICAST 4 +#define MRT_RIB_IPV6_MULTICAST 5 +#define MRT_RIB_GENERIC 6 +#define MRT_RIB_IPV4_UNICAST_ADDPATH 8 +#define MRT_RIB_IPV4_MULTICAST_ADDPATH 9 +#define MRT_RIB_IPV6_UNICAST_ADDPATH 10 +#define MRT_RIB_IPV6_MULTICAST_ADDPATH 11 +#define MRT_RIB_GENERIC_ADDPATH 12 + +/* MRT BGP4MP Subtypes */ +#define MRT_BGP4MP_MESSAGE 1 +#define MRT_BGP4MP_MESSAGE_AS4 4 +#define MRT_BGP4MP_STATE_CHANGE_AS4 5 +#define MRT_BGP4MP_MESSAGE_LOCAL 6 +#define MRT_BGP4MP_MESSAGE_AS4_LOCAL 7 +#define MRT_BGP4MP_MESSAGE_ADDPATH 8 +#define MRT_BGP4MP_MESSAGE_AS4_ADDPATH 9 +#define MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH 10 +#define MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH 11 + + +#ifdef CONFIG_MRT +void mrt_dump_cmd(struct mrt_dump_data *d); +void 
mrt_dump_bgp_message(struct mrt_bgp_data *d); +void mrt_dump_bgp_state_change(struct mrt_bgp_data *d); +void mrt_check_config(struct proto_config *C); +#else +static inline void mrt_dump_bgp_message(struct mrt_bgp_data *d UNUSED) { } +static inline void mrt_dump_bgp_state_change(struct mrt_bgp_data *d UNUSED) { } +#endif + +#endif /* _BIRD_MRT_H_ */ diff --git a/proto/mrt/progdoc b/proto/mrt/progdoc new file mode 100644 index 00000000..5f61a9bb --- /dev/null +++ b/proto/mrt/progdoc @@ -0,0 +1 @@ +S mrt.c |