diff options
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/attrs.c | 12 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 9 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 4 | ||||
-rw-r--r-- | proto/bgp/packets.c | 180 | ||||
-rw-r--r-- | proto/mrt/Makefile | 6 | ||||
-rw-r--r-- | proto/mrt/config.Y | 68 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 891 | ||||
-rw-r--r-- | proto/mrt/mrt.h | 158 | ||||
-rw-r--r-- | proto/mrt/progdoc | 1 |
9 files changed, 1255 insertions, 74 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index dc267fdb..dcc4a273 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -285,15 +285,20 @@ bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size) * store it and encode it later by AFI-specific hooks. */ - if ((s->channel->afi == BGP_AF_IPV4) && !s->channel->ext_next_hop) + if (!s->mp_reach) { - ASSERT(a->u.ptr->length == sizeof(ip_addr)); + // ASSERT(a->u.ptr->length == sizeof(ip_addr)); + + /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */ + ip_addr *addr = (void *) a->u.ptr->data; + if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr)) + return 0; if (size < (3+4)) return -1; bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4); - put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + put_ip4(buf+3, ipa_to_ip4(*addr)); return 3+4; } @@ -946,6 +951,7 @@ bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) * * The bgp_encode_attrs() function takes a list of extended attributes * and converts it to its BGP representation (a part of an Update message). + * BGP write state may be fake when called from MRT protocol. * * Result: Length of the attribute block generated or -1 if not enough space. 
*/ diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index e2a57137..7f2eb4d0 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -477,7 +477,7 @@ static inline void bgp_conn_set_state(struct bgp_conn *conn, uint new_state) { if (conn->bgp->p.mrtdump & MD_STATES) - mrt_dump_bgp_state_change(conn, conn->state, new_state); + bgp_dump_state_change(conn, conn->state, new_state); conn->state = new_state; } @@ -528,6 +528,9 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) /* Number of active channels */ int num = 0; + /* Summary state of ADD_PATH RX for active channels */ + uint summary_add_path_rx = 0; + WALK_LIST(c, p->p.channels) { const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); @@ -586,6 +589,9 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX); c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX); + if (active) + summary_add_path_rx |= !c->add_path_rx ? 
1 : 2; + /* Update RA mode */ if (c->add_path_tx) c->c.ra_mode = RA_ANY; @@ -598,6 +604,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32)); p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *)); p->channel_count = num; + p->summary_add_path_rx = summary_add_path_rx; WALK_LIST(c, p->p.channels) { diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 6f0a5587..2729780c 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -266,6 +266,7 @@ struct bgp_proto { u8 llgr_ready; /* Neighbor could do Long-lived GR, implies gr_ready */ u8 gr_active_num; /* Neighbor is doing GR, number of active channels */ u8 channel_count; /* Number of active channels */ + u8 summary_add_path_rx; /* Summary state of ADD_PATH RX w.r.t active channels */ u32 *afi_map; /* Map channel index -> AFI */ struct bgp_channel **channel_map; /* Map channel index -> channel */ struct bgp_conn *conn; /* Connection we have established */ @@ -361,6 +362,7 @@ struct bgp_write_state { struct bgp_channel *channel; struct linpool *pool; + int mp_reach; int as4_session; int add_path; int mpls; @@ -538,7 +540,7 @@ void bgp_get_route_info(struct rte *, byte *buf); /* packets.c */ -void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new); +void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new); const struct bgp_af_desc *bgp_get_af_desc(u32 afi); const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index ed1db04b..3be48c00 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -17,7 +17,7 @@ #include "nest/protocol.h" #include "nest/route.h" #include "nest/attrs.h" -#include "nest/mrtdump.h" +#include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" #include "lib/flowspec.h" @@ -90,91 +90,71 @@ get_af4(byte *buf) return 
(get_u16(buf) << 16) | buf[3]; } -/* - * MRT Dump format is not semantically specified. - * We will use these values in appropriate fields: - * - * Local AS, Remote AS - configured AS numbers for given BGP instance. - * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection) - * - * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state - * changes) and MESSAGE (for received BGP messages). - * - * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant - * only when AS4 session is established and even in that case MESSAGE - * does not use AS4 variant for initial OPEN message. This strange - * behavior is here for compatibility with Quagga and Bgpdump, - */ - -static byte * -mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) +static void +init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) { struct bgp_proto *p = conn->bgp; - uint v4 = ipa_is_ip4(p->cf->remote_ip); + int p_ok = conn->state >= BS_OPENCONFIRM; - if (as4) - { - put_u32(buf+0, p->remote_as); - put_u32(buf+4, p->public_as); - buf+=8; - } - else - { - put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); - put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS); - buf+=4; - } + memset(d, 0, sizeof(struct mrt_bgp_data)); + d->peer_as = p->remote_as; + d->local_as = p->local_as; + d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; + d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6; + d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; + d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; + d->as4 = p_ok ? p->as4_session : 0; +} - put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); - put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6); - buf+=4; +static uint bgp_find_update_afi(byte *pos, uint len); - if (v4) - { - buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE); - buf = put_ip4(buf, conn->sk ? 
ipa_to_ip4(conn->sk->saddr) : IP4_NONE); - } - else +static int +bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len) +{ + /* No need to estimate it for other messages than UPDATE */ + if (pkt[18] != PKT_UPDATE) + return 0; + + /* 1 -> no channel, 2 -> all channels, 3 -> some channels */ + if (p->summary_add_path_rx < 3) + return p->summary_add_path_rx == 2; + + uint afi = bgp_find_update_afi(pkt, len); + struct bgp_channel *c = bgp_get_channel(p, afi); + if (!c) { - buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE); - buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE); + /* Either frame error (if !afi) or unknown AFI/SAFI, + will be reported later in regular parsing */ + BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet"); + return 0; } - return buf; + return c->add_path_rx; } static void -mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len) +bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len) { - byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ - byte *bp = buf + MRTDUMP_HDR_LENGTH; - int as4 = conn->bgp->as4_session; + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); - bp = mrt_put_bgp4_hdr(bp, conn, as4); - memcpy(bp, pkt, len); - bp += len; - mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE, - buf, bp-buf); -} + d.message = pkt; + d.msg_len = len; + d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len); -static inline u16 -convert_state(uint state) -{ - /* Convert state from our BS_* values to values used in MRTDump */ - return (state == BS_CLOSE) ? 
1 : state + 1; + mrt_dump_bgp_message(&d); } void -mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new) +bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new) { - byte buf[128]; - byte *bp = buf + MRTDUMP_HDR_LENGTH; + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); + + d.old_state = old; + d.new_state = new; - bp = mrt_put_bgp4_hdr(bp, conn, 1); - put_u16(bp+0, convert_state(old)); - put_u16(bp+2, convert_state(new)); - bp += 4; - mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf); + mrt_dump_bgp_state_change(&d); } static byte * @@ -2135,6 +2115,7 @@ again: ; .proto = p, .channel = c, .pool = bgp_linpool, + .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, .mpls = c->desc->mpls, @@ -2162,7 +2143,7 @@ again: ; goto again; } - res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? + res = !s.mp_reach ? bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); @@ -2389,6 +2370,67 @@ done: return; } +static uint +bgp_find_update_afi(byte *pos, uint len) +{ + /* + * This is a stripped-down version of bgp_rx_update(), bgp_decode_attrs() and + * bgp_decode_mp_[un]reach_nlri() used by MRT code in order to find out which + * AFI/SAFI is associated with incoming UPDATE. Returns 0 for framing errors. + */ + if (len < 23) + return 0; + + /* Assume there is no withdrawn NLRI, read lengths and move to attribute list */ + uint wlen = get_u16(pos + 19); + uint alen = get_u16(pos + 21); + ADVANCE(pos, len, 23); + + /* Either non-zero withdrawn NLRI, non-zero reachable NLRI, or IPv4 End-of-RIB */ + if ((wlen != 0) || (alen < len) || !alen) + return BGP_AF_IPV4; + + if (alen > len) + return 0; + + /* Process attribute list (alen == len) */ + while (len) + { + if (len < 2) + return 0; + + uint flags = pos[0]; + uint code = pos[1]; + ADVANCE(pos, len, 2); + + uint ll = !(flags & BAF_EXT_LEN) ? 
1 : 2; + if (len < ll) + return 0; + + /* Read attribute length and move to attribute body */ + alen = (ll == 1) ? get_u8(pos) : get_u16(pos); + ADVANCE(pos, len, ll); + + if (len < alen) + return 0; + + /* Found MP NLRI */ + if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI)) + { + if (alen < 3) + return 0; + + return BGP_AF(get_u16(pos), pos[2]); + } + + /* Move to the next attribute */ + ADVANCE(pos, len, alen); + } + + /* No basic or MP NLRI, but there are some attributes -> error */ + return 0; +} + /* * ROUTE-REFRESH @@ -2890,7 +2932,7 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) DBG("BGP: Got packet %02x (%d bytes)\n", type, len); if (conn->bgp->p.mrtdump & MD_MESSAGES) - mrt_dump_bgp_packet(conn, pkt, len); + bgp_dump_message(conn, pkt, len); switch (type) { diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile new file mode 100644 index 00000000..925fb102 --- /dev/null +++ b/proto/mrt/Makefile @@ -0,0 +1,6 @@ +src := mrt.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) + +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/mrt/config.Y b/proto/mrt/config.Y new file mode 100644 index 00000000..4da6777a --- /dev/null +++ b/proto/mrt/config.Y @@ -0,0 +1,68 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#include "proto/mrt/mrt.h" + +CF_DEFINES + +#define MRT_CFG ((struct mrt_config *) this_proto) + +CF_DECLS + +CF_KEYWORDS(MRT, TABLE, FILTER, FILENAME, PERIOD, ALWAYS, ADD, PATH, DUMP, TO) + +%type <md> mrt_dump_args + +CF_GRAMMAR + +proto: mrt_proto ; + +mrt_proto_start: proto_start MRT +{ + this_proto = proto_config_new(&proto_mrt, $1); +}; + +mrt_proto_item: + proto_item + | TABLE rtable { MRT_CFG->table_cf = $2; } + | TABLE TEXT { MRT_CFG->table_expr = $2; } + | FILTER filter { MRT_CFG->filter = $2; } + | where_filter { MRT_CFG->filter = $1; } + | FILENAME text { MRT_CFG->filename = $2; } + | PERIOD expr { MRT_CFG->period = $2; } + | ALWAYS ADD PATH bool { MRT_CFG->always_add_path = $4; } + ; + +mrt_proto_opts: + /* empty */ + | mrt_proto_opts mrt_proto_item ';' + ; + +mrt_proto: + mrt_proto_start proto_name '{' mrt_proto_opts '}' { mrt_check_config(this_proto); }; + +CF_CLI_HELP(MRT DUMP, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>] , [[Save MRT Table Dump into a file]]) +CF_CLI(MRT DUMP, mrt_dump_args, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>], [[Save mrt table dump v2 of table name <t> right now]]) +{ mrt_dump_cmd($3); } ; + +mrt_dump_args: + /* empty */ { $$ = cfg_allocz(sizeof(struct mrt_dump_data)); } + | mrt_dump_args TABLE rtable { $$ = $1; $$->table_ptr = $3->table; } + | mrt_dump_args TABLE TEXT { $$ = $1; $$->table_expr = $3; } + | mrt_dump_args FILTER filter { $$ = $1; $$->filter = $3; } + | mrt_dump_args where_filter { $$ = $1; $$->filter = 
$2; } + | mrt_dump_args TO text { $$ = $1; $$->filename = $3; } + ; + + +CF_CODE + +CF_END diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c new file mode 100644 index 00000000..95014958 --- /dev/null +++ b/proto/mrt/mrt.c @@ -0,0 +1,891 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Multi-Threaded Routing Toolkit (MRT) protocol + * + * The MRT protocol is implemented in just one file: |mrt.c|. It consists of + * several parts: Generic functions for preparing MRT messages in a buffer, + * functions for MRT table dump (called from timer or CLI), functions for MRT + * BGP4MP dump (called from BGP), and the usual protocol glue. For the MRT table + * dump, the key structure is struct mrt_table_dump_state, which contains all + * necessary data and is created when the MRT dump cycle is started for the + * duration of the MRT dump. The BGP4MP dump is currently not bound to MRT + * protocol instance and uses the config->mrtdump_file fd. + * + * The protocol is simple, it just periodically scans the routing table and exports it + * to a file. It does not use the regular update mechanism, but a direct access + * in order to handle iteration through multiple routing tables. The table dump + * needs to dump all peers first and then use indexes to address the peers, we + * use a hash table (@peer_hash) to find peer index based on BGP protocol key + * attributes. + * + * One thing worth documenting is the locking. During processing, the currently + * processed table (@table field in the state structure) is locked and also the + * explicitly named table is locked (@table_ptr field in the state structure) if + * specified. Between dumps no table is locked. 
Also the current config is + * locked (by config_add_obstacle()) during table dumps as some data (strings, + * filters) are shared from the config and the running table dump may be + * interrupted by reconfiguration. + * + * Supported standards: + * - RFC 6396 - MRT format standard + * - RFC 8050 - ADD_PATH extension + */ + +#include <unistd.h> +#include <limits.h> +#include <errno.h> + +#include "mrt.h" + +#include "nest/cli.h" +#include "filter/filter.h" +#include "proto/bgp/bgp.h" +#include "sysdep/unix/unix.h" + + +#ifdef PATH_MAX +#define BIRD_PATH_MAX PATH_MAX +#else +#define BIRD_PATH_MAX 4096 +#endif + +#define mrt_log(s, msg, args...) \ + ({ \ + if (s->cli) \ + cli_printf(s->cli, -8009, msg, ## args); \ + if (s->proto) \ + log(L_ERR "%s: " msg, s->proto->p.name, ## args); \ + }) + + +/* + * MRT buffer code + */ + +static void +mrt_buffer_init(buffer *b, pool *pool, size_t n) +{ + b->start = mb_alloc(pool, n); + b->pos = b->start; + b->end = b->start + n; +} + +static void +mrt_buffer_grow(buffer *b, size_t n) +{ + size_t used = b->pos - b->start; + size_t size = b->end - b->start; + size_t req = used + n; + + while (size < req) + size = size * 3 / 2; + + b->start = mb_realloc(b->start, size); + b->pos = b->start + used; + b->end = b->start + size; +} + +static inline void +mrt_buffer_need(buffer *b, size_t n) +{ + if (b->pos + n > b->end) + mrt_buffer_grow(b, n); +} + +static inline uint +mrt_buffer_pos(buffer *b) +{ + return b->pos - b->start; +} + +static inline void +mrt_buffer_flush(buffer *b) +{ + b->pos = b->start; +} + +#define MRT_DEFINE_TYPE(S, T) \ + static inline void mrt_put_##S##_(buffer *b, T x) \ + { \ + put_##S(b->pos, x); \ + b->pos += sizeof(T); \ + } \ + \ + static inline void mrt_put_##S(buffer *b, T x) \ + { \ + mrt_buffer_need(b, sizeof(T)); \ + put_##S(b->pos, x); \ + b->pos += sizeof(T); \ + } + +MRT_DEFINE_TYPE(u8, u8) +MRT_DEFINE_TYPE(u16, u16) +MRT_DEFINE_TYPE(u32, u32) +MRT_DEFINE_TYPE(u64, u64) +MRT_DEFINE_TYPE(ip4, ip4_addr) 
+MRT_DEFINE_TYPE(ip6, ip6_addr) + +static inline void +mrt_put_ipa(buffer *b, ip_addr x) +{ + if (ipa_is_ip4(x)) + mrt_put_ip4(b, ipa_to_ip4(x)); + else + mrt_put_ip6(b, ipa_to_ip6(x)); +} + +static inline void +mrt_put_data(buffer *b, const void *src, size_t n) +{ + if (!n) + return; + + mrt_buffer_need(b, n); + memcpy(b->pos, src, n); + b->pos += n; +} + +static void +mrt_init_message(buffer *b, u16 type, u16 subtype) +{ + /* Reset buffer */ + mrt_buffer_flush(b); + mrt_buffer_need(b, MRT_HDR_LENGTH); + + /* Prepare header */ + mrt_put_u32_(b, current_real_time() TO_S); /* now_real */ + mrt_put_u16_(b, type); + mrt_put_u16_(b, subtype); + + /* Message length, will be fixed later */ + mrt_put_u32_(b, 0); +} + +static void +mrt_dump_message(buffer *b, int fd) +{ + uint len = mrt_buffer_pos(b); + + /* Fix message length */ + ASSERT(len >= MRT_HDR_LENGTH); + put_u32(b->start + 8, len - MRT_HDR_LENGTH); + + if (fd < 0) + return; + + if (write(fd, b->start, len) < 0) + log(L_ERR "Write to MRT file failed: %m"); /* TODO: name of file */ +} + +static int +bstrsub(char *dst, size_t n, const char *src, const char *key, const char *val) +{ + const char *last, *next; + char *pos = dst; + size_t step, klen = strlen(key), vlen = strlen(val); + + for (last = src; next = strstr(last, key); last = next + klen) + { + step = next - last; + if (n <= step + vlen) + return 0; + + memcpy(pos, last, step); + ADVANCE(pos, n, step); + + memcpy(pos, val, vlen); + ADVANCE(pos, n, vlen); + } + + step = strlen(last); + if (n <= step) + return 0; + + memcpy(pos, last, step); + ADVANCE(pos, n, step); + + pos[0] = 0; + return 1; +} + +static inline rtable * +mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) +{ + /* Handle explicit table, return it in the first pass */ + if (tab_ptr) + return !tab ? tab_ptr : NULL; + + /* Walk routing_tables list, starting after tab (if non-NULL) */ + for (tab = !tab ? 
HEAD(routing_tables) : NODE_NEXT(tab); + NODE_VALID(tab); + tab = NODE_NEXT(tab)) + if (patmatch(pattern, tab->name) && + ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) + return tab; + + return NULL; +} + +static rtable * +mrt_next_table(struct mrt_table_dump_state *s) +{ + rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); + + if (s->table) + rt_unlock_table(s->table); + + s->table = tab; + s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0; + s->bws->mp_reach = !s->ipv4; + + if (s->table) + rt_lock_table(s->table); + + return s->table; +} + +static int +mrt_open_file(struct mrt_table_dump_state *s) +{ + char fmt1[BIRD_PATH_MAX]; + char name[BIRD_PATH_MAX]; + btime now = current_time(); + btime now_real = current_real_time(); + + if (!bstrsub(fmt1, sizeof(fmt1), s->filename, "%N", s->table->name) || + !tm_format_real_time(name, sizeof(name), fmt1, now_real)) + { + mrt_log(s, "Invalid filename '%s'", s->filename); + return 0; + } + + s->file = rf_open(s->pool, name, "a"); + if (!s->file) + { + mrt_log(s, "Unable to open MRT file '%s': %m", name); + return 0; + } + + s->fd = rf_fileno(s->file); + s->time_offset = now_real - now; + + return 1; +} + +static void +mrt_close_file(struct mrt_table_dump_state *s) +{ + rfree(s->file); + s->file = NULL; + s->fd = -1; +} + + +/* + * MRT Table Dump: Peer Index Table + */ + +#define PEER_KEY(n) n->peer_id, n->peer_as, n->peer_ip +#define PEER_NEXT(n) n->next +#define PEER_EQ(id1,as1,ip1,id2,as2,ip2) \ + id1 == id2 && as1 == as2 && ipa_equal(ip1, ip2) +#define PEER_FN(id,as,ip) ipa_hash(ip) + +static void +mrt_peer_table_header(struct mrt_table_dump_state *s, u32 router_id, const char *name) +{ + buffer *b = &s->buf; + + /* Collector BGP ID */ + mrt_put_u32(b, router_id); + + /* View Name */ + uint name_length = name ? 
strlen(name) : 0; + name_length = MIN(name_length, 65535); + mrt_put_u16(b, name_length); + mrt_put_data(b, name, name_length); + + /* Peer Count, will be fixed later */ + s->peer_count = 0; + s->peer_count_offset = mrt_buffer_pos(b); + mrt_put_u16(b, 0); + + HASH_INIT(s->peer_hash, s->pool, 10); +} + +static void +mrt_peer_table_entry(struct mrt_table_dump_state *s, u32 peer_id, u32 peer_as, ip_addr peer_ip) +{ + buffer *b = &s->buf; + + uint type = MRT_PEER_TYPE_32BIT_ASN; + if (ipa_is_ip6(peer_ip)) + type |= MRT_PEER_TYPE_IPV6; + + /* Dump peer to buffer */ + mrt_put_u8(b, type); + mrt_put_u32(b, peer_id); + mrt_put_ipa(b, peer_ip); + mrt_put_u32(b, peer_as); + + /* Add peer to hash table */ + struct mrt_peer_entry *n = lp_allocz(s->peer_lp, sizeof(struct mrt_peer_entry)); + n->peer_id = peer_id; + n->peer_as = peer_as; + n->peer_ip = peer_ip; + n->index = s->peer_count++; + + HASH_INSERT(s->peer_hash, PEER, n); +} + +static void +mrt_peer_table_dump(struct mrt_table_dump_state *s) +{ + mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, MRT_PEER_INDEX_TABLE); + mrt_peer_table_header(s, config->router_id, s->table->name); + + /* 0 is fake peer for non-BGP routes */ + mrt_peer_table_entry(s, 0, 0, IPA_NONE); + +#ifdef CONFIG_BGP + struct proto *P; + WALK_LIST(P, proto_list) + if ((P->proto == &proto_bgp) && (P->proto_state != PS_DOWN)) + { + struct bgp_proto *p = (void *) P; + mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->cf->remote_ip); + } +#endif + + /* Fix Peer Count */ + put_u16(s->buf.start + s->peer_count_offset, s->peer_count); + + mrt_dump_message(&s->buf, s->fd); +} + +static void +mrt_peer_table_flush(struct mrt_table_dump_state *s) +{ + lp_flush(s->peer_lp); + HASH_FREE(s->peer_hash); +} + + +/* + * MRT Table Dump: RIB Table + */ + +static void +mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n) +{ + buffer *b = &s->buf; + + /* Sequence Number */ + mrt_put_u32(b, s->seqnum); + + /* Network Prefix */ + if (s->ipv4) + { + ASSERT(n->type 
== NET_IP4); + ip4_addr a = ip4_hton(net4_prefix(n)); + uint len = net4_pxlen(n); + + mrt_put_u8(b, len); + mrt_put_data(b, &a, BYTES(len)); + } + else + { + ASSERT(n->type == NET_IP6); + ip6_addr a = ip6_hton(net6_prefix(n)); + uint len = net6_pxlen(n); + + mrt_put_u8(b, len); + mrt_put_data(b, &a, BYTES(len)); + } + + /* Entry Count, will be fixed later */ + s->entry_count = 0; + s->entry_count_offset = mrt_buffer_pos(b); + mrt_put_u16(b, 0); +} + +static void +mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) +{ + buffer *b = &s->buf; + uint peer = 0; + +#ifdef CONFIG_BGP + /* Find peer index */ + if (r->attrs->src->proto->proto == &proto_bgp) + { + struct bgp_proto *p = (void *) r->attrs->src->proto; + struct mrt_peer_entry *n = + HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->cf->remote_ip); + + peer = n ? n->index : 0; + } +#endif + + /* Peer Index and Originated Time */ + mrt_put_u16(b, peer); + mrt_put_u32(b, (r->lastmod + s->time_offset) TO_S); + + /* Path Identifier */ + if (s->add_path) + mrt_put_u32(b, r->attrs->src->private_id); + + /* Route Attributes */ + mrt_put_u16(b, 0); + +#ifdef CONFIG_BGP + if (r->attrs->eattrs) + { + struct ea_list *eattrs = r->attrs->eattrs; + + if (!rta_is_cached(r->attrs)) + ea_normalize(eattrs); + + mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); + int alen = bgp_encode_attrs(s->bws, eattrs, b->pos, b->end); + + if (alen < 0) + { + mrt_log(s, "Attribute list too long for %N", r->net->n.addr); + alen = 0; + } + + put_u16(b->pos - 2, alen); + b->pos += alen; + } +#endif + + s->entry_count++; +} + +static void +mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) +{ + s->add_path = s->bws->add_path = add_path; + + int subtype = s->ipv4 ? + (!add_path ? MRT_RIB_IPV4_UNICAST : MRT_RIB_IPV4_UNICAST_ADDPATH) : + (!add_path ? 
MRT_RIB_IPV6_UNICAST : MRT_RIB_IPV6_UNICAST_ADDPATH); + + mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); + mrt_rib_table_header(s, n->n.addr); + + rte *rt, *rt0; + for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) + { + if (rte_is_filtered(rt)) + continue; + + /* Skip routes that should be reported in the other phase */ + if (!s->always_add_path && (!rt->attrs->src->private_id != !s->add_path)) + { + s->want_add_path = 1; + continue; + } + + rte_make_tmp_attrs(&rt, s->linpool); + + if (f_run(s->filter, &rt, s->linpool, 0) <= F_ACCEPT) + mrt_rib_table_entry(s, rt); + + if (rt != rt0) + rte_free(rt); + + lp_flush(s->linpool); + } + + /* Fix Entry Count */ + put_u16(s->buf.start + s->entry_count_offset, s->entry_count); + + /* Update max counter */ + s->max -= 1 + s->entry_count; + + /* Skip empty entries */ + if (!s->entry_count) + return; + + s->seqnum++; + mrt_dump_message(&s->buf, s->fd); +} + + +/* + * MRT Table Dump: main logic + */ + +static struct mrt_table_dump_state * +mrt_table_dump_init(pool *pp) +{ + pool *pool = rp_new(pp, "MRT Table Dump"); + struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); + + s->pool = pool; + s->linpool = lp_new(pool, 4080); + s->peer_lp = lp_new(pool, 4080); + mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); + + /* We lock the current config as we may reference it indirectly by filter */ + s->config = config; + config_add_obstacle(s->config); + + s->fd = -1; + + return s; +} + +static void +mrt_table_dump_free(struct mrt_table_dump_state *s) +{ + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); + + if (s->table) + rt_unlock_table(s->table); + + if (s->table_ptr) + rt_unlock_table(s->table_ptr); + + config_del_obstacle(s->config); + + rfree(s->pool); +} + + +static int +mrt_table_dump_step(struct mrt_table_dump_state *s) +{ + struct bgp_write_state bws = { .as4_session = 1 }; + + s->max = 2048; + s->bws = &bws; + + if (s->table_open) + goto step; + + while 
(mrt_next_table(s)) + { + if (!mrt_open_file(s)) + continue; + + mrt_peer_table_dump(s); + + FIB_ITERATE_INIT(&s->fit, &s->table->fib); + s->table_open = 1; + + step: + FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) + { + if (s->max < 0) + { + FIB_ITERATE_PUT(&s->fit); + return 0; + } + + /* With Always ADD_PATH option, we jump directly to second phase */ + s->want_add_path = s->always_add_path; + + if (s->want_add_path == 0) + mrt_rib_table_dump(s, n, 0); + + if (s->want_add_path == 1) + mrt_rib_table_dump(s, n, 1); + } + FIB_ITERATE_END; + s->table_open = 0; + + mrt_close_file(s); + mrt_peer_table_flush(s); + } + + return 1; +} + +static void +mrt_timer(timer *t) +{ + struct mrt_proto *p = t->data; + struct mrt_config *cf = (void *) (p->p.cf); + + if (p->table_dump) + { + log(L_WARN "%s: Earlier RIB table dump still not finished, skipping next one", p->p.name); + return; + } + + TRACE(D_EVENTS, "RIB table dump started"); + + struct mrt_table_dump_state *s = mrt_table_dump_init(p->p.pool); + + s->proto = p; + s->table_expr = cf->table_expr; + s->table_ptr = cf->table_cf ? 
cf->table_cf->table : NULL; + s->filter = cf->filter; + s->filename = cf->filename; + s->always_add_path = cf->always_add_path; + + if (s->table_ptr) + rt_lock_table(s->table_ptr); + + p->table_dump = s; + ev_schedule(p->event); +} + /* Event hook of the MRT protocol. Does nothing when no dump is pending; otherwise runs one mrt_table_dump_step() and reschedules itself while the step returns zero. Once the step returns non-zero, the dump state is freed and a protocol waiting in PS_STOP is moved to PS_DOWN. */ +static void +mrt_event(void *P) +{ + struct mrt_proto *p = P; + + if (!p->table_dump) + return; + + if (!mrt_table_dump_step(p->table_dump)) + { + ev_schedule(p->event); + return; + } + + mrt_table_dump_free(p->table_dump); + p->table_dump = NULL; + + TRACE(D_EVENTS, "RIB table dump done"); + + if (p->p.proto_state == PS_STOP) + proto_notify_state(&p->p, PS_DOWN); +} + + +/* + * MRT Table Dump: CLI command + */ + /* CLI continuation hook: runs one dump step on the state kept in c->rover. When the step returns non-zero, prints an empty message with code 0, frees the dump state and clears the cont, cleanup and rover fields of the CLI. */ +static void +mrt_dump_cont(struct cli *c) +{ + if (!mrt_table_dump_step(c->rover)) + return; + + cli_printf(c, 0, ""); + mrt_table_dump_free(c->rover); + c->cont = c->cleanup = c->rover = NULL; +} + /* CLI cleanup hook: frees the dump state kept in c->rover and clears the pointer. */ +static void +mrt_dump_cleanup(struct cli *c) +{ + mrt_table_dump_free(c->rover); + c->rover = NULL; +} + /* Entry point of the CLI dump command. Returns without action when cli_access_restricted() is non-zero, reports an error through cf_error() when neither a table expression nor a table pointer is given or when the filename is missing, then creates a dump state in the pool of this_cli and installs mrt_dump_cont / mrt_dump_cleanup on this_cli. NOTE(review): always_add_path is not copied here, unlike the protocol path; presumably it keeps whatever mrt_table_dump_init() sets - confirm. */ +void +mrt_dump_cmd(struct mrt_dump_data *d) +{ + if (cli_access_restricted()) + return; + + if (!d->table_expr && !d->table_ptr) + cf_error("Table not specified"); + + if (!d->filename) + cf_error("File not specified"); + + struct mrt_table_dump_state *s = mrt_table_dump_init(this_cli->pool); + + s->cli = this_cli; + s->table_expr = d->table_expr; + s->table_ptr = d->table_ptr; + s->filter = d->filter; + s->filename = d->filename; + + if (s->table_ptr) + rt_lock_table(s->table_ptr); + + this_cli->cont = mrt_dump_cont; + this_cli->cleanup = mrt_dump_cleanup; + this_cli->rover = s; +} + + +/* + * MRT BGP4MP dump + */ + /* Returns the single function-local static buffer shared by the BGP4MP dump functions; it is initialized on first use (when b.start is still zero) from root_pool with the argument 1024. */ +static buffer * +mrt_bgp_buffer(void) +{ + /* Static buffer for BGP4MP dump, TODO: change to use MRT protocol */ + static buffer b; + + if (!b.start) + mrt_buffer_init(&b, &root_pool, 1024); + + return &b; +} + /* Writes the header fields shared by the BGP4MP messages into b, in this order: peer AS, local AS, index, address family, peer IP, local IP. With d->as4 set the AS numbers are written as 32-bit values; otherwise as 16-bit values, with AS_TRANS substituted for any AS number above 0xFFFF. An index above 0xFFFF is written as 0. Addresses are written as IPv4 when d->af equals BGP_AFI_IPV4 and as IPv6 otherwise. */ +static void +mrt_bgp_header(buffer *b, struct mrt_bgp_data *d) +{ + if (d->as4) + { + mrt_put_u32(b, d->peer_as); + mrt_put_u32(b, d->local_as); + } + else + { + mrt_put_u16(b, (d->peer_as <= 0xFFFF) ? 
d->peer_as : AS_TRANS); + mrt_put_u16(b, (d->local_as <= 0xFFFF) ? d->local_as : AS_TRANS); + } + + mrt_put_u16(b, (d->index <= 0xFFFF) ? d->index : 0); + mrt_put_u16(b, d->af); + + if (d->af == BGP_AFI_IPV4) + { + mrt_put_ip4(b, ipa_to_ip4(d->peer_ip)); + mrt_put_ip4(b, ipa_to_ip4(d->local_ip)); + } + else + { + mrt_put_ip6(b, ipa_to_ip6(d->peer_ip)); + mrt_put_ip6(b, ipa_to_ip6(d->local_ip)); + } +} + /* Dumps one BGP message as an MRT_BGP4MP record: record header, common BGP4MP header, then d->msg_len bytes from d->message, written to config->mrtdump_file. The subtype is looked up by as4 + 4*add_path. NOTE(review): this assumes as4 and add_path are each 0 or 1 (then only entries 0, 1, 4 and 5 are selected and the *_LOCAL entries are never used) - confirm at the callers. */ +void +mrt_dump_bgp_message(struct mrt_bgp_data *d) +{ + const u16 subtypes[] = { + MRT_BGP4MP_MESSAGE, MRT_BGP4MP_MESSAGE_AS4, + MRT_BGP4MP_MESSAGE_LOCAL, MRT_BGP4MP_MESSAGE_AS4_LOCAL, + MRT_BGP4MP_MESSAGE_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_ADDPATH, + MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH, MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH, + }; + + buffer *b = mrt_bgp_buffer(); + mrt_init_message(b, MRT_BGP4MP, subtypes[d->as4 + 4*d->add_path]); + mrt_bgp_header(b, d); + mrt_put_data(b, d->message, d->msg_len); + mrt_dump_message(b, config->mrtdump_file); +} + /* Dumps a BGP state change as an MRT_BGP4MP_STATE_CHANGE_AS4 record. Nothing is written when the old and the new state map to the same MRT value. Note that d->as4 is overwritten with 1 as a side effect. NOTE(review): old_state and new_state index states[] without a range check; assumes both are below BS_MAX - confirm at the callers. */ +void +mrt_dump_bgp_state_change(struct mrt_bgp_data *d) +{ + /* Convert state from our BS_* values to values used in MRTDump */ + const u16 states[BS_MAX] = {1, 2, 3, 4, 5, 6, 1}; + + if (states[d->old_state] == states[d->new_state]) + return; + + /* Always use AS4 mode for STATE_CHANGE */ + d->as4 = 1; + + buffer *b = mrt_bgp_buffer(); + mrt_init_message(b, MRT_BGP4MP, MRT_BGP4MP_STATE_CHANGE_AS4); + mrt_bgp_header(b, d); + mrt_put_u16(b, states[d->old_state]); + mrt_put_u16(b, states[d->new_state]); + mrt_dump_message(b, config->mrtdump_file); +} + + +/* + * MRT protocol glue + */ + /* Configuration check: reports an error through cf_error() when no table (neither expression nor table config) is given, when the filename is missing, or when the period is zero. */ +void +mrt_check_config(struct proto_config *CF) +{ + struct mrt_config *cf = (void *) CF; + + if (!cf->table_expr && !cf->table_cf) + cf_error("Table not specified"); + + if (!cf->filename) + cf_error("File not specified"); + + if (!cf->period) + cf_error("Period not specified"); +} + /* Init hook: creates the protocol instance with proto_new() and returns it; no MRT-specific fields are set here. */ +static struct proto * +mrt_init(struct proto_config *CF) +{ + struct proto *P = proto_new(CF); + + return P; +} + /* Start hook: creates the timer (hook mrt_timer) and the event (hook mrt_event) in the protocol pool, starts the timer with the configured period and returns PS_UP. */ +static int +mrt_start(struct proto *P) +{ + 
struct mrt_proto *p = (void *) P; + struct mrt_config *cf = (void *) (P->cf); + + p->timer = tm_new_init(P->pool, mrt_timer, p, cf->period S, 0); + p->event = ev_new_init(P->pool, mrt_event, p); + + tm_start(p->timer, cf->period S); + + return PS_UP; +} + /* Shutdown hook: returns PS_STOP while a table dump is still pending (mrt_event() then notifies PS_DOWN once the dump is done), PS_DOWN otherwise. */ +static int +mrt_shutdown(struct proto *P) +{ + struct mrt_proto *p = (void *) P; + + return p->table_dump ? PS_STOP : PS_DOWN; +} + /* Reconfigure hook: always returns 1. Only a change of the period is acted on: the timer's recurrent value is updated and its expiry is moved by the difference between the new and the old period, but never before the current time. NOTE(review): the other configuration fields are not compared here - confirm that this is intended. */ +static int +mrt_reconfigure(struct proto *P, struct proto_config *CF) +{ + struct mrt_proto *p = (void *) P; + struct mrt_config *old = (void *) (P->cf); + struct mrt_config *new = (void *) CF; + + if (new->period != old->period) + { + TRACE(D_EVENTS, "Changing period from %u to %u s", old->period, new->period); + + btime now = current_time(); + btime new_time = p->timer->expires - (old->period S) + (new->period S); + p->timer->recurrent = new->period S; + tm_set(p->timer, MAX(now, new_time)); + } + + return 1; +} + +static void +mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) +{ + /* Do nothing */ +} + + /* Protocol descriptor wiring the hooks defined above into the protocol table. */ +struct protocol proto_mrt = { + .name = "MRT", + .template = "mrt%d", + .class = PROTOCOL_MRT, + .proto_size = sizeof(struct mrt_proto), + .config_size = sizeof(struct mrt_config), + .init = mrt_init, + .start = mrt_start, + .shutdown = mrt_shutdown, + .reconfigure = mrt_reconfigure, + .copy_config = mrt_copy_config, +}; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h new file mode 100644 index 00000000..b2cec09d --- /dev/null +++ b/proto/mrt/mrt.h @@ -0,0 +1,158 @@ +/* + * BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol + * + * (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2017--2018 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRD_MRT_H_ +#define _BIRD_MRT_H_ + +#include "nest/bird.h" +#include "nest/protocol.h" +#include "lib/lists.h" +#include "nest/route.h" +#include "lib/event.h" +#include "lib/hash.h" + + /* Configuration of one MRT protocol instance; mrt_check_config() requires a table (table_cf or table_expr), a filename and a non-zero period. */ +struct mrt_config { + struct proto_config c; + + struct rtable_config *table_cf; + const char *table_expr; + struct filter *filter; + const char *filename; + uint period; + int always_add_path; +}; + /* Run-time state of one MRT protocol instance; table_dump is non-NULL while a table dump is in progress. */ +struct mrt_proto { + struct proto p; + timer *timer; + event *event; + + struct mrt_target *file; + struct mrt_table_dump_state *table_dump; +}; + /* Arguments of the CLI dump command, consumed by mrt_dump_cmd(). */ +struct mrt_dump_data { + const char *table_expr; + struct rtable *table_ptr; + struct filter *filter; + char *filename; +}; + /* Peer record; presumably an entry of peer_hash in struct mrt_table_dump_state chained through next - confirm against mrt.c. */ +struct mrt_peer_entry { + u32 index; + u32 peer_id; + u32 peer_as; + ip_addr peer_ip; + struct mrt_peer_entry *next; +}; + +struct mrt_table_dump_state { + struct mrt_proto *proto; /* Protocol for regular MRT dumps (or NULL) */ + struct cli *cli; /* CLI for irregular MRT dumps (or NULL) */ + struct config *config; /* Config valid during start of dump, locked */ + + /* Configuration information */ + const char *table_expr; /* Wildcard for table name (or NULL) */ + struct rtable *table_ptr; /* Explicit table (or NULL) */ + struct filter *filter; /* Optional filter */ + const char *filename; /* Filename pattern */ + int always_add_path; /* Always use *_ADDPATH message subtypes */ + + /* Allocated by mrt_table_dump_init() */ + pool *pool; /* Pool for table dump */ + linpool *linpool; /* Temporary linear pool */ + linpool *peer_lp; /* Linear pool for peer entries in peer_hash */ + buffer buf; /* Buffer for MRT messages */ + + HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ + + struct rtable *table; /* Processed table, NULL initially */ + struct fib_iterator fit; /* Iterator in processed table */ + int table_open; /* Whether iterator is linked */ + + int ipv4; /* Processed table is IPv4 */ + int add_path; /* Current message subtype is *_ADDPATH */ + int want_add_path; /* 
Want *_ADDPATH message later */ + int max; /* Decreasing counter of dumped routes */ + u32 seqnum; /* MRT message sequence number */ + btime time_offset; /* Time offset between monotonic and real time */ + struct bgp_write_state *bws; /* NOTE(review): left undocumented in the original; presumably the BGP write state used when encoding attributes - confirm */ + + u16 peer_count; /* Number of peers */ + u32 peer_count_offset; /* Buffer offset to store peer_count later */ + u16 entry_count; /* Number of RIB Entries */ + u32 entry_count_offset; /* Buffer offset to store entry_count later */ + + struct rfile *file; /* tracking for mrt table dump file */ + int fd; +}; + /* Input of the BGP4MP dump functions in mrt.c: mrt_bgp_header() reads peer_as, local_as, index, af, peer_ip, local_ip and as4; mrt_dump_bgp_message() additionally reads message, msg_len and add_path; mrt_dump_bgp_state_change() reads old_state and new_state and sets as4 to 1. */ +struct mrt_bgp_data { + uint peer_as; + uint local_as; + uint index; + uint af; + ip_addr peer_ip; + ip_addr local_ip; + byte *message; + uint msg_len; + uint old_state; + uint new_state; + u8 as4; + u8 add_path; +}; + + +#define MRT_HDR_LENGTH 12 /* MRT Timestamp + MRT Type + MRT Subtype + MRT Load Length */ +#define MRT_PEER_TYPE_32BIT_ASN 2 /* MRT Table Dump: Peer Index Table: Peer Type: Use 32bit ASN */ +#define MRT_PEER_TYPE_IPV6 1 /* MRT Table Dump: Peer Index Table: Peer Type: Use IPv6 IP Address */ + +#define MRT_ATTR_BUFFER_SIZE 65536 + +/* MRT Types */ +#define MRT_TABLE_DUMP_V2 13 +#define MRT_BGP4MP 16 + +/* MRT Table Dump v2 Subtypes */ +#define MRT_PEER_INDEX_TABLE 1 +#define MRT_RIB_IPV4_UNICAST 2 +#define MRT_RIB_IPV4_MULTICAST 3 +#define MRT_RIB_IPV6_UNICAST 4 +#define MRT_RIB_IPV6_MULTICAST 5 +#define MRT_RIB_GENERIC 6 +#define MRT_RIB_IPV4_UNICAST_ADDPATH 8 +#define MRT_RIB_IPV4_MULTICAST_ADDPATH 9 +#define MRT_RIB_IPV6_UNICAST_ADDPATH 10 +#define MRT_RIB_IPV6_MULTICAST_ADDPATH 11 +#define MRT_RIB_GENERIC_ADDPATH 12 + +/* MRT BGP4MP Subtypes */ +#define MRT_BGP4MP_MESSAGE 1 +#define MRT_BGP4MP_MESSAGE_AS4 4 +#define MRT_BGP4MP_STATE_CHANGE_AS4 5 +#define MRT_BGP4MP_MESSAGE_LOCAL 6 +#define MRT_BGP4MP_MESSAGE_AS4_LOCAL 7 +#define MRT_BGP4MP_MESSAGE_ADDPATH 8 +#define MRT_BGP4MP_MESSAGE_AS4_ADDPATH 9 +#define MRT_BGP4MP_MESSAGE_LOCAL_ADDPATH 10 +#define MRT_BGP4MP_MESSAGE_AS4_LOCAL_ADDPATH 11 + + +#ifdef 
CONFIG_MRT +void mrt_dump_cmd(struct mrt_dump_data *d); +void mrt_dump_bgp_message(struct mrt_bgp_data *d); +void mrt_dump_bgp_state_change(struct mrt_bgp_data *d); +void mrt_check_config(struct proto_config *C); +#else /* Without CONFIG_MRT the two BGP dump hooks become empty inline stubs. NOTE(review): mrt_dump_cmd() and mrt_check_config() get no stub in this branch - confirm that nothing references them when CONFIG_MRT is off. */ +static inline void mrt_dump_bgp_message(struct mrt_bgp_data *d UNUSED) { } +static inline void mrt_dump_bgp_state_change(struct mrt_bgp_data *d UNUSED) { } +#endif + +#endif /* _BIRD_MRT_H_ */ diff --git a/proto/mrt/progdoc b/proto/mrt/progdoc new file mode 100644 index 00000000..5f61a9bb --- /dev/null +++ b/proto/mrt/progdoc @@ -0,0 +1 @@ +S mrt.c |