diff options
Diffstat (limited to 'proto')
38 files changed, 1143 insertions, 1457 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 1e87212c..6d2a593e 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -641,13 +641,36 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { rta a0 = { - .src = p->p.main_source, .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, .dest = RTD_UNICAST, + .pref = c->preference, .from = r->neigh->addr, .nh.gw = r->next_hop, .nh.iface = r->neigh->ifa->iface, + .eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)), + }; + + *a0.eattrs = (ea_list) { .count = 3 }; + a0.eattrs->attrs[0] = (eattr) { + .id = EA_BABEL_METRIC, + .type = EAF_TYPE_INT, + .u.data = r->metric, + }; + + struct adata *ad = alloca(sizeof(struct adata) + sizeof(u64)); + ad->length = sizeof(u64); + memcpy(ad->data, &(r->router_id), sizeof(u64)); + a0.eattrs->attrs[1] = (eattr) { + .id = EA_BABEL_ROUTER_ID, + .type = EAF_TYPE_OPAQUE, + .u.ptr = ad, + }; + + a0.eattrs->attrs[2] = (eattr) { + .id = EA_BABEL_SEQNO, + .type = EAF_TYPE_INT, + .u.data = r->seqno, }; /* @@ -658,40 +681,37 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) a0.nh.flags = RNF_ONLINK; - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a); - rte->u.babel.seqno = r->seqno; - rte->u.babel.metric = r->metric; - rte->u.babel.router_id = r->router_id; - rte->pflags = EA_ID_FLAG(EA_BABEL_METRIC) | EA_ID_FLAG(EA_BABEL_ROUTER_ID); + rte e0 = { + .attrs = &a0, + .src = p->p.main_source, + }; e->unreachable = 0; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else if (e->valid && (e->router_id != p->router_id)) { /* Unreachable */ rta a0 = { - .src = p->p.main_source, .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, .dest = RTD_UNREACHABLE, + .pref = 1, }; - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a); - memset(&rte->u.babel, 0, sizeof(rte->u.babel)); - rte->pflags = 0; - rte->pref = 1; + rte e0 = { + .attrs = 
&a0, + .src = p->p.main_source, + }; e->unreachable = 1; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else { /* Retraction */ e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } } @@ -701,7 +721,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) { struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } @@ -1727,7 +1747,7 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con TRACE(D_EVENTS, "Adding interface %s", new->name); - pool *pool = rp_new(p->p.pool, new->name); + pool *pool = rp_new(p->p.pool, p->p.loop, new->name); ifa = mb_allocz(pool, sizeof(struct babel_iface)); ifa->proto = p; @@ -1779,7 +1799,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) rem_node(NODE ifa); - rfree(ifa->pool); /* contains ifa itself, locks, socket, etc */ + rp_free(ifa->pool, p->p.pool); /* contains ifa itself, locks, socket, etc */ } static int @@ -1887,7 +1907,8 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) { struct iface *iface; - WALK_LIST(iface, iface_list) + IFACE_LEGACY_ACCESS; + WALK_LIST(iface, global_iface_list) { if (!(iface->flags & IF_UP)) continue; @@ -2010,7 +2031,13 @@ babel_dump(struct proto *P) static void babel_get_route_info(rte *rte, byte *buf) { - buf += bsprintf(buf, " (%d/%d) [%lR]", rte->pref, rte->u.babel.metric, rte->u.babel.router_id); + u64 rid = 0; + eattr *e = ea_find(rte->attrs->eattrs, EA_BABEL_ROUTER_ID); + if (e) + memcpy(&rid, e->u.ptr->data, sizeof(u64)); + + buf += bsprintf(buf, " (%d/%d) [%lR]", rte->attrs->pref, + ea_get_int(rte->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY), rid); } static int @@ -2018,6 +2045,9 @@ babel_get_attr(const eattr *a, 
byte *buf, int buflen UNUSED) { switch (a->id) { + case EA_BABEL_SEQNO: + return GA_FULL; + case EA_BABEL_METRIC: bsprintf(buf, "metric: %d", a->u.data); return GA_FULL; @@ -2231,45 +2261,23 @@ babel_kick_timer(struct babel_proto *p) static int -babel_preexport(struct proto *P, struct rte **new, struct linpool *pool UNUSED) +babel_preexport(struct channel *c, struct rte *new) { - struct rta *a = (*new)->attrs; - + struct rta *a = new->attrs; /* Reject our own unreachable routes */ - if ((a->dest == RTD_UNREACHABLE) && (a->src->proto == P)) + if ((a->dest == RTD_UNREACHABLE) && (new->src->owner == &c->proto->sources)) return -1; return 0; } -static void -babel_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - struct adata *id = lp_alloc_adata(pool, sizeof(u64)); - memcpy(id->data, &rt->u.babel.router_id, sizeof(u64)); - - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_BABEL_METRIC, EAF_TYPE_INT, rt->u.babel.metric); - rte_make_tmp_attr(rt, EA_BABEL_ROUTER_ID, EAF_TYPE_OPAQUE, (uintptr_t) id); -} - -static void -babel_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rt->u.babel.metric = rte_store_tmp_attr(rt, EA_BABEL_METRIC); - - /* EA_BABEL_ROUTER_ID is read-only, we do not really save the value */ - rte_store_tmp_attr(rt, EA_BABEL_ROUTER_ID); -} - /* * babel_rt_notify - core tells us about new route (possibly our own), * so store it into our data structures. */ static void -babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, - struct rte *new, struct rte *old UNUSED) +babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, + struct rte *new, const struct rte *old UNUSED) { struct babel_proto *p = (void *) P; struct babel_entry *e; @@ -2277,19 +2285,31 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, if (new) { /* Update */ - uint internal = (new->attrs->src->proto == P); - uint rt_seqno = internal ? 
new->u.babel.seqno : p->update_seqno; + uint rt_seqno; uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0); - u64 rt_router_id = internal ? new->u.babel.router_id : p->router_id; + u64 rt_router_id = 0; + + if (new->src->owner == &P->sources) + { + rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data; + eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID); + if (e) + memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64)); + } + else + { + rt_seqno = p->update_seqno; + rt_router_id = p->router_id; + } if (rt_metric > BABEL_INFINITY) { log(L_WARN "%s: Invalid babel_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = BABEL_INFINITY; } - e = babel_get_entry(p, net->n.addr); + e = babel_get_entry(p, net); /* Activate triggered updates */ if ((e->valid != BABEL_ENTRY_VALID) || @@ -2307,7 +2327,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, else { /* Withdraw */ - e = babel_find_entry(p, net->n.addr); + e = babel_find_entry(p, net); if (!e || e->valid != BABEL_ENTRY_VALID) return; @@ -2323,15 +2343,16 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, static int babel_rte_better(struct rte *new, struct rte *old) { - return new->u.babel.metric < old->u.babel.metric; + uint new_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY); + uint old_metric = ea_get_int(old->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY); + + return new_metric < old_metric; } -static int -babel_rte_same(struct rte *new, struct rte *old) +static u32 +babel_rte_igp_metric(struct rte *rt) { - return ((new->u.babel.seqno == old->u.babel.seqno) && - (new->u.babel.metric == old->u.babel.metric) && - (new->u.babel.router_id == old->u.babel.router_id)); + return ea_get_int(rt->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY); } @@ -2352,6 +2373,12 @@ babel_postconfig(struct proto_config *CF) cf->ip6_channel = ip6 ?: ip6_sadr; } +static struct 
rte_owner_class babel_rte_owner_class = { + .get_route_info = babel_get_route_info, + .rte_better = babel_rte_better, + .rte_igp_metric = babel_rte_igp_metric, +}; + static struct proto * babel_init(struct proto_config *CF) { @@ -2365,10 +2392,8 @@ babel_init(struct proto_config *CF) P->if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; - P->make_tmp_attrs = babel_make_tmp_attrs; - P->store_tmp_attrs = babel_store_tmp_attrs; - P->rte_better = babel_rte_better; - P->rte_same = babel_rte_same; + + P->sources.class = &babel_rte_owner_class; return P; } @@ -2461,7 +2486,6 @@ babel_reconfigure(struct proto *P, struct proto_config *CF) return 1; } - struct protocol proto_babel = { .name = "Babel", .template = "babel%d", @@ -2476,6 +2500,5 @@ struct protocol proto_babel = { .start = babel_start, .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, - .get_route_info = babel_get_route_info, .get_attr = babel_get_attr }; diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 84feb085..8b6da3c8 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -28,6 +28,7 @@ #define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0) #define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1) +#define EA_BABEL_SEQNO EA_CODE(PROTOCOL_BABEL, 2) #define BABEL_MAGIC 42 #define BABEL_VERSION 2 diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index 402122fc..267dff98 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -1,6 +1,6 @@ -src := bfd.c io.c packets.c +src := bfd.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index dac184c5..63e0deff 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -113,15 +113,26 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -static list bfd_proto_list; -static list bfd_wait_list; +DEFINE_DOMAIN(rtable); +#define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock) +#define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock) +#define BFD_ASSERT_LOCKED ASSERT_DIE(DOMAIN_IS_LOCKED(rtable, bfd_global.lock)) + +static struct { + DOMAIN(rtable) lock; + list wait_list; + list proto_list; +} bfd_global; + +static struct bfd_session bfd_admin_down = { .loc = { .state = BFD_STATE_ADMIN_DOWN }, }; const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; static void bfd_session_set_min_tx(struct bfd_session *s, u32 val); static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface); static void bfd_free_iface(struct bfd_iface *ifa); -static inline void bfd_notify_kick(struct bfd_proto *p); +static void bfd_remove_session(struct bfd_proto *p, struct bfd_session *s); +static void bfd_reconfigure_session_hook(void *vsession); /* @@ -140,37 +151,57 @@ bfd_merge_options(const struct bfd_iface_config *cf, const struct bfd_options *o }; } -static void +static int bfd_session_update_state(struct bfd_session *s, uint state, uint diag) { struct bfd_proto *p = s->ifa->bfd; - uint old_state = s->loc_state; - int notify; + uint old_state = BFD_LOC_STATE(s).state; if (state == old_state) - return; + { + if (current_time() > s->last_reqlist_check + 5 S) + { + BFD_LOCK; + if (EMPTY_LIST(s->request_list)) + { + bfd_remove_session(p, s); + BFD_UNLOCK; + return 1; + } + + s->last_reqlist_check = current_time(); + BFD_UNLOCK; + } + return 0; + } TRACE(D_EVENTS, "Session to %I changed state from %s to %s", s->addr, bfd_state_names[old_state], 
bfd_state_names[state]); - bfd_lock_sessions(p); - s->loc_state = state; - s->loc_diag = diag; + atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = state, .diag = diag }), memory_order_release); s->last_state_change = current_time(); - notify = !NODE_VALID(&s->n); - if (notify) - add_tail(&p->notify_list, &s->n); - bfd_unlock_sessions(p); - if (state == BFD_STATE_UP) bfd_session_set_min_tx(s, s->cf.min_tx_int); if (old_state == BFD_STATE_UP) bfd_session_set_min_tx(s, s->cf.idle_tx_int); - if (notify) - bfd_notify_kick(p); + BFD_LOCK; + if (EMPTY_LIST(s->request_list)) + { + bfd_remove_session(p, s); + BFD_UNLOCK; + return 1; + } + + struct bfd_request *req; + node *nn; + WALK_LIST2(req, nn, s->request_list, n) + ev_send_self(&req->event); + + BFD_UNLOCK; + return 0; } static void @@ -188,7 +219,7 @@ bfd_session_update_tx_interval(struct bfd_session *s) return; /* Set timer relative to last tx_timer event */ - tm_set(s->tx_timer, s->last_tx + tx_int_l); + tm_set_in(s->tx_timer, s->last_tx + tx_int_l, s->ifa->bfd->p.loop); } static void @@ -202,7 +233,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) if (!s->last_rx) return; - tm_set(s->hold_timer, s->last_rx + timeout); + tm_set_in(s->hold_timer, s->last_rx + timeout, s->ifa->bfd->p.loop); } static void @@ -215,8 +246,8 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (s->rem_demand_mode && !s->poll_active && - (s->loc_state == BFD_STATE_UP) && - (s->rem_state == BFD_STATE_UP)) + (BFD_LOC_STATE(s).state == BFD_STATE_UP) && + (s->rem.state == BFD_STATE_UP)) goto stop; if (s->rem_min_rx_int == 0) @@ -226,7 +257,7 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (reset || !tm_active(s->tx_timer)) { s->last_tx = 0; - tm_start(s->tx_timer, 0); + tm_start_in(s->tx_timer, 0, s->ifa->bfd->p.loop); } return; @@ -286,28 +317,29 @@ bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old int next_state = 0; int diag = 
BFD_DIAG_NOTHING; - switch (s->loc_state) + switch (BFD_LOC_STATE(s).state) { case BFD_STATE_ADMIN_DOWN: return; case BFD_STATE_DOWN: - if (s->rem_state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT; - else if (s->rem_state == BFD_STATE_INIT) next_state = BFD_STATE_UP; + if (s->rem.state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT; + else if (s->rem.state == BFD_STATE_INIT) next_state = BFD_STATE_UP; break; case BFD_STATE_INIT: - if (s->rem_state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; - else if (s->rem_state >= BFD_STATE_INIT) next_state = BFD_STATE_UP; + if (s->rem.state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + else if (s->rem.state >= BFD_STATE_INIT) next_state = BFD_STATE_UP; break; case BFD_STATE_UP: - if (s->rem_state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + if (s->rem.state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; break; } if (next_state) - bfd_session_update_state(s, next_state, diag); + if (bfd_session_update_state(s, next_state, diag)) + return; bfd_session_control_tx_timer(s, 0); @@ -322,7 +354,7 @@ bfd_session_timeout(struct bfd_session *s) TRACE(D_EVENTS, "Session to %I expired", s->addr); - s->rem_state = BFD_STATE_DOWN; + s->rem.state = BFD_STATE_DOWN; s->rem_id = 0; s->rem_min_tx_int = 0; s->rem_min_rx_int = 1; @@ -333,7 +365,8 @@ bfd_session_timeout(struct bfd_session *s) s->poll_active = 0; s->poll_scheduled = 0; - bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT); + if (bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT)) + return; bfd_session_control_tx_timer(s, 1); } @@ -349,7 +382,7 @@ bfd_session_set_min_tx(struct bfd_session *s, u32 val) s->des_min_tx_new = val; /* Postpone timer update if des_min_tx_int increases and the session is up */ - if ((s->loc_state != BFD_STATE_UP) || (val < s->des_min_tx_int)) + if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) 
|| (val < s->des_min_tx_int)) { s->des_min_tx_int = val; bfd_session_update_tx_interval(s); @@ -369,7 +402,7 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val) s->req_min_rx_new = val; /* Postpone timer update if req_min_rx_int decreases and the session is up */ - if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int)) + if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) || (val > s->req_min_rx_int)) { s->req_min_rx_int = val; bfd_session_update_detection_time(s, 0); @@ -381,12 +414,14 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val) struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id) { + ASSERT_DIE(birdloop_inside(p->p.loop)); return HASH_FIND(p->session_hash_id, HASH_ID, id); } struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex) { + ASSERT_DIE(birdloop_inside(p->p.loop)); return HASH_FIND(p->session_hash_ip, HASH_IP, addr, ifindex); } @@ -419,7 +454,8 @@ bfd_get_free_id(struct bfd_proto *p) static struct bfd_session * bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts) { - birdloop_enter(p->loop); + ASSERT_DIE(birdloop_inside(p->p.loop)); + BFD_ASSERT_LOCKED; struct bfd_iface *ifa = bfd_get_iface(p, local, iface); @@ -433,10 +469,15 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * HASH_INSERT(p->session_hash_ip, HASH_IP, s); s->cf = bfd_merge_options(ifa->cf, opts); + s->update_event = (event) { + .hook = bfd_reconfigure_session_hook, + .data = s, + .list = birdloop_event_list(p->p.loop), + }; /* Initialization of state variables - see RFC 5880 6.8.1 */ - s->loc_state = BFD_STATE_DOWN; - s->rem_state = BFD_STATE_DOWN; + atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = BFD_STATE_DOWN }), memory_order_relaxed); + s->rem.state = BFD_STATE_DOWN; s->des_min_tx_int = s->des_min_tx_new = s->cf.idle_tx_int; s->req_min_rx_int = s->req_min_rx_new = s->cf.min_rx_int; 
s->rem_min_rx_int = 1; @@ -444,8 +485,8 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * s->passive = s->cf.passive; s->tx_csn = random_u32(); - s->tx_timer = tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); - s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); + s->tx_timer = tm_new_init(p->p.pool, bfd_tx_timer_hook, s, 0, 0); + s->hold_timer = tm_new_init(p->p.pool, bfd_hold_timer_hook, s, 0, 0); bfd_session_update_tx_interval(s); bfd_session_control_tx_timer(s, 1); @@ -454,51 +495,15 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * TRACE(D_EVENTS, "Session to %I added", s->addr); - birdloop_leave(p->loop); - return s; } -/* -static void -bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) -{ - birdloop_enter(p->loop); - - s->opened = 1; - - bfd_session_control_tx_timer(s); - - birdloop_leave(p->loop); -} - -static void -bfd_close_session(struct bfd_proto *p, struct bfd_session *s) -{ - birdloop_enter(p->loop); - - s->opened = 0; - - bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); - bfd_session_control_tx_timer(s); - - birdloop_leave(p->loop); -} -*/ - static void bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) { - ip_addr ip = s->addr; - - /* Caller should ensure that request list is empty */ - - birdloop_enter(p->loop); - - /* Remove session from notify list if scheduled for notification */ - /* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */ - if (NODE_VALID(&s->n)) - rem_node(&s->n); + ASSERT_DIE(birdloop_inside(p->p.loop)); + BFD_ASSERT_LOCKED; + ASSERT_DIE(EMPTY_LIST(s->request_list)); bfd_free_iface(s->ifa); @@ -508,25 +513,27 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) HASH_REMOVE(p->session_hash_id, HASH_ID, s); HASH_REMOVE(p->session_hash_ip, HASH_IP, s); - sl_free(p->session_slab, s); - - TRACE(D_EVENTS, "Session to %I removed", ip); + 
TRACE(D_EVENTS, "Session to %I removed", s->addr); - birdloop_leave(p->loop); + sl_free(p->session_slab, s); } static void bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) { + ASSERT_DIE(birdloop_inside(p->p.loop)); + BFD_LOCK; if (EMPTY_LIST(s->request_list)) + { + bfd_remove_session(p, s); + BFD_UNLOCK; return; - - birdloop_enter(p->loop); + } struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list)); s->cf = bfd_merge_options(s->ifa->cf, &req->opts); - u32 tx = (s->loc_state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int; + u32 tx = (BFD_LOC_STATE(s).state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int; bfd_session_set_min_tx(s, tx); bfd_session_set_min_rx(s, s->cf.min_rx_int); s->detect_mult = s->cf.multiplier; @@ -534,9 +541,15 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) bfd_session_control_tx_timer(s, 0); - birdloop_leave(p->loop); - TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); + BFD_UNLOCK; +} + +static void +bfd_reconfigure_session_hook(void *data) +{ + struct bfd_session *s = data; + return bfd_reconfigure_session(s->ifa->bfd, s); } @@ -573,7 +586,7 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) struct bfd_config *cf = (struct bfd_config *) (p->p.cf); struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface); - ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface)); + ifa = mb_allocz(p->p.pool, sizeof(struct bfd_iface)); ifa->local = local; ifa->iface = iface; ifa->cf = ic; @@ -604,7 +617,7 @@ bfd_free_iface(struct bfd_iface *ifa) } static void -bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc) +bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct bfd_config *nc) { struct bfd_iface_config *new = bfd_find_iface_config(nc, ifa->iface); struct bfd_iface_config *old = ifa->cf; @@ -618,9 +631,7 @@ bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct 
bfd_con (new->passive != old->passive); /* This should be probably changed to not access ifa->cf from the BFD thread */ - birdloop_enter(p->loop); ifa->cf = new; - birdloop_leave(p->loop); } @@ -629,17 +640,22 @@ bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_con */ static void -bfd_request_notify(struct bfd_request *req, u8 state, u8 diag) +bfd_request_notify(void *data) { - u8 old_state = req->state; + struct bfd_request *req = data; + struct bfd_session_state old = req->old_state; - if (state == old_state) + BFD_LOCK; /* Needed to safely access req->session */ + struct bfd_session_state new = atomic_load_explicit(&req->session->loc, memory_order_acquire); + BFD_UNLOCK; + + if (new.state == old.state) return; - req->state = state; - req->diag = diag; - req->old_state = old_state; - req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN); + req->state = new.state; + req->diag = new.diag; + req->old_state = new; + req->down = (old.state == BFD_STATE_UP) && (new.state == BFD_STATE_DOWN); if (req->hook) req->hook(req); @@ -648,9 +664,12 @@ bfd_request_notify(struct bfd_request *req, u8 state, u8 diag) static int bfd_add_request(struct bfd_proto *p, struct bfd_request *req) { + BFD_ASSERT_LOCKED; + ASSERT_DIE(req->session == &bfd_admin_down); + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); - if (p->p.vrf_set && (p->p.vrf != req->vrf)) + if (p->p.vrf && (p->p.vrf != req->vrf)) return 0; if (ipa_is_ip4(req->addr) ? !cf->accept_ipv4 : !cf->accept_ipv6) @@ -661,7 +680,6 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) uint ifindex = req->iface ? 
req->iface->index : 0; struct bfd_session *s = bfd_find_session_by_addr(p, req->addr, ifindex); - u8 state, diag; if (!s) s = bfd_add_session(p, req->addr, req->local, req->iface, &req->opts); @@ -670,52 +688,67 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) add_tail(&s->request_list, &req->n); req->session = s; - bfd_lock_sessions(p); - state = s->loc_state; - diag = s->loc_diag; - bfd_unlock_sessions(p); - - bfd_request_notify(req, state, diag); + ev_send_self(&req->event); return 1; } static void -bfd_submit_request(struct bfd_request *req) +bfd_pickup_requests(void *_data UNUSED) { - node *n; + struct bfd_proto *p; + node *nn; + WALK_LIST2(p, nn, bfd_global.proto_list, bfd_node) + { + birdloop_enter(p->p.loop); + BFD_LOCK; - WALK_LIST(n, bfd_proto_list) - if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req)) - return; + struct bfd_request *req; + node *rn, *rnxt; + WALK_LIST2_DELSAFE(req, rn, rnxt, bfd_global.wait_list, n) + bfd_add_request(p, req); - rem_node(&req->n); - add_tail(&bfd_wait_list, &req->n); - req->session = NULL; - bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); + BFD_UNLOCK; + birdloop_ping(p->p.loop); + birdloop_leave(p->p.loop); + } } +static event bfd_pickup_event = { .hook = bfd_pickup_requests }; +#define bfd_schedule_pickup() ev_send(&global_event_list, &bfd_pickup_event) + static void bfd_take_requests(struct bfd_proto *p) { + struct bfd_request *req; node *n, *nn; - - WALK_LIST_DELSAFE(n, nn, bfd_wait_list) - bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); + BFD_LOCK; + WALK_LIST2_DELSAFE(req, n, nn, bfd_global.wait_list, n) + bfd_add_request(p, req); + BFD_UNLOCK; } static void bfd_drop_requests(struct bfd_proto *p) { node *n; - - HASH_WALK(p->session_hash_id, next_id, s) + BFD_LOCK; + HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) { - /* We assume that p is not in bfd_proto_list */ WALK_LIST_FIRST(n, s->request_list) - bfd_submit_request(SKIP_BACK(struct bfd_request, n, n)); + { + struct 
bfd_request *req = SKIP_BACK(struct bfd_request, n, n); + rem_node(&req->n); + add_tail(&bfd_global.wait_list, &req->n); + req->session = &bfd_admin_down; + ev_send_self(&req->event); + } + + bfd_schedule_pickup(); + bfd_remove_session(p, s); } HASH_WALK_END; + BFD_UNLOCK; } static struct resclass bfd_request_class; @@ -724,13 +757,11 @@ struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, struct iface *vrf, void (*hook)(struct bfd_request *), void *data, + struct event_list *list, const struct bfd_options *opts) { struct bfd_request *req = ralloc(p, &bfd_request_class); - /* Hack: self-link req->n, we will call rem_node() on it */ - req->n.prev = req->n.next = &req->n; - req->addr = addr; req->local = local; req->iface = iface; @@ -739,10 +770,19 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, if (opts) req->opts = *opts; - bfd_submit_request(req); - req->hook = hook; req->data = data; + req->event = (event) { + .hook = bfd_request_notify, + .data = req, + .list = list, + }; + + BFD_LOCK; + req->session = &bfd_admin_down; + add_tail(&bfd_global.wait_list, &req->n); + bfd_schedule_pickup(); + BFD_UNLOCK; return req; } @@ -750,30 +790,29 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, void bfd_update_request(struct bfd_request *req, const struct bfd_options *opts) { - struct bfd_session *s = req->session; - if (!memcmp(opts, &req->opts, sizeof(const struct bfd_options))) return; + BFD_LOCK; req->opts = *opts; - if (s) - bfd_reconfigure_session(s->ifa->bfd, s); + struct bfd_session *s = req->session; + if (s != &bfd_admin_down) + ev_send_self(&s->update_event); + + BFD_UNLOCK; } static void bfd_request_free(resource *r) { struct bfd_request *req = (struct bfd_request *) r; - struct bfd_session *s = req->session; + BFD_LOCK; rem_node(&req->n); + BFD_UNLOCK; - /* Remove the session if there is no request for it. 
Skip that if - inside notify hooks, will be handled by bfd_notify_hook() itself */ - - if (s && EMPTY_LIST(s->request_list) && !s->notify_running) - bfd_remove_session(s->ifa->bfd, s); + ev_postpone(&req->event); } static void @@ -810,7 +849,7 @@ bfd_neigh_notify(struct neighbor *nb) if ((nb->scope > 0) && !n->req) { ip_addr local = ipa_nonzero(n->local) ? n->local : nb->ifa->ip; - n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, NULL); + n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL); } if ((nb->scope <= 0) && n->req) @@ -827,7 +866,7 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) if (n->multihop) { - n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, NULL); + n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL); return; } @@ -902,107 +941,15 @@ bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new) /* - * BFD notify socket - */ - -/* This core notify code should be replaced after main loop transition to birdloop */ - -int pipe(int pipefd[2]); -void pipe_drain(int fd); -void pipe_kick(int fd); - -static int -bfd_notify_hook(sock *sk, uint len UNUSED) -{ - struct bfd_proto *p = sk->data; - struct bfd_session *s; - list tmp_list; - u8 state, diag; - node *n, *nn; - - pipe_drain(sk->fd); - - bfd_lock_sessions(p); - init_list(&tmp_list); - add_tail_list(&tmp_list, &p->notify_list); - init_list(&p->notify_list); - bfd_unlock_sessions(p); - - WALK_LIST_FIRST(s, tmp_list) - { - bfd_lock_sessions(p); - rem_node(&s->n); - state = s->loc_state; - diag = s->loc_diag; - bfd_unlock_sessions(p); - - s->notify_running = 1; - WALK_LIST_DELSAFE(n, nn, s->request_list) - bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); - s->notify_running = 0; - - /* Remove the session if all requests were removed in 
notify hooks */ - if (EMPTY_LIST(s->request_list)) - bfd_remove_session(p, s); - } - - return 0; -} - -static inline void -bfd_notify_kick(struct bfd_proto *p) -{ - pipe_kick(p->notify_ws->fd); -} - -static void -bfd_noterr_hook(sock *sk, int err) -{ - struct bfd_proto *p = sk->data; - log(L_ERR "%s: Notify socket error: %m", p->p.name, err); -} - -static void -bfd_notify_init(struct bfd_proto *p) -{ - int pfds[2]; - sock *sk; - - int rv = pipe(pfds); - if (rv < 0) - die("pipe: %m"); - - sk = sk_new(p->p.pool); - sk->type = SK_MAGIC; - sk->rx_hook = bfd_notify_hook; - sk->err_hook = bfd_noterr_hook; - sk->fd = pfds[0]; - sk->data = p; - if (sk_open(sk) < 0) - die("bfd: sk_open failed"); - p->notify_rs = sk; - - /* The write sock is not added to any event loop */ - sk = sk_new(p->p.pool); - sk->type = SK_MAGIC; - sk->fd = pfds[1]; - sk->data = p; - sk->flags = SKF_THREAD; - if (sk_open(sk) < 0) - die("bfd: sk_open failed"); - p->notify_ws = sk; -} - - -/* * BFD protocol glue */ void bfd_init_all(void) { - init_list(&bfd_proto_list); - init_list(&bfd_wait_list); + bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global"); + init_list(&bfd_global.wait_list); + init_list(&bfd_global.proto_list); } static struct proto * @@ -1021,22 +968,13 @@ bfd_start(struct proto *P) struct bfd_proto *p = (struct bfd_proto *) P; struct bfd_config *cf = (struct bfd_config *) (P->cf); - p->loop = birdloop_new(); - p->tpool = rp_new(NULL, "BFD thread root"); - pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); - p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); HASH_INIT(p->session_hash_id, P->pool, 8); HASH_INIT(p->session_hash_ip, P->pool, 8); init_list(&p->iface_list); - init_list(&p->notify_list); - bfd_notify_init(p); - - add_tail(&bfd_proto_list, &p->bfd_node); - - birdloop_enter(p->loop); + add_tail(&bfd_global.proto_list, &p->bfd_node); if (cf->accept_ipv4 && cf->accept_direct) p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); @@ -1050,42 +988,33 @@ bfd_start(struct proto *P) if 
(cf->accept_ipv6 && cf->accept_multihop) p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); - birdloop_leave(p->loop); - bfd_take_requests(p); struct bfd_neighbor *n; WALK_LIST(n, cf->neigh_list) bfd_start_neighbor(p, n); - birdloop_start(p->loop); - return PS_UP; } - static int bfd_shutdown(struct proto *P) { struct bfd_proto *p = (struct bfd_proto *) P; - struct bfd_config *cf = (struct bfd_config *) (P->cf); + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); rem_node(&p->bfd_node); - birdloop_stop(p->loop); - - struct bfd_neighbor *n; - WALK_LIST(n, cf->neigh_list) - bfd_stop_neighbor(p, n); + struct bfd_neighbor *bn; + WALK_LIST(bn, cf->neigh_list) + bfd_stop_neighbor(p, bn); bfd_drop_requests(p); - /* FIXME: This is hack */ - birdloop_enter(p->loop); - rfree(p->tpool); - birdloop_leave(p->loop); - - birdloop_free(p->loop); + if (p->rx4_1) sk_stop(p->rx4_1); + if (p->rx4_m) sk_stop(p->rx4_m); + if (p->rx6_1) sk_stop(p->rx6_1); + if (p->rx6_m) sk_stop(p->rx6_m); return PS_DOWN; } @@ -1098,6 +1027,8 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) struct bfd_config *new = (struct bfd_config *) c; struct bfd_iface *ifa; + ASSERT_DIE(birdloop_inside(P->loop)); + /* TODO: Improve accept reconfiguration */ if ((new->accept_ipv4 != old->accept_ipv4) || (new->accept_ipv6 != old->accept_ipv6) || @@ -1105,21 +1036,21 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) (new->accept_multihop != old->accept_multihop)) return 0; - birdloop_mask_wakeups(p->loop); + birdloop_mask_wakeups(p->p.loop); WALK_LIST(ifa, p->iface_list) bfd_reconfigure_iface(p, ifa, new); - HASH_WALK(p->session_hash_id, next_id, s) + HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) { if (s->ifa->changed) bfd_reconfigure_session(p, s); } - HASH_WALK_END; + HASH_WALK_DELSAFE_END; bfd_reconfigure_neighbors(p, new); - birdloop_unmask_wakeups(p->loop); + birdloop_unmask_wakeups(p->p.loop); return 1; } @@ -1140,13 +1071,14 @@ bfd_show_sessions(struct proto *P) { byte 
tbuf[TM_DATETIME_BUFFER_SIZE]; struct bfd_proto *p = (struct bfd_proto *) P; - uint state, diag UNUSED; btime tx_int, timeout; const char *ifname; + birdloop_enter(P->loop); if (p->p.proto_state != PS_UP) { cli_msg(-1020, "%s: is not up", p->p.name); + birdloop_leave(P->loop); return; } @@ -1154,12 +1086,9 @@ bfd_show_sessions(struct proto *P) cli_msg(-1020, "%-25s %-10s %-10s %-12s %8s %8s", "IP address", "Interface", "State", "Since", "Interval", "Timeout"); - HASH_WALK(p->session_hash_id, next_id, s) { - /* FIXME: this is thread-unsafe, but perhaps harmless */ - state = s->loc_state; - diag = s->loc_diag; + uint state = BFD_LOC_STATE(s).state; ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---"; tx_int = s->last_tx ? MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0; timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult; @@ -1171,6 +1100,8 @@ bfd_show_sessions(struct proto *P) s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout); } HASH_WALK_END; + + birdloop_leave(P->loop); } diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 91fdaa60..b3266857 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -17,12 +17,12 @@ #include "nest/password.h" #include "conf/conf.h" #include "lib/hash.h" +#include "lib/io-loop.h" #include "lib/resource.h" #include "lib/socket.h" #include "lib/string.h" #include "nest/bfd.h" -#include "io.h" #define BFD_CONTROL_PORT 3784 @@ -87,19 +87,13 @@ struct bfd_neighbor struct bfd_proto { struct proto p; - struct birdloop *loop; - pool *tpool; - pthread_spinlock_t lock; + node bfd_node; slab *session_slab; HASH(struct bfd_session) session_hash_id; HASH(struct bfd_session) session_hash_ip; - sock *notify_rs; - sock *notify_ws; - list notify_list; - sock *rx4_1; sock *rx6_1; sock *rx4_m; @@ -122,7 +116,6 @@ struct bfd_iface struct bfd_session { - node n; ip_addr addr; /* Address of session */ struct bfd_iface *ifa; /* Iface associated with session */ struct bfd_session *next_id; /* Next in 
bfd.session_hash_id */ @@ -133,14 +126,15 @@ struct bfd_session u8 poll_active; u8 poll_scheduled; - u8 loc_state; - u8 rem_state; - u8 loc_diag; - u8 rem_diag; + _Atomic struct bfd_session_state loc; + struct bfd_session_state rem; +#define BFD_LOC_STATE(s) atomic_load_explicit(&(s)->loc, memory_order_relaxed) + u32 loc_id; /* Local session ID (local discriminator) */ u32 rem_id; /* Remote session ID (remote discriminator) */ - struct bfd_session_config cf; /* Static configuration parameters */ + struct bfd_session_config cf; /* Static configuration parameters */ + event update_event; /* Reconfiguration requested */ u32 des_min_tx_int; /* Desired min rx interval, local option */ u32 des_min_tx_new; /* Used for des_min_tx_int change */ @@ -162,6 +156,7 @@ struct bfd_session list request_list; /* List of client requests (struct bfd_request) */ btime last_state_change; /* Time of last state change */ + btime last_reqlist_check; /* Time of last check whether the request list is not empty */ u8 notify_running; /* 1 if notify hooks are running */ u8 rx_csn_known; /* Received crypto sequence number is known */ @@ -208,10 +203,6 @@ extern const char *bfd_state_names[]; extern const u8 bfd_auth_type_to_hash_alg[]; - -static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); } -static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); } - /* bfd.c */ struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id); struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex); diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index df1cba42..ed5479fb 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -36,6 +36,7 @@ proto: bfd_proto ; bfd_proto_start: proto_start BFD { this_proto = proto_config_new(&proto_bfd, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); init_list(&BFD_CFG->patt_list); init_list(&BFD_CFG->neigh_list); BFD_CFG->accept_ipv4 =
BFD_CFG->accept_ipv6 = 1; diff --git a/proto/bfd/io.c b/proto/bfd/io.c deleted file mode 100644 index 1cd9365a..00000000 --- a/proto/bfd/io.c +++ /dev/null @@ -1,535 +0,0 @@ -/* - * BIRD -- I/O and event loop - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <fcntl.h> -#include <poll.h> -#include <pthread.h> -#include <time.h> -#include <sys/time.h> - -#include "nest/bird.h" -#include "proto/bfd/io.h" - -#include "lib/buffer.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" - - -struct birdloop -{ - pool *pool; - pthread_t thread; - pthread_mutex_t mutex; - - u8 stop_called; - u8 poll_active; - u8 wakeup_masked; - int wakeup_fds[2]; - - struct timeloop time; - list event_list; - list sock_list; - uint sock_num; - - BUFFER(sock *) poll_sk; - BUFFER(struct pollfd) poll_fd; - u8 poll_changed; - u8 close_scheduled; -}; - - -/* - * Current thread context - */ - -static pthread_key_t current_loop_key; -extern pthread_key_t current_time_key; - -static inline struct birdloop * -birdloop_current(void) -{ - return pthread_getspecific(current_loop_key); -} - -static inline void -birdloop_set_current(struct birdloop *loop) -{ - pthread_setspecific(current_loop_key, loop); - pthread_setspecific(current_time_key, loop ? 
&loop->time : &main_timeloop); -} - -static inline void -birdloop_init_current(void) -{ - pthread_key_create(&current_loop_key, NULL); -} - - -/* - * Wakeup code for birdloop - */ - -static void -pipe_new(int *pfds) -{ - int rv = pipe(pfds); - if (rv < 0) - die("pipe: %m"); - - if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) - die("fcntl(O_NONBLOCK): %m"); - - if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) - die("fcntl(O_NONBLOCK): %m"); -} - -void -pipe_drain(int fd) -{ - char buf[64]; - int rv; - - try: - rv = read(fd, buf, 64); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) - return; - die("wakeup read: %m"); - } - if (rv == 64) - goto try; -} - -void -pipe_kick(int fd) -{ - u64 v = 1; - int rv; - - try: - rv = write(fd, &v, sizeof(u64)); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) - return; - die("wakeup write: %m"); - } -} - -static inline void -wakeup_init(struct birdloop *loop) -{ - pipe_new(loop->wakeup_fds); -} - -static inline void -wakeup_drain(struct birdloop *loop) -{ - pipe_drain(loop->wakeup_fds[0]); -} - -static inline void -wakeup_do_kick(struct birdloop *loop) -{ - pipe_kick(loop->wakeup_fds[1]); -} - -static inline void -wakeup_kick(struct birdloop *loop) -{ - if (!loop->wakeup_masked) - wakeup_do_kick(loop); - else - loop->wakeup_masked = 2; -} - -/* For notifications from outside */ -void -wakeup_kick_current(void) -{ - struct birdloop *loop = birdloop_current(); - - if (loop && loop->poll_active) - wakeup_kick(loop); -} - - -/* - * Events - */ - -static inline uint -events_waiting(struct birdloop *loop) -{ - return !EMPTY_LIST(loop->event_list); -} - -static inline void -events_init(struct birdloop *loop) -{ - init_list(&loop->event_list); -} - -static void -events_fire(struct birdloop *loop) -{ - times_update(&loop->time); - ev_run_list(&loop->event_list); -} - -void -ev2_schedule(event *e) -{ - struct birdloop *loop = birdloop_current(); - - if (loop->poll_active &&
EMPTY_LIST(loop->event_list)) - wakeup_kick(loop); - - if (e->n.next) - rem_node(&e->n); - - add_tail(&loop->event_list, &e->n); -} - - -/* - * Sockets - */ - -static void -sockets_init(struct birdloop *loop) -{ - init_list(&loop->sock_list); - loop->sock_num = 0; - - BUFFER_INIT(loop->poll_sk, loop->pool, 4); - BUFFER_INIT(loop->poll_fd, loop->pool, 4); - loop->poll_changed = 1; /* add wakeup fd */ -} - -static void -sockets_add(struct birdloop *loop, sock *s) -{ - add_tail(&loop->sock_list, &s->n); - loop->sock_num++; - - s->index = -1; - loop->poll_changed = 1; - - if (loop->poll_active) - wakeup_kick(loop); -} - -void -sk_start(sock *s) -{ - struct birdloop *loop = birdloop_current(); - - sockets_add(loop, s); -} - -static void -sockets_remove(struct birdloop *loop, sock *s) -{ - rem_node(&s->n); - loop->sock_num--; - - if (s->index >= 0) - loop->poll_sk.data[s->index] = NULL; - - s->index = -1; - loop->poll_changed = 1; - - /* Wakeup moved to sk_stop() */ -} - -void -sk_stop(sock *s) -{ - struct birdloop *loop = birdloop_current(); - - sockets_remove(loop, s); - - if (loop->poll_active) - { - loop->close_scheduled = 1; - wakeup_kick(loop); - } - else - close(s->fd); - - s->fd = -1; -} - -static inline uint sk_want_events(sock *s) -{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? 
POLLOUT : 0); } - -/* -FIXME: this should be called from sock code - -static void -sockets_update(struct birdloop *loop, sock *s) -{ - if (s->index >= 0) - loop->poll_fd.data[s->index].events = sk_want_events(s); -} -*/ - -static void -sockets_prepare(struct birdloop *loop) -{ - BUFFER_SET(loop->poll_sk, loop->sock_num + 1); - BUFFER_SET(loop->poll_fd, loop->sock_num + 1); - - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - uint i = 0; - node *n; - - WALK_LIST(n, loop->sock_list) - { - sock *s = SKIP_BACK(sock, n, n); - - ASSERT(i < loop->sock_num); - - s->index = i; - *psk = s; - pfd->fd = s->fd; - pfd->events = sk_want_events(s); - pfd->revents = 0; - - pfd++; - psk++; - i++; - } - - ASSERT(i == loop->sock_num); - - /* Add internal wakeup fd */ - *psk = NULL; - pfd->fd = loop->wakeup_fds[0]; - pfd->events = POLLIN; - pfd->revents = 0; - - loop->poll_changed = 0; -} - -static void -sockets_close_fds(struct birdloop *loop) -{ - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - int poll_num = loop->poll_fd.used - 1; - - int i; - for (i = 0; i < poll_num; i++) - if (psk[i] == NULL) - close(pfd[i].fd); - - loop->close_scheduled = 0; -} - -int sk_read(sock *s, int revents); -int sk_write(sock *s); - -static void -sockets_fire(struct birdloop *loop) -{ - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - int poll_num = loop->poll_fd.used - 1; - - times_update(&loop->time); - - /* Last fd is internal wakeup fd */ - if (pfd[poll_num].revents & POLLIN) - wakeup_drain(loop); - - int i; - for (i = 0; i < poll_num; pfd++, psk++, i++) - { - int e = 1; - - if (! 
pfd->revents) - continue; - - if (pfd->revents & POLLNVAL) - die("poll: invalid fd %d", pfd->fd); - - if (pfd->revents & POLLIN) - while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk, 0); - - e = 1; - if (pfd->revents & POLLOUT) - while (e && *psk) - e = sk_write(*psk); - } -} - - -/* - * Birdloop - */ - -static void * birdloop_main(void *arg); - -struct birdloop * -birdloop_new(void) -{ - /* FIXME: this init should be elsewhere and thread-safe */ - static int init = 0; - if (!init) - { birdloop_init_current(); init = 1; } - - pool *p = rp_new(NULL, "Birdloop root"); - struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); - loop->pool = p; - pthread_mutex_init(&loop->mutex, NULL); - - wakeup_init(loop); - - events_init(loop); - timers_init(&loop->time, p); - sockets_init(loop); - - return loop; -} - -void -birdloop_start(struct birdloop *loop) -{ - int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); - if (rv) - die("pthread_create(): %M", rv); -} - -void -birdloop_stop(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - loop->stop_called = 1; - wakeup_do_kick(loop); - pthread_mutex_unlock(&loop->mutex); - - int rv = pthread_join(loop->thread, NULL); - if (rv) - die("pthread_join(): %M", rv); -} - -void -birdloop_free(struct birdloop *loop) -{ - rfree(loop->pool); -} - - -void -birdloop_enter(struct birdloop *loop) -{ - /* TODO: these functions could save and restore old context */ - pthread_mutex_lock(&loop->mutex); - birdloop_set_current(loop); -} - -void -birdloop_leave(struct birdloop *loop) -{ - /* TODO: these functions could save and restore old context */ - birdloop_set_current(NULL); - pthread_mutex_unlock(&loop->mutex); -} - -void -birdloop_mask_wakeups(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - loop->wakeup_masked = 1; - pthread_mutex_unlock(&loop->mutex); -} - -void -birdloop_unmask_wakeups(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - if (loop->wakeup_masked == 2) - 
wakeup_do_kick(loop); - loop->wakeup_masked = 0; - pthread_mutex_unlock(&loop->mutex); -} - -static void * -birdloop_main(void *arg) -{ - struct birdloop *loop = arg; - timer *t; - int rv, timeout; - - birdloop_set_current(loop); - - pthread_mutex_lock(&loop->mutex); - while (1) - { - events_fire(loop); - timers_fire(&loop->time); - - times_update(&loop->time); - if (events_waiting(loop)) - timeout = 0; - else if (t = timers_first(&loop->time)) - timeout = (tm_remains(t) TO_MS) + 1; - else - timeout = -1; - - if (loop->poll_changed) - sockets_prepare(loop); - - loop->poll_active = 1; - pthread_mutex_unlock(&loop->mutex); - - try: - rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); - if (rv < 0) - { - if (errno == EINTR || errno == EAGAIN) - goto try; - die("poll: %m"); - } - - pthread_mutex_lock(&loop->mutex); - loop->poll_active = 0; - - if (loop->close_scheduled) - sockets_close_fds(loop); - - if (loop->stop_called) - break; - - if (rv) - sockets_fire(loop); - - timers_fire(&loop->time); - } - - loop->stop_called = 0; - pthread_mutex_unlock(&loop->mutex); - - return NULL; -} - - diff --git a/proto/bfd/io.h b/proto/bfd/io.h deleted file mode 100644 index ec706e9a..00000000 --- a/proto/bfd/io.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * BIRD -- I/O and event loop - * - * Can be freely distributed and used under the terms of the GNU GPL. 
- */ - -#ifndef _BIRD_BFD_IO_H_ -#define _BIRD_BFD_IO_H_ - -#include "nest/bird.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" - - -void ev2_schedule(event *e); - -void sk_start(sock *s); -void sk_stop(sock *s); - -struct birdloop *birdloop_new(void); -void birdloop_start(struct birdloop *loop); -void birdloop_stop(struct birdloop *loop); -void birdloop_free(struct birdloop *loop); - -void birdloop_enter(struct birdloop *loop); -void birdloop_leave(struct birdloop *loop); -void birdloop_mask_wakeups(struct birdloop *loop); -void birdloop_unmask_wakeups(struct birdloop *loop); - - -#endif /* _BIRD_BFD_IO_H_ */ diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 7618e20f..893d582d 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -290,9 +290,11 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) if (!sk) return; + struct bfd_session_state loc = BFD_LOC_STATE(s); + pkt = (struct bfd_ctl_packet *) sk->tbuf; - pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag); - pkt->flags = bfd_pack_flags(s->loc_state, 0); + pkt->vdiag = bfd_pack_vdiag(1, loc.diag); + pkt->flags = bfd_pack_flags(loc.state, 0); pkt->detect_mult = s->detect_mult; pkt->length = BFD_BASE_LEN; pkt->snd_id = htonl(s->loc_id); @@ -313,7 +315,7 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name); TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr, - bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb)); + bfd_state_names[loc.state], bfd_format_flags(pkt->flags, fb)); sk_send_to(sk, pkt->length, s->addr, sk->dport); } @@ -382,16 +384,17 @@ bfd_rx_hook(sock *sk, uint len) u32 old_rx_int = s->rem_min_rx_int; s->rem_id= ntohl(pkt->snd_id); - s->rem_state = bfd_pkt_get_state(pkt); - s->rem_diag = bfd_pkt_get_diag(pkt); + s->rem.state = bfd_pkt_get_state(pkt); + s->rem.diag = bfd_pkt_get_diag(pkt); 
s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND; s->rem_min_tx_int = ntohl(pkt->des_min_tx_int); s->rem_min_rx_int = ntohl(pkt->req_min_rx_int); s->rem_detect_mult = pkt->detect_mult; TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr, - bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb)); + bfd_state_names[s->rem.state], bfd_format_flags(pkt->flags, fb)); + /* This call may drop the session, must be called in tail position */ bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int); return 1; @@ -410,7 +413,7 @@ bfd_err_hook(sock *sk, int err) sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->subtype = af; sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; @@ -425,6 +428,7 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) sk->tos = IP_PREC_INTERNET_CONTROL; sk->priority = sk_priority_control; sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); + sk->loop = p->p.loop; if (sk_open(sk) < 0) goto err; @@ -441,7 +445,7 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->saddr = local; sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; @@ -457,6 +461,7 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) sk->priority = sk_priority_control; sk->ttl = ifa ? 
255 : -1; sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT; + sk->loop = p->p.loop; if (sk_open(sk) < 0) goto err; diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 24ba00ba..02b07410 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -333,26 +333,26 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) { - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); - if (!a) + eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); + if (!ea) return 0; - const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC); + const byte *b = bgp_aigp_get_tlv(ea->u.ptr, BGP_AIGP_METRIC); if (!b) return 0; u64 aigp = get_u64(b + 3); - u64 step = e->attrs->igp_metric; + u64 step = a->igp_metric; - if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN)) + if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; if (!step) step = 1; - *ad = a->u.ptr; + *ad = ea->u.ptr; *metric = aigp + step; if (*metric < aigp) *metric = BGP_AIGP_MAX; @@ -371,6 +371,13 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) return *metric < IGP_METRIC_UNKNOWN; } +u32 +bgp_rte_igp_metric(struct rte *rt) +{ + u64 metric = bgp_total_aigp_metric(rt->attrs); + return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); +} + /* * Attribute hooks @@ -896,7 +903,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla static void bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) { - net_addr *n = s->route->net->n.addr; + const net_addr *n = s->route->net; u32 *labels = (u32 *) a->u.ptr->data; uint lnum = a->u.ptr->length / 4; @@ -1617,7 +1624,7 @@ bgp_free_prefix_table(struct bgp_channel *c) } static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) +bgp_get_prefix(struct bgp_channel *c, 
const net_addr *net, u32 path_id) { u32 hash = net_hash(net) ^ u32_hash(path_id); struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); @@ -1661,12 +1668,10 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) */ int -bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +bgp_preexport(struct channel *c, rte *e) { - rte *e = *new; - struct proto *SRC = e->attrs->src->proto; - struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; + struct bgp_proto *p = (struct bgp_proto *) (c->proto); + struct bgp_proto *src = bgp_rte_proto(e); /* Reject our routes */ if (src == p) @@ -1690,11 +1695,11 @@ bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) } /* Handle well-known communities, RFC 1997 */ - struct eattr *c; + struct eattr *com; if (p->cf->interpret_communities && - (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) + (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) { - const struct adata *d = c->u.ptr; + const struct adata *d = com->u.ptr; /* Do not export anywhere */ if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) @@ -1719,8 +1724,7 @@ bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) static ea_list * bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { - struct proto *SRC = e->attrs->src->proto; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? 
(void *) SRC : NULL; + struct bgp_proto *src = bgp_rte_proto(e); struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; @@ -1774,7 +1778,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* AIGP attribute - accumulate local metric or originate new one */ u64 metric; if (s.local_next_hop && - (bgp_total_aigp_metric_(e, &metric, &ad) || + (bgp_total_aigp_metric_(e->attrs, &metric, &ad) || (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) { ad = bgp_aigp_set_metric(pool, ad, metric); @@ -1833,7 +1837,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at } void -bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) +bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old) { struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; @@ -1843,21 +1847,19 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) if (new) { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, C->rte_update_pool); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); - path = new->attrs->src->global_id; - - lp_flush(bgp_linpool2); + path = new->src->global_id; } else { buck = bgp_get_withdraw_bucket(c); - path = old->attrs->src->global_id; + path = old->src->global_id; } - px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0); + px = bgp_get_prefix(c, n, c->add_path_tx ? 
path : 0); add_tail(&buck->prefixes, &px->buck_node); bgp_schedule_packet(p->conn, c, PKT_UPDATE); @@ -1874,42 +1876,52 @@ bgp_get_neighbor(rte *r) return as; /* If AS_PATH is not defined, we treat rte as locally originated */ - struct bgp_proto *p = (void *) r->attrs->src->proto; + struct bgp_proto *p = bgp_rte_proto(r); return p->cf->confederation ?: p->local_as; } static inline int rte_stale(rte *r) { - if (r->u.bgp.stale < 0) + if (r->pflags & BGP_REF_STALE) + return 1; + + if (r->pflags & BGP_REF_NOT_STALE) + return 0; + + /* If staleness is unknown, compute and cache it */ + eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE)) { - /* If staleness is unknown, compute and cache it */ - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); + r->pflags |= BGP_REF_STALE; + return 1; + } + else + { + r->pflags |= BGP_REF_NOT_STALE; + return 0; } - - return r->u.bgp.stale; } int bgp_rte_better(rte *new, rte *old) { - struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto; - struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto; + struct bgp_proto *new_bgp = bgp_rte_proto(new); + struct bgp_proto *old_bgp = bgp_rte_proto(old); eattr *x, *y; u32 n, o; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - n = new->u.bgp.suppressed; - o = old->u.bgp.suppressed; + n = new->pflags & BGP_REF_SUPPRESSED; + o = old->pflags & BGP_REF_SUPPRESSED; if (n > o) return 0; if (n < o) return 1; /* RFC 4271 9.1.2.1. 
Route resolvability test */ - n = rte_resolvable(new); - o = rte_resolvable(old); + n = rta_resolvable(new->attrs); + o = rta_resolvable(old->attrs); if (n > o) return 1; if (n < o) @@ -1934,8 +1946,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 7311 4.1 - Apply AIGP metric */ - u64 n2 = bgp_total_aigp_metric(new); - u64 o2 = bgp_total_aigp_metric(old); + u64 n2 = bgp_total_aigp_metric(new->attrs); + u64 o2 = bgp_total_aigp_metric(old->attrs); if (n2 < o2) return 1; if (n2 > o2) @@ -2039,21 +2051,18 @@ bgp_rte_better(rte *new, rte *old) int bgp_rte_mergable(rte *pri, rte *sec) { - struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto; - struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto; + struct bgp_proto *pri_bgp = bgp_rte_proto(pri); + struct bgp_proto *sec_bgp = bgp_rte_proto(sec); eattr *x, *y; u32 p, s; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - if (pri->u.bgp.suppressed != sec->u.bgp.suppressed) + /* LLGR draft - depreference stale routes */ + if (pri->pflags != sec->pflags) return 0; /* RFC 4271 9.1.2.1. 
Route resolvability test */ - if (rte_resolvable(pri) != rte_resolvable(sec)) - return 0; - - /* LLGR draft - depreference stale routes */ - if (rte_stale(pri) != rte_stale(sec)) + if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs)) return 0; /* Start with local preferences */ @@ -2118,24 +2127,23 @@ bgp_rte_mergable(rte *pri, rte *sec) static inline int same_group(rte *r, u32 lpref, u32 lasn) { - return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn); + return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn); } static inline int -use_deterministic_med(rte *r) +use_deterministic_med(struct rte_storage *r) { - struct proto *P = r->attrs->src->proto; - return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; + struct bgp_proto *p = bgp_rte_proto(&r->rte); + return p && p->cf->deterministic_med; } int -bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) +bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best) { - rte *r, *s; rte *key = new ? new : old; - u32 lpref = key->pref; + u32 lpref = key->attrs->pref; u32 lasn = bgp_get_neighbor(key); - int old_suppressed = old ? old->u.bgp.suppressed : 0; + int old_suppressed = old ? 
!!(old->pflags & BGP_REF_SUPPRESSED) : 0; /* * Proper RFC 4271 path selection is a bit complicated, it cannot be @@ -2187,11 +2195,11 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) */ if (new) - new->u.bgp.suppressed = 1; + new->pflags |= BGP_REF_SUPPRESSED; if (old) { - old->u.bgp.suppressed = 1; + old->pflags |= BGP_REF_SUPPRESSED; /* The fast case - replace not best with worse (or remove not best) */ if (old_suppressed && !(new && bgp_rte_better(new, old))) @@ -2199,13 +2207,13 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) } /* The default case - find a new best-in-group route */ - r = new; /* new may not be in the list */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) + rte *r = new; /* new may not be in the list */ + for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { - s->u.bgp.suppressed = 1; - if (!r || bgp_rte_better(s, r)) - r = s; + s->rte.pflags |= BGP_REF_SUPPRESSED; + if (!r || bgp_rte_better(&s->rte, r)) + r = &s->rte; } /* Simple case - the last route in group disappears */ @@ -2214,16 +2222,16 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* Found if new is mergable with best-in-group */ if (new && (new != r) && bgp_rte_mergable(r, new)) - new->u.bgp.suppressed = 0; + new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) - if ((s != r) && bgp_rte_mergable(r, s)) - s->u.bgp.suppressed = 0; + for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) + if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) + s->rte.pflags &= ~BGP_REF_SUPPRESSED; /* 
Found best-in-group */ - r->u.bgp.suppressed = 0; + r->pflags &= ~BGP_REF_SUPPRESSED; /* * There are generally two reasons why we have to force @@ -2255,25 +2263,44 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return !old_suppressed; } -struct rte * -bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +void +bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - const struct adata *ad = a ? a->u.ptr : NULL; - uint flags = a ? a->flags : BAF_PARTIAL; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); - if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) - return NULL; + do { + rte *r = feed[--count]; + if (r->sender != c->c.in_req.hook) + continue; + + /* A new route, do not mark as stale */ + if (r->stale_cycle == c->c.in_req.hook->stale_set) + continue; + + eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + const struct adata *ad = ea ? ea->u.ptr : NULL; + uint flags = ea ? 
ea->flags : BAF_PARTIAL; + + rte e0 = *r; + e0.flags |= REF_USE_STALE; - if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) - return r; + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + rte_import(&c->c.in_req, n, NULL, r->src); - r = rte_cow_rta(r, pool); - bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags, - int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); - r->u.bgp.stale = 1; + else if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + rte_import(&c->c.in_req, n, &e0, r->src); - return r; + else { + rta *a = e0.attrs = rta_do_cow(r->attrs, c->c.rte_update_pool); + + bgp_set_attr_ptr(&(a->eattrs), c->c.rte_update_pool, BA_COMMUNITY, flags, + int_set_add(c->c.rte_update_pool, ad, BGP_COMM_LLGR_STALE)); + e0.pflags |= BGP_REF_STALE; + + rte_import(&c->c.in_req, n, &e0, r->src); + lp_flush(c->c.rte_update_pool); + } + } while (count); } @@ -2356,22 +2383,22 @@ bgp_get_route_info(rte *e, byte *buf) eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); u32 origas; - buf += bsprintf(buf, " (%d", e->pref); + buf += bsprintf(buf, " (%d", e->attrs->pref); - if (e->u.bgp.suppressed) + if (e->pflags & BGP_REF_SUPPRESSED) buf += bsprintf(buf, "-"); if (rte_stale(e)) buf += bsprintf(buf, "s"); - u64 metric = bgp_total_aigp_metric(e); + u64 metric = bgp_total_aigp_metric(e->attrs); if (metric < BGP_AIGP_MAX) { buf += bsprintf(buf, "/%lu", metric); } else if (e->attrs->igp_metric) { - if (!rte_resolvable(e)) + if (!rta_resolvable(e->attrs)) buf += bsprintf(buf, "/-"); else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) buf += bsprintf(buf, "/?"); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index e4d754b1..65cc3a40 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -124,11 +124,8 @@ #include "bgp.h" - -struct linpool *bgp_linpool; /* Global temporary pool */ -struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ -static list bgp_sockets; /* Global list of listening sockets */ - +/* Global list of listening 
sockets */ +static list STATIC_LIST_INIT(bgp_sockets); static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); @@ -140,6 +137,15 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_graceful_restart_feed(struct bgp_channel *c); +static inline void channel_refresh_end_reload(struct channel *c) +{ + channel_refresh_end(c); + + if (c->in_table) + channel_request_reload(c); +} + /** * bgp_open - open a BGP instance * @p: BGP instance @@ -152,16 +158,14 @@ static void bgp_listen_sock_err(sock *sk UNUSED, int err); static int bgp_open(struct bgp_proto *p) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); + struct bgp_socket *bs = NULL; struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; - /* FIXME: Add some global init? 
*/ - if (!bgp_linpool) - init_list(&bgp_sockets); - /* We assume that cf->iface is defined iff cf->local_ip is link-local */ WALK_LIST(bs, bgp_sockets) @@ -180,7 +184,7 @@ bgp_open(struct bgp_proto *p) sk->sport = port; sk->iface = ifa; sk->vrf = p->p.vrf; - sk->flags = 0; + sk->flags = SKF_PASSIVE_THREAD; sk->tos = IP_PREC_INTERNET_CONTROL; sk->rbsize = BGP_RX_BUFFER_SIZE; sk->tbsize = BGP_TX_BUFFER_SIZE; @@ -198,12 +202,6 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); - if (!bgp_linpool) - { - bgp_linpool = lp_new_default(proto_pool); - bgp_linpool2 = lp_new_default(proto_pool); - } - return 0; err: @@ -222,6 +220,7 @@ err: static void bgp_close(struct bgp_proto *p) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); struct bgp_socket *bs = p->sock; ASSERT(bs && bs->uc); @@ -232,15 +231,6 @@ bgp_close(struct bgp_proto *p) rfree(bs->sk); rem_node(&bs->n); mb_free(bs); - - if (!EMPTY_LIST(bgp_sockets)) - return; - - rfree(bgp_linpool); - bgp_linpool = NULL; - - rfree(bgp_linpool2); - bgp_linpool2 = NULL; } static inline int @@ -325,7 +315,7 @@ bgp_initiate(struct bgp_proto *p) { p->start_state = BSS_DELAY; BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); - bgp_start_timer(p->startup_timer, p->startup_delay); + bgp_start_timer(p, p->startup_timer, p->startup_delay); } else bgp_startup(p); @@ -346,6 +336,7 @@ err1: /** * bgp_start_timer - start a BGP timer + * @p: bgp_proto which the timer belongs to * @t: timer * @value: time (in seconds) to fire (0 to disable the timer) * @@ -354,14 +345,16 @@ err1: * timers. 
*/ void -bgp_start_timer(timer *t, uint value) +bgp_start_timer(struct bgp_proto *p, timer *t, uint value) { + BGP_ASSERT_INSIDE(p); + if (value) { /* The randomization procedure is specified in RFC 4271 section 10 */ btime time = value S; btime randomize = random() % ((time / 4) + 1); - tm_start(t, time - randomize); + tm_start_in(t, time - randomize, p->p.loop); } else tm_stop(t); @@ -377,7 +370,7 @@ bgp_start_timer(timer *t, uint value) void bgp_close_conn(struct bgp_conn *conn) { - // struct bgp_proto *p = conn->bgp; + BGP_ASSERT_INSIDE(conn->bgp); DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; @@ -469,6 +462,8 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len static void bgp_down(struct bgp_proto *p) { + bgp_start_timer(p, p->startup_timer, 0); + if (p->start_state > BSS_PREPARE) { bgp_setup_auth(p, 0); @@ -482,21 +477,34 @@ bgp_down(struct bgp_proto *p) } static void -bgp_decision(void *vp) +bgp_active_event(void *vp) { struct bgp_proto *p = vp; - DBG("BGP: Decision start\n"); + BGP_ASSERT_INSIDE(p); + + DBG("%s: Decision start\n", p->p.name); if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state != BS_OPENCONFIRM) && !p->passive) bgp_active(p); +} + +static void +bgp_down_event(void *vp) +{ + struct bgp_proto *p = vp; + BGP_ENTER(p); + + DBG("%s: Down event\n", p->p.name); if ((p->p.proto_state == PS_STOP) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state == BS_IDLE)) bgp_down(p); + + BGP_LEAVE(p); } static struct bgp_proto * @@ -528,7 +536,7 @@ bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) proto_notify_state(&p->p, PS_STOP); bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); - ev_schedule(p->event); + ev_send_loop(&main_birdloop, p->down_event); } static inline void @@ -575,6 +583,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->link_addr 
= p->neigh->iface->llv6->ip; conn->sk->fast_rx = 0; + conn->sk->cork = &rt_cork; p->conn = conn; p->last_error_class = 0; @@ -719,7 +728,7 @@ bgp_conn_enter_close_state(struct bgp_conn *conn) conn->sk->rx_hook = NULL; /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */ - bgp_start_timer(conn->hold_timer, 10); + bgp_start_timer(p, conn->hold_timer, 10); if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); @@ -733,7 +742,8 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn) bgp_close_conn(conn); bgp_conn_set_state(conn, BS_IDLE); - ev_schedule(p->event); + ev_send_loop(p->p.loop, p->active_event); + ev_send_loop(&main_birdloop, p->down_event); if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); @@ -775,25 +785,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); + channel_refresh_begin(&c->c); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + channel_refresh_end(&c->c); + channel_refresh_begin(&c->c); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + channel_refresh_begin(&c->c); + bgp_graceful_restart_feed(c); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + channel_refresh_begin(&c->c); + channel_refresh_end(&c->c); } /* Reset bucket and prefix tables */ @@ -808,9 +818,54 @@ bgp_handle_graceful_restart(struct bgp_proto *p) ASSERT(p->gr_active_num > 0); proto_notify_state(&p->p, PS_START); - tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); + tm_start_in(p->gr_timer, p->conn->remote_caps->gr_time S, p->p.loop); +} + +static void +bgp_graceful_restart_feed_done(struct rt_export_request *req) +{ + req->hook = NULL; } +static void +bgp_graceful_restart_feed_dump_req(struct rt_export_request *req) +{ + struct bgp_channel *c = 
SKIP_BACK(struct bgp_channel, stale_feed, req); + debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req); +} + +static void +bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct bgp_proto *p = (void *) c->c.proto; + BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state)); + + if (state == TES_READY) + rt_stop_export(req, bgp_graceful_restart_feed_done); +} + +static void +bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED) +{ /* Nothing to do */ } + +static void +bgp_graceful_restart_feed(struct bgp_channel *c) +{ + c->stale_feed = (struct rt_export_request) { + .name = "BGP-GR", + .list = &global_work_list, + .trace_routes = c->c.debug | c->c.proto->debug, + .dump_req = bgp_graceful_restart_feed_dump_req, + .log_state_change = bgp_graceful_restart_feed_log_state_change, + .export_bulk = bgp_rte_modify_stale, + .export_one = bgp_graceful_restart_drop_export, + }; + + rt_request_export(c->c.table, &c->stale_feed); +} + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -833,8 +888,11 @@ bgp_graceful_restart_done(struct bgp_channel *c) if (!p->gr_active_num) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + if (c->stale_feed.hook) + rt_stop_export(&c->stale_feed, bgp_graceful_restart_feed_done); + tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + channel_refresh_end_reload(&c->c); } /** @@ -875,8 +933,8 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; - tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + tm_start_in(c->stale_timer, c->stale_time S, p->p.loop); + bgp_graceful_restart_feed(c); } } else @@ -913,11 +971,11 @@ 
bgp_refresh_begin(struct bgp_channel *c) if (c->load_state == BFS_LOADING) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } - c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); + if (c->load_state == BFS_REFRESHING) + channel_refresh_end(&c->c); - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + c->load_state = BFS_REFRESHING; + channel_refresh_begin(&c->c); } /** @@ -938,10 +996,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); + channel_refresh_end_reload(&c->c); } @@ -955,7 +1010,7 @@ bgp_send_open(struct bgp_conn *conn) bgp_prepare_capabilities(conn); bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); - bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); + bgp_start_timer(conn->bgp, conn->hold_timer, conn->bgp->cf->initial_hold_time); } static void @@ -1032,7 +1087,7 @@ bgp_hold_timeout(timer *t) and perhaps just not processed BGP packets in time. 
*/ if (sk_rx_ready(conn->sk) > 0) - bgp_start_timer(conn->hold_timer, 10); + bgp_start_timer(p, conn->hold_timer, 10); else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready) { BGP_TRACE(D_EVENTS, "Hold timer expired"); @@ -1077,10 +1132,12 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) static void bgp_setup_sk(struct bgp_conn *conn, sock *s) { + ASSERT_DIE(s->flags & SKF_THREAD); s->data = conn; s->err_hook = bgp_sock_err; s->fast_rx = 1; conn->sk = s; + sk_start(s); } static void @@ -1089,10 +1146,12 @@ bgp_active(struct bgp_proto *p) int delay = MAX(1, p->cf->connect_delay_time); struct bgp_conn *conn = &p->outgoing_conn; + BGP_ASSERT_INSIDE(p); + BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); bgp_setup_conn(p, conn); bgp_conn_set_state(conn, BS_ACTIVE); - bgp_start_timer(conn->connect_timer, delay); + bgp_start_timer(p, conn->connect_timer, delay); } /** @@ -1109,9 +1168,12 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c struct bgp_conn *conn = &p->outgoing_conn; int hops = p->cf->multihop ? : 1; + BGP_ASSERT_INSIDE(p); + DBG("BGP: Connecting\n"); sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; + s->flags |= SKF_THREAD; s->saddr = p->local_ip; s->daddr = p->remote_ip; s->dport = p->cf->remote_port; @@ -1139,7 +1201,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c goto err; DBG("BGP: Waiting for connect success\n"); - bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time); + bgp_start_timer(p, conn->connect_timer, p->cf->connect_retry_time); return; err: @@ -1211,6 +1273,18 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) return 0; } + if (p->p.loop == &main_birdloop) + { + /* Protocol is down for whatever reason. No need for locking. */ + BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) rejected (protocol is down)", + sk->daddr, ipa_is_link_local(sk->daddr) ? 
sk->iface : NULL, + sk->dport); + rfree(sk); + return 0; + } + + BGP_ENTER(p); + /* * BIRD should keep multiple incoming connections in OpenSent state (for * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming @@ -1240,6 +1314,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) if (!acc) { rfree(sk); + BGP_LEAVE(p); return 0; } @@ -1265,6 +1340,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) p = bgp_spawn(p, sk->daddr); p->postponed_sk = sk; rmove(sk, p->p.pool); + BGP_LEAVE(p); return 0; } @@ -1272,12 +1348,14 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); bgp_send_open(&p->incoming_conn); + BGP_LEAVE(p); return 0; err: sk_log_error(sk, p->p.name); log(L_ERR "%s: Incoming connection aborted", p->p.name); rfree(sk); + BGP_LEAVE(p); return 0; } @@ -1312,10 +1390,9 @@ bgp_neigh_notify(neighbor *n) struct bgp_proto *p = (struct bgp_proto *) n->proto; int ps = p->p.proto_state; - if (n != p->neigh) - return; + BGP_ASSERT_INSIDE(p); - if ((ps == PS_DOWN) || (ps == PS_STOP)) + if ((n != p->neigh) || (ps == PS_DOWN) || (ps == PS_STOP)) return; int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE); @@ -1393,7 +1470,7 @@ bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd) if (bfd && !p->bfd_req && !bgp_is_dynamic(p)) p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? 
NULL : p->neigh->iface, - p->p.vrf, bgp_bfd_notify, p, bfd); + p->p.vrf, bgp_bfd_notify, p, birdloop_event_list(p->p.loop), bfd); if (!bfd && p->bfd_req) { @@ -1408,12 +1485,9 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh)); - if (c->c.in_table) - channel_schedule_reload(C); - else - bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); + bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } static void @@ -1474,9 +1548,12 @@ bgp_start_locked(struct object_lock *lock) struct bgp_proto *p = lock->data; const struct bgp_config *cf = p->cf; + BGP_ENTER(p); + if (p->p.proto_state != PS_START) { DBG("BGP: Got lock in different state %d\n", p->p.proto_state); + BGP_LEAVE(p); return; } @@ -1486,10 +1563,11 @@ bgp_start_locked(struct object_lock *lock) { /* Multi-hop sessions do not use neighbor entries */ bgp_initiate(p); + BGP_LEAVE(p); return; } - neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); + neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY | NEF_NOTIFY_MAIN); if (!n) { log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); @@ -1497,6 +1575,7 @@ bgp_start_locked(struct object_lock *lock) p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); proto_notify_state(&p->p, PS_DOWN); + BGP_LEAVE(p); return; } @@ -1508,6 +1587,8 @@ bgp_start_locked(struct object_lock *lock) BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); else bgp_start_neighbor(p); + + BGP_LEAVE(p); } static int @@ -1546,10 +1627,13 @@ bgp_start(struct proto *P) p->stats.rx_bytes = p->stats.tx_bytes = 0; p->last_rx_update = 0; - p->event = ev_new_init(p->p.pool, bgp_decision, p); + p->active_event = ev_new_init(p->p.pool, bgp_active_event, p); + p->down_event = ev_new_init(p->p.pool, bgp_down_event, p); p->startup_timer = 
tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); + p->rx_lp = lp_new_default(p->p.pool); + p->local_id = proto_get_router_id(P->cf); if (p->rr_client) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; @@ -1677,6 +1761,13 @@ done: return p->p.proto_state; } +struct rte_owner_class bgp_rte_owner_class = { + .get_route_info = bgp_get_route_info, + .rte_better = bgp_rte_better, + .rte_mergable = bgp_rte_mergable, + .rte_igp_metric = bgp_rte_igp_metric, +}; + static struct proto * bgp_init(struct proto_config *CF) { @@ -1690,10 +1781,9 @@ bgp_init(struct proto_config *CF) P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; - P->rte_better = bgp_rte_better; - P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_modify = bgp_rte_modify_stale; + + P->sources.class = &bgp_rte_owner_class; + P->sources.rte_recalculate = cf->deterministic_med ? 
bgp_rte_recalculate : NULL; p->cf = cf; p->is_internal = (cf->local_as == cf->remote_as); @@ -1745,17 +1835,19 @@ bgp_channel_start(struct channel *C) ip_addr src = p->local_ip; if (c->igp_table_ip4) - rt_lock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_lock_table(t); if (c->igp_table_ip6) - rt_lock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_lock_table(t); c->pool = p->p.pool; // XXXX bgp_init_bucket_table(c); bgp_init_prefix_table(c); if (c->cf->import_table) - channel_setup_in_table(C); + channel_setup_in_table(C, 0); if (c->cf->export_table) channel_setup_out_table(C); @@ -1829,10 +1921,12 @@ bgp_channel_cleanup(struct channel *C) struct bgp_channel *c = (void *) C; if (c->igp_table_ip4) - rt_unlock_table(c->igp_table_ip4); + RT_LOCKED(c->igp_table_ip4, t) + rt_unlock_table(t); if (c->igp_table_ip6) - rt_unlock_table(c->igp_table_ip6); + RT_LOCKED(c->igp_table_ip6, t) + rt_unlock_table(t); c->index = 0; @@ -2430,6 +2524,9 @@ bgp_show_proto_info(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; + if (p->p.proto_state != PS_DOWN) + BGP_ASSERT_INSIDE(p); + cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); if (bgp_is_dynamic(p) && p->cf->remote_range) @@ -2556,6 +2653,5 @@ struct protocol proto_bgp = { .copy_config = bgp_copy_config, .get_status = bgp_get_status, .get_attr = bgp_get_attr, - .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index cca4b448..d5ac3bd9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -200,6 +200,10 @@ struct bgp_channel_config { #define BGP_BFD_GRACEFUL 2 /* BFD down triggers graceful restart */ +/* rte->pflags */ +#define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */ +#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */ +#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */ struct bgp_af_caps { u32 afi; @@ -308,6 +312,7 @@ struct bgp_proto { struct bgp_conn *conn; /* 
Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ + struct linpool *rx_lp; /* Linpool for parsing received updates */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ struct bgp_socket *sock; /* Shared listening socket */ @@ -317,7 +322,8 @@ struct bgp_proto { btime last_established; /* Last time of enter/leave of established state */ btime last_rx_update; /* Last time of RX update */ ip_addr link_addr; /* Link-local version of local_ip */ - event *event; /* Event for respawning and shutting process */ + event *active_event; /* Event for respawning */ + event *down_event; /* Event to shut down */ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ int dynamic_name_counter; /* Counter for dynamic BGP names */ @@ -362,6 +368,7 @@ struct bgp_channel { timer *stale_timer; /* Long-lived stale timer for LLGR */ u32 stale_time; /* Stored LLGR stale time from last session */ + struct rt_export_request stale_feed; /* Feeder request for stale route modification */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -489,11 +496,8 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) longjmp(s->err_jmpbuf, 1); } -extern struct linpool *bgp_linpool; -extern struct linpool *bgp_linpool2; - -void bgp_start_timer(timer *t, uint value); +void bgp_start_timer(struct bgp_proto *p, timer *t, uint value); void bgp_check_config(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); @@ -513,11 +517,12 @@ 
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); static inline int -rte_resolvable(rte *rt) +rta_resolvable(rta *a) { - return rt->attrs->dest == RTD_UNICAST; + return a->dest == RTD_UNICAST; } +extern struct rte_owner_class bgp_rte_owner_class; #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 @@ -580,24 +585,31 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); -int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); -void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old); -int bgp_preexport(struct proto *, struct rte **, struct linpool *); +int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best); +void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint); +u32 bgp_rte_igp_metric(struct rte *); +void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); +int bgp_preexport(struct channel *, struct rte *); int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); -void bgp_get_route_info(struct rte *, byte *buf); -int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad); +void bgp_get_route_info(struct rte *, byte *); +int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad); + +static inline struct bgp_proto *bgp_rte_proto(struct rte *rte) +{ + return (rte->src->owner->class == &bgp_rte_owner_class) ? 
+ SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL; +} #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rte *r) +bgp_total_aigp_metric(rta *a) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(r, &metric, &ad); + bgp_total_aigp_metric_(a, &metric, &ad); return metric; } @@ -749,5 +761,10 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define ORIGIN_EGP 1 #define ORIGIN_INCOMPLETE 2 +/* Loop */ + +#define BGP_ENTER(bgp) birdloop_enter(bgp->p.loop) +#define BGP_LEAVE(bgp) birdloop_leave(bgp->p.loop) +#define BGP_ASSERT_INSIDE(bgp) ASSERT_DIE((bgp->p.loop != &main_birdloop) && birdloop_inside(bgp->p.loop)) #endif diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 2dfbdca9..8e42bbdb 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -48,6 +48,7 @@ proto: bgp_proto '}' ; bgp_proto_start: proto_start BGP { this_proto = proto_config_new(&proto_bgp, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); BGP_CFG->local_port = BGP_PORT; BGP_CFG->remote_port = BGP_PORT; BGP_CFG->multihop = -1; /* undefined */ diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 99b5d5b4..88b66040 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -914,7 +914,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session); bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); - bgp_start_timer(conn->hold_timer, conn->hold_time); + bgp_start_timer(p, conn->hold_timer, conn->hold_time); bgp_conn_enter_openconfirm_state(conn); } @@ -971,7 +971,7 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); if (!s->mpls) - rta_apply_hostentry(a, s->hostentry, NULL); + rta_apply_hostentry(a, s->hostentry, NULL, s->pool); /* With MPLS, hostentry is applied later in 
bgp_apply_mpls_labels() */ } @@ -1005,7 +1005,7 @@ bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) ms.len = lnum; memcpy(ms.stack, labels, 4*lnum); - rta_apply_hostentry(a, s->hostentry, &ms); + rta_apply_hostentry(a, s->hostentry, &ms, s->pool); } } @@ -1339,6 +1339,8 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) { if (path_id != s->last_id) { + rt_unlock_source(s->last_src); + s->last_src = rt_get_source(&s->proto->p, path_id); s->last_id = path_id; @@ -1349,28 +1351,25 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) if (!a0) { /* Route withdraw */ - rte_update3(&s->channel->c, n, NULL, s->last_src); + rte_update(&s->channel->c, n, NULL, s->last_src); return; } /* Prepare cached route attributes */ if (s->cached_rta == NULL) { - a0->src = s->last_src; - /* Workaround for rta_lookup() breaking eattrs */ ea_list *ea = a0->eattrs; s->cached_rta = rta_lookup(a0); a0->eattrs = ea; } - rta *a = rta_clone(s->cached_rta); - rte *e = rte_get_temp(a); + rte e0 = { + .attrs = s->cached_rta, + .src = s->last_src, + }; - e->pflags = 0; - e->u.bgp.suppressed = 0; - e->u.bgp.stale = -1; - rte_update3(&s->channel->c, n, e, s->last_src); + rte_update(&s->channel->c, n, &e0, s->last_src); } static void @@ -2296,7 +2295,7 @@ again: ; struct bgp_write_state s = { .proto = p, .channel = c, - .pool = bgp_linpool, + .pool = c->c.rte_update_pool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, @@ -2424,6 +2423,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis s->last_id = 0; s->last_src = s->proto->p.main_source; + rt_lock_source(s->last_src); /* * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not @@ -2440,6 +2440,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis a->scope = SCOPE_UNIVERSE; a->from = s->proto->remote_ip; 
a->eattrs = ea; + a->pref = c->c.preference; c->desc->decode_next_hop(s, nh, nh_len, a); bgp_finish_attrs(s, a); @@ -2453,6 +2454,8 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis rta_free(s->cached_rta); s->cached_rta = NULL; + + rt_unlock_source(s->last_src); } static void @@ -2472,12 +2475,12 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) if (conn->state != BS_ESTABLISHED) { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - bgp_start_timer(conn->hold_timer, conn->hold_time); + bgp_start_timer(p, conn->hold_timer, conn->hold_time); /* Initialize parse state */ struct bgp_parse_state s = { .proto = p, - .pool = bgp_linpool, + .pool = p->rx_lp, .as4_session = p->as4_session, }; @@ -2808,7 +2811,7 @@ bgp_fire_tx(struct bgp_conn *conn) { conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); BGP_TRACE(D_PACKETS, "Sending KEEPALIVE"); - bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); + bgp_start_timer(p, conn->keepalive_timer, conn->keepalive_time); return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); } else while (conn->channels_to_send) @@ -2893,7 +2896,7 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) conn->packets_to_send |= 1 << type; if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) - ev_schedule(conn->tx_ev); + ev_send_loop(conn->bgp->p.loop, conn->tx_ev); } void bgp_kick_tx(void *vconn) @@ -2906,7 +2909,7 @@ bgp_kick_tx(void *vconn) ; if (!max && !ev_active(conn->tx_ev)) - ev_schedule(conn->tx_ev); + ev_send_loop(conn->bgp->p.loop, conn->tx_ev); } void @@ -2920,7 +2923,7 @@ bgp_tx(sock *sk) ; if (!max && !ev_active(conn->tx_ev)) - ev_schedule(conn->tx_ev); + ev_send_loop(conn->bgp->p.loop, conn->tx_ev); } @@ -3102,7 +3105,7 @@ bgp_rx_keepalive(struct bgp_conn *conn) struct bgp_proto *p = conn->bgp; BGP_TRACE(D_PACKETS, "Got KEEPALIVE"); - bgp_start_timer(conn->hold_timer, conn->hold_time); + bgp_start_timer(p, conn->hold_timer, 
conn->hold_time); if (conn->state == BS_OPENCONFIRM) { bgp_conn_enter_established_state(conn); return; } diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 8d97c860..e12f7743 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ @@ -228,7 +228,7 @@ mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) NODE_VALID(tn); tn = tn->next) { - tab = SKIP_BACK(struct rtable, n, tn); + tab = SKIP_BACK(rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; @@ -243,13 +243,21 @@ mrt_next_table(struct mrt_table_dump_state *s) rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } s->table = tab; s->ipv4 = tab ? 
(tab->addr_type == NET_IP4) : 0; if (s->table) - rt_lock_table(s->table); + { + RT_LOCK(s->table); + rt_lock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } return s->table; } @@ -460,7 +468,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) return; fail: - mrt_log(s, "Attribute list too long for %N", r->net->n.addr); + mrt_log(s, "Attribute list too long for %N", r->net); } #endif @@ -472,9 +480,9 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) #ifdef CONFIG_BGP /* Find peer index */ - if (r->attrs->src->proto->proto == &proto_bgp) + struct bgp_proto *p = bgp_rte_proto(r); + if (p) { - struct bgp_proto *p = (void *) r->attrs->src->proto; struct mrt_peer_entry *n = HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); @@ -488,7 +496,7 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) /* Path Identifier */ if (s->add_path) - mrt_put_u32(b, r->attrs->src->private_id); + mrt_put_u32(b, r->src->private_id); /* Route Attributes */ mrt_put_u16(b, 0); @@ -512,26 +520,21 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); mrt_rib_table_header(s, n->n.addr); - rte *rt, *rt0; - for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) + for (struct rte_storage *rt, *rt0 = n->routes; rt = rt0; rt0 = rt0->next) { - if (rte_is_filtered(rt)) + if (rte_is_filtered(&rt->rte)) continue; /* Skip routes that should be reported in the other phase */ - if (!s->always_add_path && (!rt->attrs->src->private_id != !s->add_path)) + if (!s->always_add_path && (!rt->rte.src->private_id != !s->add_path)) { s->want_add_path = 1; continue; } - rte_make_tmp_attrs(&rt, s->linpool, NULL); - - if (f_run(s->filter, &rt, s->linpool, 0) <= F_ACCEPT) - mrt_rib_table_entry(s, rt); - - if (rt != rt0) - rte_free(rt); + rte e = rt->rte; + if (f_run(s->filter, &e, s->linpool, 0) <= F_ACCEPT) + mrt_rib_table_entry(s, &e); lp_flush(s->linpool); } @@ -558,10 +561,11 
@@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) static struct mrt_table_dump_state * mrt_table_dump_init(pool *pp) { - pool *pool = rp_new(pp, "MRT Table Dump"); + pool *pool = rp_new(pp, &main_birdloop, "MRT Table Dump"); struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->pool = pool; + s->parent = pp; s->linpool = lp_new(pool, 4080); s->peer_lp = lp_new(pool, 4080); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); @@ -578,18 +582,27 @@ mrt_table_dump_init(pool *pp) static void mrt_table_dump_free(struct mrt_table_dump_state *s) { - if (s->table_open) - FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); - if (s->table) - rt_unlock_table(s->table); + { + RT_LOCK(s->table); + + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &RT_PRIV(s->table)->fib); + + rt_unlock_table(RT_PRIV(s->table)); + RT_UNLOCK(s->table); + } if (s->table_ptr) - rt_unlock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_unlock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } config_del_obstacle(s->config); - rfree(s->pool); + rp_free(s->pool, s->parent); } @@ -601,8 +614,14 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) s->max = 2048; s->bws = &bws; + rtable_private *tab; + if (s->table_open) + { + RT_LOCK(s->table); + tab = RT_PRIV(s->table); goto step; + } while (mrt_next_table(s)) { @@ -611,15 +630,18 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_dump(s); - FIB_ITERATE_INIT(&s->fit, &s->table->fib); + RT_LOCK(s->table); + tab = RT_PRIV(s->table); + FIB_ITERATE_INIT(&s->fit, &tab->fib); s->table_open = 1; step: - FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) + FIB_ITERATE_START(&tab->fib, &s->fit, net, n) { if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); + RT_UNLOCK(s->table); return 0; } @@ -639,6 +661,7 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_flush(s); } + RT_UNLOCK(s->table); return 1; } @@ -666,7 +689,11 @@ mrt_timer(timer *t) 
s->always_add_path = cf->always_add_path; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } p->table_dump = s; ev_schedule(p->event); @@ -739,7 +766,11 @@ mrt_dump_cmd(struct mrt_dump_data *d) s->filename = d->filename; if (s->table_ptr) - rt_lock_table(s->table_ptr); + { + RT_LOCK(s->table_ptr); + rt_lock_table(RT_PRIV(s->table_ptr)); + RT_UNLOCK(s->table_ptr); + } this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 4ff94c12..2e616f6f 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -40,7 +40,7 @@ struct mrt_proto { struct mrt_dump_data { const char *table_expr; - struct rtable *table_ptr; + rtable *table_ptr; const struct filter *filter; const char *filename; }; @@ -60,20 +60,21 @@ struct mrt_table_dump_state { /* Configuration information */ const char *table_expr; /* Wildcard for table name (or NULL) */ - struct rtable *table_ptr; /* Explicit table (or NULL) */ + rtable *table_ptr; /* Explicit table (or NULL) */ const struct filter *filter; /* Optional filter */ const char *filename; /* Filename pattern */ int always_add_path; /* Always use *_ADDPATH message subtypes */ /* Allocated by mrt_table_dump_init() */ pool *pool; /* Pool for table dump */ + pool *parent; /* Parent pool for cleanup */ linpool *linpool; /* Temporary linear pool */ linpool *peer_lp; /* Linear pool for peer entries in peer_hash */ buffer buf; /* Buffer for MRT messages */ HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ - struct rtable *table; /* Processed table, NULL initially */ + rtable *table; /* Processed table, NULL initially */ struct fib_iterator fit; /* Iterator in processed table */ int table_open; /* Whether iterator is linked */ diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index f38b8210..049030ac 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -311,7 +311,7 
@@ ospf_iface_remove(struct ospf_iface *ifa) ospf_iface_sm(ifa, ISM_DOWN); rem_node(NODE ifa); - rfree(ifa->pool); + rp_free(ifa->pool, p->p.pool); } void @@ -522,7 +522,10 @@ static inline void add_nbma_node(struct ospf_iface *ifa, struct nbma_node *src, int found) { struct nbma_node *n = mb_alloc(ifa->pool, sizeof(struct nbma_node)); + + n->n = (node) {}; add_tail(&ifa->nbma_list, NODE n); + n->ip = src->ip; n->eligible = src->eligible; n->found = found; @@ -564,7 +567,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R", iface->name, &addr->prefix, oa->areaid); - pool = rp_new(p->p.pool, "OSPF Interface"); + pool = rp_new(p->p.pool, p->p.loop, "OSPF Interface"); ifa = mb_allocz(pool, sizeof(struct ospf_iface)); ifa->iface = iface; ifa->addr = addr; @@ -684,7 +687,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip) /* Vlink ifname is stored just after the ospf_iface structure */ - pool = rp_new(p->p.pool, "OSPF Vlink"); + pool = rp_new(p->p.pool, p->p.loop, "OSPF Vlink"); ifa = mb_allocz(pool, sizeof(struct ospf_iface) + 16); ifa->oa = p->backbone; ifa->cf = ip; @@ -1222,7 +1225,8 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) struct iface *iface; struct ifa *a; - WALK_LIST(iface, iface_list) + IFACE_LEGACY_ACCESS; + WALK_LIST(iface, global_iface_list) { if (! (iface->flags & IF_UP)) continue; @@ -1268,7 +1272,8 @@ ospf_reconfigure_ifaces3(struct ospf_proto *p) struct iface *iface; struct ifa *a; - WALK_LIST(iface, iface_list) + IFACE_LEGACY_ACCESS; + WALK_LIST(iface, global_iface_list) { if (! 
(iface->flags & IF_UP)) continue; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index ca369819..4ae0d3fa 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -80,7 +80,7 @@ struct ospf_neighbor * ospf_neighbor_new(struct ospf_iface *ifa) { struct ospf_proto *p = ifa->oa->po; - struct pool *pool = rp_new(p->p.pool, "OSPF Neighbor"); + struct pool *pool = rp_new(p->p.pool, p->p.loop, "OSPF Neighbor"); struct ospf_neighbor *n = mb_allocz(pool, sizeof(struct ospf_neighbor)); n->pool = pool; @@ -120,7 +120,7 @@ ospf_neigh_down(struct ospf_neighbor *n) s_get(&(n->dbsi)); release_lsrtl(p, n); rem_node(NODE n); - rfree(n->pool); + rp_free(n->pool, p->p.pool); OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname); } @@ -777,7 +777,7 @@ ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd) if (use_bfd && !n->bfd_req) n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface, p->p.vrf, - ospf_neigh_bfd_hook, n, NULL); + ospf_neigh_bfd_hook, n, birdloop_event_list(p->p.loop), NULL); if (!use_bfd && n->bfd_req) { diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index ba8c2e2b..16774df6 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -107,12 +107,10 @@ #include <stdlib.h> #include "ospf.h" -static int ospf_preexport(struct proto *P, rte **new, struct linpool *pool); -static void ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); -static void ospf_store_tmp_attrs(struct rte *rt, struct linpool *pool); +static int ospf_preexport(struct channel *C, rte *new); static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); -static int ospf_rte_same(struct rte *new, struct rte *old); +static u32 ospf_rte_igp_metric(struct rte *rt); static void ospf_disp(timer *timer); @@ -378,10 +376,8 @@ ospf_init(struct proto_config *CF) P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; P->feed_end = ospf_feed_end; - P->make_tmp_attrs = 
ospf_make_tmp_attrs; - P->store_tmp_attrs = ospf_store_tmp_attrs; - P->rte_better = ospf_rte_better; - P->rte_same = ospf_rte_same; + + P->sources.class = &ospf_rte_owner_class; return P; } @@ -390,7 +386,9 @@ ospf_init(struct proto_config *CF) static int ospf_rte_better(struct rte *new, struct rte *old) { - if (new->u.ospf.metric1 == LSINFINITY) + u32 new_metric1 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + + if (new_metric1 == LSINFINITY) return 0; if(new->attrs->source < old->attrs->source) return 1; @@ -398,27 +396,27 @@ ospf_rte_better(struct rte *new, struct rte *old) if(new->attrs->source == RTS_OSPF_EXT2) { - if(new->u.ospf.metric2 < old->u.ospf.metric2) return 1; - if(new->u.ospf.metric2 > old->u.ospf.metric2) return 0; + u32 old_metric2 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); + u32 new_metric2 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); + if(new_metric2 < old_metric2) return 1; + if(new_metric2 > old_metric2) return 0; } - if (new->u.ospf.metric1 < old->u.ospf.metric1) + u32 old_metric1 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + if (new_metric1 < old_metric1) return 1; return 0; /* Old is shorter or same */ } -static int -ospf_rte_same(struct rte *new, struct rte *old) +static u32 +ospf_rte_igp_metric(struct rte *rt) { - /* new->attrs == old->attrs always */ - return - new->u.ospf.metric1 == old->u.ospf.metric1 && - new->u.ospf.metric2 == old->u.ospf.metric2 && - new->u.ospf.tag == old->u.ospf.tag && - new->u.ospf.router_id == old->u.ospf.router_id; -} + if (rt->attrs->source == RTS_OSPF_EXT2) + return IGP_METRIC_UNKNOWN; + return ea_get_int(rt->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); +} void ospf_schedule_rtcalc(struct ospf_proto *p) @@ -484,14 +482,13 @@ ospf_disp(timer * timer) * import to the filters. 
*/ static int -ospf_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +ospf_preexport(struct channel *c, rte *e) { - struct ospf_proto *p = (struct ospf_proto *) P; + struct ospf_proto *p = (struct ospf_proto *) c->proto; struct ospf_area *oa = ospf_main_area(p); - rte *e = *new; /* Reject our own routes */ - if (e->attrs->src->proto == P) + if (e->sender == c->in_req.hook) return -1; /* Do not export routes to stub areas */ @@ -501,26 +498,6 @@ ospf_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) return 0; } -static void -ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 4); - rte_make_tmp_attr(rt, EA_OSPF_METRIC1, EAF_TYPE_INT, rt->u.ospf.metric1); - rte_make_tmp_attr(rt, EA_OSPF_METRIC2, EAF_TYPE_INT, rt->u.ospf.metric2); - rte_make_tmp_attr(rt, EA_OSPF_TAG, EAF_TYPE_INT, rt->u.ospf.tag); - rte_make_tmp_attr(rt, EA_OSPF_ROUTER_ID, EAF_TYPE_ROUTER_ID, rt->u.ospf.router_id); -} - -static void -ospf_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 4); - rt->u.ospf.metric1 = rte_store_tmp_attr(rt, EA_OSPF_METRIC1); - rt->u.ospf.metric2 = rte_store_tmp_attr(rt, EA_OSPF_METRIC2); - rt->u.ospf.tag = rte_store_tmp_attr(rt, EA_OSPF_TAG); - rt->u.ospf.router_id = rte_store_tmp_attr(rt, EA_OSPF_ROUTER_ID); -} - /** * ospf_shutdown - Finish of OSPF instance * @P: OSPF protocol instance @@ -558,6 +535,9 @@ ospf_shutdown(struct proto *P) } FIB_WALK_END; + if (tm_active(p->disp_timer)) + tm_stop(p->disp_timer); + return PS_DOWN; } @@ -607,16 +587,20 @@ ospf_get_route_info(rte * rte, byte * buf) } buf += bsprintf(buf, " %s", type); - buf += bsprintf(buf, " (%d/%d", rte->pref, rte->u.ospf.metric1); + buf += bsprintf(buf, " (%d/%d", rte->attrs->pref, ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY)); if (rte->attrs->source == RTS_OSPF_EXT2) - buf += bsprintf(buf, "/%d", rte->u.ospf.metric2); + buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, 
EA_OSPF_METRIC2, LSINFINITY)); buf += bsprintf(buf, ")"); - if ((rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2) && rte->u.ospf.tag) + if (rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2) { - buf += bsprintf(buf, " [%x]", rte->u.ospf.tag); + eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_TAG); + if (ea && (ea->u.data > 0)) + buf += bsprintf(buf, " [%x]", ea->u.data); } - if (rte->u.ospf.router_id) - buf += bsprintf(buf, " [%R]", rte->u.ospf.router_id); + + eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_ROUTER_ID); + if (ea) + buf += bsprintf(buf, " [%R]", ea->u.data); } static int @@ -1533,6 +1517,12 @@ ospf_sh_lsadb(struct lsadb_show_data *ld) } +struct rte_owner_class ospf_rte_owner_class = { + .get_route_info = ospf_get_route_info, + .rte_better = ospf_rte_better, + .rte_igp_metric = ospf_rte_igp_metric, +}; + struct protocol proto_ospf = { .name = "OSPF", .template = "ospf%d", @@ -1548,5 +1538,4 @@ struct protocol proto_ospf = { .reconfigure = ospf_reconfigure, .get_status = ospf_get_status, .get_attr = ospf_get_attr, - .get_route_info = ospf_get_route_info }; diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 3e704ae8..a5f83e79 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -1007,6 +1007,8 @@ void ospf_sh_state(struct proto *P, int verbose, int reachable); void ospf_sh_lsadb(struct lsadb_show_data *ld); +extern struct rte_owner_class ospf_rte_owner_class; + /* iface.c */ void ospf_iface_chstate(struct ospf_iface *ifa, u8 state); void ospf_iface_sm(struct ospf_iface *ifa, int event); diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index faee49dc..3e208023 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -144,7 +144,7 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; /* Prefer intra-area to inter-area to externals */ @@ -195,7 +195,7 @@ orta_compare_asbr(const struct ospf_proto *p, const orta 
*new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; if (!p->rfc1583) @@ -225,7 +225,7 @@ orta_compare_ext(const struct ospf_proto *p, const orta *new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; /* 16.4 (6a) - prefer routes with lower type */ @@ -2053,36 +2053,61 @@ again1: if (nf->n.type) /* Add the route */ { rta a0 = { - .src = p->p.main_source, .source = nf->n.type, .scope = SCOPE_UNIVERSE, .dest = RTD_UNICAST, .nh = *(nf->n.nhs), + .pref = p->p.main_channel->preference, }; if (reload || ort_changed(nf, &a0)) { - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + a0.eattrs = alloca(sizeof(ea_list) + 4 * sizeof(eattr)); + memset(a0.eattrs, 0, sizeof(ea_list)); - rta_free(nf->old_rta); - nf->old_rta = rta_clone(a); - e->u.ospf.metric1 = nf->old_metric1 = nf->n.metric1; - e->u.ospf.metric2 = nf->old_metric2 = nf->n.metric2; - e->u.ospf.tag = nf->old_tag = nf->n.tag; - e->u.ospf.router_id = nf->old_rid = nf->n.rid; - e->pflags = EA_ID_FLAG(EA_OSPF_METRIC1) | EA_ID_FLAG(EA_OSPF_ROUTER_ID); + nf->old_metric1 = nf->n.metric1; + nf->old_metric2 = nf->n.metric2; + nf->old_tag = nf->n.tag; + nf->old_rid = nf->n.rid; + + a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { + .id = EA_OSPF_METRIC1, + .type = EAF_TYPE_INT, + .u.data = nf->n.metric1, + }; if (nf->n.type == RTS_OSPF_EXT2) - e->pflags |= EA_ID_FLAG(EA_OSPF_METRIC2); + a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { + .id = EA_OSPF_METRIC2, + .type = EAF_TYPE_INT, + .u.data = nf->n.metric2, + }; - /* Perhaps onfly if tag is non-zero? 
*/ if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2)) - e->pflags |= EA_ID_FLAG(EA_OSPF_TAG); + a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { + .id = EA_OSPF_TAG, + .type = EAF_TYPE_INT, + .u.data = nf->n.tag, + }; + + a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { + .id = EA_OSPF_ROUTER_ID, + .type = EAF_TYPE_ROUTER_ID, + .u.data = nf->n.rid, + }; + + rta_free(nf->old_rta); + nf->old_rta = rta_lookup(&a0); + + rte e0 = { + .attrs = nf->old_rta, + .src = p->p.main_source, + }; DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); - rte_update(&p->p, nf->fn.addr, e); + + rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source); } } else if (nf->old_rta) @@ -2091,7 +2116,7 @@ again1: rta_free(nf->old_rta); nf->old_rta = NULL; - rte_update(&p->p, nf->fn.addr, NULL); + rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source); } /* Remove unused rt entry, some special entries are persistent */ @@ -2107,7 +2132,6 @@ again1: } FIB_ITERATE_END; - WALK_LIST(oa, p->area_list) { /* Cleanup ASBR hash tables */ diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 52c2a0ce..bb88d20a 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1300,7 +1300,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) } void -ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */ @@ -1319,7 +1319,7 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (!new) { - nf = fib_find(&p->rtf, n->n.addr); + nf = fib_find(&p->rtf, n); if (!nf || !nf->external_rte) return; @@ -1346,14 +1346,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch 
UNUSED, net *n, rte *new, rte if (m1 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric1 value %u for route %N", - p->p.name, m1, n->n.addr); + p->p.name, m1, n); m1 = LSINFINITY; } if (m2 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric2 value %u for route %N", - p->p.name, m2, n->n.addr); + p->p.name, m2, n); m2 = LSINFINITY; } @@ -1377,12 +1377,12 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (ipa_zero(fwd)) { log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N", - p->p.name, n->n.addr); + p->p.name, n); return; } } - nf = fib_get(&p->rtf, n->n.addr); + nf = fib_get(&p->rtf, n); ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1, p->vpn_pe); nf->external_rte = 1; } diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 535d1f1b..c36d0b50 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -200,7 +200,7 @@ void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 d void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn); void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa); -void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old); +void ospf_rt_notify(struct proto *P, struct channel *ch, const net_addr *n, rte *new, const rte *old); void ospf_update_topology(struct ospf_proto *p); struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type); diff --git a/proto/perf/perf.c b/proto/perf/perf.c index ba401a8a..aa688d88 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -143,10 +143,10 @@ perf_loop(void *data) if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) { struct rta a0 = { - .src = p->p.main_source, .source = RTS_PERF, .scope = SCOPE_UNIVERSE, .dest = RTD_UNICAST, + .pref = p->p.main_channel->preference, .nh.iface = p->ifa->iface, .nh.gw = gw, 
.nh.weight = 1, @@ -160,18 +160,17 @@ perf_loop(void *data) clock_gettime(CLOCK_MONOTONIC, &ts_generated); - for (uint i=0; i<N; i++) { - rte *e = rte_get_temp(p->data[i].a); - e->pflags = 0; - - rte_update(P, &(p->data[i].net), e); + for (uint i=0; i<N; i++) + { + rte e0 = { .attrs = p->data[i].a, .src = P->main_source, }; + rte_update(P->main_channel, &(p->data[i].net), &e0, P->main_source); } clock_gettime(CLOCK_MONOTONIC, &ts_update); if (!p->keep) for (uint i=0; i<N; i++) - rte_update(P, &(p->data[i].net), NULL); + rte_update(P->main_channel, &(p->data[i].net), NULL, P->main_source); clock_gettime(CLOCK_MONOTONIC, &ts_withdraw); @@ -199,12 +198,14 @@ perf_loop(void *data) p->exp++; } - rt_schedule_prune(P->main_channel->table); + RT_LOCK(P->main_channel->table); + rt_schedule_prune(RT_PRIV(P->main_channel->table)); + RT_UNLOCK(P->main_channel->table); ev_schedule(p->loop); } static void -perf_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net UNUSED, struct rte *new UNUSED, struct rte *old UNUSED) +perf_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net UNUSED, struct rte *new UNUSED, const struct rte *old UNUSED) { struct perf_proto *p = (struct perf_proto *) P; p->exp++; diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 1202c169..fc08445f 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,7 +16,7 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE) +CF_KEYWORDS(PIPE, PEER, TABLE, MAX, GENERATION) CF_GRAMMAR @@ -25,6 +25,8 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); + PIPE_CFG->max_generation = 16; } proto_name { @@ -41,6 +43,10 @@ pipe_proto: | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } + | pipe_proto MAX GENERATION expr ';' { + if (($4 < 1) || ($4 > 254)) cf_error("Max 
generation must be in range 1..254, got %u", $4); + PIPE_CFG->max_generation = $4; + } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 3532f114..270f7b92 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -43,67 +43,58 @@ #include "pipe.h" +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif + static void -pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old) +pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old) { struct pipe_proto *p = (void *) P; struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri; - struct rte_src *src; - - rte *e; - rta *a; if (!new && !old) return; - if (dst->table->pipe_busy) - { - log(L_ERR "Pipe loop detected when sending %N to table %s", - n->n.addr, dst->table->name); - return; - } - if (new) { - a = alloca(rta_size(new->attrs)); + rta *a = alloca(rta_size(new->attrs)); memcpy(a, new->attrs, rta_size(new->attrs)); - a->aflags = 0; + a->cached = 0; a->hostentry = NULL; - e = rte_get_temp(a); - e->pflags = 0; - - /* Copy protocol specific embedded attributes. 
*/ - memcpy(&(e->u), &(new->u), sizeof(e->u)); - e->pref = new->pref; - e->pflags = new->pflags; -#ifdef CONFIG_BGP - /* Hack to cleanup cached value */ - if (e->attrs->src->proto->proto == &proto_bgp) - e->u.bgp.stale = -1; -#endif + rte e0 = { + .attrs = a, + .src = new->src, + .generation = new->generation + 1, + }; - src = a->src; + rte_update(dst, n, &e0, new->src); } else - { - e = NULL; - src = old->attrs->src; - } - - src_ch->table->pipe_busy = 1; - rte_update2(dst, n->n.addr, e, src); - src_ch->table->pipe_busy = 0; + rte_update(dst, n, NULL, old->src); } static int -pipe_preexport(struct proto *P, rte **ee, struct linpool *p UNUSED) +pipe_preexport(struct channel *c, rte *e) { - struct proto *pp = (*ee)->sender->proto; + struct pipe_proto *p = (void *) c->proto; + + /* Avoid direct loopbacks */ + if (e->sender == c->in_req.hook) + return -1; - if (pp == P) - return -1; /* Avoid local loops automatically */ + /* Indirection check */ + uint max_generation = ((struct pipe_config *) p->p.cf)->max_generation; + if (e->generation >= max_generation) + { + log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", + e->generation, max_generation, c->proto->name, + c->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id); + + return -1; + } return 0; } @@ -117,6 +108,23 @@ pipe_reload_routes(struct channel *C) channel_request_feeding((C == p->pri) ? p->sec : p->pri); } +static void +pipe_feed_begin(struct channel *C, int refeeding UNUSED) +{ + struct pipe_proto *p = (void *) C->proto; + struct channel *dst = (C == p->pri) ? p->sec : p->pri; + + channel_refresh_begin(dst); +} + +static void +pipe_feed_end(struct channel *C) +{ + struct pipe_proto *p = (void *) C->proto; + struct channel *dst = (C == p->pri) ? 
p->sec : p->pri; + + channel_refresh_end(dst); +} static void pipe_postconfig(struct proto_config *CF) @@ -187,6 +195,10 @@ pipe_init(struct proto_config *CF) P->rt_notify = pipe_rt_notify; P->preexport = pipe_preexport; P->reload_routes = pipe_reload_routes; + P->feed_begin = pipe_feed_begin; + P->feed_end = pipe_feed_end; + + p->rl_gen = (struct tbf) TBF_DEFAULT_LOG_LIMITS; pipe_configure_channels(p, cf); @@ -219,8 +231,18 @@ pipe_get_status(struct proto *P, byte *buf) static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->pri->stats; - struct proto_stats *s2 = &p->sec->stats; + struct channel_import_stats *s1i = &p->pri->import_stats; + struct channel_export_stats *s1e = &p->pri->export_stats; + struct channel_import_stats *s2i = &p->sec->import_stats; + struct channel_export_stats *s2e = &p->sec->export_stats; + + struct rt_import_stats *rs1i = p->pri->in_req.hook ? &p->pri->in_req.hook->stats : NULL; + struct rt_export_stats *rs1e = p->pri->out_req.hook ? &p->pri->out_req.hook->stats : NULL; + struct rt_import_stats *rs2i = p->sec->in_req.hook ? &p->sec->in_req.hook->stats : NULL; + struct rt_export_stats *rs2e = p->sec->out_req.hook ? &p->sec->out_req.hook->stats : NULL; + + u32 pri_routes = p->pri->in_limit.count; + u32 sec_routes = p->sec->in_limit.count; /* * Pipe stats (as anything related to pipes) are a bit tricky. 
There @@ -244,24 +266,22 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1->imp_routes, s2->imp_routes); + pri_routes, sec_routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid, - s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted); + rs2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, + s2e->updates_filtered, rs1i->updates_ignored, rs1i->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s2->exp_withdraws_received, s1->imp_withdraws_invalid, - s1->imp_withdraws_ignored, s1->imp_withdraws_accepted); + rs2e->withdraws_received, s1i->withdraws_invalid, + rs1i->withdraws_ignored, rs1i->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u", - s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid, - s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted); + rs1e->updates_received, s1e->updates_rejected + s2i->updates_invalid, + s1e->updates_filtered, rs2i->updates_ignored, rs2i->updates_accepted); cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u", - s1->exp_withdraws_received, s2->imp_withdraws_invalid, - s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); + rs1e->withdraws_received, s2i->withdraws_invalid, + rs2i->withdraws_ignored, rs2i->withdraws_accepted); } -static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; - static void pipe_show_proto_info(struct proto *P) { @@ -270,13 +290,17 @@ pipe_show_proto_info(struct proto *P) cli_msg(-1006, " Channel %s", "main"); cli_msg(-1006, " Table: %s", p->pri->table->name); cli_msg(-1006, " Peer table: %s", p->sec->table->name); - cli_msg(-1006, " Import state: %s", 
pipe_feed_state[p->sec->export_state]); - cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]); + cli_msg(-1006, " Import state: %s", rt_export_state_name(rt_export_get_state(p->sec->out_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(p->pri->out_req.hook))); cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - channel_show_limit(&p->pri->in_limit, "Import limit:"); - channel_show_limit(&p->sec->in_limit, "Export limit:"); + + + channel_show_limit(&p->pri->in_limit, "Import limit:", + (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]); + channel_show_limit(&p->sec->in_limit, "Export limit:", + (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]); if (P->proto_state != PS_DOWN) pipe_show_stats(p); diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 038c6666..60c857eb 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,12 +12,14 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + u8 max_generation; }; struct pipe_proto { struct proto p; struct channel *pri; struct channel *sec; + struct tbf rl_gen; }; #endif diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 66e8eb4b..15673555 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -284,7 +284,7 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf RADV_TRACE(D_EVENTS, "Adding interface %s", iface->name); - pool *pool = rp_new(p->p.pool, iface->name); + pool *pool = rp_new(p->p.pool, p->p.loop, iface->name); ifa = mb_allocz(pool, sizeof(struct radv_iface)); ifa->pool = pool; ifa->ra = p; @@ -317,7 +317,7 @@ radv_iface_remove(struct radv_iface *ifa) rem_node(NODE ifa); - rfree(ifa->pool); + rp_free(ifa->pool, p->p.pool); } static void @@ -385,18 +385,18 @@ radv_trigger_valid(struct radv_config *cf) } static inline int 
-radv_net_match_trigger(struct radv_config *cf, net *n) +radv_net_match_trigger(struct radv_config *cf, const net_addr *n) { - return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger); + return radv_trigger_valid(cf) && net_equal(n, &cf->trigger); } int -radv_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +radv_preexport(struct channel *c, rte *new) { // struct radv_proto *p = (struct radv_proto *) P; - struct radv_config *cf = (struct radv_config *) (P->cf); + struct radv_config *cf = (struct radv_config *) (c->proto->cf); - if (radv_net_match_trigger(cf, (*new)->net)) + if (radv_net_match_trigger(cf, new->net)) return RIC_PROCESS; if (cf->propagate_routes) @@ -406,7 +406,7 @@ radv_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) } static void -radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (P->cf); @@ -457,14 +457,14 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte (preference != RA_PREF_HIGH)) { log(L_WARN "%s: Invalid ra_preference value %u on route %N", - p->p.name, preference, n->n.addr); + p->p.name, preference, n); preference = RA_PREF_MEDIUM; preference_set = 1; lifetime = 0; lifetime_set = 1; } - rt = fib_get(&p->routes, n->n.addr); + rt = fib_get(&p->routes, n); /* Ignore update if nothing changed */ if (rt->valid && @@ -487,7 +487,7 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte else { /* Withdraw */ - rt = fib_find(&p->routes, n->n.addr); + rt = fib_find(&p->routes, n); if (!rt || !rt->valid) return; @@ -555,7 +555,10 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter); + 
RT_LOCK(c->table); + int active = rt_examine(RT_PRIV(c->table), &cf->trigger, c, c->out_filter); + RT_UNLOCK(c->table); + return active; } static void @@ -660,8 +663,9 @@ radv_reconfigure(struct proto *P, struct proto_config *CF) if (!old->propagate_routes && new->propagate_routes) channel_request_feeding(p->p.main_channel); + IFACE_LEGACY_ACCESS; struct iface *iface; - WALK_LIST(iface, iface_list) + WALK_LIST(iface, global_iface_list) { if (!(iface->flags & IF_UP)) continue; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index e1a235a0..0f8b10ad 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -145,7 +145,7 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) { /* Update */ rta a0 = { - .src = p->p.main_source, + .pref = p->p.main_channel->preference, .source = RTS_RIP, .scope = SCOPE_UNIVERSE, .dest = RTD_UNICAST, @@ -188,21 +188,34 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) a0.nh.iface = rt->from->ifa->iface; } - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + a0.eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)); + memset(a0.eattrs, 0, sizeof(ea_list)); /* Zero-ing only the ea_list header */ + a0.eattrs->count = 3; + a0.eattrs->attrs[0] = (eattr) { + .id = EA_RIP_METRIC, + .type = EAF_TYPE_INT, + .u.data = rt_metric, + }; + a0.eattrs->attrs[1] = (eattr) { + .id = EA_RIP_TAG, + .type = EAF_TYPE_INT, + .u.data = rt_tag, + }; + a0.eattrs->attrs[2] = (eattr) { + .id = EA_RIP_FROM, + .type = EAF_TYPE_PTR, + .u.data = (uintptr_t) a0.nh.iface, + }; - e->u.rip.from = a0.nh.iface; - e->u.rip.metric = rt_metric; - e->u.rip.tag = rt_tag; - e->pflags = EA_ID_FLAG(EA_RIP_METRIC) | EA_ID_FLAG(EA_RIP_TAG); + rte e0 = { + .attrs = &a0, + .src = p->p.main_source, + }; - rte_update(&p->p, en->n.addr, e); + rte_update(p->p.main_channel, en->n.addr, &e0, p->p.main_source); } else - { - /* Withdraw */ - rte_update(&p->p, en->n.addr, NULL); - } + rte_update(p->p.main_channel, en->n.addr, NULL, p->p.main_source); } /** @@ -297,8 +310,8 @@ 
rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from) * it into our data structures. */ static void -rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new, - struct rte *old UNUSED) +rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, struct rte *new, + const struct rte *old UNUSED) { struct rip_proto *p = (struct rip_proto *) P; struct rip_entry *en; @@ -307,20 +320,21 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s if (new) { /* Update */ - u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1); u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0); + u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1); + struct iface *rt_from = (struct iface *) ea_get_int(new->attrs->eattrs, EA_RIP_FROM, 0); if (rt_metric > p->infinity) { log(L_WARN "%s: Invalid rip_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = p->infinity; } if (rt_tag > 0xffff) { log(L_WARN "%s: Invalid rip_tag value %u for route %N", - p->p.name, rt_tag, net->n.addr); + p->p.name, rt_tag, net); rt_metric = p->infinity; rt_tag = 0; } @@ -332,21 +346,21 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s * collection. */ - en = fib_get(&p->rtable, net->n.addr); + en = fib_get(&p->rtable, net); old_metric = en->valid ? en->metric : -1; en->valid = RIP_ENTRY_VALID; en->metric = rt_metric; en->tag = rt_tag; - en->from = (new->attrs->src->proto == P) ? new->u.rip.from : NULL; + en->from = (new->src->owner == &P->sources) ? rt_from : NULL; en->iface = new->attrs->nh.iface; en->next_hop = new->attrs->nh.gw; } else { /* Withdraw */ - en = fib_find(&p->rtable, net->n.addr); + en = fib_find(&p->rtable, net); if (!en || en->valid != RIP_ENTRY_VALID) return; @@ -499,7 +513,7 @@ rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n) ip_addr saddr = rip_is_v2(p) ? 
n->ifa->sk->saddr : n->nbr->ifa->ip; n->bfd_req = bfd_request_session(p->p.pool, n->nbr->addr, saddr, n->nbr->iface, p->p.vrf, - rip_bfd_notify, n, NULL); + rip_bfd_notify, n, birdloop_event_list(p->p.loop), NULL); } if (!use_bfd && n->bfd_req) @@ -762,7 +776,8 @@ rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf) { struct iface *iface; - WALK_LIST(iface, iface_list) + IFACE_LEGACY_ACCESS; + WALK_LIST(iface, global_iface_list) { if (!(iface->flags & IF_UP)) continue; @@ -1068,37 +1083,33 @@ rip_reload_routes(struct channel *C) rip_kick_timer(p); } -static void -rip_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_RIP_METRIC, EAF_TYPE_INT, rt->u.rip.metric); - rte_make_tmp_attr(rt, EA_RIP_TAG, EAF_TYPE_INT, rt->u.rip.tag); -} +static struct rte_owner_class rip_rte_owner_class; -static void -rip_store_tmp_attrs(struct rte *rt, struct linpool *pool) +static inline struct rip_proto * +rip_rte_proto(struct rte *rte) { - rte_init_tmp_attrs(rt, pool, 2); - rt->u.rip.metric = rte_store_tmp_attr(rt, EA_RIP_METRIC); - rt->u.rip.tag = rte_store_tmp_attr(rt, EA_RIP_TAG); + return (rte->src->owner->class == &rip_rte_owner_class) ? 
+ SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL; } static int rip_rte_better(struct rte *new, struct rte *old) { - return new->u.rip.metric < old->u.rip.metric; + ASSERT_DIE(new->src == old->src); + struct rip_proto *p = rip_rte_proto(new); + + u32 new_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, p->infinity); + u32 old_metric = ea_get_int(old->attrs->eattrs, EA_RIP_METRIC, p->infinity); + + return new_metric < old_metric; } -static int -rip_rte_same(struct rte *new, struct rte *old) +static u32 +rip_rte_igp_metric(struct rte *rt) { - return ((new->u.rip.metric == old->u.rip.metric) && - (new->u.rip.tag == old->u.rip.tag) && - (new->u.rip.from == old->u.rip.from)); + return ea_get_int(rt->attrs->eattrs, EA_RIP_METRIC, IGP_METRIC_UNKNOWN); } - static void rip_postconfig(struct proto_config *CF) { @@ -1120,10 +1131,7 @@ rip_init(struct proto_config *CF) P->rt_notify = rip_rt_notify; P->neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; - P->make_tmp_attrs = rip_make_tmp_attrs; - P->store_tmp_attrs = rip_store_tmp_attrs; - P->rte_better = rip_rte_better; - P->rte_same = rip_rte_same; + P->sources.class = &rip_rte_owner_class; return P; } @@ -1198,10 +1206,14 @@ rip_reconfigure(struct proto *P, struct proto_config *CF) static void rip_get_route_info(rte *rte, byte *buf) { - buf += bsprintf(buf, " (%d/%d)", rte->pref, rte->u.rip.metric); + struct rip_proto *p = rip_rte_proto(rte); + u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity); + u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0); + + buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric); - if (rte->u.rip.tag) - bsprintf(buf, " [%04x]", rte->u.rip.tag); + if (rt_tag) + bsprintf(buf, " [%04x]", rt_tag); } static int @@ -1321,6 +1333,12 @@ rip_dump(struct proto *P) } +static struct rte_owner_class rip_rte_owner_class = { + .get_route_info = rip_get_route_info, + .rte_better = rip_rte_better, + .rte_igp_metric = rip_rte_igp_metric, 
+}; + struct protocol proto_rip = { .name = "RIP", .template = "rip%d", @@ -1335,6 +1353,5 @@ struct protocol proto_rip = { .start = rip_start, .shutdown = rip_shutdown, .reconfigure = rip_reconfigure, - .get_route_info = rip_get_route_info, .get_attr = rip_get_attr }; diff --git a/proto/rip/rip.h b/proto/rip/rip.h index 8d347000..f8713c4a 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -197,6 +197,7 @@ struct rip_rte #define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0) #define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1) +#define EA_RIP_FROM EA_CODE(PROTOCOL_RIP, 2) static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y index d6d326b8..743b5b42 100644 --- a/proto/rpki/config.Y +++ b/proto/rpki/config.Y @@ -42,6 +42,7 @@ proto: rpki_proto ; rpki_proto_start: proto_start RPKI { this_proto = proto_config_new(&proto_rpki, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL; RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL; RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL; diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index d246dd50..d7895a22 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -233,7 +233,12 @@ static const size_t min_pdu_size[] = { [ERROR] = 16, }; -static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); +static int rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); + +#define rpki_send_error_pdu(cache, error_code, err_pdu_len, erroneous_pdu, fmt...) 
({ \ + rpki_send_error_pdu_(cache, error_code, err_pdu_len, erroneous_pdu, #fmt); \ + CACHE_TRACE(D_PACKETS, cache, #fmt); \ + }) static void rpki_pdu_to_network_byte_order(struct pdu_header *pdu) @@ -595,6 +600,7 @@ rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu) case INTERNAL_ERROR: case INVALID_REQUEST: case UNSUPPORTED_PDU_TYPE: + CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PDU_TYPE"); rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); break; @@ -652,21 +658,7 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ { if (cache->request_session_id) { - if (cache->last_update) - { - /* - * This isn't the first sync and we already received records. This point - * is after Reset Query and before importing new records from cache - * server. We need to load new ones and kick out missing ones. So start - * a refresh cycle. - */ - if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); - if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); - - cache->p->refresh_channels = 1; - } + rpki_start_refresh(cache->p); cache->session_id = pdu->session_id; cache->request_session_id = 0; } @@ -842,14 +834,7 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da (cf->keep_expire_interval ? 
"keeps " : ""), cache->expire_interval); } - if (cache->p->refresh_channels) - { - cache->p->refresh_channels = 0; - if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); - if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); - } + rpki_stop_refresh(cache->p); cache->last_update = current_time(); cache->serial_num = pdu->serial_num; @@ -924,6 +909,9 @@ rpki_rx_hook(struct birdsock *sk, uint size) struct rpki_cache *cache = sk->data; struct rpki_proto *p = cache->p; + if ((p->p.proto_state == PS_DOWN) || (p->cache != cache)) + return 0; + byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; @@ -980,6 +968,8 @@ rpki_err_hook(struct birdsock *sk, int error_num) CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); } + if (cache->p->cache != cache) + return; rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); } @@ -999,6 +989,9 @@ rpki_tx_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + while (rpki_fire_tx(cache) > 0) ; } @@ -1008,6 +1001,9 @@ rpki_connected_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + CACHE_TRACE(D_EVENTS, cache, "Connected"); proto_notify_state(&cache->p->p, PS_UP); @@ -1029,7 +1025,7 @@ rpki_connected_hook(sock *sk) * This function prepares Error PDU and sends it to a cache server. */ static int -rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) +rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) 
{ va_list args; char msg[128]; diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index ab0837f3..afba2216 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -109,6 +109,7 @@ static void rpki_schedule_next_expire_check(struct rpki_cache *cache); static void rpki_stop_refresh_timer_event(struct rpki_cache *cache); static void rpki_stop_retry_timer_event(struct rpki_cache *cache); static void rpki_stop_expire_timer_event(struct rpki_cache *cache); +static void rpki_stop_all_timers(struct rpki_cache *cache); /* @@ -121,27 +122,48 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ struct rpki_proto *p = cache->p; rta a0 = { - .src = p->p.main_source, + .pref = channel->preference, .source = RTS_RPKI, .scope = SCOPE_UNIVERSE, .dest = RTD_NONE, }; - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + rte e0 = { .attrs = &a0, .src = p->p.main_source, }; - e->pflags = 0; - - rte_update2(channel, &pfxr->n, e, a0.src); + rte_update(channel, &pfxr->n, &e0, p->p.main_source); } void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) { struct rpki_proto *p = cache->p; - rte_update2(channel, &pfxr->n, NULL, p->p.main_source); + rte_update(channel, &pfxr->n, NULL, p->p.main_source); +} + +void +rpki_start_refresh(struct rpki_proto *p) +{ + if (p->roa4_channel) + rt_refresh_begin(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_begin(&p->roa6_channel->in_req); + + p->refresh_channels = 1; } +void +rpki_stop_refresh(struct rpki_proto *p) +{ + if (!p->refresh_channels) + return; + + p->refresh_channels = 0; + + if (p->roa4_channel) + rt_refresh_end(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_end(&p->roa6_channel->in_req); +} /* * RPKI Protocol Logic @@ -198,6 +220,8 @@ rpki_force_restart_proto(struct rpki_proto *p) { if (p->cache) { + rpki_tr_close(p->cache->tr_sock); + rpki_stop_all_timers(p->cache); CACHE_DBG(p->cache, "Connection object destroying"); } @@ 
-321,7 +345,7 @@ rpki_schedule_next_refresh(struct rpki_cache *cache) btime t = cache->refresh_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->refresh_timer, t); + tm_start_in(cache->refresh_timer, t, cache->p->p.loop); } static void @@ -330,7 +354,7 @@ rpki_schedule_next_retry(struct rpki_cache *cache) btime t = cache->retry_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->retry_timer, t); + tm_start_in(cache->retry_timer, t, cache->p->p.loop); } static void @@ -341,7 +365,7 @@ rpki_schedule_next_expire_check(struct rpki_cache *cache) t = MAX(t, 1 S); CACHE_DBG(cache, "after %t s", t); - tm_start(cache->expire_timer, t); + tm_start_in(cache->expire_timer, t, cache->p->p.loop); } static void @@ -358,13 +382,21 @@ rpki_stop_retry_timer_event(struct rpki_cache *cache) tm_stop(cache->retry_timer); } -static void UNUSED +static void rpki_stop_expire_timer_event(struct rpki_cache *cache) { CACHE_DBG(cache, "Stop"); tm_stop(cache->expire_timer); } +static void +rpki_stop_all_timers(struct rpki_cache *cache) +{ + rpki_stop_refresh_timer_event(cache); + rpki_stop_retry_timer_event(cache); + rpki_stop_expire_timer_event(cache); +} + static int rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache) { @@ -387,6 +419,9 @@ rpki_refresh_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -433,6 +468,9 @@ rpki_retry_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -478,6 +516,9 @@ rpki_expire_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + if (!cache->last_update) return; @@ -555,7 +596,7 @@ rpki_check_expire_interval(uint seconds) static struct rpki_cache * rpki_init_cache(struct rpki_proto *p, struct rpki_config 
*cf) { - pool *pool = rp_new(p->p.pool, cf->hostname); + pool *pool = rp_new(p->p.pool, p->p.loop, cf->hostname); struct rpki_cache *cache = mb_allocz(pool, sizeof(struct rpki_cache)); @@ -620,6 +661,7 @@ rpki_close_connection(struct rpki_cache *cache) { CACHE_TRACE(D_EVENTS, cache, "Closing a connection"); rpki_tr_close(cache->tr_sock); + rpki_stop_refresh(cache->p); proto_notify_state(&cache->p->p, PS_START); } diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index 8a5c38fd..a70a2027 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -83,6 +83,8 @@ const char *rpki_cache_state_to_str(enum rpki_cache_state state); void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); +void rpki_start_refresh(struct rpki_proto *p); +void rpki_stop_refresh(struct rpki_proto *p); /* * RPKI Protocol Logic diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c index 6333f367..223afa80 100644 --- a/proto/rpki/ssh_transport.c +++ b/proto/rpki/ssh_transport.c @@ -38,6 +38,8 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c index 132f8e2d..4e850c44 100644 --- a/proto/rpki/tcp_transport.c +++ b/proto/rpki/tcp_transport.c @@ -31,6 +31,8 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index a1ac7587..26609764 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,8 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->flags |= SKF_THREAD; + sk->loop = cache->p->p.loop; if (ipa_zero(sk->daddr) && sk->host) 
{ @@ -119,6 +121,7 @@ rpki_tr_close(struct rpki_tr_sock *tr) if (tr->sk) { + sk_stop(tr->sk); rfree(tr->sk); tr->sk = NULL; } diff --git a/proto/static/static.c b/proto/static/static.c index 2789c1bb..d89ca8b0 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -52,14 +52,18 @@ static linpool *static_lp; static inline struct rte_src * static_get_source(struct static_proto *p, uint i) { return i ? rt_get_source(&p->p, i) : p->p.main_source; } +static inline void static_free_source(struct rte_src *src, uint i) +{ if (i) rt_unlock_source(src); } + static void static_announce_rte(struct static_proto *p, struct static_route *r) { + struct rte_src *src; rta *a = allocz(RTA_MAX_SIZE); - a->src = static_get_source(p, r->index); a->source = RTS_STATIC; a->scope = SCOPE_UNIVERSE; a->dest = r->dest; + a->pref = p->p.main_channel->preference; if (r->dest == RTD_UNICAST) { @@ -94,7 +98,7 @@ static_announce_rte(struct static_proto *p, struct static_route *r) if (r->dest == RTDX_RECURSIVE) { rtable *tab = ipa_is_ip4(r->via) ? 
p->igp_table_ip4 : p->igp_table_ip6; - rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls); + rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls, static_lp); } /* Already announced */ @@ -102,24 +106,16 @@ static_announce_rte(struct static_proto *p, struct static_route *r) return; /* We skip rta_lookup() here */ - rte *e = rte_get_temp(a); - e->pflags = 0; + src = static_get_source(p, r->index); + rte e0 = { .attrs = a, .src = src, .net = r->net, }, *e = &e0; + /* Evaluate the filter */ if (r->cmds) - { - /* Create a temporary table node */ - e->net = alloca(sizeof(net) + r->net->length); - memset(e->net, 0, sizeof(net) + r->net->length); - net_copy(e->net->n.addr, r->net); + f_eval_rte(r->cmds, e, static_lp); - /* Evaluate the filter */ - f_eval_rte(r->cmds, &e, static_lp); + rte_update(p->p.main_channel, r->net, e, src); + static_free_source(src, r->index); - /* Remove the temporary node */ - e->net = NULL; - } - - rte_update2(p->p.main_channel, r->net, e, a->src); r->state = SRS_CLEAN; if (r->cmds) @@ -131,7 +127,9 @@ withdraw: if (r->state == SRS_DOWN) return; - rte_update2(p->p.main_channel, r->net, NULL, a->src); + src = static_get_source(p, r->index); + rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); r->state = SRS_DOWN; } @@ -208,7 +206,7 @@ static_update_bfd(struct static_proto *p, struct static_route *r) // ip_addr local = ipa_nonzero(r->local) ? 
r->local : nb->ifa->ip; r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface, p->p.vrf, - static_bfd_notify, r, NULL); + static_bfd_notify, r, birdloop_event_list(p->p.loop), NULL); } if (!bfd_up && r->bfd_req) @@ -297,7 +295,11 @@ static void static_remove_rte(struct static_proto *p, struct static_route *r) { if (r->state) - rte_update2(p->p.main_channel, r->net, NULL, static_get_source(p, r->index)); + { + struct rte_src *src = static_get_source(p, r->index); + rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); + } static_reset_rte(p, r); } @@ -454,6 +456,8 @@ static_postconfig(struct proto_config *CF) static_index_routes(cf); } +static struct rte_owner_class static_rte_owner_class; + static struct proto * static_init(struct proto_config *CF) { @@ -465,8 +469,7 @@ static_init(struct proto_config *CF) P->neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; - P->rte_better = static_rte_better; - P->rte_mergable = static_rte_mergable; + P->sources.class = &static_rte_owner_class; if (cf->igp_table_ip4) p->igp_table_ip4 = cf->igp_table_ip4->table; @@ -488,10 +491,12 @@ static_start(struct proto *P) static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); if (p->igp_table_ip4) - rt_lock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_lock_table(t); if (p->igp_table_ip6) - rt_lock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_lock_table(t); p->event = ev_new_init(p->p.pool, static_announce_marked, p); @@ -517,19 +522,15 @@ static_shutdown(struct proto *P) WALK_LIST(r, cf->routes) static_reset_rte(p, r); - return PS_DOWN; -} - -static void -static_cleanup(struct proto *P) -{ - struct static_proto *p = (void *) P; - if (p->igp_table_ip4) - rt_unlock_table(p->igp_table_ip4); + RT_LOCKED(p->igp_table_ip4, t) + rt_unlock_table(t); if (p->igp_table_ip6) - rt_unlock_table(p->igp_table_ip6); + RT_LOCKED(p->igp_table_ip6, t) + rt_unlock_table(t); + + return PS_DOWN; } 
static void @@ -721,9 +722,9 @@ static_get_route_info(rte *rte, byte *buf) { eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC); if (a) - buf += bsprintf(buf, " (%d/%u)", rte->pref, a->u.data); + buf += bsprintf(buf, " (%d/%u)", rte->attrs->pref, a->u.data); else - buf += bsprintf(buf, " (%d)", rte->pref); + buf += bsprintf(buf, " (%d)", rte->attrs->pref); } static void @@ -773,6 +774,11 @@ static_show(struct proto *P) static_show_rt(r); } +static struct rte_owner_class static_rte_owner_class = { + .get_route_info = static_get_route_info, + .rte_better = static_rte_better, + .rte_mergable = static_rte_mergable, +}; struct protocol proto_static = { .name = "Static", @@ -787,8 +793,6 @@ struct protocol proto_static = { .dump = static_dump, .start = static_start, .shutdown = static_shutdown, - .cleanup = static_cleanup, .reconfigure = static_reconfigure, .copy_config = static_copy_config, - .get_route_info = static_get_route_info, }; |