summaryrefslogtreecommitdiff
path: root/proto/bgp
diff options
context:
space:
mode:
author Maria Matejka <mq@ucw.cz> 2022-10-04 15:40:52 +0200
committer Maria Matejka <mq@ucw.cz> 2022-10-04 15:40:52 +0200
commit becca314e2546d6005a23398ce2d3012d4b396cb (patch)
tree bdd2f55e81d42e6a1108593840c9273106676e09 /proto/bgp
parent 61c127c021ac34eba25d3245ccf8f9eb9dd352f5 (diff)
parent 0072d11f3431165240656edf6ade473554b8747e (diff)
Merge commit '0072d11f' into tmp-learn
Diffstat (limited to 'proto/bgp')
-rw-r--r-- proto/bgp/attrs.c 66
-rw-r--r-- proto/bgp/bgp.c 77
-rw-r--r-- proto/bgp/bgp.h 5
-rw-r--r-- proto/bgp/packets.c 22
4 files changed, 139 insertions, 31 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 084c9b63..a7b1a7ed 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1901,7 +1901,6 @@ bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req
pool *p = rp_new(c->c.proto->pool, "Export hook");
struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
hook->pool = p;
- hook->lp = lp_new_default(p);
hook->event = ev_new_init(p, bgp_out_table_feed, hook);
hook->feed_type = TFT_HASH;
@@ -1919,6 +1918,7 @@ bgp_setup_out_table(struct bgp_channel *c)
};
init_list(&c->prefix_exporter.hooks);
+ init_list(&c->prefix_exporter.pending);
c->c.out_table = &c->prefix_exporter;
}
@@ -1929,10 +1929,10 @@ bgp_setup_out_table(struct bgp_channel *c)
*/
int
-bgp_preexport(struct channel *c, rte *e)
+bgp_preexport(struct channel *C, rte *e)
{
struct proto *SRC = e->src->proto;
- struct bgp_proto *p = (struct bgp_proto *) (c->proto);
+ struct bgp_proto *p = (struct bgp_proto *) C->proto;
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
/* Reject our routes */
@@ -2546,27 +2546,57 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
return !old_suppressed;
}
-rte *
-bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
+void
+bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
{
- eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
- const struct adata *ad = ea ? ea->u.ptr : NULL;
- uint flags = ea ? ea->flags : BAF_PARTIAL;
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct rt_import_hook *irh = c->c.in_req.hook;
- if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
- return NULL;
+ /* Find our routes among others */
+ for (uint i=0; i<count; i++)
+ {
+ rte *r = feed[i];
+
+ if (
+ !rte_is_valid(r) || /* Not a valid route */
+ (r->sender != irh) || /* Not our route */
+ (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */
+ continue;
+
+ eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
+ const struct adata *ad = ea ? ea->u.ptr : NULL;
+ uint flags = ea ? ea->flags : BAF_PARTIAL;
- if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
- return r;
+ /* LLGR not allowed, withdraw the route */
+ if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
+ {
+ rte_import(&c->c.in_req, n, NULL, r->src);
+ continue;
+ }
+
+ /* Route already marked as LLGR, do nothing */
+ if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
+ continue;
- _Thread_local static rte e0;
- e0 = *r;
+ /* Store the tmp_linpool state to aggressively save memory */
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
- bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags,
- int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
- e0.pflags |= BGP_REF_STALE;
+ /* Mark the route as LLGR */
+ rte e0 = *r;
+ bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE));
+ e0.pflags &= ~BGP_REF_NOT_STALE;
+ e0.pflags |= BGP_REF_STALE;
- return &e0;
+ /* We need to update the route but keep it stale. */
+ ASSERT_DIE(irh->stale_set == irh->stale_valid + 1);
+ irh->stale_set--;
+ rte_import(&c->c.in_req, n, &e0, r->src);
+ irh->stale_set++;
+
+ /* Restore the memory state */
+ lp_restore(tmp_linpool, &tmpp);
+ }
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 6ffe8824..33849b0b 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -139,6 +139,9 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
static void bgp_listen_sock_err(sock *sk UNUSED, int err);
+static void bgp_graceful_restart_feed(struct bgp_channel *c);
+
+
/**
* bgp_open - open a BGP instance
* @p: BGP instance
@@ -373,6 +376,7 @@ bgp_close_conn(struct bgp_conn *conn)
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
+
rfree(conn->tx_ev);
conn->tx_ev = NULL;
rfree(conn->sk);
@@ -511,6 +515,7 @@ void
bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
+ p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
ev_schedule(p->event);
@@ -760,25 +765,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
{
case BGP_GRS_NONE:
c->gr_active = BGP_GRS_ACTIVE;
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_ACTIVE:
- rt_refresh_end(c->c.table, &c->c.in_req);
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_LLGR:
- rt_refresh_begin(c->c.table, &c->c.in_req);
- rt_modify_stale(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
+ bgp_graceful_restart_feed(c);
break;
}
}
else
{
/* Just flush the routes */
- rt_refresh_begin(c->c.table, &c->c.in_req);
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/* Reset bucket and prefix tables */
@@ -796,6 +801,53 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
}
+static void
+bgp_graceful_restart_feed_done(struct rt_export_request *req)
+{
+ req->hook = NULL;
+}
+
+static void
+bgp_graceful_restart_feed_dump_req(struct rt_export_request *req)
+{
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req);
+}
+
+static void
+bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state)
+{
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct bgp_proto *p = (void *) c->c.proto;
+ BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state));
+
+ if (state == TES_READY)
+ rt_stop_export(req, bgp_graceful_restart_feed_done);
+}
+
+static void
+bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED)
+{ /* Nothing to do */ }
+
+static void
+bgp_graceful_restart_feed(struct bgp_channel *c)
+{
+ c->stale_feed = (struct rt_export_request) {
+ .name = "BGP-GR",
+ .list = &global_work_list,
+ .trace_routes = c->c.debug | c->c.proto->debug,
+ .dump_req = bgp_graceful_restart_feed_dump_req,
+ .log_state_change = bgp_graceful_restart_feed_log_state_change,
+ .export_bulk = bgp_rte_modify_stale,
+ .export_one = bgp_graceful_restart_drop_export,
+ };
+
+ rt_request_export(&c->c.table->exporter, &c->stale_feed);
+}
+
+
+
+
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
* @c: BGP channel
@@ -819,7 +871,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
tm_stop(c->stale_timer);
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/**
@@ -861,7 +913,7 @@ bgp_graceful_restart_timeout(timer *t)
/* Channel is in GR, and supports LLGR -> start LLGR */
c->gr_active = BGP_GRS_LLGR;
tm_start(c->stale_timer, c->stale_time S);
- rt_modify_stale(c->c.table, &c->c.in_req);
+ bgp_graceful_restart_feed(c);
}
}
else
@@ -899,7 +951,7 @@ bgp_refresh_begin(struct bgp_channel *c)
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
c->load_state = BFS_REFRESHING;
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
}
/**
@@ -920,7 +972,7 @@ bgp_refresh_end(struct bgp_channel *c)
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
c->load_state = BFS_NONE;
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
@@ -1526,6 +1578,8 @@ bgp_start(struct proto *P)
p->last_rx_update = 0;
p->event = ev_new_init(p->p.pool, bgp_decision, p);
+ p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p);
+
p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
@@ -1672,7 +1726,6 @@ bgp_init(struct proto_config *CF)
P->rte_better = bgp_rte_better;
P->rte_mergable = bgp_rte_mergable;
P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
- P->rte_modify = bgp_rte_modify_stale;
P->rte_igp_metric = bgp_rte_igp_metric;
p->cf = cf;
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 003893e0..469f0cb9 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -319,6 +319,7 @@ struct bgp_proto {
struct bgp_socket *sock; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ event *uncork_ev; /* Uncork event in case of congestion */
struct bgp_stats stats; /* BGP statistics */
btime last_established; /* Last time of enter/leave of established state */
btime last_rx_update; /* Last time of RX update */
@@ -371,6 +372,7 @@ struct bgp_channel {
timer *stale_timer; /* Long-lived stale timer for LLGR */
u32 stale_time; /* Stored LLGR stale time from last session */
+ struct rt_export_request stale_feed; /* Feeder request for stale route modification */
u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
@@ -576,7 +578,7 @@ void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bu
int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
-struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool);
+void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count);
u32 bgp_rte_igp_metric(const rte *);
void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
int bgp_preexport(struct channel *, struct rte *);
@@ -609,6 +611,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
+void bgp_uncork(void *vp);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 4d4ae3eb..de976588 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -3175,6 +3175,21 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
}
}
+void
+bgp_uncork(void *vp)
+{
+ struct bgp_proto *p = vp;
+
+ if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook)
+ {
+ struct birdsock *sk = p->conn->sk;
+ ASSERT_DIE(sk->rpos > sk->rbuf);
+ sk->rx_hook = bgp_rx;
+ bgp_rx(sk, sk->rpos - sk->rbuf);
+ BGP_TRACE(D_PACKETS, "Uncorked");
+ }
+}
+
/**
* bgp_rx - handle received data
* @sk: socket
@@ -3189,6 +3204,7 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
+ struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
uint i, len;
@@ -3198,6 +3214,12 @@ bgp_rx(sock *sk, uint size)
{
if ((conn->state == BS_CLOSE) || (conn->sk != sk))
return 0;
+ if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev))
+ {
+ sk->rx_hook = NULL;
+ BGP_TRACE(D_PACKETS, "Corked");
+ return 0;
+ }
for(i=0; i<16; i++)
if (pkt_start[i] != 0xff)
{