summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Moskyto Matejka <mq@ucw.cz>2016-04-08 12:09:31 +0200
committerJan Moskyto Matejka <mq@ucw.cz>2016-04-08 12:28:33 +0200
commit7a7ac656829223713f9e6bcef63d2b5a5efce7d2 (patch)
tree153afd214a815124b37fcd88c66134d716a390cf
parent4bdf1881dc6230b742d7efcaad8eeac4ed25f445 (diff)
parent06edbb67ed807811654e7fd8f0f9b83766430216 (diff)
Merge branch 'master' into int-new-channels
-rw-r--r--doc/bird.sgml23
-rw-r--r--filter/config.Y3
-rw-r--r--filter/filter.c8
-rw-r--r--lib/lists.c29
-rw-r--r--lib/lists.h18
-rw-r--r--lib/resource.c12
-rw-r--r--lib/socket.h1
-rw-r--r--nest/a-path.c31
-rw-r--r--nest/attrs.h1
-rw-r--r--nest/config.Y2
-rw-r--r--nest/proto.c3
-rw-r--r--nest/route.h2
-rw-r--r--proto/bfd/bfd.c2
-rw-r--r--proto/bfd/io.c4
-rw-r--r--proto/bgp/bgp.c19
-rw-r--r--proto/bgp/packets.c28
-rw-r--r--proto/ospf/iface.c4
-rw-r--r--proto/ospf/neighbor.c3
-rw-r--r--proto/ospf/topology.c2
-rw-r--r--proto/rip/config.Y2
-rw-r--r--sysdep/bsd/krt-sock.c5
-rw-r--r--sysdep/linux/netlink.c3
-rw-r--r--sysdep/unix/io.c126
-rw-r--r--sysdep/unix/krt.c46
-rw-r--r--sysdep/unix/log.c11
-rw-r--r--sysdep/unix/main.c2
26 files changed, 227 insertions, 163 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 86df0456..5e5aeee4 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -318,8 +318,9 @@ protocol rip {
<p><descrip>
<tag>include "<m/filename/"</tag>
This statement causes inclusion of a new file. <m/Filename/ could also
- be a wildcard. The maximal depth is 8. Note that this statement could be
- used anywhere in the config file, not just as a top-level option.
+ be a wildcard, in that case matching files are included in alphabetic
+ order. The maximal depth is 8. Note that this statement could be used
+ anywhere in the config file, not just as a top-level option.
<tag><label id="dsc-log">log "<m/filename/"|syslog [name <m/name/]|stderr all|{ <m/list of classes/ }</tag>
Set logging of messages having the given class (either <cf/all/ or
@@ -1119,9 +1120,12 @@ foot).
<cf><m/P/.last</cf> returns the last ASN (the source ASN) in path <m/P/.
+ <cf><m/P/.last_nonaggregated</cf> returns the last ASN in the non-aggregated part of the path <m/P/.
+
Both <cf/first/ and <cf/last/ return zero if there is no appropriate
ASN, for example if the path contains an AS set element as the first (or
- the last) part.
+ the last) part. If the path ends with an AS set, <cf/last_nonaggregated/
+ may be used to get the last ASN before any AS set.
<cf><m/P/.len</cf> returns the length of path <m/P/.
@@ -1859,6 +1863,11 @@ using the following configuration parameters:
in neighbor's implementation of 4B AS extension. Even when disabled
(off), BIRD behaves internally as AS4-aware BGP router. Default: on.
+ <tag>enable extended messages <m/switch/</tag>
+ The BGP protocol uses maximum message length of 4096 bytes. This option
+ provides an extension to allow extended messages with length up
+ to 65535 bytes. Default: off.
+
<tag>capabilities <m/switch/</tag>
Use capability advertisement to advertise optional capabilities. This is
standard behavior for newer BGP implementations, but there might be some
@@ -2054,7 +2063,7 @@ protocol bgp {
multihop; # ... which is connected indirectly
export filter { # We use non-trivial export rules
if source = RTS_STATIC then { # Export only static routes
- # Assign our community
+ # Assign our community
bgp_community.add((65000,64501));
# Artificially increase path length
# by advertising local AS number twice
@@ -2263,7 +2272,7 @@ these attributes:
<tag>ip <cf/krt_prefsrc/</tag> (Linux)
The preferred source address. Used in source address selection for
- outgoing packets. Has to be one of the IP addresses of the router.
+ outgoing packets. Has to be one of the IP addresses of the router.
<tag>int <cf/krt_realm/</tag> (Linux)
The realm of the route. Can be used for traffic classification.
@@ -2608,8 +2617,8 @@ protocol ospf &lt;name&gt; {
updates. Default value is 5.
<tag>priority <M>num</M></tag>
- On every multiple access network (e.g., the Ethernet) Designed Router
- and Backup Designed router are elected. These routers have some special
+ On every multiple access network (e.g., the Ethernet) Designated Router
+ and Backup Designated router are elected. These routers have some special
functions in the flooding process. Higher priority increases preferences
in this election. Routers with priority 0 are not eligible. Default
value is 1.
diff --git a/filter/config.Y b/filter/config.Y
index 3bb00c13..3e70a63e 100644
--- a/filter/config.Y
+++ b/filter/config.Y
@@ -282,7 +282,7 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
LEN,
DEFINED,
ADD, DELETE, CONTAINS, RESET,
- PREPEND, FIRST, LAST, MATCH,
+ PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH,
EMPTY,
FILTER, WHERE, EVAL)
@@ -743,6 +743,7 @@ term:
| term '.' MASK '(' term ')' { $$ = f_new_inst(); $$->code = P('i','M'); $$->a1.p = $1; $$->a2.p = $5; }
| term '.' FIRST { $$ = f_new_inst(); $$->code = P('a','f'); $$->a1.p = $1; }
| term '.' LAST { $$ = f_new_inst(); $$->code = P('a','l'); $$->a1.p = $1; }
+ | term '.' LAST_NONAGGREGATED { $$ = f_new_inst(); $$->code = P('a','L'); $$->a1.p = $1; }
/* Communities */
/* This causes one shift/reduce conflict
diff --git a/filter/filter.c b/filter/filter.c
index 6ab0cc93..cc1bb3dc 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -1056,6 +1056,14 @@ interpret(struct f_inst *what)
res.type = T_INT;
res.val.i = as;
break;
+ case P('a','L'): /* Get last ASN from non-aggregated part of AS PATH */
+ ONEARG;
+ if (v1.type != T_PATH)
+ runtime( "AS path expected" );
+
+ res.type = T_INT;
+ res.val.i = as_path_get_last_nonaggregated(v1.val.ad);
+ break;
case 'r':
ONEARG;
res = v1;
diff --git a/lib/lists.c b/lib/lists.c
index d323a4b6..12ef3cc6 100644
--- a/lib/lists.c
+++ b/lib/lists.c
@@ -41,7 +41,7 @@ add_tail(list *l, node *n)
{
node *z = l->tail;
- n->next = (node *) &l->null;
+ n->next = &l->tail_node;
n->prev = z;
z->next = n;
l->tail = n;
@@ -60,7 +60,7 @@ add_head(list *l, node *n)
node *z = l->head;
n->next = z;
- n->prev = (node *) &l->head;
+ n->prev = &l->head_node;
z->prev = n;
l->head = n;
}
@@ -88,7 +88,7 @@ insert_node(node *n, node *after)
* rem_node - remove a node from a list
* @n: node to be removed
*
- * Removes a node @n from the list it's linked in.
+ * Removes a node @n from the list it's linked in. Afterwards, node @n is cleared.
*/
LIST_INLINE void
rem_node(node *n)
@@ -98,23 +98,6 @@ rem_node(node *n)
z->next = x;
x->prev = z;
-}
-
-/**
- * rem2_node - remove a node from a list, with cleanup
- * @n: node to be removed
- *
- * Removes a node @n from the list it's linked in and resets its pointers to NULL.
- * Useful if you want to distinguish between linked and unlinked nodes.
- */
-LIST_INLINE void
-rem2_node(node *n)
-{
- node *z = n->prev;
- node *x = n->next;
-
- z->next = x;
- x->prev = z;
n->next = NULL;
n->prev = NULL;
}
@@ -150,9 +133,9 @@ replace_node(node *old, node *new)
LIST_INLINE void
init_list(list *l)
{
- l->head = (node *) &l->null;
+ l->head = &l->tail_node;
l->null = NULL;
- l->tail = (node *) &l->head;
+ l->tail = &l->head_node;
}
/**
@@ -172,6 +155,6 @@ add_tail_list(list *to, list *l)
p->next = q;
q->prev = p;
q = l->tail;
- q->next = (node *) &to->null;
+ q->next = &to->tail_node;
to->tail = q;
}
diff --git a/lib/lists.h b/lib/lists.h
index d75f033d..46b33446 100644
--- a/lib/lists.h
+++ b/lib/lists.h
@@ -26,10 +26,23 @@ typedef struct node {
struct node *next, *prev;
} node;
-typedef struct list { /* In fact two overlayed nodes */
- struct node *head, *null, *tail;
+typedef union list { /* In fact two overlayed nodes */
+ struct { /* Head node */
+ struct node head_node;
+ void *head_padding;
+ };
+ struct { /* Tail node */
+ void *tail_padding;
+ struct node tail_node;
+ };
+ struct { /* Split to separate pointers */
+ struct node *head;
+ struct node *null;
+ struct node *tail;
+ };
} list;
+
#define NODE (node *)
#define HEAD(list) ((void *)((list).head))
#define TAIL(list) ((void *)((list).tail))
@@ -64,7 +77,6 @@ typedef struct list { /* In fact two overlayed nodes */
void add_tail(list *, node *);
void add_head(list *, node *);
void rem_node(node *);
-void rem2_node(node *);
void add_tail_list(list *, list *);
void init_list(list *);
void insert_node(node *, node *);
diff --git a/lib/resource.c b/lib/resource.c
index 64f9a39c..68718dfb 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -163,6 +163,7 @@ rfree(void *res)
if (r->n.next)
rem_node(&r->n);
r->class->free(r);
+ r->class = NULL;
xfree(r);
}
@@ -383,16 +384,9 @@ mb_allocz(pool *p, unsigned size)
void *
mb_realloc(void *m, unsigned size)
{
- struct mblock *ob = NULL;
-
- if (m)
- {
- ob = SKIP_BACK(struct mblock, data, m);
- if (ob->r.n.next)
- rem_node(&ob->r.n);
- }
+ struct mblock *b = SKIP_BACK(struct mblock, data, m);
- struct mblock *b = xrealloc(ob, sizeof(struct mblock) + size);
+ b = xrealloc(b, sizeof(struct mblock) + size);
replace_node(&b->r.n, &b->r.n);
b->size = size;
return b->data;
diff --git a/lib/socket.h b/lib/socket.h
index 1b03098d..91ae9db3 100644
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -27,6 +27,7 @@ typedef struct birdsock {
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
byte *rbuf, *rpos; /* NULL=allocate automatically */
+ uint fast_rx; /* RX has higher priority in event loop */
uint rbsize;
int (*rx_hook)(struct birdsock *, int size); /* NULL=receiving turned off, returns 1 to clear rx buffer */
diff --git a/nest/a-path.c b/nest/a-path.c
index c9c5aefb..32e2d27e 100644
--- a/nest/a-path.c
+++ b/nest/a-path.c
@@ -220,7 +220,7 @@ as_path_get_last(struct adata *path, u32 *orig_as)
p += BS * len;
}
break;
- default: bug("as_path_get_first: Invalid path segment");
+ default: bug("Invalid path segment");
}
}
@@ -229,6 +229,35 @@ as_path_get_last(struct adata *path, u32 *orig_as)
return found;
}
+u32
+as_path_get_last_nonaggregated(struct adata *path)
+{
+ u8 *p = path->data;
+ u8 *q = p+path->length;
+ u32 res = 0;
+ int len;
+
+ while (p<q)
+ {
+ switch (*p++)
+ {
+ case AS_PATH_SET:
+ return res;
+
+ case AS_PATH_SEQUENCE:
+ if (len = *p++)
+ res = get_as(p + BS * (len - 1));
+ p += BS * len;
+ break;
+
+ default: bug("Invalid path segment");
+ }
+ }
+
+ return res;
+}
+
+
int
as_path_get_first(struct adata *path, u32 *last_as)
{
diff --git a/nest/attrs.h b/nest/attrs.h
index 1d005a6a..0171c6a8 100644
--- a/nest/attrs.h
+++ b/nest/attrs.h
@@ -35,6 +35,7 @@ int as_path_getlen(struct adata *path);
int as_path_getlen_int(struct adata *path, int bs);
int as_path_get_first(struct adata *path, u32 *orig_as);
int as_path_get_last(struct adata *path, u32 *last_as);
+u32 as_path_get_last_nonaggregated(struct adata *path);
int as_path_contains(struct adata *path, u32 as, int min);
int as_path_match_set(struct adata *path, struct f_tree *set);
struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos);
diff --git a/nest/config.Y b/nest/config.Y
index 6bb686c3..94a67670 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -112,7 +112,7 @@ idval:
else if (($1->class == (SYM_CONSTANT | T_IP)) && ipa_is_ip4(SYM_VAL($1).ip))
$$ = ipa_to_u32(SYM_VAL($1).ip);
else
- cf_error("Number of IPv4 address constant expected");
+ cf_error("Number or IPv4 address constant expected");
}
;
diff --git a/nest/proto.c b/nest/proto.c
index f712fe5f..df4952b7 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -264,6 +264,7 @@ channel_stop_export(struct channel *c)
rt_feed_channel_abort(c);
c->export_state = ES_DOWN;
+ c->stats.exp_routes = 0;
}
static void
@@ -299,7 +300,7 @@ channel_do_flush(struct channel *c)
static void
channel_do_down(struct channel *c)
{
- rem2_node(&c->table_node);
+ rem_node(&c->table_node);
rt_unlock_table(c->table);
c->proto->active_channels--;
diff --git a/nest/route.h b/nest/route.h
index 22fca331..11b08ce5 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -232,8 +232,8 @@ typedef struct rte {
struct { /* Routes generated by krt sync (both temporary and inherited ones) */
s8 src; /* Alleged route source (see krt.h) */
u8 proto; /* Kernel source protocol ID */
- u8 type; /* Kernel route type */
u8 seen; /* Seen during last scan */
+ u8 best; /* Best route in network, propagated to core */
u32 metric; /* Kernel metric */
} krt;
} u;
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
index 62752e21..f966161c 100644
--- a/proto/bfd/bfd.c
+++ b/proto/bfd/bfd.c
@@ -872,7 +872,7 @@ bfd_notify_hook(sock *sk, int len)
WALK_LIST_FIRST(s, tmp_list)
{
bfd_lock_sessions(p);
- rem2_node(&s->n);
+ rem_node(&s->n);
state = s->loc_state;
diag = s->loc_diag;
bfd_unlock_sessions(p);
diff --git a/proto/bfd/io.c b/proto/bfd/io.c
index fb150040..79ed9af7 100644
--- a/proto/bfd/io.c
+++ b/proto/bfd/io.c
@@ -576,7 +576,7 @@ sockets_close_fds(struct birdloop *loop)
loop->close_scheduled = 0;
}
-int sk_read(sock *s);
+int sk_read(sock *s, int revents);
int sk_write(sock *s);
static void
@@ -605,7 +605,7 @@ sockets_fire(struct birdloop *loop)
if (pfd->revents & POLLIN)
while (e && *psk && (*psk)->rx_hook)
- e = sk_read(*psk);
+ e = sk_read(*psk, 0);
e = 1;
if (pfd->revents & POLLOUT)
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index cb5b108c..61b5cba2 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -374,6 +374,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
if (ipa_zero(p->source_addr))
p->source_addr = conn->sk->saddr;
+ conn->sk->fast_rx = 0;
+
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
@@ -666,6 +668,10 @@ bgp_keepalive_timeout(timer *t)
DBG("BGP: Keepalive timer\n");
bgp_schedule_packet(conn, PKT_KEEPALIVE);
+
+ /* Kick TX a bit faster */
+ if (ev_active(conn->tx_ev))
+ ev_run(conn->tx_ev);
}
static void
@@ -696,6 +702,7 @@ bgp_setup_sk(struct bgp_conn *conn, sock *s)
{
s->data = conn;
s->err_hook = bgp_sock_err;
+ s->fast_rx = 1;
conn->sk = s;
}
@@ -813,7 +820,13 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED)
return 0;
}
- /* We are in proper state and there is no other incoming connection */
+ /*
+ * BIRD should keep multiple incoming connections in OpenSent state (for
+ * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
+ * connections are rejected instead. The exception is the case where an
+ * incoming connection triggers a graceful restart.
+ */
+
acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
@@ -823,6 +836,10 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED)
bgp_handle_graceful_restart(p);
bgp_conn_enter_idle_state(p->conn);
acc = 1;
+
+ /* There might be a separate incoming connection in OpenSent state */
+ if (p->incoming_conn.state > BS_ACTIVE)
+ bgp_close_conn(&p->incoming_conn);
}
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index ed99f623..72ca3728 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -164,6 +164,14 @@ bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
}
static byte *
+bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
+{
+ *buf++ = 6; /* Capability 6: Support for extended messages */
+ *buf++ = 0; /* Capability data length */
+ return buf;
+}
+
+static byte *
bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
{
*buf++ = 64; /* Capability 64: Support for graceful restart */
@@ -223,14 +231,6 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
return buf;
}
-static byte *
-bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
-{
- *buf++ = 230; /* Capability TBD: Support for extended messages */
- *buf++ = 0; /* Capability data length */
- return buf;
-}
-
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
@@ -827,6 +827,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
conn->peer_refresh_support = 1;
break;
+ case 6: /* Extended message length capability, draft */
+ if (cl != 0)
+ goto err;
+ conn->peer_ext_messages_support = 1;
+ break;
+
case 64: /* Graceful restart capability, RFC 4724 */
if (cl % 4 != 2)
goto err;
@@ -867,12 +873,6 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
conn->peer_enhanced_refresh_support = 1;
break;
- case 230: /* Extended message length capability, draft, cap number TBD */
- if (cl != 0)
- goto err;
- conn->peer_ext_messages_support = 1;
- break;
-
/* We can safely ignore all other capabilities */
}
len -= 2 + cl;
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 6001ac26..4548f6da 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -595,10 +595,10 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
if (ospf_is_v2(p) && (ifa->type == OSPF_IT_NBMA) && (addr->flags & IA_PEER))
ifa->type = OSPF_IT_PTMP;
- if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag))
+ if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag) && !ifa->stub)
ifa->type = OSPF_IT_NBMA;
- if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag))
+ if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag) && !ifa->stub)
ifa->type = OSPF_IT_PTMP;
if (ifa->type != old_type)
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index b30b0438..b68ba6f4 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -108,6 +108,7 @@ ospf_neigh_down(struct ospf_neighbor *n)
{
struct ospf_iface *ifa = n->ifa;
struct ospf_proto *p = ifa->oa->po;
+ u32 rid = n->rid;
if ((ifa->type == OSPF_IT_NBMA) || (ifa->type == OSPF_IT_PTMP))
{
@@ -121,7 +122,7 @@ ospf_neigh_down(struct ospf_neighbor *n)
rem_node(NODE n);
rfree(n->pool);
- OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", n->rid, ifa->ifname);
+ OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname);
}
/**
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index 9d0a93c7..86e39d75 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -278,7 +278,7 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa)
if (!SNODE_VALID(en))
s_add_tail(&p->lsal, SNODE en);
- if (en->lsa_body == NULL)
+ if (!en->nf || !en->lsa_body)
en->nf = lsa->nf;
if (en->nf != lsa->nf)
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index 79e57741..3c8cd0f2 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -137,7 +137,7 @@ rip_iface_item:
| TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); }
| GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); }
| ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); }
- | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); }
+ | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); }
| TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); }
| TX tos { RIP_IFACE->tx_tos = $2; }
| TX PRIORITY expr { RIP_IFACE->tx_priority = $3; }
diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c
index 5f2f1309..9f84b3f5 100644
--- a/sysdep/bsd/krt-sock.c
+++ b/sysdep/bsd/krt-sock.c
@@ -528,9 +528,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
e->net = net;
e->u.krt.src = src;
e->u.krt.proto = src2;
-
- /* These are probably too Linux-specific */
- e->u.krt.type = 0;
+ e->u.krt.seen = 0;
+ e->u.krt.best = 0;
e->u.krt.metric = 0;
if (scan)
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 6240c177..c398a7f6 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -1204,7 +1204,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
e->net = net;
e->u.krt.src = src;
e->u.krt.proto = i->rtm_protocol;
- e->u.krt.type = i->rtm_type;
+ e->u.krt.seen = 0;
+ e->u.krt.best = 0;
e->u.krt.metric = 0;
if (a[RTA_PRIORITY])
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index cbfb47d5..37e26c9b 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -19,6 +19,7 @@
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
+#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
@@ -41,12 +42,12 @@
#include "lib/sysio.h"
/* Maximum number of calls of tx handler for one socket in one
- * select iteration. Should be small enough to not monopolize CPU by
+ * poll iteration. Should be small enough to not monopolize CPU by
* one protocol instance.
*/
#define MAX_STEPS 4
-/* Maximum number of calls of rx handler for all sockets in one select
+/* Maximum number of calls of rx handler for all sockets in one poll
iteration. RX callbacks are often much more costly so we limit
this to gen small latencies */
#define MAX_RX_STEPS 4
@@ -1023,7 +1024,6 @@ sk_log_error(sock *s, const char *p)
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
-static int sock_recalc_fdsets_p;
static inline sock *
sk_next(sock *s)
@@ -1079,7 +1079,6 @@ sk_free(resource *r)
if (s == stored_sock)
stored_sock = sk_next(s);
rem_node(&s->n);
- sock_recalc_fdsets_p = 1;
}
}
@@ -1277,7 +1276,6 @@ static void
sk_insert(sock *s)
{
add_tail(&sock_list, &s->n);
- sock_recalc_fdsets_p = 1;
}
static void
@@ -1329,18 +1327,6 @@ sk_passive_connected(sock *s, int type)
log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
}
- if (fd >= FD_SETSIZE)
- {
- /* FIXME: Call err_hook instead ? */
- log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
- t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
- t->dport, "rejected due to FD_SETSIZE limit");
- close(fd);
- t->fd = -1;
- rfree(t);
- return 1;
- }
-
if (sk_setup(t) < 0)
{
/* FIXME: Call err_hook instead ? */
@@ -1416,9 +1402,6 @@ sk_open(sock *s)
if (fd < 0)
ERR("socket");
- if (fd >= FD_SETSIZE)
- ERR2("FD_SETSIZE limit reached");
-
s->fd = fd;
if (sk_setup(s) < 0)
@@ -1696,19 +1679,12 @@ sk_maybe_write(sock *s)
int
sk_rx_ready(sock *s)
{
- fd_set rd, wr;
- struct timeval timo;
int rv;
-
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- FD_SET(s->fd, &rd);
-
- timo.tv_sec = 0;
- timo.tv_usec = 0;
+ struct pollfd pfd = { .fd = s->fd };
+ pfd.events |= POLLIN;
redo:
- rv = select(s->fd+1, &rd, &wr, NULL, &timo);
+ rv = poll(&pfd, 1, 0);
if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
goto redo;
@@ -1777,7 +1753,7 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa,
/* sk_read() and sk_write() are called from BFD's event loop */
int
-sk_read(sock *s)
+sk_read(sock *s, int revents)
{
switch (s->type)
{
@@ -1796,6 +1772,11 @@ sk_read(sock *s)
{
if (errno != EINTR && errno != EAGAIN)
s->err_hook(s, errno);
+ else if (errno == EAGAIN && !(revents & POLLIN))
+ {
+ log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
+ s->err_hook(s, 0);
+ }
}
else if (!c)
s->err_hook(s, 0);
@@ -2068,62 +2049,63 @@ static int short_loops = 0;
void
io_loop(void)
{
- fd_set rd, wr;
- struct timeval timo;
+ int poll_tout;
time_t tout;
- int hi, events;
+ int nfds, events, pout;
sock *s;
node *n;
+ int fdmax = 256;
+ struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
watchdog_start1();
- sock_recalc_fdsets_p = 1;
for(;;)
{
events = ev_run_list(&global_event_list);
+ timers:
update_times();
tout = tm_first_shot();
if (tout <= now)
{
tm_shot();
- continue;
+ goto timers;
}
- timo.tv_sec = events ? 0 : MIN(tout - now, 3);
- timo.tv_usec = 0;
+ poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
io_close_event();
- if (sock_recalc_fdsets_p)
- {
- sock_recalc_fdsets_p = 0;
- FD_ZERO(&rd);
- FD_ZERO(&wr);
- }
-
- hi = 0;
+ nfds = 0;
WALK_LIST(n, sock_list)
{
+ pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything else is set to 0 by this */
s = SKIP_BACK(sock, n, n);
if (s->rx_hook)
{
- FD_SET(s->fd, &rd);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLIN;
}
- else
- FD_CLR(s->fd, &rd);
if (s->tx_hook && s->ttx != s->tpos)
{
- FD_SET(s->fd, &wr);
- if (s->fd > hi)
- hi = s->fd;
+ pfd[nfds].fd = s->fd;
+ pfd[nfds].events |= POLLOUT;
+ }
+ if (pfd[nfds].fd != -1)
+ {
+ s->index = nfds;
+ nfds++;
}
else
- FD_CLR(s->fd, &wr);
+ s->index = -1;
+
+ if (nfds >= fdmax)
+ {
+ fdmax *= 2;
+ pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
+ }
}
/*
* Yes, this is racy. But even if the signal comes before this test
- * and entering select(), it gets caught on the next timer tick.
+ * and entering poll(), it gets caught on the next timer tick.
*/
if (async_config_flag)
@@ -2148,18 +2130,18 @@ io_loop(void)
continue;
}
- /* And finally enter select() to find active sockets */
+ /* And finally enter poll() to find active sockets */
watchdog_stop();
- hi = select(hi+1, &rd, &wr, NULL, &timo);
+ pout = poll(pfd, nfds, poll_tout);
watchdog_start();
- if (hi < 0)
+ if (pout < 0)
{
if (errno == EINTR || errno == EAGAIN)
continue;
- die("select: %m");
+ die("poll: %m");
}
- if (hi)
+ if (pout)
{
/* guaranteed to be non-empty */
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
@@ -2167,23 +2149,29 @@ io_loop(void)
while (current_sock)
{
sock *s = current_sock;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next;
+ }
+
int e;
int steps;
steps = MAX_STEPS;
- if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
do
{
steps--;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ e = sk_read(s, pfd[s->index].revents);
if (s != current_sock)
goto next;
}
while (e && s->rx_hook && steps);
steps = MAX_STEPS;
- if (FD_ISSET(s->fd, &wr))
+ if (pfd[s->index].revents & POLLOUT)
do
{
steps--;
@@ -2210,13 +2198,17 @@ io_loop(void)
while (current_sock && count < MAX_RX_STEPS)
{
sock *s = current_sock;
- int e UNUSED;
+ if (s->index == -1)
+ {
+ current_sock = sk_next(s);
+ goto next2;
+ }
- if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
+ if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
{
count++;
io_log_event(s->rx_hook, s->data);
- e = sk_read(s);
+ sk_read(s, pfd[s->index].revents);
if (s != current_sock)
goto next2;
}
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 6b3b4eee..b0a96613 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -412,46 +412,58 @@ again:
{
rte *e, **ee, *best, **pbest, *old_best;
- old_best = n->routes;
+ /*
+ * Note that old_best may be NULL even if there was an old best route in
+ * the previous step, because it might be replaced in krt_learn_scan().
+ * But in that case there is a new valid best route.
+ */
+
+ old_best = NULL;
best = NULL;
pbest = NULL;
ee = &n->routes;
while (e = *ee)
{
+ if (e->u.krt.best)
+ old_best = e;
+
if (!e->u.krt.seen)
{
*ee = e->next;
rte_free(e);
continue;
}
+
if (!best || best->u.krt.metric > e->u.krt.metric)
{
best = e;
pbest = ee;
}
+
e->u.krt.seen = 0;
+ e->u.krt.best = 0;
ee = &e->next;
}
if (!n->routes)
{
DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
if (old_best)
- {
- krt_learn_announce_delete(p, n);
- n->n.flags &= ~KRF_INSTALLED;
- }
+ krt_learn_announce_delete(p, n);
+
FIB_ITERATE_PUT(&fit);
fib_delete(fib, n);
goto again;
}
+
+ best->u.krt.best = 1;
*pbest = best->next;
best->next = n->routes;
n->routes = best;
- if (best != old_best || !(n->n.flags & KRF_INSTALLED) || p->reload)
+
+ if ((best != old_best) || p->reload)
{
DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
krt_learn_announce_update(p, best);
- n->n.flags |= KRF_INSTALLED;
}
else
DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
@@ -510,31 +522,31 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
best = n->routes;
bestp = &n->routes;
for(gg=&n->routes; g=*gg; gg=&g->next)
+ {
if (best->u.krt.metric > g->u.krt.metric)
{
best = g;
bestp = gg;
}
+
+ g->u.krt.best = 0;
+ }
+
if (best)
{
+ best->u.krt.best = 1;
*bestp = best->next;
best->next = n->routes;
n->routes = best;
}
+
if (best != old_best)
{
DBG("krt_learn_async: distributing change\n");
if (best)
- {
- krt_learn_announce_update(p, best);
- n->n.flags |= KRF_INSTALLED;
- }
+ krt_learn_announce_update(p, best);
else
- {
- n->routes = NULL;
- krt_learn_announce_delete(p, n);
- n->n.flags &= ~KRF_INSTALLED;
- }
+ krt_learn_announce_delete(p, n);
}
}
@@ -559,7 +571,7 @@ krt_dump(struct proto *P)
static void
krt_dump_attrs(rte *e)
{
- debug(" [m=%d,p=%d,t=%d]", e->u.krt.metric, e->u.krt.proto, e->u.krt.type);
+ debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto);
}
#endif
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index 7cb26360..b90bbbd2 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -284,17 +284,18 @@ log_switch(int debug, list *l, char *new_syslog_name)
current_log_list = l;
#ifdef HAVE_SYSLOG
- if (current_syslog_name && new_syslog_name &&
- !strcmp(current_syslog_name, new_syslog_name))
+ char *old_syslog_name = current_syslog_name;
+ current_syslog_name = new_syslog_name;
+
+ if (old_syslog_name && new_syslog_name &&
+ !strcmp(old_syslog_name, new_syslog_name))
return;
- if (current_syslog_name)
+ if (old_syslog_name)
closelog();
if (new_syslog_name)
openlog(new_syslog_name, LOG_CONS | LOG_NDELAY, LOG_DAEMON);
-
- current_syslog_name = new_syslog_name;
#endif
}
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 8796ab9c..f95bd968 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -450,6 +450,7 @@ cli_connect(sock *s, int size UNUSED)
s->err_hook = cli_err;
s->data = c = cli_new(s);
s->pool = c->pool; /* We need to have all the socket buffers allocated in the cli pool */
+ s->fast_rx = 1;
c->rx_pos = c->rx_buf;
c->rx_aux = NULL;
rmove(s, c->pool);
@@ -466,6 +467,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid)
s->type = SK_UNIX_PASSIVE;
s->rx_hook = cli_connect;
s->rbsize = 1024;
+ s->fast_rx = 1;
/* Return value intentionally ignored */
unlink(path_control_socket);