summaryrefslogtreecommitdiff
path: root/sysdep/unix
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep/unix')
-rw-r--r--sysdep/unix/Makefile2
-rw-r--r--sysdep/unix/alloc.c191
-rw-r--r--sysdep/unix/io.c27
-rw-r--r--sysdep/unix/krt.Y5
-rw-r--r--sysdep/unix/krt.c131
-rw-r--r--sysdep/unix/krt.h5
-rw-r--r--sysdep/unix/main.c38
7 files changed, 256 insertions, 143 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index d0d36b5f..51ab98a9 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
+$(call proto-build,kif_build)
+$(call proto-build,krt_build)
$(conf-y-targets): $(s)krt.Y
src := $(filter-out main.c, $(src))
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 4c9d5eb5..edad6209 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -8,7 +8,10 @@
#include "nest/bird.h"
#include "lib/resource.h"
+#include "lib/lists.h"
+#include "lib/event.h"
+#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
@@ -17,82 +20,168 @@
#endif
long page_size = 0;
-_Bool alloc_multipage = 0;
#ifdef HAVE_MMAP
+#define KEEP_PAGES_MAIN_MAX 256
+#define KEEP_PAGES_MAIN_MIN 8
+#define CLEANUP_PAGES_BULK 256
+
+STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
+
static _Bool use_fake = 0;
+
+#if DEBUGGING
+struct free_page {
+ node unused[42];
+ node n;
+};
#else
-static _Bool use_fake = 1;
+struct free_page {
+ node n;
+};
+#endif
+
+struct free_pages {
+ list pages;
+ u16 min, max; /* Minimal and maximal number of free pages kept */
+ uint cnt; /* Number of empty pages */
+ event cleanup;
+};
+
+static void global_free_pages_cleanup_event(void *);
+
+static struct free_pages global_free_pages = {
+ .min = KEEP_PAGES_MAIN_MIN,
+ .max = KEEP_PAGES_MAIN_MAX,
+ .cleanup = { .hook = global_free_pages_cleanup_event },
+};
+
+uint *pages_kept = &global_free_pages.cnt;
+
+static void *
+alloc_sys_page(void)
+{
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr == MAP_FAILED)
+ bug("mmap(%lu) failed: %m", page_size);
+
+ return ptr;
+}
+
+extern int shutting_down; /* Shutdown requested. */
+
+#else // ! HAVE_MMAP
+#define use_fake 1
#endif
-void resource_sys_init(void)
+void *
+alloc_page(void)
{
+ if (use_fake)
+ {
+ void *ptr = NULL;
+ int err = posix_memalign(&ptr, page_size, page_size);
+
+ if (err || !ptr)
+ bug("posix_memalign(%lu) failed", (long unsigned int) page_size);
+
+ return ptr;
+ }
+
#ifdef HAVE_MMAP
- if (!(page_size = sysconf(_SC_PAGESIZE)))
- die("System page size must be non-zero");
+ struct free_pages *fps = &global_free_pages;
- if ((u64_popcount(page_size) > 1) || (page_size > 16384))
+ if (fps->cnt)
{
-#endif
- /* Too big or strange page, use the aligned allocator instead */
- page_size = 4096;
- use_fake = 1;
+ struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
+ rem_node(&fp->n);
+ if ((--fps->cnt < fps->min) && !shutting_down)
+ ev_schedule(&fps->cleanup);
+
+ bzero(fp, page_size);
+ return fp;
}
+
+ return alloc_sys_page();
+#endif
}
-void *
-alloc_sys_page(void)
+void
+free_page(void *ptr)
{
-#ifdef HAVE_MMAP
- if (!use_fake)
+ if (use_fake)
{
- if (alloc_multipage)
- {
- void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (big == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- uintptr_t offset = ((uintptr_t) big) % page_size;
- if (offset)
- {
- void *ret = big + page_size - offset;
- munmap(big, page_size - offset);
- munmap(ret + page_size, offset);
- return ret;
- }
- else
- {
- munmap(big + page_size, page_size);
- return big;
- }
- }
-
- void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ret == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- return ret;
+ free(ptr);
+ return;
}
- else
+
+#ifdef HAVE_MMAP
+ struct free_pages *fps = &global_free_pages;
+ struct free_page *fp = ptr;
+
+ fp->n = (node) {};
+ add_tail(&fps->pages, &fp->n);
+
+ if ((++fps->cnt > fps->max) && !shutting_down)
+ ev_schedule(&fps->cleanup);
#endif
+}
+
+#ifdef HAVE_MMAP
+static void
+global_free_pages_cleanup_event(void *data UNUSED)
+{
+ if (shutting_down)
+ return;
+
+ struct free_pages *fps = &global_free_pages;
+
+ while (fps->cnt / 2 < fps->min)
{
- void *ret = aligned_alloc(page_size, page_size);
- if (!ret)
- bug("aligned_alloc(%lu) failed", page_size);
- return ret;
+ struct free_page *fp = alloc_sys_page();
+ fp->n = (node) {};
+ add_tail(&fps->pages, &fp->n);
+ fps->cnt++;
+ }
+
+ for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++)
+ {
+ struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
+ rem_node(&fp->n);
+
+ if (munmap(fp, page_size) == 0)
+ fps->cnt--;
+ else if (errno == ENOMEM)
+ add_head(&fps->pages, &fp->n);
+ else
+ bug("munmap(%p) failed: %m", fp);
}
}
+#endif
void
-free_sys_page(void *ptr)
+resource_sys_init(void)
{
#ifdef HAVE_MMAP
- if (!use_fake)
+ ASSERT_DIE(global_free_pages.cnt == 0);
+
+ if (!(page_size = sysconf(_SC_PAGESIZE)))
+ die("System page size must be non-zero");
+
+ if (u64_popcount(page_size) == 1)
{
- if (munmap(ptr, page_size) < 0)
- bug("munmap(%p) failed: %m", ptr);
+ struct free_pages *fps = &global_free_pages;
+
+ init_list(&fps->pages);
+ global_free_pages_cleanup_event(NULL);
+ return;
}
- else
+
+ /* Too big or strange page, use the aligned allocator instead */
+ log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+ use_fake = 1;
#endif
- free(ptr);
+
+ page_size = 4096;
}
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 3d67d0a7..8a116789 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1436,6 +1436,10 @@ sk_open(sock *s)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
+ if (s->flags & SKF_FREEBIND)
+ if (sk_set_freebind(s) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");
@@ -1850,8 +1854,8 @@ sk_read_ssh(sock *s)
/* sk_read() and sk_write() are called from BFD's event loop */
-int
-sk_read(sock *s, int revents)
+static inline int
+sk_read_noflush(sock *s, int revents)
{
switch (s->type)
{
@@ -1914,7 +1918,15 @@ sk_read(sock *s, int revents)
}
int
-sk_write(sock *s)
+sk_read(sock *s, int revents)
+{
+ int e = sk_read_noflush(s, revents);
+ tmp_flush();
+ return e;
+}
+
+static inline int
+sk_write_noflush(sock *s)
{
switch (s->type)
{
@@ -1962,6 +1974,14 @@ sk_write(sock *s)
}
}
+int
+sk_write(sock *s)
+{
+ int e = sk_write_noflush(s);
+ tmp_flush();
+ return e;
+}
+
int sk_is_ipv4(sock *s)
{ return s->af == AF_INET; }
@@ -1980,6 +2000,7 @@ sk_err(sock *s, int revents)
}
s->err_hook(s, se);
+ tmp_flush();
}
void
diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y
index 95b54d65..4ce9a328 100644
--- a/sysdep/unix/krt.Y
+++ b/sysdep/unix/krt.Y
@@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip)
CF_DECLS
-CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
+CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS)
CF_KEYWORDS(INTERFACE, PREFERRED)
%type <i> kern_mp_limit
@@ -122,9 +122,6 @@ kif_iface:
kif_iface_start iface_patt_list_nopx kif_iface_opt_list;
-dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ;
-dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ;
-
CF_CODE
CF_END
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 360a2888..84457d37 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -53,7 +53,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "filter/filter.h"
#include "conf/conf.h"
@@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src)
struct protocol proto_unix_iface = {
.name = "Device",
.template = "device%d",
- .class = PROTOCOL_DEVICE,
.proto_size = sizeof(struct kif_proto),
.config_size = sizeof(struct kif_config),
.preconfig = kif_preconfig,
@@ -243,6 +242,13 @@ struct protocol proto_unix_iface = {
.copy_config = kif_copy_config
};
+void
+kif_build(void)
+{
+ proto_build(&proto_unix_iface);
+}
+
+
/*
* Tracing of routes
*/
@@ -280,7 +286,7 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
static inline u32
krt_metric(rte *a)
{
- eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC);
+ eattr *ea = ea_find(a->attrs->eattrs, &ea_krt_metric);
return ea ? ea->u.data : 0;
}
@@ -317,7 +323,7 @@ static struct rte_storage *
krt_store_async(struct krt_proto *p, net *n, rte *e)
{
ASSERT(!e->attrs->cached);
- e->attrs->pref = p->p.main_channel->preference;
+ ea_set_attr_u32(&e->attrs->eattrs, &ea_gen_preference, 0, p->p.main_channel->preference);
e->src = p->p.main_source;
return rte_store(e, n, p->krt_table);
}
@@ -338,14 +344,14 @@ krt_learn_scan(struct krt_proto *p, rte *e)
if (krt_uptodate(&m->rte, e))
{
krt_trace_in_rl(&rl_alien, p, e, "[alien] seen");
- rte_free(ee, p->krt_table);
+ rte_free(ee);
m->rte.pflags |= KRT_REF_SEEN;
}
else
{
krt_trace_in(p, e, "[alien] updated");
*mm = m->next;
- rte_free(m, p->krt_table);
+ rte_free(m);
m = NULL;
}
}
@@ -392,7 +398,7 @@ again:
if (!(e->rte.pflags & KRT_REF_SEEN))
{
*ee = e->next;
- rte_free(e, p->krt_table);
+ rte_free(e);
continue;
}
@@ -452,12 +458,12 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
if (krt_uptodate(&g->rte, e))
{
krt_trace_in(p, e, "[alien async] same");
- rte_free(ee, p->krt_table);
+ rte_free(ee);
return;
}
krt_trace_in(p, e, "[alien async] updated");
*gg = g->next;
- rte_free(g, p->krt_table);
+ rte_free(g);
}
else
krt_trace_in(p, e, "[alien async] created");
@@ -468,15 +474,15 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
else if (!g)
{
krt_trace_in(p, e, "[alien async] delete failed");
- rte_free(ee, p->krt_table);
+ rte_free(ee);
return;
}
else
{
krt_trace_in(p, e, "[alien async] removed");
*gg = g->next;
- rte_free(ee, p->krt_table);
- rte_free(g, p->krt_table);
+ rte_free(ee);
+ rte_free(g);
}
best = n->routes;
bestp = &n->routes;
@@ -546,21 +552,27 @@ krt_is_installed(struct krt_proto *p, net *n)
return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id);
}
-static void
-krt_flush_routes(struct krt_proto *p)
+static uint
+rte_feed_count(net *n)
{
- struct rtable *t = p->p.main_channel->table;
+ uint count = 0;
+ for (struct rte_storage *e = n->routes; e; e = e->next)
+ if (rte_is_valid(RTE_OR_NULL(e)))
+ count++;
+ return count;
+}
- KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
- FIB_WALK(&t->fib, net, n)
+static void
+rte_feed_obtain(net *n, rte **feed, uint count)
+{
+ uint i = 0;
+ for (struct rte_storage *e = n->routes; e; e = e->next)
+ if (rte_is_valid(RTE_OR_NULL(e)))
{
- if (krt_is_installed(p, n))
- {
- /* FIXME: this does not work if gw is changed in export filter */
- krt_replace_rte(p, n->n.addr, NULL, &n->routes->rte);
- }
+ ASSERT_DIE(i < count);
+ feed[i++] = &e->rte;
}
- FIB_WALK_END;
+ ASSERT_DIE(i == count);
}
static struct rte *
@@ -570,7 +582,15 @@ krt_export_net(struct krt_proto *p, net *net)
const struct filter *filter = c->out_filter;
if (c->ra_mode == RA_MERGED)
- return rt_export_merged(c, net, krt_filter_lp, 1);
+ {
+ uint count = rte_feed_count(net);
+ if (!count)
+ return NULL;
+
+ rte **feed = alloca(count * sizeof(rte *));
+ rte_feed_obtain(net, feed, count);
+ return rt_export_merged(c, feed, count, krt_filter_lp, 1);
+ }
static _Thread_local rte rt;
rt = net->routes->rte;
@@ -586,7 +606,7 @@ krt_export_net(struct krt_proto *p, net *net)
if (filter == FILTER_ACCEPT)
goto accept;
- if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT)
+ if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT)
goto reject;
@@ -602,13 +622,10 @@ krt_same_dest(rte *k, rte *e)
{
rta *ka = k->attrs, *ea = e->attrs;
- if (ka->dest != ea->dest)
- return 0;
-
- if (ka->dest == RTD_UNICAST)
- return nexthop_same(&(ka->nh), &(ea->nh));
+ eattr *nhea_k = ea_find(ka->eattrs, &ea_gen_nexthop);
+ eattr *nhea_e = ea_find(ea->eattrs, &ea_gen_nexthop);
- return 1;
+ return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr);
}
/*
@@ -641,6 +658,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
#endif
/* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
+ /* Deleting all routes if flush is requested */
+ if (p->flush_routes)
+ goto delete;
/* We wait for the initial feed to have correct installed state */
if (!p->ready)
@@ -733,6 +753,17 @@ krt_prune(struct krt_proto *p)
p->initialized = 1;
}
+static void
+krt_flush_routes(struct krt_proto *p)
+{
+ KRT_TRACE(p, D_EVENTS, "Flushing kernel routes");
+ p->flush_routes = 1;
+ krt_init_scan(p);
+ krt_do_scan(p);
+ /* No prune! */
+ p->flush_routes = 0;
+}
+
void
krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src)
{
@@ -1111,24 +1142,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src)
krt_sys_copy_config(d, s);
}
-static int
-krt_get_attr(const eattr *a, byte *buf, int buflen)
-{
- switch (a->id)
- {
- case EA_KRT_SOURCE:
- bsprintf(buf, "source");
- return GA_NAME;
-
- case EA_KRT_METRIC:
- bsprintf(buf, "metric");
- return GA_NAME;
-
- default:
- return krt_sys_get_attr(a, buf, buflen);
- }
-}
+struct ea_class ea_krt_source = {
+ .name = "krt_source",
+ .type = T_INT,
+};
+struct ea_class ea_krt_metric = {
+ .name = "krt_metric",
+ .type = T_INT,
+};
#ifdef CONFIG_IP6_SADR_KERNEL
#define MAYBE_IP6_SADR NB_IP6_SADR
@@ -1145,7 +1167,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen)
struct protocol proto_unix_kernel = {
.name = "Kernel",
.template = "kernel%d",
- .class = PROTOCOL_KERNEL,
.preference = DEF_PREF_INHERITED,
.channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS,
.proto_size = sizeof(struct krt_proto),
@@ -1157,8 +1178,18 @@ struct protocol proto_unix_kernel = {
.shutdown = krt_shutdown,
.reconfigure = krt_reconfigure,
.copy_config = krt_copy_config,
- .get_attr = krt_get_attr,
#ifdef KRT_ALLOW_LEARN
.dump = krt_dump,
#endif
};
+
+void
+krt_build(void)
+{
+ proto_build(&proto_unix_kernel);
+
+ EA_REGISTER_ALL(
+ &ea_krt_source,
+ &ea_krt_metric,
+ );
+}
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index cd4bd07d..69fc9aa1 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -21,8 +21,12 @@ struct kif_proto;
#define KRT_DEFAULT_ECMP_LIMIT 16
+#if 0
#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0)
#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1)
+#endif
+
+extern struct ea_class ea_krt_source, ea_krt_metric;
#define KRT_REF_SEEN 0x1 /* Seen in table */
#define KRT_REF_BEST 0x2 /* Best in table */
@@ -66,6 +70,7 @@ struct krt_proto {
byte ready; /* Initial feed has been finished */
byte initialized; /* First scan has been finished */
byte reload; /* Next scan is doing reload */
+ byte flush_routes; /* Scanning to flush */
};
extern pool *krt_pool;
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 7e8ea0dc..8fdad4e6 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -30,7 +30,7 @@
#include "lib/event.h"
#include "lib/timer.h"
#include "lib/string.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/cli.h"
@@ -682,7 +682,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
+static char *opt_list = "bc:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -703,7 +703,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
- " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@@ -790,15 +789,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
-extern _Bool alloc_multipage;
-
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
- int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@@ -811,29 +807,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
- case 'B':
- bp = atoi(optarg);
- if (bp < 1)
- {
- fprintf(stderr, "Strange block size power %d\n\n", bp);
- display_usage();
- exit(1);
- }
-
- if ((1 << bp) < page_size)
- {
- fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
- display_usage();
- exit(1);
- }
-
- if ((1L << bp) > page_size)
- {
- alloc_multipage = 1;
- page_size = (1L << bp);
- }
-
- break;
case 'c':
config_name = optarg;
config_changed = 1;
@@ -888,8 +861,6 @@ parse_args(int argc, char **argv)
}
}
-void resource_sys_init(void);
-
/*
* Hic Est main()
*/
@@ -902,7 +873,6 @@ main(int argc, char **argv)
dmalloc_debug(0x2f03d00);
#endif
- resource_sys_init();
parse_args(argc, argv);
log_switch(1, NULL, NULL);
@@ -911,8 +881,8 @@ main(int argc, char **argv)
resource_init();
timer_init();
olock_init();
- io_init();
rt_init();
+ io_init();
if_init();
// roa_init();
config_init();
@@ -936,8 +906,6 @@ main(int argc, char **argv)
open_pid_file();
protos_build();
- proto_build(&proto_unix_kernel);
- proto_build(&proto_unix_iface);
struct config *conf = read_config();