diff options
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- | sysdep/unix/Makefile | 2 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 190 | ||||
-rw-r--r-- | sysdep/unix/io.c | 23 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 114 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 7 | ||||
-rw-r--r-- | sysdep/unix/log.c | 15 | ||||
-rw-r--r-- | sysdep/unix/main.c | 11 |
7 files changed, 210 insertions, 152 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index d0d36b5f..51ab98a9 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,kif_build) +$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 90453f7b..edad6209 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -19,111 +19,169 @@ #include <sys/mman.h> #endif -#ifdef HAVE_MMAP -#define KEEP_PAGES 512 +long page_size = 0; -static u64 page_size = 0; -static _Bool use_fake = 0; +#ifdef HAVE_MMAP +#define KEEP_PAGES_MAIN_MAX 256 +#define KEEP_PAGES_MAIN_MIN 8 +#define CLEANUP_PAGES_BULK 256 -uint pages_kept = 0; -static list pages_list; +STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); -static void cleanup_pages(void *data); -static event page_cleanup_event = { .hook = cleanup_pages }; +static _Bool use_fake = 0; +#if DEBUGGING +struct free_page { + node unused[42]; + node n; +}; #else -static const u64 page_size = 4096; /* Fake page size */ +struct free_page { + node n; +}; #endif -u64 get_page_size(void) +struct free_pages { + list pages; + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ + event cleanup; +}; + +static void global_free_pages_cleanup_event(void *); + +static struct free_pages global_free_pages = { + .min = KEEP_PAGES_MAIN_MIN, + .max = KEEP_PAGES_MAIN_MAX, + .cleanup = { .hook = global_free_pages_cleanup_event }, +}; + +uint *pages_kept = &global_free_pages.cnt; + +static void * +alloc_sys_page(void) { - if (page_size) - return page_size; + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -#ifdef HAVE_MMAP - if (page_size = sysconf(_SC_PAGESIZE)) - { - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) - { - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } - return page_size; - } + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); - bug("Page size must be non-zero"); -#endif + return ptr; } +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 +#endif + void * alloc_page(void) { -#ifdef HAVE_MMAP - if (pages_kept) - { - node *page = TAIL(pages_list); - rem_node(page); - pages_kept--; - memset(page, 0, get_page_size()); - return page; - } - - if (!use_fake) - { - void *ret = mmap(NULL, get_page_size(), PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", (long unsigned int) page_size); - return ret; - } - else -#endif + if (use_fake) { void *ptr = NULL; int err = posix_memalign(&ptr, page_size, page_size); + if (err || !ptr) bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + return ptr; } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + + if (fps->cnt) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); + rem_node(&fp->n); + if ((--fps->cnt < fps->min) && !shutting_down) + ev_schedule(&fps->cleanup); + + bzero(fp, page_size); + return fp; + } + + return alloc_sys_page(); +#endif } void free_page(void *ptr) { -#ifdef HAVE_MMAP - if (!use_fake) + if (use_fake) { - if (!pages_kept) - init_list(&pages_list); + free(ptr); + return; + } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + struct free_page *fp = ptr; - memset(ptr, 0, sizeof(node)); - add_tail(&pages_list, ptr); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); - if (++pages_kept > KEEP_PAGES) - ev_schedule(&page_cleanup_event); - } - else + if ((++fps->cnt > fps->max) && !shutting_down) + ev_schedule(&fps->cleanup); #endif - free(ptr); } #ifdef HAVE_MMAP static void -cleanup_pages(void *data UNUSED) +global_free_pages_cleanup_event(void *data UNUSED) { - for (uint seen = 0; (pages_kept > KEEP_PAGES) && (seen < KEEP_PAGES); seen++) + if (shutting_down) + return; + + struct free_pages *fps = &global_free_pages; + + while (fps->cnt / 2 < fps->min) { - void *ptr = HEAD(pages_list); - rem_node(ptr); - if (munmap(ptr, get_page_size()) == 0) - pages_kept--; + struct free_page *fp = alloc_sys_page(); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + fps->cnt++; + } + + for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); + rem_node(&fp->n); + + if (munmap(fp, page_size) == 0) + fps->cnt--; else if (errno == ENOMEM) - add_tail(&pages_list, ptr); + add_head(&fps->pages, &fp->n); else - bug("munmap(%p) failed: %m", ptr); + bug("munmap(%p) failed: %m", fp); } - - if (pages_kept > KEEP_PAGES) - ev_schedule(&page_cleanup_event); } #endif + +void +resource_sys_init(void) +{ +#ifdef HAVE_MMAP + ASSERT_DIE(global_free_pages.cnt == 0); + + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) + { + struct free_pages *fps = &global_free_pages; + + init_list(&fps->pages); + global_free_pages_cleanup_event(NULL); + return; + } + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; +#endif + + page_size = 4096; +} diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 17dc05a3..810e782d 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1854,8 +1854,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1918,7 +1918,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1966,6 +1974,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -1984,6 +2000,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 342adee5..c4a3a4a8 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -243,6 +243,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -277,22 +284,23 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; * the same key. */ +static inline u32 +krt_metric(rte *a) +{ + eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC); + return ea ? ea->u.data : 0; +} + static inline int krt_same_key(rte *a, rte *b) { - return a->u.krt.metric == b->u.krt.metric; + return (krt_metric(a) == krt_metric(b)); } static inline int krt_uptodate(rte *a, rte *b) { - if (a->attrs != b->attrs) - return 0; - - if (a->u.krt.proto != b->u.krt.proto) - return 0; - - return 1; + return (a->attrs == b->attrs); } static void @@ -300,9 +308,7 @@ krt_learn_announce_update(struct krt_proto *p, rte *e) { net *n = e->net; rta *aa = rta_clone(e->attrs); - rte *ee = rte_get_temp(aa); - ee->pflags = EA_ID_FLAG(EA_KRT_SOURCE) | EA_ID_FLAG(EA_KRT_METRIC); - ee->u.krt = e->u.krt; + rte *ee = rte_get_temp(aa, p->p.main_source); rte_update(&p->p, n->n.addr, ee); } @@ -331,7 +337,7 @@ krt_learn_scan(struct krt_proto *p, rte *e) { krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); rte_free(e); - m->u.krt.seen = 1; + m->pflags |= KRT_REF_SEEN; } else { @@ -347,7 +353,7 @@ krt_learn_scan(struct krt_proto *p, rte *e) { e->next = n->routes; n->routes = e; - e->u.krt.seen = 1; + e->pflags |= KRT_REF_SEEN; } } @@ -377,24 +383,23 @@ again: ee = &n->routes; while (e = *ee) { - if (e->u.krt.best) + if (e->pflags & KRT_REF_BEST) old_best = e; - if (!e->u.krt.seen) + if (!(e->pflags & KRT_REF_SEEN)) { *ee = e->next; rte_free(e); continue; } - if (!best || best->u.krt.metric > e->u.krt.metric) + if (!best || krt_metric(best) > krt_metric(e)) { best = e; pbest = ee; } - e->u.krt.seen = 0; - e->u.krt.best = 0; + e->pflags &= ~(KRT_REF_SEEN | KRT_REF_BEST); ee = &e->next; } if (!n->routes) @@ -408,18 +413,18 @@ again: goto again; } - best->u.krt.best = 1; + best->pflags |= KRT_REF_BEST; *pbest = best->next; best->next = n->routes; n->routes = best; if ((best != old_best) || p->reload) { - DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); + DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(best)); krt_learn_announce_update(p, best); } else - DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); + DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(best)); } FIB_ITERATE_END; @@ -433,6 +438,9 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) net *n = net_get(p->krt_table, n0->n.addr); rte *g, **gg, *best, **bestp, *old_best; + ASSERT(!e->attrs->cached); + e->attrs->pref = p->p.main_channel->preference; + e->attrs = rta_lookup(e->attrs); old_best = n->routes; @@ -476,18 +484,18 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) bestp = &n->routes; for(gg=&n->routes; g=*gg; gg=&g->next) { - if (best->u.krt.metric > g->u.krt.metric) + if (krt_metric(best) > krt_metric(g)) { best = g; bestp = gg; } - g->u.krt.best = 0; + g->pflags &= ~KRT_REF_BEST; } if (best) { - best->u.krt.best = 1; + best->pflags |= KRT_REF_BEST; *bestp = best->next; best->next = n->routes; n->routes = best; @@ -528,12 +536,6 @@ krt_dump(struct proto *P) rt_dump(p->krt_table); } -static void -krt_dump_attrs(rte *e) -{ - debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto); -} - #endif /* @@ -582,8 +584,6 @@ krt_export_net(struct krt_proto *p, net *net, rte **rt_free) if (filter == FILTER_REJECT) return NULL; - rte_make_tmp_attrs(&rt, krt_filter_lp, NULL); - /* We could run krt_preexport() here, but it is already handled by krt_is_installed() */ if (filter == FILTER_ACCEPT) @@ -624,13 +624,14 @@ krt_same_dest(rte *k, rte *e) */ void -krt_got_route(struct krt_proto *p, rte *e) +krt_got_route(struct krt_proto *p, rte *e, s8 src) { rte *new = NULL, *rt_free = NULL; net *n = e->net; + e->pflags = 0; #ifdef KRT_ALLOW_LEARN - switch (e->u.krt.src) + switch (src) { case KRT_SRC_KERNEL: goto ignore; @@ -752,11 +753,12 @@ krt_prune(struct krt_proto *p) } void -krt_got_route_async(struct krt_proto *p, rte *e, int new) +krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { net *net = e->net; + e->pflags = 0; - switch (e->u.krt.src) + switch (src) { case KRT_SRC_BIRD: /* Should be filtered by the back end */ @@ -899,29 +901,11 @@ krt_scan_timer_kick(struct krt_proto *p) * Updates */ -static void -krt_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_KRT_SOURCE, EAF_TYPE_INT, rt->u.krt.proto); - rte_make_tmp_attr(rt, EA_KRT_METRIC, EAF_TYPE_INT, rt->u.krt.metric); -} - -static void -krt_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rt->u.krt.proto = rte_store_tmp_attr(rt, EA_KRT_SOURCE); - rt->u.krt.metric = rte_store_tmp_attr(rt, EA_KRT_METRIC); -} - static int -krt_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) +krt_preexport(struct channel *C, rte *e) { // struct krt_proto *p = (struct krt_proto *) P; - rte *e = *new; - - if (e->attrs->src->proto == C->proto) + if (e->src->proto == C->proto) return -1; if (!krt_capable(e)) @@ -996,14 +980,6 @@ krt_feed_end(struct channel *C) } -static int -krt_rte_same(rte *a, rte *b) -{ - /* src is always KRT_SRC_ALIEN and type is irrelevant */ - return (a->u.krt.proto == b->u.krt.proto) && (a->u.krt.metric == b->u.krt.metric); -} - - /* * Protocol glue */ @@ -1057,9 +1033,6 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; - p->p.make_tmp_attrs = krt_make_tmp_attrs; - p->p.store_tmp_attrs = krt_store_tmp_attrs; - p->p.rte_same = krt_rte_same; krt_sys_init(p); return &p->p; @@ -1217,6 +1190,11 @@ struct protocol proto_unix_kernel = { .get_attr = krt_get_attr, #ifdef KRT_ALLOW_LEARN .dump = krt_dump, - .dump_attrs = krt_dump_attrs, #endif }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 1536e502..18a206e6 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -24,6 +24,9 @@ struct kif_proto; #define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) #define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +#define KRT_REF_SEEN 0x1 /* Seen in table */ +#define KRT_REF_BEST 0x2 /* Best in table */ + /* Whenever we recognize our own routes, we allow learing of foreign routes */ #ifdef CONFIG_SELF_CONSCIOUS @@ -74,8 +77,8 @@ extern pool *krt_pool; struct proto_config * kif_init_config(int class); void kif_request_scan(void); void krt_use_shared_scan(void); -void krt_got_route(struct krt_proto *p, struct rte *e); -void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); +void krt_got_route(struct krt_proto *p, struct rte *e, s8 src); +void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src); static inline int krt_get_sync_error(struct krt_proto *p, struct rte *e) diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 14d18c01..4e9df069 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -309,22 +309,15 @@ die(const char *msg, ...) void debug(const char *msg, ...) { -#define MAX_DEBUG_BUFSIZE 65536 +#define MAX_DEBUG_BUFSIZE 16384 va_list args; - static uint bufsize = 4096; - static char *buf = NULL; - - if (!buf) - buf = mb_alloc(&root_pool, bufsize); + char buf[MAX_DEBUG_BUFSIZE]; va_start(args, msg); if (dbgf) { - while (bvsnprintf(buf, bufsize, msg, args) < 0) - if (bufsize >= MAX_DEBUG_BUFSIZE) - bug("Extremely long debug output, split it."); - else - buf = mb_realloc(buf, (bufsize *= 2)); + if (bvsnprintf(buf, MAX_DEBUG_BUFSIZE, msg, args) < 0) + bug("Extremely long debug output, split it."); fputs(buf, dbgf); } diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index f87d0c43..84e7d889 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -479,6 +479,14 @@ cli_err(sock *s, int err) cli_free(s->data); } +static void +cli_connect_err(sock *s UNUSED, int err) +{ + ASSERT_DIE(err); + if (config->cli_debug) + log(L_INFO "Failed to accept CLI connection: %s", strerror(err)); +} + static int cli_connect(sock *s, uint size UNUSED) { @@ -507,6 +515,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s = cli_sk = sk_new(cli_pool); s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; + s->err_hook = cli_connect_err; s->rbsize = 1024; s->fast_rx = 1; @@ -897,8 +906,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); |