diff options
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- | sysdep/unix/Makefile | 2 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 191 | ||||
-rw-r--r-- | sysdep/unix/io.c | 35 | ||||
-rw-r--r-- | sysdep/unix/krt.Y | 5 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 122 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 4 | ||||
-rw-r--r-- | sysdep/unix/main.c | 42 |
7 files changed, 243 insertions, 158 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index d0d36b5f..51ab98a9 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,kif_build) +$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 4c9d5eb5..edad6209 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -8,7 +8,10 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "lib/lists.h" +#include "lib/event.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> @@ -17,82 +20,168 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; #ifdef HAVE_MMAP +#define KEEP_PAGES_MAIN_MAX 256 +#define KEEP_PAGES_MAIN_MIN 8 +#define CLEANUP_PAGES_BULK 256 + +STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); + static _Bool use_fake = 0; + +#if DEBUGGING +struct free_page { + node unused[42]; + node n; +}; #else -static _Bool use_fake = 1; +struct free_page { + node n; +}; +#endif + +struct free_pages { + list pages; + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ + event cleanup; +}; + +static void global_free_pages_cleanup_event(void *); + +static struct free_pages global_free_pages = { + .min = KEEP_PAGES_MAIN_MIN, + .max = KEEP_PAGES_MAIN_MAX, + .cleanup = { .hook = global_free_pages_cleanup_event }, +}; + +uint *pages_kept = &global_free_pages.cnt; + +static void * +alloc_sys_page(void) +{ + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); + + return ptr; +} + +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 #endif -void resource_sys_init(void) +void * +alloc_page(void) { + if (use_fake) + { + void *ptr = NULL; + int err = posix_memalign(&ptr, page_size, page_size); + + if (err || !ptr) + bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + + return ptr; + } + #ifdef HAVE_MMAP - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); + struct free_pages *fps = &global_free_pages; - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) + if (fps->cnt) { -#endif - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; + struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); + rem_node(&fp->n); + if ((--fps->cnt < fps->min) && !shutting_down) + ev_schedule(&fps->cleanup); + + bzero(fp, page_size); + return fp; } + + return alloc_sys_page(); +#endif } -void * -alloc_sys_page(void) +void +free_page(void *ptr) { -#ifdef HAVE_MMAP - if (!use_fake) + if (use_fake) { - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } - - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - return ret; + free(ptr); + return; } - else + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + struct free_page *fp = ptr; + + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + + if ((++fps->cnt > fps->max) && !shutting_down) + ev_schedule(&fps->cleanup); #endif +} + +#ifdef HAVE_MMAP +static void +global_free_pages_cleanup_event(void *data UNUSED) +{ + if (shutting_down) + return; + + struct free_pages *fps = &global_free_pages; + + while (fps->cnt / 2 < fps->min) { - void *ret = aligned_alloc(page_size, page_size); - if (!ret) - bug("aligned_alloc(%lu) failed", page_size); - return ret; + struct free_page *fp = alloc_sys_page(); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + fps->cnt++; + } + + for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); + rem_node(&fp->n); + + if (munmap(fp, page_size) == 0) + fps->cnt--; + else if (errno == ENOMEM) + add_head(&fps->pages, &fp->n); + else + bug("munmap(%p) failed: %m", fp); } } +#endif void -free_sys_page(void *ptr) +resource_sys_init(void) { #ifdef HAVE_MMAP - if (!use_fake) + ASSERT_DIE(global_free_pages.cnt == 0); + + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) { - if (munmap(ptr, page_size) < 0) - bug("munmap(%p) failed: %m", ptr); + struct free_pages *fps = &global_free_pages; + + init_list(&fps->pages); + global_free_pages_cleanup_event(NULL); + return; } - else + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; #endif - free(ptr); + + page_size = 4096; } diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 3d67d0a7..810e782d 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1436,6 +1436,10 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); + if (s->flags & SKF_FREEBIND) + if (sk_set_freebind(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ERR2("bind"); @@ -1850,8 +1854,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1914,7 +1918,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1962,6 +1974,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -1980,6 +2000,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void @@ -2042,8 +2063,8 @@ io_update_time(void) event_open->duration = last_time - event_open->timestamp; if (event_open->duration > config->latency_limit) - log(L_WARN "Event 0x%p 0x%p took %d ms", - event_open->hook, event_open->data, (int) (event_open->duration TO_MS)); + log(L_WARN "Event 0x%p 0x%p took %u.%03u ms", + event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000)); event_open = NULL; } @@ -2147,8 +2168,8 @@ watchdog_stop(void) btime duration = last_time - loop_time; if (duration > config->watchdog_warning) - log(L_WARN "I/O loop cycle took %d ms for %d events", - (int) (duration TO_MS), event_log_num); + log(L_WARN "I/O loop cycle took %u.%03u ms for %d events", + (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num); } diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 95b54d65..4ce9a328 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip) CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS) CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -122,9 +122,6 @@ kif_iface: kif_iface_start iface_patt_list_nopx kif_iface_opt_list; -dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ; -dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ; - CF_CODE CF_END diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 609ee921..46b5a51d 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -53,7 +53,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" @@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .class = PROTOCOL_DEVICE, .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, @@ -243,6 +242,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -280,29 +286,45 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; static inline u32 krt_metric(rte *a) { - eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC); + eattr *ea = ea_find(a->attrs, &ea_krt_metric); return ea ? ea->u.data : 0; } static inline int -krt_rte_better(rte *a, rte *b) +krt_same_key(rte *a, rte *b) { - return (krt_metric(a) > krt_metric(b)); + return (krt_metric(a) == krt_metric(b)); +} + +static inline int +krt_uptodate(rte *a, rte *b) +{ + return (a->attrs == b->attrs); } /* Called when alien route is discovered during scan */ static void -krt_learn_rte(struct krt_proto *p, rte *e) +krt_learn_scan(struct krt_proto *p, rte *e) { - e->src = rt_get_source(&p->p, krt_metric(e)); - rte_update(p->p.main_channel, e->net, e, e->src); + rte e0 = { + .attrs = e->attrs, + .src = rt_get_source(&p->p, krt_metric(e)), + }; + + ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); + + rte_update(p->p.main_channel, e->net, &e0, e0.src); } static void -krt_learn_init(struct krt_proto *p) +krt_learn_async(struct krt_proto *p, rte *e, int new) { - if (KRT_CF->learn) - channel_setup_in_table(p->p.main_channel, 1); + if (new) + return krt_learn_scan(p, e); + + struct rte_src *src = rt_find_source(&p->p, krt_metric(e)); + if (src) + rte_update(p->p.main_channel, e->net, NULL, src); } #endif @@ -322,7 +344,7 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) + if (rte_is_valid(RTE_OR_NULL(e))) count++; return count; } @@ -332,7 +354,7 @@ rte_feed_obtain(net *n, rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) + if (rte_is_valid(RTE_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; @@ -371,7 +393,7 @@ krt_export_net(struct krt_proto *p, net *net) if (filter == FILTER_ACCEPT) goto accept; - if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT) + if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; @@ -385,15 +407,12 @@ reject: static int krt_same_dest(rte *k, rte *e) { - rta *ka = k->attrs, *ea = e->attrs; + ea_list *ka = k->attrs, *ea = e->attrs; - if (ka->dest != ea->dest) - return 0; + eattr *nhea_k = ea_find(ka, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea, &ea_gen_nexthop); - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); - - return 1; + return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* @@ -418,7 +437,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) - krt_learn_rte(p, e); + krt_learn_scan(p, e); else krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; @@ -487,11 +506,6 @@ static void krt_init_scan(struct krt_proto *p) { bmap_reset(&p->seen_map, 1024); - -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - channel_refresh_begin(p->p.main_channel); -#endif } static void @@ -517,11 +531,6 @@ krt_prune(struct krt_proto *p) } FIB_WALK_END; -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - channel_refresh_end(p->p.main_channel); -#endif - if (p->ready) p->initialized = 1; } @@ -561,7 +570,7 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) { - krt_learn_rte(p, e); + krt_learn_async(p, e, new); return; } #endif @@ -672,9 +681,9 @@ krt_scan_timer_kick(struct krt_proto *p) */ static int -krt_preexport(struct channel *c, rte *e) +krt_preexport(struct channel *C, rte *e) { - if (e->src->proto == c->proto) + if (e->src->proto == C->proto) return -1; if (!krt_capable(e)) @@ -807,7 +816,6 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; - p->p.rte_better = krt_rte_better; krt_sys_init(p); return &p->p; @@ -833,10 +841,6 @@ krt_start(struct proto *P) bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); -#ifdef KRT_ALLOW_LEARN - krt_learn_init(p); -#endif - if (!krt_sys_start(p)) { rem_node(&p->krt_node); @@ -916,24 +920,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) krt_sys_copy_config(d, s); } -static int -krt_get_attr(const eattr *a, byte *buf, int buflen) -{ - switch (a->id) - { - case EA_KRT_SOURCE: - bsprintf(buf, "source"); - return GA_NAME; - - case EA_KRT_METRIC: - bsprintf(buf, "metric"); - return GA_NAME; - - default: - return krt_sys_get_attr(a, buf, buflen); - } -} +struct ea_class ea_krt_source = { + .name = "krt_source", + .type = T_INT, +}; +struct ea_class ea_krt_metric = { + .name = "krt_metric", + .type = T_INT, +}; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR @@ -950,7 +945,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen) struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", - .class = PROTOCOL_KERNEL, .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), @@ -962,5 +956,15 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, - .get_attr = krt_get_attr, }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); + + EA_REGISTER_ALL( + &ea_krt_source, + &ea_krt_metric, + ); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 968c5b16..e0d60cbd 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -21,8 +21,12 @@ struct kif_proto; #define KRT_DEFAULT_ECMP_LIMIT 16 +#if 0 #define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) #define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +#endif + +extern struct ea_class ea_krt_source, ea_krt_metric; #define KRT_REF_SEEN 0x1 /* Seen in table */ #define KRT_REF_BEST 0x2 /* Best in table */ diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 7e8ea0dc..07d6c691 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -30,7 +30,7 @@ #include "lib/event.h" #include "lib/timer.h" #include "lib/string.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" @@ -56,7 +56,7 @@ async_dump(void) // XXXX tm_dump_all(); if_dump_all(); neigh_dump_all(); - rta_dump_all(); + ea_dump_all(); rt_dump_all(); protos_dump_all(); @@ -116,7 +116,7 @@ add_num_const(char *name, int val, const char *file, const uint line) struct f_val *v = cfg_alloc(sizeof(struct f_val)); *v = (struct f_val) { .type = T_INT, .val.i = val }; struct symbol *sym = cf_get_symbol(name); - if (sym->class && (sym->scope == conf_this_scope)) + if (sym->class && cf_symbol_is_local(sym)) cf_error("Error reading value for %s from %s:%d: already defined", name, file, line); cf_define_symbol(sym, SYM_CONSTANT | T_INT, val, v); @@ -682,7 +682,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "bc:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -703,7 +703,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -790,15 +789,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -811,29 +807,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; @@ -888,8 +861,6 @@ parse_args(int argc, char **argv) } } -void resource_sys_init(void); - /* * Hic Est main() */ @@ -902,7 +873,6 @@ main(int argc, char **argv) dmalloc_debug(0x2f03d00); #endif - resource_sys_init(); parse_args(argc, argv); log_switch(1, NULL, NULL); @@ -911,8 +881,8 @@ main(int argc, char **argv) resource_init(); timer_init(); olock_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); @@ -936,8 +906,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); |