diff options
author | Maria Matejka <mq@ucw.cz> | 2022-09-27 12:39:07 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-09-27 12:39:07 +0200 |
commit | 32a67c93ebf29309286dca5195f026eeda3f78a2 (patch) | |
tree | 578c6038187d0c50c4a4f250e440983dbb93029d /sysdep/unix | |
parent | 57a34d466e85bedbf40a0f7cbde23b843a303c8d (diff) | |
parent | cae5979871ee7aa341334f8b1af6bafc60ee9692 (diff) |
Merge commit 'cae5979871ee7aa341334f8b1af6bafc60ee9692' into tmp-bad-learn
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- | sysdep/unix/Makefile | 2 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 191 | ||||
-rw-r--r-- | sysdep/unix/io.c | 27 | ||||
-rw-r--r-- | sysdep/unix/krt.Y | 5 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 131 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 5 | ||||
-rw-r--r-- | sysdep/unix/main.c | 38 |
7 files changed, 256 insertions, 143 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index d0d36b5f..51ab98a9 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,6 +2,8 @@ src := alloc.c io.c krt.c log.c main.c random.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,kif_build) +$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 4c9d5eb5..edad6209 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -8,7 +8,10 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "lib/lists.h" +#include "lib/event.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> @@ -17,82 +20,168 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; #ifdef HAVE_MMAP +#define KEEP_PAGES_MAIN_MAX 256 +#define KEEP_PAGES_MAIN_MIN 8 +#define CLEANUP_PAGES_BULK 256 + +STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); + static _Bool use_fake = 0; + +#if DEBUGGING +struct free_page { + node unused[42]; + node n; +}; #else -static _Bool use_fake = 1; +struct free_page { + node n; +}; +#endif + +struct free_pages { + list pages; + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ + event cleanup; +}; + +static void global_free_pages_cleanup_event(void *); + +static struct free_pages global_free_pages = { + .min = KEEP_PAGES_MAIN_MIN, + .max = KEEP_PAGES_MAIN_MAX, + .cleanup = { .hook = global_free_pages_cleanup_event }, +}; + +uint *pages_kept = &global_free_pages.cnt; + +static void * +alloc_sys_page(void) +{ + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); + + return ptr; +} + +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 #endif -void resource_sys_init(void) +void * +alloc_page(void) { + if (use_fake) + { + void *ptr = NULL; + int err = posix_memalign(&ptr, page_size, page_size); + + if (err || !ptr) + bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + + return ptr; + } + #ifdef HAVE_MMAP - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); + struct free_pages *fps = &global_free_pages; - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) + if (fps->cnt) { -#endif - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; + struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); + rem_node(&fp->n); + if ((--fps->cnt < fps->min) && !shutting_down) + ev_schedule(&fps->cleanup); + + bzero(fp, page_size); + return fp; } + + return alloc_sys_page(); +#endif } -void * -alloc_sys_page(void) +void +free_page(void *ptr) { -#ifdef HAVE_MMAP - if (!use_fake) + if (use_fake) { - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } - - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - return ret; + free(ptr); + return; } - else + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + struct free_page *fp = ptr; + + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + + if ((++fps->cnt > fps->max) && !shutting_down) + ev_schedule(&fps->cleanup); #endif +} + +#ifdef HAVE_MMAP +static void +global_free_pages_cleanup_event(void *data UNUSED) +{ + if (shutting_down) + return; + + struct free_pages *fps = &global_free_pages; + + while (fps->cnt / 2 < fps->min) { - void *ret = aligned_alloc(page_size, page_size); - if (!ret) - bug("aligned_alloc(%lu) failed", page_size); - return ret; + struct free_page *fp = alloc_sys_page(); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + fps->cnt++; + } + + for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); + rem_node(&fp->n); + + if (munmap(fp, page_size) == 0) + fps->cnt--; + else if (errno == ENOMEM) + add_head(&fps->pages, &fp->n); + else + bug("munmap(%p) failed: %m", fp); } } +#endif void -free_sys_page(void *ptr) +resource_sys_init(void) { #ifdef HAVE_MMAP - if (!use_fake) + ASSERT_DIE(global_free_pages.cnt == 0); + + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) { - if (munmap(ptr, page_size) < 0) - bug("munmap(%p) failed: %m", ptr); + struct free_pages *fps = &global_free_pages; + + init_list(&fps->pages); + global_free_pages_cleanup_event(NULL); + return; } - else + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; #endif - free(ptr); + + page_size = 4096; } diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 3d67d0a7..8a116789 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1436,6 +1436,10 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); + if (s->flags & SKF_FREEBIND) + if (sk_set_freebind(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ERR2("bind"); @@ -1850,8 +1854,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1914,7 +1918,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1962,6 +1974,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -1980,6 +2000,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 95b54d65..4ce9a328 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip) CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS) CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -122,9 +122,6 @@ kif_iface: kif_iface_start iface_patt_list_nopx kif_iface_opt_list; -dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ; -dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ; - CF_CODE CF_END diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 360a2888..84457d37 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -53,7 +53,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" @@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .class = PROTOCOL_DEVICE, .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, @@ -243,6 +242,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -280,7 +286,7 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; static inline u32 krt_metric(rte *a) { - eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC); + eattr *ea = ea_find(a->attrs->eattrs, &ea_krt_metric); return ea ? ea->u.data : 0; } @@ -317,7 +323,7 @@ static struct rte_storage * krt_store_async(struct krt_proto *p, net *n, rte *e) { ASSERT(!e->attrs->cached); - e->attrs->pref = p->p.main_channel->preference; + ea_set_attr_u32(&e->attrs->eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); e->src = p->p.main_source; return rte_store(e, n, p->krt_table); } @@ -338,14 +344,14 @@ krt_learn_scan(struct krt_proto *p, rte *e) if (krt_uptodate(&m->rte, e)) { krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(ee, p->krt_table); + rte_free(ee); m->rte.pflags |= KRT_REF_SEEN; } else { krt_trace_in(p, e, "[alien] updated"); *mm = m->next; - rte_free(m, p->krt_table); + rte_free(m); m = NULL; } } @@ -392,7 +398,7 @@ again: if (!(e->rte.pflags & KRT_REF_SEEN)) { *ee = e->next; - rte_free(e, p->krt_table); + rte_free(e); continue; } @@ -452,12 +458,12 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) if (krt_uptodate(&g->rte, e)) { krt_trace_in(p, e, "[alien async] same"); - rte_free(ee, p->krt_table); + rte_free(ee); return; } krt_trace_in(p, e, "[alien async] updated"); *gg = g->next; - rte_free(g, p->krt_table); + rte_free(g); } else krt_trace_in(p, e, "[alien async] created"); @@ -468,15 +474,15 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) else if (!g) { krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(ee, p->krt_table); + rte_free(ee); return; } else { krt_trace_in(p, e, "[alien async] removed"); *gg = g->next; - rte_free(ee, p->krt_table); - rte_free(g, p->krt_table); + rte_free(ee); + rte_free(g); } best = n->routes; bestp = &n->routes; @@ -546,21 +552,27 @@ krt_is_installed(struct krt_proto *p, net *n) return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } -static void -krt_flush_routes(struct krt_proto *p) +static uint +rte_feed_count(net *n) { - struct rtable *t = p->p.main_channel->table; + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} - KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, net, n) +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) { - if (krt_is_installed(p, n)) - { - /* FIXME: this does not work if gw is changed in export filter */ - krt_replace_rte(p, n->n.addr, NULL, &n->routes->rte); - } + ASSERT_DIE(i < count); + feed[i++] = &e->rte; } - FIB_WALK_END; + ASSERT_DIE(i == count); } static struct rte * @@ -570,7 +582,15 @@ krt_export_net(struct krt_proto *p, net *net) const struct filter *filter = c->out_filter; if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, krt_filter_lp, 1); + { + uint count = rte_feed_count(net); + if (!count) + return NULL; + + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + return rt_export_merged(c, feed, count, krt_filter_lp, 1); + } static _Thread_local rte rt; rt = net->routes->rte; @@ -586,7 +606,7 @@ krt_export_net(struct krt_proto *p, net *net) if (filter == FILTER_ACCEPT) goto accept; - if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT) + if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; @@ -602,13 +622,10 @@ krt_same_dest(rte *k, rte *e) { rta *ka = k->attrs, *ea = e->attrs; - if (ka->dest != ea->dest) - return 0; - - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); + eattr *nhea_k = ea_find(ka->eattrs, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea->eattrs, &ea_gen_nexthop); - return 1; + return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* @@ -641,6 +658,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + /* Deleting all routes if flush is requested */ + if (p->flush_routes) + goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) @@ -733,6 +753,17 @@ krt_prune(struct krt_proto *p) p->initialized = 1; } +static void +krt_flush_routes(struct krt_proto *p) +{ + KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); + p->flush_routes = 1; + krt_init_scan(p); + krt_do_scan(p); + /* No prune! */ + p->flush_routes = 0; +} + void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { @@ -1111,24 +1142,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) krt_sys_copy_config(d, s); } -static int -krt_get_attr(const eattr *a, byte *buf, int buflen) -{ - switch (a->id) - { - case EA_KRT_SOURCE: - bsprintf(buf, "source"); - return GA_NAME; - - case EA_KRT_METRIC: - bsprintf(buf, "metric"); - return GA_NAME; - - default: - return krt_sys_get_attr(a, buf, buflen); - } -} +struct ea_class ea_krt_source = { + .name = "krt_source", + .type = T_INT, +}; +struct ea_class ea_krt_metric = { + .name = "krt_metric", + .type = T_INT, +}; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR @@ -1145,7 +1167,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen) struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", - .class = PROTOCOL_KERNEL, .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), @@ -1157,8 +1178,18 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, - .get_attr = krt_get_attr, #ifdef KRT_ALLOW_LEARN .dump = krt_dump, #endif }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); + + EA_REGISTER_ALL( + &ea_krt_source, + &ea_krt_metric, + ); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index cd4bd07d..69fc9aa1 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -21,8 +21,12 @@ struct kif_proto; #define KRT_DEFAULT_ECMP_LIMIT 16 +#if 0 #define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) #define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +#endif + +extern struct ea_class ea_krt_source, ea_krt_metric; #define KRT_REF_SEEN 0x1 /* Seen in table */ #define KRT_REF_BEST 0x2 /* Best in table */ @@ -66,6 +70,7 @@ struct krt_proto { byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ + byte flush_routes; /* Scanning to flush */ }; extern pool *krt_pool; diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 7e8ea0dc..8fdad4e6 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -30,7 +30,7 @@ #include "lib/event.h" #include "lib/timer.h" #include "lib/string.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" @@ -682,7 +682,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "bc:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -703,7 +703,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -790,15 +789,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -811,29 +807,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; @@ -888,8 +861,6 @@ parse_args(int argc, char **argv) } } -void resource_sys_init(void); - /* * Hic Est main() */ @@ -902,7 +873,6 @@ main(int argc, char **argv) dmalloc_debug(0x2f03d00); #endif - resource_sys_init(); parse_args(argc, argv); log_switch(1, NULL, NULL); @@ -911,8 +881,8 @@ main(int argc, char **argv) resource_init(); timer_init(); olock_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); @@ -936,8 +906,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); |