/*
 *  BIRD -- UNIX Kernel Synchronization
 *
 *  (c) 1998--1999 Martin Mares <mj@ucw.cz>
 *
 *  Can be freely distributed and used under the terms of the GNU GPL.
 */

#define LOCAL_DEBUG

#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/route.h"
#include "nest/protocol.h"
#include "lib/timer.h"
#include "conf/conf.h"

#include "unix.h"
#include "krt.h"

/*
 *  The whole kernel synchronization is a bit messy and touches some internals
 *  of the routing table engine, because routing table maintenance is a typical
 *  example of the proverbial compatibility between different Unices and we want
 *  to keep the overhead of our krt business as low as possible and avoid
 *  maintaining a local routing table copy.
 *
 *  The kernel syncer can work in three different modes (according to the system
 *  config header):
 *  o  Single routing table, single krt protocol.  [traditional Unix]
 *  o  Many routing tables, separate krt protocols for all of them.
 *  o  Many routing tables, but every scan includes all tables, so we start
 *     separate krt protocols which cooperate with each other.  [Linux 2.2]
 *     In this case, we keep only a single scan timer.
 *
 *  The hacky bits:
 *  o  We use FIB node flags to keep track of route synchronization status.
 *  o  When starting up, we cheat by checking whether there is another krt
 *     instance to be initialized later and performing the table scan only
 *     once for all the instances.
 *  o  We attach temporary rte's to routing tables.
 *
 *  If you are brave enough, continue now.  You cannot say you haven't been warned.
 */

static int krt_uptodate(rte *k, rte *e);

/*
 *  Global resources
 */

pool *krt_pool;

void
krt_io_init(void)
{
  krt_pool = rp_new(&root_pool, "Kernel Syncer");
  krt_if_io_init();
}

/*
 *  Interfaces
 */

struct proto_config *cf_kif;

static struct kif_proto *kif_proto;
static timer *kif_scan_timer;
static bird_clock_t kif_last_shot;

static void
kif_scan(timer *t)
{
  struct kif_proto *p = t->data;

  DBG("KIF: It's interface scan time...\n");
  kif_last_shot = now;
  krt_if_scan(p);
}

static void
kif_force_scan(void)
{
  if (kif_proto && kif_last_shot + 2 < now)
    {
      kif_scan(kif_scan_timer);
      tm_start(kif_scan_timer, ((struct kif_config *) kif_proto->p.cf)->scan_time);
    }
}

static struct proto *
kif_init(struct proto_config *c)
{
  struct kif_proto *p = proto_new(c, sizeof(struct kif_proto));

  return &p->p;
}

static int
kif_start(struct proto *P)
{
  struct kif_proto *p = (struct kif_proto *) P;

  kif_proto = p;
  krt_if_start(p);

  /* Start periodic interface scanning */
  kif_scan_timer = tm_new(P->pool);
  kif_scan_timer->hook = kif_scan;
  kif_scan_timer->data = p;
  kif_scan_timer->recurrent = KIF_CF->scan_time;
  kif_scan(kif_scan_timer);
  tm_start(kif_scan_timer, KIF_CF->scan_time);

  return PS_UP;
}

static int
kif_shutdown(struct proto *P)
{
  struct kif_proto *p = (struct kif_proto *) P;

  tm_stop(kif_scan_timer);
  krt_if_shutdown(p);
  kif_proto = NULL;

  if_start_update();            /* Remove all interfaces */
  if_end_update();

  /*
   *  FIXME: Is it really a good idea?  It causes routes to be flushed,
   *  but at the same time it avoids sending these deletions to the kernel,
   *  because krt thinks the kernel itself has already removed the route
   *  when downing the interface.  Sad.
   */

  return PS_DOWN;
}

struct protocol proto_unix_iface = {
  name:      "Device",
  priority:  100,
  init:      kif_init,
  start:     kif_start,
  shutdown:  kif_shutdown,
};

/*
 *  Inherited Routes
 */

#ifdef KRT_ALLOW_LEARN

static inline int
krt_same_key(rte *a, rte *b)
{
  return a->u.krt.proto == b->u.krt.proto &&
         a->u.krt.metric == b->u.krt.metric &&
         a->u.krt.type == b->u.krt.type;
}

static void
krt_learn_announce_update(struct krt_proto *p, rte *e)
{
  net *n = e->net;
  rta *aa = rta_clone(e->attrs);
  rte *ee = rte_get_temp(aa);
  net *nn = net_get(p->p.table, n->n.prefix, n->n.pxlen);
  ee->net = nn;
  ee->pflags = 0;
  ee->u.krt = e->u.krt;
  rte_update(p->p.table, nn, &p->p, ee);
}

static void
krt_learn_announce_delete(struct krt_proto *p, net *n)
{
  n = net_find(p->p.table, n->n.prefix, n->n.pxlen);
  if (n)
    rte_update(p->p.table, n, &p->p, NULL);
}

static void
krt_learn_scan(struct krt_proto *p, rte *e)
{
  net *n0 = e->net;
  net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen);
  rte *m, **mm;

  e->attrs->source = RTS_INHERIT;

  for(mm=&n->routes; m = *mm; mm=&m->next)
    if (krt_same_key(m, e))
      break;
  if (m)
    {
      if (krt_uptodate(m, e))
        {
          DBG("krt_learn_scan: SEEN\n");
          rte_free(e);
          m->u.krt.seen = 1;
        }
      else
        {
          DBG("krt_learn_scan: OVERRIDE\n");
          *mm = m->next;
          rte_free(m);
          m = NULL;
        }
    }
  else
    DBG("krt_learn_scan: CREATE\n");
  if (!m)
    {
      e->attrs = rta_lookup(e->attrs);
      e->next = n->routes;
      n->routes = e;
      e->u.krt.seen = 1;
    }
}

static void
krt_learn_prune(struct krt_proto *p)
{
  struct fib *fib = &p->krt_table.fib;
  struct fib_iterator fit;

  DBG("Pruning inheritance data...\n");

  FIB_ITERATE_INIT(&fit, fib);
again:
  FIB_ITERATE_START(fib, &fit, f)
    {
      net *n = (net *) f;
      rte *e, **ee, *best, **pbest, *old_best;

      old_best = n->routes;
      best = NULL;
      pbest = NULL;
      ee = &n->routes;
      while (e = *ee)
        {
          if (!e->u.krt.seen)
            {
              *ee = e->next;
              rte_free(e);
              continue;
            }
          if (!best || best->u.krt.metric > e->u.krt.metric)
            {
              best = e;
              pbest = ee;
            }
          e->u.krt.seen = 0;
          ee = &e->next;
        }
      if (!n->routes)
        {
          DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen);
          if (old_best)
            {
              krt_learn_announce_delete(p, n);
              n->n.flags &= ~KRF_INSTALLED;
            }
          FIB_ITERATE_PUT(&fit, f);
          fib_delete(fib, f);
          goto again;
        }
      *pbest = best->next;
      best->next = n->routes;
      n->routes = best;
      if (best != old_best || !(n->n.flags & KRF_INSTALLED))
        {
          DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
          krt_learn_announce_update(p, best);
          n->n.flags |= KRF_INSTALLED;
        }
      else
        DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric);
    }
  FIB_ITERATE_END(f);
}

static void
krt_learn_async(struct krt_proto *p, rte *e, int new)
{
  net *n0 = e->net;
  net *n = net_get(&p->krt_table, n0->n.prefix, n0->n.pxlen);
  rte *g, **gg, *best, **bestp, *old_best;

  e->attrs->source = RTS_INHERIT;

  old_best = n->routes;
  for(gg=&n->routes; g = *gg; gg = &g->next)
    if (krt_same_key(g, e))
      break;
  if (new)
    {
      if (g)
        {
          if (krt_uptodate(g, e))
            {
              DBG("krt_learn_async: same\n");
              rte_free(e);
              return;
            }
          DBG("krt_learn_async: update\n");
          *gg = g->next;
          rte_free(g);
        }
      else
        DBG("krt_learn_async: create\n");
      e->attrs = rta_lookup(e->attrs);
      e->next = n->routes;
      n->routes = e;
    }
  else if (!g)
    {
      DBG("krt_learn_async: not found\n");
      rte_free(e);
      return;
    }
  else
    {
      DBG("krt_learn_async: delete\n");
      *gg = g->next;
      rte_free(e);
      rte_free(g);
    }
  best = n->routes;
  bestp = &n->routes;
  for(gg=&n->routes; g=*gg; gg=&g->next)
    if (best->u.krt.metric > g->u.krt.metric)
      {
        best = g;
        bestp = gg;
      }
  if (best)
    {
      *bestp = best->next;
      best->next = n->routes;
      n->routes = best;
    }
  if (best != old_best)
    {
      DBG("krt_learn_async: distributing change\n");
      if (best)
        {
          krt_learn_announce_update(p, best);
          n->n.flags |= KRF_INSTALLED;
        }
      else
        {
          n->routes = NULL;
          krt_learn_announce_delete(p, n);
          n->n.flags &= ~KRF_INSTALLED;
        }
    }
}

static void
krt_learn_init(struct krt_proto *p)
{
  if (KRT_CF->learn)
    rt_setup(p->p.pool, &p->krt_table, "Inherited");
}

static void
krt_dump(struct proto *P)
{
  struct krt_proto *p = (struct krt_proto *) P;

  if (!KRT_CF->learn)
    return;
  debug("KRT: Table of inheritable routes\n");
  rt_dump(&p->krt_table);
}

static void
krt_dump_attrs(rte *e)
{
  debug(" [m=%d,p=%d,t=%d]", e->u.krt.metric, e->u.krt.proto, e->u.krt.type);
}

#endif

/*
 *  Routes
 */

#ifdef CONFIG_ALL_TABLES_AT_ONCE
static timer *krt_scan_timer;
static int krt_instance_count;
static list krt_instance_list;
#endif

static void
krt_flush_routes(struct krt_proto *p)
{
  struct rtable *t = p->p.table;

  DBG("Flushing kernel routes...\n");
  FIB_WALK(&t->fib, f)
    {
      net *n = (net *) f;
      rte *e = n->routes;
      if (e)
        {
          rta *a = e->attrs;
          if (a->source != RTS_DEVICE && a->source != RTS_INHERIT)
            krt_set_notify(p, e->net, NULL, e);
        }
    }
  FIB_WALK_END;
}

static int
krt_uptodate(rte *k, rte *e)
{
  rta *ka = k->attrs, *ea = e->attrs;

  if (ka->dest != ea->dest)
    return 0;
  switch (ka->dest)
    {
    case RTD_ROUTER:
      return ipa_equal(ka->gw, ea->gw);
    case RTD_DEVICE:
      return !strcmp(ka->iface->name, ea->iface->name);
    default:
      return 1;
    }
}

/*
 *  This gets called back when the low-level scanning code discovers a route.
 *  We expect that the route is a temporary rte and its attributes are uncached.
 */

void
krt_got_route(struct krt_proto *p, rte *e)
{
  rte *old;
  net *net = e->net;
  int src = e->u.krt.src;
  int verdict;

#ifdef CONFIG_AUTO_ROUTES
  if (e->attrs->dest == RTD_DEVICE)
    {
      /* It's a device route. Probably a kernel-generated one. */
      verdict = KRF_IGNORE;
      goto sentenced;
    }
#endif

#ifdef KRT_ALLOW_LEARN
  if (src == KRT_SRC_ALIEN)
    {
      if (KRT_CF->learn)
        krt_learn_scan(p, e);
      else
        DBG("krt_parse_entry: Alien route, ignoring\n");
      return;
    }
#endif

  if (net->n.flags & KRF_VERDICT_MASK)
    {
      /* Route to this destination was already seen. Strange, but it happens... */
      DBG("Already seen.\n");
      return;
    }

  if (net->n.flags & KRF_INSTALLED)
    {
      old = net->routes;
      ASSERT(old);
      if (krt_uptodate(e, old))
        verdict = KRF_SEEN;
      else
        verdict = KRF_UPDATE;
    }
  else
    verdict = KRF_DELETE;

sentenced:
  DBG("krt_parse_entry: verdict=%s\n",
      ((char *[]) { "CREATE", "SEEN", "UPDATE", "DELETE", "IGNORE" }) [verdict]);

  net->n.flags = (net->n.flags & ~KRF_VERDICT_MASK) | verdict;
  if (verdict == KRF_UPDATE || verdict == KRF_DELETE)
    {
      /* Get a cached copy of attributes and link the route */
      rta *a = e->attrs;
      a->source = RTS_DUMMY;
      e->attrs = rta_lookup(a);
      e->next = net->routes;
      net->routes = e;
    }
  else
    rte_free(e);
}

static void
krt_prune(struct krt_proto *p)
{
  struct proto *pp = &p->p;
  struct rtable *t = p->p.table;
  struct fib_node *f;

  DBG("Pruning routes in table %s...\n", t->name);
  FIB_WALK(&t->fib, f)
    {
      net *n = (net *) f;
      int verdict = f->flags & KRF_VERDICT_MASK;
      rte *new, *old;

      if (verdict != KRF_CREATE && verdict != KRF_SEEN && verdict != KRF_IGNORE)
        {
          old = n->routes;
          n->routes = old->next;
        }
      else
        old = NULL;
      new = n->routes;

      switch (verdict)
        {
        case KRF_CREATE:
          if (new && (f->flags & KRF_INSTALLED))
            {
              DBG("krt_prune: reinstalling %I/%d\n", n->n.prefix, n->n.pxlen);
              krt_set_notify(p, n, new, NULL);
            }
          break;
        case KRF_SEEN:
        case KRF_IGNORE:
          /* Nothing happens */
          break;
        case KRF_UPDATE:
          DBG("krt_prune: updating %I/%d\n", n->n.prefix, n->n.pxlen);
          krt_set_notify(p, n, new, old);
          break;
        case KRF_DELETE:
          DBG("krt_prune: deleting %I/%d\n", n->n.prefix, n->n.pxlen);
          krt_set_notify(p, n, NULL, old);
          break;
        default:
          bug("krt_prune: invalid route status");
        }
      if (old)
        rte_free(old);
      f->flags &= ~KRF_VERDICT_MASK;
    }
  FIB_WALK_END;

#ifdef KRT_ALLOW_LEARN
  if (KRT_CF->learn)
    krt_learn_prune(p);
#endif
}

void
krt_got_route_async(struct krt_proto *p, rte *e, int new)
{
  net *net = e->net;
  rte *old = net->routes;
  int src = e->u.krt.src;

  switch (src)
    {
    case KRT_SRC_BIRD:
      ASSERT(0);
    case KRT_SRC_REDIRECT:
      DBG("It's a redirect, kill him! Kill! Kill!\n");
      krt_set_notify(p, net, NULL, e);
      break;
    case KRT_SRC_ALIEN:
#ifdef KRT_ALLOW_LEARN
      if (KRT_CF->learn)
        {
          krt_learn_async(p, e, new);
          return;
        }
#endif
      /* Fall-thru */
    default:
      DBG("Discarding\n");
      rte_update(p->p.table, net, &p->p, NULL);
    }
  rte_free(e);
}

/*
 *  Periodic scanning
 */

static void
krt_scan(timer *t)
{
  struct krt_proto *p;

  kif_force_scan();
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  {
    void *q;
    DBG("KRT: It's route scan time...\n");
    krt_scan_fire(NULL);
    WALK_LIST(q, krt_instance_list)
      {
        p = SKIP_BACK(struct krt_proto, instance_node, q);
        krt_prune(p);
      }
  }
#else
  p = t->data;
  DBG("KRT: It's route scan time for %s...\n", p->p.name);
  krt_scan_fire(p);
  krt_prune(p);
#endif
}

/*
 *  Updates
 */

static void
krt_notify(struct proto *P, net *net, rte *new, rte *old, struct ea_list *tmpa)
{
  struct krt_proto *p = (struct krt_proto *) P;

  if (new && (!krt_capable(new) || new->attrs->source == RTS_INHERIT))
    new = NULL;
  if (!(net->n.flags & KRF_INSTALLED))
    old = NULL;
  if (new)
    net->n.flags |= KRF_INSTALLED;
  else
    net->n.flags &= ~KRF_INSTALLED;
  krt_set_notify(p, net, new, old);
}

/*
 *  Protocol glue
 */

struct proto_config *cf_krt;

static void
krt_preconfig(struct protocol *P, struct config *c)
{
  krt_scan_preconfig(c);
}

static void
krt_postconfig(struct proto_config *C)
{
  struct krt_config *c = (struct krt_config *) C;

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  struct krt_config *first = (struct krt_config *) cf_krt;
  if (first->scan_time != c->scan_time)
    cf_error("All kernel syncers must use the same table scan interval");
#endif

  if (C->table->krt_attached)
    cf_error("Kernel syncer (%s) already attached to table %s",
             C->table->krt_attached->name, C->table->name);
  C->table->krt_attached = C;
  krt_scan_postconfig(c);
}

static timer *
krt_start_timer(struct krt_proto *p)
{
  timer *t;

  t = tm_new(p->krt_pool);
  t->hook = krt_scan;
  t->data = p;
  t->recurrent = KRT_CF->scan_time;
  tm_start(t, KRT_CF->scan_time);
  return t;
}

static int
krt_start(struct proto *P)
{
  struct krt_proto *p = (struct krt_proto *) P;
  int first = 1;

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (!krt_instance_count++)
    init_list(&krt_instance_list);
  else
    first = 0;
  p->krt_pool = krt_pool;
  add_tail(&krt_instance_list, &p->instance_node);
#else
  p->krt_pool = P->pool;
#endif

#ifdef KRT_ALLOW_LEARN
  krt_learn_init(p);
#endif

  krt_scan_start(p, first);
  krt_set_start(p, first);

  /* Start periodic routing table scanning */
#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (first)
    krt_scan_timer = krt_start_timer(p);
  p->scan_timer = krt_scan_timer;
  /* If this is the last instance to be initialized, kick the timer */
  if (!P->proto->startup_counter)
    krt_scan(p->scan_timer);
#else
  p->scan_timer = krt_start_timer(p);
  krt_scan(p->scan_timer);
#endif

  return PS_UP;
}

static int
krt_shutdown(struct proto *P)
{
  struct krt_proto *p = (struct krt_proto *) P;
  int last = 1;

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  rem_node(&p->instance_node);
  if (--krt_instance_count)
    last = 0;
  else
#endif
    tm_stop(p->scan_timer);

  if (!KRT_CF->persist)
    krt_flush_routes(p);

  krt_set_shutdown(p, last);
  krt_scan_shutdown(p, last);

#ifdef CONFIG_ALL_TABLES_AT_ONCE
  if (last)
    rfree(krt_scan_timer);
#endif

  return PS_DOWN;
}

static struct proto *
krt_init(struct proto_config *c)
{
  struct krt_proto *p = proto_new(c, sizeof(struct krt_proto));

  p->p.rt_notify = krt_notify;
  return &p->p;
}

struct protocol proto_unix_kernel = {
  name:        "Kernel",
  priority:    80,
  preconfig:   krt_preconfig,
  postconfig:  krt_postconfig,
  init:        krt_init,
  start:       krt_start,
  shutdown:    krt_shutdown,
#ifdef KRT_ALLOW_LEARN
  dump:        krt_dump,
  dump_attrs:  krt_dump_attrs,
#endif
};