Diffstat (limited to 'proto')
63 files changed, 11085 insertions, 6130 deletions
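The patch below leans heavily on circular sequence-number arithmetic: ge_mod64k() (first babel.c hunk) implements the RFC 1982 style "greater or equal mod 2^16" test as (u16)(a - b) < 0x8000. A minimal standalone sketch of that comparison, kept outside the patch itself and using standard C types in place of BIRD's uint/u16 typedefs; the main() harness and example values are illustrative only:

/* Sketch of the mod-2^16 "greater or equal" test used by babel.c
 * (ge_mod64k); uint16_t replaces BIRD's u16 typedef. */
#include <assert.h>
#include <stdint.h>

static inline int ge_mod64k(unsigned a, unsigned b)
{ return (uint16_t)(a - b) < 0x8000; }

int main(void)
{
  assert(ge_mod64k(5, 3));        /* 5 >= 3 in the ordinary sense */
  assert(!ge_mod64k(3, 5));       /* ... and 3 < 5 */
  assert(ge_mod64k(7, 7));        /* equality counts as "greater or equal" */
  assert(ge_mod64k(2, 0xfff0));   /* 2 is "newer" than 0xfff0 after seqno wraparound */
  return 0;
}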
@@ -4,7 +4,8 @@ C bfd
 C bgp
 C ospf
 C pipe
-C rip
 C radv
+C rip
+C rpki
 C static
 S ../nest/rt-dev.c
diff --git a/proto/babel/Makefile b/proto/babel/Makefile
index 400ffbac..a5b4a13b 100644
--- a/proto/babel/Makefile
+++ b/proto/babel/Makefile
@@ -1,5 +1,6 @@
-source=babel.c packets.c
-root-rel=../../
-dir-name=proto/babel
+src := babel.c packets.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)

-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 38be6909..aa7e8b68 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -2,6 +2,8 @@
  * BIRD -- The Babel protocol
  *
  * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen
+ * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2016--2017 CZ.NIC z.s.p.o.
  *
  * Can be freely distributed and used under the terms of the GNU GPL.
  *
@@ -29,17 +31,14 @@
  *
  * The main route selection is done in babel_select_route(). This is called when
  * an entry is updated by receiving updates from the network or when modified by
- * internal timers. It performs feasibility checks on the available routes for
- * the prefix and selects the one with the lowest metric to be announced to the
- * core.
+ * internal timers. The function selects from feasible and reachable routes the
+ * one with the lowest metric to be announced to the core.
  */

 #include <stdlib.h>

 #include "babel.h"

-#define OUR_ROUTE(r) (r->neigh == NULL)
-
 /*
  * Is one number greater or equal than another mod 2^16? This is based on the
  * definition of serial number space in RFC 1982. Note that arguments are of
@@ -48,47 +47,49 @@
 static inline int ge_mod64k(uint a, uint b)
 { return (u16)(a - b) < 0x8000; }

-static void babel_dump_entry(struct babel_entry *e);
-static void babel_dump_route(struct babel_route *r);
-static void babel_select_route(struct babel_entry *e);
-static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n);
-static void babel_send_wildcard_request(struct babel_iface *ifa);
-static int babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen,
-                                     u64 router_id, u16 seqno);
-static void babel_trigger_iface_update(struct babel_iface *ifa);
-static void babel_trigger_update(struct babel_proto *p);
-static void babel_send_seqno_request(struct babel_entry *e);
+static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e);
+static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod);
+static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e);
+static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n);
+static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr);
+static void babel_update_cost(struct babel_neighbor *n);
 static inline void babel_kick_timer(struct babel_proto *p);
 static inline void babel_iface_kick_timer(struct babel_iface *ifa);

+static inline void babel_lock_neighbor(struct babel_neighbor *nbr)
+{ if (nbr) nbr->uc++; }
+
+static inline void babel_unlock_neighbor(struct babel_neighbor *nbr)
+{ if (nbr && !--nbr->uc) mb_free(nbr); }
+
 /*
  * Functions to maintain data structures
  */

 static void
-babel_init_entry(struct fib_node *n)
+babel_init_entry(void *E)
 {
-  struct babel_entry *e = (void *) n;
-  e->proto = NULL;
-  e->selected_in = NULL;
-  e->selected_out = NULL;
-  e->updated = now;
+  struct babel_entry *e = E;
+
+  e->updated = current_time();
+  init_list(&e->requests);
   init_list(&e->sources);
   init_list(&e->routes);
 }

 static inline struct babel_entry *
-babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen)
+babel_find_entry(struct babel_proto *p, const net_addr *n)
 {
-  return fib_find(&p->rtable, &prefix, plen);
+  struct fib *rtable = (n->type == NET_IP4) ?
&p->ip4_rtable : &p->ip6_rtable; + return fib_find(rtable, n); } static struct babel_entry * -babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +babel_get_entry(struct babel_proto *p, const net_addr *n) { - struct babel_entry *e = fib_get(&p->rtable, &prefix, plen); - e->proto = p; + struct fib *rtable = (n->type == NET_IP4) ? &p->ip4_rtable : &p->ip6_rtable; + struct babel_entry *e = fib_get(rtable, n); return e; } @@ -105,9 +106,8 @@ babel_find_source(struct babel_entry *e, u64 router_id) } static struct babel_source * -babel_get_source(struct babel_entry *e, u64 router_id) +babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) { - struct babel_proto *p = e->proto; struct babel_source *s = babel_find_source(e, router_id); if (s) @@ -115,7 +115,7 @@ babel_get_source(struct babel_entry *e, u64 router_id) s = sl_alloc(p->source_slab); s->router_id = router_id; - s->expires = now + BABEL_GARBAGE_INTERVAL; + s->expires = current_time() + BABEL_GARBAGE_INTERVAL; s->seqno = 0; s->metric = BABEL_INFINITY; add_tail(&e->sources, NODE s); @@ -124,14 +124,14 @@ babel_get_source(struct babel_entry *e, u64 router_id) } static void -babel_expire_sources(struct babel_entry *e) +babel_expire_sources(struct babel_proto *p, struct babel_entry *e) { - struct babel_proto *p = e->proto; struct babel_source *n, *nx; + btime now_ = current_time(); WALK_LIST_DELSAFE(n, nx, e->sources) { - if (n->expires && n->expires <= now) + if (n->expires && n->expires <= now_) { rem_node(NODE n); sl_free(p->source_slab, n); @@ -152,9 +152,8 @@ babel_find_route(struct babel_entry *e, struct babel_neighbor *n) } static struct babel_route * -babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr) +babel_get_route(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *nbr) { - struct babel_proto *p = e->proto; struct babel_route *r = babel_find_route(e, nbr); if (r) @@ -162,94 +161,91 @@ babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr) r = sl_alloc(p->route_slab); memset(r, 0, sizeof(*r)); + r->e = e; + r->neigh = nbr; add_tail(&e->routes, NODE r); - - if (nbr) - { - r->neigh = nbr; - r->expires = now + BABEL_GARBAGE_INTERVAL; - add_tail(&nbr->routes, NODE &r->neigh_route); - } + add_tail(&nbr->routes, NODE &r->neigh_route); return r; } -static void -babel_flush_route(struct babel_route *r) +static inline void +babel_retract_route(struct babel_proto *p, struct babel_route *r) { - struct babel_proto *p = r->e->proto; + r->metric = r->advert_metric = BABEL_INFINITY; - DBG("Babel: Flush route %I/%d router_id %lR neigh %I\n", - r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? 
r->neigh->addr : IPA_NONE); - - rem_node(NODE r); + if (r == r->e->selected) + babel_select_route(p, r->e, r); +} - if (r->neigh) - rem_node(&r->neigh_route); +static void +babel_flush_route(struct babel_proto *p, struct babel_route *r) +{ + DBG("Babel: Flush route %N router_id %lR neigh %I\n", + r->e->n.addr, r->router_id, r->neigh->addr); - if (r->e->selected_in == r) - r->e->selected_in = NULL; + rem_node(NODE r); + rem_node(&r->neigh_route); - if (r->e->selected_out == r) - r->e->selected_out = NULL; + if (r->e->selected == r) + r->e->selected = NULL; sl_free(p->route_slab, r); } static void -babel_expire_route(struct babel_route *r) +babel_expire_route(struct babel_proto *p, struct babel_route *r) { - struct babel_proto *p = r->e->proto; - struct babel_entry *e = r->e; + struct babel_config *cf = (void *) p->p.cf; - TRACE(D_EVENTS, "Route expiry timer for %I/%d router-id %lR fired", - e->n.prefix, e->n.pxlen, r->router_id); + TRACE(D_EVENTS, "Route expiry timer for %N router-id %lR fired", + r->e->n.addr, r->router_id); if (r->metric < BABEL_INFINITY) { - r->metric = BABEL_INFINITY; - r->expires = now + r->expiry_interval; + r->metric = r->advert_metric = BABEL_INFINITY; + r->expires = current_time() + cf->hold_time; } else { - babel_flush_route(r); + babel_flush_route(p, r); } } static void -babel_refresh_route(struct babel_route *r) +babel_refresh_route(struct babel_proto *p, struct babel_route *r) { - if (!OUR_ROUTE(r) && (r == r->e->selected_in)) - babel_send_route_request(r->e, r->neigh); + if (r == r->e->selected) + babel_send_route_request(p, r->e, r->neigh); r->refresh_time = 0; } static void -babel_expire_routes(struct babel_proto *p) +babel_expire_routes_(struct babel_proto *p, struct fib *rtable) { - struct babel_entry *e; + struct babel_config *cf = (void *) p->p.cf; struct babel_route *r, *rx; struct fib_iterator fit; + btime now_ = current_time(); - FIB_ITERATE_INIT(&fit, &p->rtable); + FIB_ITERATE_INIT(&fit, rtable); loop: - FIB_ITERATE_START(&p->rtable, &fit, n) + FIB_ITERATE_START(rtable, &fit, struct babel_entry, e) { - e = (struct babel_entry *) n; int changed = 0; WALK_LIST_DELSAFE(r, rx, e->routes) { - if (r->refresh_time && r->refresh_time <= now) - babel_refresh_route(r); + if (r->refresh_time && r->refresh_time <= now_) + babel_refresh_route(p, r); - if (r->expires && r->expires <= now) + if (r->expires && r->expires <= now_) { - babel_expire_route(r); - changed = 1; + changed = changed || (r == e->selected); + babel_expire_route(p, r); } } @@ -258,25 +254,148 @@ loop: /* * We have to restart the iteration because there may be a cascade of * synchronous events babel_select_route() -> nest table change -> - * babel_rt_notify() -> p->rtable change, invalidating hidden variables. + * babel_rt_notify() -> rtable change, invalidating hidden variables. 
*/ + FIB_ITERATE_PUT(&fit); + babel_select_route(p, e, NULL); + goto loop; + } + + /* Clean up stale entries */ + if ((e->valid == BABEL_ENTRY_STALE) && ((e->updated + cf->hold_time) <= now_)) + e->valid = BABEL_ENTRY_DUMMY; - FIB_ITERATE_PUT(&fit, n); - babel_select_route(e); + /* Clean up unreachable route */ + if (e->unreachable && (!e->valid || (e->router_id == p->router_id))) + { + FIB_ITERATE_PUT(&fit); + babel_announce_retraction(p, e); goto loop; } - babel_expire_sources(e); + babel_expire_sources(p, e); + babel_expire_requests(p, e); /* Remove empty entries */ - if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) + if (!e->valid && EMPTY_LIST(e->routes) && EMPTY_LIST(e->sources) && EMPTY_LIST(e->requests)) { - FIB_ITERATE_PUT(&fit, n); - fib_delete(&p->rtable, e); + FIB_ITERATE_PUT(&fit); + fib_delete(rtable, e); goto loop; } } - FIB_ITERATE_END(n); + FIB_ITERATE_END; +} + +static void +babel_expire_routes(struct babel_proto *p) +{ + babel_expire_routes_(p, &p->ip4_rtable); + babel_expire_routes_(p, &p->ip6_rtable); +} + +static inline int seqno_request_valid(struct babel_seqno_request *sr) +{ return !sr->nbr || sr->nbr->ifa; } + +/* + * Add seqno request to the table of pending requests (RFC 6216 3.2.6) and send + * it to network. Do nothing if it is already in the table. + */ + +static void +babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e, + u64 router_id, u16 seqno, u8 hop_count, + struct babel_neighbor *nbr) +{ + struct babel_seqno_request *sr; + + WALK_LIST(sr, e->requests) + if (sr->router_id == router_id) + { + /* Found matching or newer */ + if (ge_mod64k(sr->seqno, seqno) && seqno_request_valid(sr)) + return; + + /* Found older */ + babel_unlock_neighbor(sr->nbr); + rem_node(NODE sr); + goto found; + } + + /* No entries found */ + sr = sl_alloc(p->seqno_slab); + +found: + sr->router_id = router_id; + sr->seqno = seqno; + sr->hop_count = hop_count; + sr->count = 0; + sr->expires = current_time() + BABEL_SEQNO_REQUEST_EXPIRY; + babel_lock_neighbor(sr->nbr = nbr); + add_tail(&e->requests, NODE sr); + + babel_send_seqno_request(p, e, sr); +} + +static void +babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr) +{ + babel_unlock_neighbor(sr->nbr); + rem_node(NODE sr); + sl_free(p->seqno_slab, sr); +} + +static int +babel_satisfy_seqno_request(struct babel_proto *p, struct babel_entry *e, + u64 router_id, u16 seqno) +{ + struct babel_seqno_request *sr; + + WALK_LIST(sr, e->requests) + if ((sr->router_id == router_id) && ge_mod64k(seqno, sr->seqno)) + { + /* Found the request, remove it */ + babel_remove_seqno_request(p, sr); + return 1; + } + + return 0; +} + +static void +babel_expire_requests(struct babel_proto *p, struct babel_entry *e) +{ + struct babel_seqno_request *sr, *srx; + btime now_ = current_time(); + + WALK_LIST_DELSAFE(sr, srx, e->requests) + { + /* Remove seqno requests sent to dead neighbors */ + if (!seqno_request_valid(sr)) + { + babel_remove_seqno_request(p, sr); + continue; + } + + /* Handle expired requests - resend or remove */ + if (sr->expires && sr->expires <= now_) + { + if (sr->count < BABEL_SEQNO_REQUEST_RETRY) + { + sr->count++; + sr->expires += (BABEL_SEQNO_REQUEST_EXPIRY << sr->count); + babel_send_seqno_request(p, e, sr); + } + else + { + TRACE(D_EVENTS, "Seqno request for %N router-id %lR expired", + e->n.addr, sr->router_id); + + babel_remove_seqno_request(p, sr); + continue; + } + } + } } static struct babel_neighbor * @@ -294,61 +413,79 @@ babel_find_neighbor(struct babel_iface *ifa, ip_addr 
addr) static struct babel_neighbor * babel_get_neighbor(struct babel_iface *ifa, ip_addr addr) { + struct babel_proto *p = ifa->proto; struct babel_neighbor *nbr = babel_find_neighbor(ifa, addr); if (nbr) return nbr; + TRACE(D_EVENTS, "New neighbor %I on %s", addr, ifa->iface->name); + nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor)); nbr->ifa = ifa; nbr->addr = addr; + nbr->rxcost = BABEL_INFINITY; nbr->txcost = BABEL_INFINITY; + nbr->cost = BABEL_INFINITY; init_list(&nbr->routes); + babel_lock_neighbor(nbr); add_tail(&ifa->neigh_list, NODE nbr); return nbr; } static void -babel_flush_neighbor(struct babel_neighbor *nbr) +babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr) { - struct babel_proto *p = nbr->ifa->proto; + struct babel_route *r; node *n; - TRACE(D_EVENTS, "Flushing neighbor %I", nbr->addr); + TRACE(D_EVENTS, "Removing neighbor %I on %s", nbr->addr, nbr->ifa->iface->name); WALK_LIST_FIRST(n, nbr->routes) { - struct babel_route *r = SKIP_BACK(struct babel_route, neigh_route, n); - struct babel_entry *e = r->e; - int selected = (r == e->selected_in); - - babel_flush_route(r); - - if (selected) - babel_select_route(e); + r = SKIP_BACK(struct babel_route, neigh_route, n); + babel_retract_route(p, r); + babel_flush_route(p, r); } + nbr->ifa = NULL; rem_node(NODE nbr); - mb_free(nbr); + babel_unlock_neighbor(nbr); } static void -babel_expire_ihu(struct babel_neighbor *nbr) +babel_expire_ihu(struct babel_proto *p, struct babel_neighbor *nbr) { + TRACE(D_EVENTS, "IHU from nbr %I on %s expired", nbr->addr, nbr->ifa->iface->name); + nbr->txcost = BABEL_INFINITY; + nbr->ihu_expiry = 0; + babel_update_cost(nbr); } static void -babel_expire_hello(struct babel_neighbor *nbr) +babel_expire_hello(struct babel_proto *p, struct babel_neighbor *nbr, btime now_) { +again: nbr->hello_map <<= 1; if (nbr->hello_cnt < 16) nbr->hello_cnt++; - if (!nbr->hello_map) - babel_flush_neighbor(nbr); + nbr->hello_expiry += nbr->last_hello_int; + + /* We may expire multiple hellos if last_hello_int is too short */ + if (nbr->hello_map && nbr->hello_expiry <= now_) + goto again; + + TRACE(D_EVENTS, "Hello from nbr %I on %s expired, %d left", + nbr->addr, nbr->ifa->iface->name, u32_popcount(nbr->hello_map)); + + if (nbr->hello_map) + babel_update_cost(nbr); + else + babel_flush_neighbor(p, nbr); } static void @@ -356,16 +493,17 @@ babel_expire_neighbors(struct babel_proto *p) { struct babel_iface *ifa; struct babel_neighbor *nbr, *nbx; + btime now_ = current_time(); WALK_LIST(ifa, p->interfaces) { WALK_LIST_DELSAFE(nbr, nbx, ifa->neigh_list) { - if (nbr->ihu_expiry && nbr->ihu_expiry <= now) - babel_expire_ihu(nbr); + if (nbr->ihu_expiry && nbr->ihu_expiry <= now_) + babel_expire_ihu(p, nbr); - if (nbr->hello_expiry && nbr->hello_expiry <= now) - babel_expire_hello(nbr); + if (nbr->hello_expiry && nbr->hello_expiry <= now_) + babel_expire_hello(p, nbr, now_); } } } @@ -399,66 +537,81 @@ babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric) ((seqno == s->seqno) && (metric < s->metric)); } -static u16 -babel_compute_rxcost(struct babel_neighbor *n) +/* Simple additive metric - Appendix 3.1 in the RFC */ +static inline u16 +babel_compute_metric(struct babel_neighbor *n, uint metric) { - struct babel_iface *ifa = n->ifa; - u8 cnt, missed; - u16 map=n->hello_map; - - if (!map) return BABEL_INFINITY; - cnt = u32_popcount(map); // number of bits set - missed = n->hello_cnt-cnt; + return MIN(metric + n->cost, BABEL_INFINITY); +} - if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) - { 
- /* ETX - Appendix 2.2 in the RFC. +static void +babel_update_cost(struct babel_neighbor *nbr) +{ + struct babel_proto *p = nbr->ifa->proto; + struct babel_iface_config *cf = nbr->ifa->cf; + uint rcv = u32_popcount(nbr->hello_map); // number of bits set + uint max = nbr->hello_cnt; + uint rxcost = BABEL_INFINITY; /* Cost to announce in IHU */ + uint txcost = BABEL_INFINITY; /* Effective cost for route selection */ - beta = prob. of successful transmission. - rxcost = BABEL_RXCOST_WIRELESS/beta + if (!rcv || !nbr->ifa->up) + goto done; - Since: beta = 1-missed/n->hello_cnt = cnt/n->hello_cnt - Then: rxcost = BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt - */ - if (!cnt) return BABEL_INFINITY; - return BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt; - } - else + switch (cf->type) { + case BABEL_IFACE_TYPE_WIRED: /* k-out-of-j selection - Appendix 2.1 in the RFC. */ - DBG("Babel: Missed %d hellos from %I\n", missed, n->addr); - /* Link is bad if more than half the expected hellos were lost */ - return (missed > n->hello_cnt/2) ? BABEL_INFINITY : ifa->cf->rxcost; - } -} + /* Link is bad if less than cf->limit/16 of expected hellos were received */ + if (rcv * 16 < cf->limit * max) + break; -static u16 -babel_compute_cost(struct babel_neighbor *n) -{ - struct babel_iface *ifa = n->ifa; - u16 rxcost = babel_compute_rxcost(n); - if (rxcost == BABEL_INFINITY) return rxcost; - else if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) - { - /* ETX - Appendix 2.2 in the RFC */ - return (MAX(n->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS; + rxcost = cf->rxcost; + txcost = nbr->txcost; + break; + + case BABEL_IFACE_TYPE_WIRELESS: + /* + * ETX - Appendix 2.2 in the RFC. + * + * alpha = prob. of successful transmission estimated by the neighbor + * beta = prob. of successful transmission estimated by the router + * rxcost = nominal rxcost of the router / beta + * txcost = nominal rxcost of the neighbor / (alpha * beta) + * = received txcost / beta + * + * Note that received txcost is just neighbor's rxcost. Beta is rcv/max, + * we use inverse values of beta (i.e. max/rcv) to stay in integers. + */ + rxcost = MIN( cf->rxcost * max / rcv, BABEL_INFINITY); + txcost = MIN(nbr->txcost * max / rcv, BABEL_INFINITY); + break; } - else + +done: + /* If RX cost changed, send IHU with next Hello */ + if (rxcost != nbr->rxcost) { - /* k-out-of-j selection - Appendix 2.1 in the RFC. */ - return n->txcost; + nbr->rxcost = rxcost; + nbr->ihu_cnt = 0; } -} -/* Simple additive metric - Appendix 3.1 in the RFC */ -static u16 -babel_compute_metric(struct babel_neighbor *n, uint metric) -{ - metric += babel_compute_cost(n); - return MIN(metric, BABEL_INFINITY); -} + /* If link cost changed, run route selection */ + if (txcost != nbr->cost) + { + TRACE(D_EVENTS, "Cost of nbr %I on %s changed from %u to %u", + nbr->addr, nbr->ifa->iface->name, nbr->cost, txcost); + nbr->cost = txcost; + + struct babel_route *r; node *n; + WALK_LIST2(r, n, nbr->routes, neigh_route) + { + r->metric = babel_compute_metric(nbr, r->advert_metric); + babel_select_route(p, r->e, r); + } + } +} /** * babel_announce_rte - announce selected route to the core @@ -466,123 +619,151 @@ babel_compute_metric(struct babel_neighbor *n, uint metric) * @e: Babel route entry to announce * * This function announces a Babel entry to the core if it has a selected - * incoming path, and retracts it otherwise. If the selected entry has infinite - * metric, the route is announced as unreachable. + * incoming path, and retracts it otherwise. 
If there is no selected route but + * the entry is valid and ours, the unreachable route is announced instead. */ static void babel_announce_rte(struct babel_proto *p, struct babel_entry *e) { - struct babel_route *r = e->selected_in; + struct babel_route *r = e->selected; + struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; if (r) { - net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); - rta A = { + rta a0 = { .src = p->p.main_source, .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER, - .flags = 0, + .dest = RTD_UNICAST, .from = r->neigh->addr, - .iface = r->neigh->ifa->iface, + .nh.gw = r->next_hop, + .nh.iface = r->neigh->ifa->iface, }; - if (r->metric < BABEL_INFINITY) - A.gw = r->next_hop; - - rta *a = rta_lookup(&A); + rta *a = rta_lookup(&a0); rte *rte = rte_get_temp(a); + rte->u.babel.seqno = r->seqno; rte->u.babel.metric = r->metric; rte->u.babel.router_id = r->router_id; - rte->net = n; rte->pflags = 0; - rte_update(&p->p, n, rte); + e->unreachable = 0; + rte_update2(c, e->n.addr, rte, p->p.main_source); + } + else if (e->valid && (e->router_id != p->router_id)) + { + /* Unreachable */ + rta a0 = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .dest = RTD_UNREACHABLE, + }; + + rta *a = rta_lookup(&a0); + rte *rte = rte_get_temp(a); + memset(&rte->u.babel, 0, sizeof(rte->u.babel)); + rte->pflags = 0; + rte->pref = 1; + + e->unreachable = 1; + rte_update2(c, e->n.addr, rte, p->p.main_source); } else { /* Retraction */ - net *n = net_find(p->p.table, e->n.prefix, e->n.pxlen); - rte_update(&p->p, n, NULL); + e->unreachable = 0; + rte_update2(c, e->n.addr, NULL, p->p.main_source); } } +/* Special case of babel_announce_rte() just for retraction */ +static inline void +babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) +{ + struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; + e->unreachable = 0; + rte_update2(c, e->n.addr, NULL, p->p.main_source); +} + + /** * babel_select_route - select best route for given route entry + * @p: Babel protocol instance * @e: Babel entry to select the best route for + * @mod: Babel route that was modified or NULL if unspecified * - * Select the best feasible route for a given prefix among the routes received - * from peers, and propagate it to the nest. This just selects the feasible - * route with the lowest metric. + * Select the best reachable and feasible route for a given prefix among the + * routes received from peers, and propagate it to the nest. This just selects + * the reachable and feasible route with the lowest metric, but keeps selected + * the old one in case of tie. * * If no feasible route is available for a prefix that previously had a route - * selected, a seqno request is sent to try to get a valid route. In the - * meantime, the route is marked as infeasible in the nest (to blackhole packets - * going to it, as per the RFC). + * selected, a seqno request is sent to try to get a valid route. If the entry + * is valid and not owned by us, the unreachable route is announced to the nest + * (to blackhole packets going to it, as per section 2.8). It is later removed + * by babel_expire_routes(). Otherwise, the route is just removed from the nest. + * + * Argument @mod is used to optimize best route calculation. 
When specified, the + * function can assume that only the @mod route was modified to avoid full best + * route selection and announcement when non-best route was modified in minor + * way. The caller is advised to not call babel_select_route() when no change is + * done (e.g. periodic route updates) to avoid unnecessary announcements of the + * same best route. The caller is not required to call the function in case of a + * retraction of a non-best route. * - * If no feasible route is available, and no previous route is selected, the - * route is removed from the nest entirely. + * Note that the function does not active triggered updates. That is done by + * babel_rt_notify() when the change is propagated back to Babel. */ static void -babel_select_route(struct babel_entry *e) +babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod) { - struct babel_proto *p = e->proto; - struct babel_route *r, *cur = e->selected_in; + struct babel_route *r, *best = e->selected; - /* try to find the best feasible route */ - WALK_LIST(r, e->routes) - if (!OUR_ROUTE(r) && /* prevent propagating our own routes back to core */ - (!cur || r->metric < cur->metric) && - babel_is_feasible(babel_find_source(e, r->router_id), r->seqno, r->advert_metric)) - cur = r; - - if (cur && !OUR_ROUTE(cur) && - ((!e->selected_in && cur->metric < BABEL_INFINITY) || - (e->selected_in && cur->metric < e->selected_in->metric))) + /* Shortcut if only non-best was modified */ + if (mod && (mod != best)) { - TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %lR metric %d", - e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); - - e->selected_in = cur; - e->updated = now; - babel_announce_rte(p, e); + /* Either select modified route, or keep old best route */ + if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible) + best = mod; + else + return; } - else if (!cur || cur->metric == BABEL_INFINITY) + else { - /* Couldn't find a feasible route. If we have a selected route, that means - it just became infeasible; so set it's metric to infinite and install it - (as unreachable), then send a seqno request. - - babel_build_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/ - if (e->selected_in) - { - TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d", - e->n.prefix, e->n.pxlen); - - e->selected_in->metric = BABEL_INFINITY; - e->updated = now; + /* Selected route may be modified and no longer admissible */ + if (!best || (best->metric == BABEL_INFINITY) || !best->feasible) + best = NULL; + + /* Find the best feasible route from all routes */ + WALK_LIST(r, e->routes) + if ((r->metric < (best ? best->metric : BABEL_INFINITY)) && r->feasible) + best = r; + } - babel_send_seqno_request(e); - babel_announce_rte(p, e); + if (best) + { + if (best != e->selected) + TRACE(D_EVENTS, "Picked new route for prefix %N: router-id %lR metric %d", + e->n.addr, best->router_id, best->metric); + } + else if (e->selected) + { + /* + * We have lost all feasible routes. We have to broadcast seqno request + * (Section 3.8.2.1) and keep unreachable route for a while (section 2.8). + * The later is done automatically by babel_announce_rte(). + */ - /* Section 3.6 of the RFC forbids an infeasible from being selected. This - is cleared after announcing the route to the core to make sure an - unreachable route is propagated first. 
*/ - e->selected_in = NULL; - } - else - { - /* No route currently selected, and no new one selected; this means we - don't have a route to this destination anymore (and were probably - called from an expiry timer). Remove the route from the nest. */ - TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen); - - e->selected_in = NULL; - e->updated = now; - babel_announce_rte(p, e); - } + TRACE(D_EVENTS, "Lost feasible route for prefix %N", e->n.addr); + if (e->valid && (e->selected->router_id == e->router_id)) + babel_add_seqno_request(p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL); } + else + return; + + e->selected = best; + babel_announce_rte(p, e); } /* @@ -610,11 +791,11 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig msg->type = BABEL_TLV_IHU; msg->ihu.addr = n->addr; - msg->ihu.rxcost = babel_compute_rxcost(n); + msg->ihu.rxcost = n->rxcost; msg->ihu.interval = ifa->cf->ihu_interval; - TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %d", - msg->ihu.addr, msg->ihu.rxcost, msg->ihu.interval); + TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t", + msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval); } static void @@ -623,6 +804,7 @@ babel_send_ihu(struct babel_iface *ifa, struct babel_neighbor *n) union babel_msg msg = {}; babel_build_ihu(&msg, ifa, n); babel_send_unicast(&msg, ifa, n->addr); + n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR; } static void @@ -631,14 +813,18 @@ babel_send_ihus(struct babel_iface *ifa) struct babel_neighbor *n; WALK_LIST(n, ifa->neigh_list) { - union babel_msg msg = {}; - babel_build_ihu(&msg, ifa, n); - babel_enqueue(&msg, ifa); + if (n->hello_cnt && (--n->ihu_cnt <= 0)) + { + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_enqueue(&msg, ifa); + n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR; + } } } static void -babel_send_hello(struct babel_iface *ifa, u8 send_ihu) +babel_send_hello(struct babel_iface *ifa) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; @@ -647,30 +833,26 @@ babel_send_hello(struct babel_iface *ifa, u8 send_ihu) msg.hello.seqno = ifa->hello_seqno++; msg.hello.interval = ifa->cf->hello_interval; - TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %d", - ifa->ifname, msg.hello.seqno, msg.hello.interval); + TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", + ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); babel_enqueue(&msg, ifa); - if (send_ihu) - babel_send_ihus(ifa); + babel_send_ihus(ifa); } static void -babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n) +babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n) { - struct babel_proto *p = e->proto; - struct babel_iface *ifa = n->ifa; union babel_msg msg = {}; - TRACE(D_PACKETS, "Sending route request for %I/%d to %I", - e->n.prefix, e->n.pxlen, n->addr); + TRACE(D_PACKETS, "Sending route request for %N to %I", + e->n.addr, n->addr); msg.type = BABEL_TLV_ROUTE_REQUEST; - msg.route_request.prefix = e->n.prefix; - msg.route_request.plen = e->n.pxlen; + net_copy(&msg.route_request.net, e->n.addr); - babel_send_unicast(&msg, ifa, n->addr); + babel_send_unicast(&msg, n->ifa, n->addr); } static void @@ -689,56 +871,32 @@ babel_send_wildcard_request(struct babel_iface *ifa) } static void -babel_send_seqno_request(struct babel_entry *e) +babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr) { - struct babel_proto *p = 
e->proto; - struct babel_route *r = e->selected_in; - struct babel_iface *ifa = NULL; - struct babel_source *s = NULL; union babel_msg msg = {}; - s = babel_find_source(e, r->router_id); - if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) - return; - - TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = e->n.pxlen; - msg.seqno_request.seqno = s->seqno + 1; - msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; - msg.seqno_request.router_id = r->router_id; - msg.seqno_request.prefix = e->n.prefix; - - WALK_LIST(ifa, p->interfaces) - babel_enqueue(&msg, ifa); -} + msg.seqno_request.hop_count = sr->hop_count ?: BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.seqno = sr->seqno; + msg.seqno_request.router_id = sr->router_id; + net_copy(&msg.seqno_request.net, e->n.addr); -static void -babel_unicast_seqno_request(struct babel_route *r) -{ - struct babel_entry *e = r->e; - struct babel_proto *p = e->proto; - struct babel_iface *ifa = r->neigh->ifa; - struct babel_source *s = NULL; - union babel_msg msg = {}; - - s = babel_find_source(e, r->router_id); - if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) - return; - - TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + if (sr->nbr) + { + TRACE(D_PACKETS, "Sending seqno request for %N router-id %lR seqno %d to %I on %s", + e->n.addr, sr->router_id, sr->seqno, sr->nbr->addr, sr->nbr->ifa->ifname); - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = e->n.pxlen; - msg.seqno_request.seqno = s->seqno + 1; - msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; - msg.seqno_request.router_id = r->router_id; - msg.seqno_request.prefix = e->n.prefix; + babel_send_unicast(&msg, sr->nbr->ifa, sr->nbr->addr); + } + else + { + TRACE(D_PACKETS, "Sending broadcast seqno request for %N router-id %lR seqno %d", + e->n.addr, sr->router_id, sr->seqno); - babel_send_unicast(&msg, ifa, r->neigh->addr); + struct babel_iface *ifa; + WALK_LIST(ifa, p->interfaces) + babel_enqueue(&msg, ifa); + } } /** @@ -752,49 +910,55 @@ babel_unicast_seqno_request(struct babel_route *r) * transmitted entry is updated. */ static void -babel_send_update(struct babel_iface *ifa, bird_clock_t changed) +babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable) { struct babel_proto *p = ifa->proto; - FIB_WALK(&p->rtable, n) + /* Update increase was requested */ + if (p->update_seqno_inc) { - struct babel_entry *e = (void *) n; - struct babel_route *r = e->selected_out; + p->update_seqno++; + p->update_seqno_inc = 0; + } - if (!r) + FIB_WALK(rtable, struct babel_entry, e) + { + if (!e->valid) continue; /* Our own seqno might have changed, in which case we update the routes we originate. 
*/ - if ((r->router_id == p->router_id) && (r->seqno < p->update_seqno)) + if ((e->router_id == p->router_id) && (e->seqno < p->update_seqno)) { - r->seqno = p->update_seqno; - e->updated = now; + e->seqno = p->update_seqno; + e->updated = current_time(); } /* Skip routes that weren't updated since 'changed' time */ if (e->updated < changed) continue; - TRACE(D_PACKETS, "Sending update for %I/%d router-id %lR seqno %d metric %d", - e->n.prefix, e->n.pxlen, r->router_id, r->seqno, r->metric); + TRACE(D_PACKETS, "Sending update for %N router-id %lR seqno %d metric %d", + e->n.addr, e->router_id, e->seqno, e->metric); union babel_msg msg = {}; msg.type = BABEL_TLV_UPDATE; - msg.update.plen = e->n.pxlen; msg.update.interval = ifa->cf->update_interval; - msg.update.seqno = r->seqno; - msg.update.metric = r->metric; - msg.update.prefix = e->n.prefix; - msg.update.router_id = r->router_id; + msg.update.seqno = e->seqno; + msg.update.metric = e->metric; + msg.update.router_id = e->router_id; + net_copy(&msg.update.net, e->n.addr); + + msg.update.next_hop = ((e->n.addr->type == NET_IP4) ? + ifa->next_hop_ip4 : ifa->next_hop_ip6); babel_enqueue(&msg, ifa); /* Update feasibility distance for redistributed routes */ - if (!OUR_ROUTE(r)) + if (e->router_id != p->router_id) { - struct babel_source *s = babel_get_source(e, r->router_id); - s->expires = now + BABEL_GARBAGE_INTERVAL; + struct babel_source *s = babel_get_source(p, e, e->router_id); + s->expires = current_time() + BABEL_GARBAGE_INTERVAL; if ((msg.update.seqno > s->seqno) || ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) @@ -808,6 +972,15 @@ babel_send_update(struct babel_iface *ifa, bird_clock_t changed) } static void +babel_send_update(struct babel_iface *ifa, btime changed) +{ + struct babel_proto *p = ifa->proto; + + babel_send_update_(ifa, changed, &p->ip4_rtable); + babel_send_update_(ifa, changed, &p->ip6_rtable); +} + +static void babel_trigger_iface_update(struct babel_iface *ifa) { struct babel_proto *p = ifa->proto; @@ -819,7 +992,7 @@ babel_trigger_iface_update(struct babel_iface *ifa) TRACE(D_EVENTS, "Scheduling triggered updates for %s seqno %d", ifa->iface->name, p->update_seqno); - ifa->want_triggered = now; + ifa->want_triggered = current_time(); babel_iface_kick_timer(ifa); } @@ -839,20 +1012,18 @@ babel_trigger_update(struct babel_proto *p) /* A retraction is an update with an infinite metric */ static void -babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen) +babel_send_retraction(struct babel_iface *ifa, net_addr *n) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; - TRACE(D_PACKETS, "Sending retraction for %I/%d seqno %d", - prefix, plen, p->update_seqno); + TRACE(D_PACKETS, "Sending retraction for %N seqno %d", n, p->update_seqno); msg.type = BABEL_TLV_UPDATE; - msg.update.plen = plen; msg.update.interval = ifa->cf->update_interval; msg.update.seqno = p->update_seqno; msg.update.metric = BABEL_INFINITY; - msg.update.prefix = prefix; + msg.update.net = *n; babel_enqueue(&msg, ifa); } @@ -881,7 +1052,7 @@ babel_send_wildcard_retraction(struct babel_iface *ifa) /* Update hello history according to Appendix A1 of the RFC */ static void -babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) +babel_update_hello_history(struct babel_neighbor *n, u16 seqno, uint interval) { /* * Compute the difference between expected and received seqno (modulo 2^16). 
@@ -892,7 +1063,7 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) u16 delta = ((uint) seqno - (uint) n->next_hello_seqno); - if (delta == 0) + if ((delta == 0) || (n->hello_cnt == 0)) { /* Do nothing */ } @@ -919,84 +1090,10 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) n->hello_map = (n->hello_map << 1) | 1; n->next_hello_seqno = seqno+1; if (n->hello_cnt < 16) n->hello_cnt++; - n->hello_expiry = now + BABEL_HELLO_EXPIRY_FACTOR(interval); -} - -static void -babel_expire_seqno_requests(struct babel_proto *p) -{ - struct babel_seqno_request *n, *nx; - WALK_LIST_DELSAFE(n, nx, p->seqno_cache) - { - if ((n->updated + BABEL_SEQNO_REQUEST_EXPIRY) <= now) - { - rem_node(NODE n); - sl_free(p->seqno_slab, n); - } - } -} - -/* - * Checks the seqno request cache for a matching request and returns failure if - * found. Otherwise, a new entry is stored in the cache. - */ -static int -babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, - u64 router_id, u16 seqno) -{ - struct babel_seqno_request *r; - - WALK_LIST(r, p->seqno_cache) - { - if (ipa_equal(r->prefix, prefix) && (r->plen == plen) && - (r->router_id == router_id) && (r->seqno == seqno)) - return 0; - } - - /* no entries found */ - r = sl_alloc(p->seqno_slab); - r->prefix = prefix; - r->plen = plen; - r->router_id = router_id; - r->seqno = seqno; - r->updated = now; - add_tail(&p->seqno_cache, NODE r); - - return 1; -} - -static void -babel_forward_seqno_request(struct babel_entry *e, - struct babel_msg_seqno_request *in, - ip_addr sender) -{ - struct babel_proto *p = e->proto; - struct babel_route *r; - - TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router-id %lR seqno %d", - e->n.prefix, e->n.pxlen, in->router_id, in->seqno); - - WALK_LIST(r, e->routes) - { - if ((r->router_id == in->router_id) && - !OUR_ROUTE(r) && - !ipa_equal(r->neigh->addr, sender)) - { - if (!babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno)) - return; - union babel_msg msg = {}; - msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.plen = in->plen; - msg.seqno_request.seqno = in->seqno; - msg.seqno_request.hop_count = in->hop_count-1; - msg.seqno_request.router_id = in->router_id; - msg.seqno_request.prefix = e->n.prefix; - - babel_send_unicast(&msg, r->neigh->ifa, r->neigh->addr); - return; - } - } + /* Update expiration */ + n->hello_expiry = current_time() + BABEL_HELLO_EXPIRY_FACTOR(interval); + n->last_hello_int = interval; } @@ -1010,8 +1107,8 @@ babel_handle_ack_req(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_ack_req *msg = &m->ack_req; - TRACE(D_PACKETS, "Handling ACK request nonce %d interval %d", - msg->nonce, msg->interval); + TRACE(D_PACKETS, "Handling ACK request nonce %d interval %t", + msg->nonce, (btime) msg->interval); babel_send_ack(ifa, msg->sender, msg->nonce); } @@ -1022,12 +1119,17 @@ babel_handle_hello(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_hello *msg = &m->hello; - TRACE(D_PACKETS, "Handling hello seqno %d interval %d", - msg->seqno, msg->interval); + TRACE(D_PACKETS, "Handling hello seqno %d interval %t", + msg->seqno, (btime) msg->interval); struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + int first_hello = !n->hello_cnt; + babel_update_hello_history(n, msg->seqno, msg->interval); - if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + babel_update_cost(n); + + /* Speed up session 
establishment by sending IHU immediately */ + if (first_hello) babel_send_ihu(ifa, n); } @@ -1041,12 +1143,13 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa) if ((msg->ae != BABEL_AE_WILDCARD) && !ipa_equal(msg->addr, ifa->addr)) return; - TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d", - msg->rxcost, msg->interval); + TRACE(D_PACKETS, "Handling IHU rxcost %d interval %t", + msg->rxcost, (btime) msg->interval); struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); n->txcost = msg->rxcost; - n->ihu_expiry = now + BABEL_IHU_EXPIRY_FACTOR(msg->interval); + n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval); + babel_update_cost(n); } /** @@ -1069,12 +1172,15 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) struct babel_neighbor *nbr; struct babel_entry *e; struct babel_source *s; - struct babel_route *r; + struct babel_route *r, *best; node *n; - int feasible; + int feasible, metric; - TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", - msg->prefix, msg->plen, msg->seqno, msg->metric); + if (msg->wildcard) + TRACE(D_PACKETS, "Handling wildcard retraction", msg->seqno); + else + TRACE(D_PACKETS, "Handling update for %N with seqno %d metric %d", + &msg->net, msg->seqno, msg->metric); nbr = babel_find_neighbor(ifa, msg->sender); if (!nbr) @@ -1089,38 +1195,12 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) return; } - /* - * RFC section 3.5.4: - * - * When a Babel node receives an update (id, prefix, seqno, metric) from a - * neighbour neigh with a link cost value equal to cost, it checks whether it - * already has a routing table entry indexed by (neigh, id, prefix). - * - * If no such entry exists: - * - * o if the update is unfeasible, it is ignored; - * - * o if the metric is infinite (the update is a retraction), the update is - * ignored; - * - * o otherwise, a new route table entry is created, indexed by (neigh, id, - * prefix), with seqno equal to seqno and an advertised metric equal to the - * metric carried by the update. - * - * If such an entry exists: - * - * o if the entry is currently installed and the update is unfeasible, then - * the behaviour depends on whether the router-ids of the two entries match. - * If the router-ids are different, the update is treated as though it were - * a retraction (i.e., as though the metric were FFFF hexadecimal). If the - * router-ids are equal, the update is ignored; - * - * o otherwise (i.e., if either the update is feasible or the entry is not - * currently installed), then the entry's sequence number, advertised - * metric, metric, and router-id are updated and, unless the advertised - * metric is infinite, the route's expiry timer is reset to a small multiple - * of the Interval value included in the update. - */ + struct channel *c = (msg->net.type == NET_IP4) ? 
p->ip4_channel : p->ip6_channel; + if (!c || (c->channel_state != CS_UP)) + { + DBG("Babel: Ignoring update for inactive address family.\n"); + return; + } /* Retraction */ if (msg->metric == BABEL_INFINITY) @@ -1134,13 +1214,12 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) WALK_LIST(n, nbr->routes) { r = SKIP_BACK(struct babel_route, neigh_route, n); - r->metric = BABEL_INFINITY; - babel_select_route(r->e); + babel_retract_route(p, r); } } else { - e = babel_find_entry(p, msg->prefix, msg->plen); + e = babel_find_entry(p, &msg->net); if (!e) return; @@ -1151,68 +1230,56 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) if (!r) return; - r->metric = BABEL_INFINITY; - babel_select_route(e); + /* Router-id, next-hop and seqno are ignored for retractions */ + babel_retract_route(p, r); } /* Done with retractions */ return; } - e = babel_get_entry(p, msg->prefix, msg->plen); - r = babel_find_route(e, nbr); /* the route entry indexed by neighbour */ + /* Regular update */ + e = babel_get_entry(p, &msg->net); + r = babel_get_route(p, e, nbr); /* the route entry indexed by neighbour */ s = babel_find_source(e, msg->router_id); /* for feasibility */ feasible = babel_is_feasible(s, msg->seqno, msg->metric); + metric = babel_compute_metric(nbr, msg->metric); + best = e->selected; - if (!r) - { - if (!feasible) - return; + /* RFC section 3.8.2.2 - Dealing with unfeasible updates */ + if (!feasible && (metric != BABEL_INFINITY) && + (!best || (r == best) || (metric < best->metric))) + babel_add_seqno_request(p, e, s->router_id, s->seqno + 1, 0, nbr); - r = babel_get_route(e, nbr); - r->advert_metric = msg->metric; - r->router_id = msg->router_id; - r->metric = babel_compute_metric(nbr, msg->metric); - r->next_hop = msg->next_hop; - r->seqno = msg->seqno; - } - else if (r == r->e->selected_in && !feasible) - { - /* - * Route is installed and update is infeasible - we may lose the route, - * so send a unicast seqno request (section 3.8.2.2 second paragraph). 
- */ - babel_unicast_seqno_request(r); + /* Special case - ignore unfeasible update to best route */ + if (r == best && !feasible && (msg->router_id == r->router_id)) + return; - if (msg->router_id == r->router_id) - return; + r->expires = current_time() + BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); + r->refresh_time = current_time() + BABEL_ROUTE_REFRESH_FACTOR(msg->interval); - /* Treat as retraction */ - r->metric = BABEL_INFINITY; - } - else + /* No further processing if there is no change */ + if ((r->feasible == feasible) && (r->seqno == msg->seqno) && + (r->metric == metric) && (r->advert_metric == msg->metric) && + (r->router_id == msg->router_id) && ipa_equal(r->next_hop, msg->next_hop)) + return; + + /* Last paragraph above - update the entry */ + r->feasible = feasible; + r->seqno = msg->seqno; + r->metric = metric; + r->advert_metric = msg->metric; + r->router_id = msg->router_id; + r->next_hop = msg->next_hop; + + /* If received update satisfies seqno request, we send triggered updates */ + if (babel_satisfy_seqno_request(p, e, msg->router_id, msg->seqno)) { - /* Last paragraph above - update the entry */ - r->advert_metric = msg->metric; - r->metric = babel_compute_metric(nbr, msg->metric); - r->next_hop = msg->next_hop; - - r->router_id = msg->router_id; - r->seqno = msg->seqno; - - r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); - r->expires = now + r->expiry_interval; - if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) - r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; - - /* If the route is not feasible at this point, it means it is from another - neighbour than the one currently selected; so send a unicast seqno - request to try to get a better route (section 3.8.2.2 last paragraph). */ - if (!feasible) - babel_unicast_seqno_request(r); + babel_trigger_update(p); + e->updated = current_time(); } - babel_select_route(e); + babel_select_route(p, e, r); } void @@ -1231,23 +1298,22 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) return; } - TRACE(D_PACKETS, "Handling route request for %I/%d", msg->prefix, msg->plen); + TRACE(D_PACKETS, "Handling route request for %N", &msg->net); /* Non-wildcard request - see if we have an entry for the route. If not, send a retraction, otherwise send an update. 
*/ - struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + struct babel_entry *e = babel_find_entry(p, &msg->net); if (!e) { - babel_send_retraction(ifa, msg->prefix, msg->plen); + babel_send_retraction(ifa, &msg->net); } else { babel_trigger_iface_update(ifa); - e->updated = now; + e->updated = current_time(); } } - void babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) { @@ -1256,36 +1322,54 @@ babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) /* RFC 6126 3.8.1.2 */ - TRACE(D_PACKETS, "Handling seqno request for %I/%d router-id %lR seqno %d hop count %d", - msg->prefix, msg->plen, msg->router_id, msg->seqno, msg->hop_count); + TRACE(D_PACKETS, "Handling seqno request for %N router-id %lR seqno %d hop count %d", + &msg->net, msg->router_id, msg->seqno, msg->hop_count); /* Ignore if we have no such entry or entry has infinite metric */ - struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); - if (!e || !e->selected_out || (e->selected_out->metric == BABEL_INFINITY)) + struct babel_entry *e = babel_find_entry(p, &msg->net); + if (!e || !e->valid || (e->metric == BABEL_INFINITY)) return; /* Trigger update on incoming interface if we have a selected route with different router id or seqno no smaller than requested */ - struct babel_route *r = e->selected_out; - if ((r->router_id != msg->router_id) || ge_mod64k(r->seqno, msg->seqno)) + if ((e->router_id != msg->router_id) || ge_mod64k(e->seqno, msg->seqno)) { babel_trigger_iface_update(ifa); - e->updated = now; + e->updated = current_time(); return; } /* Seqno is larger; check if we own the router id */ if (msg->router_id == p->router_id) { - /* Ours; update seqno and trigger global update */ - p->update_seqno++; + /* Ours; seqno increase and trigger global update */ + p->update_seqno_inc = 1; babel_trigger_update(p); } - else + else if (msg->hop_count > 1) { /* Not ours; forward if TTL allows it */ - if (msg->hop_count > 1) - babel_forward_seqno_request(e, msg, msg->sender); + + /* Find best admissible route */ + struct babel_route *r, *best1 = NULL, *best2 = NULL; + WALK_LIST(r, e->routes) + if ((r->router_id == msg->router_id) && !ipa_equal(r->neigh->addr, msg->sender)) + { + /* Find best feasible route */ + if ((!best1 || r->metric < best1->metric) && r->feasible) + best1 = r; + + /* Find best not necessary feasible route */ + if (!best2 || r->metric < best2->metric) + best2 = r; + } + + /* If no route is found, do nothing */ + r = best1 ?: best2; + if (!r) + return; + + babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, r->neigh); } } @@ -1320,42 +1404,43 @@ babel_iface_timer(timer *t) { struct babel_iface *ifa = t->data; struct babel_proto *p = ifa->proto; - bird_clock_t hello_period = ifa->cf->hello_interval; - bird_clock_t update_period = ifa->cf->update_interval; + btime hello_period = ifa->cf->hello_interval; + btime update_period = ifa->cf->update_interval; + btime now_ = current_time(); - if (now >= ifa->next_hello) + if (now_ >= ifa->next_hello) { - babel_send_hello(ifa, (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS || - ifa->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0)); - ifa->next_hello += hello_period * (1 + (now - ifa->next_hello) / hello_period); + babel_send_hello(ifa); + ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } - if (now >= ifa->next_regular) + if (now_ >= ifa->next_regular) { TRACE(D_EVENTS, "Sending regular updates on %s", ifa->ifname); babel_send_update(ifa, 0); - 
ifa->next_regular += update_period * (1 + (now - ifa->next_regular) / update_period); + ifa->next_regular += update_period * (1 + (now_ - ifa->next_regular) / update_period); ifa->want_triggered = 0; p->triggered = 0; } - else if (ifa->want_triggered && (now >= ifa->next_triggered)) + else if (ifa->want_triggered && (now_ >= ifa->next_triggered)) { TRACE(D_EVENTS, "Sending triggered updates on %s", ifa->ifname); babel_send_update(ifa, ifa->want_triggered); - ifa->next_triggered = now + MIN(5, update_period / 2 + 1); + ifa->next_triggered = now_ + MIN(1 S, update_period / 2); ifa->want_triggered = 0; p->triggered = 0; } - bird_clock_t next_event = MIN(ifa->next_hello, ifa->next_regular); - tm_start(ifa->timer, ifa->want_triggered ? 1 : (next_event - now)); + btime next_event = MIN(ifa->next_hello, ifa->next_regular); + if (ifa->want_triggered) next_event = MIN(next_event, ifa->next_triggered); + tm_set(ifa->timer, next_event); } static inline void babel_iface_kick_timer(struct babel_iface *ifa) { - if (ifa->timer->expires > (now + 1)) - tm_start(ifa->timer, 1); + if (ifa->timer->expires > (current_time() + 100 MS)) + tm_start(ifa->timer, 100 MS); } static void @@ -1365,14 +1450,14 @@ babel_iface_start(struct babel_iface *ifa) TRACE(D_EVENTS, "Starting interface %s", ifa->ifname); - ifa->next_hello = now + (random() % ifa->cf->hello_interval) + 1; - ifa->next_regular = now + (random() % ifa->cf->update_interval) + 1; - ifa->next_triggered = now + MIN(5, ifa->cf->update_interval / 2 + 1); + ifa->next_hello = current_time() + (random() % ifa->cf->hello_interval); + ifa->next_regular = current_time() + (random() % ifa->cf->update_interval); + ifa->next_triggered = current_time() + MIN(1 S, ifa->cf->update_interval / 2); ifa->want_triggered = 0; /* We send an immediate update (below) */ - tm_start(ifa->timer, 1); + tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa, 0); + babel_send_hello(ifa); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -1398,9 +1483,7 @@ babel_iface_stop(struct babel_iface *ifa) WALK_LIST(n, nbr->routes) { r = SKIP_BACK(struct babel_route, neigh_route, n); - r->metric = BABEL_INFINITY; - r->expires = now + r->expiry_interval; - babel_select_route(r->e); + babel_retract_route(p, r); } } @@ -1488,21 +1571,21 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con ifa->cf = ic; ifa->pool = pool; ifa->ifname = new->name; + ifa->addr = new->llv6->ip; add_tail(&p->interfaces, NODE ifa); - struct ifa *addr; - WALK_LIST(addr, new->addrs) - if (ipa_is_link_local(addr->ip)) - ifa->addr = addr->ip; + ip_addr addr4 = new->addr4 ? new->addr4->ip : IPA_NONE; + ifa->next_hop_ip4 = ipa_nonzero(ic->next_hop_ip4) ? ic->next_hop_ip4 : addr4; + ifa->next_hop_ip6 = ipa_nonzero(ic->next_hop_ip6) ? 
ic->next_hop_ip6 : ifa->addr; - if (ipa_zero(ifa->addr)) - log(L_WARN "%s: Cannot find link-local addr on %s", p->p.name, new->name); + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, new->name); init_list(&ifa->neigh_list); ifa->hello_seqno = 1; - ifa->timer = tm_new_set(ifa->pool, babel_iface_timer, ifa, 0, 0); + ifa->timer = tm_new_init(ifa->pool, babel_iface_timer, ifa, 0, 0); init_list(&ifa->msg_queue); ifa->send_event = ev_new(ifa->pool); @@ -1527,7 +1610,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) struct babel_neighbor *n; WALK_LIST_FIRST(n, ifa->neigh_list) - babel_flush_neighbor(n); + babel_flush_neighbor(p, n); rem_node(NODE ifa); @@ -1545,12 +1628,16 @@ babel_if_notify(struct proto *P, unsigned flags, struct iface *iface) if (flags & IF_CHANGE_UP) { - struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, iface->addr); + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); /* we only speak multicast */ if (!(iface->flags & IF_MULTICAST)) return; + /* Ignore ifaces without link-local address */ + if (!iface->llv6) + return; + if (ic) babel_add_iface(p, iface, ic); @@ -1590,11 +1677,18 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b ifa->cf = new; - if (ifa->next_hello > (now + new->hello_interval)) - ifa->next_hello = now + (random() % new->hello_interval) + 1; + ip_addr addr4 = ifa->iface->addr4 ? ifa->iface->addr4->ip : IPA_NONE; + ifa->next_hop_ip4 = ipa_nonzero(new->next_hop_ip4) ? new->next_hop_ip4 : addr4; + ifa->next_hop_ip6 = ipa_nonzero(new->next_hop_ip6) ? new->next_hop_ip6 : ifa->addr; + + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, ifa->ifname); - if (ifa->next_regular > (now + new->update_interval)) - ifa->next_regular = now + (random() % new->update_interval) + 1; + if (ifa->next_hello > (current_time() + new->hello_interval)) + ifa->next_hello = current_time() + (random() % new->hello_interval); + + if (ifa->next_regular > (current_time() + new->update_interval)) + ifa->next_regular = current_time() + (random() % new->update_interval); if ((new->tx_length != old->tx_length) || (new->rx_buffer != old->rx_buffer)) babel_iface_update_buffers(ifa); @@ -1615,7 +1709,15 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) WALK_LIST(iface, iface_list) { - if (! (iface->flags & IF_UP)) + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + continue; + + /* Ignore ifaces without link-local address */ + if (!iface->llv6) continue; struct babel_iface *ifa = babel_find_iface(p, iface); @@ -1648,18 +1750,17 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) static void babel_dump_source(struct babel_source *s) { - debug("Source router_id %lR seqno %d metric %d expires %d\n", - s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0); + debug("Source router_id %lR seqno %d metric %d expires %t\n", + s->router_id, s->seqno, s->metric, + s->expires ? s->expires - current_time() : 0); } static void babel_dump_route(struct babel_route *r) { - debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %d\n", - r->neigh ? r->neigh->addr : IPA_NONE, - r->neigh ? r->neigh->ifa->ifname : "(none)", - r->seqno, r->advert_metric, r->metric, - r->router_id, r->expires ? 
r->expires-now : 0); + debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %t\n", + r->neigh->addr, r->neigh->ifa->ifname, r->seqno, r->advert_metric, r->metric, + r->router_id, r->expires ? r->expires - current_time() : 0); } static void @@ -1668,7 +1769,7 @@ babel_dump_entry(struct babel_entry *e) struct babel_source *s; struct babel_route *r; - debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen); + debug("Babel: Entry %N:\n", e->n.addr); WALK_LIST(s,e->sources) { debug(" "); babel_dump_source(s); } @@ -1676,8 +1777,7 @@ babel_dump_entry(struct babel_entry *e) WALK_LIST(r,e->routes) { debug(" "); - if (r == e->selected_out) debug("*"); - if (r == e->selected_in) debug("+"); + if (r == e->selected) debug("*"); babel_dump_route(r); } } @@ -1685,10 +1785,10 @@ babel_dump_entry(struct babel_entry *e) static void babel_dump_neighbor(struct babel_neighbor *n) { - debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n", + debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %t/%t\n", n->addr, n->txcost, n->hello_map, n->next_hello_seqno, - n->hello_expiry ? n->hello_expiry - now : 0, - n->ihu_expiry ? n->ihu_expiry - now : 0); + n->hello_expiry ? n->hello_expiry - current_time() : 0, + n->ihu_expiry ? n->ihu_expiry - current_time() : 0); } static void @@ -1696,9 +1796,10 @@ babel_dump_iface(struct babel_iface *ifa) { struct babel_neighbor *n; - debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n", + debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %t %t", ifa->ifname, ifa->addr, ifa->cf->rxcost, ifa->cf->type, ifa->hello_seqno, ifa->cf->hello_interval, ifa->cf->update_interval); + debug(" next hop v4 %I next hop v6 %I\n", ifa->next_hop_ip4, ifa->next_hop_ip6); WALK_LIST(n, ifa->neigh_list) { debug(" "); babel_dump_neighbor(n); } @@ -1715,9 +1816,14 @@ babel_dump(struct proto *P) WALK_LIST(ifa, p->interfaces) babel_dump_iface(ifa); - FIB_WALK(&p->rtable, n) + FIB_WALK(&p->ip4_rtable, struct babel_entry, e) { - babel_dump_entry((struct babel_entry *) n); + babel_dump_entry(e); + } + FIB_WALK_END; + FIB_WALK(&p->ip6_rtable, struct babel_entry, e) + { + babel_dump_entry(e); } FIB_WALK_END; } @@ -1765,8 +1871,9 @@ babel_show_interfaces(struct proto *P, char *iff) } cli_msg(-1023, "%s:", p->p.name); - cli_msg(-1023, "%-10s %-6s %7s %6s %6s", - "Interface", "State", "RX cost", "Nbrs", "Timer"); + cli_msg(-1023, "%-10s %-6s %7s %6s %7s %-15s %s", + "Interface", "State", "RX cost", "Nbrs", "Timer", + "Next hop (v4)", "Next hop (v6)"); WALK_LIST(ifa, p->interfaces) { @@ -1777,9 +1884,11 @@ babel_show_interfaces(struct proto *P, char *iff) WALK_LIST(nbr, ifa->neigh_list) nbrs++; - int timer = MIN(ifa->next_regular, ifa->next_hello) - now; - cli_msg(-1023, "%-10s %-6s %7u %6u %6u", - ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->rxcost, nbrs, MAX(timer, 0)); + btime timer = MIN(ifa->next_regular, ifa->next_hello) - current_time(); + cli_msg(-1023, "%-10s %-6s %7u %6u %7t %-15I %I", + ifa->iface->name, (ifa->up ? 
"Up" : "Down"), + ifa->cf->rxcost, nbrs, MAX(timer, 0), + ifa->next_hop_ip4, ifa->next_hop_ip6); } cli_msg(0, ""); @@ -1801,8 +1910,8 @@ babel_show_neighbors(struct proto *P, char *iff) } cli_msg(-1024, "%s:", p->p.name); - cli_msg(-1024, "%-25s %-10s %6s %6s %10s", - "IP address", "Interface", "Metric", "Routes", "Next hello"); + cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s", + "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires"); WALK_LIST(ifa, p->interfaces) { @@ -1815,25 +1924,48 @@ babel_show_neighbors(struct proto *P, char *iff) WALK_LIST(r, n->routes) rts++; - int timer = n->hello_expiry - now; - cli_msg(-1024, "%-25I %-10s %6u %6u %10u", - n->addr, ifa->iface->name, n->txcost, rts, MAX(timer, 0)); + uint hellos = u32_popcount(n->hello_map); + btime timer = n->hello_expiry - current_time(); + cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t", + n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0)); } } cli_msg(0, ""); } +static void +babel_show_entries_(struct babel_proto *p UNUSED, struct fib *rtable) +{ + FIB_WALK(rtable, struct babel_entry, e) + { + struct babel_route *r = NULL; + uint rts = 0, srcs = 0; + node *n; + + WALK_LIST(n, e->routes) + rts++; + + WALK_LIST(n, e->sources) + srcs++; + + if (e->valid) + cli_msg(-1025, "%-24N %-23lR %6u %5u %7u %7u", + e->n.addr, e->router_id, e->metric, e->seqno, rts, srcs); + else if (r = e->selected) + cli_msg(-1025, "%-24N %-23lR %6u %5u %7u %7u", + e->n.addr, r->router_id, r->metric, r->seqno, rts, srcs); + else + cli_msg(-1025, "%-24N %-23s %6s %5s %7u %7u", + e->n.addr, "<none>", "-", "-", rts, srcs); + } + FIB_WALK_END; +} + void babel_show_entries(struct proto *P) { struct babel_proto *p = (void *) P; - struct babel_entry *e = NULL; - struct babel_source *s = NULL; - struct babel_route *r = NULL; - - char ipbuf[STD_ADDRESS_P_LENGTH+5]; - char ridbuf[ROUTER_ID_64_LENGTH+1]; if (p->p.proto_state != PS_UP) { @@ -1843,37 +1975,51 @@ babel_show_entries(struct proto *P) } cli_msg(-1025, "%s:", p->p.name); - cli_msg(-1025, "%-29s %-23s %6s %5s %7s %7s", - "Prefix", "Router ID", "Metric", "Seqno", "Expires", "Sources"); - - FIB_WALK(&p->rtable, n) - { - e = (struct babel_entry *) n; - r = e->selected_in ? e->selected_in : e->selected_out; - - int srcs = 0; - WALK_LIST(s, e->sources) - srcs++; + cli_msg(-1025, "%-24s %-23s %6s %5s %7s %7s", + "Prefix", "Router ID", "Metric", "Seqno", "Routes", "Sources"); - bsprintf(ipbuf, "%I/%u", e->n.prefix, e->n.pxlen); + babel_show_entries_(p, &p->ip4_rtable); + babel_show_entries_(p, &p->ip6_rtable); - if (r) - { - if (r->router_id == p->router_id) - bsprintf(ridbuf, "%s", "<self>"); - else - bsprintf(ridbuf, "%lR", r->router_id); + cli_msg(0, ""); +} - int time = r->expires ? r->expires - now : 0; - cli_msg(-1025, "%-29s %-23s %6u %5u %7u %7u", - ipbuf, ridbuf, r->metric, r->seqno, MAX(time, 0), srcs); - } - else +static void +babel_show_routes_(struct babel_proto *p UNUSED, struct fib *rtable) +{ + FIB_WALK(rtable, struct babel_entry, e) + { + struct babel_route *r; + WALK_LIST(r, e->routes) { - cli_msg(-1025, "%-29s %-44s %7u", ipbuf, "<pending>", srcs); + char c = (r == e->selected) ? '*' : (r->feasible ? '+' : ' '); + btime time = r->expires ? 
r->expires - current_time() : 0; + cli_msg(-1025, "%-24N %-25I %-10s %5u %c %5u %7t", + e->n.addr, r->next_hop, r->neigh->ifa->ifname, + r->metric, c, r->seqno, MAX(time, 0)); } } FIB_WALK_END; +} + +void +babel_show_routes(struct proto *P) +{ + struct babel_proto *p = (void *) P; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1025, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1025, "%s:", p->p.name); + cli_msg(-1025, "%-24s %-25s %-9s %6s F %5s %7s", + "Prefix", "Nexthop", "Interface", "Metric", "Seqno", "Expires"); + + babel_show_routes_(p, &p->ip4_rtable); + babel_show_routes_(p, &p->ip6_rtable); cli_msg(0, ""); } @@ -1897,15 +2043,14 @@ babel_timer(timer *t) struct babel_proto *p = t->data; babel_expire_routes(p); - babel_expire_seqno_requests(p); babel_expire_neighbors(p); } static inline void babel_kick_timer(struct babel_proto *p) { - if (p->timer->expires > (now + 1)) - tm_start(p->timer, 1); + if (p->timer->expires > (current_time() + 100 MS)) + tm_start(p->timer, 100 MS); } @@ -1936,12 +2081,18 @@ babel_prepare_attrs(struct linpool *pool, ea_list *next, uint metric, u64 router static int -babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool) +babel_import_control(struct proto *P, struct rte **new, struct ea_list **attrs, struct linpool *pool) { struct babel_proto *p = (void *) P; + rte *rt = *new; + + /* Reject our own unreachable routes */ + if ((rt->attrs->dest == RTD_UNREACHABLE) && (rt->attrs->src->proto == P)) + return -1; + /* Prepare attributes with initial values */ - if ((*rt)->attrs->source != RTS_BABEL) + if (rt->attrs->source != RTS_BABEL) *attrs = babel_prepare_attrs(pool, NULL, 0, p->router_id); return 0; @@ -1964,70 +2115,55 @@ babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) * so store it into our data structures. */ static void -babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, +babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, struct rte *new, struct rte *old UNUSED, struct ea_list *attrs UNUSED) { struct babel_proto *p = (void *) P; struct babel_entry *e; - struct babel_route *r; if (new) { /* Update */ - e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + uint internal = (new->attrs->src->proto == P); + uint rt_seqno = internal ? new->u.babel.seqno : p->update_seqno; + uint rt_metric = ea_get_int(attrs, EA_BABEL_METRIC, 0); + uint rt_router_id = internal ? 
new->u.babel.router_id : p->router_id; - if (new->attrs->src->proto != P) + if (rt_metric > BABEL_INFINITY) { - r = babel_get_route(e, NULL); - r->seqno = p->update_seqno; - r->router_id = p->router_id; - r->metric = 0; /* FIXME: should be selectable */ + log(L_WARN "%s: Invalid babel_metric value %u for route %N", + p->p.name, rt_metric, net->n.addr); + rt_metric = BABEL_INFINITY; } - else - r = e->selected_in; - if (r != e->selected_out) + e = babel_get_entry(p, net->n.addr); + + /* Activate triggered updates */ + if ((e->valid |= BABEL_ENTRY_VALID) || + (e->router_id != rt_router_id)) { - e->selected_out = r; - e->updated = now; babel_trigger_update(p); + e->updated = current_time(); } + + e->valid = BABEL_ENTRY_VALID; + e->seqno = rt_seqno; + e->metric = rt_metric; + e->router_id = rt_router_id; } else { /* Withdraw */ - e = babel_find_entry(p, net->n.prefix, net->n.pxlen); - if (!e || !e->selected_out) + e = babel_find_entry(p, net->n.addr); + + if (!e || e->valid != BABEL_ENTRY_VALID) return; - if (OUR_ROUTE(e->selected_out)) - { - /* - * We originate this route, so set its metric to infinity and set an - * expiry time. This causes a retraction to be sent, and later the route - * to be flushed once the hold time has passed. - */ - e->selected_out->metric = BABEL_INFINITY; - e->selected_out->expires = now + BABEL_HOLD_TIME; - e->updated = now; - babel_trigger_update(p); - } - else - { - /* - * This is a route originating from someone else that was lost; presumably - * because an export filter was updated to filter it. This means we can't - * set the metric to infinity (it would be overridden on subsequent - * updates from the peer originating the route), so just clear the - * exported route. - * - * This causes peers to expire the route after a while (like if we just - * shut down), but it's the best we can do in these circumstances; and - * since export filters presumably aren't updated that often this is - * acceptable. 
- */ - e->selected_out = NULL; - } + e->valid = BABEL_ENTRY_STALE; + e->metric = BABEL_INFINITY; + + babel_trigger_update(p); + e->updated = current_time(); } } @@ -2040,17 +2176,21 @@ babel_rte_better(struct rte *new, struct rte *old) static int babel_rte_same(struct rte *new, struct rte *old) { - return ((new->u.babel.router_id == old->u.babel.router_id) && - (new->u.babel.metric == old->u.babel.metric)); + return ((new->u.babel.seqno == old->u.babel.seqno) && + (new->u.babel.metric == old->u.babel.metric) && + (new->u.babel.router_id == old->u.babel.router_id)); } static struct proto * -babel_init(struct proto_config *cfg) +babel_init(struct proto_config *CF) { - struct proto *P = proto_new(cfg, sizeof(struct babel_proto)); + struct proto *P = proto_new(CF); + struct babel_proto *p = (void *) P; + + proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)); + proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); - P->accept_ra_types = RA_OPTIMAL; P->if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->import_control = babel_import_control; @@ -2068,10 +2208,14 @@ babel_start(struct proto *P) struct babel_proto *p = (void *) P; struct babel_config *cf = (void *) P->cf; - fib_init(&p->rtable, P->pool, sizeof(struct babel_entry), 0, babel_init_entry); + fib_init(&p->ip4_rtable, P->pool, NET_IP4, sizeof(struct babel_entry), + OFFSETOF(struct babel_entry, n), 0, babel_init_entry); + fib_init(&p->ip6_rtable, P->pool, NET_IP6, sizeof(struct babel_entry), + OFFSETOF(struct babel_entry, n), 0, babel_init_entry); + init_list(&p->interfaces); - p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1); - tm_start(p->timer, 2); + p->timer = tm_new_init(P->pool, babel_timer, p, 1 S, 0); + tm_start(p->timer, 1 S); p->update_seqno = 1; p->router_id = proto_get_router_id(&cf->c); @@ -2079,7 +2223,6 @@ babel_start(struct proto *P) p->source_slab = sl_new(P->pool, sizeof(struct babel_source)); p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node)); p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request)); - init_list(&p->seqno_cache); p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; @@ -2111,14 +2254,18 @@ babel_shutdown(struct proto *P) } static int -babel_reconfigure(struct proto *P, struct proto_config *c) +babel_reconfigure(struct proto *P, struct proto_config *CF) { struct babel_proto *p = (void *) P; - struct babel_config *new = (void *) c; + struct babel_config *new = (void *) CF; TRACE(D_EVENTS, "Reconfiguring"); - p->p.cf = c; + if (!proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) || + !proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6))) + return 0; + + p->p.cf = CF; babel_reconfigure_ifaces(p, new); babel_trigger_update(p); @@ -2133,6 +2280,8 @@ struct protocol proto_babel = { .template = "babel%d", .attr_class = EAP_BABEL, .preference = DEF_PREF_BABEL, + .channel_mask = NB_IP, + .proto_size = sizeof(struct babel_proto), .config_size = sizeof(struct babel_config), .init = babel_init, .dump = babel_dump, diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 6a95d82f..1128d261 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -2,6 +2,8 @@ * BIRD -- The Babel protocol * * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
* @@ -23,10 +25,6 @@ #include "lib/string.h" #include "lib/timer.h" -#ifndef IPV6 -#error "The Babel protocol only speaks IPv6" -#endif - #define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0) #define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1) @@ -36,27 +34,30 @@ #define BABEL_INFINITY 0xFFFF -#define BABEL_HELLO_INTERVAL_WIRED 4 /* Default hello intervals in seconds */ -#define BABEL_HELLO_INTERVAL_WIRELESS 4 +#define BABEL_HELLO_INTERVAL_WIRED (4 S_) /* Default hello intervals in seconds */ +#define BABEL_HELLO_INTERVAL_WIRELESS (4 S_) +#define BABEL_HELLO_LIMIT 12 #define BABEL_UPDATE_INTERVAL_FACTOR 4 #define BABEL_IHU_INTERVAL_FACTOR 3 -#define BABEL_IHU_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ -#define BABEL_HELLO_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ -#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((X)*7/2) /* 3.5 */ -#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* Seconds before route expiry to send route request */ -#define BABEL_HOLD_TIME 10 /* Expiry time for our own routes */ +#define BABEL_HOLD_TIME_FACTOR 4 /* How long we keep unreachable route relative to update interval */ +#define BABEL_IHU_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */ +#define BABEL_HELLO_EXPIRY_FACTOR(X) ((btime)(X)*3/2) /* 1.5 */ +#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */ +#define BABEL_ROUTE_REFRESH_FACTOR(X) ((btime)(X)*5/2) /* 2.5 */ +#define BABEL_SEQNO_REQUEST_RETRY 4 +#define BABEL_SEQNO_REQUEST_EXPIRY (2 S_) +#define BABEL_GARBAGE_INTERVAL (300 S_) #define BABEL_RXCOST_WIRED 96 #define BABEL_RXCOST_WIRELESS 256 #define BABEL_INITIAL_HOP_COUNT 255 -#define BABEL_MAX_SEND_INTERVAL 5 -#define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */ -#define BABEL_SEQNO_REQUEST_EXPIRY 60 -#define BABEL_GARBAGE_INTERVAL 300 +#define BABEL_MAX_SEND_INTERVAL 5 /* Unused ? 
*/ /* Max interval that will not overflow when carried as 16-bit centiseconds */ -#define BABEL_MAX_INTERVAL (0xFFFF/BABEL_TIME_UNITS) +#define BABEL_TIME_UNITS 10000 /* On-wire times are counted in centiseconds */ +#define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS) +#define BABEL_MAX_INTERVAL (0xFFFF * BABEL_TIME_UNITS) -#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH) +#define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH) #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) @@ -82,6 +83,11 @@ enum babel_tlv_type { BABEL_TLV_MAX }; +enum babel_subtlv_type { + BABEL_SUBTLV_PAD1 = 0, + BABEL_SUBTLV_PADN = 1 +}; + enum babel_iface_type { /* In practice, UNDEF and WIRED give equivalent behaviour */ BABEL_IFACE_TYPE_UNDEF = 0, @@ -101,8 +107,8 @@ enum babel_ae_type { struct babel_config { struct proto_config c; - - list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ + list iface_list; /* List of iface configs (struct babel_iface_config) */ + uint hold_time; /* Time to hold stale entries and unreachable routes */ }; struct babel_iface_config { @@ -110,33 +116,41 @@ struct babel_iface_config { u16 rxcost; u8 type; + u8 limit; /* Minimum number of Hellos to keep link up */ u8 check_link; uint port; - u16 hello_interval; - u16 ihu_interval; - u16 update_interval; + uint hello_interval; /* Hello interval, in us */ + uint ihu_interval; /* IHU interval, in us */ + uint update_interval; /* Update interval, in us */ u16 rx_buffer; /* RX buffer size, 0 for MTU */ u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ int tx_tos; int tx_priority; + + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; }; struct babel_proto { struct proto p; timer *timer; - struct fib rtable; + struct fib ip4_rtable; + struct fib ip6_rtable; + + struct channel *ip4_channel; + struct channel *ip6_channel; + list interfaces; /* Interfaces we really know about (struct babel_iface) */ u64 router_id; u16 update_seqno; /* To be increased on request */ + u8 update_seqno_inc; /* Request for update_seqno increase */ u8 triggered; /* For triggering global updates */ slab *route_slab; slab *source_slab; slab *msg_slab; - slab *seqno_slab; - list seqno_cache; /* Seqno requests in the cache (struct babel_seqno_request) */ struct tbf log_pkt_tbf; /* TBF for packet messages */ }; @@ -155,16 +169,18 @@ struct babel_iface { char *ifname; sock *sk; ip_addr addr; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; int tx_length; list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */ list msg_queue; u16 hello_seqno; /* To be increased on each hello */ - bird_clock_t next_hello; - bird_clock_t next_regular; - bird_clock_t next_triggered; - bird_clock_t want_triggered; + btime next_hello; + btime next_regular; + btime next_triggered; + btime want_triggered; timer *timer; event *send_event; @@ -175,13 +191,18 @@ struct babel_neighbor { struct babel_iface *ifa; ip_addr addr; - u16 txcost; + uint uc; /* Reference counter for seqno requests */ + u16 rxcost; /* Sent in last IHU */ + u16 txcost; /* Received in last IHU */ + u16 cost; /* Computed neighbor cost */ + s8 ihu_cnt; /* IHU countdown, 0 to send it */ u8 hello_cnt; u16 hello_map; u16 next_hello_seqno; + uint last_hello_int; /* expiry timers */ - bird_clock_t hello_expiry; - bird_clock_t ihu_expiry; + btime hello_expiry; + btime ihu_expiry; list routes; /* Routes this neighbour has sent us (struct babel_route) */ }; @@ -192,7 +213,7 @@ struct babel_source { u64 router_id; u16 seqno; u16 metric; - 
bird_clock_t expires; + btime expires; }; struct babel_route { @@ -201,38 +222,47 @@ struct babel_route { struct babel_entry *e; struct babel_neighbor *neigh; + u8 feasible; u16 seqno; - u16 advert_metric; u16 metric; + u16 advert_metric; u64 router_id; ip_addr next_hop; - bird_clock_t refresh_time; - bird_clock_t expires; - u16 expiry_interval; + btime refresh_time; + btime expires; }; -struct babel_entry { - struct fib_node n; - struct babel_proto *proto; - struct babel_route *selected_in; - struct babel_route *selected_out; - - bird_clock_t updated; - - list sources; /* Source entries for this prefix (struct babel_source). */ - list routes; /* Routes for this prefix (struct babel_route) */ -}; - -/* Stores forwarded seqno requests for duplicate suppression. */ struct babel_seqno_request { node n; - ip_addr prefix; - u8 plen; u64 router_id; u16 seqno; - bird_clock_t updated; + u8 hop_count; + u8 count; + btime expires; + struct babel_neighbor *nbr; +}; + +struct babel_entry { + struct babel_route *selected; + + list routes; /* Routes for this prefix (struct babel_route) */ + list sources; /* Source entries for this prefix (struct babel_source). */ + list requests; + + u8 valid; /* Entry validity state (BABEL_ENTRY_*) */ + u8 unreachable; /* Unreachable route is announced */ + u16 seqno; /* Outgoing seqno */ + u16 metric; /* Outgoing metric */ + u64 router_id; /* Outgoing router ID */ + btime updated; /* Last change of outgoing rte, for triggered updates */ + + struct fib_node n; }; +#define BABEL_ENTRY_DUMMY 0 /* No outgoing route */ +#define BABEL_ENTRY_VALID 1 /* Valid outgoing route */ +#define BABEL_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */ + /* * Internal TLV messages @@ -241,7 +271,7 @@ struct babel_seqno_request { struct babel_msg_ack_req { u8 type; u16 nonce; - u16 interval; + uint interval; ip_addr sender; }; @@ -253,7 +283,7 @@ struct babel_msg_ack { struct babel_msg_hello { u8 type; u16 seqno; - u16 interval; + uint interval; ip_addr sender; }; @@ -261,7 +291,7 @@ struct babel_msg_ihu { u8 type; u8 ae; u16 rxcost; - u16 interval; + uint interval; ip_addr addr; ip_addr sender; }; @@ -269,12 +299,11 @@ struct babel_msg_ihu { struct babel_msg_update { u8 type; u8 wildcard; - u8 plen; - u16 interval; + uint interval; u16 seqno; u16 metric; - ip_addr prefix; u64 router_id; + net_addr net; ip_addr next_hop; ip_addr sender; }; @@ -282,17 +311,15 @@ struct babel_msg_update { struct babel_msg_route_request { u8 type; u8 full; - u8 plen; - ip_addr prefix; + net_addr net; }; struct babel_msg_seqno_request { u8 type; - u8 plen; - u16 seqno; u8 hop_count; + u16 seqno; u64 router_id; - ip_addr prefix; + net_addr net; ip_addr sender; }; @@ -326,6 +353,7 @@ void babel_handle_seqno_request(union babel_msg *msg, struct babel_iface *ifa); void babel_show_interfaces(struct proto *P, char *iff); void babel_show_neighbors(struct proto *P, char *iff); void babel_show_entries(struct proto *P); +void babel_show_routes(struct proto *P); /* packets.c */ void babel_enqueue(union babel_msg *msg, struct babel_iface *ifa); diff --git a/proto/babel/config.Y b/proto/babel/config.Y index b6170852..25ce5ba0 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -2,6 +2,8 @@ * BIRD -- Babel Configuration * * Copyright (c) 2015-2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. 
*/ @@ -21,7 +23,8 @@ CF_DEFINES CF_DECLS CF_KEYWORDS(BABEL, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, WIRED, -WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC) + WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC, NEXT, HOP, + IPV4, IPV6) CF_GRAMMAR @@ -31,10 +34,12 @@ babel_proto_start: proto_start BABEL { this_proto = proto_config_new(&proto_babel, $1); init_list(&BABEL_CFG->iface_list); + BABEL_CFG->hold_time = 1 S_; }; babel_proto_item: proto_item + | proto_channel | INTERFACE babel_iface ; @@ -54,6 +59,7 @@ babel_iface_start: init_list(&this_ipatt->ipn_list); BABEL_IFACE->port = BABEL_PORT; BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE->limit = BABEL_HELLO_LIMIT; BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; BABEL_IFACE->tx_priority = sk_priority_control; BABEL_IFACE->check_link = 1; @@ -81,21 +87,26 @@ babel_iface_finish: if (!BABEL_IFACE->update_interval) BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); BABEL_IFACE->ihu_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); + + BABEL_CFG->hold_time = MAX_(BABEL_CFG->hold_time, BABEL_IFACE->update_interval*BABEL_HOLD_TIME_FACTOR); }; babel_iface_item: | PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } | RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); } - | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid hello interval"); } - | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid update interval"); } + | LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); } | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; } + | HELLO INTERVAL expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); } + | UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); } | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } | TX LENGTH expr { BABEL_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } | TX tos { BABEL_IFACE->tx_tos = $2; } | TX PRIORITY expr { BABEL_IFACE->tx_priority = $3; } | CHECK LINK bool { BABEL_IFACE->check_link = $3; } + | NEXT HOP IPV4 ipa { BABEL_IFACE->next_hop_ip4 = $4; if (!ipa_is_ip4($4)) cf_error("Must be an IPv4 address"); } + | NEXT HOP IPV6 ipa { BABEL_IFACE->next_hop_ip6 = $4; if (!ipa_is_ip6($4)) cf_error("Must be an IPv6 address"); } ; babel_iface_opts: @@ -125,6 +136,9 @@ CF_CLI(SHOW BABEL NEIGHBORS, optsym opttext, [<name>] [\"<interface>\"], [[Show CF_CLI(SHOW BABEL ENTRIES, optsym opttext, [<name>], [[Show information about Babel prefix entries]]) { babel_show_entries(proto_get_named($4, &proto_babel)); }; +CF_CLI(SHOW BABEL ROUTES, optsym opttext, [<name>], [[Show information about Babel route entries]]) +{ babel_show_routes(proto_get_named($4, &proto_babel)); }; + CF_CODE CF_END diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 08054832..4abcf7e4 100644 --- a/proto/babel/packets.c 
+++ b/proto/babel/packets.c @@ -2,6 +2,8 @@ * BIRD -- The Babel protocol * * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2016--2017 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. * @@ -112,13 +114,15 @@ struct babel_parse_state { struct babel_proto *proto; struct babel_iface *ifa; ip_addr saddr; - ip_addr next_hop; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; u64 router_id; /* Router ID used in subsequent updates */ u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */ u8 router_id_seen; /* router_id field is valid */ u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ + u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */ }; enum parse_result { @@ -130,7 +134,10 @@ enum parse_result { struct babel_write_state { u64 router_id; u8 router_id_seen; -// ip_addr next_hop; + ip_addr next_hop_ip4; + ip_addr next_hop_ip6; + u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ + u8 def_ip6_pxlen; }; @@ -146,34 +153,58 @@ struct babel_write_state { #define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); }) #define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length) -#define BYTES(n) ((((uint) n) + 7) / 8) +#define NET_SIZE(n) BYTES(net_pxlen(n)) -static inline u16 +static inline uint +bytes_equal(u8 *b1, u8 *b2, uint maxlen) +{ + uint i; + for (i = 0; (i < maxlen) && (*b1 == *b2); i++, b1++, b2++) + ; + return i; +} + +static inline uint get_time16(const void *p) { - u16 v = get_u16(p) / BABEL_TIME_UNITS; - return MAX(1, v); + uint v = get_u16(p) * BABEL_TIME_UNITS; + return MAX(BABEL_MIN_INTERVAL, v); } static inline void -put_time16(void *p, u16 v) +put_time16(void *p, uint v) { - put_u16(p, v * BABEL_TIME_UNITS); + put_u16(p, v / BABEL_TIME_UNITS); } -static inline ip6_addr -get_ip6_px(const void *p, uint plen) +static inline void +read_ip4_px(net_addr *n, const void *p, uint plen) +{ + ip4_addr addr = {0}; + memcpy(&addr, p, BYTES(plen)); + net_fill_ip4(n, ip4_ntoh(addr), plen); +} + +static inline void +put_ip4_px(void *p, net_addr *n) +{ + ip4_addr addr = ip4_hton(net4_prefix(n)); + memcpy(p, &addr, NET_SIZE(n)); +} + +static inline void +read_ip6_px(net_addr *n, const void *p, uint plen) { ip6_addr addr = IPA_NONE; memcpy(&addr, p, BYTES(plen)); - return ip6_ntoh(addr); + net_fill_ip6(n, ip6_ntoh(addr), plen); } static inline void -put_ip6_px(void *p, ip6_addr addr, uint plen) +put_ip6_px(void *p, net_addr *n) { - addr = ip6_hton(addr); - memcpy(p, &addr, BYTES(plen)); + ip6_addr addr = ip6_hton(net6_prefix(n)); + memcpy(p, &addr, NET_SIZE(n)); } static inline ip6_addr @@ -351,14 +382,33 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, if (msg->ae >= BABEL_AE_MAX) return PARSE_IGNORE; - // We handle link-local IPs. In every other case, the addr field will be 0 but - // validation will succeed. The handler takes care of these cases. - if (msg->ae == BABEL_AE_IP6_LL) + /* + * We only actually read link-local IPs. In every other case, the addr field + * will be 0 but validation will succeed. The handler takes care of these + * cases. We handle them here anyway because we need the length for parsing + * subtlvs. 
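A standalone sketch of the interval handling introduced above: on the wire, Babel intervals remain 16-bit centisecond values, while get_time16()/put_time16() now scale them by BABEL_TIME_UNITS into the microsecond-based btime. The typedef and constants below are copied or assumed purely for illustration; this is not the BIRD code itself.

```c
/* Standalone illustration only -- not the BIRD sources. btime is assumed
 * to be a 64-bit count of microseconds, matching the new timer code. */
#include <stdio.h>
#include <stdint.h>

typedef int64_t btime_us;                     /* stand-in for BIRD's btime */

#define BABEL_TIME_UNITS   10000              /* 1 centisecond = 10000 us */
#define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS)

/* Wire format carries u16 centiseconds; internally intervals are btime (us). */
static btime_us wire_to_btime(uint16_t cs)
{
  btime_us v = (btime_us) cs * BABEL_TIME_UNITS;
  return (v > BABEL_MIN_INTERVAL) ? v : BABEL_MIN_INTERVAL;
}

static uint16_t btime_to_wire(btime_us v)
{
  return (uint16_t) (v / BABEL_TIME_UNITS);
}

int main(void)
{
  /* A 4 s hello interval travels as 400 centiseconds. */
  btime_us v = wire_to_btime(400);
  printf("%d cs -> %lld us -> %d cs\n", 400, (long long) v, (int) btime_to_wire(v));
  return 0;
}
```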
+ */ + switch (msg->ae) { + case BABEL_AE_IP4: + if (TLV_OPT_LENGTH(tlv) < 4) + return PARSE_ERROR; + state->current_tlv_endpos += 4; + break; + + case BABEL_AE_IP6: + if (TLV_OPT_LENGTH(tlv) < 16) + return PARSE_ERROR; + state->current_tlv_endpos += 16; + break; + + case BABEL_AE_IP6_LL: if (TLV_OPT_LENGTH(tlv) < 8) return PARSE_ERROR; msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + state->current_tlv_endpos += 8; + break; } return PARSE_SUCCESS; @@ -431,21 +481,27 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, return PARSE_ERROR; case BABEL_AE_IP4: - /* TODO */ + if (TLV_OPT_LENGTH(tlv) < sizeof(ip4_addr)) + return PARSE_ERROR; + + state->next_hop_ip4 = ipa_from_ip4(get_ip4(&tlv->addr)); + state->current_tlv_endpos += sizeof(ip4_addr); return PARSE_IGNORE; case BABEL_AE_IP6: if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr)) return PARSE_ERROR; - state->next_hop = ipa_from_ip6(get_ip6(&tlv->addr)); + state->next_hop_ip6 = ipa_from_ip6(get_ip6(&tlv->addr)); + state->current_tlv_endpos += sizeof(ip6_addr); return PARSE_IGNORE; case BABEL_AE_IP6_LL: if (TLV_OPT_LENGTH(tlv) < 8) return PARSE_ERROR; - state->next_hop = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + state->next_hop_ip6 = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + state->current_tlv_endpos += 8; return PARSE_IGNORE; default: @@ -455,6 +511,51 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, return PARSE_IGNORE; } +/* This is called directly from babel_write_update() and returns -1 if a next + hop should be written but there is not enough space. */ +static int +babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr, + struct babel_write_state *state, uint max_len) +{ + struct babel_tlv_next_hop *tlv = (void *) hdr; + + if (ipa_zero(addr)) + { + /* Should not happen */ + return 0; + } + else if (ipa_is_ip4(addr) && !ipa_equal(addr, state->next_hop_ip4)) + { + uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip4_addr); + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len); + + tlv->ae = BABEL_AE_IP4; + put_ip4(&tlv->addr, ipa_to_ip4(addr)); + state->next_hop_ip4 = addr; + + return len; + } + else if (ipa_is_ip6(addr) && !ipa_equal(addr, state->next_hop_ip6)) + { + uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip6_addr); + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len); + + tlv->ae = BABEL_AE_IP6; + put_ip6(&tlv->addr, ipa_to_ip6(addr)); + state->next_hop_ip6 = addr; + + return len; + } + + return 0; +} + static int babel_read_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state) @@ -480,15 +581,43 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, if (tlv->plen > 0) return PARSE_ERROR; + if (msg->metric != 65535) + return PARSE_ERROR; + msg->wildcard = 1; break; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + /* Cannot omit data if there is no saved prefix */ + if (tlv->omitted && !state->def_ip4_prefix_seen) + return PARSE_ERROR; + + /* Update must have next hop, unless it is retraction */ + if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY)) + return PARSE_ERROR; + + /* Merge saved prefix and received prefix parts */ + memcpy(buf, state->def_ip4_prefix, tlv->omitted); + memcpy(buf + tlv->omitted, tlv->addr, len); + + ip4_addr prefix4 = get_ip4(buf); + net_fill_ip4(&msg->net, prefix4, tlv->plen); + + if (tlv->flags & BABEL_FLAG_DEF_PREFIX) + { + put_ip4(state->def_ip4_prefix, prefix4); + 
state->def_ip4_prefix_seen = 1; + } + + msg->next_hop = state->next_hop_ip4; + + break; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) return PARSE_ERROR; /* Cannot omit data if there is no saved prefix */ @@ -499,20 +628,23 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, memcpy(buf, state->def_ip6_prefix, tlv->omitted); memcpy(buf + tlv->omitted, tlv->addr, len); - msg->plen = tlv->plen; - msg->prefix = ipa_from_ip6(get_ip6(buf)); + ip6_addr prefix6 = get_ip6(buf); + net_fill_ip6(&msg->net, prefix6, tlv->plen); if (tlv->flags & BABEL_FLAG_DEF_PREFIX) { - put_ip6(state->def_ip6_prefix, msg->prefix); + put_ip6(state->def_ip6_prefix, prefix6); state->def_ip6_prefix_seen = 1; } if (tlv->flags & BABEL_FLAG_ROUTER_ID) { - state->router_id = ((u64) _I2(msg->prefix)) << 32 | _I3(msg->prefix); + state->router_id = ((u64) _I2(prefix6)) << 32 | _I3(prefix6); state->router_id_seen = 1; } + + msg->next_hop = state->next_hop_ip6; + break; case BABEL_AE_IP6_LL: @@ -531,8 +663,8 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, } msg->router_id = state->router_id; - msg->next_hop = state->next_hop; msg->sender = state->saddr; + state->current_tlv_endpos += len; return PARSE_SUCCESS; } @@ -541,7 +673,6 @@ static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_write_state *state, uint max_len) { - struct babel_tlv_update *tlv = (void *) hdr; struct babel_msg_update *msg = &m->update; uint len0 = 0; @@ -550,16 +681,35 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, * both of them. There is enough space for the Router-ID TLV, because * sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update). * - * Router ID is not used for retractions, so do not us it in such case. + * Router ID is not used for retractions, so do not use it in such case. */ if ((msg->metric < BABEL_INFINITY) && (!state->router_id_seen || (msg->router_id != state->router_id))) { len0 = babel_write_router_id(hdr, msg->router_id, state, max_len); - tlv = (struct babel_tlv_update *) NEXT_TLV(tlv); + hdr = NEXT_TLV(hdr); } - uint len = sizeof(struct babel_tlv_update) + BYTES(msg->plen); + /* + * We also may add Next Hop TLV for regular updates. It may fail for not + * enough space or it may be unnecessary as the next hop is the same as the + * last one already announced. So we handle all three cases. 
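The three possible outcomes described above (written, unnecessary because unchanged, or out of space) can be shown with a minimal standalone toy; the struct and the fixed size below are invented for the example and do not match the real babel_write_state or TLV layout.

```c
/* Standalone toy of the three-way convention; the types and the fixed TLV
 * size are invented for the example and do not match the real structures. */
#include <stdio.h>
#include <string.h>

struct wstate { unsigned last_hop; };          /* stands in for babel_write_state */

/* Returns -1 if there is not enough space, 0 if the next hop is unchanged
 * (nothing to write), or the number of bytes appended. */
static int write_next_hop(unsigned hop, struct wstate *s,
                          unsigned char *buf, unsigned max_len)
{
  const unsigned len = 8;                      /* pretend TLV size */

  if (hop == s->last_hop)
    return 0;                                  /* same as last announced */

  if (len > max_len)
    return -1;                                 /* caller defers to next packet */

  memset(buf, 0, len);                         /* stands in for TLV encoding */
  s->last_hop = hop;
  return len;
}

int main(void)
{
  unsigned char buf[64];
  struct wstate s = { .last_hop = 0 };

  int a = write_next_hop(1, &s, buf, sizeof(buf)); /* 8: written            */
  int b = write_next_hop(1, &s, buf, sizeof(buf)); /* 0: unchanged, skipped */
  int c = write_next_hop(2, &s, buf, 4);           /* -1: not enough room   */

  printf("%d %d %d\n", a, b, c);
  return 0;
}
```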
+ */ + if (msg->metric < BABEL_INFINITY) + { + int l = babel_write_next_hop(hdr, msg->next_hop, state, max_len - len0); + if (l < 0) + return 0; + + if (l) + { + len0 += l; + hdr = NEXT_TLV(hdr); + } + } + + struct babel_tlv_update *tlv = (void *) hdr; + uint len = sizeof(struct babel_tlv_update) + NET_SIZE(&msg->net); if (len0 + len > max_len) return 0; @@ -572,11 +722,39 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, tlv->ae = BABEL_AE_WILDCARD; tlv->plen = 0; } + else if (msg->net.type == NET_IP4) + { + tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } else { tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; - put_ip6_px(tlv->addr, msg->prefix, msg->plen); + tlv->plen = net6_pxlen(&msg->net); + + /* Address compression - omit initial matching bytes */ + u8 buf[16], omit; + put_ip6(buf, net6_prefix(&msg->net)); + omit = bytes_equal(buf, state->def_ip6_prefix, + MIN(tlv->plen, state->def_ip6_pxlen) / 8); + + if (omit > 0) + { + memcpy(tlv->addr, buf + omit, NET_SIZE(&msg->net) - omit); + + tlv->omitted = omit; + tlv->length -= omit; + len -= omit; + } + else + { + put_ip6_px(tlv->addr, &msg->net); + tlv->flags |= BABEL_FLAG_DEF_PREFIX; + + put_ip6(state->def_ip6_prefix, net6_prefix(&msg->net)); + state->def_ip6_pxlen = tlv->plen; + } } put_time16(&tlv->interval, msg->interval); @@ -606,18 +784,25 @@ babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, return PARSE_SUCCESS; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) + return PARSE_ERROR; + + read_ip4_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + return PARSE_SUCCESS; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) return PARSE_ERROR; if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) return PARSE_ERROR; - msg->plen = tlv->plen; - msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + read_ip6_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); return PARSE_SUCCESS; case BABEL_AE_IP6_LL: @@ -637,7 +822,7 @@ babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, struct babel_tlv_route_request *tlv = (void *) hdr; struct babel_msg_route_request *msg = &m->route_request; - uint len = sizeof(struct babel_tlv_route_request) + BYTES(msg->plen); + uint len = sizeof(struct babel_tlv_route_request) + NET_SIZE(&msg->net); if (len > max_len) return 0; @@ -649,11 +834,17 @@ babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, tlv->ae = BABEL_AE_WILDCARD; tlv->plen = 0; } + else if (msg->net.type == NET_IP4) + { + tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } else { tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; - put_ip6_px(tlv->addr, msg->prefix, msg->plen); + tlv->plen = net6_pxlen(&msg->net); + put_ip6_px(tlv->addr, &msg->net); } return len; @@ -681,18 +872,25 @@ babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m, return PARSE_ERROR; case BABEL_AE_IP4: - /* TODO */ - return PARSE_IGNORE; + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) + return PARSE_ERROR; + + read_ip4_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); + return PARSE_SUCCESS; case BABEL_AE_IP6: - if (tlv->plen > MAX_PREFIX_LENGTH) + if (tlv->plen > IP6_MAX_PREFIX_LENGTH) 
return PARSE_ERROR; if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen)) return PARSE_ERROR; - msg->plen = tlv->plen; - msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + read_ip6_px(&msg->net, tlv->addr, tlv->plen); + state->current_tlv_endpos += BYTES(tlv->plen); return PARSE_SUCCESS; case BABEL_AE_IP6_LL: @@ -712,23 +910,70 @@ babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m, struct babel_tlv_seqno_request *tlv = (void *) hdr; struct babel_msg_seqno_request *msg = &m->seqno_request; - uint len = sizeof(struct babel_tlv_seqno_request) + BYTES(msg->plen); + uint len = sizeof(struct babel_tlv_seqno_request) + NET_SIZE(&msg->net); if (len > max_len) return 0; TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len); - tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; + + if (msg->net.type == NET_IP4) + { + tlv->ae = BABEL_AE_IP4; + tlv->plen = net4_pxlen(&msg->net); + put_ip4_px(tlv->addr, &msg->net); + } + else + { + tlv->ae = BABEL_AE_IP6; + tlv->plen = net6_pxlen(&msg->net); + put_ip6_px(tlv->addr, &msg->net); + } + put_u16(&tlv->seqno, msg->seqno); tlv->hop_count = msg->hop_count; put_u64(&tlv->router_id, msg->router_id); - put_ip6_px(tlv->addr, msg->prefix, msg->plen); return len; } static inline int +babel_read_subtlvs(struct babel_tlv *hdr, + union babel_msg *msg UNUSED, + struct babel_parse_state *state) +{ + struct babel_tlv *tlv; + + for (tlv = (void *) hdr + state->current_tlv_endpos; + (void *) tlv < (void *) hdr + TLV_LENGTH(hdr); + tlv = NEXT_TLV(tlv)) + { + /* + * The subtlv type space is non-contiguous (due to the mandatory bit), so + * use a switch for dispatch instead of the mapping array we use for TLVs + */ + switch (tlv->type) + { + case BABEL_SUBTLV_PAD1: + case BABEL_SUBTLV_PADN: + /* FIXME: Framing errors in PADN are silently ignored, see babel_process_packet() */ + break; + + default: + /* Unknown mandatory subtlv; PARSE_IGNORE ignores the whole TLV */ + if (tlv->type > 128) + { + DBG("Babel: Mandatory subtlv %d found; skipping TLV\n", tlv->type); + return PARSE_IGNORE; + } + break; + } + } + + return PARSE_SUCCESS; +} + +static inline int babel_read_tlv(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state) @@ -741,8 +986,14 @@ babel_read_tlv(struct babel_tlv *hdr, if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length) return PARSE_ERROR; + state->current_tlv_endpos = tlv_data[hdr->type].min_length; memset(msg, 0, sizeof(*msg)); - return tlv_data[hdr->type].read_tlv(hdr, msg, state); + + int res = tlv_data[hdr->type].read_tlv(hdr, msg, state); + if (res != PARSE_SUCCESS) + return res; + + return babel_read_subtlvs(hdr, msg, state); } static uint @@ -797,7 +1048,7 @@ static uint babel_write_queue(struct babel_iface *ifa, list *queue) { struct babel_proto *p = ifa->proto; - struct babel_write_state state = {}; + struct babel_write_state state = { .next_hop_ip6 = ifa->addr }; if (EMPTY_LIST(*queue)) return 0; @@ -933,10 +1184,10 @@ babel_process_packet(struct babel_pkt_header *pkt, int len, byte *end = (byte *)pkt + plen; struct babel_parse_state state = { - .proto = p, - .ifa = ifa, - .saddr = saddr, - .next_hop = saddr, + .proto = p, + .ifa = ifa, + .saddr = saddr, + .next_hop_ip6 = saddr, }; if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION)) @@ -1045,7 +1296,7 @@ babel_rx_hook(sock *sk, uint len) sk->iface->name, sk->faddr, sk->laddr); /* Silently ignore my own packets */ - if (ipa_equal(ifa->iface->addr->ip, sk->faddr)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (!ipa_is_link_local(sk->faddr)) @@ -1080,6 +1331,7 @@ 
babel_open_socket(struct babel_iface *ifa) sk->sport = ifa->cf->port; sk->dport = ifa->cf->port; sk->iface = ifa->iface; + sk->saddr = ifa->addr; sk->rx_hook = babel_rx_hook; sk->tx_hook = babel_tx_hook; diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index c28cedec..402122fc 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -1,5 +1,6 @@ -source=bfd.c packets.c io.c -root-rel=../../ -dir-name=proto/bfd +src := bfd.c io.c packets.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 79135fae..67ec2270 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -64,16 +64,15 @@ * ready, the protocol just creates a BFD request like any other protocol. * * The protocol uses a new generic event loop (structure &birdloop) from |io.c|, - * which supports sockets, timers and events like the main loop. Timers - * (structure &timer2) are new microsecond based timers, while sockets and - * events are the same. A birdloop is associated with a thread (field @thread) - * in which event hooks are executed. Most functions for setting event sources - * (like sk_start() or tm2_start()) must be called from the context of that - * thread. Birdloop allows to temporarily acquire the context of that thread for - * the main thread by calling birdloop_enter() and then birdloop_leave(), which - * also ensures mutual exclusion with all event hooks. Note that resources - * associated with a birdloop (like timers) should be attached to the - * independent resource pool, detached from the main resource tree. + * which supports sockets, timers and events like the main loop. A birdloop is + * associated with a thread (field @thread) in which event hooks are executed. + * Most functions for setting event sources (like sk_start() or tm_start()) must + * be called from the context of that thread. Birdloop allows to temporarily + * acquire the context of that thread for the main thread by calling + * birdloop_enter() and then birdloop_leave(), which also ensures mutual + * exclusion with all event hooks. Note that resources associated with a + * birdloop (like timers) should be attached to the independent resource pool, + * detached from the main resource tree. * * There are two kinds of interaction between the BFD core (running in the BFD * thread) and the rest of BFD (running in the main thread). 
The first kind are @@ -112,7 +111,7 @@ #define HASH_IP_KEY(n) n->addr #define HASH_IP_NEXT(n) n->next_ip #define HASH_IP_EQ(a,b) ipa_equal(a,b) -#define HASH_IP_FN(k) ipa_hash32(k) +#define HASH_IP_FN(k) ipa_hash(k) static list bfd_proto_list; static list bfd_wait_list; @@ -145,6 +144,7 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag) bfd_lock_sessions(p); s->loc_state = state; s->loc_diag = diag; + s->last_state_change = current_time(); notify = !NODE_VALID(&s->n); if (notify) @@ -176,7 +176,7 @@ bfd_session_update_tx_interval(struct bfd_session *s) return; /* Set timer relative to last tx_timer event */ - tm2_set(s->tx_timer, s->last_tx + tx_int_l); + tm_set(s->tx_timer, s->last_tx + tx_int_l); } static void @@ -190,7 +190,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) if (!s->last_rx) return; - tm2_set(s->hold_timer, s->last_rx + timeout); + tm_set(s->hold_timer, s->last_rx + timeout); } static void @@ -211,16 +211,16 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) goto stop; /* So TX timer should run */ - if (reset || !tm2_active(s->tx_timer)) + if (reset || !tm_active(s->tx_timer)) { s->last_tx = 0; - tm2_start(s->tx_timer, 0); + tm_start(s->tx_timer, 0); } return; stop: - tm2_stop(s->tx_timer); + tm_stop(s->tx_timer); s->last_tx = 0; } @@ -379,7 +379,7 @@ bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr) } static void -bfd_tx_timer_hook(timer2 *t) +bfd_tx_timer_hook(timer *t) { struct bfd_session *s = t->data; @@ -388,7 +388,7 @@ bfd_tx_timer_hook(timer2 *t) } static void -bfd_hold_timer_hook(timer2 *t) +bfd_hold_timer_hook(timer *t) { bfd_session_timeout(t->data); } @@ -432,13 +432,13 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * s->passive = ifa->cf->passive; s->tx_csn = random_u32(); - s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); - s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); + s->tx_timer = tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); + s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); bfd_session_update_tx_interval(s); bfd_session_control_tx_timer(s, 1); init_list(&s->request_list); - s->last_state_change = now; + s->last_state_change = current_time(); TRACE(D_EVENTS, "Session to %I added", s->addr); @@ -879,9 +879,6 @@ bfd_notify_hook(sock *sk, uint len UNUSED) diag = s->loc_diag; bfd_unlock_sessions(p); - /* FIXME: convert to btime and move to bfd_session_update_state() */ - s->last_state_change = now; - s->notify_running = 1; WALK_LIST_DELSAFE(n, nn, s->request_list) bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); @@ -954,7 +951,7 @@ bfd_init_all(void) static struct proto * bfd_init(struct proto_config *c) { - struct proto *p = proto_new(c, sizeof(struct bfd_proto)); + struct proto *p = proto_new(c); p->neigh_notify = bfd_neigh_notify; @@ -983,8 +980,10 @@ bfd_start(struct proto *P) add_tail(&bfd_proto_list, &p->bfd_node); birdloop_enter(p->loop); - p->rx_1 = bfd_open_rx_sk(p, 0); - p->rx_m = bfd_open_rx_sk(p, 1); + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); birdloop_leave(p->loop); bfd_take_requests(p); @@ -1078,7 +1077,7 @@ bfd_show_sessions(struct proto *P) byte tbuf[TM_DATETIME_BUFFER_SIZE]; struct bfd_proto *p = (struct bfd_proto *) P; uint state, diag UNUSED; - u32 tx_int, timeout; + btime tx_int, timeout; const char *ifname; if 
(p->p.proto_state != PS_UP) @@ -1099,15 +1098,14 @@ bfd_show_sessions(struct proto *P) state = s->loc_state; diag = s->loc_diag; ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---"; - tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0; - timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult; + tx_int = s->last_tx ? MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0; + timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult; state = (state < 4) ? state : 0; - tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change); + tm_format_time(tbuf, &config->tf_proto, s->last_state_change); - cli_msg(-1020, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u", - s->addr, ifname, bfd_state_names[state], tbuf, - tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000); + cli_msg(-1020, "%-25I %-10s %-10s %-10s %7t %7t", + s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout); } HASH_WALK_END; @@ -1118,6 +1116,7 @@ bfd_show_sessions(struct proto *P) struct protocol proto_bfd = { .name = "BFD", .template = "bfd%d", + .proto_size = sizeof(struct bfd_proto), .config_size = sizeof(struct bfd_config), .init = bfd_init, .start = bfd_start, diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 46e09879..bc4fe969 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -87,8 +87,10 @@ struct bfd_proto sock *notify_ws; list notify_list; - sock *rx_1; - sock *rx_m; + sock *rx4_1; + sock *rx6_1; + sock *rx4_m; + sock *rx6_m; list iface_list; }; @@ -138,11 +140,11 @@ struct bfd_session btime last_tx; /* Time of last sent periodic control packet */ btime last_rx; /* Time of last received valid control packet */ - timer2 *tx_timer; /* Periodic control packet timer */ - timer2 *hold_timer; /* Timer for session down detection time */ + timer *tx_timer; /* Periodic control packet timer */ + timer *hold_timer; /* Timer for session down detection time */ list request_list; /* List of client requests (struct bfd_request) */ - bird_clock_t last_state_change; /* Time of last state change */ + btime last_state_change; /* Time of last state change */ u8 notify_running; /* 1 if notify hooks are running */ u8 rx_csn_known; /* Received crypto sequence number is known */ @@ -201,7 +203,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); -sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop); +sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 8f4f5007..b01cbfce 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -18,10 +18,10 @@ #include "proto/bfd/io.h" #include "lib/buffer.h" -#include "lib/heap.h" #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/socket.h" @@ -31,16 +31,12 @@ struct birdloop pthread_t thread; pthread_mutex_t mutex; - btime last_time; - btime real_time; - u8 use_monotonic_clock; - u8 stop_called; u8 poll_active; u8 wakeup_masked; int wakeup_fds[2]; - BUFFER(timer2 *) timers; + struct timeloop time; list event_list; list sock_list; uint sock_num; @@ -57,6 +53,7 @@ struct birdloop */ static pthread_key_t current_loop_key; +extern pthread_key_t current_time_key; static inline struct birdloop * birdloop_current(void) @@ -68,6 +65,7 @@ static inline void birdloop_set_current(struct birdloop *loop) { 
pthread_setspecific(current_loop_key, loop); + pthread_setspecific(current_time_key, loop ? &loop->time : &main_timeloop); } static inline void @@ -78,98 +76,6 @@ birdloop_init_current(void) /* - * Time clock - */ - -static void times_update_alt(struct birdloop *loop); - -static void -times_init(struct birdloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - { - log(L_WARN "Monotonic clock is missing"); - - loop->use_monotonic_clock = 0; - loop->last_time = 0; - loop->real_time = 0; - times_update_alt(loop); - return; - } - - if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) - log(L_WARN "Monotonic clock is crazy"); - - loop->use_monotonic_clock = 1; - loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); - loop->real_time = 0; -} - -static void -times_update_pri(struct birdloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - - btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); - - if (new_time < loop->last_time) - log(L_ERR "Monotonic clock is broken"); - - loop->last_time = new_time; - loop->real_time = 0; -} - -static void -times_update_alt(struct birdloop *loop) -{ - struct timeval tv; - int rv; - - rv = gettimeofday(&tv, NULL); - if (rv < 0) - die("gettimeofday: %m"); - - btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec; - btime delta = new_time - loop->real_time; - - if ((delta < 0) || (delta > (60 S))) - { - if (loop->real_time) - log(L_WARN "Time jump, delta %d us", (int) delta); - - delta = 100 MS; - } - - loop->last_time += delta; - loop->real_time = new_time; -} - -static void -times_update(struct birdloop *loop) -{ - if (loop->use_monotonic_clock) - times_update_pri(loop); - else - times_update_alt(loop); -} - -btime -current_time(void) -{ - return birdloop_current()->last_time; -} - - -/* * Wakeup code for birdloop */ @@ -238,7 +144,7 @@ wakeup_drain(struct birdloop *loop) } static inline void -wakeup_do_kick(struct birdloop *loop) +wakeup_do_kick(struct birdloop *loop) { pipe_kick(loop->wakeup_fds[1]); } @@ -252,6 +158,16 @@ wakeup_kick(struct birdloop *loop) loop->wakeup_masked = 2; } +/* For notifications from outside */ +void +wakeup_kick_current(void) +{ + struct birdloop *loop = birdloop_current(); + + if (loop && loop->poll_active) + wakeup_kick(loop); +} + /* * Events @@ -272,7 +188,7 @@ events_init(struct birdloop *loop) static void events_fire(struct birdloop *loop) { - times_update(loop); + times_update(&loop->time); ev_run_list(&loop->event_list); } @@ -292,154 +208,6 @@ ev2_schedule(event *e) /* - * Timers - */ - -#define TIMER_LESS(a,b) ((a)->expires < (b)->expires) -#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ - heap[a]->index = (a), heap[b]->index = (b)) - -static inline uint timers_count(struct birdloop *loop) -{ return loop->timers.used - 1; } - -static inline timer2 *timers_first(struct birdloop *loop) -{ return (loop->timers.used > 1) ? 
loop->timers.data[1] : NULL; } - - -static void -tm2_free(resource *r) -{ - timer2 *t = (timer2 *) r; - - tm2_stop(t); -} - -static void -tm2_dump(resource *r) -{ - timer2 *t = (timer2 *) r; - - debug("(code %p, data %p, ", t->hook, t->data); - if (t->randomize) - debug("rand %d, ", t->randomize); - if (t->recurrent) - debug("recur %d, ", t->recurrent); - if (t->expires) - debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); - else - debug("inactive)\n"); -} - - -static struct resclass tm2_class = { - "Timer", - sizeof(timer2), - tm2_free, - tm2_dump, - NULL, - NULL -}; - -timer2 * -tm2_new(pool *p) -{ - timer2 *t = ralloc(p, &tm2_class); - t->index = -1; - return t; -} - -void -tm2_set(timer2 *t, btime when) -{ - struct birdloop *loop = birdloop_current(); - uint tc = timers_count(loop); - - if (!t->expires) - { - t->index = ++tc; - t->expires = when; - BUFFER_PUSH(loop->timers) = t; - HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP); - } - else if (t->expires < when) - { - t->expires = when; - HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - } - else if (t->expires > when) - { - t->expires = when; - HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - } - - if (loop->poll_active && (t->index == 1)) - wakeup_kick(loop); -} - -void -tm2_start(timer2 *t, btime after) -{ - tm2_set(t, current_time() + MAX(after, 0)); -} - -void -tm2_stop(timer2 *t) -{ - if (!t->expires) - return; - - struct birdloop *loop = birdloop_current(); - uint tc = timers_count(loop); - - HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); - BUFFER_POP(loop->timers); - - t->index = -1; - t->expires = 0; -} - -static void -timers_init(struct birdloop *loop) -{ - BUFFER_INIT(loop->timers, loop->pool, 4); - BUFFER_PUSH(loop->timers) = NULL; -} - -static void -timers_fire(struct birdloop *loop) -{ - btime base_time; - timer2 *t; - - times_update(loop); - base_time = loop->last_time; - - while (t = timers_first(loop)) - { - if (t->expires > base_time) - return; - - if (t->recurrent) - { - btime when = t->expires + t->recurrent; - - if (when <= loop->last_time) - when = loop->last_time + t->recurrent; - - if (t->randomize) - when += random() % (t->randomize + 1); - - tm2_set(t, when); - } - else - tm2_stop(t); - - t->hook(t); - } -} - - -/* * Sockets */ @@ -586,7 +354,7 @@ sockets_fire(struct birdloop *loop) sock **psk = loop->poll_sk.data; int poll_num = loop->poll_fd.used - 1; - times_update(loop); + times_update(&loop->time); /* Last fd is internal wakeup fd */ if (pfd[poll_num].revents & POLLIN) @@ -634,11 +402,10 @@ birdloop_new(void) loop->pool = p; pthread_mutex_init(&loop->mutex, NULL); - times_init(loop); wakeup_init(loop); events_init(loop); - timers_init(loop); + timers_init(&loop->time, p); sockets_init(loop); return loop; @@ -710,7 +477,7 @@ static void * birdloop_main(void *arg) { struct birdloop *loop = arg; - timer2 *t; + timer *t; int rv, timeout; birdloop_set_current(loop); @@ -719,13 +486,13 @@ birdloop_main(void *arg) while (1) { events_fire(loop); - timers_fire(loop); + timers_fire(&loop->time); - times_update(loop); + times_update(&loop->time); if (events_waiting(loop)) timeout = 0; - else if (t = timers_first(loop)) - timeout = (tm2_remains(t) TO_MS) + 1; + else if (t = timers_first(&loop->time)) + timeout = (tm_remains(t) TO_MS) + 1; else timeout = -1; @@ -756,7 +523,7 @@ birdloop_main(void *arg) if (rv) sockets_fire(loop); - timers_fire(loop); + timers_fire(&loop->time); } 
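The io.c changes above retire the BFD-private timer2 code in favour of the shared microsecond timers used throughout this diff (tm_new_init(), tm_start(), tm_set()). A minimal standalone sketch of the relative-versus-absolute scheduling convention follows, with btime assumed to be a 64-bit microsecond count and the postfix S/MS multipliers mimicking the style seen in the diff; it is not the lib/timer implementation.

```c
/* Standalone sketch, not lib/timer: btime is assumed to be microseconds and
 * the postfix S/MS multipliers mimic the style used in this diff. */
#include <stdio.h>
#include <stdint.h>

typedef int64_t btime;                /* microseconds */
#define MS *1000LL
#define S  *1000000LL

struct tmr { btime expires; };        /* 0 = inactive */

static btime fake_now = 42 S;         /* stand-in for current_time() */

/* Absolute scheduling, as in tm_set(ifa->timer, next_event). */
static void sketch_tm_set(struct tmr *t, btime when) { t->expires = when; }

/* Relative scheduling, as in tm_start(ifa->timer, 100 MS). */
static void sketch_tm_start(struct tmr *t, btime after)
{ sketch_tm_set(t, fake_now + ((after > 0) ? after : 0)); }

int main(void)
{
  struct tmr hello = { 0 };

  sketch_tm_start(&hello, 100 MS);        /* fires 100 ms from "now" */
  printf("expires at %lld us\n", (long long) hello.expires);

  sketch_tm_set(&hello, fake_now + 4 S);  /* absolute, e.g. ifa->next_hello */
  printf("expires at %lld us\n", (long long) hello.expires);
  return 0;
}
```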
loop->stop_called = 0; diff --git a/proto/bfd/io.h b/proto/bfd/io.h index 641ee054..ec706e9a 100644 --- a/proto/bfd/io.h +++ b/proto/bfd/io.h @@ -11,80 +11,15 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" +#include "lib/timer.h" #include "lib/socket.h" -// #include "lib/timer.h" -typedef struct timer2 -{ - resource r; - void (*hook)(struct timer2 *); - void *data; - - btime expires; /* 0=inactive */ - uint randomize; /* Amount of randomization */ - uint recurrent; /* Timer recurrence */ - - int index; -} timer2; - - -btime current_time(void); - void ev2_schedule(event *e); - -timer2 *tm2_new(pool *p); -void tm2_set(timer2 *t, btime when); -void tm2_start(timer2 *t, btime after); -void tm2_stop(timer2 *t); - -static inline int -tm2_active(timer2 *t) -{ - return t->expires != 0; -} - -static inline btime -tm2_remains(timer2 *t) -{ - btime now = current_time(); - return (t->expires > now) ? (t->expires - now) : 0; -} - -static inline timer2 * -tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand) -{ - timer2 *t = tm2_new(p); - t->hook = hook; - t->data = data; - t->recurrent = rec; - t->randomize = rand; - return t; -} - -static inline void -tm2_set_max(timer2 *t, btime when) -{ - if (when > t->expires) - tm2_set(t, when); -} - -/* -static inline void -tm2_start_max(timer2 *t, btime after) -{ - btime rem = tm2_remains(t); - tm2_start(t, MAX_(rem, after)); -} -*/ - - void sk_start(sock *s); void sk_stop(sock *s); - - struct birdloop *birdloop_new(void); void birdloop_start(struct birdloop *loop); void birdloop_stop(struct birdloop *loop); diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 129db72f..b76efda6 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -248,7 +248,7 @@ bfd_check_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_ /* BFD CSNs are in 32-bit circular number space */ u32 csn = ntohl(auth->csn); if (s->rx_csn_known && - (((csn - s->rx_csn) > (3 * s->detect_mult)) || + (((csn - s->rx_csn) > (3 * (uint) s->detect_mult)) || (meticulous && (csn == s->rx_csn)))) { /* We want to report both new and old CSN */ @@ -405,10 +405,11 @@ bfd_err_hook(sock *sk, int err) } sock * -bfd_open_rx_sk(struct bfd_proto *p, int multihop) +bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) { sock *sk = sk_new(p->tpool); sk->type = SK_UDP; + sk->subtype = af; sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; sk->data = p; @@ -421,10 +422,6 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop) sk->priority = sk_priority_control; sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); -#ifdef IPV6 - sk->flags |= SKF_V6ONLY; -#endif - if (sk_open(sk) < 0) goto err; @@ -456,10 +453,6 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) sk->ttl = ifa ? 255 : -1; sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT; -#ifdef IPV6 - sk->flags |= SKF_V6ONLY; -#endif - if (sk_open(sk) < 0) goto err; diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index a634cf0d..00aaef5e 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -1,5 +1,6 @@ -source=bgp.c attrs.c packets.c -root-rel=../../ -dir-name=proto/bgp +src := attrs.c bgp.c packets.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index b9e2490d..882ba44e 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2,6 +2,8 @@ * BIRD -- BGP Attributes * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -39,890 +41,1249 @@ * specifies that such updates should be ignored, but that is generally * a bad idea. * - * Error checking of optional transitive attributes is done according to - * draft-ietf-idr-optional-transitive-03, but errors are handled always - * as withdraws. + * BGP attribute table has several hooks: * - * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed, - * but unknown segments cause a session drop with Malformed AS_PATH - * error (see validate_path()). The behavior in such case is not - * explicitly specified by RFC 4271. RFC 5065 specifies that - * inconsistent AS_CONFED_* segments should cause a session drop, but - * implementations that pass invalid AS_CONFED_* segments are - * widespread. + * export - Hook that validates and normalizes attribute during export phase. + * Receives eattr, may modify it (e.g., sort community lists for canonical + * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if + * necessary. May assume that eattr has value valid w.r.t. its type, but may be + * invalid w.r.t. BGP constraints. Optional. * - * Error handling of AS4_* attributes is done as specified by RFC 6793. There - * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR - * that are not handled by that RFC, these are logged and ignored (see - * bgp_reconstruct_4b_attrs()). + * encode - Hook that converts internal representation to external one during + * packet writing. Receives eattr and puts it in the buffer (including attribute + * header). Returns number of bytes, or -1 if not enough space. May assume that + * eattr has value valid w.r.t. its type and validated by export hook. Mandatory + * for all known attributes that exist internally after export phase (i.e., all + * except pseudoattributes MP_(UN)REACH_NLRI). + * + * decode - Hook that converts external representation to internal one during + * packet parsing. Receives attribute data in buffer, validates it and adds + * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or + * bgp_parse_error() may be used to escape. Mandatory for all known attributes. + * + * format - Optional hook that converts eattr to textual representation. 
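 *
 * For illustration, one entry of the attribute table ties these hooks to the
 * attribute's type and flags; this mirrors the BA_ORIGIN descriptor defined
 * later in this file:
 *
 *   [BA_ORIGIN] = {
 *     .name = "origin",
 *     .type = EAF_TYPE_INT,
 *     .flags = BAF_TRANSITIVE,
 *     .export = bgp_export_origin,
 *     .encode = bgp_encode_u8,
 *     .decode = bgp_decode_origin,
 *     .format = bgp_format_origin,
 *   },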
*/ +// XXXX review pool usage : c->c.proto->pool -static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH -#ifndef IPV6 -,BA_NEXT_HOP -#endif -}; -struct attr_desc { - char *name; - int expected_length; - int expected_flags; - int type; - int allow_in_ebgp; - int (*validate)(struct bgp_proto *p, byte *attr, int len); - void (*format)(eattr *ea, byte *buf, int buflen); +struct bgp_attr_desc { + const char *name; + uint type; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + void (*format)(eattr *ea, byte *buf, uint size); }; -#define IGNORE -1 -#define WITHDRAW -2 +static const struct bgp_attr_desc bgp_attr_table[]; + +static inline int bgp_attr_known(uint code); + +eattr * +bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) +{ + ASSERT(bgp_attr_known(code)); + + ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); + eattr *e = &a->attrs[0]; + + a->flags = EALF_SORTED; + a->count = 1; + a->next = *attrs; + *attrs = a; + + e->id = EA_CODE(EAP_BGP, code); + e->type = bgp_attr_table[code].type; + e->flags = flags; + + if (e->type & EAF_EMBEDDED) + e->u.data = (u32) val; + else + e->u.ptr = (struct adata *) val; + + return e; +} + + + +#define REPORT(msg, args...) \ + ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) + +#define DISCARD(msg, args...) \ + ({ REPORT(msg, ## args); return; }) + +#define WITHDRAW(msg, args...) \ + ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) + +#define UNSET(a) \ + ({ a->type = EAF_TYPE_UNDEF; return; }) + +#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor" +#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor" +#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)" +#define BAD_VALUE "Malformed %s attribute - invalid value (%u)" +#define NO_MANDATORY "Missing mandatory %s attribute" + + +static inline int +bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len) +{ + *buf++ = flags; + *buf++ = code; + *buf++ = len; + return 3; +} + +static inline int +bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len) +{ + *buf++ = flags | BAF_EXT_LEN; + *buf++ = code; + put_u16(buf, len); + return 4; +} + +static inline int +bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len) +{ + if (len < 256) + return bgp_put_attr_hdr3(buf, code, flags, len); + else + return bgp_put_attr_hdr4(buf, code, flags, len); +} static int -bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED) +bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - if (*a > 2) - return 6; - return 0; + if (size < (3+1)) + return -1; + + bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + buf[3] = a->u.data; + + return 3+1; } -static void -bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED) +static int +bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" }; + if (size < (3+4)) + return -1; + + bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + put_u32(buf+3, a->u.data); - bsprintf(buf, bgp_origin_names[a->u.data]); + return 3+4; } static int -path_segment_contains(byte *p, int bs, u32 asn) +bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - int i; - int len = p[1]; - p += 2; + uint len = 
a->u.ptr->length; - for(i=0; i<len; i++) - { - u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p); - if (asn2 == asn) - return 1; - p += bs; - } + if (size < (4+len)) + return -1; - return 0; + uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); + + return hdr + len; } -/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */ static int -validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength) +bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len) { - int res = 0; - u8 *a, *dst; - int len, plen; + if (size < (4+len)) + return -1; - dst = a = idata; - len = *ilength; + uint hdr = bgp_put_attr_hdr(buf, code, flags, len); + memcpy(buf + hdr, data, len); - while (len) - { - if (len < 2) - return -1; - - plen = 2 + bs * a[1]; - if (len < plen) - return -1; - - if (a[1] == 0) - { - log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it", - p->p.name, as_path ? "AS" : "AS4"); - goto skip; - } - - switch (a[0]) - { - case AS_PATH_SET: - res++; - break; - - case AS_PATH_SEQUENCE: - res += a[1]; - break; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - if (as_path && path_segment_contains(a, bs, p->remote_as)) - { - log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name); - return -1; - } - - log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment", - p->p.name, as_path ? "AS" : "AS4"); - goto skip; - - default: - return -1; - } - - if (dst != a) - memmove(dst, a, plen); - dst += plen; - - skip: - len -= plen; - a += plen; - } + return hdr + len; +} - *ilength = dst - idata; - return res; +static int +bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) +{ + return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } -static inline int -validate_as_path(struct bgp_proto *p, byte *a, int *len) + +/* + * Attribute hooks + */ + +static void +bgp_export_origin(struct bgp_export_state *s, eattr *a) { - return validate_path(p, 1, p->as4_session ? 4 : 2, a, len); + if (a->u.data > 2) + WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data); } -static inline int -validate_as4_path(struct bgp_proto *p, struct adata *path) +static void +bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - return validate_path(p, 0, 4, path->data, &path->length); + if (len != 1) + WITHDRAW(BAD_LENGTH, "ORIGIN", len); + + if (data[0] > 2) + WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); + + bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); } +static void +bgp_format_origin(eattr *a, byte *buf, uint size UNUSED) +{ + static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" }; + + bsprintf(buf, (a->u.data <= 2) ? 
bgp_origin_names[a->u.data] : "?"); +} + + static int -bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6) +bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { -#ifdef IPV6 - return IGNORE; -#else - ip_addr addr; + byte *data = a->u.ptr->data; + uint len = a->u.ptr->length; + + if (!s->as4_session) + { + /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(len); + len = as_path_32to16(data, src, len); + } + + return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len); +} - memcpy(&addr, a, len); - ipa_ntoh(addr); - if (ipa_classify(addr) & IADDR_HOST) +static void +bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + struct bgp_proto *p = s->proto; + int as_length = s->as4_session ? 4 : 2; + int as_confed = p->cf->confederation && p->is_interior; + char err[128]; + + if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err))) + WITHDRAW("Malformed AS_PATH attribute - %s", err); + + /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */ + if (p->is_interior && !p->is_internal && + ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))) + WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE"); + + if (!s->as4_session) + { + /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(2*len); + len = as_path_16to32(data, src, len); + } + + bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); +} + + +static int +bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size) +{ + /* + * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP, + * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we + * store it and encode it later by AFI-specific hooks. 
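 *
 * For the traditional IPv4 case the attribute is 7 bytes on the wire; assuming
 * the usual flags octet 0x40 (well-known transitive) and a next hop of
 * 192.0.2.1, the encoded bytes would be:
 *
 *   0x40                  attribute flags (transitive)
 *   0x03                  attribute code (NEXT_HOP)
 *   0x04                  attribute length
 *   0xC0 0x00 0x02 0x01   next hop address 192.0.2.1
 *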
+ */ + + if (s->channel->afi == BGP_AF_IPV4) + { + ASSERT(a->u.ptr->length == sizeof(ip_addr)); + + if (size < (3+4)) + return -1; + + bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4); + put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data )); + + return 3+4; + } + else + { + s->mp_next_hop = a; return 0; + } +} + +static void +bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) +{ + if (len != 4) + WITHDRAW(BAD_LENGTH, "NEXT_HOP", len); + + /* Semantic checks are done later */ + s->ip_next_hop_len = len; + s->ip_next_hop_data = data; +} + +/* TODO: This function should use AF-specific hook */ +static void +bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED) +{ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + /* in IPv6, we may have two addresses in NEXT HOP */ + if ((len == 16) || ipa_zero(nh[1])) + bsprintf(buf, "%I", nh[0]); else - return 8; -#endif + bsprintf(buf, "%I %I", nh[0], nh[1]); } + static void -bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED) +bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - ip_addr *ipp = (ip_addr *) a->u.ptr->data; -#ifdef IPV6 - /* in IPv6, we might have two addresses in NEXT HOP */ - if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1])) - { - bsprintf(buf, "%I %I", ipp[0], ipp[1]); - return; - } -#endif + if (len != 4) + WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); - bsprintf(buf, "%I", ipp[0]); + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); } -static int -bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len) + +static void +bgp_export_local_pref(struct bgp_export_state *s, eattr *a) { - int exp_len = p->as4_session ? 8 : 6; - - return (len == exp_len) ? 0 : WITHDRAW; + if (!s->proto->is_interior && !s->proto->cf->allow_local_pref) + UNSET(a); } static void -bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED) +bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - struct adata *ad = a->u.ptr; - byte *data = ad->data; - u32 as; + if (!s->proto->is_interior && !s->proto->cf->allow_local_pref) + DISCARD(BAD_EBGP, "LOCAL_PREF"); - as = get_u32(data); - data += 4; + if (len != 4) + WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); - bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as); + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); } -static int -bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) + +static void +bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to) { - return ((len % 4) == 0) ? 0 : WITHDRAW; + if (len != 0) + DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); + + bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); } static int -bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) +bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - return ((len % 4) == 0) ? 
0 : 5; + byte *data = a->u.ptr->data; + uint len = a->u.ptr->length; + + if (!s->as4_session) + { + /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(6); + len = aggregator_32to16(data, src); + } + + return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len); } static void -bgp_format_cluster_list(eattr *a, byte *buf, int buflen) +bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - /* Truncates cluster lists larger than buflen, probably not a problem */ - int_set_format(a->u.ptr, 0, -1, buf, buflen); + if (len != (s->as4_session ? 8 : 6)) + DISCARD(BAD_LENGTH, "AGGREGATOR", len); + + if (!s->as4_session) + { + /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */ + byte *src = data; + data = alloca(8); + len = aggregator_16to32(data, src); + } + + bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); } -static int -bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) +static void +bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED) { -#ifdef IPV6 - p->mp_reach_start = a; - p->mp_reach_len = len; -#endif - return IGNORE; + byte *data = a->u.ptr->data; + + bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0)); } -static int -bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) + +static void +bgp_export_community(struct bgp_export_state *s, eattr *a) { -#ifdef IPV6 - p->mp_unreach_start = a; - p->mp_unreach_len = len; -#endif - return IGNORE; + if (a->u.ptr->length == 0) + UNSET(a); + + a->u.ptr = int_set_sort(s->pool, a->u.ptr); } -static int -bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) +static void +bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - return ((len % 8) == 0) ? 0 : WITHDRAW; + if (!len || (len % 4)) + WITHDRAW(BAD_LENGTH, "COMMUNITY", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); } -static int -bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) -{ - return ((len % 12) == 0) ? 
0 : WITHDRAW; -} - - -static struct attr_desc bgp_attr_table[] = { - { NULL, -1, 0, 0, 0, /* Undefined */ - NULL, NULL }, - { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */ - bgp_check_origin, bgp_format_origin }, - { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */ - NULL, NULL }, /* is checked by validate_as_path() as a special case */ - { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ - bgp_check_next_hop, bgp_format_next_hop }, - { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ - NULL, NULL }, - { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_LOCAL_PREF */ - NULL, NULL }, - { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */ - NULL, NULL }, - { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ - bgp_check_aggregator, bgp_format_aggregator }, - { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ - bgp_check_community, NULL }, - { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0, /* BA_ORIGINATOR_ID */ - NULL, NULL }, - { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */ - bgp_check_cluster_list, bgp_format_cluster_list }, - { .name = NULL }, /* BA_DPA */ - { .name = NULL }, /* BA_ADVERTISER */ - { .name = NULL }, /* BA_RCID_PATH */ - { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_REACH_NLRI */ - bgp_check_reach_nlri, NULL }, - { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */ - bgp_check_unreach_nlri, NULL }, - { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1, /* BA_EXT_COMMUNITY */ - bgp_check_ext_community, NULL }, - { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ - NULL, NULL }, - { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ - NULL, NULL }, - [BA_LARGE_COMMUNITY] = - { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1, - bgp_check_large_community, NULL } -}; -/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH. - * It does not matter as this attribute does not appear on routes in the routing table. 
- */ +static void +bgp_export_originator_id(struct bgp_export_state *s, eattr *a) +{ + if (!s->proto->is_internal) + UNSET(a); +} + +static void +bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (!s->proto->is_internal) + DISCARD(BAD_EBGP, "ORIGINATOR_ID"); -#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) + if (len != 4) + WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); -static inline struct adata * -bgp_alloc_adata(struct linpool *pool, unsigned len) + u32 val = get_u32(data); + bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); +} + + +static void +bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a) { - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; + if (!s->proto->is_internal) + UNSET(a); + + if (a->u.ptr->length == 0) + UNSET(a); } static void -bgp_set_attr(eattr *e, unsigned attr, uintptr_t val) +bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - ASSERT(ATTR_KNOWN(attr)); - e->id = EA_CODE(EAP_BGP, attr); - e->type = bgp_attr_table[attr].type; - e->flags = bgp_attr_table[attr].expected_flags; - if (e->type & EAF_EMBEDDED) - e->u.data = val; - else - e->u.ptr = (struct adata *) val; + if (!s->proto->is_internal) + DISCARD(BAD_EBGP, "CLUSTER_LIST"); + + if (!len || (len % 4)) + WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); } -static byte * -bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len) +static void +bgp_format_cluster_list(eattr *a, byte *buf, uint size) { - struct adata *ad = bgp_alloc_adata(pool, len); - bgp_set_attr(e, attr, (uintptr_t) ad); - return ad->data; + /* Truncates cluster lists larger than buflen, probably not a problem */ + int_set_format(a->u.ptr, 0, -1, buf, size); } -void -bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val) + +static inline u32 +get_af3(byte *buf) { - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - a->next = *to; - *to = a; - a->flags = EALF_SORTED; - a->count = 1; - bgp_set_attr(a->attrs, attr, val); + return (get_u16(buf) << 16) | buf[2]; } -byte * -bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len) +static void +bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) { - struct adata *ad = bgp_alloc_adata(pool, len); - bgp_attach_attr(to, pool, attr, (uintptr_t) ad); - return ad->data; + /* + * 2 B MP_REACH_NLRI data - Address Family Identifier + * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier + * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address + * var MP_REACH_NLRI data - Network Address of Next Hop + * 1 B MP_REACH_NLRI data - Reserved (zero) + * var MP_REACH_NLRI data - Network Layer Reachability Information + */ + + if ((len < 5) || (len < (5 + (uint) data[3]))) + bgp_parse_error(s, 9); + + s->mp_reach_af = get_af3(data); + s->mp_next_hop_len = data[3]; + s->mp_next_hop_data = data + 4; + s->mp_reach_len = len - 5 - s->mp_next_hop_len; + s->mp_reach_nlri = data + 5 + s->mp_next_hop_len; } -static int -bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len) + +static void +bgp_decode_mp_unreach_nlri(struct 
bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED) { - int wlen; + /* + * 2 B MP_UNREACH_NLRI data - Address Family Identifier + * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier + * var MP_UNREACH_NLRI data - Network Layer Reachability Information + */ - DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags); + if (len < 3) + bgp_parse_error(s, 9); + + s->mp_unreach_af = get_af3(data); + s->mp_unreach_len = len - 3; + s->mp_unreach_nlri = data + 3; +} - if (len < 256) - { - *dst++ = flags; - *dst++ = code; - *dst++ = len; - wlen = 3; - } - else - { - *dst++ = flags | BAF_EXT_LEN; - *dst++ = code; - put_u16(dst, len); - wlen = 4; - } - return wlen; +static void +bgp_export_ext_community(struct bgp_export_state *s, eattr *a) +{ + if (a->u.ptr->length == 0) + UNSET(a); + + a->u.ptr = ec_set_sort(s->pool, a->u.ptr); } static void -aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used) +bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - byte *src = aggr->data; - *new_used = 0; + if (!len || (len % 8)) + WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len); - u32 as = get_u32(src); - if (as > 0xFFFF) - { - as = AS_TRANS; - *new_used = 1; - } - put_u16(dst, as); + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); +} - /* Copy IPv4 address */ - memcpy(dst + 2, src + 4, 4); + +static void +bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (s->as4_session) + DISCARD(NEW_BGP, "AS4_AGGREGATOR"); + + if (len != 8) + DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); + + bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); } static void -aggregator_convert_to_new(struct adata *aggr, byte *dst) +bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) { - byte *src = aggr->data; + char err[128]; - u32 as = get_u16(src); - put_u32(dst, as); + if (s->as4_session) + DISCARD(NEW_BGP, "AS4_PATH"); - /* Copy IPv4 address */ - memcpy(dst + 4, src + 2, 4); + if (len < 6) + DISCARD(BAD_LENGTH, "AS4_PATH", len); + + if (!as_path_valid(data, len, 4, 1, err, sizeof(err))) + DISCARD("Malformed AS4_PATH attribute - %s", err); + + struct adata *a = lp_alloc_adata(s->pool, len); + memcpy(a->data, data, len); + + /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */ + if (as_path_contains_confed(a)) + { + REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute"); + a = as_path_strip_confed(s->pool, a); + } + + bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); +} + +static void +bgp_export_large_community(struct bgp_export_state *s, eattr *a) +{ + if (a->u.ptr->length == 0) + UNSET(a); + + a->u.ptr = lc_set_sort(s->pool, a->u.ptr); +} + +static void +bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to) +{ + if (!len || (len % 12)) + WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len); + + struct adata *ad = lp_alloc_adata(s->pool, len); + get_u32s(data, (u32 *) ad->data, len / 4); + bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); +} + +static void +bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) +{ + net_addr *n = s->route->net->n.addr; + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + + /* 
Perhaps we should just ignore it? */ + if (!s->mpls) + WITHDRAW("Unexpected MPLS stack"); + + /* Empty MPLS stack is not allowed */ + if (!lnum) + WITHDRAW("Malformed MPLS stack - empty"); + + /* This is ugly, but we must ensure that labels fit into NLRI field */ + if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) + WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + + for (uint i = 0; i < lnum; i++) + { + if (labels[i] > 0xfffff) + WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + + /* TODO: Check for special-purpose label values? */ + } } static int -bgp_get_attr_len(eattr *a) +bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED) { - int len; - if (ATTR_KNOWN(EA_ID(a->id))) + /* + * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute, + * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks. + */ + + s->mpls_labels = a->u.ptr; + return 0; +} + +static void +bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) +{ + DISCARD("Discarding received attribute #0"); +} + +static void +bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size) +{ + u32 *labels = (u32 *) a->u.ptr->data; + uint lnum = a->u.ptr->length / 4; + char *pos = buf; + + for (uint i = 0; i < lnum; i++) + { + if (size < 20) { - int code = EA_ID(a->id); - struct attr_desc *desc = &bgp_attr_table[code]; - len = desc->expected_length; - if (len < 0) - { - ASSERT(!(a->type & EAF_EMBEDDED)); - len = a->u.ptr->length; - } + bsprintf(pos, "..."); + return; } + + uint l = bsprintf(pos, "%d/", labels[i]); + ADVANCE(pos, size, l); + } + + /* Clear last slash or terminate empty string */ + pos[lnum ? 
-1 : 0] = 0; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + bgp_set_attr_data(to, s->pool, code, flags, data, len); +} + + +/* + * Attribute table + */ + +static const struct bgp_attr_desc bgp_attr_table[] = { + [BA_ORIGIN] = { + .name = "origin", + .type = EAF_TYPE_INT, + .flags = BAF_TRANSITIVE, + .export = bgp_export_origin, + .encode = bgp_encode_u8, + .decode = bgp_decode_origin, + .format = bgp_format_origin, + }, + [BA_AS_PATH] = { + .name = "as_path", + .type = EAF_TYPE_AS_PATH, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_as_path, + .decode = bgp_decode_as_path, + }, + [BA_NEXT_HOP] = { + .name = "next_hop", + .type = EAF_TYPE_IP_ADDRESS, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_next_hop, + .decode = bgp_decode_next_hop, + .format = bgp_format_next_hop, + }, + [BA_MULTI_EXIT_DISC] = { + .name = "med", + .type = EAF_TYPE_INT, + .flags = BAF_OPTIONAL, + .encode = bgp_encode_u32, + .decode = bgp_decode_med, + }, + [BA_LOCAL_PREF] = { + .name = "local_pref", + .type = EAF_TYPE_INT, + .flags = BAF_TRANSITIVE, + .export = bgp_export_local_pref, + .encode = bgp_encode_u32, + .decode = bgp_decode_local_pref, + }, + [BA_ATOMIC_AGGR] = { + .name = "atomic_aggr", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_atomic_aggr, + }, + [BA_AGGREGATOR] = { + .name = "aggregator", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_aggregator, + .decode = bgp_decode_aggregator, + .format = bgp_format_aggregator, + }, + [BA_COMMUNITY] = { + .name = "community", + .type = EAF_TYPE_INT_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_community, + }, + [BA_ORIGINATOR_ID] = { + .name = "originator_id", + .type = EAF_TYPE_ROUTER_ID, + .flags = BAF_OPTIONAL, + .export = bgp_export_originator_id, + .encode = bgp_encode_u32, + .decode = bgp_decode_originator_id, + }, + [BA_CLUSTER_LIST] = { + .name = "cluster_list", + .type = EAF_TYPE_INT_SET, + .flags = BAF_OPTIONAL, + .export = bgp_export_cluster_list, + .encode = bgp_encode_u32s, + .decode = bgp_decode_cluster_list, + .format = bgp_format_cluster_list, + }, + [BA_MP_REACH_NLRI] = { + .name = "mp_reach_nlri", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL, + .decode = bgp_decode_mp_reach_nlri, + }, + [BA_MP_UNREACH_NLRI] = { + .name = "mp_unreach_nlri", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL, + .decode = bgp_decode_mp_unreach_nlri, + }, + [BA_EXT_COMMUNITY] = { + .name = "ext_community", + .type = EAF_TYPE_EC_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_ext_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_ext_community, + }, + [BA_AS4_PATH] = { + .name = "as4_path", + .type = EAF_TYPE_AS_PATH, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_as4_path, + }, + [BA_AS4_AGGREGATOR] = { + .name = "as4_aggregator", + .type = EAF_TYPE_OPAQUE, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_raw, + .decode = bgp_decode_as4_aggregator, + .format = bgp_format_aggregator, + }, + [BA_LARGE_COMMUNITY] = { + .name = "large_community", + .type = EAF_TYPE_LC_SET, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .export = bgp_export_large_community, + .encode = bgp_encode_u32s, + .decode = bgp_decode_large_community, + }, + [BA_MPLS_LABEL_STACK] = { + .name = "mpls_label_stack", + .type 
= EAF_TYPE_INT_SET, + .export = bgp_export_mpls_label_stack, + .encode = bgp_encode_mpls_label_stack, + .decode = bgp_decode_mpls_label_stack, + .format = bgp_format_mpls_label_stack, + }, +}; + +static inline int +bgp_attr_known(uint code) +{ + return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; +} + + +/* + * Attribute export + */ + +static inline void +bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) +{ + if (EA_PROTO(a->id) != EAP_BGP) + return; + + uint code = EA_ID(a->id); + + if (bgp_attr_known(code)) + { + const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + + /* The flags might have been zero if the attr was added by filters */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; + + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) + a->flags |= BAF_PARTIAL; + + /* Call specific hook */ + CALL(desc->export, s, a); + + /* Attribute might become undefined in hook */ + if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + return; + } else - { - ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); - len = a->u.ptr->length; - } - - return len; + { + /* Don't re-export unknown non-transitive attributes */ + if (!(a->flags & BAF_TRANSITIVE)) + return; + + a->flags |= BAF_PARTIAL; + } + + /* Append updated attribute */ + to->attrs[to->count++] = *a; } -#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0) +/** + * bgp_export_attrs - export BGP attributes + * @s: BGP export state + * @attrs: a list of extended attributes + * + * The bgp_export_attrs() function takes a list of attributes and merges it to + * one newly allocated and sorted segment. Attributes are validated and + * normalized by type-specific export hooks and attribute flags are updated. + * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or + * empty community sets). + * + * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. + */ +static inline ea_list * +bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +{ + /* Merge the attribute list */ + ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); + ea_merge(attrs, new); + ea_sort(new); + + uint i, count; + count = new->count; + new->count = 0; + + /* Export each attribute */ + for (i = 0; i < count; i++) + bgp_export_attr(s, &new->attrs[i], new); + + if (s->err_withdraw) + return NULL; + + return new; +} + + +/* + * Attribute encoding + */ + +static inline int +bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) +{ + ASSERT(EA_PROTO(a->id) == EAP_BGP); + + uint code = EA_ID(a->id); + + if (bgp_attr_known(code)) + return bgp_attr_table[code].encode(s, a, buf, size); + else + return bgp_encode_raw(s, a, buf, size); +} /** * bgp_encode_attrs - encode BGP attributes - * @p: BGP instance - * @w: buffer + * @s: BGP write state * @attrs: a list of extended attributes - * @remains: remaining space in the buffer + * @buf: buffer + * @end: buffer end * * The bgp_encode_attrs() function takes a list of extended attributes * and converts it to its BGP representation (a part of an Update message). * * Result: Length of the attribute block generated or -1 if not enough space. 
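 *
 * Each attribute is framed by bgp_put_attr_hdr() (via the per-attribute encode
 * hooks) with a 3-byte header (flags, code, 1-byte length), or a 4-byte header
 * with BAF_EXT_LEN set once the data no longer fits into one length byte. As a
 * sketch, a 300-byte AS_PATH would start:
 *
 *   0x50        attribute flags (transitive | extended length)
 *   0x02        attribute code (AS_PATH)
 *   0x01 0x2C   2-byte length (300)
 *   ...         300 bytes of path segments
 *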
*/ -uint -bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) +int +bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end) { - uint i, code, type, flags; - byte *start = w; - int len, rv; + byte *pos = buf; + int i, len; - for(i=0; i<attrs->count; i++) - { - eattr *a = &attrs->attrs[i]; - ASSERT(EA_PROTO(a->id) == EAP_BGP); - code = EA_ID(a->id); - -#ifdef IPV6 - /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */ - if (code == BA_NEXT_HOP) - continue; -#endif - - /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker, - * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH - * as optional AS4_PATH attribute. - */ - if ((code == BA_AS_PATH) && (! p->as4_session)) - { - len = a->u.ptr->length; - - if (remains < (len + 4)) - goto err_no_buffer; - - /* Using temporary buffer because don't know a length of created attr - * and therefore a length of a header. Perhaps i should better always - * use BAF_EXT_LEN. */ - - byte buf[len]; - int new_used; - int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); - - DBG("BGP: Encoding old AS_PATH\n"); - rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); - ADVANCE(w, remains, rv); - memcpy(w, buf, nl); - ADVANCE(w, remains, nl); - - if (! new_used) - continue; - - if (remains < (len + 4)) - goto err_no_buffer; - - /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments - * here but we don't support confederations and such paths we already - * discarded in bgp_check_as_path(). - */ - - DBG("BGP: Encoding AS4_PATH\n"); - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); - ADVANCE(w, remains, rv); - memcpy(w, a->u.ptr->data, len); - ADVANCE(w, remains, len); - - continue; - } - - /* The same issue with AGGREGATOR attribute */ - if ((code == BA_AGGREGATOR) && (! p->as4_session)) - { - int new_used; - - len = 6; - if (remains < (len + 3)) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len); - ADVANCE(w, remains, rv); - aggregator_convert_to_old(a->u.ptr, w, &new_used); - ADVANCE(w, remains, len); - - if (! new_used) - continue; - - len = 8; - if (remains < (len + 3)) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len); - ADVANCE(w, remains, rv); - memcpy(w, a->u.ptr->data, len); - ADVANCE(w, remains, len); - - continue; - } - - /* Standard path continues here ... 
*/ - - type = a->type & EAF_TYPE_MASK; - flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); - len = bgp_get_attr_len(a); - - /* Skip empty sets */ - if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0)) - continue; - - if (remains < len + 4) - goto err_no_buffer; - - rv = bgp_encode_attr_hdr(w, flags, code, len); - ADVANCE(w, remains, rv); - - switch (type) - { - case EAF_TYPE_INT: - case EAF_TYPE_ROUTER_ID: - if (len == 4) - put_u32(w, a->u.data); - else - *w = a->u.data; - break; - case EAF_TYPE_IP_ADDRESS: - { - ip_addr ip = *(ip_addr *)a->u.ptr->data; - ipa_hton(ip); - memcpy(w, &ip, len); - break; - } - case EAF_TYPE_INT_SET: - case EAF_TYPE_LC_SET: - case EAF_TYPE_EC_SET: - { - u32 *z = int_set_get_data(a->u.ptr); - int i; - for(i=0; i<len; i+=4) - put_u32(w+i, *z++); - break; - } - case EAF_TYPE_OPAQUE: - case EAF_TYPE_AS_PATH: - memcpy(w, a->u.ptr->data, len); - break; - default: - bug("bgp_encode_attrs: unknown attribute type %02x", a->type); - } - ADVANCE(w, remains, len); - } - return w - start; + for (i = 0; i < attrs->count; i++) + { + len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos); - err_no_buffer: - return -1; + if (len < 0) + return -1; + + pos += len; + } + + return pos - buf; } + /* -static void -bgp_init_prefix(struct fib_node *N) + * Attribute decoding + */ + +static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool); + +static inline int +bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn) { - struct bgp_prefix *p = (struct bgp_prefix *) N; - p->bucket_node.next = NULL; + eattr *e = bgp_find_attr(attrs, BA_AS_PATH); + int num = p->cf->allow_local_as + 1; + return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num)); } -*/ -static int -bgp_compare_u32(const u32 *x, const u32 *y) +static inline int +bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs) { - return (*x < *y) ? -1 : (*x > *y) ? 1 : 0; + eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID); + return (e && (e->u.data == p->local_id)); } -static inline void -bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt) +static inline int +bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) { - memcpy(dest, src, sizeof(u32) * cnt); - qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32); + eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST); + return (e && int_set_contains(e->u.ptr, p->rr_cluster_id)); } -static int -bgp_compare_ec(const u32 *xp, const u32 *yp) +static inline void +bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) { - u64 x = ec_get(xp, 0); - u64 y = ec_get(yp, 0); - return (x < y) ? -1 : (x > y) ? 
1 : 0; + /* Handle duplicate attributes; RFC 7606 3 (g) */ + if (BIT32_TEST(s->attrs_seen, code)) + { + if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI)) + bgp_parse_error(s, 1); + else + DISCARD("Discarding duplicate attribute (code %u)", code); + } + BIT32_SET(s->attrs_seen, code); + + if (bgp_attr_known(code)) + { + const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + + /* Handle conflicting flags; RFC 7606 3 (c) */ + if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + + desc->decode(s, code, flags, data, len, to); + } + else /* Unknown attribute */ + { + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + + bgp_decode_unknown(s, code, flags, data, len, to); + } } -static inline void -bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal) +/** + * bgp_decode_attrs - check and decode BGP attributes + * @s: BGP parse state + * @data: start of attribute block + * @len: length of attribute block + * + * This function takes a BGP attribute block (a part of an Update message), checks + * its consistency and converts it to a list of BIRD route attributes represented + * by an (uncached) &rta. + */ +ea_list * +bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { - u32 *dst = int_set_get_data(ad); + struct bgp_proto *p = s->proto; + ea_list *attrs = NULL; + uint code, flags, alen; + byte *pos = data; + + /* Parse the attributes */ + while (len) + { + alen = 0; - /* Remove non-transitive communities (EC_TBIT active) on external sessions */ - if (! internal) + /* Read attribute type */ + if (len < 2) + goto framing_error; + flags = pos[0]; + code = pos[1]; + ADVANCE(pos, len, 2); + + /* Read attribute length */ + if (flags & BAF_EXT_LEN) { - int len = int_set_get_size(ad); - u32 *t = dst; - int i; - - for (i=0; i < len; i += 2) - { - if (src[i] & EC_TBIT) - continue; - - *t++ = src[i]; - *t++ = src[i+1]; - } - - ad->length = (t - dst) * 4; + if (len < 2) + goto framing_error; + alen = get_u16(pos); + ADVANCE(pos, len, 2); + } + else + { + if (len < 1) + goto framing_error; + alen = *pos; + ADVANCE(pos, len, 1); } - else - memcpy(dst, src, ad->length); - qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec); -} + if (alen > len) + goto framing_error; -static int -bgp_compare_lc(const u32 *x, const u32 *y) -{ - if (x[0] != y[0]) - return (x[0] > y[0]) ? 1 : -1; - if (x[1] != y[1]) - return (x[1] > y[1]) ? 1 : -1; - if (x[2] != y[2]) - return (x[2] > y[2]) ? 
1 : -1; - return 0; + DBG("Attr %02x %02x %u\n", code, flags, alen); + + bgp_decode_attr(s, code, flags, pos, alen, &attrs); + ADVANCE(pos, len, alen); + } + + if (s->err_withdraw) + goto withdraw; + + /* If there is no reachability NLRI, we are finished */ + if (!s->ip_reach_len && !s->mp_reach_len) + return NULL; + + + /* Handle missing mandatory attributes; RFC 7606 3 (d) */ + if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN)) + { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; } + + if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH)) + { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; } + + /* When receiving attributes from non-AS4-aware BGP speaker, we have to + reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */ + if (!p->as4_session) + bgp_process_as4_attrs(&attrs, s->pool); + + /* Reject routes with our ASN in AS_PATH attribute */ + if (bgp_as_path_loopy(p, attrs, p->local_as)) + goto withdraw; + + /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ + if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) + goto withdraw; + + /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */ + if (p->is_internal && bgp_originator_id_loopy(p, attrs)) + goto withdraw; + + /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */ + if (p->rr_client && bgp_cluster_list_loopy(p, attrs)) + goto withdraw; + + /* If there is no local preference, define one */ + if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) + bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + + return attrs; + + +framing_error: + /* RFC 7606 4 - handle attribute framing errors */ + REPORT("Malformed attribute list - framing error (%u/%u) at %d", + alen, len, (int) (pos - s->attrs)); + +withdraw: + /* RFC 7606 5.2 - handle missing NLRI during errors */ + if (!s->ip_reach_len && !s->mp_reach_len) + bgp_parse_error(s, 1); + + s->err_withdraw = 1; + return NULL; } -static inline void -bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt) + +/* + * Route bucket hash table + */ + +#define RBH_KEY(b) b->eattrs, b->hash +#define RBH_NEXT(b) b->next +#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2) +#define RBH_FN(a,h) h + +#define RBH_REHASH bgp_rbh_rehash +#define RBH_PARAMS /8, *2, 2, 2, 8, 20 + + +HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) + +void +bgp_init_bucket_table(struct bgp_channel *c) { - memcpy(dest, src, LCOMM_LENGTH * cnt); - qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc); -} + HASH_INIT(c->bucket_hash, c->pool, 8); -static void -bgp_rehash_buckets(struct bgp_proto *p) -{ - struct bgp_bucket **old = p->bucket_hash; - struct bgp_bucket **new; - unsigned oldn = p->hash_size; - unsigned i, e, mask; - struct bgp_bucket *b; - - p->hash_size = p->hash_limit; - DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size); - p->hash_limit *= 4; - if (p->hash_limit >= 65536) - p->hash_limit = ~0; - new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - mask = p->hash_size - 1; - for (i=0; i<oldn; i++) - while (b = old[i]) - { - old[i] = b->hash_next; - e = b->hash & mask; - b->hash_next = new[e]; - if (b->hash_next) - b->hash_next->hash_prev = b; - b->hash_prev = NULL; - new[e] = b; - } - mb_free(old); + init_list(&c->bucket_queue); + c->withdraw_bucket = NULL; } static struct bgp_bucket * -bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash) +bgp_get_bucket(struct bgp_channel *c, ea_list *new) { - struct 
bgp_bucket *b; - unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned; - unsigned i; + /* Hash and lookup */ + u32 hash = ea_hash(new); + struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); + + if (b) + return b; + + uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); + uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); + uint size = sizeof(struct bgp_bucket) + ea_size_aligned; + uint i; byte *dest; - unsigned index = hash & (p->hash_size - 1); /* Gather total size of non-inline attributes */ - for (i=0; i<new->count; i++) - { - eattr *a = &new->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } + for (i = 0; i < new->count; i++) + { + eattr *a = &new->attrs[i]; - /* Create the bucket and hash it */ - b = mb_alloc(p->p.pool, size); - b->hash_next = p->bucket_hash[index]; - if (b->hash_next) - b->hash_next->hash_prev = b; - p->bucket_hash[index] = b; - b->hash_prev = NULL; - b->hash = hash; - add_tail(&p->bucket_queue, &b->send_node); + if (!(a->type & EAF_EMBEDDED)) + size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); + } + + /* Create the bucket */ + b = mb_alloc(c->pool, size); init_list(&b->prefixes); + b->hash = hash; + + /* Copy list of extended attributes */ memcpy(b->eattrs, new, ea_size); - dest = ((byte *)b->eattrs) + ea_size_aligned; + dest = ((byte *) b->eattrs) + ea_size_aligned; /* Copy values of non-inline attributes */ - for (i=0; i<new->count; i++) + for (i = 0; i < new->count; i++) + { + eattr *a = &b->eattrs->attrs[i]; + + if (!(a->type & EAF_EMBEDDED)) { - eattr *a = &b->eattrs->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - { - struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } + struct adata *oa = a->u.ptr; + struct adata *na = (struct adata *) dest; + memcpy(na, oa, sizeof(struct adata) + oa->length); + a->u.ptr = na; + dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); } + } - /* If needed, rehash */ - p->hash_count++; - if (p->hash_count > p->hash_limit) - bgp_rehash_buckets(p); + /* Insert the bucket to send queue and bucket hash */ + add_tail(&c->bucket_queue, &b->send_node); + HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; } static struct bgp_bucket * -bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate) +bgp_get_withdraw_bucket(struct bgp_channel *c) { - ea_list *new; - unsigned i, cnt, hash, code; - eattr *a, *d; - u32 seen = 0; - struct bgp_bucket *b; - - /* Merge the attribute list */ - new = alloca(ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + if (!c->withdraw_bucket) + { + c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket)); + init_list(&c->withdraw_bucket->prefixes); + } - /* Normalize attributes */ - d = new->attrs; - cnt = new->count; - new->count = 0; - for(i=0; i<cnt; i++) - { - a = &new->attrs[i]; - if (EA_PROTO(a->id) != EAP_BGP) - continue; - code = EA_ID(a->id); - if (ATTR_KNOWN(code)) - { - if (!p->is_internal) - { - if (!bgp_attr_table[code].allow_in_ebgp) - continue; - if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref) - continue; - } - /* The flags might have been zero if the attr was added by filters */ - a->flags = 
(a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags; - if (code < 32) - seen |= 1 << code; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - continue; - } - *d = *a; - if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL)) - d->flags |= BAF_PARTIAL; - switch (d->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4); - d->u.ptr = z; - break; - } - case EAF_TYPE_EC_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal); - d->u.ptr = z; - break; - } - case EAF_TYPE_LC_SET: - { - struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length); - z->length = d->u.ptr->length; - bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH); - d->u.ptr = z; - break; - } - default: ; - } - d++; - new->count++; - } + return c->withdraw_bucket; +} - /* Hash */ - hash = ea_hash(new); - for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next) - if (b->hash == hash && ea_same(b->eattrs, new)) - { - DBG("Found bucket.\n"); - return b; - } - - /* Ensure that there are all mandatory attributes */ - for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++) - if (!(seen & (1 << bgp_mandatory_attrs[i]))) - { - log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen); - return NULL; - } - - /* Check if next hop is valid */ - a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data)) - { - log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen); - return NULL; - } +void +bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + rem_node(&b->send_node); + HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); + mb_free(b); +} - /* Create new bucket */ - DBG("Creating bucket.\n"); - return bgp_new_bucket(p, new, hash); +void +bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + rem_node(&b->send_node); + add_tail(&c->bucket_queue, &b->send_node); } void -bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) { - if (buck->hash_next) - buck->hash_next->hash_prev = buck->hash_prev; - if (buck->hash_prev) - buck->hash_prev->hash_next = buck->hash_next; - else - p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next; - mb_free(buck); + struct bgp_proto *p = (void *) c->c.proto; + struct bgp_bucket *wb = bgp_get_withdraw_bucket(c); + + log(L_ERR "%s: Attribute list too long", p->p.name); + while (!EMPTY_LIST(b->prefixes)) + { + struct bgp_prefix *px = HEAD(b->prefixes); + + log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); + rem_node(&px->buck_node); + add_tail(&wb->prefixes, &px->buck_node); + } } -/* Prefix hash table */ +/* + * Prefix hash table + */ -#define PXH_KEY(n1) n1->n.prefix, n1->n.pxlen, n1->path_id -#define PXH_NEXT(n) n->next -#define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2 -#define PXH_FN(p,l,i) ipa_hash32(p) ^ u32_hash((l << 16) ^ i) +#define PXH_KEY(px) px->net, px->path_id, px->hash +#define PXH_NEXT(px) px->next +#define 
PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash #define PXH_PARAMS /8, *2, 2, 2, 8, 20 @@ -931,308 +1292,282 @@ bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck) HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) void -bgp_init_prefix_table(struct bgp_proto *p, u32 order) +bgp_init_prefix_table(struct bgp_channel *c) { - HASH_INIT(p->prefix_hash, p->p.pool, order); + HASH_INIT(c->prefix_hash, c->pool, 8); - p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix)); + uint alen = net_addr_length[c->c.net_type]; + c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL; } void -bgp_free_prefix_table(struct bgp_proto *p) +bgp_free_prefix_table(struct bgp_channel *c) { - HASH_FREE(p->prefix_hash); + HASH_FREE(c->prefix_hash); - rfree(p->prefix_slab); - p->prefix_slab = NULL; + rfree(c->prefix_slab); + c->prefix_slab = NULL; } static struct bgp_prefix * -bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id) +bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) { - struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id); + u32 hash = net_hash(net) ^ u32_hash(path_id); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); + + if (px) + { + rem_node(&px->buck_node); + return px; + } - if (bp) - return bp; + if (c->prefix_slab) + px = sl_alloc(c->prefix_slab); + else + px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length); - bp = sl_alloc(p->prefix_slab); - bp->n.prefix = prefix; - bp->n.pxlen = pxlen; - bp->path_id = path_id; - bp->bucket_node.next = NULL; + px->buck_node.next = NULL; + px->buck_node.prev = NULL; + px->hash = hash; + px->path_id = path_id; + net_copy(px->net, net); - HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp); + HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); - return bp; + return px; } void -bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp) +bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { - HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp); - sl_free(p->prefix_slab, bp); + rem_node(&px->buck_node); + HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); + + if (c->prefix_slab) + sl_free(c->prefix_slab, px); + else + mb_free(px); } -void -bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs) +/* + * BGP protocol glue + */ + +int +bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) { + rte *e = *new; + struct proto *SRC = e->attrs->src->proto; struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_bucket *buck; - struct bgp_prefix *px; - rte *key; - u32 path_id; + struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; - DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down"); + /* Reject our routes */ + if (src == p) + return -1; - if (new) - { - key = new; - buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP); - if (!buck) /* Inconsistent attribute list */ - return; - } - else - { - key = old; - if (!(buck = p->withdraw_bucket)) - { - buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket)); - init_list(&buck->prefixes); - } - } - path_id = p->add_path_tx ? 
key->attrs->src->global_id : 0; - px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id); - if (px->bucket_node.next) - { - DBG("\tRemoving old entry.\n"); - rem_node(&px->bucket_node); - } - add_tail(&buck->prefixes, &px->bucket_node); - bgp_schedule_packet(p->conn, PKT_UPDATE); -} + /* Accept non-BGP routes */ + if (src == NULL) + return 0; -static int -bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) -{ - ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr)); - rta *rta = e->attrs; - byte *z; + // XXXX: Check next hop AF - ea->next = *attrs; - *attrs = ea; - ea->flags = EALF_SORTED; - ea->count = 4; + /* IBGP route reflection, RFC 4456 */ + if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) + { + /* Rejected unless configured as route reflector */ + if (!p->rr_client && !src->rr_client) + return -1; + + /* Generally, this should be handled when path is received, but we check it + also here as rr_cluster_id may be undefined or different in src. */ + if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs)) + return -1; + } - bgp_set_attr(ea->attrs, BA_ORIGIN, - ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + /* Handle well-known communities, RFC 1997 */ + struct eattr *c; + if (p->cf->interpret_communities && + (c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)))) + { + struct adata *d = c->u.ptr; - if (p->is_internal) - bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); - else - { - z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6); - z[0] = AS_PATH_SEQUENCE; - z[1] = 1; /* 1 AS */ - put_u32(z+2, p->local_as); - } + /* Do not export anywhere */ + if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) + return -1; - /* iBGP -> use gw, eBGP multi-hop -> use source_addr, - eBGP single-hop -> use gw if on the same iface */ - z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - if (p->cf->next_hop_self || - rta->dest != RTD_ROUTER || - ipa_equal(rta->gw, IPA_NONE) || - ipa_is_link_local(rta->gw) || - (!p->is_internal && !p->cf->next_hop_keep && - (!p->neigh || (rta->iface != p->neigh->iface)))) - set_next_hop(z, p->source_addr); - else - set_next_hop(z, rta->gw); + /* Do not export outside of AS (or member-AS) */ + if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)) + return -1; - bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref); + /* Do not export outside of AS (or confederation) */ + if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT)) + return -1; + } - return 0; /* Leave decision to the filters */ + return 0; } -static inline int -bgp_as_path_loopy(struct bgp_proto *p, rta *a) -{ - int num = p->cf->allow_local_as + 1; - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num)); -} +static adata null_adata; /* adata of length 0 */ -static inline int -bgp_originator_id_loopy(struct bgp_proto *p, rta *a) -{ - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); - return (e && (e->u.data == p->local_id)); -} - -static inline int -bgp_cluster_list_loopy(struct bgp_proto *p, rta *a) +static ea_list * +bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { - eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); - return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id)); -} + struct proto *SRC = 
e->attrs->src->proto; + struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; + struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; + ea_list *attrs = attrs0; + eattr *a; + adata *ad; + /* ORIGIN attribute - mandatory, attach if missing */ + if (! bgp_find_attr(attrs0, BA_ORIGIN)) + bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); -static inline void -bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) -{ - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); -} + /* AS_PATH attribute - mandatory */ + a = bgp_find_attr(attrs0, BA_AS_PATH); + ad = a ? a->u.ptr : &null_adata; -static inline void -bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) -{ - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); - bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid)); -} + /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */ + if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad)) + ad = as_path_strip_confed(pool, ad); -static int -bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) -{ - eattr *a; + /* AS_PATH attribute - keep or prepend ASN */ + if (p->is_internal || + (p->rs_client && src && src->rs_client)) + { + /* IBGP or route server -> just ensure there is one */ + if (!a) + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + } + else if (p->is_interior) + { + /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ + ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + } + else /* Regular EBGP (no RS, no confederation) */ + { + /* Regular EBGP -> prepend ASN as regular sequence */ + ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + + /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ + a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); + if (a && !(a->type & EAF_FRESH)) + bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + } - if (!p->is_internal && !p->rs_client) - { - bgp_path_prepend(e, attrs, pool, p->local_as); - - /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be - * propagated to other neighboring ASes. - * Perhaps it would be better to undefine it. - */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - if (a) - bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); - } + /* NEXT_HOP attribute - delegated to AF-specific hook */ + a = bgp_find_attr(attrs0, BA_NEXT_HOP); + bgp_update_next_hop(&s, a, &attrs); - /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr, - * eBGP single-hop -> keep next_hop if on the same iface. - * If the next_hop is zero (i.e. link-local), keep only if on the same iface. - * - * Note that same-iface-check uses iface from route, which is based on gw. 
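The AS_PATH step above prepends the speaker's public ASN, either as a regular AS_SEQUENCE or, inside a confederation, as an AS_CONFED_SEQUENCE. A minimal standalone sketch of the wire-level prepend on a 4-octet-ASN path follows; the helper names are illustrative and are not BIRD's as_path_prepend()/as_path_prepend2():

/* Prepend one 32-bit ASN to a wire-format AS_PATH made of
 * (type, count, count*4B ASNs) segments. If the first segment already has
 * the wanted type and is not full, the ASN is inserted into it; otherwise
 * a fresh one-element segment is added in front. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define AS_PATH_SEQUENCE 2

static uint32_t get_u32_be(const uint8_t *p)
{ return ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16) | ((uint32_t) p[2] << 8) | p[3]; }

static void put_u32_be(uint8_t *p, uint32_t x)
{ p[0] = x >> 24; p[1] = x >> 16; p[2] = x >> 8; p[3] = x; }

/* Returns the new length; dst must have room for src_len + 6 bytes. */
static size_t path_prepend(uint8_t *dst, const uint8_t *src, size_t src_len,
                           uint8_t seg_type, uint32_t asn)
{
  if (src_len >= 2 && src[0] == seg_type && src[1] < 255)
  {
    /* Grow the existing first segment by one ASN */
    dst[0] = seg_type;
    dst[1] = src[1] + 1;
    put_u32_be(dst + 2, asn);
    memcpy(dst + 6, src + 2, src_len - 2);
    return src_len + 4;
  }

  /* Otherwise create a new one-element segment in front */
  dst[0] = seg_type;
  dst[1] = 1;
  put_u32_be(dst + 2, asn);
  memcpy(dst + 6, src, src_len);
  return src_len + 6;
}

int main(void)
{
  /* AS_PATH "65001 65002" as a single AS_SEQUENCE segment */
  uint8_t path[64] = { AS_PATH_SEQUENCE, 2 };
  put_u32_be(path + 2, 65001);
  put_u32_be(path + 6, 65002);

  uint8_t out[64];
  size_t len = path_prepend(out, path, 10, AS_PATH_SEQUENCE, 64512);

  printf("segment count: %u, first ASN: %u, length: %zu\n",
         (unsigned) out[1], (unsigned) get_u32_be(out + 2), len);  /* 3, 64512, 14 */
  return 0;
}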
- */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - if (a && !p->cf->next_hop_self && - (p->cf->next_hop_keep || - (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || - (p->neigh && (e->attrs->iface == p->neigh->iface)))) - { - /* Leave the original next hop attribute, will check later where does it point */ - } - else - { - /* Need to create new one */ - byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - set_next_hop(b, p->source_addr); - } + /* LOCAL_PREF attribute - required for IBGP, attach if missing */ + if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) + bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); - if (rr) - { - /* Handling route reflection, RFC 4456 */ - struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto; + /* IBGP route reflection, RFC 4456 */ + if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as)) + { + /* ORIGINATOR_ID attribute - attach if not already set */ + if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) + bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); - if (!a) - bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id); + /* CLUSTER_LIST attribute - prepend cluster ID */ + a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); + ad = a ? a->u.ptr : NULL; - /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */ - bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id); + /* Prepend src cluster ID */ + if (src->rr_cluster_id) + ad = int_set_prepend(pool, ad, src->rr_cluster_id); - /* Two RR clients with different cluster ID, hmmm */ - if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id)) - bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id); - } + /* Prepend dst cluster ID if src and dst clusters are different */ + if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id)) + ad = int_set_prepend(pool, ad, p->rr_cluster_id); - return 0; /* Leave decision to the filters */ -} + /* Should be at least one prepended cluster ID */ + bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + } -static int -bgp_community_filter(struct bgp_proto *p, rte *e) -{ - eattr *a; - struct adata *d; + /* AS4_* transition attributes, RFC 6793 4.2.2 */ + if (! p->as4_session) + { + a = bgp_find_attr(attrs, BA_AS_PATH); + if (a && as_path_contains_as4(a->u.ptr)) + { + bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + } - /* Check if we aren't forbidden to export the route by communities */ - a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY)); - if (a) + a = bgp_find_attr(attrs, BA_AGGREGATOR); + if (a && aggregator_contains_as4(a->u.ptr)) { - d = a->u.ptr; - if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) - { - DBG("\tNO_ADVERTISE\n"); - return 1; - } - if (!p->is_internal && - (int_set_contains(d, BGP_COMM_NO_EXPORT) || - int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))) - { - DBG("\tNO_EXPORT\n"); - return 1; - } + bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); } + } - return 0; + /* + * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above + * conditions. 
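The AS4_* block above handles peers that did not negotiate 4-octet AS support: the true path is carried in AS4_PATH, while the AS_PATH actually sent on the wire may only contain 2-octet values, with AS_TRANS standing in for anything larger (RFC 6793 4.2.2). A small sketch of that downgrade, assuming plain ASN arrays rather than BIRD's adata segments:

/* Replace every ASN above 65535 with AS_TRANS (23456) for an old speaker. */
#include <stdint.h>
#include <stdio.h>

#define AS_TRANS 23456u

static uint16_t asn_to_old(uint32_t asn)
{
  return (asn > 0xFFFF) ? AS_TRANS : (uint16_t) asn;
}

int main(void)
{
  uint32_t path[] = { 4200000001u, 65010u, 196615u };  /* mixed 4B and 2B ASNs */
  size_t n = sizeof(path) / sizeof(path[0]);

  printf("AS_PATH sent to old peer:");
  for (size_t i = 0; i < n; i++)
    printf(" %u", (unsigned) asn_to_old(path[i]));     /* 23456 65010 23456 */
  printf("\n");
  return 0;
}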
Presence and validity of quasi-mandatory NEXT_HOP attribute + * should be checked in AF-specific hooks. + */ + + /* Apply per-attribute export hooks for validatation and normalization */ + return bgp_export_attrs(&s, attrs); } -int -bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool) +void +bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs) { - rte *e = *new; - struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ? - (struct bgp_proto *) e->attrs->src->proto : NULL; + struct bgp_proto *p = (void *) P; + struct bgp_channel *c = (void *) C; + struct bgp_bucket *buck; + struct bgp_prefix *px; + u32 path; - if (p == new_bgp) /* Poison reverse updates */ - return -1; - if (new_bgp) - { - /* We should check here for cluster list loop, because the receiving BGP instance - might have different cluster ID */ - if (bgp_cluster_list_loopy(p, e->attrs)) - return -1; - - if (p->cf->interpret_communities && bgp_community_filter(p, e)) - return -1; - - if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - { - /* Redistribution of internal routes with IBGP */ - if (p->rr_client || new_bgp->rr_client) - /* Route reflection, RFC 4456 */ - return bgp_update_attrs(p, e, attrs, pool, 1); - else - return -1; - } - else - return bgp_update_attrs(p, e, attrs, pool, 0); - } + if (new) + { + attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2); + + /* If attributes are invalid, we fail back to withdraw */ + buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); + path = new->attrs->src->global_id; + + lp_flush(bgp_linpool2); + } else - return bgp_create_attrs(p, e, attrs, pool); + { + buck = bgp_get_withdraw_bucket(c); + path = old->attrs->src->global_id; + } + + px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0); + add_tail(&buck->prefixes, &px->buck_node); + + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } + static inline u32 bgp_get_neighbor(rte *r) { eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); u32 as; - if (e && as_path_get_first(e->u.ptr, &as)) + if (e && as_path_get_first_regular(e->u.ptr, &as)) return as; - else - return ((struct bgp_proto *) r->attrs->src->proto)->remote_as; + + /* If AS_PATH is not defined, we treat rte as locally originated */ + struct bgp_proto *p = (void *) r->attrs->src->proto; + return p->cf->confederation ?: p->local_as; } static inline int rte_resolvable(rte *rt) { - int rd = rt->attrs->dest; - return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH); + return rt->attrs->dest == RTD_UNICAST; } int @@ -1271,16 +1606,16 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) - { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; - o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; - if (n < o) - return 1; - if (n > o) - return 0; - } + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. 
b) Use origins */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); @@ -1305,21 +1640,21 @@ bgp_rte_better(rte *new, rte *old) */ if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) - { - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - n = x ? x->u.data : new_bgp->cf->default_med; - o = y ? y->u.data : old_bgp->cf->default_med; - if (n < o) - return 1; - if (n > o) - return 0; - } + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + n = x ? x->u.data : new_bgp->cf->default_med; + o = y ? y->u.data : old_bgp->cf->default_med; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. d) Prefer external peers */ - if (new_bgp->is_internal > old_bgp->is_internal) + if (new_bgp->is_interior > old_bgp->is_interior) return 0; - if (new_bgp->is_internal < old_bgp->is_internal) + if (new_bgp->is_interior < old_bgp->is_interior) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ @@ -1331,7 +1666,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ - /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */ + /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; @@ -1390,18 +1725,18 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) - { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; - s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; - if (p != s) - return 0; + if (p != s) + return 0; -// if (DELTA(p, s) > pri_bgp->cf->relax_multipath) -// return 0; - } +// if (DELTA(p, s) > pri_bgp->cf->relax_multipath) +// return 0; + } /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); @@ -1414,17 +1749,17 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. c) Compare MED's */ if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) - { - x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - p = x ? x->u.data : pri_bgp->cf->default_med; - s = y ? y->u.data : sec_bgp->cf->default_med; - if (p != s) - return 0; - } + { + x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + p = x ? x->u.data : pri_bgp->cf->default_med; + s = y ? y->u.data : sec_bgp->cf->default_med; + if (p != s) + return 0; + } /* RFC 4271 9.1.2.2. d) Prefer external peers */ - if (pri_bgp->is_internal != sec_bgp->is_internal) + if (pri_bgp->is_interior != sec_bgp->is_interior) return 0; /* RFC 4271 9.1.2.2. 
e) Compare IGP metrics */ @@ -1439,7 +1774,6 @@ bgp_rte_mergable(rte *pri, rte *sec) } - static inline int same_group(rte *r, u32 lpref, u32 lasn) { @@ -1484,7 +1818,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) * that this fn is not called for them. * * The idea is simple, the implementation is more problematic, - * mostly because of optimizations in rte_recalculate() that + * mostly because of optimizations in rte_recalculate() that * avoids full recalculation in most cases. * * We can assume that at least one of new, old is non-NULL and both @@ -1496,14 +1830,14 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* If new and old are from different groups, we just process that as two independent events */ if (new && old && !same_group(old, lpref, lasn)) - { - int i1, i2; - i1 = bgp_rte_recalculate(table, net, NULL, old, old_best); - i2 = bgp_rte_recalculate(table, net, new, NULL, old_best); - return i1 || i2; - } + { + int i1, i2; + i1 = bgp_rte_recalculate(table, net, NULL, old, old_best); + i2 = bgp_rte_recalculate(table, net, new, NULL, old_best); + return i1 || i2; + } - /* + /* * We could find the best-in-group and then make some shortcuts like * in rte_recalculate, but as we would have to walk through all * net->routes just to find it, it is probably not worth. So we @@ -1515,35 +1849,35 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) new->u.bgp.suppressed = 1; if (old) + { + old_is_group_best = !old->u.bgp.suppressed; + old->u.bgp.suppressed = 1; + int new_is_better = new && bgp_rte_better(new, old); + + /* The first case - replace not best with worse (or remove not best) */ + if (!old_is_group_best && !new_is_better) + return 0; + + /* The second case - replace the best with better */ + if (old_is_group_best && new_is_better) { - old_is_group_best = !old->u.bgp.suppressed; - old->u.bgp.suppressed = 1; - int new_is_better = new && bgp_rte_better(new, old); - - /* The first case - replace not best with worse (or remove not best) */ - if (!old_is_group_best && !new_is_better) - return 0; - - /* The second case - replace the best with better */ - if (old_is_group_best && new_is_better) - { - /* new is best-in-group, the see discussion below - this is - a special variant of NBG && OBG. From OBG we can deduce - that same_group(old_best) iff (old == old_best) */ - new->u.bgp.suppressed = 0; - return (old == old_best); - } + /* new is best-in-group, the see discussion below - this is + a special variant of NBG && OBG. 
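The grouping logic discussed here keys routes on (LOCAL_PREF, neighbor AS) and hides all but the best route of each group from the main best-path selection. A toy illustration of the idea, with "better within a group" reduced to "lower MED" and hypothetical structures standing in for BIRD's rte and bgp_rte_better():

#include <stdio.h>
#include <stdint.h>

struct toy_route {
  uint32_t lpref, neigh_as, med;
  int suppressed;
};

static int same_group(const struct toy_route *a, const struct toy_route *b)
{ return a->lpref == b->lpref && a->neigh_as == b->neigh_as; }

static void apply_deterministic_med(struct toy_route *r, size_t n)
{
  for (size_t i = 0; i < n; i++)
  {
    r[i].suppressed = 0;
    for (size_t j = 0; j < n; j++)
      if (j != i && same_group(&r[i], &r[j]) &&
          (r[j].med < r[i].med || (r[j].med == r[i].med && j < i)))
        r[i].suppressed = 1;   /* some other route in the group is better */
  }
}

int main(void)
{
  struct toy_route routes[] = {
    { 100, 65010, 50, 0 },     /* group A */
    { 100, 65010, 10, 0 },     /* group A, best (lowest MED) */
    { 100, 65020, 70, 0 },     /* group B, alone, therefore best */
  };
  size_t n = sizeof(routes) / sizeof(routes[0]);

  apply_deterministic_med(routes, n);
  for (size_t i = 0; i < n; i++)
    printf("route %zu: %s\n", i, routes[i].suppressed ? "suppressed" : "group best");
  return 0;
}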
From OBG we can deduce + that same_group(old_best) iff (old == old_best) */ + new->u.bgp.suppressed = 0; + return (old == old_best); } + } /* The default case - find a new best-in-group route */ r = new; /* new may not be in the list */ for (s=net->routes; rte_is_valid(s); s=s->next) if (use_deterministic_med(s) && same_group(s, lpref, lasn)) - { - s->u.bgp.suppressed = 1; - if (!r || bgp_rte_better(s, r)) - r = s; - } + { + s->u.bgp.suppressed = 1; + if (!r || bgp_rte_better(s, r)) + r = s; + } /* Simple case - the last route in group disappears */ if (!r) @@ -1582,397 +1916,77 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return old_is_group_best; } -static struct adata * -bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) -{ - struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8); - newa->length = 8; - aggregator_convert_to_new(old, newa->data); - return newa; -} - -/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format - * and append path old4 (in 4B format). +/* + * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3 */ -static struct adata * -bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool) -{ - byte buf[old2->length * 2]; - - int ol = as_path_convert_to_new(old2, buf, req_as); - int nl = ol + (old4 ? old4->length : 0); - - struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl); - newa->length = nl; - memcpy(newa->data, buf, ol); - if (old4) memcpy(newa->data + ol, old4->data, old4->length); - - return newa; -} - -static int -as4_aggregator_valid(struct adata *aggr) -{ - return aggr->length == 8; -} - - -/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ -static void -bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) -{ - eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH)); - eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR)); - eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR)); - int a4_removed = 0; - - if (a4 && !as4_aggregator_valid(a4->u.ptr)) - { - log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name); - a4 = NULL; - a4_removed = 1; - } - - if (a2) - { - u32 a2_as = get_u16(a2->u.ptr->data); - - if (a4) - { - if (a2_as != AS_TRANS) - { - /* Routes were aggregated by old router and therefore AS4_PATH - * and AS4_AGGREGATOR is invalid - * - * Convert AS_PATH and AGGREGATOR to 4B format and finish. - */ - - a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); - - return; - } - else - { - /* Common case, use AS4_AGGREGATOR attribute */ - a2->u.ptr = a4->u.ptr; - } - } - else - { - /* Common case, use old AGGREGATOR attribute */ - a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); - - if ((a2_as == AS_TRANS) && !a4_removed) - log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name); - } - } - else - if (a4) - log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name); - - int p2_len = as_path_getlen_int(p2->u.ptr, 2); - int p4_len = p4 ? 
validate_as4_path(p, p4->u.ptr) : -1; - - if (p4 && (p4_len < 0)) - log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name); - - if ((p4_len <= 0) || (p2_len < p4_len)) - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); - else - p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool); -} - static void -bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) +bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) { - unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH); - unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR); - ea_list **el = &(a->eattrs); + eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH); + eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH); + eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR); + eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); - /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */ - while (*el != NULL) - { - unsigned fid = (*el)->attrs[0].id; - - if ((fid == id1) || (fid == id2)) - { - *el = (*el)->next; - if (p->as4_session) - log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name); - } - else - el = &((*el)->next); - } -} + /* First, unset AS4_* attributes */ + if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); -/** - * bgp_decode_attrs - check and decode BGP attributes - * @conn: connection - * @attr: start of attribute block - * @len: length of attribute block - * @pool: linear pool to make all the allocations in - * @mandatory: 1 iff presence of mandatory attributes has to be checked - * - * This function takes a BGP attribute block (a part of an Update message), checks - * its consistency and converts it to a list of BIRD route attributes represented - * by a &rta. 
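The replacement function bgp_process_as4_attrs() introduced here reconstructs the 4-octet path as described in RFC 6793 4.2.3: keep the leading len(AS_PATH) - len(AS4_PATH) ASNs of the received 2-octet AS_PATH and append the whole AS4_PATH after them. A standalone sketch of the merge on plain ASN arrays, not BIRD's as_path_cut()/as_path_merge():

#include <stdio.h>
#include <stdint.h>

#define AS_TRANS 23456u

/* Writes the merged path into out (sized at least p2_len) and returns its length.
 * If AS_PATH is shorter than AS4_PATH, the AS4_PATH is ignored and the
 * 2-octet path is kept as-is. */
static size_t merge_as4_path(const uint32_t *p2, size_t p2_len,
                             const uint32_t *p4, size_t p4_len,
                             uint32_t *out)
{
  if (p2_len < p4_len)
  {
    for (size_t i = 0; i < p2_len; i++) out[i] = p2[i];
    return p2_len;
  }

  size_t keep = p2_len - p4_len;
  for (size_t i = 0; i < keep; i++) out[i] = p2[i];
  for (size_t i = 0; i < p4_len; i++) out[keep + i] = p4[i];
  return p2_len;
}

int main(void)
{
  /* Old speaker sent AS_PATH "65001 23456 23456" plus AS4_PATH "4200000001 4200000002" */
  uint32_t p2[] = { 65001, AS_TRANS, AS_TRANS };
  uint32_t p4[] = { 4200000001u, 4200000002u };
  uint32_t out[8];

  size_t len = merge_as4_path(p2, 3, p4, 2, out);
  for (size_t i = 0; i < len; i++)
    printf("%u ", (unsigned) out[i]);      /* 65001 4200000001 4200000002 */
  printf("\n");
  return 0;
}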
- */ -struct rta * -bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory) -{ - struct bgp_proto *bgp = conn->bgp; - rta *a = lp_alloc(pool, sizeof(struct rta)); - uint flags, code, l, i, type; - int errcode; - byte *z, *attr_start; - byte seen[256/8]; - ea_list *ea; - struct adata *ad; - int withdraw = 0; - - bzero(a, sizeof(rta)); - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->cast = RTC_UNICAST; - /* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */ - a->from = bgp->cf->remote_ip; - - /* Parse the attributes */ - bzero(seen, sizeof(seen)); - DBG("BGP: Parsing attributes\n"); - while (len) - { - if (len < 2) - goto malformed; - attr_start = attr; - flags = *attr++; - code = *attr++; - len -= 2; - if (flags & BAF_EXT_LEN) - { - if (len < 2) - goto malformed; - l = get_u16(attr); - attr += 2; - len -= 2; - } - else - { - if (len < 1) - goto malformed; - l = *attr++; - len--; - } - if (l > len) - goto malformed; - len -= l; - z = attr; - attr += l; - DBG("Attr %02x %02x %d\n", code, flags, l); - if (seen[code/8] & (1 << (code%8))) - goto malformed; - if (ATTR_KNOWN(code)) - { - struct attr_desc *desc = &bgp_attr_table[code]; - if (desc->expected_length >= 0 && desc->expected_length != (int) l) - { errcode = 5; goto err; } - if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) - { errcode = 4; goto err; } - if (!bgp->is_internal) - { - if (!desc->allow_in_ebgp) - continue; - if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref) - continue; - } - if (desc->validate) - { - errcode = desc->validate(bgp, z, l); - if (errcode > 0) - goto err; - if (errcode == IGNORE) - continue; - if (errcode <= WITHDRAW) - { - log(L_WARN "%s: Attribute %s is malformed, withdrawing update", - bgp->p.name, desc->name); - withdraw = 1; - } - } - else if (code == BA_AS_PATH) - { - /* Special case as it might also trim the attribute */ - if (validate_as_path(bgp, z, &l) < 0) - { errcode = 11; goto err; } - } - type = desc->type; - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - { errcode = 2; goto err; } - type = EAF_TYPE_OPAQUE; - } - - // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag - // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL)) - // { errcode = 4; goto err; } - - seen[code/8] |= (1 << (code%8)); - ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = a->eattrs; - a->eattrs = ea; - ea->flags = 0; - ea->count = 1; - ea->attrs[0].id = EA_CODE(EAP_BGP, code); - ea->attrs[0].flags = flags; - ea->attrs[0].type = type; - if (type & EAF_EMBEDDED) - ad = NULL; - else - { - ad = lp_alloc(pool, sizeof(struct adata) + l); - ea->attrs[0].u.ptr = ad; - ad->length = l; - memcpy(ad->data, z, l); - } - switch (type) - { - case EAF_TYPE_ROUTER_ID: - case EAF_TYPE_INT: - if (l == 1) - ea->attrs[0].u.data = *z; - else - ea->attrs[0].u.data = get_u32(z); - break; - case EAF_TYPE_IP_ADDRESS: - ipa_ntoh(*(ip_addr *)ad->data); - break; - case EAF_TYPE_INT_SET: - case EAF_TYPE_LC_SET: - case EAF_TYPE_EC_SET: - { - u32 *z = (u32 *) ad->data; - for(i=0; i<ad->length/4; i++) - z[i] = ntohl(z[i]); - break; - } - } - } - - if (withdraw) - goto withdraw; - -#ifdef IPV6 - /* If we received MP_REACH_NLRI we should check mandatory attributes */ - if (bgp->mp_reach_len != 0) - mandatory = 1; -#endif - - /* If there is no (reachability) NLRI, we should exit now */ - if (! 
mandatory) - return a; - - /* Check if all mandatory attributes are present */ - for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++) - { - code = bgp_mandatory_attrs[i]; - if (!(seen[code/8] & (1 << (code%8)))) - { - bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1); - return NULL; - } - } - - /* When receiving attributes from non-AS4-aware BGP speaker, - * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes - */ - if (! bgp->as4_session) - bgp_reconstruct_4b_atts(bgp, a, pool); - - bgp_remove_as4_attrs(bgp, a); - - /* If the AS path attribute contains our AS, reject the routes */ - if (bgp_as_path_loopy(bgp, a)) - goto withdraw; - - /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ - if (bgp_originator_id_loopy(bgp, a) || - bgp_cluster_list_loopy(bgp, a)) - goto withdraw; + /* Handle AGGREGATOR attribute */ + if (a2 && a4) + { + u32 a2_asn = get_u32(a2->u.ptr->data); - /* If there's no local preference, define one */ - if (!(seen[0] & (1 << BA_LOCAL_PREF))) - bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref); + /* If routes were aggregated by an old router, then AS4_PATH and + AS4_AGGREGATOR are invalid. In that case we give up. */ + if (a2_asn != AS_TRANS) + return; - return a; + /* Use AS4_AGGREGATOR instead of AGGREGATOR */ + a2->u.ptr = a4->u.ptr; + } -withdraw: - return NULL; + /* Handle AS_PATH attribute */ + if (p2 && p4) + { + /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */ + int p2_len = as_path_getlen(p2->u.ptr); + int p4_len = as_path_getlen(p4->u.ptr); -malformed: - bgp_error(conn, 3, 1, NULL, 0); - return NULL; + /* AS_PATH is too short, give up */ + if (p2_len < p4_len) + return; -err: - bgp_error(conn, 3, errcode, attr_start, z+l-attr_start); - return NULL; + /* Merge AS_PATH and AS4_PATH */ + as_path_cut(p2->u.ptr, p2_len - p4_len); + p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr); + } } int bgp_get_attr(eattr *a, byte *buf, int buflen) { uint i = EA_ID(a->id); - struct attr_desc *d; + const struct bgp_attr_desc *d; int len; - if (ATTR_KNOWN(i)) + if (bgp_attr_known(i)) + { + d = &bgp_attr_table[i]; + len = bsprintf(buf, "%s", d->name); + buf += len; + if (d->format) { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; + *buf++ = ':'; + *buf++ = ' '; + d->format(a, buf, buflen - len - 2); + return GA_FULL; } - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); - return GA_NAME; -} - -void -bgp_init_bucket_table(struct bgp_proto *p) -{ - p->hash_size = 256; - p->hash_limit = p->hash_size * 4; - p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *)); - init_list(&p->bucket_queue); - p->withdraw_bucket = NULL; - // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix); -} - -void -bgp_free_bucket_table(struct bgp_proto *p) -{ - mb_free(p->bucket_hash); - p->bucket_hash = NULL; - - struct bgp_bucket *b; - WALK_LIST_FIRST(b, p->bucket_queue) - { - rem_node(&b->send_node); - mb_free(b); + return GA_NAME; } - mb_free(p->withdraw_bucket); - p->withdraw_bucket = NULL; + bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? 
" [t]" : ""); + return GA_NAME; } void @@ -1988,14 +2002,14 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) buf += bsprintf(buf, "-"); if (e->attrs->hostentry) - { - if (!rte_resolvable(e)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); - } + { + if (!rte_resolvable(e)) + buf += bsprintf(buf, "/-"); + else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + } buf += bsprintf(buf, ") ["); if (p && as_path_get_last(p->u.ptr, &origas)) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index f706e76e..f4791215 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -2,6 +2,8 @@ * BIRD -- The Border Gateway Protocol * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -9,48 +11,52 @@ /** * DOC: Border Gateway Protocol * - * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the - * connection and most of the interface with BIRD core, |packets.c| handling + * The BGP protocol is implemented in three parts: |bgp.c| which takes care of + * the connection and most of the interface with BIRD core, |packets.c| handling * both incoming and outgoing BGP packets and |attrs.c| containing functions for * manipulation with BGP attribute lists. * - * As opposed to the other existing routing daemons, BIRD has a sophisticated core - * architecture which is able to keep all the information needed by BGP in the - * primary routing table, therefore no complex data structures like a central - * BGP table are needed. This increases memory footprint of a BGP router with - * many connections, but not too much and, which is more important, it makes - * BGP much easier to implement. + * As opposed to the other existing routing daemons, BIRD has a sophisticated + * core architecture which is able to keep all the information needed by BGP in + * the primary routing table, therefore no complex data structures like a + * central BGP table are needed. This increases memory footprint of a BGP router + * with many connections, but not too much and, which is more important, it + * makes BGP much easier to implement. * - * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto - * structure to which are attached individual connections represented by &bgp_connection - * (usually, there exists only one connection, but during BGP session setup, there - * can be more of them). The connections are handled according to the BGP state machine - * defined in the RFC with all the timers and all the parameters configurable. + * Each instance of BGP (corresponding to a single BGP peer) is described by a + * &bgp_proto structure to which are attached individual connections represented + * by &bgp_connection (usually, there exists only one connection, but during BGP + * session setup, there can be more of them). The connections are handled + * according to the BGP state machine defined in the RFC with all the timers and + * all the parameters configurable. * - * In incoming direction, we listen on the connection's socket and each time we receive - * some input, we pass it to bgp_rx(). 
It decodes packet headers and the markers and - * passes complete packets to bgp_rx_packet() which distributes the packet according - * to its type. + * In incoming direction, we listen on the connection's socket and each time we + * receive some input, we pass it to bgp_rx(). It decodes packet headers and the + * markers and passes complete packets to bgp_rx_packet() which distributes the + * packet according to its type. * - * In outgoing direction, we gather all the routing updates and sort them to buckets - * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison - * of &rta's and a &fib which helps us to find if we already have another route for - * the same destination queued for sending, so that we can replace it with the new one - * immediately instead of sending both updates). There also exists a special bucket holding - * all the route withdrawals which cannot be queued anywhere else as they don't have any - * attributes. If we have any packet to send (due to either new routes or the connection - * tracking code wanting to send a Open, Keepalive or Notification message), we call - * bgp_schedule_packet() which sets the corresponding bit in a @packet_to_send - * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty, - * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls - * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet - * type if we have more data of the same type to send. + * In outgoing direction, we gather all the routing updates and sort them to + * buckets (&bgp_bucket) according to their attributes (we keep a hash table for + * fast comparison of &rta's and a &fib which helps us to find if we already + * have another route for the same destination queued for sending, so that we + * can replace it with the new one immediately instead of sending both + * updates). There also exists a special bucket holding all the route + * withdrawals which cannot be queued anywhere else as they don't have any + * attributes. If we have any packet to send (due to either new routes or the + * connection tracking code wanting to send a Open, Keepalive or Notification + * message), we call bgp_schedule_packet() which sets the corresponding bit in a + * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket + * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the + * packet type bits and calls the corresponding bgp_create_xx() functions, + * eventually rescheduling the same packet type if we have more data of the same + * type to send. * - * The processing of attributes consists of two functions: bgp_decode_attrs() for checking - * of the attribute blocks and translating them to the language of BIRD's extended attributes - * and bgp_encode_attrs() which does the converse. Both functions are built around a - * @bgp_attr_table array describing all important characteristics of all known attributes. - * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. + * The processing of attributes consists of two functions: bgp_decode_attrs() + * for checking of the attribute blocks and translating them to the language of + * BIRD's extended attributes and bgp_encode_attrs() which does the + * converse. Both functions are built around a @bgp_attr_table array describing + * all important characteristics of all known attributes. 
Unknown transitive + * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. * * BGP protocol implements graceful restart in both restarting (local restart) * and receiving (neighbor restart) roles. The first is handled mostly by the @@ -61,10 +67,44 @@ * point of view and therefore maintaining received routes. Routing table * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing * stale routes after reestablishment of BGP session during graceful restart. - */ + * + * Supported standards: + * <itemize> + * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP) + * <item> <rfc id="1997"> - BGP Communities Attribute + * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature + * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6 + * <item> <rfc id="2918"> - Route Refresh Capability + * <item> <rfc id="3107"> - Carrying Label Information in BGP + * <item> <rfc id="4360"> - BGP Extended Communities Attribute + * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks + * <item> <rfc id="4456"> - BGP Route Reflection + * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message + * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks + * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP + * <item> <rfc id="4760"> - Multiprotocol extensions for BGP + * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS + * <item> <rfc id="5065"> - AS confederations for BGP + * <item> <rfc id="5082"> - Generalized TTL Security Mechanism + * <item> <rfc id="5492"> - Capabilities Advertisement with BGP + * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop + * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules + * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community + * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier + * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error + * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers + * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP + * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages + * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP + * <item> <rfc id="7947"> - Internet Exchange BGP Route Server + * <item> <rfc id="8092"> - BGP Large Communities Attribute + * </itemize> +*/ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -80,70 +120,150 @@ struct linpool *bgp_linpool; /* Global temporary pool */ -static sock *bgp_listen_sk; /* Global listening socket */ -static int bgp_counter; /* Number of protocol instances using the listening socket */ +struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ +static list bgp_sockets; /* Global list of listening sockets */ + -static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); -static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); +static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); +static void bgp_listen_sock_err(sock *sk UNUSED, int err); /** * bgp_open - open a BGP instance * @p: BGP instance * - * This function allocates and configures shared BGP resources. - * Should be called as the last step during initialization - * (when lock is acquired and neighbor is ready). 
- * When error, state changed to PS_DOWN, -1 is returned and caller - * should return immediately. + * This function allocates and configures shared BGP resources, mainly listening + * sockets. Should be called as the last step during initialization (when lock + * is acquired and neighbor is ready). When error, caller should change state to + * PS_DOWN and return immediately. */ static int bgp_open(struct bgp_proto *p) { - struct config *cfg = p->cf->c.global; - int errcode; + struct bgp_socket *bs = NULL; + struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; + ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : + (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); + uint port = p->cf->local_port; - if (!bgp_listen_sk) - bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags); + /* FIXME: Add some global init? */ + if (!bgp_linpool) + init_list(&bgp_sockets); + + /* We assume that cf->iface is defined iff cf->local_ip is link-local */ - if (!bgp_listen_sk) + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port)) { - errcode = BEM_NO_SOCKET; - goto err; + bs->uc++; + p->sock = bs; + return 0; } - if (!bgp_linpool) - bgp_linpool = lp_new(&root_pool, 4080); + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = addr; + sk->sport = port; + sk->flags = 0; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk) < 0) + goto err; - bgp_counter++; + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + bs->uc = 1; + p->sock = bs; - if (p->cf->password) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, - p->cf->iface, p->cf->password, p->cf->setkey) < 0) - { - sk_log_error(bgp_listen_sk, p->p.name); - bgp_close(p, 0); - errcode = BEM_INVALID_MD5; - goto err; - } + add_tail(&bgp_sockets, &bs->n); + + if (!bgp_linpool) + { + bgp_linpool = lp_new_default(proto_pool); + bgp_linpool2 = lp_new_default(proto_pool); + } return 0; err: - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, errcode); - proto_notify_state(&p->p, PS_DOWN); + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); return -1; } +/** + * bgp_close - close a BGP instance + * @p: BGP instance + * + * This function frees and deconfigures shared BGP resources. + */ +static void +bgp_close(struct bgp_proto *p) +{ + struct bgp_socket *bs = p->sock; + + ASSERT(bs && bs->uc); + + if (--bs->uc) + return; + + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + + if (!EMPTY_LIST(bgp_sockets)) + return; + + rfree(bgp_linpool); + bgp_linpool = NULL; + + rfree(bgp_linpool2); + bgp_linpool2 = NULL; +} + +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) +{ + if (p->cf->password) + { + int rv = sk_set_md5_auth(p->sock->sk, + p->cf->local_ip, p->cf->remote_ip, p->cf->iface, + enable ? p->cf->password : NULL, p->cf->setkey); + + if (rv < 0) + sk_log_error(p->sock->sk, p->p.name); + + return rv; + } + else + return 0; +} + +static inline struct bgp_channel * +bgp_find_channel(struct bgp_proto *p, u32 afi) +{ + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + if (c->afi == afi) + return c; + + return NULL; +} + static void bgp_startup(struct bgp_proto *p) { BGP_TRACE(D_EVENTS, "Started"); - p->start_state = p->cf->capabilities ? 
BSS_CONNECT : BSS_CONNECT_NOCAP; + p->start_state = BSS_CONNECT; if (!p->cf->passive) bgp_active(p); @@ -159,70 +279,57 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int rv = bgp_open(p); - if (rv < 0) - return; + int err_val; + + if (bgp_open(p) < 0) + { err_val = BEM_NO_SOCKET; goto err1; } + + if (bgp_setup_auth(p, 1) < 0) + { err_val = BEM_INVALID_MD5; goto err2; } if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); if (p->startup_delay) - { - p->start_state = BSS_DELAY; - BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); - bgp_start_timer(p->startup_timer, p->startup_delay); - } + { + p->start_state = BSS_DELAY; + BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); + bgp_start_timer(p->startup_timer, p->startup_delay); + } else bgp_startup(p); -} -/** - * bgp_close - close a BGP instance - * @p: BGP instance - * @apply_md5: 0 to disable unsetting MD5 auth - * - * This function frees and deconfigures shared BGP resources. - * @apply_md5 is set to 0 when bgp_close is called as a cleanup - * from failed bgp_open(). - */ -static void -bgp_close(struct bgp_proto *p, int apply_md5) -{ - ASSERT(bgp_counter); - bgp_counter--; + return; - if (p->cf->password && apply_md5) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, - p->cf->iface, NULL, p->cf->setkey) < 0) - sk_log_error(bgp_listen_sk, p->p.name); +err2: + bgp_close(p); +err1: + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, err_val); + proto_notify_state(&p->p, PS_DOWN); - if (!bgp_counter) - { - rfree(bgp_listen_sk); - bgp_listen_sk = NULL; - rfree(bgp_linpool); - bgp_linpool = NULL; - } + return; } /** * bgp_start_timer - start a BGP timer * @t: timer - * @value: time to fire (0 to disable the timer) + * @value: time (in seconds) to fire (0 to disable the timer) * - * This functions calls tm_start() on @t with time @value and the - * amount of randomization suggested by the BGP standard. Please use - * it for all BGP timers. + * This functions calls tm_start() on @t with time @value and the amount of + * randomization suggested by the BGP standard. Please use it for all BGP + * timers. */ void -bgp_start_timer(timer *t, int value) +bgp_start_timer(timer *t, uint value) { if (value) - { - /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */ - t->randomize = value / 4; - tm_start(t, value - t->randomize); - } + { + /* The randomization procedure is specified in RFC 4271 section 10 */ + btime time = value S; + btime randomize = random() % ((time / 4) + 1); + tm_start(t, time - randomize); + } else tm_stop(t); } @@ -231,8 +338,8 @@ bgp_start_timer(timer *t, int value) * bgp_close_conn - close a BGP connection * @conn: connection to close * - * This function takes a connection described by the &bgp_conn structure, - * closes its socket and frees all resources associated with it. + * This function takes a connection described by the &bgp_conn structure, closes + * its socket and frees all resources associated with it. 
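bgp_start_timer() above now derives its jitter directly from RFC 4271 section 10: the configured value is shortened by a random amount of up to one quarter, so peers do not fire their timers in lockstep. The same computation in a standalone form, using plain seconds and rand() instead of BIRD's btime and random():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static unsigned jittered(unsigned value)
{
  if (!value)
    return 0;                              /* 0 means "timer disabled" */

  unsigned randomize = (unsigned) rand() % (value / 4 + 1);
  return value - randomize;                /* result lies in [3/4 * value, value] */
}

int main(void)
{
  srand((unsigned) time(NULL));
  for (int i = 0; i < 5; i++)
    printf("connect retry fires in %u s (configured 120 s)\n", jittered(120));
  return 0;
}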
*/ void bgp_close_conn(struct bgp_conn *conn) @@ -241,16 +348,22 @@ bgp_close_conn(struct bgp_conn *conn) DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; - rfree(conn->connect_retry_timer); - conn->connect_retry_timer = NULL; + conn->channels_to_send = 0; + rfree(conn->connect_timer); + conn->connect_timer = NULL; rfree(conn->keepalive_timer); conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; - rfree(conn->sk); - conn->sk = NULL; rfree(conn->tx_ev); conn->tx_ev = NULL; + rfree(conn->sk); + conn->sk = NULL; + + mb_free(conn->local_caps); + conn->local_caps = NULL; + mb_free(conn->remote_caps); + conn->remote_caps = NULL; } @@ -258,9 +371,9 @@ bgp_close_conn(struct bgp_conn *conn) * bgp_update_startup_delay - update a startup delay * @p: BGP instance * - * This function updates a startup delay that is used to postpone next BGP connect. - * It also handles disable_after_error and might stop BGP instance when error - * happened and disable_after_error is on. + * This function updates a startup delay that is used to postpone next BGP + * connect. It also handles disable_after_error and might stop BGP instance + * when error happened and disable_after_error is on. * * It should be called when BGP protocol error happened. */ @@ -271,17 +384,17 @@ bgp_update_startup_delay(struct bgp_proto *p) DBG("BGP: Updating startup delay\n"); - if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time)) + if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S)) p->startup_delay = 0; - p->last_proto_error = now; + p->last_proto_error = current_time(); if (cf->disable_after_error) - { - p->startup_delay = 0; - p->p.disabled = 1; - return; - } + { + p->startup_delay = 0; + p->p.disabled = 1; + return; + } if (!p->startup_delay) p->startup_delay = cf->error_delay_time_min; @@ -290,32 +403,38 @@ bgp_update_startup_delay(struct bgp_proto *p) } static void -bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode) +bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode) { switch (conn->state) - { - case BS_IDLE: - case BS_CLOSE: - return; - case BS_CONNECT: - case BS_ACTIVE: - bgp_conn_enter_idle_state(conn); - return; - case BS_OPENSENT: - case BS_OPENCONFIRM: - case BS_ESTABLISHED: - bgp_error(conn, 6, subcode, NULL, 0); - return; - default: - bug("bgp_graceful_close_conn: Unknown state %d", conn->state); - } + { + case BS_IDLE: + case BS_CLOSE: + return; + + case BS_CONNECT: + case BS_ACTIVE: + bgp_conn_enter_idle_state(conn); + return; + + case BS_OPENSENT: + case BS_OPENCONFIRM: + case BS_ESTABLISHED: + bgp_error(conn, 6, subcode, NULL, 0); + return; + + default: + bug("bgp_graceful_close_conn: Unknown state %d", conn->state); + } } static void bgp_down(struct bgp_proto *p) { if (p->start_state > BSS_PREPARE) - bgp_close(p, 1); + { + bgp_setup_auth(p, 0); + bgp_close(p); + } BGP_TRACE(D_EVENTS, "Down"); proto_notify_state(&p->p, PS_DOWN); @@ -327,20 +446,20 @@ bgp_decision(void *vp) struct bgp_proto *p = vp; DBG("BGP: Decision start\n"); - if ((p->p.proto_state == PS_START) - && (p->outgoing_conn.state == BS_IDLE) - && (p->incoming_conn.state != BS_OPENCONFIRM) - && (!p->cf->passive)) + if ((p->p.proto_state == PS_START) && + (p->outgoing_conn.state == BS_IDLE) && + (p->incoming_conn.state != BS_OPENCONFIRM) && + !p->cf->passive) bgp_active(p); - if ((p->p.proto_state == PS_STOP) - && (p->outgoing_conn.state == BS_IDLE) - && (p->incoming_conn.state == BS_IDLE)) + if ((p->p.proto_state == 
PS_STOP) && + (p->outgoing_conn.state == BS_IDLE) && + (p->incoming_conn.state == BS_IDLE)) bgp_down(p); } void -bgp_stop(struct bgp_proto *p, unsigned subcode) +bgp_stop(struct bgp_proto *p, uint subcode) { proto_notify_state(&p->p, PS_STOP); bgp_graceful_close_conn(&p->outgoing_conn, subcode); @@ -349,7 +468,7 @@ bgp_stop(struct bgp_proto *p, unsigned subcode) } static inline void -bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state) +bgp_conn_set_state(struct bgp_conn *conn, uint new_state) { if (conn->bgp->p.mrtdump & MD_STATES) mrt_dump_bgp_state_change(conn, conn->state, new_state); @@ -364,13 +483,17 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) bgp_conn_set_state(conn, BS_OPENCONFIRM); } +static const struct bgp_af_caps dummy_af_caps = { }; + void bgp_conn_enter_established_state(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; + struct bgp_caps *local = conn->local_caps; + struct bgp_caps *peer = conn->remote_caps; + struct bgp_channel *c; BGP_TRACE(D_EVENTS, "BGP session established"); - DBG("BGP: UP!!!\n"); /* For multi-hop BGP sessions */ if (ipa_zero(p->source_addr)) @@ -381,30 +504,92 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->conn = conn; p->last_error_class = 0; p->last_error_code = 0; - p->feed_state = BFS_NONE; - p->load_state = BFS_NONE; - bgp_init_bucket_table(p); - bgp_init_prefix_table(p, 8); - int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART); + p->as4_session = conn->as4_session; - if (p->p.gr_recovery && !peer_gr_ready) - proto_graceful_restart_unlock(&p->p); + p->route_refresh = peer->route_refresh; + p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh; - if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready) - p->p.gr_wait = 1; + /* Whether we may handle possible GR of peer (it has some AF GR-able) */ + p->gr_ready = 0; /* Updated later */ - if (p->gr_active) + /* Whether peer is ready to handle our GR recovery */ + int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART); + + if (p->gr_active_num) tm_stop(p->gr_timer); - if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING))) - bgp_graceful_restart_done(p); + /* Number of active channels */ + int num = 0; + + WALK_LIST(c, p->p.channels) + { + const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); + const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); + + /* Ignore AFIs that were not announced in multiprotocol capability */ + if (!loc || !loc->ready) + loc = &dummy_af_caps; + + if (!rem || !rem->ready) + rem = &dummy_af_caps; + + int active = loc->ready && rem->ready; + c->c.disabled = !active; + c->c.reloadable = p->route_refresh; + + c->index = active ? num++ : 0; - /* GR capability implies that neighbor will send End-of-RIB */ - if (conn->peer_gr_aware) - p->load_state = BFS_LOADING; + c->feed_state = BFS_NONE; + c->load_state = BFS_NONE; - /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */ + /* Channels where peer may do GR */ + c->gr_ready = active && local->gr_aware && rem->gr_able; + p->gr_ready = p->gr_ready || c->gr_ready; + + /* Channels not able to recover gracefully */ + if (p->p.gr_recovery && (!active || !peer_gr_ready)) + channel_graceful_restart_unlock(&c->c); + + /* Channels waiting for local convergence */ + if (p->p.gr_recovery && loc->gr_able && peer_gr_ready) + c->c.gr_wait = 1; + + /* Channels where peer is not able to recover gracefully */ + if (c->gr_active && ! 
(c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING))) + bgp_graceful_restart_done(c); + + /* GR capability implies that neighbor will send End-of-RIB */ + if (peer->gr_aware) + c->load_state = BFS_LOADING; + + c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop); + c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX); + c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX); + + /* Update RA mode */ + if (c->add_path_tx) + c->c.ra_mode = RA_ANY; + else if (c->cf->secondary) + c->c.ra_mode = RA_ACCEPTED; + else + c->c.ra_mode = RA_OPTIMAL; + } + + p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32)); + p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *)); + p->channel_count = num; + + WALK_LIST(c, p->p.channels) + { + if (c->c.disabled) + continue; + + p->afi_map[c->index] = c->afi; + p->channel_map[c->index] = c; + } + + /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */ bgp_conn_set_state(conn, BS_ESTABLISHED); proto_notify_state(&p->p, PS_UP); @@ -416,8 +601,9 @@ bgp_conn_leave_established_state(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "BGP session closed"); p->conn = NULL; - bgp_free_prefix_table(p); - bgp_free_bucket_table(p); + // XXXX free these tables to avoid memory leak during graceful restart + // bgp_free_prefix_table(p); + // bgp_free_bucket_table(p); if (p->p.proto_state == PS_UP) bgp_stop(p, 0); @@ -471,34 +657,57 @@ bgp_handle_graceful_restart(struct bgp_proto *p) ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready); BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s", - p->gr_active ? " - already pending" : ""); - proto_notify_state(&p->p, PS_START); + p->gr_active_num ? " - already pending" : ""); - if (p->gr_active) - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + p->gr_active_num = 0; + + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + { + if (c->gr_ready) + { + if (c->gr_active) + rt_refresh_end(c->c.table, &c->c); + + c->gr_active = 1; + p->gr_active_num++; + rt_refresh_begin(c->c.table, &c->c); + } + else + { + /* Just flush the routes */ + rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c); + } + } - p->gr_active = 1; - bgp_start_timer(p->gr_timer, p->conn->peer_gr_time); - rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + proto_notify_state(&p->p, PS_START); + bgp_start_timer(p->gr_timer, p->conn->local_caps->gr_time); } /** * bgp_graceful_restart_done - finish active BGP graceful restart - * @p: BGP instance + * @c: BGP channel * * This function is called when the active BGP graceful restart of the neighbor - * should be finished - either successfully (the neighbor sends all paths and - * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does - * not support BGP graceful restart on the new session). The function ends - * routing table refresh cycle and stops BGP restart timer. + * should be finished for channel @c - either successfully (the neighbor sends + * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or + * unsuccessfully (the neighbor does not support BGP graceful restart on the new + * session). The function ends the routing table refresh cycle. 
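Graceful restart is now tracked per channel: gr_active_num counts the channels still waiting for End-of-RIB, and the restart is reported as finished only when that counter returns to zero. A toy version of that bookkeeping, with hypothetical structures in place of bgp_proto and bgp_channel:

#include <stdio.h>

struct toy_channel { const char *name; int gr_ready; int gr_active; };
struct toy_peer    { struct toy_channel *ch; int nch; int gr_active_num; };

static void gr_start(struct toy_peer *p)
{
  p->gr_active_num = 0;
  for (int i = 0; i < p->nch; i++)
    if (p->ch[i].gr_ready)
    {
      p->ch[i].gr_active = 1;
      p->gr_active_num++;          /* this channel's routes are kept as stale */
    }
}

static void gr_done(struct toy_peer *p, struct toy_channel *c)
{
  if (!c->gr_active)
    return;
  c->gr_active = 0;
  if (--p->gr_active_num == 0)
    printf("graceful restart done for all channels\n");
}

int main(void)
{
  struct toy_channel ch[] = { { "ipv4", 1, 0 }, { "ipv6", 1, 0 }, { "vpn4", 0, 0 } };
  struct toy_peer p = { ch, 3, 0 };

  gr_start(&p);
  printf("channels pending: %d\n", p.gr_active_num);   /* 2 */
  gr_done(&p, &ch[0]);                                  /* ipv4 End-of-RIB */
  gr_done(&p, &ch[1]);                                  /* ipv6 End-of-RIB, restart done */
  return 0;
}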
*/ void -bgp_graceful_restart_done(struct bgp_proto *p) +bgp_graceful_restart_done(struct bgp_channel *c) { - BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); - p->gr_active = 0; - tm_stop(p->gr_timer); - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + struct bgp_proto *p = (void *) c->c.proto; + + ASSERT(c->gr_active); + c->gr_active = 0; + p->gr_active_num--; + + if (!p->gr_active_num) + BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + + rt_refresh_end(c->c.table, &c->c); } /** @@ -522,7 +731,7 @@ bgp_graceful_restart_timeout(timer *t) /** * bgp_refresh_begin - start incoming enhanced route refresh sequence - * @p: BGP instance + * @c: BGP channel * * This function is called when an incoming enhanced route refresh sequence is * started by the neighbor, demarcated by the BoRR packet. The function updates @@ -531,18 +740,20 @@ bgp_graceful_restart_timeout(timer *t) * ensure that these two sequences do not overlap. */ void -bgp_refresh_begin(struct bgp_proto *p) +bgp_refresh_begin(struct bgp_channel *c) { - if (p->load_state == BFS_LOADING) - { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } + struct bgp_proto *p = (void *) c->c.proto; + + if (c->load_state == BFS_LOADING) + { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } - p->load_state = BFS_REFRESHING; - rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook); + c->load_state = BFS_REFRESHING; + rt_refresh_begin(c->c.table, &c->c); } /** * bgp_refresh_end - finish incoming enhanced route refresh sequence - * @p: BGP instance + * @c: BGP channel * * This function is called when an incoming enhanced route refresh sequence is * finished by the neighbor, demarcated by the EoRR packet. The function updates @@ -550,39 +761,26 @@ bgp_refresh_begin(struct bgp_proto *p) * during the sequence are removed by the nest. */ void -bgp_refresh_end(struct bgp_proto *p) +bgp_refresh_end(struct bgp_channel *c) { - if (p->load_state != BFS_REFRESHING) - { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } + struct bgp_proto *p = (void *) c->c.proto; + + if (c->load_state != BFS_REFRESHING) + { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } - p->load_state = BFS_NONE; - rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook); + c->load_state = BFS_NONE; + rt_refresh_end(c->c.table, &c->c); } static void bgp_send_open(struct bgp_conn *conn) { - conn->start_state = conn->bgp->start_state; - - // Default values, possibly changed by receiving capabilities. 
- conn->advertised_as = 0; - conn->peer_refresh_support = 0; - conn->peer_as4_support = 0; - conn->peer_add_path = 0; - conn->peer_enhanced_refresh_support = 0; - conn->peer_gr_aware = 0; - conn->peer_gr_able = 0; - conn->peer_gr_time = 0; - conn->peer_gr_flags = 0; - conn->peer_gr_aflags = 0; - conn->peer_ext_messages_support = 0; - DBG("BGP: Sending open\n"); conn->sk->rx_hook = bgp_rx; conn->sk->tx_hook = bgp_tx; - tm_stop(conn->connect_retry_timer); - bgp_schedule_packet(conn, PKT_OPEN); + tm_stop(conn->connect_timer); + bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); } @@ -605,10 +803,10 @@ bgp_connect_timeout(timer *t) DBG("BGP: connect_timeout\n"); if (p->p.proto_state == PS_START) - { - bgp_close_conn(conn); - bgp_connect(p); - } + { + bgp_close_conn(conn); + bgp_connect(p); + } else bgp_conn_enter_idle_state(conn); } @@ -672,7 +870,7 @@ bgp_keepalive_timeout(timer *t) struct bgp_conn *conn = t->data; DBG("BGP: Keepalive timer\n"); - bgp_schedule_packet(conn, PKT_KEEPALIVE); + bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); /* Kick TX a bit faster */ if (ev_active(conn->tx_ev)) @@ -682,21 +880,18 @@ bgp_keepalive_timeout(timer *t) static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) { - timer *t; - conn->sk = NULL; conn->bgp = p; + conn->packets_to_send = 0; + conn->channels_to_send = 0; + conn->last_channel = 0; + conn->last_channel_count = 0; + + conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0); + conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0); + conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0); - t = conn->connect_retry_timer = tm_new(p->p.pool); - t->hook = bgp_connect_timeout; - t->data = conn; - t = conn->hold_timer = tm_new(p->p.pool); - t->hook = bgp_hold_timeout; - t->data = conn; - t = conn->keepalive_timer = tm_new(p->p.pool); - t->hook = bgp_keepalive_timeout; - t->data = conn; conn->tx_ev = ev_new(p->p.pool); conn->tx_ev->hook = bgp_kick_tx; conn->tx_ev->data = conn; @@ -720,7 +915,7 @@ bgp_active(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); bgp_setup_conn(p, conn); bgp_conn_set_state(conn, BS_ACTIVE); - bgp_start_timer(conn->connect_retry_timer, delay); + bgp_start_timer(conn->connect_timer, delay); } /** @@ -734,12 +929,11 @@ bgp_active(struct bgp_proto *p) static void bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */ { - sock *s; struct bgp_conn *conn = &p->outgoing_conn; int hops = p->cf->multihop ? 
: 1; DBG("BGP: Connecting\n"); - s = sk_new(p->p.pool); + sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; s->saddr = p->source_addr; s->daddr = p->cf->remote_ip; @@ -766,10 +960,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c goto err; DBG("BGP: Waiting for connect success\n"); - bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time); + bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time); return; - err: +err: sk_log_error(s, p->p.name); bgp_sock_err(s, 0); return; @@ -783,16 +977,15 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c static struct bgp_proto * bgp_find_proto(sock *sk) { - struct proto_config *pc; + struct bgp_proto *p; - WALK_LIST(pc, config->protos) - if ((pc->protocol == &proto_bgp) && pc->proto) - { - struct bgp_proto *p = (struct bgp_proto *) pc->proto; - if (ipa_equal(p->cf->remote_ip, sk->daddr) && - (!p->cf->iface || (p->cf->iface == sk->iface))) - return p; - } + WALK_LIST(p, proto_list) + if ((p->p.proto == &proto_bgp) && + ipa_equal(p->cf->remote_ip, sk->daddr) && + (!p->cf->iface || (p->cf->iface == sk->iface)) && + (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) && + (p->cf->local_port == sk->sport)) + return p; return NULL; } @@ -818,12 +1011,12 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport); p = bgp_find_proto(sk); if (!p) - { - log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)", - sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport); - rfree(sk); - return 0; - } + { + log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)", + sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport); + rfree(sk); + return 0; + } /* * BIRD should keep multiple incoming connections in OpenSent state (for @@ -836,26 +1029,26 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready) - { - bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); - bgp_handle_graceful_restart(p); - bgp_conn_enter_idle_state(p->conn); - acc = 1; - - /* There might be separate incoming connection in OpenSent state */ - if (p->incoming_conn.state > BS_ACTIVE) - bgp_close_conn(&p->incoming_conn); - } + { + bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART); + bgp_handle_graceful_restart(p); + bgp_conn_enter_idle_state(p->conn); + acc = 1; + + /* There might be separate incoming connection in OpenSent state */ + if (p->incoming_conn.state > BS_ACTIVE) + bgp_close_conn(&p->incoming_conn); + } BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport, acc ? "accepted" : "rejected"); if (!acc) - { - rfree(sk); - return 0; - } + { + rfree(sk); + return 0; + } hops = p->cf->multihop ? 
: 1; @@ -867,11 +1060,11 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) goto err; if (p->cf->enable_extended_messages) - { - sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; - sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; - sk_reallocate(sk); - } + { + sk->rbsize = BGP_RX_BUFFER_EXT_SIZE; + sk->tbsize = BGP_TX_BUFFER_EXT_SIZE; + sk_reallocate(sk); + } bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); @@ -894,34 +1087,6 @@ bgp_listen_sock_err(sock *sk UNUSED, int err) log(L_ERR "BGP: Error on listening socket: %M", err); } -static sock * -bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags) -{ - sock *s = sk_new(&root_pool); - DBG("BGP: Creating listening socket\n"); - s->type = SK_TCP_PASSIVE; - s->ttl = 255; - s->saddr = addr; - s->sport = port ? port : BGP_PORT; - s->flags = flags ? 0 : SKF_V6ONLY; - s->tos = IP_PREC_INTERNET_CONTROL; - s->rbsize = BGP_RX_BUFFER_SIZE; - s->tbsize = BGP_TX_BUFFER_SIZE; - s->rx_hook = bgp_incoming_connection; - s->err_hook = bgp_listen_sock_err; - - if (sk_open(s) < 0) - goto err; - - return s; - - err: - sk_log_error(s, "BGP"); - log(L_ERR "BGP: Cannot open listening socket"); - rfree(s); - return NULL; -} - static void bgp_start_neighbor(struct bgp_proto *p) { @@ -930,23 +1095,10 @@ bgp_start_neighbor(struct bgp_proto *p) if (ipa_zero(p->source_addr)) p->source_addr = p->neigh->ifa->ip; -#ifdef IPV6 - { - struct ifa *a; - p->local_link = IPA_NONE; - WALK_LIST(a, p->neigh->iface->addrs) - if (a->scope == SCOPE_LINK) - { - p->local_link = a->ip; - break; - } - - if (! ipa_nonzero(p->local_link)) - log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name); - - DBG("BGP: Selected link-level address %I\n", p->local_link); - } -#endif + if (ipa_is_link_local(p->source_addr)) + p->link_addr = p->source_addr; + else if (p->neigh->iface->llv6) + p->link_addr = p->neigh->iface->llv6->ip; bgp_initiate(p); } @@ -966,34 +1118,34 @@ bgp_neigh_notify(neighbor *n) int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE); if (n->scope <= 0) + { + if (!prepare) { - if (!prepare) - { - BGP_TRACE(D_EVENTS, "Neighbor lost"); - bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); - /* Perhaps also run bgp_update_startup_delay(p)? */ - bgp_stop(p, 0); - } + BGP_TRACE(D_EVENTS, "Neighbor lost"); + bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); + /* Perhaps also run bgp_update_startup_delay(p)? 
*/ + bgp_stop(p, 0); } + } else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) + { + if (!prepare) { - if (!prepare) - { - BGP_TRACE(D_EVENTS, "Link down"); - bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN); - if (ps == PS_UP) - bgp_update_startup_delay(p); - bgp_stop(p, 0); - } + BGP_TRACE(D_EVENTS, "Link down"); + bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0); } + } else + { + if (prepare) { - if (prepare) - { - BGP_TRACE(D_EVENTS, "Neighbor ready"); - bgp_start_neighbor(p); - } + BGP_TRACE(D_EVENTS, "Neighbor ready"); + bgp_start_neighbor(p); } + } } static void @@ -1003,13 +1155,13 @@ bgp_bfd_notify(struct bfd_request *req) int ps = p->p.proto_state; if (req->down && ((ps == PS_START) || (ps == PS_UP))) - { - BGP_TRACE(D_EVENTS, "BFD session down"); - bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); - if (ps == PS_UP) - bgp_update_startup_delay(p); - bgp_stop(p, 0); - } + { + BGP_TRACE(D_EVENTS, "BFD session down"); + bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } } static void @@ -1021,71 +1173,72 @@ bgp_update_bfd(struct bgp_proto *p, int use_bfd) bgp_bfd_notify, p); if (!use_bfd && p->bfd_req) - { - rfree(p->bfd_req); - p->bfd_req = NULL; - } + { + rfree(p->bfd_req); + p->bfd_req = NULL; + } } -static int -bgp_reload_routes(struct proto *P) +static void +bgp_reload_routes(struct channel *C) { - struct bgp_proto *p = (struct bgp_proto *) P; - if (!p->conn || !p->conn->peer_refresh_support) - return 0; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; - bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH); - return 1; + ASSERT(p->conn && p->route_refresh); + + bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } static void -bgp_feed_begin(struct proto *P, int initial) +bgp_feed_begin(struct channel *C, int initial) { - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; /* This should not happen */ if (!p->conn) return; if (initial && p->cf->gr_mode) - p->feed_state = BFS_LOADING; + c->feed_state = BFS_LOADING; /* It is refeed and both sides support enhanced route refresh */ - if (!initial && p->cf->enable_refresh && - p->conn->peer_enhanced_refresh_support) - { - /* BoRR must not be sent before End-of-RIB */ - if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED) - return; + if (!initial && p->enhanced_refresh) + { + /* BoRR must not be sent before End-of-RIB */ + if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED) + return; - p->feed_state = BFS_REFRESHING; - bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH); - } + c->feed_state = BFS_REFRESHING; + bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH); + } } static void -bgp_feed_end(struct proto *P) +bgp_feed_end(struct channel *C) { - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; /* This should not happen */ if (!p->conn) return; /* Non-demarcated feed ended, nothing to do */ - if (p->feed_state == BFS_NONE) + if (c->feed_state == BFS_NONE) return; /* Schedule End-of-RIB packet */ - if (p->feed_state == BFS_LOADING) - p->feed_state = BFS_LOADED; + if (c->feed_state == BFS_LOADING) + c->feed_state = BFS_LOADED; /* Schedule EoRR packet */ - if (p->feed_state == BFS_REFRESHING) - p->feed_state = BFS_REFRESHED; + if (c->feed_state == BFS_REFRESHING) + c->feed_state = BFS_REFRESHED; 
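/*
 * Summary of the per-channel TX feed states (BFS_*) as used here:
 * bgp_feed_begin() above enters BFS_LOADING for the initial feed when
 * graceful restart mode is configured, or sends BoRR and enters
 * BFS_REFRESHING for a refeed when enhanced route refresh is negotiated
 * (never before End-of-RIB). Advancing to BFS_LOADED / BFS_REFRESHED here
 * marks that the scheduled End-of-RIB or EoRR can be emitted by the TX code
 * once the remaining updates are sent; the PKT_UPDATE kick below then
 * triggers the TX hook.
 */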
/* Kick TX hook */ - bgp_schedule_packet(p->conn, PKT_UPDATE); + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } @@ -1096,30 +1249,30 @@ bgp_start_locked(struct object_lock *lock) struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) - { - DBG("BGP: Got lock in different state %d\n", p->p.proto_state); - return; - } + { + DBG("BGP: Got lock in different state %d\n", p->p.proto_state); + return; + } DBG("BGP: Got lock\n"); if (cf->multihop) - { - /* Multi-hop sessions do not use neighbor entries */ - bgp_initiate(p); - return; - } + { + /* Multi-hop sessions do not use neighbor entries */ + bgp_initiate(p); + return; + } neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY); if (!n) - { - log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); - /* As we do not start yet, we can just disable protocol */ - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); - proto_notify_state(&p->p, PS_DOWN); - return; - } + { + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); + /* As we do not start yet, we can just disable protocol */ + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); + proto_notify_state(&p->p, PS_DOWN); + return; + } p->neigh = n; @@ -1144,36 +1297,34 @@ bgp_start(struct proto *P) p->neigh = NULL; p->bfd_req = NULL; p->gr_ready = 0; - p->gr_active = 0; - - rt_lock_table(p->igp_table); + p->gr_active_num = 0; p->event = ev_new(p->p.pool); p->event->hook = bgp_decision; p->event->data = p; - p->startup_timer = tm_new(p->p.pool); - p->startup_timer->hook = bgp_startup_timeout; - p->startup_timer->data = p; - - p->gr_timer = tm_new(p->p.pool); - p->gr_timer->hook = bgp_graceful_restart_timeout; - p->gr_timer->data = p; + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); + p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); p->local_id = proto_get_router_id(P->cf); if (p->rr_client) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; p->remote_id = 0; - p->source_addr = p->cf->source_addr; + p->source_addr = p->cf->local_ip; + p->link_addr = IPA_NONE; + /* XXXX */ if (p->p.gr_recovery && p->cf->gr_mode) - proto_graceful_restart_lock(P); + { + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + channel_graceful_restart_lock(&c->c); + } /* - * Before attempting to create the connection, we need to lock the - * port, so that are sure we're the only instance attempting to talk - * with that neighbor. + * Before attempting to create the connection, we need to lock the port, + * so that we are the only instance attempting to talk with that neighbor. 
*/ lock = p->lock = olock_new(P->pool); @@ -1194,78 +1345,64 @@ static int bgp_shutdown(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - unsigned subcode = 0; + uint subcode = 0; BGP_TRACE(D_EVENTS, "Shutdown requested"); switch (P->down_code) - { - case PDC_CF_REMOVE: - case PDC_CF_DISABLE: - subcode = 3; // Errcode 6, 3 - peer de-configured - break; - - case PDC_CF_RESTART: - subcode = 6; // Errcode 6, 6 - other configuration change - break; - - case PDC_CMD_DISABLE: - case PDC_CMD_SHUTDOWN: - subcode = 2; // Errcode 6, 2 - administrative shutdown - break; - - case PDC_CMD_RESTART: - subcode = 4; // Errcode 6, 4 - administrative reset - break; - - case PDC_RX_LIMIT_HIT: - case PDC_IN_LIMIT_HIT: - subcode = 1; // Errcode 6, 1 - max number of prefixes reached - /* log message for compatibility */ - log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); - goto limit; - - case PDC_OUT_LIMIT_HIT: - subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown - - limit: - bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); - if (proto_restart) - bgp_update_startup_delay(p); - else - p->startup_delay = 0; - goto done; - } + { + case PDC_CF_REMOVE: + case PDC_CF_DISABLE: + subcode = 3; // Errcode 6, 3 - peer de-configured + break; + + case PDC_CF_RESTART: + subcode = 6; // Errcode 6, 6 - other configuration change + break; + + case PDC_CMD_DISABLE: + case PDC_CMD_SHUTDOWN: + subcode = 2; // Errcode 6, 2 - administrative shutdown + break; + + case PDC_CMD_RESTART: + subcode = 4; // Errcode 6, 4 - administrative reset + break; + + case PDC_RX_LIMIT_HIT: + case PDC_IN_LIMIT_HIT: + subcode = 1; // Errcode 6, 1 - max number of prefixes reached + /* log message for compatibility */ + log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); + goto limit; + + case PDC_OUT_LIMIT_HIT: + subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown + + limit: + bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); + if (proto_restart) + bgp_update_startup_delay(p); + else + p->startup_delay = 0; + goto done; + } bgp_store_error(p, NULL, BE_MAN_DOWN, 0); p->startup_delay = 0; - done: +done: bgp_stop(p, subcode); return p->p.proto_state; } -static void -bgp_cleanup(struct proto *P) -{ - struct bgp_proto *p = (struct bgp_proto *) P; - rt_unlock_table(p->igp_table); -} - -static rtable * -get_igp_table(struct bgp_config *cf) -{ - return cf->igp_table ? cf->igp_table->table : cf->c.table->table; -} - static struct proto * -bgp_init(struct proto_config *C) +bgp_init(struct proto_config *CF) { - struct proto *P = proto_new(C, sizeof(struct bgp_proto)); - struct bgp_config *c = (struct bgp_config *) C; + struct proto *P = proto_new(CF); struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_config *cf = (struct bgp_config *) CF; - P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL; P->rt_notify = bgp_rt_notify; P->import_control = bgp_import_control; P->neigh_notify = bgp_neigh_notify; @@ -1274,102 +1411,276 @@ bgp_init(struct proto_config *C) P->feed_end = bgp_feed_end; P->rte_better = bgp_rte_better; P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL; - - p->cf = c; - p->local_as = c->local_as; - p->remote_as = c->remote_as; - p->is_internal = (c->local_as == c->remote_as); - p->rs_client = c->rs_client; - p->rr_client = c->rr_client; - p->igp_table = get_igp_table(c); + P->rte_recalculate = cf->deterministic_med ? 
bgp_rte_recalculate : NULL; + + p->cf = cf; + p->local_as = cf->local_as; + p->remote_as = cf->remote_as; + p->public_as = cf->local_as; + p->is_internal = (cf->local_as == cf->remote_as); + p->is_interior = p->is_internal || cf->confederation_member; + p->rs_client = cf->rs_client; + p->rr_client = cf->rr_client; + + /* Confederation ID is used for truly external peers */ + if (cf->confederation && !p->is_interior) + p->public_as = cf->confederation; + + /* Add all channels */ + struct bgp_channel_config *cc; + WALK_LIST(cc, CF->channels) + proto_add_channel(P, &cc->c); return P; } +static void +bgp_channel_init(struct channel *C, struct channel_config *CF) +{ + struct bgp_channel *c = (void *) C; + struct bgp_channel_config *cf = (void *) CF; + + c->cf = cf; + c->afi = cf->afi; + c->desc = cf->desc; + + if (cf->igp_table_ip4) + c->igp_table_ip4 = cf->igp_table_ip4->table; + + if (cf->igp_table_ip6) + c->igp_table_ip6 = cf->igp_table_ip6->table; +} + +static int +bgp_channel_start(struct channel *C) +{ + struct bgp_proto *p = (void *) C->proto; + struct bgp_channel *c = (void *) C; + ip_addr src = p->source_addr; + + if (c->igp_table_ip4) + rt_lock_table(c->igp_table_ip4); + + if (c->igp_table_ip6) + rt_lock_table(c->igp_table_ip6); + + c->pool = p->p.pool; // XXXX + bgp_init_bucket_table(c); + bgp_init_prefix_table(c); + + c->next_hop_addr = c->cf->next_hop_addr; + c->link_addr = IPA_NONE; + c->packets_to_send = 0; + + /* Try to use source address as next hop address */ + if (ipa_zero(c->next_hop_addr)) + { + if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop)) + c->next_hop_addr = src; + + if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop)) + c->next_hop_addr = src; + } + + /* Exit if no feasible next hop address is found */ + if (ipa_zero(c->next_hop_addr)) + { + log(L_WARN "%s: Missing next hop address", p->p.name); + return 0; + } + + /* Set link-local address for IPv6 single-hop BGP */ + if (ipa_is_ip6(c->next_hop_addr) && p->neigh) + { + c->link_addr = p->link_addr; + + if (ipa_zero(c->link_addr)) + log(L_WARN "%s: Missing link-local address", p->p.name); + } + + /* Link local address is already in c->link_addr */ + if (ipa_is_link_local(c->next_hop_addr)) + c->next_hop_addr = IPA_NONE; + + return 0; /* XXXX: Currently undefined */ +} + +static void +bgp_channel_shutdown(struct channel *C) +{ + struct bgp_channel *c = (void *) C; + + /* XXXX: cleanup bucket and prefix tables */ + + c->next_hop_addr = IPA_NONE; + c->link_addr = IPA_NONE; +} + +static void +bgp_channel_cleanup(struct channel *C) +{ + struct bgp_channel *c = (void *) C; + + if (c->igp_table_ip4) + rt_unlock_table(c->igp_table_ip4); + + if (c->igp_table_ip6) + rt_unlock_table(c->igp_table_ip6); +} + +static inline struct bgp_channel_config * +bgp_find_channel_config(struct bgp_config *cf, u32 afi) +{ + struct bgp_channel_config *cc; + + WALK_LIST(cc, cf->c.channels) + if (cc->afi == afi) + return cc; + + return NULL; +} + +struct rtable_config * +bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type) +{ + struct bgp_channel_config *cc2; + struct rtable_config *tab; + + /* First, try table connected by the channel */ + if (cc->c.table->addr_type == type) + return cc->c.table; + + /* Find paired channel with the same SAFI but the other AFI */ + u32 afi2 = cc->afi ^ 0x30000; + cc2 = bgp_find_channel_config(cf, afi2); + + /* Second, try IGP table configured in the paired channel */ + if (cc2 && (tab = (type == NET_IP4) ? 
cc2->igp_table_ip4 : cc2->igp_table_ip6)) + return tab; + + /* Third, try table connected by the paired channel */ + if (cc2 && (cc2->c.table->addr_type == type)) + return cc2->c.table; + + /* Last, try default table of given type */ + if (tab = cf->c.global->def_tables[type]) + return tab; + + cf_error("Undefined IGP table"); +} + void -bgp_check_config(struct bgp_config *c) +bgp_postconfig(struct proto_config *CF) { - int internal = (c->local_as == c->remote_as); + struct bgp_config *cf = (void *) CF; + int internal = (cf->local_as == cf->remote_as); /* Do not check templates at all */ - if (c->c.class == SYM_TEMPLATE) + if (cf->c.class == SYM_TEMPLATE) return; /* EBGP direct by default, IBGP multihop by default */ - if (c->multihop < 0) - c->multihop = internal ? 64 : 0; - - /* Different default for gw_mode */ - if (!c->gw_mode) - c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT; + if (cf->multihop < 0) + cf->multihop = internal ? 64 : 0; - /* Different default based on rs_client */ - if (!c->missing_lladdr) - c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF; - /* Disable after error incompatible with restart limit action */ - if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error) - c->c.in_limit->action = PLA_DISABLE; - - - if (!c->local_as) + if (!cf->local_as) cf_error("Local AS number must be set"); - if (ipa_zero(c->remote_ip)) + if (ipa_zero(cf->remote_ip)) cf_error("Neighbor must be configured"); - if (!c->remote_as) + if (!cf->remote_as) cf_error("Remote AS number must be set"); - if (ipa_is_link_local(c->remote_ip) && !c->iface) + if (ipa_is_link_local(cf->remote_ip) && !cf->iface) cf_error("Link-local neighbor address requires specified interface"); - if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF)) + if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF)) cf_error("Neighbor AS number out of range (AS4 not available)"); - if (!internal && c->rr_client) + if (!internal && cf->rr_client) cf_error("Only internal neighbor can be RR client"); - if (internal && c->rs_client) + if (internal && cf->rs_client) cf_error("Only external neighbor can be RS client"); - if (c->multihop && (c->gw_mode == GW_DIRECT)) - cf_error("Multihop BGP cannot use direct gateway mode"); + if (!cf->confederation && cf->confederation_member) + cf_error("Confederation ID must be set for member sessions"); - if (c->multihop && (ipa_is_link_local(c->remote_ip) || - ipa_is_link_local(c->source_addr))) + if (cf->multihop && (ipa_is_link_local(cf->local_ip) || + ipa_is_link_local(cf->remote_ip))) cf_error("Multihop BGP cannot be used with link-local addresses"); - if (c->multihop && c->iface) + if (cf->multihop && cf->iface) cf_error("Multihop BGP cannot be bound to interface"); - if (c->multihop && c->check_link) + if (cf->multihop && cf->check_link) cf_error("Multihop BGP cannot depend on link state"); - if (c->multihop && c->bfd && ipa_zero(c->source_addr)) - cf_error("Multihop BGP with BFD requires specified source address"); + if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip)) + cf_error("Multihop BGP with BFD requires specified local address"); - if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted) - cf_error("BGP in recursive mode prohibits sorted table"); - if (c->deterministic_med && c->c.table->sorted) - cf_error("BGP with deterministic MED prohibits sorted table"); + struct bgp_channel_config *cc; + WALK_LIST(cc, CF->channels) + { + /* Disable after error incompatible with restart limit action */ + if 
((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error) + cc->c.in_limit.action = PLA_DISABLE; - if (c->secondary && !c->c.table->sorted) - cf_error("BGP with secondary option requires sorted table"); + /* Different default based on rs_client */ + if (!cc->missing_lladdr) + cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF; + + /* Different default for gw_mode */ + if (!cc->gw_mode) + cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT; + + /* Default based on proto config */ + if (cc->gr_able == 0xff) + cc->gr_able = (cf->gr_mode == BGP_GR_ABLE); + + /* Default values of IGP tables */ + if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp) + { + if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop)) + cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4); + + if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop)) + cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6); + + if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop) + cf_error("Mismatched IGP table type"); + + if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop) + cf_error("Mismatched IGP table type"); + } + + if (cf->multihop && (cc->gw_mode == GW_DIRECT)) + cf_error("Multihop BGP cannot use direct gateway mode"); + + if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted) + cf_error("BGP in recursive mode prohibits sorted table"); + + if (cf->deterministic_med && cc->c.table->sorted) + cf_error("BGP with deterministic MED prohibits sorted table"); + + if (cc->secondary && !cc->c.table->sorted) + cf_error("BGP with secondary option requires sorted table"); + } } static int -bgp_reconfigure(struct proto *P, struct proto_config *C) +bgp_reconfigure(struct proto *P, struct proto_config *CF) { - struct bgp_config *new = (struct bgp_config *) C; - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (void *) P; + struct bgp_config *new = (void *) CF; struct bgp_config *old = p->cf; - if (proto_get_router_id(C) != p->local_id) + if (proto_get_router_id(CF) != p->local_id) return 0; int same = !memcmp(((byte *) old) + sizeof(struct proto_config), @@ -1377,8 +1688,26 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && !strcmp(old->password, new->password))) - && (get_igp_table(old) == get_igp_table(new)); + || (old->password && new->password && !strcmp(old->password, new->password))); + + /* FIXME: Move channel reconfiguration to generic protocol code ? */ + struct channel *C, *C2; + struct bgp_channel_config *cc; + + WALK_LIST(C, p->p.channels) + C->stale = 1; + + WALK_LIST(cc, new->c.channels) + { + C = (struct channel *) bgp_find_channel(p, cc->afi); + same = proto_configure_channel(P, &C, &cc->c) && same; + C->stale = 0; + } + + WALK_LIST_DELSAFE(C, C2, p->p.channels) + if (C->stale) + same = proto_configure_channel(P, &C, NULL) && same; + if (same && (p->start_state > BSS_PREPARE)) bgp_update_bfd(p, new->bfd); @@ -1390,11 +1719,34 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) return same; } +#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? 
(cf)->igp_table_##sym ->table : NULL ) + +static int +bgp_channel_reconfigure(struct channel *C, struct channel_config *CC) +{ + struct bgp_channel *c = (void *) C; + struct bgp_channel_config *new = (void *) CC; + struct bgp_channel_config *old = c->cf; + + if (memcmp(((byte *) old) + sizeof(struct channel_config), + ((byte *) new) + sizeof(struct channel_config), + /* Remaining items must be checked separately */ + OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config))) + return 0; + + /* Check change in IGP tables */ + if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) || + (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6))) + return 0; + + c->cf = new; + return 1; +} + static void -bgp_copy_config(struct proto_config *dest, struct proto_config *src) +bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Just a shallow copy */ - proto_copy_rest(dest, src, sizeof(struct bgp_config)); } @@ -1411,14 +1763,14 @@ bgp_copy_config(struct proto_config *dest, struct proto_config *src) * closes the connection. */ void -bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) +bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len) { struct bgp_proto *p = c->bgp; if (c->state == BS_CLOSE) return; - bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len); + bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len)); bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode); bgp_conn_enter_close_state(c); @@ -1426,13 +1778,13 @@ bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int l c->notify_subcode = subcode; c->notify_data = data; c->notify_size = (len > 0) ? len : 0; - bgp_schedule_packet(c, PKT_NOTIFICATION); + bgp_schedule_packet(c, NULL, PKT_NOTIFICATION); if (code != 6) - { - bgp_update_startup_delay(p); - bgp_stop(p, 0); - } + { + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } } /** @@ -1471,19 +1823,19 @@ static const char * bgp_last_errmsg(struct bgp_proto *p) { switch (p->last_error_class) - { - case BE_MISC: - return bgp_misc_errors[p->last_error_code]; - case BE_SOCKET: - return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); - case BE_BGP_RX: - case BE_BGP_TX: - return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF); - case BE_AUTO_DOWN: - return bgp_auto_errors[p->last_error_code]; - default: - return ""; - } + { + case BE_MISC: + return bgp_misc_errors[p->last_error_code]; + case BE_SOCKET: + return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); + case BE_BGP_RX: + case BE_BGP_TX: + return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF); + case BE_AUTO_DOWN: + return bgp_auto_errors[p->last_error_code]; + default: + return ""; + } } static const char * @@ -1514,48 +1866,165 @@ bgp_get_status(struct proto *P, byte *buf) } static void +bgp_show_afis(int code, char *s, u32 *afis, uint count) +{ + buffer b; + LOG_BUFFER_INIT(b); + + buffer_puts(&b, s); + + for (u32 *af = afis; af < (afis + count); af++) + { + const struct bgp_af_desc *desc = bgp_get_af_desc(*af); + if (desc) + buffer_print(&b, " %s", desc->name); + else + buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af)); + } + + if (b.pos == b.end) + strcpy(b.end - 32, " ... 
<too long>"); + + cli_msg(code, b.start); +} + +static void +bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) +{ + struct bgp_af_caps *ac; + uint any_mp_bgp = 0; + uint any_gr_able = 0; + uint any_add_path = 0; + uint any_ext_next_hop = 0; + u32 *afl1 = alloca(caps->af_count * sizeof(u32)); + u32 *afl2 = alloca(caps->af_count * sizeof(u32)); + uint afn1, afn2; + + WALK_AF_CAPS(caps, ac) + { + any_mp_bgp |= ac->ready; + any_gr_able |= ac->gr_able; + any_add_path |= ac->add_path; + any_ext_next_hop |= ac->ext_next_hop; + } + + if (any_mp_bgp) + { + cli_msg(-1006, " Multiprotocol"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ready) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " AF announced:", afl1, afn1); + } + + if (caps->route_refresh) + cli_msg(-1006, " Route refresh"); + + if (any_ext_next_hop) + { + cli_msg(-1006, " Extended next hop"); + + afn1 = 0; + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + afl1[afn1++] = ac->afi; + + bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1); + } + + if (caps->ext_messages) + cli_msg(-1006, " Extended message"); + + if (caps->gr_aware) + cli_msg(-1006, " Graceful restart"); + + if (any_gr_able) + { + /* Continues from gr_aware */ + cli_msg(-1006, " Restart time: %u", caps->gr_time); + if (caps->gr_flags & BGP_GRF_RESTART) + cli_msg(-1006, " Restart recovery"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->gr_able) + afl1[afn1++] = ac->afi; + + if (ac->gr_af_flags & BGP_GRF_FORWARDING) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " AF supported:", afl1, afn1); + bgp_show_afis(-1006, " AF preserved:", afl2, afn2); + } + + if (caps->as4_support) + cli_msg(-1006, " 4-octet AS numbers"); + + if (any_add_path) + { + cli_msg(-1006, " ADD-PATH"); + + afn1 = afn2 = 0; + WALK_AF_CAPS(caps, ac) + { + if (ac->add_path & BGP_ADD_PATH_RX) + afl1[afn1++] = ac->afi; + + if (ac->add_path & BGP_ADD_PATH_TX) + afl2[afn2++] = ac->afi; + } + + bgp_show_afis(-1006, " RX:", afl1, afn1); + bgp_show_afis(-1006, " TX:", afl2, afn2); + } + + if (caps->enhanced_refresh) + cli_msg(-1006, " Enhanced refresh"); +} + +static void bgp_show_proto_info(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct bgp_conn *c = p->conn; - - proto_show_basic_info(P); cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); cli_msg(-1006, " Neighbor AS: %u", p->remote_as); - if (p->gr_active) + if (p->gr_active_num) cli_msg(-1006, " Neighbor graceful restart active"); if (P->proto_state == PS_START) - { - struct bgp_conn *oc = &p->outgoing_conn; + { + struct bgp_conn *oc = &p->outgoing_conn; - if ((p->start_state < BSS_CONNECT) && - (p->startup_timer->expires)) - cli_msg(-1006, " Error wait: %d/%d", - p->startup_timer->expires - now, p->startup_delay); + if ((p->start_state < BSS_CONNECT) && + (tm_active(p->startup_timer))) + cli_msg(-1006, " Error wait: %t/%u", + tm_remains(p->startup_timer), p->startup_delay); - if ((oc->state == BS_ACTIVE) && - (oc->connect_retry_timer->expires)) - cli_msg(-1006, " Connect delay: %d/%d", - oc->connect_retry_timer->expires - now, p->cf->connect_delay_time); + if ((oc->state == BS_ACTIVE) && + (tm_active(oc->connect_timer))) + cli_msg(-1006, " Connect delay: %t/%u", + tm_remains(oc->connect_timer), p->cf->connect_delay_time); - if (p->gr_active && p->gr_timer->expires) - cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now); - } + if (p->gr_active_num && tm_active(p->gr_timer)) + cli_msg(-1006, " 
Restart timer: %t/-", + tm_remains(p->gr_timer)); + } else if (P->proto_state == PS_UP) - { - cli_msg(-1006, " Neighbor ID: %R", p->remote_id); - cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s", - c->peer_refresh_support ? " refresh" : "", - c->peer_enhanced_refresh_support ? " enhanced-refresh" : "", - c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""), - c->peer_as4_support ? " AS4" : "", - (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "", - (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "", - c->peer_ext_messages_support ? " ext-messages" : ""); + { + cli_msg(-1006, " Neighbor ID: %R", p->remote_id); + cli_msg(-1006, " Local capabilities"); + bgp_show_capabilities(p, p->conn->local_caps); + cli_msg(-1006, " Neighbor capabilities"); + bgp_show_capabilities(p, p->conn->remote_caps); +/* XXXX cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s", p->is_internal ? "internal" : "external", p->cf->multihop ? " multihop" : "", @@ -1565,35 +2034,60 @@ bgp_show_proto_info(struct proto *P) p->add_path_rx ? " add-path-rx" : "", p->add_path_tx ? " add-path-tx" : "", p->ext_messages ? " ext-messages" : ""); - cli_msg(-1006, " Source address: %I", p->source_addr); - if (P->cf->in_limit) - cli_msg(-1006, " Route limit: %d/%d", - p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit); - cli_msg(-1006, " Hold timer: %d/%d", - tm_remains(c->hold_timer), c->hold_time); - cli_msg(-1006, " Keepalive timer: %d/%d", - tm_remains(c->keepalive_timer), c->keepalive_time); - } +*/ + cli_msg(-1006, " Source address: %I", p->source_addr); + cli_msg(-1006, " Hold timer: %t/%u", + tm_remains(p->conn->hold_timer), p->conn->hold_time); + cli_msg(-1006, " Keepalive timer: %t/%u", + tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); + } if ((p->last_error_class != BE_NONE) && (p->last_error_class != BE_MAN_DOWN)) + { + const char *err1 = bgp_err_classes[p->last_error_class]; + const char *err2 = bgp_last_errmsg(p); + cli_msg(-1006, " Last error: %s%s", err1, err2); + } + + { + /* XXXX ?? 
*/ + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) { - const char *err1 = bgp_err_classes[p->last_error_class]; - const char *err2 = bgp_last_errmsg(p); - cli_msg(-1006, " Last error: %s%s", err1, err2); + channel_show_info(&c->c); + + if (c->igp_table_ip4) + cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name); + + if (c->igp_table_ip6) + cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); } + } } +struct channel_class channel_bgp = { + .channel_size = sizeof(struct bgp_channel), + .config_size = sizeof(struct bgp_channel_config), + .init = bgp_channel_init, + .start = bgp_channel_start, + .shutdown = bgp_channel_shutdown, + .cleanup = bgp_channel_cleanup, + .reconfigure = bgp_channel_reconfigure, +}; + struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", .attr_class = EAP_BGP, .preference = DEF_PREF_BGP, + .channel_mask = NB_IP | NB_VPN | NB_FLOW, + .proto_size = sizeof(struct bgp_proto), .config_size = sizeof(struct bgp_config), + .postconfig = bgp_postconfig, .init = bgp_init, .start = bgp_start, .shutdown = bgp_shutdown, - .cleanup = bgp_cleanup, .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index e47a0eb1..3d940c22 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -2,6 +2,8 @@ * BIRD -- The Border Gateway Protocol * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -10,26 +12,80 @@ #define _BIRD_BGP_H_ #include <stdint.h> +#include <setjmp.h> +#include "nest/bird.h" #include "nest/route.h" #include "nest/bfd.h" +//#include "lib/lists.h" #include "lib/hash.h" +#include "lib/socket.h" struct linpool; struct eattr; + +/* Address families */ + +#define BGP_AFI_IPV4 1 +#define BGP_AFI_IPV6 2 + +#define BGP_SAFI_UNICAST 1 +#define BGP_SAFI_MULTICAST 2 +#define BGP_SAFI_MPLS 4 +#define BGP_SAFI_MPLS_VPN 128 +#define BGP_SAFI_VPN_MULTICAST 129 +#define BGP_SAFI_FLOW 133 + +/* Internal AF codes */ + +#define BGP_AF(A, B) (((u32)(A) << 16) | (u32)(B)) +#define BGP_AFI(A) ((u32)(A) >> 16) +#define BGP_SAFI(A) ((u32)(A) & 0xFFFF) + +#define BGP_AF_IPV4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_UNICAST ) +#define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) +#define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) +#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS ) +#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS ) +#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN ) +#define BGP_AF_VPN4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_VPN_MULTICAST ) +#define BGP_AF_VPN6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_VPN_MULTICAST ) +#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) +#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) + + +struct bgp_write_state; +struct bgp_parse_state; +struct bgp_export_state; +struct bgp_bucket; + +struct bgp_af_desc { + u32 afi; + u32 net; + u8 mpls; + u8 no_igp; + const char *name; + uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); + void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to); + uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte 
*buf, uint size); + void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); +}; + + struct bgp_config { struct proto_config c; u32 local_as, remote_as; + ip_addr local_ip; /* Source address to use */ ip_addr remote_ip; - ip_addr source_addr; /* Source address to use */ struct iface *iface; /* Interface for link-local addresses */ + u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ int multihop; /* Number of hops if multihop */ - int ttl_security; /* Enable TTL security [RFC5082] */ - int next_hop_self; /* Always set next hop to local IP address */ - int next_hop_keep; /* Do not touch next hop attribute */ - int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ - int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ + int strict_bind; /* Bind listening socket to local address */ + int ttl_security; /* Enable TTL security [RFC 5082] */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ int igp_metric; /* Use IGP metrics when selecting best route */ @@ -37,22 +93,22 @@ struct bgp_config { int deterministic_med; /* Use more complicated algo to have strict RFC 4271 MED comparison */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ - int capabilities; /* Enable capability handshake [RFC3392] */ - int enable_refresh; /* Enable local support for route refresh [RFC2918] */ - int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ + int capabilities; /* Enable capability handshake [RFC 5492] */ + int enable_refresh; /* Enable local support for route refresh [RFC 2918] */ + int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */ int enable_extended_messages; /* Enable local support for extended messages [draft] */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ - int advertise_ipv4; /* Whether we should add IPv4 capability advertisement to OPEN message */ + u32 confederation; /* Confederation ID, or zero if confeds not active */ + int confederation_member; /* Whether neighbor AS is member of our confederation */ int passive; /* Do not initiate outgoing connection */ int interpret_communities; /* Hardwired handling of well-known communities */ - int secondary; /* Accept also non-best routes (i.e. 
RA_ACCEPTED) */ - int add_path; /* Use ADD-PATH extension [RFC7911] */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ int setkey; /* Set MD5 password to system SA/SP database */ + /* Times below are in seconds */ unsigned gr_time; /* Graceful restart timeout */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ @@ -64,11 +120,31 @@ struct bgp_config { unsigned disable_after_error; /* Disable the protocol when error is detected */ char *password; /* Password used for MD5 authentication */ - struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; +struct bgp_channel_config { + struct channel_config c; + + u32 afi; + const struct bgp_af_desc *desc; + + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ + u8 next_hop_self; /* Always set next hop to local IP address */ + u8 next_hop_keep; /* Do not touch next hop attribute */ + u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ + u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ + u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ + u8 gr_able; /* Allow full graceful restart for the channel */ + u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */ + u8 add_path; /* Use ADD-PATH extension [RFC 7911] */ + + uint rest[0]; /* Remaining items are reconfigured separately */ + struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ +}; + #define MLL_SELF 1 #define MLL_DROP 2 #define MLL_IGNORE 3 @@ -76,112 +152,241 @@ struct bgp_config { #define GW_DIRECT 1 #define GW_RECURSIVE 2 -#define ADD_PATH_RX 1 -#define ADD_PATH_TX 2 -#define ADD_PATH_FULL 3 +#define BGP_ADD_PATH_RX 1 +#define BGP_ADD_PATH_TX 2 +#define BGP_ADD_PATH_FULL 3 -#define BGP_GR_ABLE 1 -#define BGP_GR_AWARE 2 +#define BGP_GR_ABLE 1 +#define BGP_GR_AWARE 2 -/* For peer_gr_flags */ +/* For GR capability common flags */ #define BGP_GRF_RESTART 0x80 -/* For peer_gr_aflags */ +/* For GR capability per-AF flags */ #define BGP_GRF_FORWARDING 0x80 +struct bgp_af_caps { + u32 afi; + u8 ready; /* Multiprotocol capability, RFC 4760 */ + u8 gr_able; /* Graceful restart support, RFC 4724 */ + u8 gr_af_flags; /* Graceful restart per-AF flags */ + u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */ + u8 add_path; /* Multiple paths support, RFC 7911 */ +}; + +struct bgp_caps { + u32 as4_number; /* Announced ASN */ + + u8 as4_support; /* Four-octet AS capability, RFC 6793 */ + u8 ext_messages; /* Extended message length, RFC draft */ + u8 route_refresh; /* Route refresh capability, RFC 2918 */ + u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */ + + u8 gr_aware; /* Graceful restart capability, RFC 4724 */ + u8 gr_flags; /* Graceful restart flags */ + u16 gr_time; /* Graceful restart time in seconds */ + + u16 af_count; /* Number of af_data items */ + + struct bgp_af_caps af_data[0]; /* Per-AF capability data */ +}; + +#define WALK_AF_CAPS(caps,ac) \ + for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++) + + +struct bgp_socket { + node n; /* Node in global bgp_sockets */ + sock *sk; /* Real 
listening socket */ + u32 uc; /* Use count */ +}; + struct bgp_conn { struct bgp_proto *bgp; struct birdsock *sk; - uint state; /* State of connection state machine */ - struct timer *connect_retry_timer; - struct timer *hold_timer; - struct timer *keepalive_timer; - struct event *tx_ev; - int packets_to_send; /* Bitmap of packet types to be sent */ + u8 state; /* State of connection state machine */ + u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ + u8 ext_messages; /* Session uses extended message length */ + + struct bgp_caps *local_caps; + struct bgp_caps *remote_caps; + timer *connect_timer; + timer *hold_timer; + timer *keepalive_timer; + event *tx_ev; + u32 packets_to_send; /* Bitmap of packet types to be sent */ + u32 channels_to_send; /* Bitmap of channels with packets to be sent */ + u8 last_channel; /* Channel used last time for TX */ + u8 last_channel_count; /* Number of times the last channel was used in succession */ int notify_code, notify_subcode, notify_size; byte *notify_data; - u32 advertised_as; /* Temporary value for AS number received */ - int start_state; /* protocol start_state snapshot when connection established */ - u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */ - u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */ - u8 peer_add_path; /* Peer supports ADD-PATH [RFC7911] */ - u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */ - u8 peer_gr_aware; - u8 peer_gr_able; - u16 peer_gr_time; - u8 peer_gr_flags; - u8 peer_gr_aflags; - u8 peer_ext_messages_support; /* Peer supports extended message length [draft] */ - unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ + + uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ u32 local_as, remote_as; - int start_state; /* Substates that partitions BS_START */ - u8 is_internal; /* Internal BGP connection (local_as == remote_as) */ - u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ - u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ - u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ - u8 ext_messages; /* Session allows to use extended messages (both sides support it) */ + u32 public_as; /* Externally visible ASN (local_as or confederation id) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ - int rr_client; /* Whether neighbor is RR client of me */ - int rs_client; /* Whether neighbor is RS client of me */ + int start_state; /* Substates that partitions BS_START */ + u8 is_internal; /* Internal BGP session (local_as == remote_as) */ + u8 is_interior; /* Internal or intra-confederation BGP session */ + u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ + u8 rr_client; /* Whether neighbor is RR client of me */ + u8 rs_client; /* Whether neighbor is RS client of me */ + u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ + u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ - u8 gr_active; /* Neighbor is doing graceful restart */ - u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ - u8 load_state; /* Load state (RX) for EoR, RR packets, 
see BFS_* */ + u8 gr_active_num; /* Neighbor is doing GR, number of active channels */ + u8 channel_count; /* Number of active channels */ + u32 *afi_map; /* Map channel index -> AFI */ + struct bgp_channel **channel_map; /* Map channel index -> channel */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ + struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ ip_addr source_addr; /* Local address used as an advertised next hop */ - rtable *igp_table; /* Table used for recursive next hop lookups */ - struct event *event; /* Event for respawning and shutting process */ - struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ - struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ - struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ - uint hash_size, hash_count, hash_limit; - HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ - slab *prefix_slab; /* Slab holding prefix nodes */ - list bucket_queue; /* Queue of buckets to send */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - unsigned startup_delay; /* Time to delay protocol startup by due to errors */ - bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ + ip_addr link_addr; /* Link-local version of source_addr */ + event *event; /* Event for respawning and shutting process */ + timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ + timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ + uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */ + btime last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ u32 last_error_code; /* Error code of last error. 
BGP protocol errors are encoded as (bgp_err_code << 16 | bgp_err_subcode) */ -#ifdef IPV6 - byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */ - unsigned mp_reach_len, mp_unreach_len; - ip_addr local_link; /* Link-level version of source_addr */ -#endif +}; + +struct bgp_channel { + struct channel c; + + /* Rest are BGP specific data */ + struct bgp_channel_config *cf; + pool *pool; /* XXXX */ + + u32 afi; + u32 index; + const struct bgp_af_desc *desc; + + HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ + + HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ + slab *prefix_slab; /* Slab holding prefix nodes */ + + rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ + ip_addr link_addr; /* Link-local version of next_hop_addr */ + + u32 packets_to_send; /* Bitmap of packet types to be sent */ + + u8 gr_ready; /* Neighbor could do GR on this AF */ + u8 gr_active; /* Neighbor is doing GR and keeping fwd state */ + + u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */ + + u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ + u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ + + u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ + u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ }; struct bgp_prefix { - struct { - ip_addr prefix; - int pxlen; - } n; + node buck_node; /* Node in per-bucket list */ + struct bgp_prefix *next; /* Node in prefix hash table */ + u32 hash; u32 path_id; - struct bgp_prefix *next; - node bucket_node; /* Node in per-bucket list */ + net_addr net[0]; }; struct bgp_bucket { node send_node; /* Node in send queue */ - struct bgp_bucket *hash_next, *hash_prev; /* Node in bucket hash table */ - unsigned hash; /* Hash over extended attributes */ - list prefixes; /* Prefixes in this buckets */ + struct bgp_bucket *next; /* Node in bucket hash table */ + list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + u32 hash; /* Hash over extended attributes */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; +struct bgp_export_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + struct bgp_proto *src; + rte *route; + int mpls; + + u32 attrs_seen[1]; + uint err_withdraw; +}; + +struct bgp_write_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + int as4_session; + int add_path; + int mpls; + + eattr *mp_next_hop; + adata *mpls_labels; +}; + +struct bgp_parse_state { + struct bgp_proto *proto; + struct bgp_channel *channel; + struct linpool *pool; + + int as4_session; + int add_path; + int mpls; + + u32 attrs_seen[256/32]; + + u32 mp_reach_af; + u32 mp_unreach_af; + + uint attr_len; + uint ip_reach_len; + uint ip_unreach_len; + uint ip_next_hop_len; + uint mp_reach_len; + uint mp_unreach_len; + uint mp_next_hop_len; + + byte *attrs; + byte *ip_reach_nlri; + byte *ip_unreach_nlri; + byte *ip_next_hop_data; + byte *mp_reach_nlri; + byte *mp_unreach_nlri; + byte *mp_next_hop_data; + + uint err_withdraw; + uint err_subcode; + jmp_buf err_jmpbuf; + + struct hostentry *hostentry; + adata *mpls_labels; + + /* Cached state for bgp_rte_update() */ + u32 
last_id; + struct rte_src *last_src; + rta *cached_rta; +}; + #define BGP_PORT 179 #define BGP_VERSION 4 #define BGP_HEADER_LENGTH 19 @@ -192,13 +397,33 @@ struct bgp_bucket { #define BGP_RX_BUFFER_EXT_SIZE 65535 #define BGP_TX_BUFFER_EXT_SIZE 65535 -static inline uint bgp_max_packet_length(struct bgp_proto *p) -{ return p->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } +static inline int bgp_channel_is_ipv4(struct bgp_channel *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; } + +static inline int bgp_channel_is_ipv6(struct bgp_channel *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; } + +static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; } + +static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c) +{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; } + +static inline uint bgp_max_packet_length(struct bgp_conn *conn) +{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } + +static inline void +bgp_parse_error(struct bgp_parse_state *s, uint subcode) +{ + s->err_subcode = subcode; + longjmp(s->err_jmpbuf, 1); +} extern struct linpool *bgp_linpool; +extern struct linpool *bgp_linpool2; -void bgp_start_timer(struct timer *t, int value); +void bgp_start_timer(timer *t, uint value); void bgp_check_config(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); @@ -208,9 +433,9 @@ void bgp_conn_enter_established_state(struct bgp_conn *conn); void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); void bgp_handle_graceful_restart(struct bgp_proto *p); -void bgp_graceful_restart_done(struct bgp_proto *p); -void bgp_refresh_begin(struct bgp_proto *p); -void bgp_refresh_end(struct bgp_proto *p); +void bgp_graceful_restart_done(struct bgp_channel *c); +void bgp_refresh_begin(struct bgp_channel *c); +void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void bgp_stop(struct bgp_proto *p, unsigned subcode); @@ -233,48 +458,71 @@ struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); /* attrs.c */ -/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6 - * we store two addesses in it - a global address and a link local address. 
- */ -#ifdef IPV6 -#define NEXT_HOP_LENGTH (2*sizeof(ip_addr)) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; } -#else -#define NEXT_HOP_LENGTH sizeof(ip_addr) -static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; } -#endif +static inline eattr * +bgp_find_attr(ea_list *attrs, uint code) +{ + return ea_find(attrs, EA_CODE(EAP_BGP, code)); +} + +eattr * +bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); + +static inline void +bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) +{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } + +static inline void +bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, struct adata *val) +{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } + +static inline void +bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) +{ + struct adata *a = lp_alloc_adata(pool, len); + memcpy(a->data, data, len); + bgp_set_attr(to, pool, code, flags, (uintptr_t) a); +} + +static inline void +bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) +{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } + + +int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end); +ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); + +void bgp_init_bucket_table(struct bgp_channel *c); +void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); +void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); +void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); + +void bgp_init_prefix_table(struct bgp_channel *c); +void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); -void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); -byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); -struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory); -int bgp_get_attr(struct eattr *e, byte *buf, int buflen); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs); +void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); -void bgp_init_bucket_table(struct bgp_proto *); -void bgp_free_bucket_table(struct bgp_proto *p); -void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); -void bgp_init_prefix_table(struct bgp_proto *p, u32 order); -void bgp_free_prefix_table(struct bgp_proto *p); -void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp); -uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); +int bgp_get_attr(struct eattr *e, byte *buf, int buflen); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); -inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) -{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } /* packets.c */ void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new); 
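/*
 * Illustrative sketch (editorial, not part of the patch): how the
 * bgp_set_attr_u32()/bgp_set_attr_data() and bgp_find_attr() helpers
 * declared above are meant to fit together. The function name and the
 * concrete values are invented for the example; flags are passed as 0
 * here, while real callers supply the appropriate BAF_ flag values.
 * NEXT_HOP data is stored as one or two raw ip_addr values, as done
 * elsewhere in this patch.
 */
static void
example_build_attrs(ea_list **attrs, struct linpool *pool, ip_addr next_hop)
{
  /* Mandatory well-known attributes */
  bgp_set_attr_u32(attrs, pool, BA_ORIGIN, 0, ORIGIN_IGP);
  bgp_set_attr_u32(attrs, pool, BA_LOCAL_PREF, 0, 100);

  /* NEXT_HOP carries raw address data (16 or 32 bytes) */
  bgp_set_attr_data(attrs, pool, BA_NEXT_HOP, 0, &next_hop, sizeof(next_hop));

  /* Attributes are later looked up by code, not by name */
  eattr *nh = bgp_find_attr(*attrs, BA_NEXT_HOP);
  if (!nh)
    return; /* not reached; shown only to demonstrate the lookup */
}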
-void bgp_schedule_packet(struct bgp_conn *conn, int type); +const struct bgp_af_desc *bgp_get_af_desc(u32 afi); +const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); +void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type); void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, uint size); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); +void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); + + /* Packet types */ #define PKT_OPEN 0x01 @@ -292,26 +540,25 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BAF_PARTIAL 0x20 #define BAF_EXT_LEN 0x10 -#define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */ +#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ #define BA_AS_PATH 0x02 /* WM */ #define BA_NEXT_HOP 0x03 /* WM */ #define BA_MULTI_EXIT_DISC 0x04 /* ON */ #define BA_LOCAL_PREF 0x05 /* WD */ #define BA_ATOMIC_AGGR 0x06 /* WD */ #define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* ON */ -/* We don't support these: */ -#define BA_DPA 0x0b /* ??? */ -#define BA_ADVERTISER 0x0c /* [RFC1863] */ -#define BA_RCID_PATH 0x0d -#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */ -#define BA_MP_UNREACH_NLRI 0x0f -#define BA_EXT_COMMUNITY 0x10 /* [RFC4360] */ -#define BA_AS4_PATH 0x11 /* [RFC4893] */ -#define BA_AS4_AGGREGATOR 0x12 -#define BA_LARGE_COMMUNITY 0x20 /* [RFC8092] */ +#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ +#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ +#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ +#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ +#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ +#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ +#define BA_AS4_PATH 0x11 /* RFC 6793 */ +#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ +#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ + +/* Bird's private internal BGP attributes */ +#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ /* BGP connection states */ @@ -331,14 +578,12 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi * * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP * protocol done what is neccessary to start itself (like acquiring the lock), - * it goes to BSS_CONNECT. When some connection attempt failed because of - * option or capability error, it goes to BSS_CONNECT_NOCAP. + * it goes to BSS_CONNECT. */ #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */ #define BSS_DELAY 1 /* Startup delay due to previous errors */ #define BSS_CONNECT 2 /* Ordinary BGP connecting */ -#define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */ /* BGP feed states (TX) @@ -347,7 +592,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi * * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets. * - * These states (stored in p->feed_state) are used to keep track of these + * These states (stored in c->feed_state) are used to keep track of these * requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is * set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB * or EoRR packet. 
When the packet is sent, the state returned to BFS_NONE. @@ -403,15 +648,5 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define ORIGIN_EGP 1 #define ORIGIN_INCOMPLETE 2 -/* Address families */ - -#define BGP_AF_IPV4 1 -#define BGP_AF_IPV6 2 - -#ifdef IPV6 -#define BGP_AF BGP_AF_IPV6 -#else -#define BGP_AF BGP_AF_IPV4 -#endif #endif diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 55c602f1..941ae5b6 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -13,28 +13,32 @@ CF_HDR CF_DEFINES #define BGP_CFG ((struct bgp_config *) this_proto) +#define BGP_CC ((struct bgp_channel_config *) this_channel) CF_DECLS -CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, - KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, - PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, ENABLE, - DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, - BGP_NEXT_HOP, BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, - BGP_EXT_COMMUNITY, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, - CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, - PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, - INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, - TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, - SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, - CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, BGP_LARGE_COMMUNITY) +CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, + MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, + BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, + BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, + IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, + BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, + GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, + STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6) + +%type <i32> bgp_afi CF_GRAMMAR -CF_ADDTO(proto, bgp_proto '}' { bgp_check_config(BGP_CFG); } ) +CF_ADDTO(proto, bgp_proto '}' ) bgp_proto_start: proto_start BGP { this_proto = proto_config_new(&proto_bgp, $1); + BGP_CFG->local_port = BGP_PORT; BGP_CFG->remote_port = BGP_PORT; BGP_CFG->multihop = -1; /* undefined */ BGP_CFG->hold_time = 240; @@ -49,26 +53,35 @@ bgp_proto_start: proto_start BGP { BGP_CFG->enable_refresh = 1; BGP_CFG->enable_as4 = 1; BGP_CFG->capabilities = 2; - BGP_CFG->advertise_ipv4 = 1; BGP_CFG->interpret_communities = 1; BGP_CFG->default_local_pref = 100; BGP_CFG->gr_mode = BGP_GR_AWARE; BGP_CFG->gr_time = 120; BGP_CFG->setkey = 1; - } + } + ; + +bgp_loc_opts: + /* empty */ + | bgp_loc_opts PORT expr { BGP_CFG->local_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } + | bgp_loc_opts AS expr { BGP_CFG->local_as = $3; } ; bgp_nbr_opts: /* empty */ - | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } + | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } | bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; } ; bgp_proto: bgp_proto_start proto_name '{' | 
bgp_proto proto_item ';' - | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; } - | bgp_proto LOCAL ipa AS expr ';' { BGP_CFG->source_addr = $3; BGP_CFG->local_as = $5; } + | bgp_proto bgp_proto_channel ';' + | bgp_proto LOCAL bgp_loc_opts ';' + | bgp_proto LOCAL ipa ipa_scope bgp_loc_opts ';' { + BGP_CFG->local_ip = $3; + if ($4) BGP_CFG->iface = $4; + } | bgp_proto NEIGHBOR bgp_nbr_opts ';' | bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' { if (ipa_nonzero(BGP_CFG->remote_ip)) @@ -78,20 +91,16 @@ bgp_proto: } | bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); } | bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; } - | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; } - | bgp_proto RS CLIENT ';' { BGP_CFG->rs_client = 1; } + | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; } + | bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; } + | bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; } + | bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; } | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } - | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; } - | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; } - | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } - | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } - | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } - | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; } - | bgp_proto GATEWAY RECURSIVE ';' { BGP_CFG->gw_mode = GW_RECURSIVE; } + | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } @@ -99,7 +108,7 @@ bgp_proto: | bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; } | bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; } | bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; } - | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; } + | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->local_ip = $4; } | bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); } | bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } @@ -111,33 +120,101 @@ bgp_proto: | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; } | bgp_proto ENABLE EXTENDED MESSAGES bool ';' { BGP_CFG->enable_extended_messages = $5; } | bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; } - | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; } | bgp_proto PASSWORD text ';' { BGP_CFG->password = $3; } | bgp_proto SETKEY bool ';' { BGP_CFG->setkey = $3; } - | bgp_proto ROUTE LIMIT expr ';' { - this_proto->in_limit = cfg_allocz(sizeof(struct proto_limit)); - this_proto->in_limit->limit = $4; - 
this_proto->in_limit->action = PLA_RESTART; - log(L_WARN "%s: Route limit option is deprecated, use import limit", this_proto->name); - } | bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; } | bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; } - | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; } - | bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = ADD_PATH_RX; } - | bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; } - | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; } - | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; } | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; } | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; } + | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; } | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } | bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; } - | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } | bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; } | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } ; +bgp_afi: + IPV4 { $$ = BGP_AF_IPV4; } + | IPV6 { $$ = BGP_AF_IPV6; } + | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } + | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } + | IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; } + | IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; } + | VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; } + | VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; } + | VPN4 MULTICAST { $$ = BGP_AF_VPN4_MC; } + | VPN6 MULTICAST { $$ = BGP_AF_VPN6_MC; } + | FLOW4 { $$ = BGP_AF_FLOW4; } + | FLOW6 { $$ = BGP_AF_FLOW6; } + ; + +bgp_channel_start: bgp_afi +{ + const struct bgp_af_desc *desc = bgp_get_af_desc($1); + + if (!desc) + cf_error("Unknown AFI/SAFI"); + + this_channel = channel_config_new(&channel_bgp, desc->net, this_proto); + BGP_CC->c.name = desc->name; + BGP_CC->c.ra_mode = RA_UNDEF; + BGP_CC->afi = $1; + BGP_CC->desc = desc; + BGP_CC->gr_able = 0xff; /* undefined */ +}; + +bgp_channel_item: + channel_item + | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } + | NEXT HOP SELF { BGP_CC->next_hop_self = 1; BGP_CC->next_hop_keep = 0; } + | NEXT HOP KEEP { BGP_CC->next_hop_keep = 1; BGP_CC->next_hop_self = 0; } + | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; } + | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; } + | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; } + | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; } + | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } + | SECONDARY bool { BGP_CC->secondary = $2; } + | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } + | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; } + | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; } + | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; } + | ADD PATHS bool { BGP_CC->add_path = $3 ? 
BGP_ADD_PATH_FULL : 0; } + | IGP TABLE rtable { + if (BGP_CC->desc->no_igp) + cf_error("IGP table not allowed here"); + + if ($3->addr_type == NET_IP4) + BGP_CC->igp_table_ip4 = $3; + else if ($3->addr_type == NET_IP6) + BGP_CC->igp_table_ip6 = $3; + else + cf_error("Mismatched IGP table type"); + } + ; + +bgp_channel_opts: + /* empty */ + | bgp_channel_opts bgp_channel_item ';' + ; + +bgp_channel_opt_list: + /* empty */ + | '{' bgp_channel_opts '}' + ; + +bgp_channel_end: +{ + if (!this_channel->table) + cf_error("Routing table not specified"); + + this_channel = NULL; +}; + +bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; + + CF_ADDTO(dynamic_attr, BGP_ORIGIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(EAP_BGP, BA_ORIGIN)); }) CF_ADDTO(dynamic_attr, BGP_PATH diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index ab87bdcc..0e974746 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -2,12 +2,16 @@ * BIRD -- BGP Packet Processing * * (c) 2000 Martin Mares <mj@ucw.cz> + * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org> + * (c) 2008--2016 CZ.NIC z.s.p.o. * * Can be freely distributed and used under the terms of the GNU GPL. */ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -16,6 +20,7 @@ #include "nest/mrtdump.h" #include "conf/conf.h" #include "lib/unaligned.h" +#include "lib/flowspec.h" #include "lib/socket.h" #include "nest/cli.h" @@ -27,6 +32,13 @@ #define BGP_RR_BEGIN 1 #define BGP_RR_END 2 +#define BGP_NLRI_MAX (4 + 1 + 32) + +#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */ +#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */ +#define BGP_MPLS_NULL 3 /* Implicit NULL label */ +#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */ + static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; @@ -38,6 +50,46 @@ static byte fsm_err_subcode[BS_MAX] = { [BS_ESTABLISHED] = 3 }; + +static struct bgp_channel * +bgp_get_channel(struct bgp_proto *p, u32 afi) +{ + uint i; + + for (i = 0; i < p->channel_count; i++) + if (p->afi_map[i] == afi) + return p->channel_map[i]; + + return NULL; +} + +static inline void +put_af3(byte *buf, u32 id) +{ + put_u16(buf, id >> 16); + buf[2] = id & 0xff; +} + +static inline void +put_af4(byte *buf, u32 id) +{ + put_u16(buf, id >> 16); + buf[2] = 0; + buf[3] = id & 0xff; +} + +static inline u32 +get_af3(byte *buf) +{ + return (get_u16(buf) << 16) | buf[2]; +} + +static inline u32 +get_af4(byte *buf) +{ + return (get_u16(buf) << 16) | buf[3]; +} + /* * MRT Dump format is not semantically specified. * We will use these values in appropriate fields: @@ -58,31 +110,41 @@ static byte * mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) { struct bgp_proto *p = conn->bgp; + uint v4 = ipa_is_ip4(p->cf->remote_ip); if (as4) - { - put_u32(buf+0, p->remote_as); - put_u32(buf+4, p->local_as); - buf+=8; - } + { + put_u32(buf+0, p->remote_as); + put_u32(buf+4, p->public_as); + buf+=8; + } else - { - put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); - put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS); - buf+=4; - } + { + put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); + put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS); + buf+=4; + } put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); - put_u16(buf+2, BGP_AF); + put_u16(buf+2, v4 ? 
BGP_AFI_IPV4 : BGP_AFI_IPV6); buf+=4; - buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE); - buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE); + + if (v4) + { + buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE); + buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE); + } + else + { + buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE); + buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE); + } return buf; } static void -mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len) { byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ byte *bp = buf + MRTDUMP_HDR_LENGTH; @@ -96,14 +158,14 @@ mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) } static inline u16 -convert_state(unsigned state) +convert_state(uint state) { /* Convert state from our BS_* values to values used in MRTDump */ return (state == BS_CLOSE) ? 1 : state + 1; } void -mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new) +mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new) { byte buf[128]; byte *bp = buf + MRTDUMP_HDR_LENGTH; @@ -127,1303 +189,2426 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) return buf + 2 + conn->notify_size; } -#ifdef IPV6 -static byte * -bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf) -{ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv6 */ - *buf++ = BGP_AF_IPV6; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; -} -#else +/* Capability negotiation as per RFC 5492 */ -static byte * -bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf) -{ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv4 */ - *buf++ = BGP_AF_IPV4; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; +const struct bgp_af_caps * +bgp_find_af_caps(struct bgp_caps *caps, u32 afi) +{ + struct bgp_af_caps *ac; + + WALK_AF_CAPS(caps, ac) + if (ac->afi == afi) + return ac; + + return NULL; } -#endif -static byte * -bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf) +static struct bgp_af_caps * +bgp_get_af_caps(struct bgp_caps *caps, u32 afi) { - *buf++ = 2; /* Capability 2: Support for route refresh */ - *buf++ = 0; /* Capability data length */ - return buf; + struct bgp_af_caps *ac; + + WALK_AF_CAPS(caps, ac) + if (ac->afi == afi) + return ac; + + ac = &caps->af_data[caps->af_count++]; + memset(ac, 0, sizeof(struct bgp_af_caps)); + ac->afi = afi; + + return ac; } -static byte * -bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) +static int +bgp_af_caps_cmp(const void *X, const void *Y) { - *buf++ = 6; /* Capability 6: Support for extended messages */ - *buf++ = 0; /* Capability data length */ - return buf; + const struct bgp_af_caps *x = X, *y = Y; + return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 
1 : 0; } + static byte * -bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) +bgp_write_capabilities(struct bgp_conn *conn, byte *buf) { - *buf++ = 64; /* Capability 64: Support for graceful restart */ - *buf++ = 6; /* Capability data length */ + struct bgp_proto *p = conn->bgp; + struct bgp_channel *c; + struct bgp_caps *caps; + struct bgp_af_caps *ac; + uint any_ext_next_hop = 0; + uint any_add_path = 0; + byte *data; - put_u16(buf, p->cf->gr_time); - if (p->p.gr_recovery) - buf[0] |= BGP_GRF_RESTART; - buf += 2; + /* Prepare bgp_caps structure */ + + int n = list_length(&p->p.channels); + caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps)); + conn->local_caps = caps; + + caps->as4_support = p->cf->enable_as4; + caps->ext_messages = p->cf->enable_extended_messages; + caps->route_refresh = p->cf->enable_refresh; + caps->enhanced_refresh = p->cf->enable_refresh; + + if (caps->as4_support) + caps->as4_number = p->public_as; + + if (p->cf->gr_mode) + { + caps->gr_aware = 1; + caps->gr_time = p->cf->gr_time; + caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0; + } + + /* Allocate and fill per-AF fields */ + WALK_LIST(c, p->p.channels) + { + ac = &caps->af_data[caps->af_count++]; + ac->afi = c->afi; + ac->ready = 1; + + ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; + any_ext_next_hop |= ac->ext_next_hop; + + ac->add_path = c->cf->add_path; + any_add_path |= ac->add_path; + + if (c->cf->gr_able) + { + ac->gr_able = 1; + + if (p->p.gr_recovery) + ac->gr_af_flags |= BGP_GRF_FORWARDING; + } + } + + /* Sort capability fields by AFI/SAFI */ + qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp); - *buf++ = 0; /* Appropriate AF */ - *buf++ = BGP_AF; - *buf++ = 1; /* and SAFI 1 */ - *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0; + + /* Create capability list in buffer */ + + /* + * Note that max length is ~ 20+14*af_count. With max 12 channels that is + * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow + * unless we add new capabilities or more AFs. 
+ */ + + WALK_AF_CAPS(caps, ac) + if (ac->ready) + { + *buf++ = 1; /* Capability 1: Multiprotocol extensions */ + *buf++ = 4; /* Capability data length */ + put_af4(buf, ac->afi); + buf += 4; + } + + if (caps->route_refresh) + { + *buf++ = 2; /* Capability 2: Support for route refresh */ + *buf++ = 0; /* Capability data length */ + } + + if (any_ext_next_hop) + { + *buf++ = 5; /* Capability 5: Support for extended next hop */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->ext_next_hop) + { + put_af4(buf, ac->afi); + put_u16(buf+4, BGP_AFI_IPV6); + buf += 6; + } + + data[-1] = buf - data; + } + + if (caps->ext_messages) + { + *buf++ = 6; /* Capability 6: Support for extended messages */ + *buf++ = 0; /* Capability data length */ + } + + if (caps->gr_aware) + { + *buf++ = 64; /* Capability 64: Support for graceful restart */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + put_u16(buf, caps->gr_time); + buf[0] |= caps->gr_flags; + buf += 2; + + WALK_AF_CAPS(caps, ac) + if (ac->gr_able) + { + put_af3(buf, ac->afi); + buf[3] = ac->gr_af_flags; + buf += 4; + } + + data[-1] = buf - data; + } + + if (caps->as4_support) + { + *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ + *buf++ = 4; /* Capability data length */ + put_u32(buf, p->public_as); + buf += 4; + } + + if (any_add_path) + { + *buf++ = 69; /* Capability 69: Support for ADD-PATH */ + *buf++ = 0; /* Capability data length, will be fixed later */ + data = buf; + + WALK_AF_CAPS(caps, ac) + if (ac->add_path) + { + put_af3(buf, ac->afi); + buf[3] = ac->add_path; + buf += 4; + } + + data[-1] = buf - data; + } + + if (caps->enhanced_refresh) + { + *buf++ = 70; /* Capability 70: Support for enhanced route refresh */ + *buf++ = 0; /* Capability data length */ + } return buf; } -static byte * -bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf) +static void +bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len) { - *buf++ = 64; /* Capability 64: Support for graceful restart */ - *buf++ = 2; /* Capability data length */ - put_u16(buf, 0); - return buf + 2; -} + struct bgp_proto *p = conn->bgp; + struct bgp_af_caps *ac; + int i, cl; + u32 af; -static byte * -bgp_put_cap_as4(struct bgp_proto *p, byte *buf) -{ - *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ - *buf++ = 4; /* Capability data length */ - put_u32(buf, p->local_as); - return buf + 4; -} + while (len > 0) + { + if (len < 2 || len < (2 + pos[1])) + goto err; -static byte * -bgp_put_cap_add_path(struct bgp_proto *p, byte *buf) -{ - *buf++ = 69; /* Capability 69: Support for ADD-PATH */ - *buf++ = 4; /* Capability data length */ + /* Capability length */ + cl = pos[1]; - *buf++ = 0; /* Appropriate AF */ - *buf++ = BGP_AF; - *buf++ = 1; /* SAFI 1 */ + /* Capability type */ + switch (pos[0]) + { + case 1: /* Multiprotocol capability, RFC 4760 */ + if (cl != 4) + goto err; - *buf++ = p->cf->add_path; + af = get_af4(pos+2); + ac = bgp_get_af_caps(caps, af); + ac->ready = 1; + break; - return buf; + case 2: /* Route refresh capability, RFC 2918 */ + if (cl != 0) + goto err; + + caps->route_refresh = 1; + break; + + case 5: /* Extended next hop encoding capability, RFC 5549 */ + if (cl % 6) + goto err; + + for (i = 0; i < cl; i += 6) + { + /* Specified only for IPv4 prefixes with IPv6 next hops */ + if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) || + (get_u16(pos+2+i+4) != BGP_AFI_IPV6)) + continue; + + af = get_af4(pos+2+i); + ac = 
bgp_get_af_caps(caps, af); + ac->ext_next_hop = 1; + } + break; + + case 6: /* Extended message length capability, RFC draft */ + if (cl != 0) + goto err; + + caps->ext_messages = 1; + break; + + case 64: /* Graceful restart capability, RFC 4724 */ + if (cl % 4 != 2) + goto err; + + /* Only the last instance is valid */ + WALK_AF_CAPS(caps, ac) + { + ac->gr_able = 0; + ac->gr_af_flags = 0; + } + + caps->gr_aware = 1; + caps->gr_flags = pos[2] & 0xf0; + caps->gr_time = get_u16(pos + 2) & 0x0fff; + + for (i = 2; i < cl; i += 4) + { + af = get_af3(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->gr_able = 1; + ac->gr_af_flags = pos[2+i+3]; + } + break; + + case 65: /* AS4 capability, RFC 6793 */ + if (cl != 4) + goto err; + + caps->as4_support = 1; + caps->as4_number = get_u32(pos + 2); + break; + + case 69: /* ADD-PATH capability, RFC 7911 */ + if (cl % 4) + goto err; + + for (i = 0; i < cl; i += 4) + { + byte val = pos[2+i+3]; + if (!val || (val > BGP_ADD_PATH_FULL)) + { + log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring", + p->p.name, val); + break; + } + } + + for (i = 0; i < cl; i += 4) + { + af = get_af3(pos+2+i); + ac = bgp_get_af_caps(caps, af); + ac->add_path = pos[2+i+3]; + } + break; + + case 70: /* Enhanced route refresh capability, RFC 7313 */ + if (cl != 0) + goto err; + + caps->enhanced_refresh = 1; + break; + + /* We can safely ignore all other capabilities */ + } + + ADVANCE(pos, len, 2 + cl); + } + return; + +err: + bgp_error(conn, 2, 0, NULL, 0); + return; } -static byte * -bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) +static int +bgp_read_options(struct bgp_conn *conn, byte *pos, int len) { - *buf++ = 70; /* Capability 70: Support for enhanced route refresh */ - *buf++ = 0; /* Capability data length */ - return buf; -} + struct bgp_proto *p = conn->bgp; + struct bgp_caps *caps; + int ol; + + /* Max number of announced AFIs is limited by max option length (255) */ + caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps)); + memset(caps, 0, sizeof(struct bgp_caps)); + + while (len > 0) + { + if ((len < 2) || (len < (2 + pos[1]))) + { bgp_error(conn, 2, 0, NULL, 0); return -1; } + ol = pos[1]; + if (pos[0] == 2) + { + /* BGP capabilities, RFC 5492 */ + if (p->cf->capabilities) + bgp_read_capabilities(conn, caps, pos + 2, ol); + } + else + { + /* Unknown option */ + bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */ + return -1; + } + + ADVANCE(pos, len, 2 + ol); + } + + uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps); + conn->remote_caps = mb_allocz(p->p.pool, n); + memcpy(conn->remote_caps, caps, n); + + return 0; +} static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; - byte *cap; - int cap_len; BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)", - BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id); + BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id); + buf[0] = BGP_VERSION; - put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS); + put_u16(buf+1, (p->public_as < 0xFFFF) ? 
p->public_as : AS_TRANS); put_u16(buf+3, p->cf->hold_time); put_u32(buf+5, p->local_id); - if (conn->start_state == BSS_CONNECT_NOCAP) - { - BGP_TRACE(D_PACKETS, "Skipping capabilities"); - buf[9] = 0; - return buf + 10; - } + if (p->cf->capabilities) + { + /* Prepare local_caps and write capabilities to buffer */ + byte *end = bgp_write_capabilities(conn, buf+12); + uint len = end - (buf+12); + + buf[9] = len + 2; /* Optional parameters length */ + buf[10] = 2; /* Option 2: Capability list */ + buf[11] = len; /* Option data length */ + + return end; + } + else + { + /* Prepare empty local_caps */ + conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); + + buf[9] = 0; /* No optional parameters */ + return buf + 10; + } + + return buf; +} + +static void +bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_conn *other; + u32 asn, hold, id; - /* Skipped 3 B for length field and Capabilities parameter header */ - cap = buf + 12; + /* Check state */ + if (conn->state != BS_OPENSENT) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } + + /* Check message contents */ + if (len < 29 || len != 29 + (uint) pkt[28]) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } -#ifndef IPV6 - if (p->cf->advertise_ipv4) - cap = bgp_put_cap_ipv4(p, cap); -#endif + if (pkt[19] != BGP_VERSION) + { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; } -#ifdef IPV6 - cap = bgp_put_cap_ipv6(p, cap); -#endif + asn = get_u16(pkt+20); + hold = get_u16(pkt+22); + id = get_u32(pkt+24); + BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id); - if (p->cf->enable_refresh) - cap = bgp_put_cap_rr(p, cap); + if (bgp_read_options(conn, pkt+29, pkt[28]) < 0) + return; - if (p->cf->gr_mode == BGP_GR_ABLE) - cap = bgp_put_cap_gr1(p, cap); - else if (p->cf->gr_mode == BGP_GR_AWARE) - cap = bgp_put_cap_gr2(p, cap); + if (hold > 0 && hold < 3) + { bgp_error(conn, 2, 6, pkt+22, 2); return; } - if (p->cf->enable_as4) - cap = bgp_put_cap_as4(p, cap); + /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */ + if (!id || (p->is_internal && id == p->local_id)) + { bgp_error(conn, 2, 3, pkt+24, -4); return; } - if (p->cf->add_path) - cap = bgp_put_cap_add_path(p, cap); + struct bgp_caps *caps = conn->remote_caps; - if (p->cf->enable_refresh) - cap = bgp_put_cap_err(p, cap); + if (caps->as4_support) + { + u32 as4 = caps->as4_number; - if (p->cf->enable_extended_messages) - cap = bgp_put_cap_ext_msg(p, cap); + if ((as4 != asn) && (asn != AS_TRANS)) + log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); - cap_len = cap - buf - 12; - if (cap_len > 0) - { - buf[9] = cap_len + 2; /* Optional params len */ - buf[10] = 2; /* Option: Capability list */ - buf[11] = cap_len; /* Option length */ - return cap; - } + if (as4 != p->remote_as) + { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; } + } else + { + if (asn != p->remote_as) + { bgp_error(conn, 2, 2, pkt+20, 2); return; } + } + + /* Check the other connection */ + other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; + switch (other->state) + { + case BS_CONNECT: + case BS_ACTIVE: + /* Stop outgoing connection attempts */ + bgp_conn_enter_idle_state(other); + break; + + case BS_IDLE: + case BS_OPENSENT: + case BS_CLOSE: + break; + + case BS_OPENCONFIRM: + /* + * Description of collision detection rules in RFC 4271 is confusing and + * contradictory, but it is essentially: + * + * 1. 
Router with higher ID is dominant + * 2. If both have the same ID, router with higher ASN is dominant [RFC6286] + * 3. When both connections are in OpenConfirm state, one initiated by + * the dominant router is kept. + * + * The first line in the expression below evaluates whether the neighbor + * is dominant, the second line whether the new connection was initiated + * by the neighbor. If both are true (or both are false), we keep the new + * connection, otherwise we keep the old one. + */ + if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as))) + == (conn == &p->incoming_conn)) { - buf[9] = 0; /* No optional parameters */ - return buf + 10; + /* Should close the other connection */ + BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection"); + bgp_error(other, 6, 7, NULL, 0); + break; } + /* Fall thru */ + case BS_ESTABLISHED: + /* Should close this connection */ + BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection"); + bgp_error(conn, 6, 7, NULL, 0); + return; + + default: + bug("bgp_rx_open: Unknown state"); + } + + /* Update our local variables */ + conn->hold_time = MIN(hold, p->cf->hold_time); + conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3; + conn->as4_session = conn->local_caps->as4_support && caps->as4_support; + conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages; + p->remote_id = id; + + DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", + conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session); + + bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE); + bgp_start_timer(conn->hold_timer, conn->hold_time); + bgp_conn_enter_openconfirm_state(conn); } -static uint -bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains) + +/* + * Next hop handling + */ + +#define REPORT(msg, args...) \ + ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) + +#define DISCARD(msg, args...) \ + ({ REPORT(msg, ## args); return; }) + +#define WITHDRAW(msg, args...) 
\ + ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) + +#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" +#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" +#define NO_NEXT_HOP "Missing NEXT_HOP attribute" +#define NO_LABEL_STACK "Missing MPLS stack" + + +static void +bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) { - byte *start = w; - ip_addr a; - int bytes; + struct bgp_proto *p = s->proto; + struct bgp_channel *c = s->channel; - while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr)))) - { - struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); - DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen); + if (c->cf->gw_mode == GW_DIRECT) + { + neighbor *nbr = NULL; - if (p->add_path_tx) - { - put_u32(w, px->path_id); - w += 4; - remains -= 4; - } + /* GW_DIRECT -> single_hop -> p->neigh != NULL */ + if (ipa_nonzero(gw)) + nbr = neigh_find2(&p->p, &gw, NULL, 0); + else if (ipa_nonzero(ll)) + nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0); - *w++ = px->n.pxlen; - bytes = (px->n.pxlen + 7) / 8; - a = px->n.prefix; - ipa_hton(a); - memcpy(w, &a, bytes); - w += bytes; - remains -= bytes + 1; - rem_node(&px->bucket_node); - bgp_free_prefix(p, px); - // fib_delete(&p->prefix_fib, px); - } - return w - start; + if (!nbr || (nbr->scope == SCOPE_HOST)) + WITHDRAW(BAD_NEXT_HOP); + + a->dest = RTD_UNICAST; + a->nh.gw = nbr->addr; + a->nh.iface = nbr->iface; + } + else /* GW_RECURSIVE */ + { + if (ipa_zero(gw)) + WITHDRAW(BAD_NEXT_HOP); + + rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; + s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); + + if (!s->mpls) + rta_apply_hostentry(a, s->hostentry, NULL); + + /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + } } static void -bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck) +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) { - while (!EMPTY_LIST(buck->prefixes)) - { - struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes)); - log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen); - rem_node(&px->bucket_node); - bgp_free_prefix(p, px); - // fib_delete(&p->prefix_fib, px); - } + if (lnum > MPLS_MAX_LABEL_STACK) + { + REPORT("Too many MPLS labels ($u)", lnum); + + a->dest = RTD_UNREACHABLE; + a->hostentry = NULL; + a->nh = (struct nexthop) { }; + return; + } + + /* Handle implicit NULL as empty MPLS stack */ + if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) + lnum = 0; + + if (s->channel->cf->gw_mode == GW_DIRECT) + { + a->nh.labels = lnum; + memcpy(a->nh.label, labels, 4*lnum); + } + else /* GW_RECURSIVE */ + { + mpls_label_stack ms; + + ms.len = lnum; + memcpy(ms.stack, labels, 4*lnum); + rta_apply_hostentry(a, s->hostentry, &ms); + } } -#ifndef IPV6 /* IPv4 version */ -static byte * -bgp_create_update(struct bgp_conn *conn, byte *buf) +static inline int +bgp_use_next_hop(struct bgp_export_state *s, eattr *a) { - struct bgp_proto *p = conn->bgp; - struct bgp_bucket *buck; - int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; - byte *w; - int wd_size = 0; - int r_size = 0; - int a_size = 0; - - w = buf+2; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) - { - DBG("Withdrawn routes:\n"); - wd_size = bgp_encode_prefixes(p, w, buck, remains); - w += wd_size; - remains -= wd_size; - } - put_u16(buf, wd_size); + struct bgp_proto *p = s->proto; + ip_addr *nh = (void *) a->u.ptr->data; - 
if (!wd_size) - { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) - { - if (EMPTY_LIST(buck->prefixes)) - { - DBG("Deleting empty bucket %p\n", buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - DBG("Processing bucket %p\n", buck); - a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024); - - if (a_size < 0) - { - log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - put_u16(w, a_size); - w += a_size + 2; - r_size = bgp_encode_prefixes(p, w, buck, remains - a_size); - w += r_size; - break; - } - } - if (!a_size) /* Attributes not already encoded */ + if (s->channel->cf->next_hop_self) + return 0; + + if (s->channel->cf->next_hop_keep) + return 1; + + /* Keep it when explicitly set in export filter */ + if (a->type & EAF_FRESH) + return 1; + + /* Keep it when exported to internal peers */ + if (p->is_interior && ipa_nonzero(*nh)) + return 1; + + /* Keep it when forwarded between single-hop BGPs on the same iface */ + struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL; + return p->neigh && (p->neigh->iface == ifa); +} + +static inline int +bgp_use_gateway(struct bgp_export_state *s) +{ + struct bgp_proto *p = s->proto; + rta *ra = s->route->attrs; + + if (s->channel->cf->next_hop_self) + return 0; + + /* We need one valid global gateway */ + if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) + return 0; + + /* Use it when exported to internal peers */ + if (p->is_interior) + return 1; + + /* Use it when forwarded to single-hop BGP peer on on the same iface */ + return p->neigh && (p->neigh->iface == ra->nh.iface); +} + +static void +bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) +{ + if (!a || !bgp_use_next_hop(s, a)) + { + if (bgp_use_gateway(s)) { - put_u16(w, 0); - w += 2; + rta *ra = s->route->attrs; + ip_addr nh[1] = { ra->nh.gw }; + bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + + if (s->mpls) + { + u32 implicit_null = BGP_MPLS_NULL; + u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; + uint lnum = ra->nh.labels ? ra->nh.labels : 1; + bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + } } - if (wd_size || r_size) + else { - BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); - return w; + ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; + bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 
32 : 16); + + /* TODO: Use local MPLS assigned label */ + if (s->mpls) + bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK); } + } + + /* Check if next hop is valid */ + a = bgp_find_attr(*to, BA_NEXT_HOP); + if (!a) + WITHDRAW(NO_NEXT_HOP); + + ip_addr *nh = (void *) a->u.ptr->data; + ip_addr peer = s->proto->cf->remote_ip; + uint len = a->u.ptr->length; + + /* Forbid zero next hop */ + if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) + WITHDRAW(BAD_NEXT_HOP); + + /* Forbid next hop equal to neighbor IP */ + if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) + WITHDRAW(BAD_NEXT_HOP); + + /* Forbid next hop with non-matching AF */ + if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) && + !s->channel->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + /* Just check if MPLS stack */ + if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) + WITHDRAW(NO_LABEL_STACK); +} + +static uint +bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED) +{ + /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */ + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; + + ASSERT((len == 16) || (len == 32)); + + /* + * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This + * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference + * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped + * IPv6 address with IPv6 NLRI. + */ + + if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + { + put_ip4(buf, ipa_to_ip4(nh[0])); + return 4; + } + + put_ip6(buf, ipa_to_ip6(nh[0])); + + if (len == 32) + put_ip6(buf+16, ipa_to_ip6(nh[1])); + + return len; +} + +static void +bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct bgp_channel *c = s->channel; + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; + + if (len == 4) + { + nh[0] = ipa_from_ip4(get_ip4(data)); + nh[1] = IPA_NONE; + } + else if (len == 16) + { + nh[0] = ipa_from_ip6(get_ip6(data)); + nh[1] = IPA_NONE; + + if (ipa_is_link_local(nh[0])) + { nh[1] = nh[0]; nh[0] = IPA_NONE; } + } + else if (len == 32) + { + nh[0] = ipa_from_ip6(get_ip6(data)); + nh[1] = ipa_from_ip6(get_ip6(data+16)); + + if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + } else - return NULL; + bgp_parse_error(s, 9); + + if (ipa_zero(nh[1])) + ad->length = 16; + + if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); } -static byte * -bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +static uint +bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + ip_addr *nh = (void *) a->u.ptr->data; + uint len = a->u.ptr->length; - put_u32(buf, 0); - return buf+4; + ASSERT((len == 16) || (len == 32)); + + /* + * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This + * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference + * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped + * IPv6 address with VPNv6 NLRI. 
+ */ + + if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0])) + { + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip4(buf+8, ipa_to_ip4(nh[0])); + return 12; + } + + put_u64(buf, 0); /* VPN RD is 0 */ + put_ip6(buf+8, ipa_to_ip6(nh[0])); + + if (len == 16) + return 24; + + put_u64(buf+24, 0); /* VPN RD is 0 */ + put_ip6(buf+32, ipa_to_ip6(nh[1])); + + return 48; } -#else /* IPv6 version */ +static void +bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) +{ + struct bgp_channel *c = s->channel; + struct adata *ad = lp_alloc_adata(s->pool, 32); + ip_addr *nh = (void *) ad->data; -static inline int -same_iface(struct bgp_proto *p, ip_addr *ip) + if (len == 12) + { + nh[0] = ipa_from_ip4(get_ip4(data+8)); + nh[1] = IPA_NONE; + } + else if (len == 24) + { + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = IPA_NONE; + + if (ipa_is_link_local(nh[0])) + { nh[1] = nh[0]; nh[0] = IPA_NONE; } + } + else if (len == 48) + { + nh[0] = ipa_from_ip6(get_ip6(data+8)); + nh[1] = ipa_from_ip6(get_ip6(data+32)); + + if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1])) + nh[1] = IPA_NONE; + } + else + bgp_parse_error(s, 9); + + if (ipa_zero(nh[1])) + ad->length = 16; + + /* XXXX which error */ + if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0))) + bgp_parse_error(s, 9); + + if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) + WITHDRAW(BAD_NEXT_HOP); + + // XXXX validate next hop + + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, a, nh[0], nh[1]); +} + + + +static uint +bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED) { - neighbor *n = neigh_find(&p->p, ip, 0); - return n && p->neigh && n->iface == p->neigh->iface; + return 0; } -static byte * -bgp_create_update(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED) { - struct bgp_proto *p = conn->bgp; - struct bgp_bucket *buck; - int size, second, rem_stored; - int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4; - byte *w, *w_stored, *tmp, *tstart; - ip_addr *ipp, ip, ip_ll; - ea_list *ea; - eattr *nh; + /* + * Although we expect no next hop and RFC 7606 7.11 states that attribute + * MP_REACH_NLRI with unexpected next hop length is considered malformed, + * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt. 
+ */ + + return; +} - put_u16(buf, 0); - w = buf+4; +static void +bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +{ + /* NEXT_HOP shall not pass */ + if (a) + bgp_unset_attr(to, s->pool, BA_NEXT_HOP); +} + + +/* + * UPDATE + */ + +static void +bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) +{ + if (path_id != s->last_id) + { + s->last_src = rt_get_source(&s->proto->p, path_id); + s->last_id = path_id; - if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + rta_free(s->cached_rta); + s->cached_rta = NULL; + } + + if (!a0) + { + /* Route withdraw */ + rte_update2(&s->channel->c, n, NULL, s->last_src); + return; + } + + /* Prepare cached route attributes */ + if (s->cached_rta == NULL) + { + a0->src = s->last_src; + + /* Workaround for rta_lookup() breaking eattrs */ + ea_list *ea = a0->eattrs; + s->cached_rta = rta_lookup(a0); + a0->eattrs = ea; + } + + rta *a = rta_clone(s->cached_rta); + rte *e = rte_get_temp(a); + + e->pflags = 0; + e->u.bgp.suppressed = 0; + rte_update2(&s->channel->c, n, e, s->last_src); +} + +static void +bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen) +{ + u32 dummy = 0; + u32 *labels = mpls ? (u32 *) mpls->data : &dummy; + uint lnum = mpls ? (mpls->length / 4) : 1; + + for (uint i = 0; i < lnum; i++) + { + put_u24(*pos, labels[i] << 4); + ADVANCE(*pos, *size, 3); + } + + /* Add bottom-of-stack flag */ + (*pos)[-1] |= BGP_MPLS_BOS; + + *pxlen += 24 * lnum; +} + +static void +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +{ + u32 labels[BGP_MPLS_MAX], label; + uint lnum = 0; + + do { + if (*pxlen < 24) + bgp_parse_error(s, 1); + + label = get_u24(*pos); + labels[lnum++] = label >> 4; + ADVANCE(*pos, *len, 3); + *pxlen -= 24; + + /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */ + if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC)) + break; + } + while (!(label & BGP_MPLS_BOS)); + + if (!a) + return; + + /* Attach MPLS attribute unless we already have one */ + if (!s->mpls_labels) + { + s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); + bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); + } + + /* Overwrite data in the attribute */ + s->mpls_labels->length = 4*lnum; + memcpy(s->mpls_labels->data, labels, 4*lnum); + + /* Update next hop entry in rta */ + bgp_apply_mpls_labels(s, a, labels, lnum); + + /* Attributes were changed, invalidate cached entry */ + rta_free(s->cached_rta); + s->cached_rta = NULL; + + return; +} + +static uint +bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_ip4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) { - DBG("Withdrawn routes:\n"); - tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); - *tmp++ = 0; - *tmp++ = BGP_AF_IPV6; - *tmp++ = 1; - ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11); - size = bgp_encode_attrs(p, w, ea, remains); - ASSERT(size >= 0); - w += size; - remains -= size; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + 
bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + net_addr_ip4 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) - { - if (EMPTY_LIST(buck->prefixes)) - { - DBG("Deleting empty bucket %p\n", buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - - DBG("Processing bucket %p\n", buck); - rem_stored = remains; - w_stored = w; - - size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024); - if (size < 0) - { - log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - } - w += size; - remains -= size; - - /* We have two addresses here in NEXT_HOP eattr. Really. - Unless NEXT_HOP was modified by filter */ - nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - ASSERT(nh); - second = (nh->u.ptr->length == NEXT_HOP_LENGTH); - ipp = (ip_addr *) nh->u.ptr->data; - ip = ipp[0]; - ip_ll = IPA_NONE; - - if (ipa_equal(ip, p->source_addr)) - ip_ll = p->local_link; - else - { - /* If we send a route with 'third party' next hop destinated - * in the same interface, we should also send a link local - * next hop address. We use the received one (stored in the - * other part of BA_NEXT_HOP eattr). If we didn't received - * it (for example it is a static route), we can't use - * 'third party' next hop and we have to use local IP address - * as next hop. Sending original next hop address without - * link local address seems to be a natural way to solve that - * problem, but it is contrary to RFC 2545 and Quagga does not - * accept such routes. - * - * There are two cases, either we have global IP, or - * IPA_NONE if the neighbor is link-local. For IPA_NONE, - * we suppose it is on the same iface, see bgp_update_attrs(). 
- */ - - if (ipa_zero(ip) || same_iface(p, &ip)) - { - if (second && ipa_nonzero(ipp[1])) - ip_ll = ipp[1]; - else - { - switch (p->cf->missing_lladdr) - { - case MLL_SELF: - ip = p->source_addr; - ip_ll = p->local_link; - break; - case MLL_DROP: - log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name); - w = w_stored; - remains = rem_stored; - bgp_flush_prefixes(p, buck); - rem_node(&buck->send_node); - bgp_free_bucket(p, buck); - continue; - case MLL_IGNORE: - break; - } - } - } - } - - tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); - *tmp++ = 0; - *tmp++ = BGP_AF_IPV6; - *tmp++ = 1; - - if (ipa_is_link_local(ip)) - ip = IPA_NONE; - - if (ipa_nonzero(ip_ll)) - { - *tmp++ = 32; - ipa_hton(ip); - memcpy(tmp, &ip, 16); - ipa_hton(ip_ll); - memcpy(tmp+16, &ip_ll, 16); - tmp += 32; - } - else - { - *tmp++ = 16; - ipa_hton(ip); - memcpy(tmp, &ip, 16); - tmp += 16; - } - - *tmp++ = 0; /* No SNPA information */ - tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1)); - ea->attrs[0].u.ptr->length = tmp - tstart; - size = bgp_encode_attrs(p, w, ea, remains); - ASSERT(size >= 0); - w += size; - break; - } + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - size = w - (buf+4); - put_u16(buf+2, size); - lp_flush(bgp_linpool); - if (size) + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_IP4(ip4_ntoh(addr), l); + net_normalize_ip4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } +} + + +static uint +bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_ip6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) { - BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); - return w; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else - return NULL; + + /* Encode prefix length */ + *pos = net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; } -static byte * -bgp_create_end_mark(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + while (len) + { + net_addr_ip6 net; + u32 path_id = 0; - put_u16(buf+0, 0); - put_u16(buf+2, 6); /* length 4-9 */ - buf += 4; + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); - /* Empty MP_UNREACH_NLRI atribute */ - *buf++ = BAF_OPTIONAL; - *buf++ = BA_MP_UNREACH_NLRI; - *buf++ = 3; /* Length 7-9 */ - *buf++ = 0; /* AFI */ - *buf++ = BGP_AF_IPV6; - *buf++ = 1; /* SAFI */ - return buf; -} + path_id = get_u32(pos); + ADVANCE(pos, 
len, 4); + } -#endif + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); -static inline byte * -bgp_create_route_refresh(struct bgp_conn *conn, byte *buf) -{ - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH"); + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); - /* Original original route refresh request, RFC 2918 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_REQUEST; - *buf++ = 1; /* SAFI */ - return buf; + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_IP6(ip6_ntoh(addr), l); + net_normalize_ip6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } -static inline byte * -bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf) +static uint +bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR"); + byte *pos = buf; - /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_BEGIN; - *buf++ = 1; /* SAFI */ - return buf; + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn4 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + if (s->mpls) + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip4_addr a = ip4_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; } -static inline byte * -bgp_create_end_refresh(struct bgp_conn *conn, byte *buf) +static void +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - struct bgp_proto *p = conn->bgp; - BGP_TRACE(D_PACKETS, "Sending END-OF-RR"); + while (len) + { + net_addr_vpn4 net; + u32 path_id = 0; - /* Demarcation of ending of route refresh (EoRR), RFC 7313 */ - *buf++ = 0; - *buf++ = BGP_AF; - *buf++ = BGP_RR_END; - *buf++ = 1; /* SAFI */ - return buf; + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP4_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip4_addr addr = IP4_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd); + net_normalize_vpn4(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } +static uint +bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint 
size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_vpn6 *net = (void *) px->net; + + /* Encode path ID */ + if (s->add_path) + { + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); + } + + /* Encode prefix length */ + *pos = 64 + net->pxlen; + ADVANCE(pos, size, 1); + + /* Encode MPLS labels */ + bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1); + + /* Encode route distinguisher */ + put_u64(pos, net->rd); + ADVANCE(pos, size, 8); + + /* Encode prefix body */ + ip6_addr a = ip6_hton(net->prefix); + uint b = (net->pxlen + 7) / 8; + memcpy(pos, &a, b); + ADVANCE(pos, size, b); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + static void -bgp_create_header(byte *buf, uint len, uint type) +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) { - memset(buf, 0xff, 16); /* Marker */ - put_u16(buf+16, len); - buf[18] = type; + while (len) + { + net_addr_vpn6 net; + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) + { + if (len < 5) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); + } + + /* Decode prefix length */ + uint l = *pos; + ADVANCE(pos, len, 1); + + if (len < ((l + 7) / 8)) + bgp_parse_error(s, 1); + + /* Decode MPLS labels */ + if (s->mpls) + bgp_decode_mpls_labels(s, &pos, &len, &l, a); + + /* Decode route distinguisher */ + if (l < 64) + bgp_parse_error(s, 1); + + u64 rd = get_u64(pos); + ADVANCE(pos, len, 8); + l -= 64; + + if (l > IP6_MAX_PREFIX_LENGTH) + bgp_parse_error(s, 10); + + /* Decode prefix body */ + ip6_addr addr = IP6_NONE; + uint b = (l + 7) / 8; + memcpy(&addr, pos, b); + ADVANCE(pos, len, b); + + net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd); + net_normalize_vpn6(&net); + + // XXXX validate prefix + + bgp_rte_update(s, (net_addr *) &net, path_id, a); + } } -/** - * bgp_fire_tx - transmit packets - * @conn: connection - * - * Whenever the transmit buffers of the underlying TCP connection - * are free and we have any packets queued for sending, the socket functions - * call bgp_fire_tx() which takes care of selecting the highest priority packet - * queued (Notification > Keepalive > Open > Update), assembling its header - * and body and sending it to the connection. 
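/*
 * A worked example of the VPN NLRI length accounting in
 * bgp_encode_nlri_vpn4()/bgp_decode_nlri_vpn4() above (the vpn6 variants are
 * analogous): the one-byte length field counts bits of label stack plus
 * route distinguisher plus IP prefix, so the encoder writes 64 + pxlen and
 * each label adds 24, while the decoder strips 24 per label and 64 for the
 * RD before checking the remaining IP prefix length. Plain C, purely
 * illustrative.
 */
#include <stdio.h>

int main(void)
{
  unsigned pxlen = 24;     /* IPv4 prefix length */
  unsigned labels = 1;     /* one MPLS label in the stack */

  unsigned wire_len = 24 * labels + 64 + pxlen;        /* value of the length byte */
  unsigned body = 3 * labels + 8 + (pxlen + 7) / 8;    /* bytes following it */

  printf("length field %u bits, body %u bytes\n", wire_len, body);   /* 112, 14 */
  return 0;
}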
- */ -static int -bgp_fire_tx(struct bgp_conn *conn) + +static uint +bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) { - struct bgp_proto *p = conn->bgp; - uint s = conn->packets_to_send; - sock *sk = conn->sk; - byte *buf, *pkt, *end; - int type; + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow4 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow4); - if (!sk) + /* Encode path ID */ + if (s->add_path) { - conn->packets_to_send = 0; - return 0; + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - buf = sk->tbuf; - pkt = buf + BGP_HEADER_LENGTH; - if (s & (1 << PKT_SCHEDULE_CLOSE)) + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static void +bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - /* We can finally close connection and enter idle state */ - bgp_conn_enter_idle_state(conn); - return 0; + if (len < 4) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - if (s & (1 << PKT_NOTIFICATION)) + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow4_validate(data, dlen); + if (r != FLOW_ST_VALID) { - s = 1 << PKT_SCHEDULE_CLOSE; - type = PKT_NOTIFICATION; - end = bgp_create_notification(conn, pkt); + log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_KEEPALIVE)) + + if (data[0] != FLOW_TYPE_DST_PREFIX) { - s &= ~(1 << PKT_KEEPALIVE); - type = PKT_KEEPALIVE; - end = pkt; /* Keepalives carry no data */ - BGP_TRACE(D_PACKETS, "Sending KEEPALIVE"); - bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_OPEN)) + + /* Decode dst prefix */ + ip4_addr px = IP4_NONE; + uint pxlen = data[1]; + + // FIXME: Use some generic function + memcpy(&px, data, BYTES(pxlen)); + px = ip4_and(px, ip4_mkmask(pxlen)); + + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen); + net_fill_flow4(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); + + bgp_rte_update(s, n, path_id, a); + } +} + + +static uint +bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) +{ + byte *pos = buf; + + while (!EMPTY_LIST(buck->prefixes) && (size >= 4)) + { + struct bgp_prefix *px = HEAD(buck->prefixes); + struct net_addr_flow6 *net = (void *) px->net; + uint flen = net->length - sizeof(net_addr_flow6); + + /* Encode path ID */ + if (s->add_path) { - s &= ~(1 << PKT_OPEN); - type = PKT_OPEN; - end = bgp_create_open(conn, pkt); + put_u32(pos, px->path_id); + ADVANCE(pos, size, 4); } - else if (s & (1 << PKT_ROUTE_REFRESH)) + + if (flen > size) + break; + + /* Copy whole flow data including length */ + memcpy(pos, net->data, flen); + ADVANCE(pos, size, flen); + + bgp_free_prefix(s->channel, px); + } + + return pos - buf; +} + +static 
void +bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +{ + while (len) + { + u32 path_id = 0; + + /* Decode path ID */ + if (s->add_path) { - s &= ~(1 << PKT_ROUTE_REFRESH); - type = PKT_ROUTE_REFRESH; - end = bgp_create_route_refresh(conn, pkt); + if (len < 4) + bgp_parse_error(s, 1); + + path_id = get_u32(pos); + ADVANCE(pos, len, 4); } - else if (s & (1 << PKT_BEGIN_REFRESH)) + + if (len < 2) + bgp_parse_error(s, 1); + + /* Decode flow length */ + uint hlen = flow_hdr_length(pos); + uint dlen = flow_read_length(pos); + uint flen = hlen + dlen; + byte *data = pos + hlen; + + if (len < flen) + bgp_parse_error(s, 1); + + /* Validate flow data */ + enum flow_validated_state r = flow6_validate(data, dlen); + if (r != FLOW_ST_VALID) { - s &= ~(1 << PKT_BEGIN_REFRESH); - type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */ - end = bgp_create_begin_refresh(conn, pkt); + log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r)); + bgp_parse_error(s, 1); } - else if (s & (1 << PKT_UPDATE)) + + if (data[0] != FLOW_TYPE_DST_PREFIX) { - type = PKT_UPDATE; - end = bgp_create_update(conn, pkt); + log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name); + bgp_parse_error(s, 1); + } - if (!end) - { - /* No update to send, perhaps we need to send End-of-RIB or EoRR */ + /* Decode dst prefix */ + ip6_addr px = IP6_NONE; + uint pxlen = data[1]; - conn->packets_to_send = 0; + // FIXME: Use some generic function + memcpy(&px, data, BYTES(pxlen)); + px = ip6_and(px, ip6_mkmask(pxlen)); - if (p->feed_state == BFS_LOADED) - { - type = PKT_UPDATE; - end = bgp_create_end_mark(conn, pkt); - } + /* Prepare the flow */ + net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen); + net_fill_flow6(n, px, pxlen, pos, flen); + ADVANCE(pos, len, flen); - else if (p->feed_state == BFS_REFRESHED) - { - type = PKT_ROUTE_REFRESH; - end = bgp_create_end_refresh(conn, pkt); - } + bgp_rte_update(s, n, path_id, a); + } +} - else /* Really nothing to send */ - return 0; - p->feed_state = BFS_NONE; - } - } - else - return 0; +static const struct bgp_af_desc bgp_af_table[] = { + { + .afi = BGP_AF_IPV4, + .net = NET_IP4, + .name = "ipv4", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV4_MC, + .net = NET_IP4, + .name = "ipv4-mc", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV4_MPLS, + .net = NET_IP4, + .mpls = 1, + .name = "ipv4-mpls", + .encode_nlri = bgp_encode_nlri_ip4, + .decode_nlri = bgp_decode_nlri_ip4, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6, + .net = NET_IP6, + .name = "ipv6", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6_MC, + .net = NET_IP6, + .name = "ipv6-mc", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = 
bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_IPV6_MPLS, + .net = NET_IP6, + .mpls = 1, + .name = "ipv6-mpls", + .encode_nlri = bgp_encode_nlri_ip6, + .decode_nlri = bgp_decode_nlri_ip6, + .encode_next_hop = bgp_encode_next_hop_ip, + .decode_next_hop = bgp_decode_next_hop_ip, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MPLS, + .net = NET_VPN4, + .mpls = 1, + .name = "vpn4-mpls", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN6_MPLS, + .net = NET_VPN6, + .mpls = 1, + .name = "vpn6-mpls", + .encode_nlri = bgp_encode_nlri_vpn6, + .decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN4_MC, + .net = NET_VPN4, + .name = "vpn4-mc", + .encode_nlri = bgp_encode_nlri_vpn4, + .decode_nlri = bgp_decode_nlri_vpn4, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_VPN6_MC, + .net = NET_VPN6, + .name = "vpn6-mc", + .encode_nlri = bgp_encode_nlri_vpn6, + .decode_nlri = bgp_decode_nlri_vpn6, + .encode_next_hop = bgp_encode_next_hop_vpn, + .decode_next_hop = bgp_decode_next_hop_vpn, + .update_next_hop = bgp_update_next_hop_ip, + }, + { + .afi = BGP_AF_FLOW4, + .net = NET_FLOW4, + .no_igp = 1, + .name = "flow4", + .encode_nlri = bgp_encode_nlri_flow4, + .decode_nlri = bgp_decode_nlri_flow4, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, + { + .afi = BGP_AF_FLOW6, + .net = NET_FLOW6, + .no_igp = 1, + .name = "flow6", + .encode_nlri = bgp_encode_nlri_flow6, + .decode_nlri = bgp_decode_nlri_flow6, + .encode_next_hop = bgp_encode_next_hop_none, + .decode_next_hop = bgp_decode_next_hop_none, + .update_next_hop = bgp_update_next_hop_none, + }, +}; - conn->packets_to_send = s; - bgp_create_header(buf, end - buf, type); - return sk_send(sk, end - buf); +const struct bgp_af_desc * +bgp_get_af_desc(u32 afi) +{ + uint i; + for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++) + if (bgp_af_table[i].afi == afi) + return &bgp_af_table[i]; + + return NULL; } -/** - * bgp_schedule_packet - schedule a packet for transmission - * @conn: connection - * @type: packet type - * - * Schedule a packet of type @type to be sent as soon as possible. 
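/*
 * A cut-down, standalone sketch of the bgp_af_table[]/bgp_get_af_desc()
 * pattern above: one static descriptor per address family, found by a linear
 * scan and then used only through its hooks (encode_nlri, decode_nlri, ...).
 * The codes, names and the dummy hook below are placeholders, not the real
 * BGP_AF_* values or BIRD callbacks.
 */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

struct af_desc {
  uint32_t afi;
  const char *name;
  unsigned (*encode_nlri)(const void *net, uint8_t *buf, unsigned size);
};

static unsigned encode_dummy(const void *net, uint8_t *buf, unsigned size)
{ (void) net; (void) buf; (void) size; return 0; }

static const struct af_desc af_table[] = {
  { .afi = 1, .name = "ipv4", .encode_nlri = encode_dummy },
  { .afi = 2, .name = "ipv6", .encode_nlri = encode_dummy },
};

static const struct af_desc *get_af_desc(uint32_t afi)
{
  for (size_t i = 0; i < sizeof(af_table) / sizeof(af_table[0]); i++)
    if (af_table[i].afi == afi)
      return &af_table[i];
  return NULL;        /* unknown AFI/SAFI -> no channel, NLRI discarded */
}

int main(void)
{
  const struct af_desc *d = get_af_desc(2);
  printf("%s\n", d ? d->name : "unsupported");
  return 0;
}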
- */ -void -bgp_schedule_packet(struct bgp_conn *conn, int type) +static inline uint +bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - DBG("BGP: Scheduling packet type %d\n", type); - conn->packets_to_send |= 1 << type; - if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev)) - ev_schedule(conn->tx_ev); + return s->channel->desc->encode_nlri(s, buck, buf, end - buf); } -void -bgp_kick_tx(void *vconn) +static inline uint +bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf) { - struct bgp_conn *conn = vconn; - - DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) - ; + return s->channel->desc->encode_next_hop(s, nh, buf, 255); } void -bgp_tx(sock *sk) +bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to) { - struct bgp_conn *conn = sk->data; - - DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) - ; + s->channel->desc->update_next_hop(s, a, to); } -/* Capatibility negotiation as per RFC 2842 */ +#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024) -void -bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) +static byte * +bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - // struct bgp_proto *p = conn->bgp; - int i, cl; + /* + * 2 B Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * var Path Attributes + * var IPv4 Network Layer Reachability Information + */ + + int lr, la; + + la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); + if (la < 0) + { + /* Attribute list too long */ + bgp_withdraw_bucket(s->channel, buck); + return NULL; + } - while (len > 0) - { - if (len < 2 || len < 2 + opt[1]) - goto err; + put_u16(buf+0, 0); + put_u16(buf+2, la); - cl = opt[1]; + lr = bgp_encode_nlri(s, buck, buf+4+la, end); - switch (opt[0]) - { - case 2: /* Route refresh capability, RFC 2918 */ - if (cl != 0) - goto err; - conn->peer_refresh_support = 1; - break; + return buf+4+la+lr; +} - case 6: /* Extended message length capability, draft */ - if (cl != 0) - goto err; - conn->peer_ext_messages_support = 1; - break; +static byte * +bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) +{ + /* + * 2 B IPv4 Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * 1 B MP_REACH_NLRI hdr - Attribute Flags + * 1 B MP_REACH_NLRI hdr - Attribute Type Code + * 2 B MP_REACH_NLRI hdr - Length of Attribute Data + * 2 B MP_REACH_NLRI data - Address Family Identifier + * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier + * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address + * var MP_REACH_NLRI data - Network Address of Next Hop + * 1 B MP_REACH_NLRI data - Reserved (zero) + * var MP_REACH_NLRI data - Network Layer Reachability Information + * var Rest of Path Attributes + * --- IPv4 Network Layer Reachability Information (unused) + */ + + int lh, lr, la; /* Lengths of next hop, NLRI and attributes */ + + /* Begin of MP_REACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; + buf[5] = BA_MP_REACH_NLRI; + put_u16(buf+6, 0); /* Will be fixed later */ + put_af3(buf+8, s->channel->afi); + byte *pos = buf+11; + + /* Encode attributes to temporary buffer */ + byte *abuf = alloca(MAX_ATTRS_LENGTH); + la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH); + if (la < 0) + { + /* Attribute list too long */ + 
bgp_withdraw_bucket(s->channel, buck); + return NULL; + } - case 64: /* Graceful restart capability, RFC 4724 */ - if (cl % 4 != 2) - goto err; - conn->peer_gr_aware = 1; - conn->peer_gr_able = 0; - conn->peer_gr_time = get_u16(opt + 2) & 0x0fff; - conn->peer_gr_flags = opt[2] & 0xf0; - conn->peer_gr_aflags = 0; - for (i = 2; i < cl; i += 4) - if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ - { - conn->peer_gr_able = 1; - conn->peer_gr_aflags = opt[2+i+3]; - } - break; + /* Encode the next hop */ + lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1); + *pos = lh; + pos += 1+lh; - case 65: /* AS4 capability, RFC 4893 */ - if (cl != 4) - goto err; - conn->peer_as4_support = 1; - if (conn->bgp->cf->enable_as4) - conn->advertised_as = get_u32(opt + 2); - break; + /* Reserved field */ + *pos++ = 0; - case 69: /* ADD-PATH capability, RFC 7911 */ - if (cl % 4) - goto err; - for (i = 0; i < cl; i += 4) - if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ - conn->peer_add_path = opt[2+i+3]; - if (conn->peer_add_path > ADD_PATH_FULL) - goto err; - break; + /* Encode the NLRI */ + lr = bgp_encode_nlri(s, buck, pos, end - la); + pos += lr; - case 70: /* Enhanced route refresh capability, RFC 7313 */ - if (cl != 0) - goto err; - conn->peer_enhanced_refresh_support = 1; - break; + /* End of MP_REACH_NLRI atribute, update data length */ + put_u16(buf+6, pos-buf-8); - /* We can safely ignore all other capabilities */ - } - len -= 2 + cl; - opt += 2 + cl; - } - return; + /* Copy remaining attributes */ + memcpy(pos, abuf, la); + pos += la; - err: - bgp_error(conn, 2, 0, NULL, 0); - return; + /* Initial UPDATE fields */ + put_u16(buf+0, 0); + put_u16(buf+2, pos-buf-4); + + return pos; } -static int -bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) +#undef MAX_ATTRS_LENGTH + +static byte * +bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - struct bgp_proto *p = conn->bgp; - int ol; + /* + * 2 B Withdrawn Routes Length + * var IPv4 Withdrawn Routes NLRI + * 2 B Total Path Attribute Length (zero) + * --- Path Attributes (unused) + * --- IPv4 Network Layer Reachability Information (unused) + */ - while (len > 0) - { - if (len < 2 || len < 2 + opt[1]) - { bgp_error(conn, 2, 0, NULL, 0); return 0; } -#ifdef LOCAL_DEBUG - { - int i; - DBG("\tOption %02x:", opt[0]); - for(i=0; i<opt[1]; i++) - DBG(" %02x", opt[2+i]); - DBG("\n"); - } -#endif + uint len = bgp_encode_nlri(s, buck, buf+2, end); - ol = opt[1]; - switch (opt[0]) - { - case 2: - if (conn->start_state == BSS_CONNECT_NOCAP) - BGP_TRACE(D_PACKETS, "Ignoring received capabilities"); - else - bgp_parse_capabilities(conn, opt + 2, ol); - break; + put_u16(buf+0, len); + put_u16(buf+2+len, 0); - default: - /* - * BGP specs don't tell us to send which option - * we didn't recognize, but it's common practice - * to do so. Also, capability negotiation with - * Cisco routers doesn't work without that. 
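/*
 * A standalone sketch of the fixed bytes laid out by bgp_create_mp_reach()
 * above for an IPv6 unicast UPDATE with a single 16-byte next hop. The flag
 * bits (0x80 optional, 0x10 extended length) and the MP_REACH_NLRI type
 * code 14 are the standard wire values (RFC 4760); offsets follow the layout
 * comment above. The real code uses BAF_OPTIONAL/BAF_EXT_LEN,
 * BA_MP_REACH_NLRI and put_af3() instead of the literals shown here.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint8_t buf[64] = { 0 };     /* buf[0..3]: withdrawn len (0) + path attr len */

  buf[4] = 0x80 | 0x10;        /* attribute flags: optional + extended length */
  buf[5] = 14;                 /* MP_REACH_NLRI type code */
  /* buf[6..7]: attribute data length, fixed once the NLRI size is known */
  buf[8] = 0; buf[9] = 2;      /* AFI 2 = IPv6 */
  buf[10] = 1;                 /* SAFI 1 = unicast */
  buf[11] = 16;                /* next hop length: one global IPv6 address */
  /* buf[12..27]: next hop, buf[28]: reserved zero, NLRI follows */

  printf("type %u, NLRI at offset %d\n", buf[5], 12 + 16 + 1);   /* 14, 29 */
  return 0;
}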
- */ - bgp_error(conn, 2, 4, opt, ol); - return 0; - } - len -= 2 + ol; - opt += 2 + ol; - } - return 0; + return buf+4+len; } -static void -bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) +static byte * +bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { - struct bgp_conn *other; - struct bgp_proto *p = conn->bgp; - unsigned hold; - u16 base_as; - u32 id; + /* + * 2 B Withdrawn Routes Length (zero) + * --- IPv4 Withdrawn Routes NLRI (unused) + * 2 B Total Path Attribute Length + * 1 B MP_UNREACH_NLRI hdr - Attribute Flags + * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code + * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data + * 2 B MP_UNREACH_NLRI data - Address Family Identifier + * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier + * var MP_UNREACH_NLRI data - Network Layer Reachability Information + * --- IPv4 Network Layer Reachability Information (unused) + */ + + uint len = bgp_encode_nlri(s, buck, buf+11, end); - /* Check state */ - if (conn->state != BS_OPENSENT) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } + put_u16(buf+0, 0); + put_u16(buf+2, 7+len); - /* Check message contents */ - if (len < 29 || len != 29U + pkt[28]) - { bgp_error(conn, 1, 2, pkt+16, 2); return; } - if (pkt[19] != BGP_VERSION) - { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */ - conn->advertised_as = base_as = get_u16(pkt+20); - hold = get_u16(pkt+22); - id = get_u32(pkt+24); - BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); + /* Begin of MP_UNREACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL | BAF_EXT_LEN; + buf[5] = BA_MP_UNREACH_NLRI; + put_u16(buf+6, 3+len); + put_af3(buf+8, s->channel->afi); - if (bgp_parse_options(conn, pkt+29, pkt[28])) - return; + return buf+11+len; +} - if (hold > 0 && hold < 3) - { bgp_error(conn, 2, 6, pkt+22, 2); return; } +static byte * +bgp_create_update(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + struct bgp_bucket *buck; + byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH); + byte *res = NULL; + +again: ; + + /* Initialize write state */ + struct bgp_write_state s = { + .proto = p, + .channel = c, + .pool = bgp_linpool, + .as4_session = p->as4_session, + .add_path = c->add_path_tx, + .mpls = c->desc->mpls, + }; + + /* Try unreachable bucket */ + if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + { + res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? + bgp_create_ip_unreach(&s, buck, buf, end): + bgp_create_mp_unreach(&s, buck, buf, end); - /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */ - if (!id || (p->is_internal && id == p->local_id)) - { bgp_error(conn, 2, 3, pkt+24, -4); return; } + goto done; + } - if ((conn->advertised_as != base_as) && (base_as != AS_TRANS)) - log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); + /* Try reachable buckets */ + if (!EMPTY_LIST(c->bucket_queue)) + { + buck = HEAD(c->bucket_queue); - if (conn->advertised_as != p->remote_as) + /* Cleanup empty buckets */ + if (EMPTY_LIST(buck->prefixes)) { - if (conn->peer_as4_support) - { - u32 val = htonl(conn->advertised_as); - bgp_error(conn, 2, 2, (byte *) &val, 4); - } - else - bgp_error(conn, 2, 2, pkt+20, 2); - - return; + bgp_free_bucket(c, buck); + goto again; } - /* Check the other connection */ - other = (conn == &p->outgoing_conn) ? 
&p->incoming_conn : &p->outgoing_conn; - switch (other->state) - { - case BS_CONNECT: - case BS_ACTIVE: - /* Stop outgoing connection attempts */ - bgp_conn_enter_idle_state(other); - break; + res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? + bgp_create_ip_reach(&s, buck, buf, end): + bgp_create_mp_reach(&s, buck, buf, end); - case BS_IDLE: - case BS_OPENSENT: - case BS_CLOSE: - break; + if (EMPTY_LIST(buck->prefixes)) + bgp_free_bucket(c, buck); + else + bgp_defer_bucket(c, buck); - case BS_OPENCONFIRM: - /* - * Description of collision detection rules in RFC 4271 is confusing and - * contradictory, but it is essentially: - * - * 1. Router with higher ID is dominant - * 2. If both have the same ID, router with higher ASN is dominant [RFC6286] - * 3. When both connections are in OpenConfirm state, one initiated by - * the dominant router is kept. - * - * The first line in the expression below evaluates whether the neighbor - * is dominant, the second line whether the new connection was initiated - * by the neighbor. If both are true (or both are false), we keep the new - * connection, otherwise we keep the old one. - */ - if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as))) - == (conn == &p->incoming_conn)) - { - /* Should close the other connection */ - BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection"); - bgp_error(other, 6, 7, NULL, 0); - break; - } - /* Fall thru */ - case BS_ESTABLISHED: - /* Should close this connection */ - BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection"); - bgp_error(conn, 6, 7, NULL, 0); - return; - default: - bug("bgp_rx_open: Unknown state"); - } + if (!res) + goto again; - /* Update our local variables */ - conn->hold_time = MIN(hold, p->cf->hold_time); - conn->keepalive_time = p->cf->keepalive_time ? 
: conn->hold_time / 3; - p->remote_id = id; - p->as4_session = p->cf->enable_as4 && conn->peer_as4_support; - p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); - p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); - p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; - p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support; - - /* Update RA mode */ - if (p->add_path_tx) - p->p.accept_ra_types = RA_ANY; - else if (p->cf->secondary) - p->p.accept_ra_types = RA_ACCEPTED; - else - p->p.accept_ra_types = RA_OPTIMAL; + goto done; + } - DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); + /* No more prefixes to send */ + return NULL; - bgp_schedule_packet(conn, PKT_KEEPALIVE); - bgp_start_timer(conn->hold_timer, conn->hold_time); - bgp_conn_enter_openconfirm_state(conn); +done: + BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); + lp_flush(s.pool); + + return res; } +static byte * +bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf) +{ + /* Empty update packet */ + put_u32(buf, 0); -static inline void -bgp_rx_end_mark(struct bgp_proto *p) + return buf+4; +} + +static byte * +bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf) { - BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); + put_u16(buf+0, 0); + put_u16(buf+2, 6); /* length 4--9 */ - if (p->load_state == BFS_LOADING) - p->load_state = BFS_NONE; + /* Empty MP_UNREACH_NLRI atribute */ + buf[4] = BAF_OPTIONAL; + buf[5] = BA_MP_UNREACH_NLRI; + buf[6] = 3; /* Length 7--9 */ + put_af3(buf+7, c->afi); - if (p->p.gr_recovery) - proto_graceful_restart_unlock(&p->p); - - if (p->gr_active) - bgp_graceful_restart_done(p); -} - - -#define DECODE_PREFIX(pp, ll) do { \ - if (p->add_path_rx) \ - { \ - if (ll < 5) { err=1; goto done; } \ - path_id = get_u32(pp); \ - pp += 4; \ - ll -= 4; \ - } \ - int b = *pp++; \ - int q; \ - ll--; \ - if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \ - q = (b+7) / 8; \ - if (ll < q) { err=1; goto done; } \ - memcpy(&prefix, pp, q); \ - pp += q; \ - ll -= q; \ - ipa_ntoh(prefix); \ - prefix = ipa_and(prefix, ipa_mkmask(b)); \ - pxlen = b; \ -} while (0) + return buf+10; +} +static byte * +bgp_create_end_mark(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending END-OF-RIB"); + + return (c->afi == BGP_AF_IPV4) ? 
+ bgp_create_ip_end_mark(c, buf): + bgp_create_mp_end_mark(c, buf); +} static inline void -bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen, - u32 path_id, u32 *last_id, struct rte_src **src, - rta *a0, rta **a) +bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi) { - if (path_id != *last_id) - { - *src = rt_get_source(&p->p, path_id); - *last_id = path_id; + struct bgp_proto *p = s->proto; + struct bgp_channel *c = bgp_get_channel(p, afi); - if (*a) - { - rta_free(*a); - *a = NULL; - } - } + BGP_TRACE(D_PACKETS, "Got END-OF-RIB"); - /* Prepare cached route attributes */ - if (!*a) - { - a0->src = *src; + if (!c) + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); - /* Workaround for rta_lookup() breaking eattrs */ - ea_list *ea = a0->eattrs; - *a = rta_lookup(a0); - a0->eattrs = ea; - } + if (c->load_state == BFS_LOADING) + c->load_state = BFS_NONE; - net *n = net_get(p->p.table, prefix, pxlen); - rte *e = rte_get_temp(rta_clone(*a)); - e->net = n; - e->pflags = 0; - e->u.bgp.suppressed = 0; - rte_update2(p->p.main_ahook, n, e, *src); + if (p->p.gr_recovery) + channel_graceful_restart_unlock(&c->c); + + if (c->gr_active) + bgp_graceful_restart_done(c); } static inline void -bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen, - u32 path_id, u32 *last_id, struct rte_src **src) +bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len) { - if (path_id != *last_id) - { - *src = rt_find_source(&p->p, path_id); - *last_id = path_id; - } + struct bgp_channel *c = bgp_get_channel(s->proto, afi); + rta *a = NULL; - net *n = net_find(p->p.table, prefix, pxlen); - rte_update2( p->p.main_ahook, n, NULL, *src); -} + if (!c) + DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); -static inline int -bgp_set_next_hop(struct bgp_proto *p, rta *a) -{ - struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); - ip_addr *nexthop = (ip_addr *) nh->u.ptr->data; + s->channel = c; + s->add_path = c->add_path_rx; + s->mpls = c->desc->mpls; -#ifdef IPV6 - int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]); + s->last_id = 0; + s->last_src = s->proto->p.main_source; - /* First address should not be link-local, but may be zero in direct mode */ - if (ipa_is_link_local(*nexthop)) - *nexthop = IPA_NONE; -#else - int second = 0; -#endif - - if (p->cf->gw_mode == GW_DIRECT) - { - neighbor *ng = NULL; + /* + * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not + * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for + * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by + * decode_next_hop hooks) by restoring a->eattrs afterwards. 
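/*
 * A minimal sketch of how an UPDATE body splits into the sections parsed by
 * bgp_rx_update() below: 2 B withdrawn-routes length, that many bytes of
 * withdrawn NLRI, 2 B path attribute length, the attributes, and whatever
 * remains as reachable NLRI. An all-zero 4-byte body (a 23-byte message) is
 * exactly the IPv4 End-of-RIB marker recognized there. get_u16_std() is a
 * stand-in for BIRD's get_u16().
 */
#include <stdint.h>
#include <stdio.h>

static unsigned get_u16_std(const uint8_t *p) { return (p[0] << 8) | p[1]; }

int main(void)
{
  uint8_t body[4] = { 0, 0, 0, 0 };
  unsigned len = sizeof(body);

  unsigned withdrawn_len = get_u16_std(body);
  unsigned attr_len = get_u16_std(body + 2 + withdrawn_len);
  unsigned nlri_len = len - 4 - withdrawn_len - attr_len;

  printf("withdrawn=%u attrs=%u nlri=%u\n", withdrawn_len, attr_len, nlri_len);
  return 0;
}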
+ */ - if (ipa_nonzero(*nexthop)) - ng = neigh_find(&p->p, nexthop, 0); - else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */ - ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0); + if (ea) + { + a = allocz(RTA_MAX_SIZE); - /* Fallback */ - if (!ng) - ng = p->neigh; + a->source = RTS_BGP; + a->scope = SCOPE_UNIVERSE; + a->from = s->proto->cf->remote_ip; + a->eattrs = ea; - if (ng->scope == SCOPE_HOST) - return 0; + c->desc->decode_next_hop(s, nh, nh_len, a); - a->dest = RTD_ROUTER; - a->gw = ng->addr; - a->iface = ng->iface; - a->hostentry = NULL; - a->igp_metric = 0; - } - else /* GW_RECURSIVE */ - { - if (ipa_zero(*nexthop)) - return 0; + /* Handle withdraw during next hop decoding */ + if (s->err_withdraw) + a = NULL; + } - rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second); - } + c->desc->decode_nlri(s, nlri, len, a); - return 1; + rta_free(s->cached_rta); + s->cached_rta = NULL; } -#ifndef IPV6 /* IPv4 version */ - static void -bgp_do_rx_update(struct bgp_conn *conn, - byte *withdrawn, int withdrawn_len, - byte *nlri, int nlri_len, - byte *attrs, int attr_len) +bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; - struct rte_src *src = p->p.main_source; - rta *a0, *a = NULL; - ip_addr prefix; - int pxlen, err = 0; - u32 path_id = 0; - u32 last_id = 0; + ea_list *ea = NULL; - /* Check for End-of-RIB marker */ - if (!withdrawn_len && !attr_len && !nlri_len) - { - bgp_rx_end_mark(p); - return; - } + BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE"); - /* Withdraw routes */ - while (withdrawn_len) - { - DECODE_PREFIX(withdrawn, withdrawn_len); - DBG("Withdraw %I/%d\n", prefix, pxlen); + /* Workaround for some BGP implementations that skip initial KEEPALIVE */ + if (conn->state == BS_OPENCONFIRM) + bgp_conn_enter_established_state(conn); - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } + if (conn->state != BS_ESTABLISHED) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - if (!attr_len && !nlri_len) /* shortcut */ - return; + bgp_start_timer(conn->hold_timer, conn->hold_time); - a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len); + /* Initialize parse state */ + struct bgp_parse_state s = { + .proto = p, + .pool = bgp_linpool, + .as4_session = p->as4_session, + }; - if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */ - return; + /* Parse error handler */ + if (setjmp(s.err_jmpbuf)) + { + bgp_error(conn, 3, s.err_subcode, NULL, 0); + goto done; + } - if (a0 && nlri_len && !bgp_set_next_hop(p, a0)) - a0 = NULL; + /* Check minimal length */ + if (len < 23) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - last_id = 0; - src = p->p.main_source; + /* Skip fixed header */ + uint pos = 19; - while (nlri_len) - { - DECODE_PREFIX(nlri, nlri_len); - DBG("Add %I/%d\n", prefix, pxlen); + /* + * UPDATE message format + * + * 2 B IPv4 Withdrawn Routes Length + * var IPv4 Withdrawn Routes NLRI + * 2 B Total Path Attribute Length + * var Path Attributes + * var IPv4 Reachable Routes NLRI + */ - if (a0) - bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a); - else /* Forced withdraw as a result of soft error */ - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } + s.ip_unreach_len = get_u16(pkt + pos); + s.ip_unreach_nlri = pkt + pos + 2; + pos += 2 + s.ip_unreach_len; - done: - if (a) - rta_free(a); + if (pos + 2 > len) + bgp_parse_error(&s, 1); - if (err) - bgp_error(conn, 3, err, NULL, 0); + s.attr_len = 
get_u16(pkt + pos); + s.attrs = pkt + pos + 2; + pos += 2 + s.attr_len; + + if (pos > len) + bgp_parse_error(&s, 1); + s.ip_reach_len = len - pos; + s.ip_reach_nlri = pkt + pos; + + + if (s.attr_len) + ea = bgp_decode_attrs(&s, s.attrs, s.attr_len); + + /* Check for End-of-RIB marker */ + if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len) + { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; } + + /* Check for MP End-of-RIB marker */ + if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len && + !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af) + { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; } + + if (s.ip_unreach_len) + bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0); + + if (s.mp_unreach_len) + bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0); + + if (s.ip_reach_len) + bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len, + ea, s.ip_next_hop_data, s.ip_next_hop_len); + + if (s.mp_reach_len) + bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len, + ea, s.mp_next_hop_data, s.mp_next_hop_len); + +done: + rta_free(s.cached_rta); + lp_flush(s.pool); return; } -#else /* IPv6 version */ -#define DO_NLRI(name) \ - x = p->name##_start; \ - len = len0 = p->name##_len; \ - if (len) \ - { \ - if (len < 3) { err=9; goto done; } \ - af = get_u16(x); \ - x += 3; \ - len -= 3; \ - DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);\ - } \ - else \ - af = 0; \ - if (af == BGP_AF_IPV6) +/* + * ROUTE-REFRESH + */ -static void -bgp_attach_next_hop(rta *a0, byte *x) +static inline byte * +bgp_create_route_refresh(struct bgp_channel *c, byte *buf) { - ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - memcpy(nh, x+1, 16); - ipa_ntoh(nh[0]); + struct bgp_proto *p = (void *) c->c.proto; - /* We store received link local address in the other part of BA_NEXT_HOP eattr. 
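/*
 * A sketch of the 4-byte ROUTE-REFRESH body built by
 * bgp_create_route_refresh() just below: 2 B AFI, 1 B subtype (the field
 * RFC 2918 left reserved and RFC 7313 reuses for request/BoRR/EoRR),
 * 1 B SAFI. The subtype values 0/1/2 are the RFC 7313 codes; the real code
 * uses put_af4() and the BGP_RR_* constants rather than literals.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint8_t body[4];

  body[0] = 0; body[1] = 2;    /* AFI 2 = IPv6 */
  body[2] = 0;                 /* 0 = request, 1 = begin-of-RR, 2 = end-of-RR */
  body[3] = 1;                 /* SAFI 1 = unicast */

  printf("%02x %02x %02x %02x\n", body[0], body[1], body[2], body[3]);
  return 0;
}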
*/ - if (*x == 32) - { - memcpy(nh+1, x+17, 16); - ipa_ntoh(nh[1]); - } - else - nh[1] = IPA_NONE; + BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH"); + + /* Original route refresh request, RFC 2918 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_REQUEST; + + return buf+4; +} + +static inline byte * +bgp_create_begin_refresh(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR"); + + /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_BEGIN; + + return buf+4; } +static inline byte * +bgp_create_end_refresh(struct bgp_channel *c, byte *buf) +{ + struct bgp_proto *p = (void *) c->c.proto; + + BGP_TRACE(D_PACKETS, "Sending END-OF-RR"); + + /* Demarcation of ending of route refresh (EoRR), RFC 7313 */ + put_af4(buf, c->afi); + buf[2] = BGP_RR_END; + + return buf+4; +} static void -bgp_do_rx_update(struct bgp_conn *conn, - byte *withdrawn UNUSED, int withdrawn_len, - byte *nlri UNUSED, int nlri_len, - byte *attrs, int attr_len) +bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; - struct rte_src *src = p->p.main_source; - byte *x; - int len, len0; - unsigned af; - rta *a0, *a = NULL; - ip_addr prefix; - int pxlen, err = 0; - u32 path_id = 0; - u32 last_id = 0; - - p->mp_reach_len = 0; - p->mp_unreach_len = 0; - a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0); - - if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */ - return; - /* Check for End-of-RIB marker */ - if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len && - (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6)) - { - bgp_rx_end_mark(p); - return; - } + if (conn->state != BS_ESTABLISHED) + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - DO_NLRI(mp_unreach) - { - while (len) - { - DECODE_PREFIX(x, len); - DBG("Withdraw %I/%d\n", prefix, pxlen); - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } - } + if (!conn->local_caps->route_refresh) + { bgp_error(conn, 1, 3, pkt+18, 1); return; } - DO_NLRI(mp_reach) - { - /* Create fake NEXT_HOP attribute */ - if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) - { err = 9; goto done; } + if (len < (BGP_HEADER_LENGTH + 4)) + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - if (a0) - bgp_attach_next_hop(a0, x); + if (len > (BGP_HEADER_LENGTH + 4)) + { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; } - /* Also ignore one reserved byte */ - len -= *x + 2; - x += *x + 2; + struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19)); + if (!c) + { + log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring", + p->p.name, pkt[21], get_u16(pkt+19), pkt[22]); + return; + } - if (a0 && ! bgp_set_next_hop(p, a0)) - a0 = NULL; + /* RFC 7313 redefined reserved field as RR message subtype */ + uint subtype = p->enhanced_refresh ? 
pkt[21] : BGP_RR_REQUEST; - last_id = 0; - src = p->p.main_source; + switch (subtype) + { + case BGP_RR_REQUEST: + BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); + channel_request_feeding(&c->c); + break; - while (len) - { - DECODE_PREFIX(x, len); - DBG("Add %I/%d\n", prefix, pxlen); + case BGP_RR_BEGIN: + BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR"); + bgp_refresh_begin(c); + break; - if (a0) - bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a); - else /* Forced withdraw as a result of soft error */ - bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src); - } - } + case BGP_RR_END: + BGP_TRACE(D_PACKETS, "Got END-OF-RR"); + bgp_refresh_end(c); + break; - done: - if (a) - rta_free(a); + default: + log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring", + p->p.name, subtype); + break; + } +} + +static inline struct bgp_channel * +bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn) +{ + uint i = conn->last_channel; - if (err) /* Use subcode 9, not err */ - bgp_error(conn, 3, 9, NULL, 0); + /* Try the last channel, but at most several times */ + if ((conn->channels_to_send & (1 << i)) && + (conn->last_channel_count < 16)) + goto found; - return; + /* Find channel with non-zero channels_to_send */ + do + { + i++; + if (i >= p->channel_count) + i = 0; + } + while (! (conn->channels_to_send & (1 << i))); + + /* Use that channel */ + conn->last_channel = i; + conn->last_channel_count = 0; + +found: + conn->last_channel_count++; + return p->channel_map[i]; } -#endif +static inline int +bgp_send(struct bgp_conn *conn, uint type, uint len) +{ + sock *sk = conn->sk; + byte *buf = sk->tbuf; -static void -bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) + memset(buf, 0xff, 16); /* Marker */ + put_u16(buf+16, len); + buf[18] = type; + + return sk_send(sk, len); +} + +/** + * bgp_fire_tx - transmit packets + * @conn: connection + * + * Whenever the transmit buffers of the underlying TCP connection + * are free and we have any packets queued for sending, the socket functions + * call bgp_fire_tx() which takes care of selecting the highest priority packet + * queued (Notification > Keepalive > Open > Update), assembling its header + * and body and sending it to the connection. 
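/*
 * A standalone sketch of the 19-byte message header filled in by bgp_send()
 * above: a 16-byte all-ones marker, a 2-byte total length (header included)
 * and a 1-byte type. A KEEPALIVE is just this header -- length 19, type 4
 * per the RFC 4271 type codes.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  uint8_t hdr[19];

  memset(hdr, 0xff, 16);          /* marker */
  hdr[16] = 0; hdr[17] = 19;      /* total message length */
  hdr[18] = 4;                    /* type 4 = KEEPALIVE */

  printf("len=%u type=%u\n", (hdr[16] << 8) | hdr[17], hdr[18]);
  return 0;
}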
+ */ +static int +bgp_fire_tx(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; - byte *withdrawn, *attrs, *nlri; - uint withdrawn_len, attr_len, nlri_len; + struct bgp_channel *c; + byte *buf, *pkt, *end; + uint s; - BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE"); + if (!conn->sk) + return 0; - /* Workaround for some BGP implementations that skip initial KEEPALIVE */ - if (conn->state == BS_OPENCONFIRM) - bgp_conn_enter_established_state(conn); + buf = conn->sk->tbuf; + pkt = buf + BGP_HEADER_LENGTH; + s = conn->packets_to_send; - if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - bgp_start_timer(conn->hold_timer, conn->hold_time); + if (s & (1 << PKT_SCHEDULE_CLOSE)) + { + /* We can finally close connection and enter idle state */ + bgp_conn_enter_idle_state(conn); + return 0; + } + if (s & (1 << PKT_NOTIFICATION)) + { + conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE; + end = bgp_create_notification(conn, pkt); + return bgp_send(conn, PKT_NOTIFICATION, end - buf); + } + else if (s & (1 << PKT_KEEPALIVE)) + { + conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); + BGP_TRACE(D_PACKETS, "Sending KEEPALIVE"); + bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); + return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); + } + else if (s & (1 << PKT_OPEN)) + { + conn->packets_to_send &= ~(1 << PKT_OPEN); + end = bgp_create_open(conn, pkt); + return bgp_send(conn, PKT_OPEN, end - buf); + } + else while (conn->channels_to_send) + { + c = bgp_get_channel_to_send(p, conn); + s = c->packets_to_send; - /* Find parts of the packet and check sizes */ - if (len < 23) + if (s & (1 << PKT_ROUTE_REFRESH)) { - bgp_error(conn, 1, 2, pkt+16, 2); - return; + c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH); + end = bgp_create_route_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); } - withdrawn = pkt + 21; - withdrawn_len = get_u16(pkt + 19); - if (withdrawn_len + 23 > len) - goto malformed; - attrs = withdrawn + withdrawn_len + 2; - attr_len = get_u16(attrs - 2); - if (withdrawn_len + attr_len + 23 > len) - goto malformed; - nlri = attrs + attr_len; - nlri_len = len - withdrawn_len - attr_len - 23; - if (!attr_len && nlri_len) - goto malformed; - DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len); - - lp_flush(bgp_linpool); - - bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len); - return; + else if (s & (1 << PKT_BEGIN_REFRESH)) + { + /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */ + c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH); + end = bgp_create_begin_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); + } + else if (s & (1 << PKT_UPDATE)) + { + end = bgp_create_update(c, pkt); + if (end) + return bgp_send(conn, PKT_UPDATE, end - buf); + + /* No update to send, perhaps we need to send End-of-RIB or EoRR */ + c->packets_to_send = 0; + conn->channels_to_send &= ~(1 << c->index); + + if (c->feed_state == BFS_LOADED) + { + c->feed_state = BFS_NONE; + end = bgp_create_end_mark(c, pkt); + return bgp_send(conn, PKT_UPDATE, end - buf); + } + + else if (c->feed_state == BFS_REFRESHED) + { + c->feed_state = BFS_NONE; + end = bgp_create_end_refresh(c, pkt); + return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf); + } + } + else if (s) + bug("Channel packets_to_send: %x", s); + + c->packets_to_send = 0; + conn->channels_to_send &= ~(1 << c->index); + } + + return 0; +} + +/** + * bgp_schedule_packet - schedule 
a packet for transmission + * @conn: connection + * @c: channel + * @type: packet type + * + * Schedule a packet of type @type to be sent as soon as possible. + */ +void +bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) +{ + ASSERT(conn->sk); + + DBG("BGP: Scheduling packet type %d\n", type); + + if (c) + { + if (! conn->channels_to_send) + { + conn->last_channel = c->index; + conn->last_channel_count = 0; + } + + c->packets_to_send |= 1 << type; + conn->channels_to_send |= 1 << c->index; + } + else + conn->packets_to_send |= 1 << type; + + if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); +} + +void +bgp_kick_tx(void *vconn) +{ + struct bgp_conn *conn = vconn; -malformed: - bgp_error(conn, 3, 1, NULL, 0); + DBG("BGP: kicking TX\n"); + while (bgp_fire_tx(conn) > 0) + ; +} + +void +bgp_tx(sock *sk) +{ + struct bgp_conn *conn = sk->data; + + DBG("BGP: TX hook\n"); + while (bgp_fire_tx(conn) > 0) + ; } + static struct { byte major, minor; byte *msg; @@ -1480,26 +2665,25 @@ static struct { * which might be static string or given temporary buffer. */ const char * -bgp_error_dsc(unsigned code, unsigned subcode) +bgp_error_dsc(uint code, uint subcode) { static char buff[32]; - unsigned i; + uint i; + for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) - { - return bgp_msg_table[i].msg; - } + return bgp_msg_table[i].msg; - bsprintf(buff, "Unknown error %d.%d", code, subcode); + bsprintf(buff, "Unknown error %u.%u", code, subcode); return buff; } void -bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) +bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len) { const byte *name; byte *t, argbuf[36]; - unsigned i; + uint i; /* Don't report Cease messages generated by myself */ if (code == 6 && class == BE_BGP_TX) @@ -1515,7 +2699,7 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4))) { /* Bad peer AS - we would like to print the AS */ - t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data)); + t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data)); goto done; } if (len > 16) @@ -1532,47 +2716,25 @@ static void bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len) { struct bgp_proto *p = conn->bgp; + if (len < 21) - { - bgp_error(conn, 1, 2, pkt+16, 2); - return; - } + { bgp_error(conn, 1, 2, pkt+16, 2); return; } - unsigned code = pkt[19]; - unsigned subcode = pkt[20]; + uint code = pkt[19]; + uint subcode = pkt[20]; int err = (code != 6); bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21); bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode); -#ifndef IPV6 - if ((code == 2) && ((subcode == 4) || (subcode == 7)) - /* Error related to capability: - * 4 - Peer does not support capabilities at all. - * 7 - Peer request some capability. Strange unless it is IPv6 only peer. 
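/*
 * A small sketch of how bgp_rx_notification() above reads a NOTIFICATION:
 * the error code and subcode directly follow the 19-byte header (pkt[19],
 * pkt[20]), and only non-Cease codes (code != 6) take the error/startup
 * delay path. Code 6, subcode 2 shown here is Administrative Shutdown
 * (RFC 4486).
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint8_t pkt[21] = { 0 };
  pkt[19] = 6;                     /* error code 6 = Cease */
  pkt[20] = 2;                     /* subcode 2 = Administrative Shutdown */

  unsigned code = pkt[19], subcode = pkt[20];
  int err = (code != 6);           /* Cease is logged but not treated as an error */

  printf("NOTIFICATION %u.%u, err=%d\n", code, subcode, err);
  return 0;
}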
- */ - && (p->cf->capabilities == 2) - /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */ - && (conn->start_state == BSS_CONNECT) - /* Failed connection attempt have used capabilities */ - && (p->cf->remote_as <= 0xFFFF)) - /* Not possible with disabled capabilities */ - { - /* We try connect without capabilities */ - log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name); - p->start_state = BSS_CONNECT_NOCAP; - err = 0; - } -#endif - bgp_conn_enter_close_state(conn); - bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); + bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); - if (err) - { - bgp_update_startup_delay(p); - bgp_stop(p, 0); - } + if (err) + { + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } } static void @@ -1582,64 +2744,12 @@ bgp_rx_keepalive(struct bgp_conn *conn) BGP_TRACE(D_PACKETS, "Got KEEPALIVE"); bgp_start_timer(conn->hold_timer, conn->hold_time); - switch (conn->state) - { - case BS_OPENCONFIRM: - bgp_conn_enter_established_state(conn); - break; - case BS_ESTABLISHED: - break; - default: - bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); - } -} -static void -bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) -{ - struct bgp_proto *p = conn->bgp; + if (conn->state == BS_OPENCONFIRM) + { bgp_conn_enter_established_state(conn); return; } if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } - - if (!p->cf->enable_refresh) - { bgp_error(conn, 1, 3, pkt+18, 1); return; } - - if (len < (BGP_HEADER_LENGTH + 4)) - { bgp_error(conn, 1, 2, pkt+16, 2); return; } - - if (len > (BGP_HEADER_LENGTH + 4)) - { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; } - - /* FIXME - we ignore AFI/SAFI values, as we support - just one value and even an error code for an invalid - request is not defined */ - - /* RFC 7313 redefined reserved field as RR message subtype */ - uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST; - - switch (subtype) - { - case BGP_RR_REQUEST: - BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - proto_request_feeding(&p->p); - break; - - case BGP_RR_BEGIN: - BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR"); - bgp_refresh_begin(p); - break; - - case BGP_RR_END: - BGP_TRACE(D_PACKETS, "Got END-OF-RR"); - bgp_refresh_end(p); - break; - - default: - log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring", - p->p.name, subtype); - break; - } + bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); } @@ -1653,7 +2763,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) * packet handler according to the packet type. 
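/*
 * A standalone sketch of the RFC 4271 header checks that guard the receive
 * path (cf. the length test in bgp_rx() below): a 16-byte all-ones marker
 * and a total length between 19 and the negotiated maximum -- 4096 by
 * default, larger only when the extended-message capability was agreed.
 * bgp_header_ok() is illustrative, not a BIRD function.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int bgp_header_ok(const uint8_t *pkt, unsigned max_len)
{
  for (int i = 0; i < 16; i++)
    if (pkt[i] != 0xff)
      return 0;                               /* connection not synchronized */

  unsigned len = (pkt[16] << 8) | pkt[17];
  return (len >= 19) && (len <= max_len);     /* otherwise: bad message length */
}

int main(void)
{
  uint8_t hdr[19];
  memset(hdr, 0xff, 16);
  hdr[16] = 0; hdr[17] = 19; hdr[18] = 4;     /* a KEEPALIVE */

  printf("%d\n", bgp_header_ok(hdr, 4096));   /* prints 1 */
  return 0;
}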
*/ static void -bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) { byte type = pkt[18]; @@ -1663,14 +2773,14 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len) mrt_dump_bgp_packet(conn, pkt, len); switch (type) - { - case PKT_OPEN: return bgp_rx_open(conn, pkt, len); - case PKT_UPDATE: return bgp_rx_update(conn, pkt, len); - case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len); - case PKT_KEEPALIVE: return bgp_rx_keepalive(conn); - case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len); - default: bgp_error(conn, 1, 3, pkt+18, 1); - } + { + case PKT_OPEN: return bgp_rx_open(conn, pkt, len); + case PKT_UPDATE: return bgp_rx_update(conn, pkt, len); + case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len); + case PKT_KEEPALIVE: return bgp_rx_keepalive(conn); + case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len); + default: bgp_error(conn, 1, 3, pkt+18, 1); + } } /** @@ -1687,10 +2797,9 @@ int bgp_rx(sock *sk, uint size) { struct bgp_conn *conn = sk->data; - struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; - unsigned i, len; + uint i, len; DBG("BGP: RX hook: Got %d bytes\n", size); while (end >= pkt_start + BGP_HEADER_LENGTH) @@ -1704,7 +2813,7 @@ bgp_rx(sock *sk, uint size) break; } len = get_u16(pkt_start+16); - if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p)) + if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn))) { bgp_error(conn, 1, 2, pkt_start+16, 2); break; diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index f90222cf..39e74f71 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -1,5 +1,6 @@ -source=ospf.c topology.c packet.c hello.c neighbor.c iface.c dbdes.c lsreq.c lsupd.c lsack.c lsalib.c rt.c -root-rel=../../ -dir-name=proto/ospf +src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf.c packet.c rt.c topology.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 7b35b191..98ddf5d0 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -78,14 +78,66 @@ static void ospf_proto_finish(void) { struct ospf_config *cf = OSPF_CFG; + struct ospf_area_config *ac; + struct ospf_iface_patt *ic; + + /* Define default channel */ + if (EMPTY_LIST(this_proto->channels)) + { + this_proto->net_type = ospf_cfg_is_v2() ? NET_IP4 : NET_IP6; + channel_config_new(NULL, this_proto->net_type, this_proto); + } + + /* Propagate global instance ID to interfaces */ + if (cf->instance_id_set) + { + WALK_LIST(ac, cf->area_list) + WALK_LIST(ic, ac->patt_list) + if (!ic->instance_id_set) + { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; } + + WALK_LIST(ic, cf->vlink_list) + if (!ic->instance_id_set) + { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; } + } + + if (ospf_cfg_is_v3()) + { + uint ipv4 = (this_proto->net_type == NET_IP4); + uint base = (ipv4 ? 64 : 0) + (cf->af_mc ? 32 : 0); + + /* RFC 5838 - OSPFv3-AF */ + if (cf->af_ext) + { + /* RFC 5838 2.1 - instance IDs based on AFs */ + WALK_LIST(ac, cf->area_list) + WALK_LIST(ic, ac->patt_list) + { + if (!ic->instance_id_set) + ic->instance_id = base; + else if (ic->instance_id >= 128) + log(L_WARN "Instance ID %d from unassigned/private range", ic->instance_id); + else if ((ic->instance_id < base) || (ic->instance_id >= (base + 32))) + cf_error("Instance ID %d invalid for given channel type", ic->instance_id); + } + + /* RFC 5838 2.8 - vlinks limited to IPv6 unicast */ + if ((ipv4 || cf->af_mc) && !EMPTY_LIST(cf->vlink_list)) + cf_error("Vlinks not supported in AFs other than IPv6 unicast"); + } + else + { + if (ipv4 || cf->af_mc) + cf_error("Different channel type"); + } + } if (EMPTY_LIST(cf->area_list)) - cf_error( "No configured areas in OSPF"); + cf_error("No configured areas in OSPF"); int areano = 0; int backbone = 0; int nssa = 0; - struct ospf_area_config *ac; WALK_LIST(ac, cf->area_list) { areano++; @@ -98,7 +150,7 @@ ospf_proto_finish(void) cf->abr = areano > 1; /* Route export or NSSA translation (RFC 3101 3.1) */ - cf->asbr = (this_proto->out_filter != FILTER_REJECT) || (nssa && cf->abr); + cf->asbr = (proto_cf_main_channel(this_proto)->out_filter != FILTER_REJECT) || (nssa && cf->abr); if (cf->abr && !backbone) { @@ -122,7 +174,7 @@ static inline void ospf_check_defcost(int cost) { if ((cost <= 0) || (cost >= LSINFINITY)) - cf_error("Default cost must be in range 1-%d", LSINFINITY-1); + cf_error("Default cost must be in range 1-%u", LSINFINITY-1); } static inline void @@ -135,8 +187,8 @@ ospf_check_auth(void) CF_DECLS -CF_KEYWORDS(OSPF, AREA, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) -CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) +CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) +CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY) @@ -144,39 +196,67 @@ CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, 
LENGTH) -CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION) +CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838) -%type <t> opttext %type <ld> lsadb_args -%type <i> nbma_eligible +%type <i> ospf_variant ospf_af_mc nbma_eligible +%type <cc> ospf_channel_start ospf_channel CF_GRAMMAR CF_ADDTO(proto, ospf_proto '}' { ospf_proto_finish(); } ) -ospf_proto_start: proto_start OSPF { - this_proto = proto_config_new(&proto_ospf, $1); - init_list(&OSPF_CFG->area_list); - init_list(&OSPF_CFG->vlink_list); - OSPF_CFG->tick = OSPF_DEFAULT_TICK; - OSPF_CFG->ospf2 = OSPF_IS_V2; - } +ospf_variant: + OSPF { $$ = 1; } + | OSPF V2 { $$ = 1; } + | OSPF V3 { $$ = 0; } ; +ospf_proto_start: proto_start ospf_variant +{ + this_proto = proto_config_new(&proto_ospf, $1); + this_proto->net_type = $2 ? NET_IP4 : 0; + + init_list(&OSPF_CFG->area_list); + init_list(&OSPF_CFG->vlink_list); + OSPF_CFG->tick = OSPF_DEFAULT_TICK; + OSPF_CFG->ospf2 = $2; + OSPF_CFG->af_ext = !$2; +}; + ospf_proto: ospf_proto_start proto_name '{' | ospf_proto ospf_proto_item ';' ; +ospf_af_mc: + { $$ = 0; } + | MULTICAST { $$ = 1; } + ; + +/* We redefine proto_channel to add multicast flag */ +ospf_channel_start: net_type ospf_af_mc +{ + $$ = this_channel = channel_config_new(NULL, $1, this_proto); + + /* Save the multicast flag */ + if (this_channel == proto_cf_main_channel(this_proto)) + OSPF_CFG->af_mc = $2; +}; + +ospf_channel: ospf_channel_start channel_opt_list channel_end; + ospf_proto_item: proto_item + | ospf_channel { this_proto->net_type = $1->net_type; } | RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; } + | RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); } | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } | ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; } - | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } + | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? 
$4 : 0; } | MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; } - | TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); } - | INSTANCE ID expr { OSPF_CFG->instance_id = $3; if (($3<0) || ($3>255)) cf_error("Instance ID must be in range 0-255"); } + | TICK expr { OSPF_CFG->tick = $2; if($2 <= 0) cf_error("Tick must be greater than zero"); } + | INSTANCE ID expr { OSPF_CFG->instance_id = $3; OSPF_CFG->instance_id_set = 1; if ($3 > 255) cf_error("Instance ID must be in range 0-255"); } | ospf_area ; @@ -226,10 +306,10 @@ ospf_stubnet: ; ospf_stubnet_start: - prefix { + net_ip { this_stubnet = cfg_allocz(sizeof(struct ospf_stubnet_config)); add_tail(&this_area->stubnet_list, NODE this_stubnet); - this_stubnet->px = $1; + this_stubnet->prefix = $1; this_stubnet->cost = COST_D; } ; @@ -281,7 +361,6 @@ ospf_vlink_start: VIRTUAL LINK idval OSPF_PATT->inftransdelay = INFTRANSDELAY_D; OSPF_PATT->deadc = DEADC_D; OSPF_PATT->type = OSPF_IT_VLINK; - OSPF_PATT->instance_id = OSPF_CFG->instance_id; init_list(&OSPF_PATT->nbma_list); reset_passwords(); } @@ -306,7 +385,7 @@ ospf_iface_item: | REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (!ospf_cfg_is_v2()) cf_error("Real broadcast option requires OSPFv2"); } | PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (!ospf_cfg_is_v2()) cf_error("PtP netmask option requires OSPFv2"); } | TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); } - | PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); } + | PRIORITY expr { OSPF_PATT->priority = $2 ; if ($2>255) cf_error("Priority must be in range 0-255"); } | STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; } | STUB bool { OSPF_PATT->stub = $2 ; } | CHECK LINK bool { OSPF_PATT->check_link = $3; } @@ -325,7 +404,6 @@ ospf_iface_item: | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; } | TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; } | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); } - | SECONDARY bool { OSPF_PATT->bsd_secondary = $2; } | password_list { ospf_check_auth(); } ; @@ -336,12 +414,11 @@ pref_list: pref_item: pref_base pref_opt ';' ; -pref_base: prefix +pref_base: net_ip { this_pref = cfg_allocz(sizeof(struct area_net_config)); add_tail(this_nets, NODE this_pref); - this_pref->px.addr = $1.addr; - this_pref->px.len = $1.len; + this_pref->prefix = $1; } ; @@ -383,7 +460,6 @@ ospf_iface_start: OSPF_PATT->priority = PRIORITY_D; OSPF_PATT->deadc = DEADC_D; OSPF_PATT->type = OSPF_IT_UNDEF; - OSPF_PATT->instance_id = OSPF_CFG->instance_id; init_list(&OSPF_PATT->nbma_list); OSPF_PATT->ptp_netmask = 2; /* not specified */ OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL; @@ -394,7 +470,7 @@ ospf_iface_start: ospf_instance_id: /* empty */ - | INSTANCE expr { OSPF_PATT->instance_id = $2; if (($2<0) || ($2>255)) cf_error("Instance ID must be in range 0-255"); } + | INSTANCE expr { OSPF_PATT->instance_id = $2; OSPF_PATT->instance_id_set = 1; if ($2 > 255) cf_error("Instance ID must be in range 0-255"); } ; ospf_iface_patt_list: @@ -415,11 +491,6 @@ ospf_iface: ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); } ; -opttext: - TEXT - | /* empty */ { $$ = NULL; } - ; - CF_ADDTO(dynamic_attr, OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_METRIC1); }) CF_ADDTO(dynamic_attr, OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_METRIC2); }) 
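/*
 * Illustrative sketch (not part of the patch) of the RFC 5838 instance ID
 * windows enforced by ospf_proto_finish() above: each OSPFv3 address family
 * owns a block of 32 IDs (0 for IPv6 unicast, 32 for IPv6 multicast, 64 for
 * IPv4 unicast, 96 for IPv4 multicast), while IDs 128-255 come from the
 * unassigned/private range and only trigger a warning. Helper names below
 * are illustrative only.
 */
static inline unsigned ospf3_af_iid_base(int ipv4, int multicast)
{
  return (ipv4 ? 64 : 0) + (multicast ? 32 : 0);
}

static inline int ospf3_af_iid_acceptable(unsigned iid, int ipv4, int multicast)
{
  unsigned base = ospf3_af_iid_base(ipv4, multicast);

  if (iid >= 128)
    return 1;	/* unassigned/private range, warned about but accepted */

  return (iid >= base) && (iid < base + 32);
}

/*
 * Example: instance ID 70 is acceptable for an IPv4 unicast channel
 * (window 64-95) but would be rejected for IPv6 unicast (window 0-31).
 */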
CF_ADDTO(dynamic_attr, OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_TAG); }) diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index d6904343..f211935f 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -39,7 +39,7 @@ struct ospf_dbdes3_packet static inline uint -ospf_dbdes_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6) +ospf_dbdes_hdrlen(struct ospf_proto *p) { return ospf_is_v2(p) ? sizeof(struct ospf_dbdes2_packet) : sizeof(struct ospf_dbdes3_packet); @@ -356,7 +356,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa, LOG_PKT_WARN("MTU mismatch with nbr %R on %s (remote %d, local %d)", n->rid, ifa->ifname, rcv_iface_mtu, ifa->iface->mtu); - if ((rcv_imms == DBDES_IMMS) && + if (((rcv_imms & DBDES_IMMS) == DBDES_IMMS) && (n->rid > p->router_id) && (plen == ospf_dbdes_hdrlen(p))) { @@ -428,7 +428,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa, } ospf_send_dbdes(p, n); - tm_start(n->dbdes_timer, n->ifa->rxmtint); + tm_start(n->dbdes_timer, n->ifa->rxmtint S); } else { diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index e00487dc..e706ea0f 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -32,10 +32,7 @@ struct ospf_hello3_packet struct ospf_packet hdr; u32 iface_id; - u8 priority; - u8 options3; - u8 options2; - u8 options; + u32 options; u16 helloint; u16 deadint; u32 dr; @@ -74,7 +71,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) ((ifa->type == OSPF_IT_PTP) && !ifa->ptp_netmask)) ps->netmask = 0; else - ps->netmask = htonl(u32_mkmask(ifa->addr->pxlen)); + ps->netmask = htonl(u32_mkmask(ifa->addr->prefix.pxlen)); ps->helloint = ntohs(ifa->helloint); ps->options = ifa->oa->options; @@ -91,10 +88,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) struct ospf_hello3_packet *ps = (void *) pkt; ps->iface_id = htonl(ifa->iface_id); - ps->priority = ifa->priority; - ps->options3 = ifa->oa->options >> 16; - ps->options2 = ifa->oa->options >> 8; - ps->options = ifa->oa->options; + ps->options = ntohl(ifa->oa->options | (ifa->priority << 24)); ps->helloint = ntohs(ifa->helloint); ps->deadint = htons(ifa->deadint); ps->dr = htonl(ifa->drid); @@ -190,7 +184,8 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, struct ospf_proto *p = ifa->oa->po; const char *err_dsc = NULL; u32 rcv_iface_id, rcv_helloint, rcv_deadint, rcv_dr, rcv_bdr; - u8 rcv_options, rcv_priority; + uint rcv_options, rcv_priority; + uint loc_options = ifa->oa->options; u32 *neighbors; u32 neigh_count; uint plen, i, err_val = 0; @@ -198,7 +193,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, /* RFC 2328 10.5 */ /* - * We may not yet havethe associate neighbor, so we use Router ID from the + * We may not yet have the associate neighbor, so we use Router ID from the * packet instead of one from the neighbor structure for log messages. 
*/ u32 rcv_rid = ntohl(pkt->routerid); @@ -227,7 +222,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, if ((ifa->type != OSPF_IT_VLINK) && (ifa->type != OSPF_IT_PTP) && - ((uint) pxlen != ifa->addr->pxlen)) + ((uint) pxlen != ifa->addr->prefix.pxlen)) DROP("prefix length mismatch", pxlen); neighbors = ps->neighbors; @@ -245,8 +240,8 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, rcv_deadint = ntohs(ps->deadint); rcv_dr = ntohl(ps->dr); rcv_bdr = ntohl(ps->bdr); - rcv_options = ps->options; - rcv_priority = ps->priority; + rcv_options = ntohl(ps->options) & 0x00FFFFFF; + rcv_priority = ntohl(ps->options) >> 24; neighbors = ps->neighbors; neigh_count = (plen - sizeof(struct ospf_hello3_packet)) / sizeof(u32); @@ -259,9 +254,13 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa, DROP("dead interval mismatch", rcv_deadint); /* Check whether bits E, N match */ - if ((rcv_options ^ ifa->oa->options) & (OPT_E | OPT_N)) + if ((rcv_options ^ loc_options) & (OPT_E | OPT_N)) DROP("area type mismatch", rcv_options); + /* RFC 5838 2.4 - AF-bit check unless on IPv6 unicast */ + if ((loc_options & OPT_AF) && !(loc_options & OPT_V6) && !(rcv_options & OPT_AF)) + DROP("AF-bit mismatch", rcv_options); + /* Check consistency of existing neighbor entry */ if (n) { diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 280fa4c1..29d21a07 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -55,7 +55,9 @@ ifa_tx_length(struct ospf_iface *ifa) static inline uint ifa_tx_hdrlen(struct ospf_iface *ifa) { - uint hlen = SIZE_OF_IP_HEADER; + struct ospf_proto *p = ifa->oa->po; + + uint hlen = ospf_is_v2(p) ? IP4_HEADER_LENGTH : IP6_HEADER_LENGTH; /* Relevant just for OSPFv2 */ if (ifa->autype == OSPF_AUTH_CRYPT) @@ -115,6 +117,7 @@ ospf_sk_open(struct ospf_iface *ifa) sock *sk = sk_new(ifa->pool); sk->type = SK_IP; + sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6; sk->dport = OSPF_PROTO; sk->saddr = ifa->addr->ip; sk->iface = ifa->iface; @@ -199,6 +202,7 @@ ospf_open_vlink_sk(struct ospf_proto *p) { sock *sk = sk_new(p->p.pool); sk->type = SK_IP; + sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6; sk->dport = OSPF_PROTO; /* FIXME: configurable tos/priority ? */ @@ -244,8 +248,8 @@ ospf_iface_down(struct ospf_iface *ifa) OSPF_TRACE(D_EVENTS, "Removing interface %s (peer %I) from area %R", ifa->ifname, ifa->addr->opposite, ifa->oa->areaid); else - OSPF_TRACE(D_EVENTS, "Removing interface %s (%I/%d) from area %R", - ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen, ifa->oa->areaid); + OSPF_TRACE(D_EVENTS, "Removing interface %s (%N) from area %R", + ifa->ifname, &ifa->addr->prefix, ifa->oa->areaid); /* First of all kill all the related vlinks */ WALK_LIST(iff, p->iface_list) @@ -392,15 +396,15 @@ ospf_iface_sm(struct ospf_iface *ifa, int event) { ospf_iface_chstate(ifa, OSPF_IS_WAITING); if (ifa->wait_timer) - tm_start(ifa->wait_timer, ifa->waitint); + tm_start(ifa->wait_timer, ifa->waitint S); } } if (ifa->hello_timer) - tm_start(ifa->hello_timer, ifa->helloint); + tm_start(ifa->hello_timer, ifa->helloint S); if (ifa->poll_timer) - tm_start(ifa->poll_timer, ifa->pollint); + tm_start(ifa->poll_timer, ifa->pollint S); ospf_send_hello(ifa, OHS_HELLO, NULL); } @@ -490,13 +494,13 @@ ospf_iface_add(struct object_lock *lock) if (! 
ifa->stub) { - ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); + ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0); if (ifa->type == OSPF_IT_NBMA) - ifa->poll_timer = tm_new_set(ifa->pool, poll_timer_hook, ifa, 0, ifa->pollint); + ifa->poll_timer = tm_new_init(ifa->pool, poll_timer_hook, ifa, ifa->pollint S, 0); if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA)) - ifa->wait_timer = tm_new_set(ifa->pool, wait_timer_hook, ifa, 0, 0); + ifa->wait_timer = tm_new_init(ifa->pool, wait_timer_hook, ifa, 0, 0); ifa->flood_queue_size = ifa_flood_queue_size(ifa); ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *)); @@ -530,15 +534,6 @@ ospf_iface_stubby(struct ospf_iface_patt *ip, struct ifa *addr) if (addr->iface->flags & IF_LOOPBACK) return 1; - /* - * For compatibility reasons on BSD systems, we force OSPF - * interfaces with non-primary IP prefixes to be stub. - */ -#if defined(OSPFv2) && !defined(CONFIG_MC_PROPER_SRC) - if (!ip->bsd_secondary && !(addr->flags & IA_PRIMARY)) - return 1; -#endif - return ip->stub; } @@ -557,8 +552,8 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i OSPF_TRACE(D_EVENTS, "Adding interface %s (peer %I) to area %R", iface->name, addr->opposite, oa->areaid); else - OSPF_TRACE(D_EVENTS, "Adding interface %s (%I/%d) to area %R", - iface->name, addr->prefix, addr->pxlen, oa->areaid); + OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R", + iface->name, &addr->prefix, oa->areaid); pool = rp_new(p->p.pool, "OSPF Interface"); ifa = mb_allocz(pool, sizeof(struct ospf_iface)); @@ -596,6 +591,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i if (ip->ptp_netmask < 2) ifa->ptp_netmask = ip->ptp_netmask; + ifa->drip = ifa->bdrip = ospf_is_v2(p) ? IPA_NONE4 : IPA_NONE6; ifa->type = ospf_iface_classify(ip->type, addr); @@ -635,7 +631,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i should be used). Because OSPFv3 iface is not subnet-specific, there is no need for ipa_in_net() check */ - if (ospf_is_v2(p) && !ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) + if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &addr->prefix)) continue; if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip)) @@ -648,7 +644,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i add_tail(&oa->po->iface_list, NODE ifa); struct object_lock *lock = olock_new(pool); - lock->addr = ospf_is_v2(p) ? ifa->addr->prefix : IPA_NONE; + lock->addr = ospf_is_v2(p) ? 
ipa_from_ip4(net4_prefix(&ifa->addr->prefix)) : IPA_NONE; lock->type = OBJLOCK_IP; lock->port = OSPF_PROTO; lock->inst = ifa->instance_id; @@ -707,7 +703,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip) add_tail(&p->iface_list, NODE ifa); - ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); + ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0); ifa->flood_queue_size = ifa_flood_queue_size(ifa); ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *)); @@ -719,10 +715,10 @@ ospf_iface_change_timer(timer *tm, uint val) if (!tm) return; - tm->recurrent = val; + tm->recurrent = val S; - if (tm->expires) - tm_start(tm, val); + if (tm_active(tm)) + tm_start(tm, val S); } static inline void @@ -805,8 +801,8 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifname, ifa->waitint, new->waitint); ifa->waitint = new->waitint; - if (ifa->wait_timer && ifa->wait_timer->expires) - tm_start(ifa->wait_timer, ifa->waitint); + if (ifa->wait_timer && tm_active(ifa->wait_timer)) + tm_start(ifa->wait_timer, ifa->waitint S); } /* DEAD TIMER */ @@ -898,7 +894,7 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) WALK_LIST(nb, new->nbma_list) { /* See related note in ospf_iface_new() */ - if (ospf_is_v2(p) && !ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen)) + if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &ifa->addr->prefix)) continue; if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip)) @@ -1085,6 +1081,9 @@ ospf_ifa_notify2(struct proto *P, uint flags, struct ifa *a) { struct ospf_proto *p = (struct ospf_proto *) P; + if (a->prefix.type != NET_IP4) + return; + if (a->flags & IA_SECONDARY) return; @@ -1124,6 +1123,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a) other addresses are used for link-LSA. 
*/ if (a->scope == SCOPE_LINK) { + if (a->prefix.type != NET_IP6) + return; + if (flags & IF_CHANGE_UP) { struct ospf_mip_walk s = { .iface = a->iface }; @@ -1141,6 +1143,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a) } else { + if (a->prefix.type != ospf_get_af(p)) + return; + struct ospf_iface *ifa; WALK_LIST(ifa, p->iface_list) if (ifa->iface == a->iface) @@ -1166,6 +1171,9 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) WALK_LIST(a, iface->addrs) { + if (a->prefix.type != NET_IP4) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -1184,8 +1192,8 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) continue; /* Hard restart */ - log(L_INFO "%s: Restarting interface %s (%I/%d) in area %R", - p->p.name, ifa->ifname, a->prefix, a->pxlen, s.oa->areaid); + log(L_INFO "%s: Restarting interface %s (%N) in area %R", + p->p.name, ifa->ifname, &a->prefix, s.oa->areaid); ospf_iface_shutdown(ifa); ospf_iface_remove(ifa); } @@ -1209,6 +1217,9 @@ ospf_reconfigure_ifaces3(struct ospf_proto *p) WALK_LIST(a, iface->addrs) { + if (a->prefix.type != NET_IP6) + continue; + if (a->flags & IA_SECONDARY) continue; @@ -1340,7 +1351,7 @@ ospf_iface_info(struct ospf_iface *ifa) else if (ifa->addr->flags & IA_PEER) cli_msg(-1015, "Interface %s (peer %I)", ifa->ifname, ifa->addr->opposite); else - cli_msg(-1015, "Interface %s (%I/%d)", ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen); + cli_msg(-1015, "Interface %s (%N)", ifa->ifname, &ifa->addr->prefix); cli_msg(-1015, "\tType: %s%s", ospf_it[ifa->type], more); cli_msg(-1015, "\tArea: %R (%u)", ifa->oa->areaid, ifa->oa->areaid); diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c index cb7b186a..fbfd8d29 100644 --- a/proto/ospf/lsalib.c +++ b/proto/ospf/lsalib.c @@ -280,21 +280,19 @@ lsa_walk_rt(struct ospf_lsa_rt_walk *rt) void -lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric) +lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric) { if (ospf2) { struct ospf_lsa_sum2 *ls = en->lsa_body; - *ip = ipa_from_u32(en->lsa.id & ls->netmask); - *pxlen = u32_masklen(ls->netmask); + net_fill_ip4(net, ip4_from_u32(en->lsa.id & ls->netmask), u32_masklen(ls->netmask)); *pxopts = 0; *metric = ls->metric & LSA_METRIC_MASK; } else { struct ospf_lsa_sum3_net *ls = en->lsa_body; - u16 rest; - lsa_get_ipv6_prefix(ls->prefix, ip, pxlen, pxopts, &rest); + ospf3_get_prefix(ls->prefix, af, net, pxopts, NULL); *metric = ls->metric & LSA_METRIC_MASK; } } @@ -319,13 +317,14 @@ lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u } void -lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt) +lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt) { if (ospf2) { struct ospf_lsa_ext2 *ext = en->lsa_body; - rt->ip = ipa_from_u32(en->lsa.id & ext->netmask); - rt->pxlen = u32_masklen(ext->netmask); + net_fill_ip4(&rt->net, + ip4_from_u32(en->lsa.id & ext->netmask), + u32_masklen(ext->netmask)); rt->pxopts = 0; rt->metric = ext->metric & LSA_METRIC_MASK; rt->ebit = ext->metric & LSA_EXT2_EBIT; @@ -339,14 +338,13 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *r else { struct ospf_lsa_ext3 *ext = en->lsa_body; - u16 rest; - u32 *buf = lsa_get_ipv6_prefix(ext->rest, &rt->ip, &rt->pxlen, &rt->pxopts, &rest); + u32 *buf = ospf3_get_prefix(ext->rest, af, &rt->net, &rt->pxopts, NULL); rt->metric = ext->metric & LSA_METRIC_MASK; rt->ebit = 
ext->metric & LSA_EXT3_EBIT; rt->fbit = ext->metric & LSA_EXT3_FBIT; if (rt->fbit) - buf = lsa_get_ipv6_addr(buf, &rt->fwaddr); + buf = ospf3_get_addr(buf, af, &rt->fwaddr); else rt->fwaddr = IPA_NONE; @@ -452,7 +450,7 @@ lsa_validate_sum3_net(struct ospf_lsa_header *lsa, struct ospf_lsa_sum3_net *bod return 0; u8 pxl = pxlen(body->prefix); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; if (lsa->length != (HDRLEN + sizeof(struct ospf_lsa_sum3_net) + @@ -491,11 +489,11 @@ lsa_validate_ext3(struct ospf_lsa_header *lsa, struct ospf_lsa_ext3 *body) return 0; u8 pxl = pxlen(body->rest); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; int len = IPV6_PREFIX_SPACE(pxl); - if (body->metric & LSA_EXT3_FBIT) // forwardinf address + if (body->metric & LSA_EXT3_FBIT) // forwarding address len += 16; if (body->metric & LSA_EXT3_TBIT) // route tag len += 4; @@ -520,7 +518,7 @@ lsa_validate_pxlist(struct ospf_lsa_header *lsa, u32 pxcount, uint offset, u8 *p return 0; u8 pxl = pxlen((u32 *) (pbuf + offset)); - if (pxl > MAX_PREFIX_LENGTH) + if (pxl > IP6_MAX_PREFIX_LENGTH) return 0; offset += IPV6_PREFIX_SPACE(pxl); @@ -554,8 +552,8 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body) /** * lsa_validate - check whether given LSA is valid * @lsa: LSA header - * @lsa_type: one of %LSA_T_xxx - * @ospf2: %true means OSPF version 2, %false means OSPF version 3 + * @lsa_type: internal LSA type (%LSA_T_xxx) + * @ospf2: %true for OSPFv2, %false for OSPFv3 * @body: pointer to LSA body * * Checks internal structure of given LSA body (minimal length, diff --git a/proto/ospf/lsalib.h b/proto/ospf/lsalib.h index 638b3525..fca7faec 100644 --- a/proto/ospf/lsalib.h +++ b/proto/ospf/lsalib.h @@ -41,7 +41,7 @@ void lsa_get_type_domain_(u32 itype, struct ospf_iface *ifa, u32 *otype, u32 *do static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_iface *ifa, u32 *otype, u32 *domain) { lsa_get_type_domain_(lsa->type_raw, ifa, otype, domain); } -static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p UNUSED4 UNUSED6) +static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p) { return ospf_is_v2(p) ? 
(h->type_raw & LSA_T_V2_MASK) : h->type_raw; } @@ -55,9 +55,12 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len); int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2); void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt); int lsa_walk_rt(struct ospf_lsa_rt_walk *rt); -void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric); +void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric); void lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u32 *options); -void lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt); +void lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt); int lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body); +static inline btime lsa_inst_age(struct top_hash_entry *en) +{ return current_time() - en->inst_time; } + #endif /* _BIRD_OSPF_LSALIB_H_ */ diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index 157d9628..a98c9098 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -137,7 +137,7 @@ ospf_lsa_lsrt_up(struct top_hash_entry *en, struct ospf_neighbor *n) ret->lsa_body = LSA_BODY_DUMMY; if (!tm_active(n->lsrt_timer)) - tm_start(n->lsrt_timer, n->ifa->rxmtint); + tm_start(n->lsrt_timer, n->ifa->rxmtint S); } void @@ -572,7 +572,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, { /* 13. (5a) - enforce minimum time between updates for received LSAs */ /* We also use this to ratelimit reactions to received self-originated LSAs */ - if (en && ((now - en->inst_time) < MINLSARRIVAL)) + if (en && (lsa_inst_age(en) < MINLSARRIVAL)) { OSPF_TRACE(D_EVENTS, "Skipping LSA received in less that MinLSArrival"); continue; @@ -700,7 +700,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, if (!EMPTY_SLIST(n->lsrql) && (n->lsrqi == SHEAD(n->lsrql))) { ospf_send_lsreq(p, n); - tm_start(n->lsrq_timer, n->ifa->rxmtint); + tm_start(n->lsrq_timer, n->ifa->rxmtint S); } return; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 9fe3c028..f2d3505e 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -94,11 +94,11 @@ ospf_neighbor_new(struct ospf_iface *ifa) init_list(&n->ackl[ACKL_DIRECT]); init_list(&n->ackl[ACKL_DELAY]); - n->inactim = tm_new_set(pool, inactivity_timer_hook, n, 0, 0); - n->dbdes_timer = tm_new_set(pool, dbdes_timer_hook, n, 0, ifa->rxmtint); - n->lsrq_timer = tm_new_set(pool, lsrq_timer_hook, n, 0, ifa->rxmtint); - n->lsrt_timer = tm_new_set(pool, lsrt_timer_hook, n, 0, ifa->rxmtint); - n->ackd_timer = tm_new_set(pool, ackd_timer_hook, n, 0, ifa->rxmtint / 2); + n->inactim = tm_new_init(pool, inactivity_timer_hook, n, 0, 0); + n->dbdes_timer = tm_new_init(pool, dbdes_timer_hook, n, ifa->rxmtint S, 0); + n->lsrq_timer = tm_new_init(pool, lsrq_timer_hook, n, ifa->rxmtint S, 0); + n->lsrt_timer = tm_new_init(pool, lsrt_timer_hook, n, ifa->rxmtint S, 0); + n->ackd_timer = tm_new_init(pool, ackd_timer_hook, n, ifa->rxmtint S / 2, 0); return (n); } @@ -186,7 +186,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->myimms = DBDES_IMMS; tm_start(n->dbdes_timer, 0); - tm_start(n->ackd_timer, ifa->rxmtint / 2); + tm_start(n->ackd_timer, ifa->rxmtint S / 2); } if (state > NEIGHBOR_EXSTART) @@ -231,7 +231,7 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event) ospf_neigh_chstate(n, NEIGHBOR_INIT); /* 
Restart inactivity timer */ - tm_start(n->inactim, n->ifa->deadint); + tm_start(n->inactim, n->ifa->deadint S); break; case INM_2WAYREC: @@ -359,7 +359,7 @@ can_do_adj(struct ospf_neighbor *n) } -static inline u32 neigh_get_id(struct ospf_proto *p UNUSED4 UNUSED6, struct ospf_neighbor *n) +static inline u32 neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n) { return ospf_is_v2(p) ? ipa_to_u32(n->ip) : n->rid; } static struct ospf_neighbor * @@ -507,13 +507,14 @@ ospf_dr_election(struct ospf_iface *ifa) u32 old_drid = ifa->drid; u32 old_bdrid = ifa->bdrid; + ip_addr none = ospf_is_v2(p) ? IPA_NONE4 : IPA_NONE6; ifa->drid = ndr ? ndr->rid : 0; - ifa->drip = ndr ? ndr->ip : IPA_NONE; + ifa->drip = ndr ? ndr->ip : none; ifa->dr_iface_id = ndr ? ndr->iface_id : 0; ifa->bdrid = nbdr ? nbdr->rid : 0; - ifa->bdrip = nbdr ? nbdr->ip : IPA_NONE; + ifa->bdrip = nbdr ? nbdr->ip : none; DBG("DR=%R, BDR=%R\n", ifa->drid, ifa->bdrid); @@ -650,20 +651,6 @@ ospf_sh_neigh_info(struct ospf_neighbor *n) { struct ospf_iface *ifa = n->ifa; char *pos = "PtP "; - char etime[6]; - int exp, sec, min; - - exp = n->inactim->expires - now; - sec = exp % 60; - min = exp / 60; - if (min > 59) - { - bsprintf(etime, "-Inf-"); - } - else - { - bsprintf(etime, "%02u:%02u", min, sec); - } if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA)) { @@ -675,6 +662,7 @@ ospf_sh_neigh_info(struct ospf_neighbor *n) pos = "Other"; } - cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%-5s\t%-10s %-1I", n->rid, n->priority, - ospf_ns_names[n->state], pos, etime, ifa->ifname, n->ip); + cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%7t\t%-10s %-1I", + n->rid, n->priority, ospf_ns_names[n->state], pos, + tm_remains(n->inactim), ifa->ifname, n->ip); } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index d5d5d354..3ebebdaa 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -92,8 +92,10 @@ * - RFC 2328 - main OSPFv2 standard * - RFC 5340 - main OSPFv3 standard * - RFC 3101 - OSPFv2 NSSA areas - * - RFC 6549 - OSPFv2 multi-instance extensions - * - RFC 6987 - OSPF stub router advertisement + * - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication + * - RFC 5838 - OSPFv3 Support of Address Families + * - RFC 6549 - OSPFv2 Multi-Instance Extensions + * - RFC 6987 - OSPF Stub Router Advertisement */ #include <stdlib.h> @@ -102,18 +104,11 @@ static int ospf_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool); static struct ea_list *ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); static void ospf_store_tmp_attrs(struct rte *rt, struct ea_list *attrs); -static int ospf_reload_routes(struct proto *P); +static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); static int ospf_rte_same(struct rte *new, struct rte *old); static void ospf_disp(timer *timer); -static void -ospf_area_initfib(struct fib_node *fn) -{ - struct area_net *an = (struct area_net *) fn; - an->hidden = 0; - an->active = 0; -} static void add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac) @@ -122,23 +117,35 @@ add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac) struct area_net_config *anc; struct area_net *an; - fib_init(&oa->net_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib); - fib_init(&oa->enet_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib); + fib_init(&oa->net_fib, p->p.pool, ospf_get_af(p), + sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL); + fib_init(&oa->enet_fib, p->p.pool, ospf_get_af(p), + 
sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL); WALK_LIST(anc, ac->net_list) { - an = (struct area_net *) fib_get(&oa->net_fib, &anc->px.addr, anc->px.len); + an = fib_get(&oa->net_fib, &anc->prefix); an->hidden = anc->hidden; } WALK_LIST(anc, ac->enet_list) { - an = (struct area_net *) fib_get(&oa->enet_fib, &anc->px.addr, anc->px.len); + an = fib_get(&oa->enet_fib, &anc->prefix); an->hidden = anc->hidden; an->tag = anc->tag; } } +static inline uint +ospf_opts(struct ospf_proto *p) +{ + if (ospf_is_v2(p)) + return 0; + + return ((ospf_is_ip6(p) && !p->af_mc) ? OPT_V6 : 0) | + (!p->stub_router ? OPT_R : 0) | (p->af_ext ? OPT_AF : 0); +} + static void ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac) { @@ -154,16 +161,13 @@ ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac) oa->areaid = ac->areaid; oa->rt = NULL; oa->po = p; - fib_init(&oa->rtr, p->p.pool, sizeof(ort), 0, ospf_rt_initort); + fib_init(&oa->rtr, p->p.pool, NET_IP4, sizeof(ort), OFFSETOF(ort, fn), 0, NULL); add_area_nets(oa, ac); if (oa->areaid == 0) p->backbone = oa; - if (ospf_is_v2(p)) - oa->options = ac->type; - else - oa->options = ac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R); + oa->options = ac->type | ospf_opts(p); ospf_notify_rt_lsa(oa); } @@ -229,21 +233,25 @@ ospf_start(struct proto *P) p->router_id = proto_get_router_id(P->cf); p->ospf2 = c->ospf2; + p->af_ext = c->af_ext; + p->af_mc = c->af_mc; p->rfc1583 = c->rfc1583; p->stub_router = c->stub_router; p->merge_external = c->merge_external; p->asbr = c->asbr; p->ecmp = c->ecmp; p->tick = c->tick; - p->disp_timer = tm_new_set(P->pool, ospf_disp, p, 0, p->tick); - tm_start(p->disp_timer, 1); + p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0); + tm_start(p->disp_timer, 100 MS); p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct mpnh)); + p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); init_list(&(p->iface_list)); init_list(&(p->area_list)); - fib_init(&p->rtf, P->pool, sizeof(ort), 0, ospf_rt_initort); + fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL); + if (ospf_is_v3(p)) + idm_init(&p->idm, P->pool, 16); p->areano = 0; p->gr = ospf_top_new(p, P->pool); s_init_list(&(p->lsal)); @@ -299,15 +307,16 @@ ospf_dump(struct proto *P) } static struct proto * -ospf_init(struct proto_config *c) +ospf_init(struct proto_config *CF) { - struct ospf_config *oc = (struct ospf_config *) c; - struct proto *P = proto_new(c, sizeof(struct ospf_proto)); + struct ospf_config *cf = (struct ospf_config *) CF; + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->rt_notify = ospf_rt_notify; P->if_notify = ospf_if_notify; - P->ifa_notify = oc->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; + P->ifa_notify = cf->ospf2 ? 
ospf_ifa_notify2 : ospf_ifa_notify3; P->import_control = ospf_import_control; P->reload_routes = ospf_reload_routes; P->make_tmp_attrs = ospf_make_tmp_attrs; @@ -391,17 +400,16 @@ ospf_schedule_rtcalc(struct ospf_proto *p) p->calcrt = 1; } -static int -ospf_reload_routes(struct proto *P) +static void +ospf_reload_routes(struct channel *C) { - struct ospf_proto *p = (struct ospf_proto *) P; + struct ospf_proto *p = (struct ospf_proto *) C->proto; - if (p->calcrt != 2) - OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload"); + if (p->calcrt == 2) + return; + OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload"); p->calcrt = 2; - - return 1; } @@ -506,9 +514,9 @@ ospf_shutdown(struct proto *P) ospf_iface_shutdown(ifa); /* Cleanup locked rta entries */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - rta_free(((ort *) nftmp)->old_rta); + rta_free(nf->old_rta); } FIB_WALK_END; @@ -603,11 +611,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) struct ospf_iface *ifa; oa->ac = nac; - - if (ospf_is_v2(p)) - oa->options = nac->type; - else - oa->options = nac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R); + oa->options = nac->type | ospf_opts(p); if (nac->type != oac->type) { @@ -639,17 +643,20 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) * nonbroadcast network, cost of interface, etc. */ static int -ospf_reconfigure(struct proto *P, struct proto_config *c) +ospf_reconfigure(struct proto *P, struct proto_config *CF) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_config *old = (struct ospf_config *) (P->cf); - struct ospf_config *new = (struct ospf_config *) c; + struct ospf_config *new = (struct ospf_config *) CF; struct ospf_area_config *nac; struct ospf_area *oa, *oax; struct ospf_iface *ifa, *ifx; struct ospf_iface_patt *ip; - if (proto_get_router_id(c) != p->router_id) + if (proto_get_router_id(CF) != p->router_id) + return 0; + + if (p->ospf2 != new->ospf2) return 0; if (p->rfc1583 != new->rfc1583) @@ -658,13 +665,19 @@ ospf_reconfigure(struct proto *P, struct proto_config *c) if (old->abr != new->abr) return 0; + if ((p->af_ext != new->af_ext) || (p->af_mc != new->af_mc)) + return 0; + + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + p->stub_router = new->stub_router; p->merge_external = new->merge_external; p->asbr = new->asbr; p->ecmp = new->ecmp; p->tick = new->tick; - p->disp_timer->recurrent = p->tick; - tm_start(p->disp_timer, 1); + p->disp_timer->recurrent = p->tick S; + tm_start(p->disp_timer, 100 MS); /* Mark all areas and ifaces */ WALK_LIST(oa, p->area_list) @@ -746,7 +759,6 @@ ospf_sh(struct proto *P) struct ospf_iface *ifa; struct ospf_neighbor *n; int ifano, nno, adjno, firstfib; - struct area_net *anet; if (p->p.proto_state != PS_UP) { @@ -795,29 +807,27 @@ ospf_sh(struct proto *P) cli_msg(-1014, "\t\tNumber of adjacent neighbors:\t%u", adjno); firstfib = 1; - FIB_WALK(&oa->net_fib, nftmp) + FIB_WALK(&oa->net_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; if(firstfib) { cli_msg(-1014, "\t\tArea networks:"); firstfib = 0; } - cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen, + cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr, anet->hidden ? "Hidden" : "Advertise", anet->active ? 
"Active" : ""); } FIB_WALK_END; firstfib = 1; - FIB_WALK(&oa->enet_fib, nftmp) + FIB_WALK(&oa->enet_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; if(firstfib) { cli_msg(-1014, "\t\tArea external networks:"); firstfib = 0; } - cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen, + cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr, anet->hidden ? "Hidden" : "Advertise", anet->active ? "Active" : ""); } FIB_WALK_END; @@ -1072,15 +1082,14 @@ show_lsa_network(struct top_hash_entry *he, int ospf2) } static inline void -show_lsa_sum_net(struct top_hash_entry *he, int ospf2) +show_lsa_sum_net(struct top_hash_entry *he, int ospf2, int af) { - ip_addr ip; - int pxlen; + net_addr net; u8 pxopts; u32 metric; - lsa_parse_sum_net(he, ospf2, &ip, &pxlen, &pxopts, &metric); - cli_msg(-1016, "\t\txnetwork %I/%d metric %u", ip, pxlen, metric); + lsa_parse_sum_net(he, ospf2, af, &net, &pxopts, &metric); + cli_msg(-1016, "\t\txnetwork %N metric %u", &net, metric); } static inline void @@ -1096,16 +1105,16 @@ show_lsa_sum_rt(struct top_hash_entry *he, int ospf2) static inline void -show_lsa_external(struct top_hash_entry *he, int ospf2) +show_lsa_external(struct top_hash_entry *he, int ospf2, int af) { struct ospf_lsa_ext_local rt; - char str_via[STD_ADDRESS_P_LENGTH + 8] = ""; + char str_via[IPA_MAX_TEXT_LENGTH + 8] = ""; char str_tag[16] = ""; if (he->lsa_type == LSA_T_EXT) he->domain = 0; /* Unmark the LSA */ - lsa_parse_ext(he, ospf2, &rt); + lsa_parse_ext(he, ospf2, af, &rt); if (rt.fbit) bsprintf(str_via, " via %I", rt.fwaddr); @@ -1113,19 +1122,15 @@ show_lsa_external(struct top_hash_entry *he, int ospf2) if (rt.tag) bsprintf(str_tag, " tag %08x", rt.tag); - cli_msg(-1016, "\t\t%s %I/%d metric%s %u%s%s", + cli_msg(-1016, "\t\t%s %N metric%s %u%s%s", (he->lsa_type == LSA_T_NSSA) ? "nssa-ext" : "external", - rt.ip, rt.pxlen, rt.ebit ? "2" : "", rt.metric, str_via, str_tag); + &rt.net, rt.ebit ? 
"2" : "", rt.metric, str_via, str_tag); } static inline void -show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode) +show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode, int af) { struct ospf_lsa_prefix *px = he->lsa_body; - ip_addr pxa; - int pxlen; - u8 pxopts; - u16 metric; u32 *buf; int i; @@ -1141,14 +1146,18 @@ show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode) buf = px->rest; for (i = 0; i < px->pxcount; i++) - { - buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric); + { + net_addr net; + u8 pxopts; + u16 metric; - if (px->ref_type == LSA_T_RT) - cli_msg(-1016, "\t\tstubnet %I/%d metric %u", pxa, pxlen, metric); - else - cli_msg(-1016, "\t\taddress %I/%d", pxa, pxlen); - } + buf = ospf3_get_prefix(buf, af, &net, &pxopts, &metric); + + if (px->ref_type == LSA_T_RT) + cli_msg(-1016, "\t\tstubnet %N metric %u", &net, metric); + else + cli_msg(-1016, "\t\taddress %N", &net); + } } void @@ -1156,6 +1165,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) { struct ospf_proto *p = (struct ospf_proto *) P; int ospf2 = ospf_is_v2(p); + int af = ospf_get_af(p); uint i, ix, j1, jx; u32 last_area = 0xFFFFFFFF; @@ -1276,7 +1286,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) case LSA_T_SUM_NET: if (cnode->lsa_type == LSA_T_RT) - show_lsa_sum_net(he, ospf2); + show_lsa_sum_net(he, ospf2, af); break; case LSA_T_SUM_RT: @@ -1286,11 +1296,11 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) case LSA_T_EXT: case LSA_T_NSSA: - show_lsa_external(he, ospf2); + show_lsa_external(he, ospf2, af); break; case LSA_T_PREFIX: - show_lsa_prefix(he, cnode); + show_lsa_prefix(he, cnode, af); break; } @@ -1304,7 +1314,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) ix++; while ((ix < jx) && (hex[ix]->lsa.rt == cnode->lsa.rt)) - show_lsa_external(hex[ix++], ospf2); + show_lsa_external(hex[ix++], ospf2, af); cnode = NULL; } @@ -1338,7 +1348,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable) last_rt = he->lsa.rt; } - show_lsa_external(he, ospf2); + show_lsa_external(he, ospf2, af); } } @@ -1468,6 +1478,8 @@ struct protocol proto_ospf = { .template = "ospf%d", .attr_class = EAP_OSPF, .preference = DEF_PREF_OSPF, + .channel_mask = NB_IP, + .proto_size = sizeof(struct ospf_proto), .config_size = sizeof(struct ospf_config), .init = ospf_init, .dump = ospf_dump, diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 81c610d5..54eeb74c 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -14,7 +14,7 @@ #include "nest/bird.h" #include "lib/checksum.h" -#include "lib/ip.h" +#include "lib/idm.h" #include "lib/lists.h" #include "lib/slists.h" #include "lib/socket.h" @@ -37,14 +37,6 @@ #endif -#ifdef IPV6 -#define OSPF_IS_V2 0 -#else -#define OSPF_IS_V2 1 -#endif - -// FIXME: MAX_PREFIX_LENGTH - #define OSPF_TRACE(flags, msg, args...) \ do { if ((p->p.debug & flags) || OSPF_FORCE_DEBUG) \ log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0) @@ -66,16 +58,16 @@ log_rl(&p->log_lsa_tbf, L_REMOTE "%s: " msg, p->p.name, args) #define LOG_LSA2(msg, args...) \ - do { if (! p->log_lsa_tbf.mark) \ + do { if (! 
p->log_lsa_tbf.drop) \ log(L_REMOTE "%s: " msg, p->p.name, args); } while(0) #define OSPF_PROTO 89 -#define LSREFRESHTIME 1800 /* 30 minutes */ -#define MINLSINTERVAL 5 -#define MINLSARRIVAL 1 -#define LSINFINITY 0xffffff +#define LSREFRESHTIME 1800 /* 30 minutes */ +#define MINLSINTERVAL (5 S_) +#define MINLSARRIVAL (1 S_) +#define LSINFINITY 0xffffff #define OSPF_DEFAULT_TICK 1 #define OSPF_DEFAULT_STUB_COST 1000 @@ -87,16 +79,18 @@ #define OSPF_VLINK_ID_OFFSET 0x80000000 - struct ospf_config { struct proto_config c; uint tick; u8 ospf2; + u8 af_ext; + u8 af_mc; u8 rfc1583; u8 stub_router; u8 merge_external; u8 instance_id; + u8 instance_id_set; u8 abr; u8 asbr; int ecmp; @@ -125,24 +119,24 @@ struct ospf_area_config struct area_net_config { node n; - struct prefix px; + net_addr prefix; u32 tag; u8 hidden; }; struct area_net { - struct fib_node fn; u32 metric; /* With possible LSA_EXT3_EBIT for NSSA area nets */ u32 tag; u8 hidden; u8 active; + struct fib_node fn; }; struct ospf_stubnet_config { node n; - struct prefix px; + net_addr prefix; u32 cost; u8 hidden; u8 summary; @@ -177,9 +171,9 @@ struct ospf_iface_patt int tx_priority; u16 tx_length; u16 rx_buffer; - #define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */ u8 instance_id; + u8 instance_id_set; u8 autype; /* OSPF_AUTH_*, not really used in OSPFv3 */ u8 strictnbma; u8 check_link; @@ -189,7 +183,6 @@ struct ospf_iface_patt u8 ptp_netmask; /* bool + 2 for unspecified */ u8 ttl_security; /* bool + 2 for TX only */ u8 bfd; - u8 bsd_secondary; list *passwords; }; @@ -220,12 +213,15 @@ struct ospf_proto int areano; /* Number of area I belong to */ int padj; /* Number of neighbors in Exchange or Loading state */ struct fib rtf; /* Routing table */ - byte ospf2; /* OSPF v2 or v3 */ - byte rfc1583; /* RFC1583 compatibility */ - byte stub_router; /* Do not forward transit traffic */ - byte merge_external; /* Should i merge external routes? */ - byte asbr; /* May i originate any ext/NSSA lsa? */ - byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ + struct idm idm; /* OSPFv3 LSA ID map */ + u8 ospf2; /* OSPF v2 or v3 */ + u8 af_ext; /* OSPFv3-AF extension */ + u8 af_mc; /* OSPFv3-AF multicast */ + u8 rfc1583; /* RFC1583 compatibility */ + u8 stub_router; /* Do not forward transit traffic */ + u8 merge_external; /* Should i merge external routes? */ + u8 asbr; /* May i originate any ext/NSSA lsa? */ + u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ struct ospf_area *backbone; /* If exists */ event *flood_event; /* Event for flooding LS updates */ void *lsab; /* LSA buffer used when originating router LSAs */ @@ -273,10 +269,10 @@ struct ospf_iface sock *sk; /* IP socket */ list neigh_list; /* List of neighbors (struct ospf_neighbor) */ u32 cost; /* Cost of iface */ - u32 waitint; /* number of sec before changing state from wait */ - u32 rxmtint; /* number of seconds between LSA retransmissions */ - u32 pollint; /* Poll interval */ - u32 deadint; /* after "deadint" missing hellos is router dead */ + u32 waitint; /* Number of seconds before changing state from wait */ + u32 rxmtint; /* Number of seconds between LSA retransmissions */ + u32 pollint; /* Poll interval in seconds */ + u32 deadint; /* After deadint seconds without hellos is router dead */ u32 iface_id; /* Interface ID (iface->index or new value for vlinks) */ u32 vid; /* ID of peer of virtual link */ ip_addr vip; /* IP of peer of virtual link */ @@ -287,8 +283,8 @@ struct ospf_iface interface. 
LSAs contained in the update */ u16 helloint; /* number of seconds between hello sending */ list *passwords; - u32 csn; /* Last used crypt seq number */ - bird_clock_t csn_use; /* Last time when packet with that CSN was sent */ + u32 csn; /* Last used crypt seq number */ + btime csn_use; /* Last time when packet with that CSN was sent */ ip_addr all_routers; /* Multicast (or broadcast) address for all routers */ ip_addr des_routers; /* Multicast (or NULL) address for designated routers */ ip_addr drip; /* Designated router IP */ @@ -458,14 +454,15 @@ struct ospf_neighbor /* Generic option flags */ -#define OPT_V6 0x01 /* OSPFv3, LSA relevant for IPv6 routing calculation */ -#define OPT_E 0x02 /* Related to AS-external LSAs */ -#define OPT_MC 0x04 /* Related to MOSPF, not used and obsolete */ -#define OPT_N 0x08 /* Related to NSSA */ -#define OPT_P 0x08 /* OSPFv2, flags P and N share position, see NSSA RFC */ -#define OPT_EA 0x10 /* OSPFv2, external attributes, not used and obsolete */ -#define OPT_R 0x10 /* OSPFv3, originator is active router */ -#define OPT_DC 0x20 /* Related to demand circuits, not used */ +#define OPT_V6 0x0001 /* OSPFv3, LSA relevant for IPv6 routing calculation */ +#define OPT_E 0x0002 /* Related to AS-external LSAs */ +#define OPT_MC 0x0004 /* Related to MOSPF, not used and obsolete */ +#define OPT_N 0x0008 /* Related to NSSA */ +#define OPT_P 0x0008 /* OSPFv2, flags P and N share position, see NSSA RFC */ +#define OPT_EA 0x0010 /* OSPFv2, external attributes, not used and obsolete */ +#define OPT_R 0x0010 /* OSPFv3, originator is active router */ +#define OPT_DC 0x0020 /* Related to demand circuits, not used */ +#define OPT_AF 0x0100 /* OSPFv3 Address Families (RFC 5838) */ /* Router-LSA VEB flags are are stored together with links (OSPFv2) or options (OSPFv3) */ #define OPT_RT_B (0x01 << 24) @@ -682,8 +679,8 @@ struct ospf_lsa_ext3 struct ospf_lsa_ext_local { - ip_addr ip, fwaddr; - int pxlen; + net_addr net; + ip_addr fwaddr; u32 metric, ebit, fbit, tag, propagate; u8 pxopts; }; @@ -721,73 +718,102 @@ lsa_net_count(struct ospf_lsa_header *lsa) /* In ospf_area->rtr we store paths to routers, but we use RID (and not IP address) as index, so we need to encapsulate RID to IP address */ -#define ipa_from_rid(x) ipa_from_u32(x) -#define ipa_to_rid(x) ipa_to_u32(x) +#define net_from_rid(x) NET_ADDR_IP4(ip4_from_u32(x), IP4_MAX_PREFIX_LENGTH) +#define rid_from_net(x) ip4_to_u32(((net_addr_ip4 *) x)->prefix) #define IPV6_PREFIX_SPACE(x) ((((x) + 63) / 32) * 4) #define IPV6_PREFIX_WORDS(x) (((x) + 63) / 32) -/* FIXME: these four functions should be significantly redesigned w.r.t. integration, - also should be named as ospf3_* instead of *_ipv6_* */ + +static inline int +ospf_valid_prefix(net_addr *n) +{ + /* + * In OSPFv2, prefix is stored as netmask; ip4_masklen() returns 255 for + * invalid one. But OSPFv3-AF may receive IPv4 net with 32 < pxlen < 128. + */ + uint max = (n->type == NET_IP4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + return n->pxlen <= max; +} + +/* + * In OSPFv3-AF (RFC 5835), IPv4 address is encoded by just placing it in the + * first 32 bits of IPv6 address and setting remaining bits to zero. Likewise + * for IPv4 prefix, where remaining bits do not matter. 
We use following + * functions to convert between IPv4 and IPv4-in-IPv6 representations: + */ + +static inline ip4_addr ospf3_6to4(ip6_addr a) +{ return _MI4(_I0(a)); } + +static inline ip6_addr ospf3_4to6(ip4_addr a) +{ return _MI6(_I(a), 0, 0, 0); } + static inline u32 * -lsa_get_ipv6_prefix(u32 *buf, ip_addr *addr, int *pxlen, u8 *pxopts, u16 *rest) +ospf3_get_prefix(u32 *buf, int af, net_addr *n, u8 *pxopts, u16 *rest) { - u8 pxl = (*buf >> 24); - *pxopts = (*buf >> 16); - *rest = *buf; - *pxlen = pxl; + ip6_addr px = IP6_NONE; + uint pxlen = (*buf >> 24); + *pxopts = (*buf >> 16) & 0xff; + if (rest) *rest = *buf & 0xffff; buf++; - *addr = IPA_NONE; - -#ifdef IPV6 - if (pxl > 0) - _I0(*addr) = *buf++; - if (pxl > 32) - _I1(*addr) = *buf++; - if (pxl > 64) - _I2(*addr) = *buf++; - if (pxl > 96) - _I3(*addr) = *buf++; + if (pxlen > 0) + _I0(px) = *buf++; + if (pxlen > 32) + _I1(px) = *buf++; + if (pxlen > 64) + _I2(px) = *buf++; + if (pxlen > 96) + _I3(px) = *buf++; /* Clean up remaining bits */ - if (pxl < 128) - addr->addr[pxl / 32] &= u32_mkmask(pxl % 32); -#endif + if (pxlen < 128) + px.addr[pxlen / 32] &= u32_mkmask(pxlen % 32); + + if (af == NET_IP4) + net_fill_ip4(n, ospf3_6to4(px), pxlen); + else + net_fill_ip6(n, px, pxlen); return buf; } static inline u32 * -lsa_get_ipv6_addr(u32 *buf, ip_addr *addr) +ospf3_put_prefix(u32 *buf, net_addr *n, u8 pxopts, u16 rest) { - *addr = *(ip_addr *) buf; - return buf + 4; -} + ip6_addr px = (n->type == NET_IP4) ? ospf3_4to6(net4_prefix(n)) : net6_prefix(n); + uint pxlen = n->pxlen; -static inline u32 * -put_ipv6_prefix(u32 *buf, ip_addr addr UNUSED4, u8 pxlen UNUSED4, u8 pxopts UNUSED4, u16 lh UNUSED4) -{ -#ifdef IPV6 - *buf++ = ((pxlen << 24) | (pxopts << 16) | lh); + *buf++ = ((pxlen << 24) | (pxopts << 16) | rest); if (pxlen > 0) - *buf++ = _I0(addr); + *buf++ = _I0(px); if (pxlen > 32) - *buf++ = _I1(addr); + *buf++ = _I1(px); if (pxlen > 64) - *buf++ = _I2(addr); + *buf++ = _I2(px); if (pxlen > 96) - *buf++ = _I3(addr); -#endif + *buf++ = _I3(px); + return buf; } static inline u32 * -put_ipv6_addr(u32 *buf, ip_addr addr) +ospf3_get_addr(u32 *buf, int af, ip_addr *addr) { - *(ip_addr *) buf = addr; + ip6_addr a; + memcpy(&a, buf, 16); + *addr = (af == NET_IP4) ? ipa_from_ip4(ospf3_6to4(a)) : ipa_from_ip6(a); + return buf + 4; +} + +static inline u32 * +ospf3_put_addr(u32 *buf, ip_addr addr) +{ + ip6_addr a = ipa_is_ip4(addr) ? ospf3_4to6(ipa_to_ip4(addr)) : ipa_to_ip6(addr); + memcpy(buf, &a, 16); return buf + 4; } @@ -831,19 +857,24 @@ static inline void ospf_notify_net_lsa(struct ospf_iface *ifa) static inline void ospf_notify_link_lsa(struct ospf_iface *ifa) { ifa->update_link_lsa = 1; } - -#define ospf_is_v2(X) OSPF_IS_V2 -#define ospf_is_v3(X) (!OSPF_IS_V2) -/* static inline int ospf_is_v2(struct ospf_proto *p) { return p->ospf2; } static inline int ospf_is_v3(struct ospf_proto *p) { return ! p->ospf2; } -*/ -static inline int ospf_get_version(struct ospf_proto *p UNUSED4 UNUSED6) + +static inline int ospf_get_version(struct ospf_proto *p) { return ospf_is_v2(p) ? 
2 : 3; } +static inline int ospf_is_ip4(struct ospf_proto *p) +{ return p->p.net_type == NET_IP4; } + +static inline int ospf_is_ip6(struct ospf_proto *p) +{ return p->p.net_type == NET_IP6; } + +static inline int ospf_get_af(struct ospf_proto *p) +{ return p->p.net_type; } + struct ospf_area *ospf_find_area(struct ospf_proto *p, u32 aid); static inline struct ospf_area *ospf_main_area(struct ospf_proto *p) @@ -925,7 +956,7 @@ static inline void ospf_send_to_des(struct ospf_iface *ifa) #define SKIP(DSC) do { err_dsc = DSC; goto skip; } while(0) #endif -static inline uint ospf_pkt_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6) +static inline uint ospf_pkt_hdrlen(struct ospf_proto *p) { return ospf_is_v2(p) ? (sizeof(struct ospf_packet) + sizeof(union ospf_auth)) : sizeof(struct ospf_packet); } static inline void * ospf_tx_buffer(struct ospf_iface *ifa) diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 6b6a97a4..38d7a75f 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -77,16 +77,16 @@ ospf_pkt_finalize(struct ospf_iface *ifa, struct ospf_packet *pkt, uint *plen) reboot when system does not have independent RTC? */ if (!ifa->csn) { - ifa->csn = (u32) now; - ifa->csn_use = now; + ifa->csn = (u32) (current_real_time() TO_S); + ifa->csn_use = current_time(); } /* We must have sufficient delay between sending a packet and increasing CSN to prevent reordering of packets (in a network) with different CSNs */ - if ((now - ifa->csn_use) > 1) + if ((current_time() - ifa->csn_use) > 1 S) ifa->csn++; - ifa->csn_use = now; + ifa->csn_use = current_time(); uint auth_len = mac_type_length(pass->alg); byte *auth_tail = ((byte *) pkt + *plen); @@ -203,7 +203,7 @@ drop: /** * ospf_rx_hook * @sk: socket we received the packet. - * @len: size of the packet + * @len: length of the packet * * This is the entry point for messages from neighbors. 
Many checks (like * authentication, checksums, size) are done before the packet is passed to @@ -231,7 +231,7 @@ ospf_rx_hook(sock *sk, uint len) return 1; int src_local, dst_local, dst_mcast; - src_local = ipa_in_net(sk->faddr, ifa->addr->prefix, ifa->addr->pxlen); + src_local = ipa_in_netX(sk->faddr, &ifa->addr->prefix); dst_local = ipa_equal(sk->laddr, ifa->addr->ip); dst_mcast = ipa_equal(sk->laddr, ifa->all_routers) || ipa_equal(sk->laddr, ifa->des_routers); @@ -270,9 +270,6 @@ ospf_rx_hook(sock *sk, uint len) if (pkt == NULL) DROP("bad IP header", len); - if (ifa->check_ttl && (sk->rcv_ttl < 255)) - DROP("wrong TTL", sk->rcv_ttl); - if (len < sizeof(struct ospf_packet)) DROP("too short", len); @@ -379,6 +376,10 @@ found: if (ipa_equal(sk->laddr, ifa->des_routers) && (ifa->sk_dr == 0)) return 1; + /* TTL check must be done after instance dispatch */ + if (ifa->check_ttl && (sk->rcv_ttl < 255)) + DROP("wrong TTL", sk->rcv_ttl); + if (rid == p->router_id) DROP1("my own router ID"); @@ -491,8 +492,8 @@ ospf_send_to_agt(struct ospf_iface *ifa, u8 state) void ospf_send_to_bdr(struct ospf_iface *ifa) { - if (ipa_nonzero(ifa->drip)) + if (ipa_nonzero2(ifa->drip)) ospf_send_to(ifa, ifa->drip); - if (ipa_nonzero(ifa->bdrip)) + if (ipa_nonzero2(ifa->bdrip)) ospf_send_to(ifa, ifa->bdrip); } diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 368e3d05..c0fe218a 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -10,9 +10,7 @@ #include "ospf.h" -static void add_cand(list * l, struct top_hash_entry *en, - struct top_hash_entry *par, u32 dist, - struct ospf_area *oa, int i); +static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif); static void rt_sync(struct ospf_proto *p); @@ -21,17 +19,8 @@ static inline void reset_ri(ort *ort) bzero(&ort->n, sizeof(orta)); } -void -ospf_rt_initort(struct fib_node *fn) -{ - ort *ri = (ort *) fn; - reset_ri(ri); - ri->old_rta = NULL; - ri->fn.flags = 0; -} - static inline int -nh_is_vlink(struct mpnh *nhs) +nh_is_vlink(struct nexthop *nhs) { return !nhs->iface; } @@ -42,20 +31,19 @@ unresolved_vlink(ort *ort) return ort->n.nhs && nh_is_vlink(ort->n.nhs); } -static inline struct mpnh * +static inline struct nexthop * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh)); + struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); nh->gw = gw; nh->iface = iface; - nh->next = NULL; nh->weight = weight; return nh; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct mpnh *n) +has_device_nexthops(const struct nexthop *n) { for (; n; n = n->next) if (ipa_zero(n->gw)) @@ -65,13 +53,13 @@ has_device_nexthops(const struct mpnh *n) } /* Replace device nexthops with nexthops to gw */ -static struct mpnh * -fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) +static struct nexthop * +fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) { - struct mpnh *root1 = NULL; - struct mpnh *root2 = NULL; - struct mpnh **nn1 = &root1; - struct mpnh **nn2 = &root2; + struct nexthop *root1 = NULL; + struct nexthop *root2 = NULL; + struct nexthop **nn1 = &root1; + struct nexthop **nn2 = &root2; if (!p->ecmp) return new_nexthop(p, gw, n->iface, n->weight); @@ -82,7 +70,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) for (; n; n = n->next) { - struct mpnh *nn = new_nexthop(p, 
ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); + struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); if (ipa_zero(n->gw)) { @@ -96,7 +84,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) } } - return mpnh_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); } @@ -292,7 +280,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -308,7 +296,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -334,9 +322,9 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) static inline void -ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) +ri_install_net(struct ospf_proto *p, net_addr *net, const orta *new) { - ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen); + ort *old = fib_get(&p->rtf, net); int cmp = orta_compare(p, new, &old->n); if (cmp > 0) @@ -348,8 +336,8 @@ ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) static inline void ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new) { - ip_addr addr = ipa_from_rid(rid); - ort *old = (ort *) fib_get(&oa->rtr, &addr, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(rid); + ort *old = fib_get(&oa->rtr, (net_addr *) &nrid); int cmp = orta_compare(oa->po, new, &old->n); if (cmp > 0) @@ -359,17 +347,19 @@ ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new) } static inline void -ri_install_asbr(struct ospf_proto *p, ip_addr *addr, const orta *new) +ri_install_asbr(struct ospf_proto *p, u32 rid, const orta *new) { - ort *old = (ort *) fib_get(&p->backbone->rtr, addr, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(rid); + ort *old = fib_get(&p->backbone->rtr, (net_addr *) &nrid); + if (orta_compare_asbr(p, new, &old->n) > 0) ort_replace(old, new); } static inline void -ri_install_ext(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new) +ri_install_ext(struct ospf_proto *p, net_addr *net, const orta *new) { - ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen); + ort *old = fib_get(&p->rtf, net); int cmp = orta_compare_ext(p, new, &old->n); if (cmp > 0) @@ -404,7 +394,7 @@ px_pos_to_ifa(struct ospf_area *oa, int pos) static void -add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_hash_entry *en, int pos) +add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos) { struct ospf_proto *p = oa->po; @@ -419,7 +409,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_ .nhs = en->nhs }; - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); @@ -440,7 +430,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_ nf.nhs = ifa ? 
new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight) : NULL; } - ri_install_net(p, px, pxlen, &nf); + ri_install_net(p, net, &nf); } @@ -451,8 +441,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr struct ospf_lsa_rt *rt = act->lsa_body; struct ospf_lsa_rt_walk rtl; struct top_hash_entry *tmp; - ip_addr prefix; - int pxlen, i; + int i; if (rt->options & OPT_RT_V) oa->trcap = 1; @@ -502,9 +491,10 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr * the same result by handing them here because add_network() * will keep the best (not the first) found route. */ - prefix = ipa_from_u32(rtl.id & rtl.data); - pxlen = u32_masklen(rtl.data); - add_network(oa, prefix, pxlen, act->dist + rtl.metric, act, i); + net_addr_ip4 net = + NET_ADDR_IP4(ip4_from_u32(rtl.id & rtl.data), u32_masklen(rtl.data)); + + add_network(oa, (net_addr *) &net, act->dist + rtl.metric, act, i); break; case LSART_NET: @@ -517,7 +507,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr break; } - add_cand(&oa->cand, tmp, act, act->dist + rtl.metric, oa, i); + add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif); } } @@ -526,21 +516,21 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent { struct ospf_lsa_net *ln = act->lsa_body; struct top_hash_entry *tmp; - ip_addr prefix; - int pxlen, i, cnt; + int i, cnt; if (ospf_is_v2(p)) { - prefix = ipa_from_u32(act->lsa.id & ln->optx); - pxlen = u32_masklen(ln->optx); - add_network(oa, prefix, pxlen, act->dist, act, -1); + net_addr_ip4 net = + NET_ADDR_IP4(ip4_from_u32(act->lsa.id & ln->optx), u32_masklen(ln->optx)); + + add_network(oa, (net_addr *) &net, act->dist, act, -1); } cnt = lsa_net_count(&act->lsa); for (i = 0; i < cnt; i++) { tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]); - add_cand(&oa->cand, tmp, act, act->dist, oa, -1); + add_cand(oa, tmp, act, act->dist, -1, 0, 0); } } @@ -549,10 +539,6 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa) { struct top_hash_entry *en, *src; struct ospf_lsa_prefix *px; - ip_addr pxa; - int pxlen; - u8 pxopts; - u16 metric; u32 *buf; int i; @@ -587,18 +573,22 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa) buf = px->rest; for (i = 0; i < px->pxcount; i++) - { - buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric); + { + net_addr net; + u8 pxopts; + u16 metric; - if (pxopts & OPT_PX_NU) - continue; + buf = ospf3_get_prefix(buf, ospf_get_af(p), &net, &pxopts, &metric); - /* Store the first global address to use it later as a vlink endpoint */ - if ((pxopts & OPT_PX_LA) && ipa_zero(src->lb)) - src->lb = pxa; + if (pxopts & OPT_PX_NU) + continue; - add_network(oa, pxa, pxlen, src->dist + metric, src, i); - } + /* Store the first global address to use it later as a vlink endpoint */ + if ((pxopts & OPT_PX_LA) && (net.type == NET_IP6) && ipa_zero(src->lb)) + src->lb = ipa_from_ip6(net6_prefix(&net)); + + add_network(oa, &net, src->dist + metric, src, i); + } } } @@ -659,7 +649,8 @@ ospf_rt_spfa(struct ospf_area *oa) } static int -link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par) +link_back(struct ospf_area *oa, struct top_hash_entry *en, + struct top_hash_entry *par, uint lif, uint nif) { struct ospf_proto *p = oa->po; struct ospf_lsa_rt_walk rtl; @@ -697,6 +688,10 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry tmp = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, 
rtl.nif); if (tmp == par) { + /* + * Note that there may be multiple matching Rt-fields if router 'en' + * have multiple interfaces to net 'par'. Perhaps we should do ECMP. + */ if (ospf_is_v2(p)) en->lb = ipa_from_u32(rtl.data); else @@ -708,7 +703,13 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry case LSART_VLNK: case LSART_PTP: - /* Not necessary the same link, see RFC 2328 [23] */ + /* + * For OSPFv2, not necessary the same link, see RFC 2328 [23]. + * For OSPFv3, we verify that by comparing nif and lif fields. + */ + if (ospf_is_v3(p) && ((rtl.lif != nif) || (rtl.nif != lif))) + break; + tmp = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id); if (tmp == par) return 1; @@ -741,13 +742,12 @@ ospf_rt_sum(struct ospf_area *oa) { struct ospf_proto *p = oa->po; struct top_hash_entry *en; - ip_addr ip, abrip; + net_addr net; u32 dst_rid, metric, options; ort *abr; - int pxlen = -1, type = -1; + int type; u8 pxopts; - OSPF_TRACE(D_EVENTS, "Starting routing table calculation for inter-area (area %R)", oa->areaid); WALK_SLIST(en, p->lsal) @@ -770,18 +770,18 @@ ospf_rt_sum(struct ospf_area *oa) if (en->lsa_type == LSA_T_SUM_NET) { - lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric); - - if (pxopts & OPT_PX_NU) - continue; + lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric); - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } + if (pxopts & OPT_PX_NU) + continue; + options = 0; type = ORT_NET; } @@ -802,8 +802,8 @@ ospf_rt_sum(struct ospf_area *oa) continue; /* 16.2. (4) */ - abrip = ipa_from_rid(en->lsa.rt); - abr = (ort *) fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + abr = fib_find(&oa->rtr, (net_addr *) &nrid); if (!abr || !abr->n.type) continue; @@ -827,7 +827,7 @@ ospf_rt_sum(struct ospf_area *oa) }; if (type == ORT_NET) - ri_install_net(p, ip, pxlen, &nf); + ri_install_net(p, &net, &nf); else ri_install_rt(oa, dst_rid, &nf); } @@ -841,11 +841,7 @@ ospf_rt_sum_tr(struct ospf_area *oa) struct ospf_area *bb = p->backbone; struct top_hash_entry *en; ort *re, *abr; - ip_addr ip, abrip; - u32 dst_rid, metric, options; - int pxlen; - u8 pxopts; - + u32 metric; if (!bb) return; @@ -868,26 +864,31 @@ ospf_rt_sum_tr(struct ospf_area *oa) if (en->lsa_type == LSA_T_SUM_NET) { - lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric); + net_addr net; + u8 pxopts; - if (pxopts & OPT_PX_NU) - continue; + lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric); - if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } - re = fib_find(&p->rtf, &ip, pxlen); + if (pxopts & OPT_PX_NU) + continue; + + re = fib_find(&p->rtf, &net); } else // en->lsa_type == LSA_T_SUM_RT { + u32 dst_rid, options; + lsa_parse_sum_rt(en, ospf_is_v2(p), &dst_rid, &metric, &options); - ip = ipa_from_rid(dst_rid); - re = fib_find(&bb->rtr, &ip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(dst_rid); + re = fib_find(&bb->rtr, (net_addr *) &nrid); } /* 16.3 (1b) */ @@ -905,8 +906,8 @@ ospf_rt_sum_tr(struct ospf_area *oa) continue; /* 16.3. 
(4) */ - abrip = ipa_from_rid(en->lsa.rt); - abr = fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + abr = fib_find(&oa->rtr, (net_addr *) &nrid); if (!abr || !abr->n.type) continue; @@ -997,7 +998,7 @@ decide_sum_lsa(struct ospf_area *oa, ort *nf, int dest) return 1; struct area_net *anet = (struct area_net *) - fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen); + fib_route(&nf->n.oa->net_fib, nf->fn.addr); /* Condensed area network found */ if (anet) @@ -1016,13 +1017,13 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf) if (nf->area_net) { /* It is a default route for stub areas, handled entirely in ospf_rt_abr() */ - if (nf->fn.pxlen == 0) + if (nf->fn.addr->pxlen == 0) return; /* Find that area network */ WALK_LIST(anet_oa, p->area_list) { - anet = (struct area_net *) fib_find(&anet_oa->net_fib, &nf->fn.prefix, nf->fn.pxlen); + anet = fib_find(&anet_oa->net_fib, nf->fn.addr); if (anet) break; } @@ -1041,14 +1042,16 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf) static inline void check_sum_rt_lsa(struct ospf_proto *p, ort *nf) { + u32 rid = rid_from_net(nf->fn.addr); + struct ospf_area *oa; WALK_LIST(oa, p->area_list) if (decide_sum_lsa(oa, nf, ORT_ROUTER)) - ospf_originate_sum_rt_lsa(p, oa, nf, nf->n.metric1, nf->n.options); + ospf_originate_sum_rt_lsa(p, oa, rid, nf->n.metric1, nf->n.options); } static inline int -decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_ext_local *rt) +decide_nssa_lsa(struct ospf_proto *p, ort *nf, struct ospf_lsa_ext_local *rt) { struct ospf_area *oa = nf->n.oa; struct top_hash_entry *en = nf->n.en; @@ -1057,14 +1060,14 @@ decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_e return 0; /* Condensed area network found */ - if (fib_route(&oa->enet_fib, nf->fn.prefix, nf->fn.pxlen)) + if (fib_route(&oa->enet_fib, nf->fn.addr)) return 0; if (!en || (en->lsa_type != LSA_T_NSSA)) return 0; /* We do not store needed data in struct orta, we have to parse the LSA */ - lsa_parse_ext(en, ospf_is_v2(p), rt); + lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), rt); if (rt->pxopts & OPT_PX_NU) return 0; @@ -1092,7 +1095,7 @@ check_nssa_lsa(struct ospf_proto *p, ort *nf) /* Find that area network */ WALK_LIST(oa, p->area_list) { - anet = (struct area_net *) fib_find(&oa->enet_fib, &nf->fn.prefix, nf->fn.pxlen); + anet = fib_find(&oa->enet_fib, nf->fn.addr); if (anet) break; } @@ -1162,24 +1165,20 @@ static void ospf_rt_abr1(struct ospf_proto *p) { struct area_net *anet; - ort *nf, *default_nf; + ort *default_nf; + net_addr default_net; /* RFC 2328 G.3 - incomplete resolution of virtual next hops - routers */ - FIB_WALK(&p->backbone->rtr, nftmp) + FIB_WALK(&p->backbone->rtr, ort, nf) { - nf = (ort *) nftmp; - if (nf->n.type && unresolved_vlink(nf)) reset_ri(nf); } FIB_WALK_END; - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; - - /* RFC 2328 G.3 - incomplete resolution of virtual next hops - networks */ if (nf->n.type && unresolved_vlink(nf)) reset_ri(nf); @@ -1188,7 +1187,7 @@ ospf_rt_abr1(struct ospf_proto *p) /* Compute condensed area networks */ if (nf->n.type == RTS_OSPF) { - anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen); + anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.addr); if (anet) { if (!anet->active) @@ -1196,7 +1195,7 @@ ospf_rt_abr1(struct ospf_proto *p) anet->active = 1; /* Get a RT entry and mark it to know that it is an area network */ - ort *nfi = (ort 
*) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen); + ort *nfi = fib_get(&p->rtf, anet->fn.addr); nfi->area_net = 1; /* 16.2. (3) */ @@ -1211,8 +1210,13 @@ ospf_rt_abr1(struct ospf_proto *p) } FIB_WALK_END; - ip_addr addr = IPA_NONE; - default_nf = (ort *) fib_get(&p->rtf, &addr, 0); + + if (ospf_is_v2(p)) + net_fill_ip4(&default_net, IP4_NONE, 0); + else + net_fill_ip6(&default_net, IP6_NONE, 0); + + default_nf = fib_get(&p->rtf, &default_net); default_nf->area_net = 1; struct ospf_area *oa; @@ -1239,11 +1243,10 @@ ospf_rt_abr1(struct ospf_proto *p) /* RFC 2328 16.4. (3) - precompute preferred ASBR entries */ if (oa_is_ext(oa)) { - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, nf) { - nf = (ort *) nftmp; if (nf->n.options & ORTA_ASBR) - ri_install_asbr(p, &nf->fn.prefix, &nf->n); + ri_install_asbr(p, rid_from_net(nf->fn.addr), &nf->n); } FIB_WALK_END; } @@ -1251,9 +1254,9 @@ ospf_rt_abr1(struct ospf_proto *p) /* Originate or flush ASBR summary LSAs */ - FIB_WALK(&p->backbone->rtr, nftmp) + FIB_WALK(&p->backbone->rtr, ort, nf) { - check_sum_rt_lsa(p, (ort *) nftmp); + check_sum_rt_lsa(p, nf); } FIB_WALK_END; @@ -1280,8 +1283,6 @@ ospf_rt_abr2(struct ospf_proto *p) { struct ospf_area *oa; struct top_hash_entry *en; - ort *nf, *nf2; - /* RFC 3103 3.1 - type-7 translator election */ struct ospf_area *bb = p->backbone; @@ -1293,13 +1294,12 @@ ospf_rt_abr2(struct ospf_proto *p) if (oa->ac->translator) goto decided; - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, nf) { - nf = (ort *) nftmp; if (!nf->n.type || !(nf->n.options & ORTA_ABR)) continue; - nf2 = fib_find(&bb->rtr, &nf->fn.prefix, MAX_PREFIX_LENGTH); + ort *nf2 = fib_find(&bb->rtr, nf->fn.addr); if (!nf2 || !nf2->n.type || !(nf2->n.options & ORTA_ABR)) continue; @@ -1329,23 +1329,21 @@ ospf_rt_abr2(struct ospf_proto *p) if (!translate && (oa->translate == TRANS_ON)) { if (oa->translator_timer == NULL) - oa->translator_timer = tm_new_set(p->p.pool, translator_timer_hook, oa, 0, 0); + oa->translator_timer = tm_new_init(p->p.pool, translator_timer_hook, oa, 0, 0); /* Schedule the end of translation */ - tm_start(oa->translator_timer, oa->ac->transint); + tm_start(oa->translator_timer, oa->ac->transint S); oa->translate = TRANS_WAIT; } } /* Compute condensed external networks */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; if (rt_is_nssa(nf) && (nf->n.options & ORTA_PROP)) { - struct area_net *anet = (struct area_net *) - fib_route(&nf->n.oa->enet_fib, nf->fn.prefix, nf->fn.pxlen); + struct area_net *anet = fib_route(&nf->n.oa->enet_fib, nf->fn.addr); if (anet) { @@ -1354,7 +1352,7 @@ ospf_rt_abr2(struct ospf_proto *p) anet->active = 1; /* Get a RT entry and mark it to know that it is an area network */ - nf2 = (ort *) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen); + ort *nf2 = fib_get(&p->rtf, anet->fn.addr); nf2->area_net = 1; } @@ -1369,10 +1367,8 @@ ospf_rt_abr2(struct ospf_proto *p) FIB_WALK_END; - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, nf) { - nf = (ort *) nftmp; - check_sum_net_lsa(p, nf); check_nssa_lsa(p, nf); } @@ -1382,22 +1378,57 @@ ospf_rt_abr2(struct ospf_proto *p) /* Like fib_route(), but ignores dummy rt entries */ static void * -ospf_fib_route(struct fib *f, ip_addr a, int len) +ospf_fib_route_ip4(struct fib *f, ip4_addr a, int len) +{ + net_addr_ip4 net = NET_ADDR_IP4(a, len); + ort *nf; + +loop: + nf = fib_find(f, (net_addr *) &net); + if (nf && nf->n.type) + return nf; + + if (net.pxlen > 0) + { + net.pxlen--; + ip4_clrbit(&net.prefix, net.pxlen); + goto 
loop; + } + + return NULL; +} + +static void * +ospf_fib_route_ip6(struct fib *f, ip6_addr a, int len) { - ip_addr a0; + net_addr_ip6 net = NET_ADDR_IP6(a, len); ort *nf; - while (len >= 0) +loop: + nf = fib_find(f, (net_addr *) &net); + if (nf && nf->n.type) + return nf; + + if (net.pxlen > 0) { - a0 = ipa_and(a, ipa_mkmask(len)); - nf = fib_find(f, &a0, len); - if (nf && nf->n.type) - return nf; - len--; + net.pxlen--; + ip6_clrbit(&net.prefix, net.pxlen); + goto loop; } + return NULL; } +static void * +ospf_fib_route(struct fib *f, ip_addr a) +{ + if (f->addr_type == NET_IP4) + return ospf_fib_route_ip4(f, ipa_to_ip4(a), IP4_MAX_PREFIX_LENGTH); + else + return ospf_fib_route_ip6(f, ipa_to_ip6(a), IP6_MAX_PREFIX_LENGTH); +} + + /* RFC 2328 16.4. calculating external routes */ static void ospf_ext_spf(struct ospf_proto *p) @@ -1405,7 +1436,6 @@ ospf_ext_spf(struct ospf_proto *p) struct top_hash_entry *en; struct ospf_lsa_ext_local rt; ort *nf1, *nf2; - ip_addr rtid; u32 br_metric; struct ospf_area *atmp; @@ -1429,21 +1459,20 @@ ospf_ext_spf(struct ospf_proto *p) DBG("%s: Working on LSA. ID: %R, RT: %R, Type: %u\n", p->p.name, en->lsa.id, en->lsa.rt, en->lsa_type); - lsa_parse_ext(en, ospf_is_v2(p), &rt); - - if (rt.metric == LSINFINITY) - continue; + lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), &rt); - if (rt.pxopts & OPT_PX_NU) - continue; - - if (rt.pxlen < 0 || rt.pxlen > MAX_PREFIX_LENGTH) + if (!ospf_valid_prefix(&rt.net)) { log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)", p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt); continue; } + if (rt.metric == LSINFINITY) + continue; + + if (rt.pxopts & OPT_PX_NU) + continue; /* 16.4. (3) */ /* If there are more areas, we already precomputed preferred ASBR @@ -1457,8 +1486,8 @@ ospf_ext_spf(struct ospf_proto *p) if (!atmp) continue; /* Should not happen */ - rtid = ipa_from_rid(en->lsa.rt); - nf1 = fib_find(&atmp->rtr, &rtid, MAX_PREFIX_LENGTH); + net_addr_ip4 nrid = net_from_rid(en->lsa.rt); + nf1 = fib_find(&atmp->rtr, (net_addr *) &nrid); if (!nf1 || !nf1->n.type) continue; /* No AS boundary router found */ @@ -1468,7 +1497,7 @@ ospf_ext_spf(struct ospf_proto *p) /* 16.4. 
(3) NSSA - special rule for default routes */ /* ABR should use default only if P-bit is set and summaries are active */ - if ((en->lsa_type == LSA_T_NSSA) && ipa_zero(rt.ip) && (rt.pxlen == 0) && + if ((en->lsa_type == LSA_T_NSSA) && (rt.net.pxlen == 0) && (p->areano > 1) && !(rt.propagate && atmp->ac->summary)) continue; @@ -1480,7 +1509,7 @@ ospf_ext_spf(struct ospf_proto *p) } else { - nf2 = ospf_fib_route(&p->rtf, rt.fwaddr, MAX_PREFIX_LENGTH); + nf2 = ospf_fib_route(&p->rtf, rt.fwaddr); if (!nf2) continue; @@ -1542,7 +1571,7 @@ ospf_ext_spf(struct ospf_proto *p) nfa.oa = atmp; /* undefined in RFC 2328 */ nfa.en = en; /* store LSA for later (NSSA processing) */ - ri_install_ext(p, rt.ip, rt.pxlen, &nfa); + ri_install_ext(p, &rt.net, &nfa); } } @@ -1552,13 +1581,10 @@ ospf_rt_reset(struct ospf_proto *p) { struct ospf_area *oa; struct top_hash_entry *en; - struct area_net *anet; - ort *ri; /* Reset old routing table */ - FIB_WALK(&p->rtf, nftmp) + FIB_WALK(&p->rtf, ort, ri) { - ri = (ort *) nftmp; ri->area_net = 0; ri->keep = 0; reset_ri(ri); @@ -1580,9 +1606,8 @@ ospf_rt_reset(struct ospf_proto *p) WALK_LIST(oa, p->area_list) { /* Reset ASBR routing tables */ - FIB_WALK(&oa->rtr, nftmp) + FIB_WALK(&oa->rtr, ort, ri) { - ri = (ort *) nftmp; reset_ri(ri); } FIB_WALK_END; @@ -1590,17 +1615,15 @@ ospf_rt_reset(struct ospf_proto *p) /* Reset condensed area networks */ if (p->areano > 1) { - FIB_WALK(&oa->net_fib, nftmp) + FIB_WALK(&oa->net_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; anet->active = 0; anet->metric = 0; } FIB_WALK_END; - FIB_WALK(&oa->enet_fib, nftmp) + FIB_WALK(&oa->enet_fib, struct area_net, anet) { - anet = (struct area_net *) nftmp; anet->active = 0; anet->metric = 0; } @@ -1659,19 +1682,33 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct mpnh *pn) +inherit_nexthops(struct nexthop *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ return pn && (ipa_nonzero(pn->gw) || !pn->iface); } -static struct mpnh * +static inline ip_addr +link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) +{ + struct ospf_lsa_link *link_lsa = en->lsa_body; + ip6_addr ll = link_lsa->lladdr; + + if (ip6_zero(ll)) + return IPA_NONE; + + return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); +} + +static struct nexthop * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, - struct top_hash_entry *par, int pos) + struct top_hash_entry *par, int pos, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct mpnh *pn = par->nhs; - struct ospf_iface *ifa; + struct nexthop *pn = par->nhs; + struct top_hash_entry *link = NULL; + struct ospf_iface *ifa = NULL; + ip_addr nh = IPA_NONE; u32 rid = en->lsa.rt; /* 16.1.1. 
The next hop calculation */ @@ -1696,6 +1733,9 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (!ifa) return NULL; + if (ospf_is_v3(p) && (ifa->iface_id != lif)) + log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif); + return new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight); } @@ -1706,14 +1746,44 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (!ifa) return NULL; + if (ospf_is_v3(p) && (ifa->iface_id != lif)) + log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif); + if (ifa->type == OSPF_IT_VLINK) return new_nexthop(p, IPA_NONE, NULL, 0); - struct ospf_neighbor *m = find_neigh(ifa, rid); - if (!m || (m->state != NEIGHBOR_FULL)) - return NULL; + /* FIXME: On physical PtP links we may skip next-hop altogether */ + + if (ospf_is_v2(p) || ospf_is_ip6(p)) + { + /* + * In this case, next-hop is a source address from neighbor's packets. + * That is necessary for OSPFv2 and practical for OSPFv3 (as it works even + * if neighbor uses LinkLSASuppression), but does not work with OSPFv3-AF + * on IPv4 topology, where src is IPv6 but next-hop should be IPv4. + */ + struct ospf_neighbor *m = find_neigh(ifa, rid); + if (!m || (m->state != NEIGHBOR_FULL)) + return NULL; + + nh = m->ip; + } + else + { + /* + * Next-hop is taken from lladdr field of Link-LSA, based on Neighbor + * Iface ID (nif) field in our Router-LSA, which is just nbr->iface_id. + */ + link = ospf_hash_find(p->gr, ifa->iface_id, nif, rid, LSA_T_LINK); + if (!link) + return NULL; + + nh = link_lsa_lladdr(p, link); + if (ipa_zero(nh)) + return NULL; + } - return new_nexthop(p, m->ip, ifa->iface, ifa->ecmp_weight); + return new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); } /* The third case - bcast or nbma neighbor */ @@ -1740,18 +1810,15 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). 
*/ - struct top_hash_entry *lhe; - lhe = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); - - if (!lhe) + link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + if (!link) return NULL; - struct ospf_lsa_link *llsa = lhe->lsa_body; - - if (ip6_zero(llsa->lladdr)) + nh = link_lsa_lladdr(p, link); + if (ipa_zero(nh)) return NULL; - return new_nexthop(p, ipa_from_ip6(llsa->lladdr), pn->iface, pn->weight); + return new_nexthop(p, nh, pn->iface, pn->weight); } } @@ -1764,8 +1831,8 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* Add LSA into list of candidates in Dijkstra's algorithm */ static void -add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, - u32 dist, struct ospf_area *oa, int pos) +add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, + u32 dist, int pos, uint lif, uint nif) { struct ospf_proto *p = oa->po; node *prev, *n; @@ -1778,9 +1845,9 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (en->lsa.age == LSA_MAXAGE) return; - if (ospf_is_v3(p) && (en->lsa_type == LSA_T_RT)) + if (ospf_is_v3(p) && (oa->options & OPT_V6) && (en->lsa_type == LSA_T_RT)) { - /* In OSPFv3, check V6 flag */ + /* In OSPFv3 IPv6 unicast, check V6 flag */ struct ospf_lsa_rt *rt = en->lsa_body; if (!(rt->options & OPT_V6)) return; @@ -1795,10 +1862,10 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, return; /* We should check whether there is a reverse link from en to par, */ - if (!link_back(oa, en, par)) + if (!link_back(oa, en, par, lif, nif)) return; - struct mpnh *nhs = calc_next_hop(oa, en, par, pos); + struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1836,7 +1903,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, /* Merge old and new */ int new_reuse = (par->nhs != nhs); - en->nhs = mpnh_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); + en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); en->nhs_reuse = 1; return; } @@ -1855,20 +1922,20 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, prev = NULL; - if (EMPTY_LIST(*l)) + if (EMPTY_LIST(oa->cand)) { - add_head(l, &en->cn); + add_head(&oa->cand, &en->cn); } else { - WALK_LIST(n, *l) + WALK_LIST(n, oa->cand) { act = SKIP_BACK(struct top_hash_entry, cn, n); if ((act->dist > dist) || ((act->dist == dist) && (act->lsa_type == LSA_T_RT))) { if (prev == NULL) - add_head(l, &en->cn); + add_head(&oa->cand, &en->cn); else insert_node(&en->cn, prev); added = 1; @@ -1879,7 +1946,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (!added) { - add_tail(l, &en->cn); + add_tail(&oa->cand, &en->cn); } } } @@ -1892,8 +1959,7 @@ ort_changed(ort *nf, rta *nr) (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || (nr->source != or->source) || (nr->dest != or->dest) || - (nr->iface != or->iface) || !ipa_equal(nr->gw, or->gw) || - !mpnh_same(nr->nexthops, or->nexthops); + !nexthop_same(&(nr->nh), &(or->nh)); } static void @@ -1902,25 +1968,22 @@ rt_sync(struct ospf_proto *p) struct top_hash_entry *en; struct fib_iterator fit; struct fib *fib = &p->rtf; - ort *nf; struct ospf_area *oa; /* This is used for forced reload of routes */ int reload = (p->calcrt == 2); - 
OSPF_TRACE(D_EVENTS, "Starting routing table synchronisation"); + OSPF_TRACE(D_EVENTS, "Starting routing table synchronization"); DBG("Now syncing my rt table with nest's\n"); FIB_ITERATE_INIT(&fit, fib); again1: - FIB_ITERATE_START(fib, &fit, nftmp) + FIB_ITERATE_START(fib, &fit, ort, nf) { - nf = (ort *) nftmp; - /* Sanity check of next-hop addresses, failure should not happen */ if (nf->n.type) { - struct mpnh *nh; + struct nexthop *nh; for (nh = nf->n.nhs; nh; nh = nh->next) if (ipa_nonzero(nh->gw)) { @@ -1943,29 +2006,12 @@ again1: .src = p->p.main_source, .source = nf->n.type, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, + .nh = *(nf->n.nhs), }; - if (nf->n.nhs->next) - { - a0.dest = RTD_MULTIPATH; - a0.nexthops = nf->n.nhs; - } - else if (ipa_nonzero(nf->n.nhs->gw)) - { - a0.dest = RTD_ROUTER; - a0.iface = nf->n.nhs->iface; - a0.gw = nf->n.nhs->gw; - } - else - { - a0.dest = RTD_DEVICE; - a0.iface = nf->n.nhs->iface; - } - if (reload || ort_changed(nf, &a0)) { - net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen); rta *a = rta_lookup(&a0); rte *e = rte_get_temp(a); @@ -1976,12 +2022,10 @@ again1: e->u.ospf.tag = nf->old_tag = nf->n.tag; e->u.ospf.router_id = nf->old_rid = nf->n.rid; e->pflags = 0; - e->net = ne; - e->pref = p->p.preference; - DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n", - a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); - rte_update(&p->p, ne, e); + DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", + a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); + rte_update(&p->p, nf->fn.addr, e); } } else if (nf->old_rta) @@ -1990,19 +2034,21 @@ again1: rta_free(nf->old_rta); nf->old_rta = NULL; - net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen); - rte_update(&p->p, ne, NULL); + rte_update(&p->p, nf->fn.addr, NULL); } /* Remove unused rt entry, some special entries are persistent */ if (!nf->n.type && !nf->external_rte && !nf->area_net && !nf->keep) { - FIB_ITERATE_PUT(&fit, nftmp); - fib_delete(fib, nftmp); + if (nf->lsa_id) + idm_free(&p->idm, nf->lsa_id); + + FIB_ITERATE_PUT(&fit); + fib_delete(fib, nf); goto again1; } } - FIB_ITERATE_END(nftmp); + FIB_ITERATE_END; WALK_LIST(oa, p->area_list) @@ -2010,18 +2056,16 @@ again1: /* Cleanup ASBR hash tables */ FIB_ITERATE_INIT(&fit, &oa->rtr); again2: - FIB_ITERATE_START(&oa->rtr, &fit, nftmp) + FIB_ITERATE_START(&oa->rtr, &fit, ort, nf) { - nf = (ort *) nftmp; - if (!nf->n.type) { - FIB_ITERATE_PUT(&fit, nftmp); - fib_delete(&oa->rtr, nftmp); + FIB_ITERATE_PUT(&fit); + fib_delete(&oa->rtr, nf); goto again2; } } - FIB_ITERATE_END(nftmp); + FIB_ITERATE_END; } /* Cleanup stale LSAs */ diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 73b28375..589d2bc5 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -53,7 +53,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct mpnh *nhs; /* Next hops computed during SPF */ + struct nexthop *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; @@ -78,13 +78,15 @@ typedef struct ort * route was not in the last update, in that case other old_* values are not * valid. 
*/ - struct fib_node fn; orta n; u32 old_metric1, old_metric2, old_tag, old_rid; rta *old_rta; + u32 lsa_id; u8 external_rte; u8 area_net; u8 keep; + + struct fib_node fn; } ort; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 341eff87..717c8280 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -70,7 +70,7 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3 en->lsa_body = body; en->lsa = *lsa; en->init_age = en->lsa.age; - en->inst_time = now; + en->inst_time = current_time(); /* * We do not set en->mode. It is either default LSA_M_BASIC, or in a special @@ -128,7 +128,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls en->lsa.sn = lsa->sn + 1; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); OSPF_TRACE(D_EVENTS, "Advancing LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -160,7 +160,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls en->lsa = *lsa; en->lsa.age = LSA_MAXAGE; en->init_age = lsa->age; - en->inst_time = now; + en->inst_time = current_time(); OSPF_TRACE(D_EVENTS, "Resetting LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn); @@ -196,7 +196,7 @@ static int ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa_body, u16 lsa_blen, u16 lsa_opts) { /* Enforce MinLSInterval */ - if ((en->init_age == 0) && en->inst_time && ((en->inst_time + MINLSINTERVAL) > now)) + if (!en->init_age && en->inst_time && (lsa_inst_age(en) < MINLSINTERVAL)) return 0; /* Handle wrapping sequence number */ @@ -237,7 +237,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa en->lsa.sn++; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); OSPF_TRACE(D_EVENTS, "Originating LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -283,8 +283,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if (en->nf != lsa->nf) { - log(L_ERR "%s: LSA ID collision for %I/%d", - p->p.name, lsa->nf->fn.prefix, lsa->nf->fn.pxlen); + log(L_ERR "%s: LSA ID collision for %N", + p->p.name, lsa->nf->fn.addr); en = NULL; goto drop; @@ -381,7 +381,7 @@ ospf_refresh_lsa(struct ospf_proto *p, struct top_hash_entry *en) en->lsa.sn++; en->lsa.age = 0; en->init_age = 0; - en->inst_time = now; + en->inst_time = current_time(); lsa_generate_checksum(&en->lsa, en->lsa_body); ospf_flood_lsa(p, en, NULL); } @@ -476,14 +476,15 @@ void ospf_update_lsadb(struct ospf_proto *p) { struct top_hash_entry *en, *nxt; - bird_clock_t real_age; + btime now_ = current_time(); + int real_age; WALK_SLIST_DELSAFE(en, nxt, p->lsal) { if (en->next_lsa_body) ospf_originate_next_lsa(p, en); - real_age = en->init_age + (now - en->inst_time); + real_age = en->init_age + (now_ - en->inst_time) TO_S; if (en->lsa.age == LSA_MAXAGE) { @@ -514,14 +515,14 @@ ospf_update_lsadb(struct ospf_proto *p) } -static inline u32 -ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf) +static u32 +ort_to_lsaid(struct ospf_proto *p, ort *nf) { /* * In OSPFv2, We have to map IP prefixes to u32 in such manner that resulting * u32 interpreted as IP address is a member of given prefix. Therefore, /32 - * prefix have to be mapped on itself. All received prefixes have to be - * mapped on different u32s. + * prefix has to be mapped on itself. 
All received prefixes have to be mapped + * on different u32s. * * We have an assumption that if there is nontrivial (non-/32) network prefix, * then there is not /32 prefix for the first and the last IP address of the @@ -542,17 +543,21 @@ ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf) * network appeared, we choose a different way. * * In OSPFv3, it is simpler. There is not a requirement for membership of the - * result in the input network, so we just use a hash-based unique ID of a - * routing table entry for a route that originated given LSA. For ext-LSA, it - * is an imported route in the nest's routing table (p->table). For summary-LSA, - * it is a 'source' route in the protocol internal routing table (p->rtf). + * result in the input network, so we just allocate a unique ID from ID map + * and store it in nf->lsa_id for further reference. */ if (ospf_is_v3(p)) - return nf->fn.uid; + { + if (!nf->lsa_id) + nf->lsa_id = idm_alloc(&p->idm); - u32 id = ipa_to_u32(nf->fn.prefix); - int pxlen = nf->fn.pxlen; + return nf->lsa_id; + } + + net_addr_ip4 *net = (void *) nf->fn.addr; + u32 id = ip4_to_u32(net->prefix); + int pxlen = net->pxlen; if ((pxlen == 0) || (pxlen == 32)) return id; @@ -628,12 +633,12 @@ configured_stubnet(struct ospf_area *oa, struct ifa *a) { if (sn->summary) { - if (ipa_in_net(a->prefix, sn->px.addr, sn->px.len) && (a->pxlen >= sn->px.len)) + if (net_in_netX(&a->prefix, &sn->prefix)) return 1; } else { - if (ipa_equal(a->prefix, sn->px.addr) && (a->pxlen == sn->px.len)) + if (net_equal(&a->prefix, &sn->prefix)) return 1; } } @@ -781,7 +786,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->type == OSPF_IT_PTMP)) add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->ip), 0xffffffff, 0); else - add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->prefix), u32_mkmask(ifa->addr->pxlen), ifa->cost); + add_rt2_lsa_link(p, LSART_STUB, ip4_to_u32(net4_prefix(&ifa->addr->prefix)), + u32_mkmask(net4_pxlen(&ifa->addr->prefix)), ifa->cost); i++; ifa->rt_pos_end = i; @@ -790,7 +796,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ospf_stubnet_config *sn; WALK_LIST(sn, oa->ac->stubnet_list) if (!sn->hidden) - add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(sn->px.addr), u32_mkmask(sn->px.len), sn->cost), i++; + add_rt2_lsa_link(p, LSART_STUB, ip4_to_u32(net4_prefix(&sn->prefix)), + u32_mkmask(net4_pxlen(&sn->prefix)), sn->cost), i++; struct ospf_lsa_rt *rt = p->lsab; /* Store number of links in lower half of options */ @@ -907,7 +914,7 @@ prepare_net2_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa) ASSERT(p->lsab_used == 0); net = lsab_alloc(p, sizeof(struct ospf_lsa_net) + 4 * nodes); - net->optx = u32_mkmask(ifa->addr->pxlen); + net->optx = u32_mkmask(ifa->addr->prefix.pxlen); net->routers[0] = p->router_id; WALK_LIST(n, ifa->neigh_list) @@ -999,9 +1006,10 @@ prepare_sum3_net_lsa_body(struct ospf_proto *p, ort *nf, u32 metric) { struct ospf_lsa_sum3_net *sum; - sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) + IPV6_PREFIX_SPACE(nf->fn.pxlen)); + sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) + + IPV6_PREFIX_SPACE(nf->fn.addr->pxlen)); sum->metric = metric; - put_ipv6_prefix(sum->prefix, nf->fn.prefix, nf->fn.pxlen, 0, 0); + ospf3_put_prefix(sum->prefix, nf->fn.addr, 0, 0); } static inline void @@ -1028,7 +1036,7 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, }; if (ospf_is_v2(p)) - prepare_sum2_lsa_body(p, nf->fn.pxlen, metric); + prepare_sum2_lsa_body(p, 
nf->fn.addr->pxlen, metric); else prepare_sum3_net_lsa_body(p, nf, metric); @@ -1036,20 +1044,20 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, } void -ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options) +ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options) { struct ospf_new_lsa lsa = { .type = LSA_T_SUM_RT, .mode = LSA_M_RTCALC, .dom = oa->areaid, - .id = ipa_to_rid(nf->fn.prefix), /* Router ID of ASBR, irrelevant for OSPFv3 */ + .id = drid, /* Router ID of ASBR, irrelevant for OSPFv3 */ .opts = oa->options }; if (ospf_is_v2(p)) prepare_sum2_lsa_body(p, 0, metric); else - prepare_sum3_rt_lsa_body(p, lsa.id, metric, options & LSA_OPTIONS_MASK); + prepare_sum3_rt_lsa_body(p, drid, metric, options & LSA_OPTIONS_MASK); ospf_originate_lsa(p, &lsa); } @@ -1082,7 +1090,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, { struct ospf_lsa_ext3 *ext; int bsize = sizeof(struct ospf_lsa_ext3) - + IPV6_PREFIX_SPACE(nf->fn.pxlen) + + IPV6_PREFIX_SPACE(nf->fn.addr->pxlen) + (ipa_nonzero(fwaddr) ? 16 : 0) + (tag ? 4 : 0); @@ -1090,7 +1098,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, ext->metric = metric & LSA_METRIC_MASK; u32 *buf = ext->rest; - buf = put_ipv6_prefix(buf, nf->fn.prefix, nf->fn.pxlen, pbit ? OPT_PX_P : 0, 0); + buf = ospf3_put_prefix(buf, nf->fn.addr, pbit ? OPT_PX_P : 0, 0); if (ebit) ext->metric |= LSA_EXT3_EBIT; @@ -1098,7 +1106,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf, if (ipa_nonzero(fwaddr)) { ext->metric |= LSA_EXT3_FBIT; - buf = put_ipv6_addr(buf, fwaddr); + buf = ospf3_put_addr(buf, fwaddr); } if (tag) @@ -1140,7 +1148,7 @@ ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 m }; if (ospf_is_v2(p)) - prepare_ext2_lsa_body(p, nf->fn.pxlen, metric, ebit, fwaddr, tag); + prepare_ext2_lsa_body(p, nf->fn.addr->pxlen, metric, ebit, fwaddr, tag); else prepare_ext3_lsa_body(p, nf, metric, ebit, fwaddr, tag, oa && pbit); @@ -1177,7 +1185,7 @@ use_gw_for_fwaddr(struct ospf_proto *p, ip_addr gw, struct iface *iface) WALK_LIST(ifa, p->iface_list) if ((ifa->iface == iface) && - (!ospf_is_v2(p) || ipa_in_net(gw, ifa->addr->prefix, ifa->addr->pxlen))) + (!ospf_is_v2(p) || ipa_in_netX(gw, &ifa->addr->prefix))) return 1; return 0; @@ -1215,7 +1223,8 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) { WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || (a->flags & IA_PEER) || (a->scope <= SCOPE_LINK)) continue; @@ -1234,7 +1243,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) } void -ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea) +ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */ @@ -1253,7 +1262,7 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U if (!new) { - nf = (ort *) fib_find(&p->rtf, &n->n.prefix, n->n.pxlen); + nf = fib_find(&p->rtf, n->n.addr); if (!nf || !nf->external_rte) return; @@ -1280,8 +1289,8 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_ROUTER) && use_gw_for_fwaddr(p, a->gw, a->iface)) - fwd = a->gw; + if ((a->dest == 
RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) + fwd = a->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -1290,13 +1299,13 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U if (ipa_zero(fwd)) { - log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %I/%d", - p->p.name, n->n.prefix, n->n.pxlen); + log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N", + p->p.name, n->n.addr); return; } } - nf = (ort *) fib_get(&p->rtf, &n->n.prefix, n->n.pxlen); + nf = fib_get(&p->rtf, n->n.addr); ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1); nf->external_rte = 1; } @@ -1308,38 +1317,47 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U */ static inline void -lsab_put_prefix(struct ospf_proto *p, ip_addr prefix, u32 pxlen, u32 cost) +lsab_put_prefix(struct ospf_proto *p, net_addr *n, u32 cost) { - void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(pxlen)); - u8 flags = (pxlen < MAX_PREFIX_LENGTH) ? 0 : OPT_PX_LA; - put_ipv6_prefix(buf, prefix, pxlen, flags, cost); + void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(net_pxlen(n))); + uint max = (n->type == NET_IP4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; + u8 flags = (net_pxlen(n) < max) ? 0 : OPT_PX_LA; + ospf3_put_prefix(buf, n, flags, cost); } static void prepare_link_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa) { - struct ospf_lsa_link *ll; + ip_addr nh = ospf_is_ip4(p) ? IPA_NONE : ifa->addr->ip; int i = 0; + /* Preallocating space for header */ ASSERT(p->lsab_used == 0); - ll = lsab_allocz(p, sizeof(struct ospf_lsa_link)); - ll->options = ifa->oa->options | (ifa->priority << 24); - ll->lladdr = ipa_to_ip6(ifa->addr->ip); - ll = NULL; /* buffer might be reallocated later */ + lsab_allocz(p, sizeof(struct ospf_lsa_link)); struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || - (a->scope < SCOPE_SITE)) + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || + (a->scope <= SCOPE_LINK)) continue; - lsab_put_prefix(p, a->prefix, a->pxlen, 0); + if (ospf_is_ip4(p) && ipa_zero(nh)) + nh = a->ip; + + lsab_put_prefix(p, &a->prefix, 0); i++; } - ll = p->lsab; + /* Filling the preallocated header */ + struct ospf_lsa_link *ll = p->lsab; + ll->options = ifa->oa->options | (ifa->priority << 24); + ll->lladdr = ospf_is_ip4(p) ? 
ospf3_4to6(ipa_to_ip4(nh)) : ipa_to_ip6(nh); ll->pxcount = i; + + if (ipa_zero(nh)) + log(L_ERR "%s: Cannot find next hop address for %s", p->p.name, ifa->ifname); } static void @@ -1401,12 +1419,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || + if ((a->prefix.type != ospf_get_af(p)) || + (a->flags & IA_SECONDARY) || (a->flags & IA_PEER) || (a->scope <= SCOPE_LINK)) continue; - if (((a->pxlen < MAX_PREFIX_LENGTH) && net_lsa) || + if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) || configured_stubnet(oa, a)) continue; @@ -1414,11 +1433,12 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->state == OSPF_IS_LOOP) || (ifa->type == OSPF_IT_PTMP)) { - lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0); + net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); + lsab_put_prefix(p, (net_addr *) &net, 0); host_addr = 1; } else - lsab_put_prefix(p, a->prefix, a->pxlen, ifa->cost); + lsab_put_prefix(p, &a->prefix, ifa->cost); i++; } @@ -1429,15 +1449,15 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) WALK_LIST(sn, oa->ac->stubnet_list) if (!sn->hidden) { - lsab_put_prefix(p, sn->px.addr, sn->px.len, sn->cost); - if (sn->px.len == MAX_PREFIX_LENGTH) + lsab_put_prefix(p, &sn->prefix, sn->cost); + if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH) host_addr = 1; i++; } /* If there are some configured vlinks, find some global address (even from another area), which will be used as a vlink endpoint. */ - if (!EMPTY_LIST(cf->vlink_list) && !host_addr) + if (!EMPTY_LIST(cf->vlink_list) && !host_addr && ospf_is_ip6(p)) { WALK_LIST(ifa, p->iface_list) { @@ -1447,11 +1467,14 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ifa *a; WALK_LIST(a, ifa->iface->addrs) { - if ((a->flags & IA_SECONDARY) || (a->scope <= SCOPE_LINK)) + if ((a->prefix.type != NET_IP6) || + (a->flags & IA_SECONDARY) || + (a->scope <= SCOPE_LINK)) continue; /* Found some IP */ - lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0); + net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); + lsab_put_prefix(p, (net_addr *) &net, 0); i++; goto done; } @@ -1557,7 +1580,7 @@ add_link_lsa(struct ospf_proto *p, struct ospf_lsa_link *ll, int offset, int *px continue; /* Skip link-local prefixes */ - if ((pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000)) + if (ospf_is_ip6(p) && (pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000)) continue; add_prefix(p, pxb, offset, pxc); @@ -1614,7 +1637,7 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa) } static inline int breaks_minlsinterval(struct top_hash_entry *en) -{ return en && (en->lsa.age < LSA_MAXAGE) && ((en->inst_time + MINLSINTERVAL) > now); } +{ return en && (en->lsa.age < LSA_MAXAGE) && (lsa_inst_age(en) < MINLSINTERVAL); } void ospf_update_topology(struct ospf_proto *p) @@ -1748,7 +1771,7 @@ ospf_top_hash(struct top_graph *f, u32 domain, u32 lsaid, u32 rtrid, u32 type) * and request lists of OSPF neighbors. 
*/ struct top_graph * -ospf_top_new(struct ospf_proto *p UNUSED4 UNUSED6, pool *pool) +ospf_top_new(struct ospf_proto *p, pool *pool) { struct top_graph *f; diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 5652ced0..ac87334b 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -26,9 +26,9 @@ struct top_hash_entry void *next_lsa_body; /* For postponed LSA origination */ u16 next_lsa_blen; /* For postponed LSA origination */ u16 next_lsa_opts; /* For postponed LSA origination */ - bird_clock_t inst_time; /* Time of installation into DB */ + btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct mpnh *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -185,10 +185,10 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry * { if (*en) { ospf_flush_lsa(p, *en); *en = NULL; } } void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric); -void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options); +void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options); void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit); -void ospf_rt_notify(struct proto *P, rtable *tbl, net *n, rte *new, rte *old, ea_list *attrs); +void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old, ea_list *attrs); void ospf_update_topology(struct ospf_proto *p); struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type); diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index 77de5b88..5093da98 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -1,6 +1,6 @@ -source=pipe.c -root-rel=../../ -dir-name=proto/pipe - -include ../../Rules +src := pipe.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +tests_objs := $(tests_objs) $(src-o-files)
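The ospf/packet.c and ospf/topology.c hunks above replace the old one-second bird_clock_t timestamps (csn_use, inst_time) with the microsecond-based btime clock, writing interval checks as "> 1 S" and converting elapsed time back to seconds with "TO_S". A minimal standalone sketch of that arithmetic, assuming btime counts microseconds and using illustrative demo_* names rather than BIRD's real macros:

/* Sketch only: btime is assumed to be a 64-bit count of microseconds. */
typedef long long demo_btime;

#define DEMO_S ((demo_btime) 1000000)      /* one second in btime units */
#define DEMO_TO_S(t) ((t) / DEMO_S)        /* btime interval -> whole seconds */

/* Mirrors the guard in ospf_pkt_finalize(): bump the cryptographic sequence
   number (CSN) only if at least one second passed since it was last used. */
static int demo_csn_may_bump(demo_btime now, demo_btime csn_use)
{
  return (now - csn_use) > 1 * DEMO_S;
}

/* Mirrors the age computation in ospf_update_lsadb(): the effective LSA age
   is the age at installation plus the seconds elapsed since installation. */
static int demo_lsa_real_age(int init_age, demo_btime inst_time, demo_btime now)
{
  return init_age + (int) DEMO_TO_S(now - inst_time);
}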
\ No newline at end of file diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 8daf2e7c..f51ee575 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,28 +16,25 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE, MODE, OPAQUE, TRANSPARENT) +CF_KEYWORDS(PIPE, PEER, TABLE) CF_GRAMMAR -CF_ADDTO(proto, pipe_proto '}') +CF_ADDTO(proto, pipe_proto '}' { this_channel = NULL; } ) -pipe_proto_start: proto_start PIPE { - this_proto = proto_config_new(&proto_pipe, $1); - PIPE_CFG->mode = PIPE_TRANSPARENT; - } - ; +pipe_proto_start: proto_start PIPE +{ + this_proto = proto_config_new(&proto_pipe, $1); + this_channel = channel_config_new(NULL, 0, this_proto); + this_channel->in_filter = FILTER_ACCEPT; + this_channel->out_filter = FILTER_ACCEPT; +}; pipe_proto: pipe_proto_start proto_name '{' | pipe_proto proto_item ';' - | pipe_proto PEER TABLE SYM ';' { - if ($4->class != SYM_TABLE) - cf_error("Routing table name expected"); - PIPE_CFG->peer = $4->def; - } - | pipe_proto MODE OPAQUE ';' { PIPE_CFG->mode = PIPE_OPAQUE; } - | pipe_proto MODE TRANSPARENT ';' { PIPE_CFG->mode = PIPE_TRANSPARENT; } + | pipe_proto channel_item ';' + | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 6ef80322..310f3c01 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -44,54 +44,42 @@ #include "pipe.h" static void -pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, ea_list *attrs) +pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old, ea_list *attrs) { - struct pipe_proto *p = (struct pipe_proto *) P; - struct announce_hook *ah = (src_table == P->table) ? p->peer_ahook : P->main_ahook; - rtable *dst_table = ah->table; + struct pipe_proto *p = (void *) P; + struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri; struct rte_src *src; - net *nn; rte *e; - rta a; + rta *a; if (!new && !old) return; - if (dst_table->pipe_busy) + if (dst->table->pipe_busy) { - log(L_ERR "Pipe loop detected when sending %I/%d to table %s", - n->n.prefix, n->n.pxlen, dst_table->name); + log(L_ERR "Pipe loop detected when sending %N to table %s", + n->n.addr, dst->table->name); return; } - nn = net_get(dst_table, n->n.prefix, n->n.pxlen); if (new) { - memcpy(&a, new->attrs, sizeof(rta)); - - if (p->mode == PIPE_OPAQUE) - { - a.src = P->main_source; - a.source = RTS_PIPE; - } - - a.aflags = 0; - a.eattrs = attrs; - a.hostentry = NULL; - e = rte_get_temp(&a); - e->net = nn; + a = alloca(rta_size(new->attrs)); + memcpy(a, new->attrs, rta_size(new->attrs)); + + a->aflags = 0; + a->eattrs = attrs; + a->hostentry = NULL; + e = rte_get_temp(a); e->pflags = 0; - if (p->mode == PIPE_TRANSPARENT) - { - /* Copy protocol specific embedded attributes. */ - memcpy(&(e->u), &(new->u), sizeof(e->u)); - e->pref = new->pref; - e->pflags = new->pflags; - } + /* Copy protocol specific embedded attributes. 
*/ + memcpy(&(e->u), &(new->u), sizeof(e->u)); + e->pref = new->pref; + e->pflags = new->pflags; - src = a.src; + src = a->src; } else { @@ -99,9 +87,9 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e src = old->attrs->src; } - src_table->pipe_busy = 1; - rte_update2(ah, nn, e, src); - src_table->pipe_busy = 0; + src_ch->table->pipe_busy = 1; + rte_update2(dst, n->n.addr, e, src); + src_ch->table->pipe_busy = 0; } static int @@ -111,171 +99,117 @@ pipe_import_control(struct proto *P, rte **ee, ea_list **ea UNUSED, struct linpo if (pp == P) return -1; /* Avoid local loops automatically */ + return 0; } -static int -pipe_reload_routes(struct proto *P) +static void +pipe_reload_routes(struct channel *C) { - struct pipe_proto *p = (struct pipe_proto *) P; - - /* - * Because the pipe protocol feeds routes from both routing tables - * together, both directions are reloaded during refeed and 'reload - * out' command works like 'reload' command. For symmetry, we also - * request refeed when 'reload in' command is used. - */ - proto_request_feeding(P); + struct pipe_proto *p = (void *) C->proto; - proto_reset_limit(P->main_ahook->in_limit); - proto_reset_limit(p->peer_ahook->in_limit); - - return 1; + /* Route reload on one channel is just refeed on the other */ + channel_request_feeding((C == p->pri) ? p->sec : p->pri); } -static struct proto * -pipe_init(struct proto_config *C) -{ - struct pipe_config *c = (struct pipe_config *) C; - struct proto *P = proto_new(C, sizeof(struct pipe_proto)); - struct pipe_proto *p = (struct pipe_proto *) P; - p->mode = c->mode; - p->peer_table = c->peer->table; - P->accept_ra_types = (p->mode == PIPE_OPAQUE) ? RA_OPTIMAL : RA_ANY; - P->rt_notify = pipe_rt_notify; - P->import_control = pipe_import_control; - P->reload_routes = pipe_reload_routes; - - return P; -} - -static int -pipe_start(struct proto *P) +static void +pipe_postconfig(struct proto_config *CF) { - struct pipe_config *cf = (struct pipe_config *) P->cf; - struct pipe_proto *p = (struct pipe_proto *) P; + struct pipe_config *cf = (void *) CF; + struct channel_config *cc = proto_cf_main_channel(CF); - /* Lock both tables, unlock is handled in pipe_cleanup() */ - rt_lock_table(P->table); - rt_lock_table(p->peer_table); + if (!cc->table) + cf_error("Primary routing table not specified"); - /* Going directly to PS_UP - prepare for feeding, - connect the protocol to both routing tables */ + if (!cf->peer) + cf_error("Secondary routing table not specified"); - P->main_ahook = proto_add_announce_hook(P, P->table, &P->stats); - P->main_ahook->out_filter = cf->c.out_filter; - P->main_ahook->in_limit = cf->c.in_limit; - proto_reset_limit(P->main_ahook->in_limit); + if (cc->table == cf->peer) + cf_error("Primary table and peer table must be different"); - p->peer_ahook = proto_add_announce_hook(P, p->peer_table, &p->peer_stats); - p->peer_ahook->out_filter = cf->c.in_filter; - p->peer_ahook->in_limit = cf->c.out_limit; - proto_reset_limit(p->peer_ahook->in_limit); + if (cc->table->addr_type != cf->peer->addr_type) + cf_error("Primary table and peer table must have the same type"); - if (p->mode == PIPE_OPAQUE) - { - P->main_source = rt_get_source(P, 0); - rt_lock_source(P->main_source); - } + if (cc->rx_limit.action) + cf_error("Pipe protocol does not support receive limits"); - return PS_UP; + if (cc->in_keep_filtered) + cf_error("Pipe protocol prohibits keeping filtered routes"); } -static void -pipe_cleanup(struct proto *P) +static int +pipe_configure_channels(struct 
pipe_proto *p, struct pipe_config *cf) { - struct pipe_proto *p = (struct pipe_proto *) P; - - bzero(&P->stats, sizeof(struct proto_stats)); - bzero(&p->peer_stats, sizeof(struct proto_stats)); - - P->main_ahook = NULL; - p->peer_ahook = NULL; - - if (p->mode == PIPE_OPAQUE) - rt_unlock_source(P->main_source); - P->main_source = NULL; - - rt_unlock_table(P->table); - rt_unlock_table(p->peer_table); + struct channel_config *cc = proto_cf_main_channel(&cf->c); + + struct channel_config pri_cf = { + .name = "pri", + .channel = cc->channel, + .table = cc->table, + .out_filter = cc->out_filter, + .in_limit = cc->in_limit, + .ra_mode = RA_ANY + }; + + struct channel_config sec_cf = { + .name = "sec", + .channel = cc->channel, + .table = cf->peer, + .out_filter = cc->in_filter, + .in_limit = cc->out_limit, + .ra_mode = RA_ANY + }; + + return + proto_configure_channel(&p->p, &p->pri, &pri_cf) && + proto_configure_channel(&p->p, &p->sec, &sec_cf); } -static void -pipe_postconfig(struct proto_config *C) +static struct proto * +pipe_init(struct proto_config *CF) { - struct pipe_config *c = (struct pipe_config *) C; + struct proto *P = proto_new(CF); + struct pipe_proto *p = (void *) P; + struct pipe_config *cf = (void *) CF; - if (!c->peer) - cf_error("Name of peer routing table not specified"); - if (c->peer == C->table) - cf_error("Primary table and peer table must be different"); + P->rt_notify = pipe_rt_notify; + P->import_control = pipe_import_control; + P->reload_routes = pipe_reload_routes; - if (C->in_keep_filtered) - cf_error("Pipe protocol prohibits keeping filtered routes"); - if (C->rx_limit) - cf_error("Pipe protocol does not support receive limits"); -} + pipe_configure_channels(p, cf); -extern int proto_reconfig_type; + return P; +} static int -pipe_reconfigure(struct proto *P, struct proto_config *new) +pipe_reconfigure(struct proto *P, struct proto_config *CF) { - struct pipe_proto *p = (struct pipe_proto *)P; - struct proto_config *old = P->cf; - struct pipe_config *oc = (struct pipe_config *) old; - struct pipe_config *nc = (struct pipe_config *) new; - - if ((oc->peer->table != nc->peer->table) || (oc->mode != nc->mode)) - return 0; - - /* Update output filters in ahooks */ - if (P->main_ahook) - { - P->main_ahook->out_filter = new->out_filter; - P->main_ahook->in_limit = new->in_limit; - proto_verify_limits(P->main_ahook); - } - - if (p->peer_ahook) - { - p->peer_ahook->out_filter = new->in_filter; - p->peer_ahook->in_limit = new->out_limit; - proto_verify_limits(p->peer_ahook); - } - - if ((P->proto_state != PS_UP) || (proto_reconfig_type == RECONFIG_SOFT)) - return 1; - - if ((new->preference != old->preference) - || ! filter_same(new->in_filter, old->in_filter) - || ! filter_same(new->out_filter, old->out_filter)) - proto_request_feeding(P); + struct pipe_proto *p = (void *) P; + struct pipe_config *cf = (void *) CF; - return 1; + return pipe_configure_channels(p, cf); } static void -pipe_copy_config(struct proto_config *dest, struct proto_config *src) +pipe_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) { /* Just a shallow copy, not many items here */ - proto_copy_rest(dest, src, sizeof(struct pipe_config)); } static void pipe_get_status(struct proto *P, byte *buf) { - struct pipe_proto *p = (struct pipe_proto *) P; + struct pipe_proto *p = (void *) P; - bsprintf(buf, "%c> %s", (p->mode == PIPE_OPAQUE) ? 
'-' : '=', p->peer_table->name); + bsprintf(buf, "%s <=> %s", p->pri->table->name, p->sec->table->name); } static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->p.stats; - struct proto_stats *s2 = &p->peer_stats; + struct proto_stats *s1 = &p->pri->stats; + struct proto_stats *s2 = &p->sec->stats; /* * Pipe stats (as anything related to pipes) are a bit tricky. There @@ -318,17 +252,16 @@ pipe_show_stats(struct pipe_proto *p) static void pipe_show_proto_info(struct proto *P) { - struct pipe_proto *p = (struct pipe_proto *) P; - struct pipe_config *cf = (struct pipe_config *) P->cf; + struct pipe_proto *p = (void *) P; - // cli_msg(-1006, " Table: %s", P->table->name); - // cli_msg(-1006, " Peer table: %s", p->peer_table->name); - cli_msg(-1006, " Preference: %d", P->preference); - cli_msg(-1006, " Input filter: %s", filter_name(cf->c.in_filter)); - cli_msg(-1006, " Output filter: %s", filter_name(cf->c.out_filter)); + cli_msg(-1006, " Channel %s", "main"); + cli_msg(-1006, " Table: %s", p->pri->table->name); + cli_msg(-1006, " Peer table: %s", p->sec->table->name); + cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); + cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - proto_show_limit(cf->c.in_limit, "Import limit:"); - proto_show_limit(cf->c.out_limit, "Export limit:"); + channel_show_limit(&p->pri->in_limit, "Import limit:"); + channel_show_limit(&p->sec->in_limit, "Export limit:"); if (P->proto_state != PS_DOWN) pipe_show_stats(p); @@ -338,13 +271,10 @@ pipe_show_proto_info(struct proto *P) struct protocol proto_pipe = { .name = "Pipe", .template = "pipe%d", - .multitable = 1, - .preference = DEF_PREF_PIPE, + .proto_size = sizeof(struct pipe_proto), .config_size = sizeof(struct pipe_config), .postconfig = pipe_postconfig, .init = pipe_init, - .start = pipe_start, - .cleanup = pipe_cleanup, .reconfigure = pipe_reconfigure, .copy_config = pipe_copy_config, .get_status = pipe_get_status, diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 50b31698..038c6666 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -9,27 +9,15 @@ #ifndef _BIRD_PIPE_H_ #define _BIRD_PIPE_H_ -#define PIPE_OPAQUE 0 -#define PIPE_TRANSPARENT 1 - struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ - int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ }; struct pipe_proto { struct proto p; - struct rtable *peer_table; - struct announce_hook *peer_ahook; /* Announce hook for direction peer->primary */ - struct proto_stats peer_stats; /* Statistics for the direction peer->primary */ - int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ + struct channel *pri; + struct channel *sec; }; - -extern struct protocol proto_pipe; - -static inline int proto_is_pipe(struct proto *p) -{ return p->proto == &proto_pipe; } - #endif diff --git a/proto/radv/Makefile b/proto/radv/Makefile index efc4d4af..05317eff 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -1,5 +1,6 @@ -source=radv.c packets.c -root-rel=../../ -dir-name=proto/radv +src := packets.c radv.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/radv/config.Y b/proto/radv/config.Y index 0ff84aeb..0e43c237 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -41,6 +41,7 @@ CF_ADDTO(proto, radv_proto) radv_proto_start: proto_start RADV { this_proto = proto_config_new(&proto_radv, $1); + init_list(&RADV_CFG->patt_list); init_list(&RADV_CFG->pref_list); init_list(&RADV_CFG->rdnss_list); @@ -49,15 +50,12 @@ radv_proto_start: proto_start RADV radv_proto_item: proto_item + | proto_channel | INTERFACE radv_iface | PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); } - | TRIGGER prefix { - RADV_CFG->trigger_prefix = $2.addr; - RADV_CFG->trigger_pxlen = $2.len; - RADV_CFG->trigger_valid = 1; - } + | TRIGGER net_ip6 { RADV_CFG->trigger = $2; } ; radv_proto_opts: @@ -94,15 +92,15 @@ radv_iface_item: | MIN DELAY expr { RADV_IFACE->min_delay = $3; if ($3 <= 0) cf_error("Min delay must be positive"); } | MANAGED bool { RADV_IFACE->managed = $2; } | OTHER CONFIG bool { RADV_IFACE->other_config = $3; } - | LINK MTU expr { RADV_IFACE->link_mtu = $3; if ($3 < 0) cf_error("Link MTU must be 0 or positive"); } - | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); } - | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); } - | LINGER TIME expr { RADV_IFACE->linger_time = $3; if (($3 < 0) || ($3 > 3600)) cf_error("Linger time must be in range 0-3600"); } - | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); } + | LINK MTU expr { RADV_IFACE->link_mtu = $3; } + | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if ($3 > 3600000) cf_error("Reachable time must be in range 0-3600000"); } + | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; } + | LINGER TIME expr { RADV_IFACE->linger_time = $3; if ($3 > 3600) cf_error("Linger time must be in range 0-3600"); } + | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if ($4 > 255) cf_error("Current hop limit must be in range 0-255"); } | DEFAULT LIFETIME expr radv_sensitive { RADV_IFACE->default_lifetime = $3; - if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); - if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4; + if ($3 > 9000) cf_error("Default lifetime must be in range 0-9000"); + if ($4 != (uint) -1) RADV_IFACE->default_lifetime_sensitive = $4; } | DEFAULT PREFERENCE radv_preference { RADV_IFACE->default_preference = $3; } | PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); } @@ -129,7 +127,7 @@ radv_iface_finish: if ((ic->min_ra_int > 3) && (ic->min_ra_int > (ic->max_ra_int * 3 / 4))) - cf_error("Min RA interval must be at most 3/4 * Max RA interval %d %d", ic->min_ra_int, ic->max_ra_int); + cf_error("Min RA interval must be at most 3/4 * Max RA interval"); if ((ic->default_lifetime > 0) && (ic->default_lifetime < ic->max_ra_int)) cf_error("Default lifetime must be either 0 or at least Max RA interval"); @@ -150,11 +148,10 @@ radv_iface: radv_iface_start iface_patt_list_nopx radv_iface_opt_list radv_iface_finish; -radv_prefix_start: prefix +radv_prefix_start: net_ip6 { this_radv_prefix = 
cfg_allocz(sizeof(struct radv_prefix_config)); - RADV_PREFIX->prefix = $1.addr; - RADV_PREFIX->pxlen = $1.len; + RADV_PREFIX->prefix = *(net_addr_ip6 *) &($1); RADV_PREFIX->onlink = 1; RADV_PREFIX->autonomous = 1; @@ -168,13 +165,11 @@ radv_prefix_item: | AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; } | VALID LIFETIME expr radv_sensitive { RADV_PREFIX->valid_lifetime = $3; - if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); - if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4; + if ($4 != (uint) -1) RADV_PREFIX->valid_lifetime_sensitive = $4; } | PREFERRED LIFETIME expr radv_sensitive { RADV_PREFIX->preferred_lifetime = $3; - if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); - if ($4 != -1) RADV_PREFIX->preferred_lifetime_sensitive = $4; + if ($4 != (uint) -1) RADV_PREFIX->preferred_lifetime_sensitive = $4; } ; diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 19d71f97..e07296e1 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -38,7 +38,7 @@ struct radv_opt_prefix u32 valid_lifetime; u32 preferred_lifetime; u32 reserved; - ip_addr prefix; + ip6_addr prefix; }; #define OPT_PX_ONLINK 0x80 @@ -58,7 +58,7 @@ struct radv_opt_rdnss u8 length; u16 reserved; u32 lifetime; - ip_addr servers[]; + ip6_addr servers[]; }; struct radv_opt_dnssl @@ -79,7 +79,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b { struct radv_rdnss_config *rcf_base = rcf; struct radv_opt_rdnss *op = (void *) *buf; - int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip_addr); + int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip6_addr); int i = 0; if (max_i < 1) @@ -100,8 +100,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b if (i >= max_i) goto too_much; - op->servers[i] = rcf->server; - ipa_hton(op->servers[i]); + op->servers[i] = ip6_hton(rcf->server); i++; rcf = NODE_NEXT(rcf); @@ -206,10 +205,10 @@ radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *b } static int -radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, +radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px, char **buf, char *bufend) { - struct radv_prefix_config *pc = prefix->cf; + struct radv_prefix_config *pc = px->cf; if (*buf + sizeof(struct radv_opt_prefix) > bufend) { @@ -221,7 +220,7 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, struct radv_opt_prefix *op = (void *) *buf; op->type = OPT_PREFIX; op->length = 4; - op->pxlen = prefix->len; + op->pxlen = px->prefix.pxlen; op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) | (pc->autonomous ? OPT_PX_AUTONOMOUS : 0); op->valid_lifetime = (ifa->ra->active || !pc->valid_lifetime_sensitive) ? @@ -229,8 +228,7 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix, op->preferred_lifetime = (ifa->ra->active || !pc->preferred_lifetime_sensitive) ? 
htonl(pc->preferred_lifetime) : 0; op->reserved = 0; - op->prefix = prefix->prefix; - ipa_hton(op->prefix); + op->prefix = ip6_hton(px->prefix.prefix); *buf += sizeof(*op); return 0; @@ -334,7 +332,7 @@ radv_rx_hook(sock *sk, uint size) if (sk->lifindex != sk->iface->index) return 1; - if (ipa_equal(sk->faddr, ifa->addr->ip)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (size < 8) @@ -386,6 +384,7 @@ radv_sk_open(struct radv_iface *ifa) { sock *sk = sk_new(ifa->pool); sk->type = SK_IP; + sk->subtype = SK_IPV6; sk->dport = ICMPV6_PROTO; sk->saddr = ifa->addr->ip; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 227c8ef6..c96d7724 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -58,23 +58,24 @@ radv_timer(timer *tm) * This sets the timer, but we replace it just at the end of this function * (replacing a timer is fine). */ - if (ifa->prefix_expires && (ifa->prefix_expires <= now)) + if (ifa->prefix_expires && (ifa->prefix_expires <= current_time())) radv_iface_notify(ifa, RA_EV_GC); radv_send_ra(ifa, 0); /* Update timer */ - ifa->last = now; - unsigned after = ifa->cf->min_ra_int; - after += random() % (ifa->cf->max_ra_int - ifa->cf->min_ra_int + 1); + ifa->last = current_time(); + btime t = ifa->cf->min_ra_int S; + btime r = (ifa->cf->max_ra_int - ifa->cf->min_ra_int) S; + t += random() % (r + 1); if (ifa->initial) + { + t = MIN(t, MAX_INITIAL_RTR_ADVERT_INTERVAL); ifa->initial--; + } - if (ifa->initial) - after = MIN(after, MAX_INITIAL_RTR_ADVERT_INTERVAL); - - tm_start(ifa->timer, after); + tm_start(ifa->timer, t); } static struct radv_prefix_config default_prefix = { @@ -89,21 +90,18 @@ static struct radv_prefix_config dead_prefix = { /* Find a corresponding config for the given prefix */ static struct radv_prefix_config * -radv_prefix_match(struct radv_iface *ifa, struct ifa *a) +radv_prefix_match(struct radv_iface *ifa, net_addr_ip6 *px) { struct radv_proto *p = ifa->ra; struct radv_config *cf = (struct radv_config *) (p->p.cf); struct radv_prefix_config *pc; - if (a->scope <= SCOPE_LINK) - return NULL; - WALK_LIST(pc, ifa->cf->pref_list) - if ((a->pxlen >= pc->pxlen) && ipa_in_net(a->prefix, pc->prefix, pc->pxlen)) + if (net_in_net_ip6(px, &pc->prefix)) return pc; WALK_LIST(pc, cf->pref_list) - if ((a->pxlen >= pc->pxlen) && ipa_in_net(a->prefix, pc->prefix, pc->pxlen)) + if (net_in_net_ip6(px, &pc->prefix)) return pc; return &default_prefix; @@ -128,7 +126,12 @@ radv_prepare_prefixes(struct radv_iface *ifa) struct ifa *addr; WALK_LIST(addr, ifa->iface->addrs) { - struct radv_prefix_config *pc = radv_prefix_match(ifa, addr); + if ((addr->prefix.type != NET_IP6) || + (addr->scope <= SCOPE_LINK)) + continue; + + net_addr_ip6 *prefix = (void *) &addr->prefix; + struct radv_prefix_config *pc = radv_prefix_match(ifa, prefix); if (!pc || pc->skip) continue; @@ -136,7 +139,7 @@ radv_prepare_prefixes(struct radv_iface *ifa) /* Do we have it already? 
*/ struct radv_prefix *existing = NULL; WALK_LIST(pfx, ifa->prefixes) - if ((pfx->len == addr->pxlen) && ipa_equal(pfx->prefix, addr->prefix)) + if (net_equal_ip6(&pfx->prefix, prefix)) { existing = pfx; break; @@ -144,12 +147,11 @@ radv_prepare_prefixes(struct radv_iface *ifa) if (!existing) { - RADV_TRACE(D_EVENTS, "Adding new prefix %I/%d on %s", - addr->prefix, addr->pxlen, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Adding new prefix %N on %s", + prefix, ifa->iface->name); existing = mb_allocz(ifa->pool, sizeof *existing); - existing->prefix = addr->prefix; - existing->len = addr->pxlen; + net_copy_ip6(&existing->prefix, prefix); add_tail(&ifa->prefixes, NODE existing); } @@ -167,15 +169,16 @@ radv_prepare_prefixes(struct radv_iface *ifa) * dropped just yet). If something is dead and rots there for long enough, * clean it up. */ - bird_clock_t expires = now + cf->linger_time; - bird_clock_t expires_min = 0; + btime now_ = current_time(); + btime expires = now_ + cf->linger_time S; + btime expires_min = 0; struct radv_prefix *next; WALK_LIST_DELSAFE(pfx, next, ifa->prefixes) { if (pfx->alive && !pfx->mark) { - RADV_TRACE(D_EVENTS, "Marking prefix %I/$d on %s as dead", - pfx->prefix, pfx->len, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Marking prefix %N on %s as dead", + pfx->prefix, ifa->iface->name); pfx->alive = 0; pfx->expires = expires; @@ -184,10 +187,10 @@ radv_prepare_prefixes(struct radv_iface *ifa) if (!pfx->alive) { - if (pfx->expires <= now) + if (pfx->expires <= now_) { - RADV_TRACE(D_EVENTS, "Removing prefix %I/%d on %s", - pfx->prefix, pfx->len, ifa->iface->name); + RADV_TRACE(D_EVENTS, "Removing prefix %N on %s", + pfx->prefix, ifa->iface->name); rem_node(NODE pfx); mb_free(pfx); @@ -232,13 +235,8 @@ radv_iface_notify(struct radv_iface *ifa, int event) radv_prepare_prefixes(ifa); /* Update timer */ - unsigned delta = now - ifa->last; - unsigned after = 0; - - if (delta < ifa->cf->min_delay) - after = ifa->cf->min_delay - delta; - - tm_start(ifa->timer, after); + btime t = ifa->last + ifa->cf->min_delay S - current_time(); + tm_start(ifa->timer, t); } static void @@ -278,17 +276,6 @@ radv_iface_add(struct object_lock *lock) radv_iface_notify(ifa, RA_EV_INIT); } -static inline struct ifa * -find_lladdr(struct iface *iface) -{ - struct ifa *a; - WALK_LIST(a, iface->addrs) - if (a->scope == SCOPE_LINK) - return a; - - return NULL; -} - static void radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_config *cf) { @@ -302,23 +289,12 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf ifa->ra = p; ifa->cf = cf; ifa->iface = iface; + ifa->addr = iface->llv6; init_list(&ifa->prefixes); add_tail(&p->iface_list, NODE ifa); - ifa->addr = find_lladdr(iface); - if (!ifa->addr) - { - log(L_ERR "%s: Missing link-local address on interface %s", p->p.name, iface->name); - return; - } - - timer *tm = tm_new(pool); - tm->hook = radv_timer; - tm->data = ifa; - tm->randomize = 0; - tm->recurrent = 0; - ifa->timer = tm; + ifa->timer = tm_new_init(pool, radv_timer, ifa, 0, 0); struct object_lock *lock = olock_new(pool); lock->addr = IPA_NONE; @@ -354,8 +330,15 @@ radv_if_notify(struct proto *P, unsigned flags, struct iface *iface) if (flags & IF_CHANGE_UP) { - struct radv_iface_config *ic = (struct radv_iface_config *) - iface_patt_find(&cf->patt_list, iface, NULL); + struct radv_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL); + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + return; + + 
/* Ignore ifaces without link-local address */ + if (!iface->llv6) + return; if (ic) radv_iface_new(p, iface, ic); @@ -394,11 +377,16 @@ radv_ifa_notify(struct proto *P, unsigned flags UNUSED, struct ifa *a) radv_iface_notify(ifa, RA_EV_CHANGE); } -static inline int radv_net_match_trigger(struct radv_config *cf, net *n) +static inline int +radv_trigger_valid(struct radv_config *cf) +{ + return cf->trigger.type != 0; +} + +static inline int +radv_net_match_trigger(struct radv_config *cf, net *n) { - return cf->trigger_valid && - (n->n.pxlen == cf->trigger_pxlen) && - ipa_equal(n->n.prefix, cf->trigger_prefix); + return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger); } int @@ -414,7 +402,7 @@ radv_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct l } static void -radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED) +radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (P->cf); @@ -441,19 +429,30 @@ radv_check_active(struct radv_proto *p) { struct radv_config *cf = (struct radv_config *) (p->p.cf); - if (! cf->trigger_valid) + if (!radv_trigger_valid(cf)) return 1; - return rt_examine(p->p.table, cf->trigger_prefix, cf->trigger_pxlen, - &(p->p), p->p.cf->out_filter); + struct channel *c = p->p.main_channel; + return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter); +} + +static void +radv_postconfig(struct proto_config *CF) +{ + // struct radv_config *cf = (void *) CF; + + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, NET_IP6, CF); } static struct proto * -radv_init(struct proto_config *c) +radv_init(struct proto_config *CF) { - struct proto *P = proto_new(c, sizeof(struct radv_proto)); + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->import_control = radv_import_control; P->rt_notify = radv_rt_notify; P->if_notify = radv_if_notify; @@ -469,7 +468,7 @@ radv_start(struct proto *P) struct radv_config *cf = (struct radv_config *) (P->cf); init_list(&(p->iface_list)); - p->active = !cf->trigger_valid; + p->active = !radv_trigger_valid(cf); return PS_UP; } @@ -494,11 +493,11 @@ radv_shutdown(struct proto *P) } static int -radv_reconfigure(struct proto *P, struct proto_config *c) +radv_reconfigure(struct proto *P, struct proto_config *CF) { struct radv_proto *p = (struct radv_proto *) P; // struct radv_config *old = (struct radv_config *) (p->cf); - struct radv_config *new = (struct radv_config *) c; + struct radv_config *new = (struct radv_config *) CF; /* * The question is why there is a reconfigure function for RAdv if @@ -508,12 +507,26 @@ radv_reconfigure(struct proto *P, struct proto_config *c) * causing nodes to temporary remove their default routes. 
*/ - P->cf = c; /* radv_check_active() requires proper P->cf */ + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + + P->cf = CF; /* radv_check_active() requires proper P->cf */ p->active = radv_check_active(p); struct iface *iface; WALK_LIST(iface, iface_list) { + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore non-multicast ifaces */ + if (!(iface->flags & IF_MULTICAST)) + continue; + + /* Ignore ifaces without link-local address */ + if (!iface->llv6) + continue; + struct radv_iface *ifa = radv_iface_find(p, iface); struct radv_iface_config *ic = (struct radv_iface_config *) iface_patt_find(&new->patt_list, iface, NULL); @@ -565,7 +578,10 @@ radv_get_status(struct proto *P, byte *buf) struct protocol proto_radv = { .name = "RAdv", .template = "radv%d", + .channel_mask = NB_IP6, + .proto_size = sizeof(struct radv_proto), .config_size = sizeof(struct radv_config), + .postconfig = radv_postconfig, .init = radv_init, .start = radv_start, .shutdown = radv_shutdown, diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 60b9980f..4672e3b2 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -30,7 +30,7 @@ #define ICMPV6_RA 134 #define MAX_INITIAL_RTR_ADVERTISEMENTS 3 -#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 +#define MAX_INITIAL_RTR_ADVERT_INTERVAL (16 S_) #define DEFAULT_MAX_RA_INT 600 #define DEFAULT_MIN_DELAY 3 @@ -51,9 +51,7 @@ struct radv_config list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */ list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */ - ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */ - u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */ - u8 trigger_valid; /* Whether a trigger route is defined */ + net_addr trigger; /* Prefix of a trigger route, if defined */ }; struct radv_iface_config @@ -87,8 +85,7 @@ struct radv_iface_config struct radv_prefix_config { node n; - ip_addr prefix; - uint pxlen; + net_addr_ip6 prefix; u8 skip; /* Do not include this prefix to RA */ u8 onlink; /* Standard options from RFC 4861 */ @@ -104,7 +101,7 @@ struct radv_rdnss_config node n; u32 lifetime; /* Valid if lifetime_mult is 0 */ u16 lifetime_mult; /* Lifetime specified as multiple of max_ra_int */ - ip_addr server; /* IP address of recursive DNS server */ + ip6_addr server; /* IP address of recursive DNS server */ }; struct radv_dnssl_config @@ -128,12 +125,12 @@ struct radv_proto struct radv_prefix /* One prefix we advertise */ { node n; - ip_addr prefix; - u8 len; + net_addr_ip6 prefix; + u8 alive; /* Is the prefix alive? If not, we advertise it with 0 lifetime, so clients stop using it */ u8 mark; /* A temporary mark for processing */ - bird_clock_t expires; /* The time when we drop this prefix from + btime expires; /* The time when we drop this prefix from advertising. It is valid only if !alive. 
*/ struct radv_prefix_config *cf; /* The config tied to this prefix */ }; @@ -147,13 +144,13 @@ struct radv_iface struct ifa *addr; /* Link-local address of iface */ struct pool *pool; /* A pool for interface-specific things */ list prefixes; /* The prefixes we advertise (struct radv_prefix) */ - bird_clock_t prefix_expires; /* When the soonest prefix expires (0 = none dead) */ + btime prefix_expires; /* When the soonest prefix expires (0 = none dead) */ timer *timer; struct object_lock *lock; sock *sk; - bird_clock_t last; /* Time of last sending of RA */ + btime last; /* Time of last sending of RA */ u16 plen; /* Length of prepared RA in tbuf, or 0 if not valid */ byte initial; /* How many RAs are still to be sent as initial */ }; diff --git a/proto/rip/Makefile b/proto/rip/Makefile index d2d3c987..7feabcd8 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -1,5 +1,6 @@ -source=rip.c packets.c -root-rel=../../ -dir-name=proto/rip +src := packets.c rip.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) -include ../../Rules +tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 4ec45c7a..e3bc4ae3 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -32,34 +32,40 @@ rip_check_auth(void) CF_DECLS -CF_KEYWORDS(RIP, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, +CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, GARBAGE, PORT, ADDRESS, MODE, BROADCAST, MULTICAST, PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, RIP_METRIC, RIP_TAG) -%type <i> rip_auth +%type <i> rip_variant rip_auth CF_GRAMMAR CF_ADDTO(proto, rip_proto) -rip_proto_start: proto_start RIP +rip_variant: + RIP { $$ = 1; } + | RIP NG { $$ = 0; } + ; + +rip_proto_start: proto_start rip_variant { this_proto = proto_config_new(&proto_rip, $1); - init_list(&RIP_CFG->patt_list); + this_proto->net_type = $2 ? NET_IP4 : NET_IP6; - RIP_CFG->rip2 = RIP_IS_V2; + init_list(&RIP_CFG->patt_list); + RIP_CFG->rip2 = $2; RIP_CFG->infinity = RIP_DEFAULT_INFINITY; - - RIP_CFG->min_timeout_time = 60; - RIP_CFG->max_garbage_time = 60; + RIP_CFG->min_timeout_time = 60 S_; + RIP_CFG->max_garbage_time = 60 S_; }; rip_proto_item: proto_item + | proto_channel | ECMP bool { RIP_CFG->ecmp = $2 ? RIP_DEFAULT_ECMP_LIMIT : 0; } - | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } + | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; } | INFINITY expr { RIP_CFG->infinity = $2; } | INTERFACE rip_iface ; @@ -131,7 +137,7 @@ rip_iface_item: | MODE MULTICAST { RIP_IFACE->mode = RIP_IM_MULTICAST; } | MODE BROADCAST { RIP_IFACE->mode = RIP_IM_BROADCAST; if (rip_cfg_is_ng()) cf_error("Broadcast not supported in RIPng"); } | PASSIVE bool { RIP_IFACE->passive = $2; } - | ADDRESS ipa { RIP_IFACE->address = $2; } + | ADDRESS ipa { RIP_IFACE->address = $2; if (ipa_is_ip4($2) != rip_cfg_is_v2()) cf_error("IP address version mismatch"); } | PORT expr { RIP_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } | VERSION expr { RIP_IFACE->version = $2; if (rip_cfg_is_ng()) cf_error("Version not supported in RIPng"); @@ -141,9 +147,9 @@ rip_iface_item: | SPLIT HORIZON bool { RIP_IFACE->split_horizon = $3; } | POISON REVERSE bool { RIP_IFACE->poison_reverse = $3; } | CHECK ZERO bool { RIP_IFACE->check_zero = $3; } - | UPDATE TIME expr { RIP_IFACE->update_time = $3; if ($3<=0) cf_error("Update time must be positive"); } - | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); } - | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); } + | UPDATE TIME expr { RIP_IFACE->update_time = $3 S_; if ($3<=0) cf_error("Update time must be positive"); } + | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3 S_; if ($3<=0) cf_error("Timeout time must be positive"); } + | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3 S_; if ($3<=0) cf_error("Garbage time must be positive"); } | ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); } | TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } diff --git a/proto/rip/packets.c b/proto/rip/packets.c index 
468927e6..4925ca36 100644 --- a/proto/rip/packets.c +++ b/proto/rip/packets.c @@ -9,6 +9,8 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ +#undef LOCAL_DEBUG + #include "rip.h" #include "lib/mac.h" @@ -76,8 +78,7 @@ struct rip_auth_tail /* Internal representation of RTE block data */ struct rip_block { - ip_addr prefix; - int pxlen; + net_addr net; u32 metric; u16 tag; u16 no_af; @@ -106,30 +107,30 @@ static inline uint rip_pkt_hdrlen(struct rip_iface *ifa) { return sizeof(struct rip_packet) + (ifa->cf->auth_type ? RIP_BLOCK_LENGTH : 0); } static inline void -rip_put_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte) +rip_put_block(struct rip_proto *p, byte *pos, struct rip_block *rte) { if (rip_is_v2(p)) { struct rip_block_v2 *block = (void *) pos; block->family = rte->no_af ? 0 : htons(RIP_AF_IPV4); block->tag = htons(rte->tag); - block->network = ip4_hton(ipa_to_ip4(rte->prefix)); - block->netmask = ip4_hton(ip4_mkmask(rte->pxlen)); + block->network = ip4_hton(net4_prefix(&rte->net)); + block->netmask = ip4_hton(ip4_mkmask(net4_pxlen(&rte->net))); block->next_hop = ip4_hton(ipa_to_ip4(rte->next_hop)); block->metric = htonl(rte->metric); } else /* RIPng */ { struct rip_block_ng *block = (void *) pos; - block->prefix = ip6_hton(ipa_to_ip6(rte->prefix)); + block->prefix = ip6_hton(net6_prefix(&rte->net)); block->tag = htons(rte->tag); - block->pxlen = rte->pxlen; + block->pxlen = net6_pxlen(&rte->net); block->metric = rte->metric; } } static inline void -rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UNUSED4) +rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte) { struct rip_block_ng *block = (void *) pos; block->prefix = ip6_hton(ipa_to_ip6(rte->next_hop)); @@ -139,7 +140,7 @@ rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UN } static inline int -rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte) +rip_get_block(struct rip_proto *p, byte *pos, struct rip_block *rte) { if (rip_is_v2(p)) { @@ -149,8 +150,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block * if (block->family != (rte->no_af ? 0 : htons(RIP_AF_IPV4))) return 0; - rte->prefix = ipa_from_ip4(ip4_ntoh(block->network)); - rte->pxlen = ip4_masklen(ip4_ntoh(block->netmask)); + uint pxlen = ip4_masklen(ip4_ntoh(block->netmask)); + net_fill_ip4(&rte->net, ip4_ntoh(block->network), pxlen); rte->metric = ntohl(block->metric); rte->tag = ntohs(block->tag); rte->next_hop = ipa_from_ip4(ip4_ntoh(block->next_hop)); @@ -169,8 +170,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block * return 0; } - rte->prefix = ipa_from_ip6(ip6_ntoh(block->prefix)); - rte->pxlen = block->pxlen; + uint pxlen = (block->pxlen <= IP6_MAX_PREFIX_LENGTH) ? block->pxlen : 255; + net_fill_ip6(&rte->net, ip6_ntoh(block->prefix), pxlen); rte->metric = block->metric; rte->tag = ntohs(block->tag); /* rte->next_hop is deliberately kept unmodified */; @@ -188,7 +189,10 @@ rip_update_csn(struct rip_proto *p UNUSED, struct rip_iface *ifa) * have the same CSN. We are using real time, but enforcing monotonicity. */ if (ifa->cf->auth_type == RIP_AUTH_CRYPTO) - ifa->csn = (ifa->csn < (u32) now_real) ? (u32) now_real : ifa->csn + 1; + { + u32 now_real = (u32) (current_real_time() TO_S); + ifa->csn = (ifa->csn < now_real) ? 
now_real : ifa->csn + 1; + } } static void @@ -406,8 +410,9 @@ rip_receive_request(struct rip_proto *p, struct rip_iface *ifa, struct rip_packe if (!rip_get_block(p, pos, &b)) return; - /* Special case - zero prefix, infinity metric */ - if (ipa_nonzero(b.prefix) || b.pxlen || (b.metric != p->infinity)) + /* Special case - infinity metric, for RIPng also zero prefix */ + if ((b.metric != p->infinity) || + (rip_is_ng(p) && !net_zero_ip6((net_addr_ip6 *) &b.net))) return; /* We do nothing if TX is already active */ @@ -432,6 +437,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) byte *max = rip_tx_buffer(ifa) + ifa->tx_plen - (rip_is_v2(p) ? RIP_BLOCK_LENGTH : 2*RIP_BLOCK_LENGTH); ip_addr last_next_hop = IPA_NONE; + btime now_ = current_time(); int send = 0; struct rip_packet *pkt = (void *) pos; @@ -440,17 +446,15 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) pkt->unused = 0; pos += rip_pkt_hdrlen(ifa); - FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, z) + FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) z; - /* Dummy entries */ if (!en->valid) goto next_entry; /* Stale entries that should be removed */ if ((en->valid == RIP_ENTRY_STALE) && - ((en->changed + ifa->cf->garbage_time) <= now)) + ((en->changed + ifa->cf->garbage_time) <= now_)) goto next_entry; /* Triggered updates */ @@ -460,28 +464,28 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) /* Not enough space for current entry */ if (pos > max) { - FIB_ITERATE_PUT(&ifa->tx_fit, z); + FIB_ITERATE_PUT(&ifa->tx_fit); goto break_loop; } struct rip_block rte = { - .prefix = en->n.prefix, - .pxlen = en->n.pxlen, .metric = en->metric, .tag = en->tag }; + net_copy(&rte.net, en->n.addr); + if (en->iface == ifa->iface) rte.next_hop = en->next_hop; if (rip_is_v2(p) && (ifa->cf->version == RIP_V1)) { /* Skipping subnets (i.e. not hosts, classful networks or default route) */ - if (ip4_masklen(ip4_class_mask(ipa_to_ip4(en->n.prefix))) != en->n.pxlen) + if (ip4_masklen(ip4_class_mask(net4_prefix(&rte.net))) != rte.net.pxlen) goto next_entry; rte.tag = 0; - rte.pxlen = 0; + rte.net.pxlen = 0; rte.next_hop = IPA_NONE; } @@ -497,7 +501,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) goto next_entry; } - // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric); + // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net, rte.next_hop, rte.metric); /* RIPng next hop entry */ if (rip_is_ng(p) && !ipa_equal(rte.next_hop, last_next_hop)) @@ -513,7 +517,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa) next_entry: ; } - FIB_ITERATE_END(z); + FIB_ITERATE_END; ifa->tx_active = 0; /* Do not send empty packet */ @@ -540,9 +544,9 @@ break_loop: * activating the new one. 
*/ void -rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed) +rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed) { - DBG("RIP: Opening TX session to %I on %s\n", dst, ifa->iface->name); + DBG("RIP: Opening TX session to %I on %s\n", addr, ifa->iface->name); rip_reset_tx_session(p, ifa); @@ -591,6 +595,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack byte *pos = (byte *) pkt + sizeof(struct rip_packet); byte *end = (byte *) pkt + plen; + btime now_ = current_time(); for (; pos < end; pos += RIP_BLOCK_LENGTH) { @@ -598,23 +603,25 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack if (!rip_get_block(p, pos, &rte)) continue; - int c = ipa_classify_net(rte.prefix); - if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - SKIP("invalid prefix"); - if (rip_is_v2(p) && (pkt->version == RIP_V1)) { - if (ifa->cf->check_zero && (rte.tag || rte.pxlen || ipa_nonzero(rte.next_hop))) + if (ifa->cf->check_zero && (rte.tag || rte.net.pxlen || ipa_nonzero(rte.next_hop))) SKIP("RIPv1 reserved field is nonzero"); rte.tag = 0; - rte.pxlen = ip4_masklen(ip4_class_mask(ipa_to_ip4(rte.prefix))); + rte.net.pxlen = ip4_masklen(ip4_class_mask(net4_prefix(&rte.net))); rte.next_hop = IPA_NONE; } - if ((rte.pxlen < 0) || (rte.pxlen > MAX_PREFIX_LENGTH)) + if (rte.net.pxlen == 255) SKIP("invalid prefix length"); + net_normalize(&rte.net); + + int c = net_classify(&rte.net); + if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) + SKIP("invalid prefix"); + if (rte.metric > p->infinity) SKIP("invalid metric"); @@ -625,7 +632,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack rte.next_hop = IPA_NONE; } - // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric); + // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net.n, rte.next_hop, rte.metric); rte.metric += ifa->cf->metric; @@ -636,19 +643,19 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack .next_hop = ipa_nonzero(rte.next_hop) ? rte.next_hop : from->nbr->addr, .metric = rte.metric, .tag = rte.tag, - .expires = now + ifa->cf->timeout_time + .expires = now_ + ifa->cf->timeout_time }; - rip_update_rte(p, &rte.prefix, rte.pxlen, &new); + rip_update_rte(p, &rte.net, &new); } else - rip_withdraw_rte(p, &rte.prefix, rte.pxlen, from); + rip_withdraw_rte(p, &rte.net, from); continue; skip: - LOG_RTE("Ignoring route %I/%d received from %I - %s", - rte.prefix, rte.pxlen, from->nbr->addr, err_dsc); + LOG_RTE("Ignoring route %N received from %I - %s", + &rte.net, from->nbr->addr, err_dsc); } } @@ -667,8 +674,7 @@ rip_rx_hook(sock *sk, uint len) sk->iface->name, sk->faddr, sk->laddr); /* Silently ignore my own packets */ - /* FIXME: Better local address check */ - if (ipa_equal(ifa->iface->addr->ip, sk->faddr)) + if (ipa_equal(sk->faddr, sk->saddr)) return 1; if (rip_is_ng(p) && !ipa_is_link_local(sk->faddr)) @@ -704,7 +710,7 @@ rip_rx_hook(sock *sk, uint len) if ((plen - sizeof(struct rip_packet)) % RIP_BLOCK_LENGTH) DROP("invalid length", plen); - n->last_seen = now; + n->last_seen = current_time(); rip_update_bfd(p, n); switch (pkt->command) @@ -736,17 +742,11 @@ rip_open_socket(struct rip_iface *ifa) sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; + sk->subtype = rip_is_v2(p) ? 
SK_IPV4 : SK_IPV6; sk->sport = ifa->cf->port; sk->dport = ifa->cf->port; sk->iface = ifa->iface; - - /* - * For RIPv2, we explicitly choose a primary address, mainly to ensure that - * RIP and BFD uses the same one. For RIPng, we left it to kernel, which - * should choose some link-local address based on the same scope rule. - */ - if (rip_is_v2(p)) - sk->saddr = ifa->iface->addr->ip; + sk->saddr = rip_is_v2(p) ? ifa->iface->addr4->ip : ifa->iface->llv6->ip; sk->rx_hook = rip_rx_hook; sk->tx_hook = rip_tx_hook; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 7b380097..a3eeaf17 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -92,15 +92,6 @@ static void rip_trigger_update(struct rip_proto *p); * RIP routes */ -static void -rip_init_entry(struct fib_node *fn) -{ - // struct rip_entry *en = (void) *fn; - - const uint offset = OFFSETOF(struct rip_entry, routes); - memset((byte *)fn + offset, 0, sizeof(struct rip_entry) - offset); -} - static struct rip_rte * rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src) { @@ -152,27 +143,20 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (rt) { /* Update */ - net *n = net_get(p->p.table, en->n.prefix, en->n.pxlen); - rta a0 = { .src = p->p.main_source, .source = RTS_RIP, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, }; u8 rt_metric = rt->metric; u16 rt_tag = rt->tag; - struct rip_rte *rt2 = rt->next; - /* Find second valid rte */ - while (rt2 && !rip_valid_rte(rt2)) - rt2 = rt2->next; - - if (p->ecmp && rt2) + if (p->ecmp) { /* ECMP route */ - struct mpnh *nhs = NULL; + struct nexthop *nhs = NULL; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) @@ -180,54 +164,51 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (!rip_valid_rte(rt)) continue; - struct mpnh *nh = alloca(sizeof(struct mpnh)); + struct nexthop *nh = allocz(sizeof(struct nexthop)); + nh->gw = rt->next_hop; nh->iface = rt->from->nbr->iface; nh->weight = rt->from->ifa->cf->ecmp_weight; - mpnh_insert(&nhs, nh); + + nexthop_insert(&nhs, nh); num++; if (rt->tag != rt_tag) rt_tag = 0; } - a0.dest = RTD_MULTIPATH; - a0.nexthops = nhs; + a0.nh = *nhs; } else { /* Unipath route */ - a0.dest = RTD_ROUTER; - a0.gw = rt->next_hop; - a0.iface = rt->from->nbr->iface; a0.from = rt->from->nbr->addr; + a0.nh.gw = rt->next_hop; + a0.nh.iface = rt->from->nbr->iface; } rta *a = rta_lookup(&a0); rte *e = rte_get_temp(a); - e->u.rip.from = a0.iface; + e->u.rip.from = a0.nh.iface; e->u.rip.metric = rt_metric; e->u.rip.tag = rt_tag; - e->net = n; e->pflags = 0; - rte_update(&p->p, n, e); + rte_update(&p->p, en->n.addr, e); } else { /* Withdraw */ - net *n = net_find(p->p.table, en->n.prefix, en->n.pxlen); - rte_update(&p->p, n, NULL); + rte_update(&p->p, en->n.addr, NULL); } } /** * rip_update_rte - enter a route update to RIP routing table * @p: RIP instance - * @prefix: network prefix - * @pxlen: network prefix length + * @addr: network address * @new: a &rip_rte representing the new route * * The function is called by the RIP packet processing code whenever it receives @@ -237,9 +218,9 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) * rip_withdraw_rte() should be called instead of rip_update_rte(). 
*/ void -rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new) +rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new) { - struct rip_entry *en = fib_get(&p->rtable, prefix, pxlen); + struct rip_entry *en = fib_get(&p->rtable, n); struct rip_rte *rt, **rp; int changed = 0; @@ -279,8 +260,7 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte * /** * rip_withdraw_rte - enter a route withdraw to RIP routing table * @p: RIP instance - * @prefix: network prefix - * @pxlen: network prefix length + * @addr: network address * @from: a &rip_neighbor propagating the withdraw * * The function is called by the RIP packet processing code whenever it receives @@ -288,9 +268,9 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte * * removed. Eventually, the change is also propagated by rip_announce_rte(). */ void -rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from) +rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from) { - struct rip_entry *en = fib_find(&p->rtable, prefix, pxlen); + struct rip_entry *en = fib_find(&p->rtable, n); struct rip_rte *rt, **rp; if (!en) @@ -317,7 +297,7 @@ rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_nei * it into our data structures. */ static void -rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, struct rte *new, +rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new, struct rte *old UNUSED, struct ea_list *attrs) { struct rip_proto *p = (struct rip_proto *) P; @@ -332,15 +312,15 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, if (rt_metric > p->infinity) { - log(L_WARN "%s: Invalid rip_metric value %u for route %I/%d", - p->p.name, rt_metric, net->n.prefix, net->n.pxlen); + log(L_WARN "%s: Invalid rip_metric value %u for route %N", + p->p.name, rt_metric, net->n.addr); rt_metric = p->infinity; } if (rt_tag > 0xffff) { - log(L_WARN "%s: Invalid rip_tag value %u for route %I/%d", - p->p.name, rt_tag, net->n.prefix, net->n.pxlen); + log(L_WARN "%s: Invalid rip_tag value %u for route %N", + p->p.name, rt_tag, net->n.addr); rt_metric = p->infinity; rt_tag = 0; } @@ -352,7 +332,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, * collection. */ - en = fib_get(&p->rtable, &net->n.prefix, net->n.pxlen); + en = fib_get(&p->rtable, net->n.addr); old_metric = en->valid ? en->metric : -1; @@ -360,13 +340,13 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, en->metric = rt_metric; en->tag = rt_tag; en->from = (new->attrs->src->proto == P) ? 
new->u.rip.from : NULL; - en->iface = new->attrs->iface; - en->next_hop = new->attrs->gw; + en->iface = new->attrs->nh.iface; + en->next_hop = new->attrs->nh.gw; } else { /* Withdraw */ - en = fib_find(&p->rtable, &net->n.prefix, net->n.pxlen); + en = fib_find(&p->rtable, net->n.addr); if (!en || en->valid != RIP_ENTRY_VALID) return; @@ -384,7 +364,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, /* Activate triggered updates */ if (en->metric != old_metric) { - en->changed = now; + en->changed = current_time(); rip_trigger_update(p); } } @@ -526,10 +506,10 @@ rip_iface_start(struct rip_iface *ifa) TRACE(D_EVENTS, "Starting interface %s", ifa->iface->name); - ifa->next_regular = now + (random() % ifa->cf->update_time) + 1; - ifa->next_triggered = now; /* Available immediately */ - ifa->want_triggered = 1; /* All routes in triggered update */ - tm_start(ifa->timer, 1); /* Or 100 ms */ + ifa->next_regular = current_time() + (random() % ifa->cf->update_time) + 100 MS; + ifa->next_triggered = current_time(); /* Available immediately */ + ifa->want_triggered = 1; /* All routes in triggered update */ + tm_start(ifa->timer, 100 MS); ifa->up = 1; if (!ifa->cf->passive) @@ -650,13 +630,19 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config else if (ic->mode == RIP_IM_MULTICAST) ifa->addr = rip_is_v2(p) ? IP4_RIP_ROUTERS : IP6_RIP_ROUTERS; else /* Broadcast */ - ifa->addr = iface->addr->brd; + ifa->addr = iface->addr4->brd; + /* + * The above is just a workaround for BSD as it can't send broadcasts + * to 255.255.255.255. BSD systems need the network broadcast address instead. + * + * TODO: move this to sysdep code + */ init_list(&ifa->neigh_list); add_tail(&p->iface_list, NODE ifa); - ifa->timer = tm_new_set(p->p.pool, rip_iface_timer, ifa, 0, 0); + ifa->timer = tm_new_init(p->p.pool, rip_iface_timer, ifa, 0, 0); struct object_lock *lock = olock_new(p->p.pool); lock->type = OBJLOCK_UDP; @@ -704,8 +690,8 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa rip_iface_update_buffers(ifa); - if (ifa->next_regular > (now + new->update_time)) - ifa->next_regular = now + (random() % new->update_time) + 1; + if (ifa->next_regular > (current_time() + new->update_time)) + ifa->next_regular = current_time() + (random() % new->update_time) + 100 MS; if (new->check_link != old->check_link) rip_iface_update_state(ifa); @@ -726,7 +712,11 @@ rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf) WALK_LIST(iface, iface_list) { - if (! (iface->flags & IF_UP)) + if (!(iface->flags & IF_UP)) + continue; + + /* Ignore ifaces without appropriate address */ + if (rip_is_v2(p) ? !iface->addr4 : !iface->llv6) continue; struct rip_iface *ifa = rip_find_iface(p, iface); @@ -764,6 +754,10 @@ rip_if_notify(struct proto *P, unsigned flags, struct iface *iface) { struct rip_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL); + /* Ignore ifaces without appropriate address */ + if (rip_is_v2(p) ? 
!iface->addr4 : !iface->llv6) + return; + if (ic) rip_add_iface(p, iface, ic); @@ -822,24 +816,24 @@ rip_timer(timer *t) struct rip_iface *ifa; struct rip_neighbor *n, *nn; struct fib_iterator fit; - bird_clock_t next = now + MIN(cf->min_timeout_time, cf->max_garbage_time); - bird_clock_t expires = 0; + btime now_ = current_time(); + btime next = now_ + MIN(cf->min_timeout_time, cf->max_garbage_time); + btime expires = 0; TRACE(D_EVENTS, "Main timer fired"); FIB_ITERATE_INIT(&fit, &p->rtable); loop: - FIB_ITERATE_START(&p->rtable, &fit, node) + FIB_ITERATE_START(&p->rtable, &fit, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) node; struct rip_rte *rt, **rp; int changed = 0; /* Checking received routes for timeout and for dead neighbors */ for (rp = &en->routes; rt = *rp; /* rp = &rt->next */) { - if (!rip_valid_rte(rt) || (rt->expires <= now)) + if (!rip_valid_rte(rt) || (rt->expires <= now_)) { rip_remove_rte(p, rp); changed = 1; @@ -859,7 +853,7 @@ rip_timer(timer *t) * rip_rt_notify() -> p->rtable change, invalidating hidden variables. */ - FIB_ITERATE_PUT_NEXT(&fit, &p->rtable, node); + FIB_ITERATE_PUT_NEXT(&fit, &p->rtable); rip_announce_rte(p, en); goto loop; } @@ -869,9 +863,9 @@ rip_timer(timer *t) { expires = en->changed + cf->max_garbage_time; - if (expires <= now) + if (expires <= now_) { - // TRACE(D_EVENTS, "entry is too old: %I/%d", en->n.prefix, en->n.pxlen); + // TRACE(D_EVENTS, "entry is too old: %N", en->n.addr); en->valid = 0; } else @@ -881,12 +875,12 @@ rip_timer(timer *t) /* Remove empty nodes */ if (!en->valid && !en->routes) { - FIB_ITERATE_PUT(&fit, node); - fib_delete(&p->rtable, node); + FIB_ITERATE_PUT(&fit); + fib_delete(&p->rtable, en); goto loop; } } - FIB_ITERATE_END(node); + FIB_ITERATE_END; p->rt_reload = 0; @@ -897,20 +891,20 @@ rip_timer(timer *t) { expires = n->last_seen + n->ifa->cf->timeout_time; - if (expires <= now) + if (expires <= now_) rip_remove_neighbor(p, n); else next = MIN(next, expires); } - tm_start(p->timer, MAX(next - now, 1)); + tm_start(p->timer, MAX(next - now_, 100 MS)); } static inline void rip_kick_timer(struct rip_proto *p) { - if (p->timer->expires > (now + 1)) - tm_start(p->timer, 1); /* Or 100 ms */ + if (p->timer->expires > (current_time() + 100 MS)) + tm_start(p->timer, 100 MS); } /** @@ -928,7 +922,8 @@ rip_iface_timer(timer *t) { struct rip_iface *ifa = t->data; struct rip_proto *p = ifa->rip; - bird_clock_t period = ifa->cf->update_time; + btime now_ = current_time(); + btime period = ifa->cf->update_time; if (ifa->cf->passive) return; @@ -937,40 +932,40 @@ rip_iface_timer(timer *t) if (ifa->tx_active) { - if (now < (ifa->next_regular + period)) - { tm_start(ifa->timer, 1); return; } + if (now_ < (ifa->next_regular + period)) + { tm_start(ifa->timer, 100 MS); return; } /* We are too late, reset is done by rip_send_table() */ log(L_WARN "%s: Too slow update on %s, resetting", p->p.name, ifa->iface->name); } - if (now >= ifa->next_regular) + if (now_ >= ifa->next_regular) { /* Send regular update, set timer for next period (or following one if necessay) */ TRACE(D_EVENTS, "Sending regular updates for %s", ifa->iface->name); rip_send_table(p, ifa, ifa->addr, 0); - ifa->next_regular += period * (1 + ((now - ifa->next_regular) / period)); + ifa->next_regular += period * (1 + ((now_ - ifa->next_regular) / period)); ifa->want_triggered = 0; p->triggered = 0; } - else if (ifa->want_triggered && (now >= ifa->next_triggered)) + else if (ifa->want_triggered && (now_ >= ifa->next_triggered)) { /* Send triggered 
update, enforce interval between triggered updates */ TRACE(D_EVENTS, "Sending triggered updates for %s", ifa->iface->name); rip_send_table(p, ifa, ifa->addr, ifa->want_triggered); - ifa->next_triggered = now + MIN(5, period / 2 + 1); + ifa->next_triggered = now_ + MIN(5 S, period / 2); ifa->want_triggered = 0; p->triggered = 0; } - tm_start(ifa->timer, ifa->want_triggered ? 1 : (ifa->next_regular - now)); + tm_start(ifa->timer, ifa->want_triggered ? (1 S) : (ifa->next_regular - now_)); } static inline void rip_iface_kick_timer(struct rip_iface *ifa) { - if (ifa->timer->expires > (now + 1)) - tm_start(ifa->timer, 1); /* Or 100 ms */ + if (ifa->timer->expires > (current_time() + 100 MS)) + tm_start(ifa->timer, 100 MS); } static void @@ -991,7 +986,7 @@ rip_trigger_update(struct rip_proto *p) continue; TRACE(D_EVENTS, "Scheduling triggered updates for %s", ifa->iface->name); - ifa->want_triggered = now; + ifa->want_triggered = current_time(); rip_iface_kick_timer(ifa); } @@ -1035,19 +1030,17 @@ rip_import_control(struct proto *P UNUSED, struct rte **rt, struct ea_list **att return 0; } -static int -rip_reload_routes(struct proto *P) +static void +rip_reload_routes(struct channel *C) { - struct rip_proto *p = (struct rip_proto *) P; + struct rip_proto *p = (struct rip_proto *) C->proto; if (p->rt_reload) - return 1; + return; TRACE(D_EVENTS, "Scheduling route reload"); p->rt_reload = 1; rip_kick_timer(p); - - return 1; } static struct ea_list * @@ -1078,12 +1071,23 @@ rip_rte_same(struct rte *new, struct rte *old) } +static void +rip_postconfig(struct proto_config *CF) +{ + // struct rip_config *cf = (void *) CF; + + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, CF->net_type, CF); +} + static struct proto * -rip_init(struct proto_config *cfg) +rip_init(struct proto_config *CF) { - struct proto *P = proto_new(cfg, sizeof(struct rip_proto)); + struct proto *P = proto_new(CF); + + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->accept_ra_types = RA_OPTIMAL; P->if_notify = rip_if_notify; P->rt_notify = rip_rt_notify; P->neigh_notify = rip_neigh_notify; @@ -1104,10 +1108,12 @@ rip_start(struct proto *P) struct rip_config *cf = (void *) (P->cf); init_list(&p->iface_list); - fib_init(&p->rtable, P->pool, sizeof(struct rip_entry), 0, rip_init_entry); + fib_init(&p->rtable, P->pool, cf->rip2 ? 
NET_IP4 : NET_IP6, + sizeof(struct rip_entry), OFFSETOF(struct rip_entry, n), 0, NULL); p->rte_slab = sl_new(P->pool, sizeof(struct rip_rte)); - p->timer = tm_new_set(P->pool, rip_timer, p, 0, 0); + p->timer = tm_new_init(P->pool, rip_timer, p, 0, 0); + p->rip2 = cf->rip2; p->ecmp = cf->ecmp; p->infinity = cf->infinity; p->triggered = 0; @@ -1121,18 +1127,24 @@ rip_start(struct proto *P) } static int -rip_reconfigure(struct proto *P, struct proto_config *c) +rip_reconfigure(struct proto *P, struct proto_config *CF) { struct rip_proto *p = (void *) P; - struct rip_config *new = (void *) c; + struct rip_config *new = (void *) CF; // struct rip_config *old = (void *) (P->cf); + if (new->rip2 != p->rip2) + return 0; + if (new->infinity != p->infinity) return 0; + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) + return 0; + TRACE(D_EVENTS, "Reconfiguring"); - p->p.cf = c; + p->p.cf = CF; p->ecmp = new->ecmp; rip_reconfigure_ifaces(p, new); @@ -1184,7 +1196,7 @@ rip_show_interfaces(struct proto *P, char *iff) } cli_msg(-1021, "%s:", p->p.name); - cli_msg(-1021, "%-10s %-6s %6s %6s %6s", + cli_msg(-1021, "%-10s %-6s %6s %6s %7s", "Interface", "State", "Metric", "Nbrs", "Timer"); WALK_LIST(ifa, p->iface_list) @@ -1197,8 +1209,9 @@ rip_show_interfaces(struct proto *P, char *iff) if (n->last_seen) nbrs++; - int timer = MAX(ifa->next_regular - now, 0); - cli_msg(-1021, "%-10s %-6s %6u %6u %6u", + btime now_ = current_time(); + btime timer = (ifa->next_regular > now_) ? (ifa->next_regular - now_) : 0; + cli_msg(-1021, "%-10s %-6s %6u %6u %7t", ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->metric, nbrs, timer); } @@ -1220,7 +1233,7 @@ rip_show_neighbors(struct proto *P, char *iff) } cli_msg(-1022, "%s:", p->p.name); - cli_msg(-1022, "%-25s %-10s %6s %6s %6s", + cli_msg(-1022, "%-25s %-10s %6s %6s %7s", "IP address", "Interface", "Metric", "Routes", "Seen"); WALK_LIST(ifa, p->iface_list) @@ -1233,8 +1246,8 @@ rip_show_neighbors(struct proto *P, char *iff) if (!n->last_seen) continue; - int timer = now - n->last_seen; - cli_msg(-1022, "%-25I %-10s %6u %6u %6u", + btime timer = current_time() - n->last_seen; + cli_msg(-1022, "%-25I %-10s %6u %6u %7t", n->nbr->addr, ifa->iface->name, ifa->cf->metric, n->uc, timer); } } @@ -1250,12 +1263,11 @@ rip_dump(struct proto *P) int i; i = 0; - FIB_WALK(&p->rtable, e) + FIB_WALK(&p->rtable, struct rip_entry, en) { - struct rip_entry *en = (struct rip_entry *) e; - debug("RIP: entry #%d: %I/%d via %I dev %s valid %d metric %d age %d s\n", - i++, en->n.prefix, en->n.pxlen, en->next_hop, en->iface->name, - en->valid, en->metric, now - en->changed); + debug("RIP: entry #%d: %N via %I dev %s valid %d metric %d age %t\n", + i++, en->n.addr, en->next_hop, en->iface->name, + en->valid, en->metric, current_time() - en->changed); } FIB_WALK_END; @@ -1274,7 +1286,10 @@ struct protocol proto_rip = { .template = "rip%d", .attr_class = EAP_RIP, .preference = DEF_PREF_RIP, + .channel_mask = NB_IP, + .proto_size = sizeof(struct rip_proto), .config_size = sizeof(struct rip_config), + .postconfig = rip_postconfig, .init = rip_init, .dump = rip_dump, .start = rip_start, diff --git a/proto/rip/rip.h b/proto/rip/rip.h index b24d9536..55696333 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -27,12 +27,6 @@ #include "lib/timer.h" -#ifdef IPV6 -#define RIP_IS_V2 0 -#else -#define RIP_IS_V2 1 -#endif - #define RIP_V1 1 #define RIP_V2 2 @@ -44,9 +38,9 @@ #define RIP_DEFAULT_ECMP_LIMIT 16 #define RIP_DEFAULT_INFINITY 16 -#define 
RIP_DEFAULT_UPDATE_TIME 30 -#define RIP_DEFAULT_TIMEOUT_TIME 180 -#define RIP_DEFAULT_GARBAGE_TIME 120 +#define RIP_DEFAULT_UPDATE_TIME (30 S_) +#define RIP_DEFAULT_TIMEOUT_TIME (180 S_) +#define RIP_DEFAULT_GARBAGE_TIME (120 S_) struct rip_config @@ -58,8 +52,8 @@ struct rip_config u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */ u8 infinity; /* Maximum metric value, representing infinity */ - u32 min_timeout_time; /* Minimum of interface timeout_time */ - u32 max_garbage_time; /* Maximum of interface garbage_time */ + btime min_timeout_time; /* Minimum of interface timeout_time */ + btime max_garbage_time; /* Maximum of interface garbage_time */ }; struct rip_iface_config @@ -84,9 +78,9 @@ struct rip_iface_config u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ int tx_tos; int tx_priority; - u32 update_time; /* Periodic update interval */ - u32 timeout_time; /* Route expiration timeout */ - u32 garbage_time; /* Unreachable entry GC timeout */ + btime update_time; /* Periodic update interval */ + btime timeout_time; /* Route expiration timeout */ + btime garbage_time; /* Unreachable entry GC timeout */ list *passwords; /* Passwords for authentication */ }; @@ -98,6 +92,7 @@ struct rip_proto slab *rte_slab; /* Slab for internal routes (struct rip_rte) */ timer *timer; /* Main protocol timer */ + u8 rip2; /* RIPv2 (IPv4) or RIPng (IPv6) */ u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */ u8 infinity; /* Maximum metric value, representing infinity */ u8 triggered; /* Logical AND of interface want_triggered values */ @@ -125,14 +120,14 @@ struct rip_iface list neigh_list; /* List of iface neighbors (struct rip_neighbor) */ /* Update scheduling */ - bird_clock_t next_regular; /* Next time when regular update should be called */ - bird_clock_t next_triggered; /* Next time when triggerd update may be called */ - bird_clock_t want_triggered; /* Nonzero if triggered update is scheduled */ + btime next_regular; /* Next time when regular update should be called */ + btime next_triggered; /* Next time when triggerd update may be called */ + btime want_triggered; /* Nonzero if triggered update is scheduled */ /* Active update */ int tx_active; /* Update session is active */ ip_addr tx_addr; /* Update session destination address */ - bird_clock_t tx_changed; /* Minimal changed time for triggered update */ + btime tx_changed; /* Minimal changed time for triggered update */ struct fib_iterator tx_fit; /* FIB iterator in RIP routing table (p.rtable) */ }; @@ -142,14 +137,13 @@ struct rip_neighbor struct rip_iface *ifa; /* Associated interface, may be NULL if stale */ struct neighbor *nbr; /* Associaded core neighbor, may be NULL if stale */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - bird_clock_t last_seen; /* Time of last received and accepted message */ + btime last_seen; /* Time of last received and accepted message */ u32 uc; /* Use count, number of routes linking the neighbor */ u32 csn; /* Last received crypto sequence number */ }; struct rip_entry { - struct fib_node n; struct rip_rte *routes; /* List of incoming routes */ u8 valid; /* Entry validity state (RIP_ENTRY_*) */ @@ -159,7 +153,9 @@ struct rip_entry struct iface *iface; /* Outgoing route iface (for next hop) */ ip_addr next_hop; /* Outgoing route next hop */ - bird_clock_t changed; /* Last time when the outgoing route metric changed */ + btime changed; /* Last time when the outgoing route metric changed */ + + struct fib_node n; }; struct rip_rte @@ -171,7 +167,7 @@ 
struct rip_rte u16 metric; /* Route metric (after increase) */ u16 tag; /* Route tag */ - bird_clock_t expires; /* Time of route expiration */ + btime expires; /* Time of route expiration */ }; @@ -189,16 +185,11 @@ struct rip_rte #define EA_RIP_METRIC EA_CODE(EAP_RIP, 0) #define EA_RIP_TAG EA_CODE(EAP_RIP, 1) -#define rip_is_v2(X) RIP_IS_V2 -#define rip_is_ng(X) (!RIP_IS_V2) - -/* static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } static inline int rip_is_ng(struct rip_proto *p) { return ! p->rip2; } -*/ static inline void rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa) @@ -211,8 +202,8 @@ rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa) } /* rip.c */ -void rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new); -void rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from); +void rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new); +void rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from); struct rip_neighbor * rip_get_neighbor(struct rip_proto *p, ip_addr *a, struct rip_iface *ifa); void rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n); void rip_show_interfaces(struct proto *P, char *iff); @@ -220,7 +211,7 @@ void rip_show_neighbors(struct proto *P, char *iff); /* packets.c */ void rip_send_request(struct rip_proto *p, struct rip_iface *ifa); -void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed); +void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed); int rip_open_socket(struct rip_iface *ifa); diff --git a/proto/rpki/Doc b/proto/rpki/Doc new file mode 100644 index 00000000..d1d1bf55 --- /dev/null +++ b/proto/rpki/Doc @@ -0,0 +1,5 @@ +S rpki.c +S packets.c +S transport.c +S tcp_transport.c +S ssh_transport.c diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile new file mode 100644 index 00000000..eb09b7df --- /dev/null +++ b/proto/rpki/Makefile @@ -0,0 +1,6 @@ +src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) + +tests_objs := $(tests_objs) $(src-o-files)
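The grammar added in proto/rpki/config.Y below defines the configuration of the new RPKI protocol. As a sketch of what it accepts (the cache hostname and the interval values are made-up examples, and the r4/r6 ROA tables are assumed to be declared elsewhere in the configuration):

  protocol rpki {
    roa4 { table r4; };
    roa6 { table r6; };
    remote "rtr.example.org" port 323;
    transport tcp;
    refresh 3600;
    retry keep 300;
    expire 7200;
  }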
\ No newline at end of file diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y new file mode 100644 index 00000000..39fdfd01 --- /dev/null +++ b/proto/rpki/config.Y @@ -0,0 +1,144 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#include "proto/rpki/rpki.h" + +CF_DEFINES + +#define RPKI_CFG ((struct rpki_config *) this_proto) +#define RPKI_TR_SSH_CFG ((struct rpki_tr_ssh_config *) RPKI_CFG->tr_config.spec) + +static void +rpki_check_unused_hostname(void) +{ + if (RPKI_CFG->hostname != NULL) + cf_error("Only one cache server per protocol allowed"); +} + +static void +rpki_check_unused_transport(void) +{ + if (RPKI_CFG->tr_config.spec != NULL) + cf_error("At the most one transport per protocol allowed"); +} + +CF_DECLS + +CF_KEYWORDS(RPKI, REMOTE, BIRD, PRIVATE, PUBLIC, KEY, TCP, SSH, TRANSPORT, USER, + RETRY, REFRESH, EXPIRE, KEEP) + +%type <i> rpki_keep_interval + +CF_GRAMMAR + +CF_ADDTO(proto, rpki_proto) + +rpki_proto_start: proto_start RPKI { + this_proto = proto_config_new(&proto_rpki, $1); + RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL; + RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL; + RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL; +}; + +rpki_proto: rpki_proto_start proto_name '{' rpki_proto_opts '}' { rpki_check_config(RPKI_CFG); }; + +rpki_proto_opts: + /* empty */ + | rpki_proto_opts rpki_proto_item ';' + ; + +rpki_proto_item: + proto_item + | proto_channel + | REMOTE rpki_cache_addr + | REMOTE rpki_cache_addr rpki_proto_item_port + | rpki_proto_item_port + | TRANSPORT rpki_transport + | REFRESH rpki_keep_interval expr { + if (rpki_check_refresh_interval($3)) + cf_error(rpki_check_refresh_interval($3)); + RPKI_CFG->refresh_interval = $3; + RPKI_CFG->keep_refresh_interval = $2; + } + | RETRY rpki_keep_interval expr { + if (rpki_check_retry_interval($3)) + cf_error(rpki_check_retry_interval($3)); + RPKI_CFG->retry_interval = $3; + RPKI_CFG->keep_retry_interval = $2; + } + | EXPIRE rpki_keep_interval expr { + if (rpki_check_expire_interval($3)) + cf_error(rpki_check_expire_interval($3)); + RPKI_CFG->expire_interval = $3; + RPKI_CFG->keep_expire_interval = $2; + } + ; + +rpki_keep_interval: + /* empty */ { $$ = 0; } + | KEEP { $$ = 1; } + ; + +rpki_proto_item_port: PORT expr { check_u16($2); RPKI_CFG->port = $2; }; + +rpki_cache_addr: + text { + rpki_check_unused_hostname(); + RPKI_CFG->hostname = $1; + } + | ipa { + rpki_check_unused_hostname(); + RPKI_CFG->ip = $1; + /* Ensure hostname is filled */ + char *hostname = cfg_allocz(sizeof(INET6_ADDRSTRLEN + 1)); + bsnprintf(hostname, INET6_ADDRSTRLEN+1, "%I", RPKI_CFG->ip); + RPKI_CFG->hostname = hostname; + } + ; + +rpki_transport: + TCP rpki_transport_tcp_init + | SSH rpki_transport_ssh_init '{' rpki_transport_ssh_opts '}' rpki_transport_ssh_check + ; + +rpki_transport_tcp_init: +{ + rpki_check_unused_transport(); + RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config)); + RPKI_CFG->tr_config.type = RPKI_TR_TCP; +}; + +rpki_transport_ssh_init: +{ + rpki_check_unused_transport(); + RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_ssh_config)); + RPKI_CFG->tr_config.type = RPKI_TR_SSH; +}; + +rpki_transport_ssh_opts: + /* empty */ + | rpki_transport_ssh_opts rpki_transport_ssh_item ';' + ; + +rpki_transport_ssh_item: + BIRD PRIVATE KEY text { RPKI_TR_SSH_CFG->bird_private_key = $4; } + | REMOTE PUBLIC KEY text { 
RPKI_TR_SSH_CFG->cache_public_key = $4; } + | USER text { RPKI_TR_SSH_CFG->user = $2; } + ; + +rpki_transport_ssh_check: +{ + if (RPKI_TR_SSH_CFG->user == NULL) + cf_error("User must be set"); +}; + +CF_CODE + +CF_END diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c new file mode 100644 index 00000000..59a5efaf --- /dev/null +++ b/proto/rpki/packets.c @@ -0,0 +1,1073 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#undef LOCAL_DEBUG + +#include "rpki.h" +#include "transport.h" +#include "packets.h" + +#define RPKI_ADD_FLAG 0b00000001 + +enum rpki_transmit_type { + RPKI_RECV = 0, + RPKI_SEND = 1, +}; + +enum pdu_error_type { + CORRUPT_DATA = 0, + INTERNAL_ERROR = 1, + NO_DATA_AVAIL = 2, + INVALID_REQUEST = 3, + UNSUPPORTED_PROTOCOL_VER = 4, + UNSUPPORTED_PDU_TYPE = 5, + WITHDRAWAL_OF_UNKNOWN_RECORD = 6, + DUPLICATE_ANNOUNCEMENT = 7, + PDU_TOO_BIG = 32 +}; + +static const char *str_pdu_error_type[] = { + [CORRUPT_DATA] = "Corrupt-Data", + [INTERNAL_ERROR] = "Internal-Error", + [NO_DATA_AVAIL] = "No-Data-Available", + [INVALID_REQUEST] = "Invalid-Request", + [UNSUPPORTED_PROTOCOL_VER] = "Unsupported-Protocol-Version", + [UNSUPPORTED_PDU_TYPE] = "Unsupported-PDU-Type", + [WITHDRAWAL_OF_UNKNOWN_RECORD]= "Withdrawal-Of-Unknown-Record", + [DUPLICATE_ANNOUNCEMENT] = "Duplicate-Announcement", + [PDU_TOO_BIG] = "PDU-Too-Big", +}; + +enum pdu_type { + SERIAL_NOTIFY = 0, + SERIAL_QUERY = 1, + RESET_QUERY = 2, + CACHE_RESPONSE = 3, + IPV4_PREFIX = 4, + RESERVED = 5, + IPV6_PREFIX = 6, + END_OF_DATA = 7, + CACHE_RESET = 8, + ROUTER_KEY = 9, + ERROR = 10, + PDU_TYPE_MAX +}; + +static const char *str_pdu_type_[] = { + [SERIAL_NOTIFY] = "Serial Notify", + [SERIAL_QUERY] = "Serial Query", + [RESET_QUERY] = "Reset Query", + [CACHE_RESPONSE] = "Cache Response", + [IPV4_PREFIX] = "IPv4 Prefix", + [RESERVED] = "Reserved", + [IPV6_PREFIX] = "IPv6 Prefix", + [END_OF_DATA] = "End of Data", + [CACHE_RESET] = "Cache Reset", + [ROUTER_KEY] = "Router Key", + [ERROR] = "Error" +}; + +static const char *str_pdu_type(uint type) { + if (type < PDU_TYPE_MAX) + return str_pdu_type_[type]; + else + return "Undefined packet type"; +} + +/* + * 0 8 16 24 31 + * .-------------------------------------------. 
+ * | Protocol | PDU | | + * | Version | Type | reserved = zero | + * | 0 or 1 | 0 - 10 | | + * +-------------------------------------------+ + * | | + * | Length >= 8 | + * | | + * `-------------------------------------------' */ +struct pdu_header { + u8 ver; + u8 type; + u16 reserved; + u32 len; +} PACKED; + +struct pdu_cache_response { + u8 ver; + u8 type; + u16 session_id; + u32 len; +} PACKED; + +struct pdu_serial_notify { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_serial_query { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_ipv4 { + u8 ver; + u8 type; + u16 reserved; + u32 len; + u8 flags; + u8 prefix_len; + u8 max_prefix_len; + u8 zero; + ip4_addr prefix; + u32 asn; +} PACKED; + +struct pdu_ipv6 { + u8 ver; + u8 type; + u16 reserved; + u32 len; + u8 flags; + u8 prefix_len; + u8 max_prefix_len; + u8 zero; + ip6_addr prefix; + u32 asn; +} PACKED; + +/* + * 0 8 16 24 31 + * .-------------------------------------------. + * | Protocol | PDU | | + * | Version | Type | Error Code | + * | 1 | 10 | | + * +-------------------------------------------+ + * | | + * | Length | + * | | + * +-------------------------------------------+ + * | | + * | Length of Encapsulated PDU | + * | | + * +-------------------------------------------+ + * | | + * ~ Copy of Erroneous PDU ~ + * | | + * +-------------------------------------------+ + * | | + * | Length of Error Text | + * | | + * +-------------------------------------------+ + * | | + * | Arbitrary Text | + * | of | + * ~ Error Diagnostic Message ~ + * | | + * `-------------------------------------------' */ +struct pdu_error { + u8 ver; + u8 type; + u16 error_code; + u32 len; + u32 len_enc_pdu; /* Length of Encapsulated PDU */ + byte rest[]; /* Copy of Erroneous PDU + * Length of Error Text + * Error Diagnostic Message */ +} PACKED; + +struct pdu_reset_query { + u8 ver; + u8 type; + u16 flags; + u32 len; +} PACKED; + +struct pdu_end_of_data_v0 { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; +} PACKED; + +struct pdu_end_of_data_v1 { + u8 ver; + u8 type; + u16 session_id; + u32 len; + u32 serial_num; + u32 refresh_interval; + u32 retry_interval; + u32 expire_interval; +} PACKED; + +static const size_t min_pdu_size[] = { + [SERIAL_NOTIFY] = sizeof(struct pdu_serial_notify), + [SERIAL_QUERY] = sizeof(struct pdu_serial_query), + [RESET_QUERY] = sizeof(struct pdu_reset_query), + [CACHE_RESPONSE] = sizeof(struct pdu_cache_response), + [IPV4_PREFIX] = sizeof(struct pdu_ipv4), + [RESERVED] = sizeof(struct pdu_header), + [IPV6_PREFIX] = sizeof(struct pdu_ipv6), + [END_OF_DATA] = sizeof(struct pdu_end_of_data_v0), + [CACHE_RESET] = sizeof(struct pdu_cache_response), + [ROUTER_KEY] = sizeof(struct pdu_header), /* FIXME */ + [ERROR] = 16, +}; + +static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); + +static void +rpki_pdu_to_network_byte_order(struct pdu_header *pdu) +{ + pdu->reserved = htons(pdu->reserved); + pdu->len = htonl(pdu->len); + + switch (pdu->type) + { + case SERIAL_QUERY: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_serial_query *sq_pdu = (void *) pdu; + sq_pdu->serial_num = htonl(sq_pdu->serial_num); + break; + } + + case ERROR: + { + struct pdu_error *err = (void *) pdu; + u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu); + *err_text_len = 
htonl(*err_text_len); + err->len_enc_pdu = htonl(err->len_enc_pdu); + break; + } + + case RESET_QUERY: + break; + + default: + bug("PDU type %s should not be sent by us", str_pdu_type(pdu->type)); + } +} + +static void +rpki_pdu_to_host_byte_order(struct pdu_header *pdu) +{ + /* The Router Key PDU has two one-byte fields instead of one two-bytes field. */ + if (pdu->type != ROUTER_KEY) + pdu->reserved = ntohs(pdu->reserved); + + pdu->len = ntohl(pdu->len); + + switch (pdu->type) + { + case SERIAL_NOTIFY: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_serial_notify *sn_pdu = (void *) pdu; + sn_pdu->serial_num = ntohl(sn_pdu->serial_num); + break; + } + + case END_OF_DATA: + { + /* Note that a session_id is converted using converting header->reserved */ + struct pdu_end_of_data_v0 *eod0 = (void *) pdu; + eod0->serial_num = ntohl(eod0->serial_num); /* Same either for version 1 */ + + if (pdu->ver == RPKI_VERSION_1) + { + struct pdu_end_of_data_v1 *eod1 = (void *) pdu; + eod1->expire_interval = ntohl(eod1->expire_interval); + eod1->refresh_interval = ntohl(eod1->refresh_interval); + eod1->retry_interval = ntohl(eod1->retry_interval); + } + break; + } + + case IPV4_PREFIX: + { + struct pdu_ipv4 *ipv4 = (void *) pdu; + ipv4->prefix = ip4_ntoh(ipv4->prefix); + ipv4->asn = ntohl(ipv4->asn); + break; + } + + case IPV6_PREFIX: + { + struct pdu_ipv6 *ipv6 = (void *) pdu; + ipv6->prefix = ip6_ntoh(ipv6->prefix); + ipv6->asn = ntohl(ipv6->asn); + break; + } + + case ERROR: + { + /* Note that a error_code is converted using converting header->reserved */ + struct pdu_error *err = (void *) pdu; + err->len_enc_pdu = ntohl(err->len_enc_pdu); + u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu); + *err_text_len = htonl(*err_text_len); + break; + } + + case ROUTER_KEY: + /* Router Key PDU is not supported yet */ + + case SERIAL_QUERY: + case RESET_QUERY: + /* Serial/Reset Query are sent only in direction router to cache. + * We don't care here. */ + + case CACHE_RESPONSE: + case CACHE_RESET: + /* Converted with pdu->reserved */ + break; + } +} + +/** + * rpki_convert_pdu_back_to_network_byte_order - convert host-byte order PDU back to network-byte order + * @out: allocated memory for writing a converted PDU of size @in->len + * @in: host-byte order PDU + * + * Assumed: |A == ntoh(ntoh(A))| + */ +static struct pdu_header * +rpki_pdu_back_to_network_byte_order(struct pdu_header *out, const struct pdu_header *in) +{ + memcpy(out, in, in->len); + rpki_pdu_to_host_byte_order(out); + return out; +} + +static void +rpki_log_packet(struct rpki_cache *cache, const struct pdu_header *pdu, const enum rpki_transmit_type action) +{ + if (!(cache->p->p.debug & D_PACKETS)) + return; + + const char *str_type = str_pdu_type(pdu->type); + char detail[256]; + +#define SAVE(fn) \ + do { \ + if (fn < 0) \ + { \ + bsnprintf(detail + sizeof(detail) - 16, 16, "... 
<too long>)"); \ + goto detail_finished; \ + } \ + } while(0) \ + + switch (pdu->type) + { + case SERIAL_NOTIFY: + case SERIAL_QUERY: + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", pdu->reserved, ((struct pdu_serial_notify *) pdu)->serial_num)); + break; + + case END_OF_DATA: + { + const struct pdu_end_of_data_v1 *eod = (void *) pdu; + if (eod->ver == RPKI_VERSION_1) + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u, refresh: %us, retry: %us, expire: %us)", eod->session_id, eod->serial_num, eod->refresh_interval, eod->retry_interval, eod->expire_interval)); + else + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", eod->session_id, eod->serial_num)); + break; + } + + case CACHE_RESPONSE: + SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u)", pdu->reserved)); + break; + + case IPV4_PREFIX: + { + const struct pdu_ipv4 *ipv4 = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%I4/%u-%u AS%u)", ipv4->prefix, ipv4->prefix_len, ipv4->max_prefix_len, ipv4->asn)); + break; + } + + case IPV6_PREFIX: + { + const struct pdu_ipv6 *ipv6 = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%I6/%u-%u AS%u)", ipv6->prefix, ipv6->prefix_len, ipv6->max_prefix_len, ipv6->asn)); + break; + } + + case ROUTER_KEY: + /* We don't support saving Router Key PDUs yet */ + SAVE(bsnprintf(detail, sizeof(detail), "(ignored)")); + break; + + case ERROR: + { + const struct pdu_error *err = (void *) pdu; + SAVE(bsnprintf(detail, sizeof(detail), "(%s", str_pdu_error_type[err->error_code])); + + /* Optional description of error */ + const u32 len_err_txt = *((u32 *) (err->rest + err->len_enc_pdu)); + if (len_err_txt > 0) + { + size_t expected_len = err->len_enc_pdu + len_err_txt + 16; + if (expected_len == err->len) + { + char txt[len_err_txt + 1]; + char *pdu_txt = (char *) err->rest + err->len_enc_pdu + 4; + bsnprintf(txt, sizeof(txt), "%s", pdu_txt); /* it's ensured that txt is ended with a null byte */ + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ": '%s'", txt)); + } + else + { + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", malformed size")); + } + } + + /* Optional encapsulated erroneous packet */ + if (err->len_enc_pdu) + { + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", %s packet:", str_pdu_type(((struct pdu_header *) err->rest)->type))); + if (err->rest + err->len_enc_pdu <= (byte *)err + err->len) + { + for (const byte *c = err->rest; c != err->rest + err->len_enc_pdu; c++) + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), " %02X", *c)); + } + } + + SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ")")); + break; + } + + default: + *detail = '\0'; + } +#undef SAVE + + detail_finished: + + if (action == RPKI_RECV) + { + CACHE_TRACE(D_PACKETS, cache, "Received %s packet %s", str_type, detail); + } + else + { + CACHE_TRACE(D_PACKETS, cache, "Sending %s packet %s", str_type, detail); + } + +#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) + int seq = 0; + for(const byte *c = pdu; c != pdu + pdu->len; c++) + { + if ((seq % 4) == 0) + DBG("%2d: ", seq); + + DBG(" 0x%02X %-3u", *c, *c); + + if ((++seq % 4) == 0) + DBG("\n"); + } + if ((seq % 4) != 0) + DBG("\n"); +#endif +} + +static int +rpki_send_pdu(struct rpki_cache *cache, const void *pdu, const uint len) +{ + struct rpki_proto *p = cache->p; + sock *sk = cache->tr_sock->sk; + + rpki_log_packet(cache, pdu, RPKI_SEND); + + 
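  /* The TX buffer is expected to hold at most one PDU at a time: anything
   * still waiting in sk->tbuf is only reported and then overwritten, and a
   * PDU that does not fit into the buffer is refused below. */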
if (sk->tbuf != sk->tpos) + { + RPKI_WARN(p, "Old packet overwritten in TX buffer"); + } + + if (len > sk->tbsize) + { + RPKI_WARN(p, "%u bytes is too much for send", len); + ASSERT(0); + return RPKI_ERROR; + } + + memcpy(sk->tbuf, pdu, len); + rpki_pdu_to_network_byte_order((void *) sk->tbuf); + + if (!sk_send(sk, len)) + { + DBG("Cannot send just the whole data. It will be sent using a call of tx_hook()"); + } + + return RPKI_SUCCESS; +} + +/** + * rpki_check_receive_packet - make a basic validation of received RPKI PDU header + * @cache: cache connection instance + * @pdu: RPKI PDU in network byte order + * + * This function checks protocol version, PDU type, version and size. If all is all right then + * function returns |RPKI_SUCCESS| otherwise sends Error PDU and returns + * |RPKI_ERROR|. + */ +static int +rpki_check_receive_packet(struct rpki_cache *cache, const struct pdu_header *pdu) +{ + u32 pdu_len = ntohl(pdu->len); + + /* + * Minimal and maximal allowed PDU size is treated in rpki_rx_hook() function. + * @header.len corresponds to number of bytes of @pdu and + * it is in range from RPKI_PDU_HEADER_LEN to RPKI_PDU_MAX_LEN bytes. + */ + + /* Do not handle error PDUs here, leave this task to rpki_handle_error_pdu() */ + if (pdu->ver != cache->version && pdu->type != ERROR) + { + /* If this is the first PDU we have received */ + if (cache->request_session_id) + { + if (pdu->type == SERIAL_NOTIFY) + { + /* + * The router MUST ignore any Serial Notify PDUs it might receive from + * the cache during this initial start-up period, regardless of the + * Protocol Version field in the Serial Notify PDU. + * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7) + */ + } + else if (!cache->last_update && + (pdu->ver <= RPKI_MAX_VERSION) && + (pdu->ver < cache->version)) + { + CACHE_TRACE(D_EVENTS, cache, "Downgrade session to %s from %u to %u version", rpki_get_cache_ident(cache), cache->version, pdu->ver); + cache->version = pdu->ver; + } + else + { + /* If this is not the first PDU we have received, something is wrong with + * the server implementation -> Error */ + rpki_send_error_pdu(cache, UNSUPPORTED_PROTOCOL_VER, pdu_len, pdu, "PDU with unsupported Protocol version received"); + return RPKI_ERROR; + } + } + } + + if ((pdu->type >= PDU_TYPE_MAX) || (pdu->ver == RPKI_VERSION_0 && pdu->type == ROUTER_KEY)) + { + rpki_send_error_pdu(cache, UNSUPPORTED_PDU_TYPE, pdu_len, pdu, "Unsupported PDU type %u received", pdu->type); + return RPKI_ERROR; + } + + if (pdu_len < min_pdu_size[pdu->type]) + { + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu_len, pdu, "Received %s packet with %d bytes, but expected at least %d bytes", str_pdu_type(pdu->type), pdu_len, min_pdu_size[pdu->type]); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} + +static int +rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu) +{ + switch (pdu->error_code) + { + case CORRUPT_DATA: + case INTERNAL_ERROR: + case INVALID_REQUEST: + case UNSUPPORTED_PDU_TYPE: + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + break; + + case NO_DATA_AVAIL: + rpki_cache_change_state(cache, RPKI_CS_ERROR_NO_DATA_AVAIL); + break; + + case UNSUPPORTED_PROTOCOL_VER: + CACHE_TRACE(D_PACKETS, cache, "Client uses unsupported protocol version"); + if (pdu->ver <= RPKI_MAX_VERSION && + pdu->ver < cache->version) + { + CACHE_TRACE(D_EVENTS, cache, "Downgrading from protocol version %d to version %d", cache->version, pdu->ver); + cache->version = pdu->ver; + rpki_cache_change_state(cache, 
RPKI_CS_FAST_RECONNECT); + } + else + { + CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PROTOCOL_VER error PDU with invalid values, " \ + "current version: %d, PDU version: %d", cache->version, pdu->ver); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + } + break; + + default: + CACHE_TRACE(D_PACKETS, cache, "Error unknown, server sent unsupported error code %u", pdu->error_code); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + break; + } + + return RPKI_SUCCESS; +} + +static void +rpki_handle_serial_notify_pdu(struct rpki_cache *cache, const struct pdu_serial_notify *pdu) +{ + /* The router MUST ignore any Serial Notify PDUs it might receive from + * the cache during this initial start-up period, regardless of the + * Protocol Version field in the Serial Notify PDU. + * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7) + */ + if (cache->request_session_id) + { + CACHE_TRACE(D_PACKETS, cache, "Ignore a Serial Notify packet during initial start-up period"); + return; + } + + /* XXX Serial number should be compared using method RFC 1982 (3.2) */ + if (cache->serial_num != pdu->serial_num) + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); +} + +static int +rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_response *pdu) +{ + if (cache->request_session_id) + { + if (cache->last_update) + { + /* + * This isn't the first sync and we already received records. This point + * is after Reset Query and before importing new records from cache + * server. We need to load new ones and kick out missing ones. So start + * a refresh cycle. + */ + if (cache->p->roa4_channel) + rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); + if (cache->p->roa6_channel) + rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); + + cache->p->refresh_channels = 1; + } + cache->session_id = pdu->session_id; + cache->request_session_id = 0; + } + else + { + if (cache->session_id != pdu->session_id) + { + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Wrong session_id %u in Cache Response PDU", pdu->session_id); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return RPKI_ERROR; + } + } + + rpki_cache_change_state(cache, RPKI_CS_SYNC_RUNNING); + return RPKI_SUCCESS; +} + +/** + * rpki_prefix_pdu_2_net_addr - convert IPv4/IPv6 Prefix PDU into net_addr_union + * @pdu: host byte order IPv4/IPv6 Prefix PDU + * @n: allocated net_addr_union for save ROA + * + * This function reads ROA data from IPv4/IPv6 Prefix PDU and + * write them into net_addr_roa4 or net_addr_roa6 data structure. 
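 *
 * For example (illustrative values only), an IPv4 Prefix PDU announcing
 * 192.0.2.0/24 with maximum length 24 and origin AS 64511 is converted to:
 *
 *   n->roa4.type = NET_ROA4;
 *   n->roa4.length = sizeof(net_addr_roa4);
 *   n->roa4.prefix = ip4_build(192, 0, 2, 0);
 *   n->roa4.pxlen = 24;
 *   n->roa4.max_pxlen = 24;
 *   n->roa4.asn = 64511;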
+ */ +static net_addr_union * +rpki_prefix_pdu_2_net_addr(const struct pdu_header *pdu, net_addr_union *n) +{ + /* + * Note that sizeof(net_addr_roa6) > sizeof(net_addr) + * and thence we must use net_addr_union and not only net_addr + */ + + if (pdu->type == IPV4_PREFIX) + { + const struct pdu_ipv4 *ipv4 = (void *) pdu; + n->roa4.type = NET_ROA4; + n->roa4.length = sizeof(net_addr_roa4); + n->roa4.prefix = ipv4->prefix; + n->roa4.asn = ipv4->asn; + n->roa4.pxlen = ipv4->prefix_len; + n->roa4.max_pxlen = ipv4->max_prefix_len; + } + else + { + const struct pdu_ipv6 *ipv6 = (void *) pdu; + n->roa6.type = NET_ROA6; + n->roa6.length = sizeof(net_addr_roa6); + n->roa6.prefix = ipv6->prefix; + n->roa6.asn = ipv6->asn; + n->roa6.pxlen = ipv6->prefix_len; + n->roa6.max_pxlen = ipv6->max_prefix_len; + } + + return n; +} + +static int +rpki_handle_prefix_pdu(struct rpki_cache *cache, const struct pdu_header *pdu) +{ + const enum pdu_type type = pdu->type; + ASSERT(type == IPV4_PREFIX || type == IPV6_PREFIX); + + net_addr_union addr = {}; + rpki_prefix_pdu_2_net_addr(pdu, &addr); + + struct channel *channel = NULL; + + if (type == IPV4_PREFIX) + channel = cache->p->roa4_channel; + if (type == IPV6_PREFIX) + channel = cache->p->roa6_channel; + + if (!channel) + { + CACHE_TRACE(D_ROUTES, cache, "Skip %N, missing %s channel", &addr, (type == IPV4_PREFIX ? "roa4" : "roa6"), addr); + return RPKI_ERROR; + } + + cache->last_rx_prefix = current_time(); + + /* A place for 'flags' is same for both data structures pdu_ipv4 or pdu_ipv6 */ + struct pdu_ipv4 *pfx = (void *) pdu; + if (pfx->flags & RPKI_ADD_FLAG) + rpki_table_add_roa(cache, channel, &addr); + else + rpki_table_remove_roa(cache, channel, &addr); + + return RPKI_SUCCESS; +} + +static uint +rpki_check_interval(struct rpki_cache *cache, const char *(check_fn)(uint), uint interval) +{ + if (check_fn(interval)) + { + RPKI_WARN(cache->p, "%s, received %u seconds", check_fn(interval), interval); + return 0; + } + return 1; +} + +static void +rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_data_v1 *pdu) +{ + const struct rpki_config *cf = (void *) cache->p->p.cf; + + if (pdu->session_id != cache->session_id) + { + byte tmp[pdu->len]; + const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu); + rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Received Session ID %u, but expected %u", pdu->session_id, cache->session_id); + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return; + } + + if (pdu->ver == RPKI_VERSION_1) + { + if (!cf->keep_refresh_interval && rpki_check_interval(cache, rpki_check_refresh_interval, pdu->refresh_interval)) + cache->refresh_interval = pdu->refresh_interval; + + if (!cf->keep_retry_interval && rpki_check_interval(cache, rpki_check_retry_interval, pdu->retry_interval)) + cache->retry_interval = pdu->retry_interval; + + if (!cf->keep_expire_interval && rpki_check_interval(cache, rpki_check_expire_interval, pdu->expire_interval)) + cache->expire_interval = pdu->expire_interval; + + CACHE_TRACE(D_EVENTS, cache, "New interval values: " + "refresh: %s%us, " + "retry: %s%us, " + "expire: %s%us", + (cf->keep_refresh_interval ? "keeps " : ""), cache->refresh_interval, + (cf->keep_retry_interval ? "keeps " : ""), cache->retry_interval, + (cf->keep_expire_interval ? 
"keeps " : ""), cache->expire_interval); + } + + if (cache->p->refresh_channels) + { + cache->p->refresh_channels = 0; + if (cache->p->roa4_channel) + rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); + if (cache->p->roa6_channel) + rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); + } + + cache->last_update = current_time(); + cache->serial_num = pdu->serial_num; + rpki_cache_change_state(cache, RPKI_CS_ESTABLISHED); +} + +/** + * rpki_rx_packet - process a received RPKI PDU + * @cache: RPKI connection instance + * @pdu: a RPKI PDU in network byte order + */ +static void +rpki_rx_packet(struct rpki_cache *cache, struct pdu_header *pdu) +{ + struct rpki_proto *p = cache->p; + + if (rpki_check_receive_packet(cache, pdu) == RPKI_ERROR) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); + return; + } + + rpki_pdu_to_host_byte_order(pdu); + rpki_log_packet(cache, pdu, RPKI_RECV); + + switch (pdu->type) + { + case RESET_QUERY: + case SERIAL_QUERY: + RPKI_WARN(p, "Received a %s packet that is destined for cache server", str_pdu_type(pdu->type)); + break; + + case SERIAL_NOTIFY: + /* This is a signal to synchronize with the cache server just now */ + rpki_handle_serial_notify_pdu(cache, (void *) pdu); + break; + + case CACHE_RESPONSE: + rpki_handle_cache_response_pdu(cache, (void *) pdu); + break; + + case IPV4_PREFIX: + case IPV6_PREFIX: + rpki_handle_prefix_pdu(cache, pdu); + break; + + case END_OF_DATA: + rpki_handle_end_of_data_pdu(cache, (void *) pdu); + break; + + case CACHE_RESET: + /* Cache cannot provide an incremental update. */ + rpki_cache_change_state(cache, RPKI_CS_NO_INCR_UPDATE_AVAIL); + break; + + case ERROR: + rpki_handle_error_pdu(cache, (void *) pdu); + break; + + case ROUTER_KEY: + /* TODO: Implement Router Key PDU handling */ + break; + + default: + CACHE_TRACE(D_PACKETS, cache, "Received unsupported type (%u)", pdu->type); + }; +} + +int +rpki_rx_hook(struct birdsock *sk, uint size) +{ + struct rpki_cache *cache = sk->data; + struct rpki_proto *p = cache->p; + + byte *pkt_start = sk->rbuf; + byte *end = pkt_start + size; + + DBG("rx hook got %u bytes \n", size); + + while (end >= pkt_start + RPKI_PDU_HEADER_LEN) + { + struct pdu_header *pdu = (void *) pkt_start; + u32 pdu_size = ntohl(pdu->len); + + if (pdu_size < RPKI_PDU_HEADER_LEN || pdu_size > RPKI_PDU_MAX_LEN) + { + RPKI_WARN(p, "Received invalid packet length %u, purge the whole receiving buffer", pdu_size); + return 1; /* Purge recv buffer */ + } + + if (end < pkt_start + pdu_size) + break; + + rpki_rx_packet(cache, pdu); + + /* It is possible that bird socket was freed/closed */ + if (p->p.proto_state == PS_DOWN || sk != cache->tr_sock->sk) + return 0; + + pkt_start += pdu_size; + } + + if (pkt_start != sk->rbuf) + { + CACHE_DBG(cache, "Move %u bytes of a memory at the start of buffer", end - pkt_start); + memmove(sk->rbuf, pkt_start, end - pkt_start); + sk->rpos = sk->rbuf + (end - pkt_start); + } + + return 0; /* Not purge sk->rbuf */ +} + +void +rpki_err_hook(struct birdsock *sk, int error_num) +{ + struct rpki_cache *cache = sk->data; + + if (error_num) + { + /* sk->err may contains a SSH error description */ + if (sk->err) + CACHE_TRACE(D_EVENTS, cache, "Lost connection: %s", sk->err); + else + CACHE_TRACE(D_EVENTS, cache, "Lost connection: %M", error_num); + } + else + { + CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); + } + + + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); +} + +static int +rpki_fire_tx(struct rpki_cache *cache) 
+{ + sock *sk = cache->tr_sock->sk; + + uint bytes_to_send = sk->tpos - sk->tbuf; + DBG("Sending %u bytes", bytes_to_send); + return sk_send(sk, bytes_to_send); +} + +void +rpki_tx_hook(sock *sk) +{ + struct rpki_cache *cache = sk->data; + + while (rpki_fire_tx(cache) > 0) + ; +} + +void +rpki_connected_hook(sock *sk) +{ + struct rpki_cache *cache = sk->data; + + CACHE_TRACE(D_EVENTS, cache, "Connected"); + proto_notify_state(&cache->p->p, PS_UP); + + sk->rx_hook = rpki_rx_hook; + sk->tx_hook = rpki_tx_hook; + + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); +} + +/** + * rpki_send_error_pdu - send RPKI Error PDU + * @cache: RPKI connection instance + * @error_code: PDU Error type + * @err_pdu_len: length of @erroneous_pdu + * @erroneous_pdu: optional network byte-order PDU that invokes Error by us or NULL + * @fmt: optional description text of error or NULL + * @args: optional arguments for @fmt + * + * This function prepares Error PDU and sends it to a cache server. + */ +static int +rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) +{ + va_list args; + char msg[128]; + + /* Size including the terminating null byte ('\0') */ + int msg_len = 0; + + /* Don't send errors for erroneous error PDUs */ + if (err_pdu_len >= 2) + { + if (erroneous_pdu->type == ERROR) + return RPKI_SUCCESS; + } + + if (fmt) + { + va_start(args, fmt); + msg_len = bvsnprintf(msg, sizeof(msg), fmt, args) + 1; + } + + u32 pdu_size = 16 + err_pdu_len + msg_len; + byte pdu[pdu_size]; + memset(pdu, 0, sizeof(pdu)); + + struct pdu_error *e = (void *) pdu; + e->ver = cache->version; + e->type = ERROR; + e->error_code = error_code; + e->len = pdu_size; + + e->len_enc_pdu = err_pdu_len; + if (err_pdu_len > 0) + memcpy(e->rest, erroneous_pdu, err_pdu_len); + + *((u32 *)(e->rest + err_pdu_len)) = msg_len; + if (msg_len > 0) + memcpy(e->rest + err_pdu_len + 4, msg, msg_len); + + return rpki_send_pdu(cache, pdu, pdu_size); +} + +int +rpki_send_serial_query(struct rpki_cache *cache) +{ + struct pdu_serial_query pdu = { + .ver = cache->version, + .type = SERIAL_QUERY, + .session_id = cache->session_id, + .len = sizeof(pdu), + .serial_num = cache->serial_num + }; + + if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) != RPKI_SUCCESS) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} + +int +rpki_send_reset_query(struct rpki_cache *cache) +{ + struct pdu_reset_query pdu = { + .ver = cache->version, + .type = RESET_QUERY, + .len = sizeof(pdu), + }; + + if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) != RPKI_SUCCESS) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_ERROR; + } + + return RPKI_SUCCESS; +} diff --git a/proto/rpki/packets.h b/proto/rpki/packets.h new file mode 100644 index 00000000..d6f8a249 --- /dev/null +++ b/proto/rpki/packets.h @@ -0,0 +1,45 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. 
+ */ + +#ifndef _BIRD_RPKI_PACKETS_H_ +#define _BIRD_RPKI_PACKETS_H_ + +#include <arpa/inet.h> + +#define RPKI_PDU_HEADER_LEN 8 + +/* A Error PDU size is the biggest (has encapsulate PDU inside): + * +8 bytes (Header size) + * +4 bytes (Length of Encapsulated PDU) + * +32 bytes (Encapsulated PDU IPv6 32) + * +4 bytes (Length of inserted text) + * +800 bytes (UTF-8 text 400*2 bytes) + * ------------ + * = 848 bytes (Maximal expected PDU size) */ +#define RPKI_PDU_MAX_LEN 848 + +/* RX buffer size has a great impact to scheduler granularity */ +#define RPKI_RX_BUFFER_SIZE 4096 +#define RPKI_TX_BUFFER_SIZE RPKI_PDU_MAX_LEN + +/* Return values */ +enum rpki_rtvals { + RPKI_SUCCESS = 0, + RPKI_ERROR = -1 +}; + +int rpki_send_serial_query(struct rpki_cache *cache); +int rpki_send_reset_query(struct rpki_cache *cache); +int rpki_rx_hook(sock *sk, uint size); +void rpki_connected_hook(sock *sk); +void rpki_err_hook(sock *sk, int size); + +#endif diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c new file mode 100644 index 00000000..3145399b --- /dev/null +++ b/proto/rpki/rpki.c @@ -0,0 +1,928 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * Using RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: RPKI To Router (RPKI-RTR) + * + * The RPKI-RTR protocol is implemented in several files: |rpki.c| containing + * the routes handling, protocol logic, timer events, cache connection, + * reconfiguration, configuration and protocol glue with BIRD core, |packets.c| + * containing the RPKI packets handling and finally all transports files: + * |transport.c|, |tcp_transport.c| and |ssh_transport.c|. + * + * The |transport.c| is a middle layer and interface for each specific + * transport. Transport is a way how to wrap a communication with a cache + * server. There is supported an unprotected TCP transport and an encrypted + * SSHv2 transport. The SSH transport requires LibSSH library. LibSSH is + * loading dynamically using |dlopen()| function. SSH support is integrated in + * |sysdep/unix/io.c|. Each transport must implement an initialization + * function, an open function and a socket identification function. That's all. + * + * This implementation is based on the RTRlib (http://rpki.realmv6.org/). The + * BIRD takes over files |packets.c|, |rtr.c| (inside |rpki.c|), |transport.c|, + * |tcp_transport.c| and |ssh_transport.c| from RTRlib. + * + * A RPKI-RTR connection is described by a structure &rpki_cache. The main + * logic is located in |rpki_cache_change_state()| function. There is a state + * machine. The standard starting state flow looks like |Down| ~> |Connecting| + * ~> |Sync-Start| ~> |Sync-Running| ~> |Established| and then the last three + * states are periodically repeated. + * + * |Connecting| state establishes the transport connection. The state from a + * call |rpki_cache_change_state(CONNECTING)| to a call |rpki_connected_hook()| + * + * |Sync-Start| state starts with sending |Reset Query| or |Serial Query| and + * then waits for |Cache Response|. The state from |rpki_connected_hook()| to + * |rpki_handle_cache_response_pdu()| + * + * During |Sync-Running| BIRD receives data with IPv4/IPv6 Prefixes from cache + * server. The state starts from |rpki_handle_cache_response_pdu()| and ends + * in |rpki_handle_end_of_data_pdu()|. 
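 *
 * A full synchronisation therefore exchanges the following PDUs; incremental
 * refreshes only replace the Reset Query with a Serial Query carrying the
 * serial number learned from the last End of Data:
 *
 *    router                                cache
 *      | --- Reset/Serial Query ------------> |
 *      | <----------------- Cache Response    |
 *      | <--- IPv4/IPv6 Prefix PDUs ...       |
 *      | <--------------------- End of Data   |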
+ * + * |Established| state means that BIRD has synced all data with cache server. + * Schedules a refresh timer event that invokes |Sync-Start|. Schedules Expire + * timer event and stops a Retry timer event. + * + * |Transport Error| state means that we have some troubles with a network + * connection. We cannot connect to a cache server or we wait too long for some + * expected PDU for received - |Cache Response| or |End of Data|. It closes + * current connection and schedules a Retry timer event. + * + * |Fatal Protocol Error| is occurred e.g. by received a bad Session ID. We + * restart a protocol, so all ROAs are flushed immediately. + * + * The RPKI-RTR protocol (RFC 6810 bis) defines configurable refresh, retry and + * expire intervals. For maintaining a connection are used timer events that + * are scheduled by |rpki_schedule_next_refresh()|, + * |rpki_schedule_next_retry()| and |rpki_schedule_next_expire()| functions. + * + * A Refresh timer event performs a sync of |Established| connection. So it + * shifts state to |Sync-Start|. If at the beginning of second call of a + * refresh event is connection in |Sync-Start| state then we didn't receive a + * |Cache Response| from a cache server and we invoke |Transport Error| state. + * + * A Retry timer event attempts to connect cache server. It is activated after + * |Transport Error| state and terminated by reaching |Established| state. + * If cache connection is still connecting to the cache server at the beginning + * of an event call then the Retry timer event invokes |Transport Error| state. + * + * An Expire timer event checks expiration of ROAs. If a last successful sync + * was more ago than the expire interval then the Expire timer event invokes a + * protocol restart thereby removes all ROAs learned from that cache server and + * continue trying to connect to cache server. The Expire event is activated + * by initial successful loading of ROAs, receiving End of Data PDU. + * + * A reconfiguration of cache connection works well without restarting when we + * change only intervals values. 
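 *
 * With the default intervals from rpki.h (refresh 3600 s, retry 600 s,
 * expire 7200 s), an established session is re-synced every hour, a broken
 * connection is re-tried every ten minutes, and the learned ROAs survive at
 * most two hours without a successful End of Data before the protocol is
 * restarted and they are flushed.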
+ * + * Supported standards: + * - RFC 6810 - main RPKI-RTR standard + * - RFC 6810 bis - an explicit timing parameters and protocol version number negotiation + */ + +#include <stdlib.h> +#include <netdb.h> + +#undef LOCAL_DEBUG + +#include "rpki.h" +#include "lib/string.h" +#include "nest/cli.h" + +/* Return values for reconfiguration functions */ +#define NEED_RESTART 0 +#define SUCCESSFUL_RECONF 1 + +static int rpki_open_connection(struct rpki_cache *cache); +static void rpki_close_connection(struct rpki_cache *cache); +static void rpki_schedule_next_refresh(struct rpki_cache *cache); +static void rpki_schedule_next_retry(struct rpki_cache *cache); +static void rpki_schedule_next_expire_check(struct rpki_cache *cache); +static void rpki_stop_refresh_timer_event(struct rpki_cache *cache); +static void rpki_stop_retry_timer_event(struct rpki_cache *cache); +static void rpki_stop_expire_timer_event(struct rpki_cache *cache); + + +/* + * Routes handling + */ + +void +rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) +{ + struct rpki_proto *p = cache->p; + + rta a0 = { + .src = p->p.main_source, + .source = RTS_RPKI, + .scope = SCOPE_UNIVERSE, + .dest = RTD_NONE, + }; + + rta *a = rta_lookup(&a0); + rte *e = rte_get_temp(a); + + e->pflags = 0; + + rte_update2(channel, &pfxr->n, e, a0.src); +} + +void +rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) +{ + struct rpki_proto *p = cache->p; + rte_update2(channel, &pfxr->n, NULL, p->p.main_source); +} + + +/* + * RPKI Protocol Logic + */ + +static const char *str_cache_states[] = { + [RPKI_CS_CONNECTING] = "Connecting", + [RPKI_CS_ESTABLISHED] = "Established", + [RPKI_CS_RESET] = "Reseting", + [RPKI_CS_SYNC_START] = "Sync-Start", + [RPKI_CS_SYNC_RUNNING] = "Sync-Running", + [RPKI_CS_FAST_RECONNECT] = "Fast-Reconnect", + [RPKI_CS_NO_INCR_UPDATE_AVAIL]= "No-Increment-Update-Available", + [RPKI_CS_ERROR_NO_DATA_AVAIL] = "Cache-Error-No-Data-Available", + [RPKI_CS_ERROR_FATAL] = "Fatal-Protocol-Error", + [RPKI_CS_ERROR_TRANSPORT] = "Transport-Error", + [RPKI_CS_SHUTDOWN] = "Down" +}; + +/** + * rpki_cache_state_to_str - give a text representation of cache state + * @state: A cache state + * + * The function converts logic cache state into string. + */ +const char * +rpki_cache_state_to_str(enum rpki_cache_state state) +{ + return str_cache_states[state]; +} + +/** + * rpki_start_cache - connect to a cache server + * @cache: RPKI connection instance + * + * This function is a high level method to kick up a connection to a cache server. + */ +static void +rpki_start_cache(struct rpki_cache *cache) +{ + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); +} + +/** + * rpki_force_restart_proto - force shutdown and start protocol again + * @p: RPKI protocol instance + * + * This function calls shutdown and frees all protocol resources as well. + * After calling this function should be no operations with protocol data, + * they could be freed already. + */ +static void +rpki_force_restart_proto(struct rpki_proto *p) +{ + if (p->cache) + { + CACHE_DBG(p->cache, "Connection object destroying"); + } + + /* Sign as freed */ + p->cache = NULL; + + proto_notify_state(&p->p, PS_DOWN); +} + +/** + * rpki_cache_change_state - check and change cache state + * @cache: RPKI cache instance + * @new_state: suggested new state + * + * This function makes transitions between internal states. + * It represents the core of logic management of RPKI protocol. 
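 * A typical start-up therefore chains |RPKI_CS_CONNECTING| ~>
 * |RPKI_CS_SYNC_START| ~> |RPKI_CS_SYNC_RUNNING| ~> |RPKI_CS_ESTABLISHED|,
 * some of the steps being nested calls of this very function.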
+ * Cannot transit into the same state as cache is in already. + */ +void +rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state) +{ + const enum rpki_cache_state old_state = cache->state; + + if (old_state == new_state) + return; + + cache->state = new_state; + CACHE_TRACE(D_EVENTS, cache, "Changing from %s to %s state", rpki_cache_state_to_str(old_state), rpki_cache_state_to_str(new_state)); + + switch (new_state) + { + case RPKI_CS_CONNECTING: + { + sock *sk = cache->tr_sock->sk; + + if (sk == NULL || sk->fd < 0) + rpki_open_connection(cache); + else + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + + rpki_schedule_next_retry(cache); + break; + } + + case RPKI_CS_ESTABLISHED: + rpki_schedule_next_refresh(cache); + rpki_schedule_next_expire_check(cache); + rpki_stop_retry_timer_event(cache); + break; + + case RPKI_CS_RESET: + /* Resetting cache connection. */ + cache->request_session_id = 1; + cache->serial_num = 0; + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + break; + + case RPKI_CS_SYNC_START: + /* Requesting for receive ROAs from a cache server. */ + if (cache->request_session_id) + { + /* Send request for Session ID */ + if (rpki_send_reset_query(cache) != RPKI_SUCCESS) + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + else + { + /* We have already a session_id. So send a Serial Query and start an incremental sync */ + if (rpki_send_serial_query(cache) != RPKI_SUCCESS) + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + case RPKI_CS_SYNC_RUNNING: + /* The state between Cache Response and End of Data. Only waiting for + * receiving all IP Prefix PDUs and finally a End of Data PDU. */ + break; + + case RPKI_CS_NO_INCR_UPDATE_AVAIL: + /* Server was unable to answer the last Serial Query and sent Cache Reset. */ + rpki_cache_change_state(cache, RPKI_CS_RESET); + break; + + case RPKI_CS_ERROR_NO_DATA_AVAIL: + /* No validation records are available on the cache server. */ + rpki_cache_change_state(cache, RPKI_CS_RESET); + break; + + case RPKI_CS_ERROR_FATAL: + /* Fatal protocol error occurred. */ + rpki_force_restart_proto(cache->p); + break; + + case RPKI_CS_ERROR_TRANSPORT: + /* Error on the transport socket occurred. 
*/ + rpki_close_connection(cache); + rpki_schedule_next_retry(cache); + rpki_stop_refresh_timer_event(cache); + break; + + case RPKI_CS_FAST_RECONNECT: + /* Reconnect without any waiting period */ + rpki_close_connection(cache); + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); + break; + + case RPKI_CS_SHUTDOWN: + bug("This isn't never really called."); + break; + }; +} + + +/* + * RPKI Timer Events + */ + +static void +rpki_schedule_next_refresh(struct rpki_cache *cache) +{ + btime t = cache->refresh_interval S; + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->refresh_timer, t); +} + +static void +rpki_schedule_next_retry(struct rpki_cache *cache) +{ + btime t = cache->retry_interval S; + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->retry_timer, t); +} + +static void +rpki_schedule_next_expire_check(struct rpki_cache *cache) +{ + /* A minimum time to wait is 1 second */ + btime t = cache->last_update + cache->expire_interval S - current_time(); + t = MAX(t, 1 S); + + CACHE_DBG(cache, "after %t s", t); + tm_start(cache->expire_timer, t); +} + +static void +rpki_stop_refresh_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->refresh_timer); +} + +static void +rpki_stop_retry_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->retry_timer); +} + +static void UNUSED +rpki_stop_expire_timer_event(struct rpki_cache *cache) +{ + CACHE_DBG(cache, "Stop"); + tm_stop(cache->expire_timer); +} + +static int +rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache) +{ + if (!cache->last_rx_prefix) + return 0; + + return ((current_time() - cache->last_rx_prefix) <= 2 S); +} + +/** + * rpki_refresh_hook - control a scheduling of downloading data from cache server + * @tm: refresh timer with cache connection instance in data + * + * This function is periodically called during &ESTABLISHED or &SYNC* state + * cache connection. The first refresh schedule is invoked after receiving a + * |End of Data| PDU and has run by some &ERROR is occurred. + */ +static void +rpki_refresh_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + switch (cache->state) + { + case RPKI_CS_ESTABLISHED: + rpki_cache_change_state(cache, RPKI_CS_SYNC_START); + break; + + case RPKI_CS_SYNC_START: + /* We sent Serial/Reset Query in last refresh hook call + * and didn't receive Cache Response yet. It is probably + * troubles with network. */ + case RPKI_CS_SYNC_RUNNING: + /* We sent Serial/Reset Query in last refresh hook call + * and we got Cache Response but didn't get End-Of-Data yet. + * It could be a trouble with network or only too long synchronization. */ + if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache)) + { + CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than refresh interval %us, resetting connection", cache->refresh_interval); + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + default: + break; + } + + if (cache->state != RPKI_CS_SHUTDOWN && cache->state != RPKI_CS_ERROR_TRANSPORT) + rpki_schedule_next_refresh(cache); + else + rpki_stop_refresh_timer_event(cache); +} + +/** + * rpki_retry_hook - control a scheduling of retrying connection to cache server + * @tm: retry timer with cache connection instance in data + * + * This function is periodically called during &ERROR* state cache connection. 
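 * If the previous attempt is still stuck in |Connecting| or a |Sync-*| state
 * when the hook fires again (and no Prefix PDU has arrived within the last
 * few seconds), the connection is reset instead of being waited for.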
+ * The first retry schedule is invoked after any &ERROR* state occurred and + * ends by reaching of &ESTABLISHED state again. + */ +static void +rpki_retry_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + switch (cache->state) + { + case RPKI_CS_ESTABLISHED: + case RPKI_CS_SHUTDOWN: + break; + + case RPKI_CS_CONNECTING: + case RPKI_CS_SYNC_START: + case RPKI_CS_SYNC_RUNNING: + if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache)) + { + /* We tried to establish a connection in last retry hook call and haven't done + * yet. It looks like troubles with network. We are aggressive here. */ + CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than retry interval %us, resetting connection.", cache->retry_interval); + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + } + break; + + default: + rpki_cache_change_state(cache, RPKI_CS_CONNECTING); + break; + } + + if (cache->state != RPKI_CS_ESTABLISHED) + rpki_schedule_next_retry(cache); + else + rpki_stop_retry_timer_event(cache); +} + +/** + * rpki_expire_hook - control a expiration of ROA entries + * @tm: expire timer with cache connection instance in data + * + * This function is scheduled after received a |End of Data| PDU. + * A waiting interval is calculated dynamically by last update. + * If we reach an expiration time then we invoke a restarting + * of the protocol. + */ +static void +rpki_expire_hook(timer *tm) +{ + struct rpki_cache *cache = tm->data; + + if (!cache->last_update) + return; + + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); + + btime t = cache->last_update + cache->expire_interval S - current_time(); + if (t <= 0) + { + CACHE_TRACE(D_EVENTS, cache, "All ROAs expired"); + rpki_force_restart_proto(cache->p); + } + else + { + CACHE_DBG(cache, "Remains %t seconds to become ROAs obsolete", t); + rpki_schedule_next_expire_check(cache); + } +} + +/** + * rpki_check_refresh_interval - check validity of refresh interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. + * If the check doesn't pass then returns error message. + */ +const char * +rpki_check_refresh_interval(uint seconds) +{ + if (seconds < 1) + return "Minimum allowed refresh interval is 1 second"; + if (seconds > 86400) + return "Maximum allowed refresh interval is 86400 seconds"; + return NULL; +} + +/** + * rpki_check_retry_interval - check validity of retry interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. + * If the check doesn't pass then returns error message. + */ +const char * +rpki_check_retry_interval(uint seconds) +{ + if (seconds < 1) + return "Minimum allowed retry interval is 1 second"; + if (seconds > 7200) + return "Maximum allowed retry interval is 7200 seconds"; + return NULL; +} + +/** + * rpki_check_expire_interval - check validity of expire interval value + * @seconds: suggested value + * + * This function validates value and should return |NULL|. + * If the check doesn't pass then returns error message. 
+ */ +const char * +rpki_check_expire_interval(uint seconds) +{ + if (seconds < 600) + return "Minimum allowed expire interval is 600 seconds"; + if (seconds > 172800) + return "Maximum allowed expire interval is 172800 seconds"; + return NULL; +} + + +/* + * RPKI Cache + */ + +static struct rpki_cache * +rpki_init_cache(struct rpki_proto *p, struct rpki_config *cf) +{ + pool *pool = rp_new(p->p.pool, cf->hostname); + + struct rpki_cache *cache = mb_allocz(pool, sizeof(struct rpki_cache)); + + cache->pool = pool; + cache->p = p; + + cache->state = RPKI_CS_SHUTDOWN; + cache->request_session_id = 1; + cache->version = RPKI_MAX_VERSION; + + cache->refresh_interval = cf->refresh_interval; + cache->retry_interval = cf->retry_interval; + cache->expire_interval = cf->expire_interval; + cache->refresh_timer = tm_new_init(pool, &rpki_refresh_hook, cache, 0, 0); + cache->retry_timer = tm_new_init(pool, &rpki_retry_hook, cache, 0, 0); + cache->expire_timer = tm_new_init(pool, &rpki_expire_hook, cache, 0, 0); + + cache->tr_sock = mb_allocz(pool, sizeof(struct rpki_tr_sock)); + cache->tr_sock->cache = cache; + + switch (cf->tr_config.type) + { + case RPKI_TR_TCP: rpki_tr_tcp_init(cache->tr_sock); break; + case RPKI_TR_SSH: rpki_tr_ssh_init(cache->tr_sock); break; + }; + + CACHE_DBG(cache, "Connection object created"); + + return cache; +} + +/** + * rpki_get_cache_ident - give a text representation of cache server name + * @cache: RPKI connection instance + * + * The function converts cache connection into string. + */ +const char * +rpki_get_cache_ident(struct rpki_cache *cache) +{ + return rpki_tr_ident(cache->tr_sock); +} + +static int +rpki_open_connection(struct rpki_cache *cache) +{ + CACHE_TRACE(D_EVENTS, cache, "Opening a connection"); + + if (rpki_tr_open(cache->tr_sock) == RPKI_TR_ERROR) + { + rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); + return RPKI_TR_ERROR; + } + + return RPKI_TR_SUCCESS; +} + +static void +rpki_close_connection(struct rpki_cache *cache) +{ + CACHE_TRACE(D_EVENTS, cache, "Closing a connection"); + rpki_tr_close(cache->tr_sock); + proto_notify_state(&cache->p->p, PS_START); +} + +static int +rpki_shutdown(struct proto *P) +{ + struct rpki_proto *p = (void *) P; + + rpki_force_restart_proto(p); + + /* Protocol memory pool will be automatically freed */ + return PS_DOWN; +} + + +/* + * RPKI Reconfiguration + */ + +static int +rpki_try_fast_reconnect(struct rpki_cache *cache) +{ + if (cache->state == RPKI_CS_ESTABLISHED) + { + rpki_cache_change_state(cache, RPKI_CS_FAST_RECONNECT); + return SUCCESSFUL_RECONF; + } + + return NEED_RESTART; +} + +/** + * rpki_reconfigure_cache - a cache reconfiguration + * @p: RPKI protocol instance + * @cache: a cache connection + * @new: new RPKI configuration + * @old: old RPKI configuration + * + * This function reconfigures existing single cache server connection with new + * existing configuration. Generally, a change of time intervals could be + * reconfigured without restarting and all others changes requires a restart of + * protocol. Returns |NEED_TO_RESTART| or |SUCCESSFUL_RECONF|. 
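 * For example, changing only the configured intervals is handled by a fast
 * reconnect of an established session, while changing the remote address,
 * port or transport type always restarts the whole protocol.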
+ */ +static int +rpki_reconfigure_cache(struct rpki_proto *p UNUSED, struct rpki_cache *cache, struct rpki_config *new, struct rpki_config *old) +{ + u8 try_fast_reconnect = 0; + + if (strcmp(old->hostname, new->hostname) != 0) + { + CACHE_TRACE(D_EVENTS, cache, "Cache server address changed to %s", new->hostname); + return NEED_RESTART; + } + + if (old->port != new->port) + { + CACHE_TRACE(D_EVENTS, cache, "Cache server port changed to %u", new->port); + return NEED_RESTART; + } + + if (old->tr_config.type != new->tr_config.type) + { + CACHE_TRACE(D_EVENTS, cache, "Transport type changed"); + return NEED_RESTART; + } + else if (new->tr_config.type == RPKI_TR_SSH) + { + struct rpki_tr_ssh_config *ssh_old = (void *) old->tr_config.spec; + struct rpki_tr_ssh_config *ssh_new = (void *) new->tr_config.spec; + if ((strcmp(ssh_old->bird_private_key, ssh_new->bird_private_key) != 0) || + (strcmp(ssh_old->cache_public_key, ssh_new->cache_public_key) != 0) || + (strcmp(ssh_old->user, ssh_new->user) != 0)) + { + CACHE_TRACE(D_EVENTS, cache, "Settings of SSH transport configuration changed"); + try_fast_reconnect = 1; + } + } + +#define TEST_INTERVAL(name, Name) \ + if (cache->name##_interval != new->name##_interval || \ + old->keep_##name##_interval != new->keep_##name##_interval) \ + { \ + cache->name##_interval = new->name##_interval; \ + CACHE_TRACE(D_EVENTS, cache, #Name " interval changed to %u seconds %s", cache->name##_interval, (new->keep_##name##_interval ? "and keep it" : "")); \ + try_fast_reconnect = 1; \ + } + TEST_INTERVAL(refresh, Refresh); + TEST_INTERVAL(retry, Retry); + TEST_INTERVAL(expire, Expire); +#undef TEST_INTERVAL + + if (try_fast_reconnect) + return rpki_try_fast_reconnect(cache); + + return SUCCESSFUL_RECONF; +} + +/** + * rpki_reconfigure - a protocol reconfiguration hook + * @P: a protocol instance + * @CF: a new protocol configuration + * + * This function reconfigures whole protocol. + * It sets new protocol configuration into a protocol structure. + * Returns |NEED_TO_RESTART| or |SUCCESSFUL_RECONF|. 
+ */ +static int +rpki_reconfigure(struct proto *P, struct proto_config *CF) +{ + struct rpki_proto *p = (void *) P; + struct rpki_config *new = (void *) CF; + struct rpki_config *old = (void *) p->p.cf; + struct rpki_cache *cache = p->cache; + + if (!proto_configure_channel(&p->p, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4)) || + !proto_configure_channel(&p->p, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6))) + return NEED_RESTART; + + if (rpki_reconfigure_cache(p, cache, new, old) != SUCCESSFUL_RECONF) + return NEED_RESTART; + + return SUCCESSFUL_RECONF; +} + + +/* + * RPKI Protocol Glue + */ + +static struct proto * +rpki_init(struct proto_config *CF) +{ + struct proto *P = proto_new(CF); + struct rpki_proto *p = (void *) P; + + proto_configure_channel(&p->p, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4)); + proto_configure_channel(&p->p, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6)); + + return P; +} + +static int +rpki_start(struct proto *P) +{ + struct rpki_proto *p = (void *) P; + struct rpki_config *cf = (void *) P->cf; + + p->cache = rpki_init_cache(p, cf); + rpki_start_cache(p->cache); + + return PS_START; +} + +static void +rpki_get_status(struct proto *P, byte *buf) +{ + struct rpki_proto *p = (struct rpki_proto *) P; + + if (P->proto_state == PS_DOWN) + { + *buf = 0; + return; + } + + if (p->cache) + bsprintf(buf, "%s", rpki_cache_state_to_str(p->cache->state)); + else + bsprintf(buf, "No cache server configured"); +} + +static void +rpki_show_proto_info_timer(const char *name, uint num, timer *t) +{ + if (tm_active(t)) + cli_msg(-1006, " %-16s: %t/%u", name, tm_remains(t), num); + else + cli_msg(-1006, " %-16s: ---", name); +} + +static void +rpki_show_proto_info(struct proto *P) +{ + struct rpki_proto *p = (struct rpki_proto *) P; + struct rpki_config *cf = (void *) p->p.cf; + struct rpki_cache *cache = p->cache; + + if (P->proto_state == PS_DOWN) + return; + + if (cache) + { + const char *transport_name = "---"; + + switch (cf->tr_config.type) + { + case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + }; + + cli_msg(-1006, " Cache server: %s", rpki_get_cache_ident(cache)); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); + cli_msg(-1006, " Transport: %s", transport_name); + cli_msg(-1006, " Protocol version: %u", cache->version); + + if (cache->request_session_id) + cli_msg(-1006, " Session ID: ---"); + else + cli_msg(-1006, " Session ID: %u", cache->session_id); + + if (cache->last_update) + { + cli_msg(-1006, " Serial number: %u", cache->serial_num); + cli_msg(-1006, " Last update: before %t s", current_time() - cache->last_update); + } + else + { + cli_msg(-1006, " Serial number: ---"); + cli_msg(-1006, " Last update: ---"); + } + + rpki_show_proto_info_timer("Refresh timer", cache->refresh_interval, cache->refresh_timer); + rpki_show_proto_info_timer("Retry timer", cache->retry_interval, cache->retry_timer); + rpki_show_proto_info_timer("Expire timer", cache->expire_interval, cache->expire_timer); + + if (p->roa4_channel) + channel_show_info(p->roa4_channel); + else + cli_msg(-1006, " No roa4 channel"); + + if (p->roa6_channel) + channel_show_info(p->roa6_channel); + else + cli_msg(-1006, " No roa6 channel"); + } +} + + +/* + * RPKI Protocol Configuration + */ + +/** + * rpki_check_config - check and complete configuration of RPKI protocol + * @cf: RPKI configuration + * + * This function is called at the end of parsing RPKI protocol 
configuration. + */ +void +rpki_check_config(struct rpki_config *cf) +{ + /* Do not check templates at all */ + if (cf->c.class == SYM_TEMPLATE) + return; + + if (ipa_zero(cf->ip) && cf->hostname == NULL) + cf_error("IP address or hostname of cache server must be set"); + + /* Set default transport type */ + if (cf->tr_config.spec == NULL) + { + cf->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config)); + cf->tr_config.type = RPKI_TR_TCP; + } + + if (cf->port == 0) + { + /* Set default port numbers */ + switch (cf->tr_config.type) + { + case RPKI_TR_SSH: + cf->port = RPKI_SSH_PORT; + break; + default: + cf->port = RPKI_TCP_PORT; + } + } +} + +static void +rpki_postconfig(struct proto_config *CF) +{ + /* Define default channel */ + if (EMPTY_LIST(CF->channels)) + channel_config_new(NULL, CF->net_type, CF); +} + +static void +rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) +{ + /* FIXME: Should copy transport */ +} + +struct protocol proto_rpki = { + .name = "RPKI", + .template = "rpki%d", + .preference = DEF_PREF_RPKI, + .proto_size = sizeof(struct rpki_proto), + .config_size = sizeof(struct rpki_config), + .init = rpki_init, + .start = rpki_start, + .postconfig = rpki_postconfig, + .channel_mask = (NB_ROA4 | NB_ROA6), + .show_proto_info = rpki_show_proto_info, + .shutdown = rpki_shutdown, + .copy_config = rpki_copy_config, + .reconfigure = rpki_reconfigure, + .get_status = rpki_get_status, +}; diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h new file mode 100644 index 00000000..8972b33a --- /dev/null +++ b/proto/rpki/rpki.h @@ -0,0 +1,165 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * Using RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_RPKI_H_ +#define _BIRD_RPKI_H_ + +#include "nest/bird.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "lib/socket.h" +#include "lib/ip.h" + +#include "transport.h" +#include "packets.h" + +#define RPKI_TCP_PORT 323 +#define RPKI_SSH_PORT 22 +#define RPKI_RETRY_INTERVAL 600 +#define RPKI_REFRESH_INTERVAL 3600 +#define RPKI_EXPIRE_INTERVAL 7200 + +#define RPKI_VERSION_0 0 +#define RPKI_VERSION_1 1 +#define RPKI_MAX_VERSION RPKI_VERSION_1 + + +/* + * RPKI Cache + */ + +enum rpki_cache_state { + RPKI_CS_CONNECTING, /* Socket is establishing the transport connection. */ + RPKI_CS_ESTABLISHED, /* Connection is established, socket is waiting for a Serial Notify or expiration of the refresh_interval timer */ + RPKI_CS_RESET, /* Resetting RTR connection. */ + RPKI_CS_SYNC_START, /* Sending a Serial/Reset Query PDU and expecting a Cache Response PDU */ + RPKI_CS_SYNC_RUNNING, /* Receiving validation records from the RTR server. A state between Cache Response PDU and End of Data PDU */ + RPKI_CS_FAST_RECONNECT, /* Reconnect without any waiting period */ + RPKI_CS_NO_INCR_UPDATE_AVAIL, /* Server is unable to answer the last Serial Query and sent Cache Reset. */ + RPKI_CS_ERROR_NO_DATA_AVAIL, /* Server is unable to answer either a Serial Query or a Reset Query because it has no useful data available at this time. */ + RPKI_CS_ERROR_FATAL, /* Fatal protocol error occurred. */ + RPKI_CS_ERROR_TRANSPORT, /* Error on the transport socket occurred. */ + RPKI_CS_SHUTDOWN, /* RTR Socket is stopped. 
*/ +}; + +struct rpki_cache { + pool *pool; /* Pool containing cache objects */ + struct rpki_proto *p; + + struct rpki_tr_sock *tr_sock; /* Transport specific socket */ + enum rpki_cache_state state; /* RPKI_CS_* */ + u32 session_id; + u8 request_session_id; /* 1: have to request new session id; 0: we have already received session id */ + u32 serial_num; /* Serial number denotes the logical version of data from cache server */ + u8 version; /* Protocol version */ + btime last_update; /* Last successful synchronization with cache server */ + btime last_rx_prefix; /* Last received prefix PDU */ + + /* Intervals can be changed by cache server on the fly */ + u32 refresh_interval; /* Actual refresh interval (in seconds) */ + u32 retry_interval; + u32 expire_interval; + timer *retry_timer; /* Retry timer event */ + timer *refresh_timer; /* Refresh timer event */ + timer *expire_timer; /* Expire timer event */ +}; + +const char *rpki_get_cache_ident(struct rpki_cache *cache); +const char *rpki_cache_state_to_str(enum rpki_cache_state state); + + +/* + * Routes handling + */ + +void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); +void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); + + +/* + * RPKI Protocol Logic + */ + +void rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state); + + +/* + * RPKI Timer Events + */ + +const char *rpki_check_refresh_interval(uint seconds); +const char *rpki_check_retry_interval(uint seconds); +const char *rpki_check_expire_interval(uint seconds); + + +/* + * RPKI Protocol Configuration + */ + +struct rpki_proto { + struct proto p; + struct rpki_cache *cache; + + struct channel *roa4_channel; + struct channel *roa6_channel; + u8 refresh_channels; /* For non-incremental updates using rt_refresh_begin(), rt_refresh_end() */ +}; + +struct rpki_config { + struct proto_config c; + const char *hostname; /* Full domain name or stringified IP address of cache server */ + ip_addr ip; /* IP address of cache server or IPA_NONE */ + u16 port; /* Port number of cache server */ + struct rpki_tr_config tr_config; /* Specific transport configuration structure */ + u32 refresh_interval; /* Time interval (in seconds) for periodical downloading data from cache server */ + u32 retry_interval; /* Time interval (in seconds) for an unreachable server */ + u32 expire_interval; /* Maximal lifetime (in seconds) of ROAs without any successful refreshment */ + u8 keep_refresh_interval:1; /* Do not overwrite refresh interval by cache server update */ + u8 keep_retry_interval:1; /* Do not overwrite retry interval by cache server update */ + u8 keep_expire_interval:1; /* Do not overwrite expire interval by cache server update */ +}; + +void rpki_check_config(struct rpki_config *cf); + + +/* + * Logger + */ + +#define RPKI_LOG(log_level, rpki, msg, args...) \ + do { \ + log(log_level "%s: " msg, (rpki)->p.name , ## args); \ + } while(0) + +#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) +#define CACHE_DBG(cache,msg,args...) \ + do { \ + RPKI_LOG(L_DEBUG, (cache)->p, "%s [%s] %s " msg, rpki_get_cache_ident(cache), rpki_cache_state_to_str((cache)->state), __func__, ## args); \ + } while(0) +#else +#define CACHE_DBG(cache,msg,args...) do { } while(0) +#endif + +#define RPKI_TRACE(level,rpki,msg,args...) \ + do { \ + if ((rpki)->p.debug & level) \ + RPKI_LOG(L_TRACE, rpki, msg, ## args); \ + } while(0) + +#define CACHE_TRACE(level,cache,msg,args...) 
\ + do { \ + if ((cache)->p->p.debug & level) \ + RPKI_LOG(L_TRACE, (cache)->p, msg, ## args); \ + } while(0) + +#define RPKI_WARN(p, msg, args...) RPKI_LOG(L_WARN, p, msg, ## args); + +#endif /* _BIRD_RPKI_H_ */ diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c new file mode 100644 index 00000000..cd49ab90 --- /dev/null +++ b/proto/rpki/ssh_transport.c @@ -0,0 +1,75 @@ +/* + * BIRD -- An implementation of the SSH protocol for the RPKI transport + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * This transport implementation uses libssh (http://www.libssh.org/) + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> + +#include "rpki.h" + +static int +rpki_tr_ssh_open(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec; + sock *sk = tr->sk; + + sk->type = SK_SSH_ACTIVE; + sk->ssh = mb_allocz(sk->pool, sizeof(struct ssh_sock)); + sk->ssh->username = ssh_cf->user; + sk->ssh->client_privkey_path = ssh_cf->bird_private_key; + sk->ssh->server_hostkey_path = ssh_cf->cache_public_key; + sk->ssh->subsystem = "rpki-rtr"; + sk->ssh->state = SK_SSH_CONNECT; + + if (sk_open(sk) != 0) + return RPKI_TR_ERROR; + + return RPKI_TR_SUCCESS; +} + +static const char * +rpki_tr_ssh_ident(struct rpki_tr_sock *tr) +{ + ASSERT(tr != NULL); + + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec; + + if (tr->ident != NULL) + return tr->ident; + + const char *username = ssh_cf->user; + const char *host = cf->hostname; + u16 port = cf->port; + + size_t len = strlen(username) + 1 + strlen(host) + 1 + 5 + 1; /* <user> + '@' + <host> + ':' + <port> + '\0' */ + char *ident = mb_alloc(cache->pool, len); + bsnprintf(ident, len, "%s@%s:%u", username, host, port); + tr->ident = ident; + + return tr->ident; +} + +/** + * rpki_tr_ssh_init - initializes the RPKI transport structure for a SSH connection + * @tr: allocated RPKI transport structure + */ +void +rpki_tr_ssh_init(struct rpki_tr_sock *tr) +{ + tr->open_fp = &rpki_tr_ssh_open; + tr->ident_fp = &rpki_tr_ssh_ident; +} diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c new file mode 100644 index 00000000..6c05964a --- /dev/null +++ b/proto/rpki/tcp_transport.c @@ -0,0 +1,78 @@ +/* + * BIRD -- An implementation of the TCP protocol for the RPKI protocol transport + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. 
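The *_init() functions above follow one pattern: a transport only has to supply an open hook and an ident hook on the rpki_tr_sock. A hypothetical third transport (the my_tr_* names are illustrative and not part of this patch) would be wired up the same way:

static int
my_tr_open(struct rpki_tr_sock *tr)
{
  sock *sk = tr->sk;
  sk->type = SK_TCP_ACTIVE;   /* transport-specific socket setup goes here */
  return (sk_open(sk) == 0) ? RPKI_TR_SUCCESS : RPKI_TR_ERROR;
}

static const char *
my_tr_ident(struct rpki_tr_sock *tr UNUSED)
{
  return "my-transport";      /* normally built from the cache configuration */
}

void
my_tr_init(struct rpki_tr_sock *tr)
{
  tr->open_fp = &my_tr_open;
  tr->ident_fp = &my_tr_ident;
}
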
+ */ + +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include "rpki.h" +#include "sysdep/unix/unix.h" + +static int +rpki_tr_tcp_open(struct rpki_tr_sock *tr) +{ + sock *sk = tr->sk; + + sk->type = SK_TCP_ACTIVE; + + if (sk_open(sk) != 0) + return RPKI_TR_ERROR; + + return RPKI_TR_SUCCESS; +} + +static const char * +rpki_tr_tcp_ident(struct rpki_tr_sock *tr) +{ + ASSERT(tr != NULL); + + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + + if (tr->ident != NULL) + return tr->ident; + + const char *host = cf->hostname; + ip_addr ip = cf->ip; + u16 port = cf->port; + + size_t colon_and_port_len = 6; /* max ":65535" */ + size_t ident_len; + if (host) + ident_len = strlen(host) + colon_and_port_len + 1; + else + ident_len = IPA_MAX_TEXT_LENGTH + colon_and_port_len + 1; + + char *ident = mb_alloc(cache->pool, ident_len); + if (host) + bsnprintf(ident, ident_len, "%s:%u", host, port); + else + bsnprintf(ident, ident_len, "%I:%u", ip, port); + + tr->ident = ident; + return tr->ident; +} + +/** + * rpki_tr_tcp_init - initializes the RPKI transport structure for a TCP connection + * @tr: allocated RPKI transport structure + */ +void +rpki_tr_tcp_init(struct rpki_tr_sock *tr) +{ + tr->open_fp = &rpki_tr_tcp_open; + tr->ident_fp = &rpki_tr_tcp_ident; +} diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c new file mode 100644 index 00000000..182667be --- /dev/null +++ b/proto/rpki/transport.c @@ -0,0 +1,135 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <sys/socket.h> +#include <netdb.h> + +#include "rpki.h" +#include "transport.h" +#include "sysdep/unix/unix.h" + +/** + * rpki_hostname_autoresolv - auto-resolve an IP address from a hostname + * @host: domain name of host, e.g. "rpki-validator.realmv6.org" + * + * This function resolves an IP address from a hostname. + * Returns &ip_addr structure with IP address or |IPA_NONE|. + */ +static ip_addr +rpki_hostname_autoresolv(const char *host) +{ + ip_addr addr = {}; + struct addrinfo *res; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_ADDRCONFIG, + }; + + if (!host) + return IPA_NONE; + + int err_code = getaddrinfo(host, NULL, &hints, &res); + if (err_code != 0) + { + log(L_DEBUG "getaddrinfo failed: %s", gai_strerror(err_code)); + return IPA_NONE; + } + + sockaddr sa = { + .sa = *res->ai_addr, + }; + + uint unused; + sockaddr_read(&sa, res->ai_family, &addr, NULL, &unused); + + freeaddrinfo(res); + return addr; +} + +/** + * rpki_tr_open - prepare and open a socket connection + * @tr: initialized transport socket + * + * Prepare and open a socket connection specified by @tr that must be initialized before. + * This function ends with a calling the sk_open() function. + * Returns RPKI_TR_SUCCESS or RPKI_TR_ERROR. 
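A minimal sketch of the expected call sequence, condensed from rpki_init_cache() and rpki_open_connection() earlier in this patch (the wrapper name example_open is made up for the illustration):

static void
example_open(struct rpki_cache *cache)
{
  struct rpki_tr_sock *tr = mb_allocz(cache->pool, sizeof(struct rpki_tr_sock));
  tr->cache = cache;

  rpki_tr_tcp_init(tr);                    /* or rpki_tr_ssh_init(tr) */

  if (rpki_tr_open(tr) == RPKI_TR_ERROR)   /* allocates tr->sk, then calls sk_open() */
    rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
}
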
+ */ +int +rpki_tr_open(struct rpki_tr_sock *tr) +{ + struct rpki_cache *cache = tr->cache; + struct rpki_config *cf = (void *) cache->p->p.cf; + + ASSERT(tr->sk == NULL); + tr->sk = sk_new(cache->pool); + sock *sk = tr->sk; + + /* sk->type -1 is invalid value, a correct value MUST be set in the specific transport layer in open_fp() hook */ + sk->type = -1; + + sk->tx_hook = rpki_connected_hook; + sk->err_hook = rpki_err_hook; + sk->data = cache; + sk->daddr = cf->ip; + sk->dport = cf->port; + sk->host = cf->hostname; + sk->rbsize = RPKI_RX_BUFFER_SIZE; + sk->tbsize = RPKI_TX_BUFFER_SIZE; + sk->tos = IP_PREC_INTERNET_CONTROL; + + if (ipa_zero2(sk->daddr) && sk->host) + { + sk->daddr = rpki_hostname_autoresolv(sk->host); + if (ipa_zero(sk->daddr)) + { + CACHE_TRACE(D_EVENTS, cache, "Cannot resolve the hostname '%s'", sk->host); + return RPKI_TR_ERROR; + } + } + + return tr->open_fp(tr); +} + +/** + * rpki_tr_close - close socket and prepare it for possible next open + * @tr: successfully opened transport socket + * + * Close socket and free resources. + */ +void +rpki_tr_close(struct rpki_tr_sock *tr) +{ + if (tr->ident) + { + mb_free((char *) tr->ident); + tr->ident = NULL; + } + + if (tr->sk) + { + rfree(tr->sk); + tr->sk = NULL; + } +} + +/** + * rpki_tr_ident - Returns a string identifier for the rpki transport socket + * @tr: successfully opened transport socket + * + * Returns a \0 terminated string identifier for the socket endpoint, e.g. "<host>:<port>". + * Memory is allocated inside @tr structure. + */ +inline const char * +rpki_tr_ident(struct rpki_tr_sock *tr) +{ + return tr->ident_fp(tr); +} diff --git a/proto/rpki/transport.h b/proto/rpki/transport.h new file mode 100644 index 00000000..f90b7e42 --- /dev/null +++ b/proto/rpki/transport.h @@ -0,0 +1,79 @@ +/* + * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol + * + * (c) 2015 CZ.NIC + * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com> + * + * This file was a part of RTRlib: http://rpki.realmv6.org/ + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/* + * The RPKI transport sockets implement the communication channel + * (e.g., SSH, TCP, TCP-AO) between an RPKI server and client. + * + * Before using the transport socket, a tr_socket must be + * initialized based on a protocol-dependent init function (e.g., + * rpki_tr_tcp_init()). + * + * The rpki_tr_* functions call the corresponding function pointers, which are + * passed in the rpki_tr_sock structure, and forward the remaining arguments. + */ + +#ifndef _BIRD_RPKI_TRANSPORT_H_ +#define _BIRD_RPKI_TRANSPORT_H_ + +#include <time.h> + +/* The return values for rpki_tr_ functions */ +enum rpki_tr_rtvals { + RPKI_TR_SUCCESS = 0, /* Operation was successful */ + RPKI_TR_ERROR = -1, /* Error occurred */ + RPKI_TR_WOULDBLOCK = -2, /* No data is available on the socket */ + RPKI_TR_INTR = -3, /* Call was interrupted from a signal */ + RPKI_TR_CLOSED = -4 /* Connection closed */ +}; + +/* A transport socket structure */ +struct rpki_tr_sock { + sock *sk; /* Standard BIRD socket */ + struct rpki_cache *cache; /* Cache server */ + int (*open_fp)(struct rpki_tr_sock *); /* Function that establishes the socket connection */ + const char *(*ident_fp)(struct rpki_tr_sock *); /* Function that returns an identifier for the socket endpoint */ + const char *ident; /* Internal. 
Use ident_fp() hook instead of this pointer */ +}; + +int rpki_tr_open(struct rpki_tr_sock *tr); +void rpki_tr_close(struct rpki_tr_sock *tr); +const char *rpki_tr_ident(struct rpki_tr_sock *tr); + +/* Types of supported transports */ +enum rpki_tr_type { + RPKI_TR_TCP, /* Unprotected transport over TCP */ + RPKI_TR_SSH, /* Protected transport by SSHv2 connection */ +}; + +/* Common configure structure for transports */ +struct rpki_tr_config { + enum rpki_tr_type type; /* RPKI_TR_TCP or RPKI_TR_SSH */ + const void *spec; /* Specific transport configuration, i.e. rpki_tr_tcp_config or rpki_tr_ssh_config */ +}; + +struct rpki_tr_tcp_config { + /* No internal configuration data */ +}; + +struct rpki_tr_ssh_config { + const char *bird_private_key; /* Filepath to the BIRD server private key */ + const char *cache_public_key; /* Filepath to the public key of cache server, can be file known_hosts */ + const char *user; /* Username for SSH connection */ +}; + +/* ssh_transport.c */ +void rpki_tr_ssh_init(struct rpki_tr_sock *tr); + +/* tcp_transport.c */ +void rpki_tr_tcp_init(struct rpki_tr_sock *tr); + +#endif /* _BIRD_RPKI_TRANSPORT_H_ */ diff --git a/proto/static/Makefile b/proto/static/Makefile index 61fadbea..e38f9b74 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -1,6 +1,6 @@ -source=static.c -root-rel=../../ -dir-name=proto/static - -include ../../Rules +src := static.c +obj := $(src-o-files) +$(all-daemon) +$(cf-local) +tests_objs := $(tests_objs) $(src-o-files)
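A short sketch, using only the transport structures declared above, of what an SSH transport configuration looks like once filled in; in the daemon this is produced by the config parser, and the literal values here are placeholders:

static void
example_ssh_config(struct rpki_config *cf)
{
  struct rpki_tr_ssh_config *ssh = cfg_allocz(sizeof(struct rpki_tr_ssh_config));

  ssh->user = "rtr-client";                            /* SSH username */
  ssh->bird_private_key = "/etc/bird/ssh/id_rsa";      /* BIRD's private key */
  ssh->cache_public_key = "/etc/bird/ssh/known_hosts"; /* cache server host key */

  cf->tr_config.type = RPKI_TR_SSH;
  cf->tr_config.spec = ssh;
}
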
\ No newline at end of file diff --git a/proto/static/config.Y b/proto/static/config.Y index 182721b3..66e5ea4c 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -13,98 +13,119 @@ CF_HDR CF_DEFINES #define STATIC_CFG ((struct static_config *) this_proto) -static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; +static struct static_route *this_srt, *this_snh; static struct f_inst **this_srt_last_cmd; +static struct static_route * +static_nexthop_new(void) +{ + struct static_route *nh = this_srt; + + if (this_snh) + { + /* Additional next hop */ + nh = cfg_allocz(sizeof(struct static_route)); + nh->net = this_srt->net; + this_snh->mp_next = nh; + } + + nh->dest = RTD_UNICAST; + nh->mp_head = this_srt; + return nh; +}; + static void static_route_finish(void) { - struct static_route *r; - - /* Update undefined use_bfd entries in multipath nexthops */ - if (this_srt->dest == RTD_MULTIPATH) - for (r = this_srt->mp_next; r; r = r->mp_next) - if (r->use_bfd < 0) - r->use_bfd = this_srt->use_bfd; + if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest) + cf_error("Unexpected or missing nexthop/type"); } CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD) +CF_KEYWORDS(ONLINK, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD, MPLS) CF_GRAMMAR CF_ADDTO(proto, static_proto '}') -static_proto_start: proto_start STATIC { - this_proto = proto_config_new(&proto_static, $1); - static_init_config((struct static_config *) this_proto); - } - ; +static_proto_start: proto_start STATIC +{ + this_proto = proto_config_new(&proto_static, $1); + init_list(&STATIC_CFG->routes); +}; static_proto: static_proto_start proto_name '{' | static_proto proto_item ';' + | static_proto proto_channel ';' { this_proto->net_type = $2->net_type; } | static_proto CHECK LINK bool ';' { STATIC_CFG->check_link = $4; } - | static_proto IGP TABLE rtable ';' { STATIC_CFG->igp_table = $4; } + | static_proto IGP TABLE rtable ';' { + if ($4->addr_type == NET_IP4) + STATIC_CFG->igp_table_ip4 = $4; + else if ($4->addr_type == NET_IP6) + STATIC_CFG->igp_table_ip6 = $4; + else + cf_error("Incompatible IGP table type"); + } | static_proto stat_route stat_route_opt_list ';' { static_route_finish(); } ; -stat_route0: ROUTE prefix { - this_srt = cfg_allocz(sizeof(struct static_route)); - add_tail(&STATIC_CFG->other_routes, &this_srt->n); - this_srt->net = $2.addr; - this_srt->masklen = $2.len; - this_srt_last_cmd = &(this_srt->cmds); +stat_nexthop: + VIA ipa ipa_scope { + this_snh = static_nexthop_new(); + this_snh->via = $2; + this_snh->iface = $3; + } + | VIA TEXT { + this_snh = static_nexthop_new(); + this_snh->via = IPA_NONE; + this_snh->iface = if_get_by_name($2); + } + | stat_nexthop MPLS label_stack { + this_snh->mls = $3; } - ; + | stat_nexthop ONLINK bool { + this_snh->onlink = $3; + } + | stat_nexthop WEIGHT expr { + this_snh->weight = $3 - 1; + if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); + } + | stat_nexthop BFD bool { + this_snh->use_bfd = $3; cf_check_bfd($3); + } +; -stat_multipath1: - VIA ipa ipa_scope { - last_srt_nh = this_srt_nh; - this_srt_nh = cfg_allocz(sizeof(struct static_route)); - this_srt_nh->dest = RTD_NONE; - this_srt_nh->via = $2; - this_srt_nh->via_if = $3; - this_srt_nh->if_name = (void *) this_srt; /* really */ - this_srt_nh->use_bfd = -1; /* undefined */ - } - | stat_multipath1 WEIGHT expr { - this_srt_nh->masklen = $3 - 1; /* 
really */ - if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); - } - | stat_multipath1 BFD bool { - this_srt_nh->use_bfd = $3; cf_check_bfd($3); - } - ; +stat_nexthops: + stat_nexthop + | stat_nexthops stat_nexthop +; -stat_multipath: - stat_multipath1 { this_srt->mp_next = this_srt_nh; } - | stat_multipath stat_multipath1 { last_srt_nh->mp_next = this_srt_nh; } +stat_route0: ROUTE net_any { + this_srt = cfg_allocz(sizeof(struct static_route)); + add_tail(&STATIC_CFG->routes, &this_srt->n); + this_srt->net = $2; + this_srt_last_cmd = &(this_srt->cmds); + this_srt->mp_next = NULL; + this_snh = NULL; + } ; stat_route: - stat_route0 VIA ipa ipa_scope { - this_srt->dest = RTD_ROUTER; + stat_route0 stat_nexthops + | stat_route0 RECURSIVE ipa { + this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; - this_srt->via_if = $4; - } - | stat_route0 VIA TEXT { - this_srt->dest = RTD_DEVICE; - this_srt->if_name = $3; - rem_node(&this_srt->n); - add_tail(&STATIC_CFG->iface_routes, &this_srt->n); } - | stat_route0 MULTIPATH stat_multipath { - this_srt->dest = RTD_MULTIPATH; - } - | stat_route0 RECURSIVE ipa { + | stat_route0 RECURSIVE ipa MPLS label_stack { this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; + this_srt->mls = $5; } - + | stat_route0 { this_srt->dest = RTD_NONE; } | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; } @@ -114,7 +135,6 @@ stat_route: stat_route_item: cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); } - | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); } ; stat_route_opts: diff --git a/proto/static/static.c b/proto/static/static.c index 849067b9..ede4c734 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -9,33 +9,32 @@ /** * DOC: Static * - * The Static protocol is implemented in a straightforward way. It keeps - * two lists of static routes: one containing interface routes and one - * holding the remaining ones. Interface routes are inserted and removed according - * to interface events received from the core via the if_notify() hook. Routes - * pointing to a neighboring router use a sticky node in the neighbor cache - * to be notified about gaining or losing the neighbor. Special - * routes like black holes or rejects are inserted all the time. + * The Static protocol is implemented in a straightforward way. It keeps a list + * of static routes. Routes of dest RTD_UNICAST have associated sticky node in + * the neighbor cache to be notified about gaining or losing the neighbor and + * about interface-related events (e.g. link down). They may also have a BFD + * request if associated with a BFD session. When a route is notified, + * static_decide() is used to see whether the route activeness is changed. In + * such case, the route is marked as dirty and scheduled to be announced or + * withdrawn, which is done asynchronously from event hook. Routes of other + * types (e.g. black holes) are announced all the time. * - * Multipath routes are tricky. Because these routes depends on - * several neighbors we need to integrate that to the neighbor - * notification handling, we use dummy static_route nodes, one for - * each nexthop. 
Therefore, a multipath route consists of a master - * static_route node (of dest RTD_MULTIPATH), which specifies prefix - * and is used in most circumstances, and a list of dummy static_route - * nodes (of dest RTD_NONE), which stores info about nexthops and are - * connected to neighbor entries and neighbor notifications. Dummy - * nodes are chained using mp_next, they aren't in other_routes list, - * and abuse some fields (masklen, if_name) for other purposes. + * Multipath routes are a bit tricky. To represent additional next hops, dummy + * static_route nodes are used, which are chained using @mp_next field and link + * to the master node by @mp_head field. Each next hop has a separate neighbor + * entry and an activeness state, but the master node is used for most purposes. + * Note that most functions DO NOT accept dummy nodes as arguments. * * The only other thing worth mentioning is that when asked for reconfiguration, * Static not only compares the two configurations, but it also calculates - * difference between the lists of static routes and it just inserts the - * newly added routes and removes the obsolete ones. + * difference between the lists of static routes and it just inserts the newly + * added routes, removes the obsolete ones and reannounces changed ones. */ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -50,107 +49,117 @@ static linpool *static_lp; -static inline rtable * -p_igp_table(struct proto *p) -{ - struct static_config *cf = (void *) p->cf; - return cf->igp_table ? cf->igp_table->table : p->table; -} - static void -static_install(struct proto *p, struct static_route *r, struct iface *ifa) +static_announce_rte(struct static_proto *p, struct static_route *r) { - net *n; - rta a; - rte *e; + rta *a = allocz(RTA_MAX_SIZE); + a->src = p->p.main_source; + a->source = RTS_STATIC; + a->scope = SCOPE_UNIVERSE; + a->dest = r->dest; - if (r->installed > 0) - return; + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct nexthop *nhs = NULL; - DBG("Installing static route %I/%d, rtd=%d\n", r->net, r->masklen, r->dest); - bzero(&a, sizeof(a)); - a.src = p->main_source; - a.source = (r->dest == RTD_DEVICE) ? RTS_STATIC_DEVICE : RTS_STATIC; - a.scope = SCOPE_UNIVERSE; - a.cast = RTC_UNICAST; - a.dest = r->dest; - a.gw = r->via; - a.iface = ifa; - - if (r->dest == RTD_MULTIPATH) + for (r2 = r; r2; r2 = r2->mp_next) { - struct static_route *r2; - struct mpnh *nhs = NULL; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->installed) - { - struct mpnh *nh = alloca(sizeof(struct mpnh)); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->weight = r2->masklen; /* really */ - mpnh_insert(&nhs, nh); - } - - /* There is at least one nexthop */ - if (!nhs->next) - { - /* Fallback to unipath route for exactly one nexthop */ - a.dest = RTD_ROUTER; - a.gw = nhs->gw; - a.iface = nhs->iface; - } - else - a.nexthops = nhs; + if (!r2->active) + continue; + + struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); + nh->gw = r2->via; + nh->iface = r2->neigh->iface; + nh->flags = r2->onlink ? RNF_ONLINK : 0; + nh->weight = r2->weight; + if (r2->mls) + { + nh->labels = r2->mls->len; + memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + } + + nexthop_insert(&nhs, nh); } + if (!nhs) + goto withdraw; + + nexthop_link(a, nhs); + } + if (r->dest == RTDX_RECURSIVE) - rta_set_recursive_next_hop(p->table, &a, p_igp_table(p), &r->via, &r->via); + { + rtable *tab = ipa_is_ip4(r->via) ? 
p->igp_table_ip4 : p->igp_table_ip6; + rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls); + } - /* We skip rta_lookup() here */ + /* Already announced */ + if (r->state == SRS_CLEAN) + return; - n = net_get(p->table, r->net, r->masklen); - e = rte_get_temp(&a); - e->net = n; + /* We skip rta_lookup() here */ + rte *e = rte_get_temp(a); e->pflags = 0; if (r->cmds) f_eval_rte(r->cmds, &e, static_lp); - rte_update(p, n, e); - r->installed = 1; + rte_update(&p->p, r->net, e); + r->state = SRS_CLEAN; if (r->cmds) lp_flush(static_lp); + + return; + +withdraw: + if (r->state == SRS_DOWN) + return; + + rte_update(&p->p, r->net, NULL); + r->state = SRS_DOWN; } static void -static_remove(struct proto *p, struct static_route *r) +static_mark_rte(struct static_proto *p, struct static_route *r) { - net *n; - - if (!r->installed) + if (r->state == SRS_DIRTY) return; - DBG("Removing static route %I/%d via %I\n", r->net, r->masklen, r->via); - n = net_find(p->table, r->net, r->masklen); - rte_update(p, n, NULL); - r->installed = 0; + r->state = SRS_DIRTY; + BUFFER_PUSH(p->marked) = r; + + if (!ev_active(p->event)) + ev_schedule(p->event); +} + +static void +static_announce_marked(void *P) +{ + struct static_proto *p = P; + + BUFFER_WALK(p->marked, r) + static_announce_rte(P, r); + + BUFFER_FLUSH(p->marked); } static void static_bfd_notify(struct bfd_request *req); static void -static_update_bfd(struct proto *p, struct static_route *r) +static_update_bfd(struct static_proto *p, struct static_route *r) { + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + struct neighbor *nb = r->neigh; int bfd_up = (nb->scope > 0) && r->use_bfd; if (bfd_up && !r->bfd_req) { // ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip; - r->bfd_req = bfd_request_session(p->pool, r->via, nb->ifa->ip, nb->iface, + r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface, static_bfd_notify, r); } @@ -162,212 +171,173 @@ static_update_bfd(struct proto *p, struct static_route *r) } static int -static_decide(struct static_config *cf, struct static_route *r) +static_decide(struct static_proto *p, struct static_route *r) { - /* r->dest != RTD_MULTIPATH, but may be RTD_NONE (part of multipath route) - the route also have to be valid (r->neigh != NULL) */ + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + + struct static_config *cf = (void *) p->p.cf; + uint old_active = r->active; if (r->neigh->scope < 0) - return 0; + goto fail; if (cf->check_link && !(r->neigh->iface->flags & IF_LINK_UP)) - return 0; + goto fail; - if (r->bfd_req && r->bfd_req->state != BFD_STATE_UP) - return 0; + if (r->bfd_req && (r->bfd_req->state != BFD_STATE_UP)) + goto fail; - return 1; -} + r->active = 1; + return !old_active; +fail: + r->active = 0; + return old_active; +} static void -static_add(struct proto *p, struct static_config *cf, struct static_route *r) +static_add_rte(struct static_proto *p, struct static_route *r) { - DBG("static_add(%I/%d,%d)\n", r->net, r->masklen, r->dest); - switch (r->dest) - { - case RTD_ROUTER: - { - struct neighbor *n = neigh_find2(p, &r->via, r->via_if, NEF_STICKY); - if (n) - { - r->chain = n->data; - n->data = r; - r->neigh = n; - - static_update_bfd(p, r); - if (static_decide(cf, r)) - static_install(p, r, n->iface); - else - static_remove(p, r); - } - else - { - log(L_ERR "Static route destination %I is invalid. 
Ignoring.", r->via); - static_remove(p, r); - } - break; - } + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct neighbor *n; - case RTD_DEVICE: - break; + for (r2 = r; r2; r2 = r2->mp_next) + { + n = ipa_nonzero(r2->via) ? + neigh_find2(&p->p, &r2->via, r2->iface, + NEF_STICKY | (r2->onlink ? NEF_ONLINK : 0)) : + neigh_find_iface(&p->p, r2->iface); - case RTD_MULTIPATH: + if (!n) { - int count = 0; - struct static_route *r2; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - { - struct neighbor *n = neigh_find2(p, &r2->via, r2->via_if, NEF_STICKY); - if (n) - { - r2->chain = n->data; - n->data = r2; - r2->neigh = n; - - static_update_bfd(p, r2); - r2->installed = static_decide(cf, r2); - count += r2->installed; - } - else - { - log(L_ERR "Static route destination %I is invalid. Ignoring.", r2->via); - r2->installed = 0; - } - } - - if (count) - static_install(p, r, NULL); - else - static_remove(p, r); - break; + log(L_WARN "Invalid next hop %I of static route %N", r2->via, r2->net); + continue; } - default: - static_install(p, r, NULL); + r2->neigh = n; + r2->chain = n->data; + n->data = r2; + + static_update_bfd(p, r2); + static_decide(p, r2); } + } + + static_announce_rte(p, r); } static void -static_rte_cleanup(struct proto *p UNUSED, struct static_route *r) +static_reset_rte(struct static_proto *p UNUSED, struct static_route *r) { struct static_route *r2; - if (r->bfd_req) + for (r2 = r; r2; r2 = r2->mp_next) { - rfree(r->bfd_req); - r->bfd_req = NULL; - } + r2->neigh = NULL; + r2->chain = NULL; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->bfd_req) - { - rfree(r2->bfd_req); - r2->bfd_req = NULL; - } + r2->state = 0; + r2->active = 0; + + rfree(r2->bfd_req); + r2->bfd_req = NULL; + } } -static int -static_start(struct proto *p) +static void +static_remove_rte(struct static_proto *p, struct static_route *r) { - struct static_config *cf = (void *) p->cf; - struct static_route *r; + if (r->state) + rte_update(&p->p, r->net, NULL); - DBG("Static: take off!\n"); + static_reset_rte(p, r); +} - if (!static_lp) - static_lp = lp_new(&root_pool, 1008); - if (cf->igp_table) - rt_lock_table(cf->igp_table->table); +static inline int +static_same_dest(struct static_route *x, struct static_route *y) +{ + if (x->dest != y->dest) + return 0; + + switch (x->dest) + { + case RTD_UNICAST: + for (; x && y; x = x->mp_next, y = y->mp_next) + { + if (!ipa_equal(x->via, y->via) || + (x->iface != y->iface) || + (x->onlink != y->onlink) || + (x->weight != y->weight) || + (x->use_bfd != y->use_bfd) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; + + if (!x->mls) + continue; + + for (uint i = 0; i < x->mls->len; i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; + } + return !x && !y; - /* We have to go UP before routes could be installed */ - proto_notify_state(p, PS_UP); + case RTDX_RECURSIVE: + if (!ipa_equal(x->via, y->via) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; - WALK_LIST(r, cf->other_routes) - static_add(p, cf, r); - return PS_UP; -} + if (!x->mls) + return 1; -static int -static_shutdown(struct proto *p) -{ - struct static_config *cf = (void *) p->cf; - struct static_route *r; + for (uint i = 0; i < x->mls->len; i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; - /* Just reset the flag, the routes will be flushed by the nest */ - WALK_LIST(r, cf->iface_routes) - r->installed = 0; - WALK_LIST(r, cf->other_routes) - { - 
static_rte_cleanup(p, r); - r->installed = 0; - } + return 1; - return PS_DOWN; + default: + return 1; + } } -static void -static_cleanup(struct proto *p) +static inline int +static_same_rte(struct static_route *or, struct static_route *nr) { - struct static_config *cf = (void *) p->cf; - - if (cf->igp_table) - rt_unlock_table(cf->igp_table->table); + /* Note that i_same() requires arguments in (new, old) order */ + return static_same_dest(or, nr) && i_same(nr->cmds, or->cmds); } static void -static_update_rte(struct proto *p, struct static_route *r) +static_reconfigure_rte(struct static_proto *p, struct static_route *or, struct static_route *nr) { - switch (r->dest) - { - case RTD_ROUTER: - if (static_decide((struct static_config *) p->cf, r)) - static_install(p, r, r->neigh->iface); - else - static_remove(p, r); - break; - - case RTD_NONE: /* a part of multipath route */ - { - int decision = static_decide((struct static_config *) p->cf, r); - if (decision == r->installed) - break; /* no change */ - r->installed = decision; - - struct static_route *r1, *r2; - int count = 0; - r1 = (void *) r->if_name; /* really */ - for (r2 = r1->mp_next; r2; r2 = r2->mp_next) - count += r2->installed; - - if (count) - { - /* Set of nexthops changed - force reinstall */ - r1->installed = 0; - static_install(p, r1, NULL); - } - else - static_remove(p, r1); + if ((or->state == SRS_CLEAN) && !static_same_rte(or, nr)) + nr->state = SRS_DIRTY; + else + nr->state = or->state; - break; - } - } + static_add_rte(p, nr); + static_reset_rte(p, or); } + static void static_neigh_notify(struct neighbor *n) { - struct proto *p = n->proto; + struct static_proto *p = (void *) n->proto; struct static_route *r; DBG("Static: neighbor notify for %I: iface %p\n", n->addr, n->iface); - for(r=n->data; r; r=r->chain) + for (r = n->data; r; r = r->chain) { static_update_bfd(p, r); - static_update_rte(p, r); + + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } } @@ -375,241 +345,232 @@ static void static_bfd_notify(struct bfd_request *req) { struct static_route *r = req->data; - struct proto *p = r->neigh->proto; + struct static_proto *p = (void *) r->neigh->proto; // if (req->down) TRACE(D_EVENTS, "BFD session down for nbr %I on %s", XXXX); - static_update_rte(p, r); + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } -static void -static_dump_rt(struct static_route *r) +static int +static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED) { - debug("%-1I/%2d: ", r->net, r->masklen); - switch (r->dest) - { - case RTD_ROUTER: - debug("via %I\n", r->via); - break; - case RTD_DEVICE: - debug("dev %s\n", r->if_name); - break; - default: - debug("rtd %d\n", r->dest); - break; - } + return 1; } + static void -static_dump(struct proto *p) +static_postconfig(struct proto_config *CF) { - struct static_config *c = (void *) p->cf; + struct static_config *cf = (void *) CF; struct static_route *r; - debug("Independent static routes:\n"); - WALK_LIST(r, c->other_routes) - static_dump_rt(r); - debug("Device static routes:\n"); - WALK_LIST(r, c->iface_routes) - static_dump_rt(r); -} + if (EMPTY_LIST(CF->channels)) + cf_error("Channel not specified"); -static void -static_if_notify(struct proto *p, unsigned flags, struct iface *i) -{ - struct static_route *r; - struct static_config *c = (void *) p->cf; + struct channel_config *cc = proto_cf_main_channel(CF); - if (flags & IF_CHANGE_UP) - { - WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_install(p, r, i); - } - else if (flags & IF_CHANGE_DOWN) - { - 
WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_remove(p, r); - } -} + if (!cf->igp_table_ip4) + cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ? + cc->table : cf->c.global->def_tables[NET_IP4]; -int -static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED) -{ - return 1; -} + if (!cf->igp_table_ip6) + cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ? + cc->table : cf->c.global->def_tables[NET_IP6]; -void -static_init_config(struct static_config *c) -{ - init_list(&c->iface_routes); - init_list(&c->other_routes); + WALK_LIST(r, cf->routes) + if (r->net && (r->net->type != CF->net_type)) + cf_error("Route %N incompatible with channel type", r->net); } static struct proto * -static_init(struct proto_config *c) +static_init(struct proto_config *CF) { - struct proto *p = proto_new(c, sizeof(struct proto)); + struct proto *P = proto_new(CF); + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) CF; - p->neigh_notify = static_neigh_notify; - p->if_notify = static_if_notify; - p->rte_mergable = static_rte_mergable; + P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - return p; -} + P->neigh_notify = static_neigh_notify; + P->rte_mergable = static_rte_mergable; -static inline int -static_same_net(struct static_route *x, struct static_route *y) -{ - return ipa_equal(x->net, y->net) && (x->masklen == y->masklen); + if (cf->igp_table_ip4) + p->igp_table_ip4 = cf->igp_table_ip4->table; + + if (cf->igp_table_ip6) + p->igp_table_ip6 = cf->igp_table_ip6->table; + + return P; } -static inline int -static_same_dest(struct static_route *x, struct static_route *y) +static int +static_start(struct proto *P) { - if (x->dest != y->dest) - return 0; + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; - switch (x->dest) - { - case RTD_ROUTER: - return ipa_equal(x->via, y->via) && (x->via_if == y->via_if); + if (!static_lp) + static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); - case RTD_DEVICE: - return !strcmp(x->if_name, y->if_name); + if (p->igp_table_ip4) + rt_lock_table(p->igp_table_ip4); - case RTD_MULTIPATH: - for (x = x->mp_next, y = y->mp_next; - x && y; - x = x->mp_next, y = y->mp_next) - if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd)) - return 0; - return !x && !y; + if (p->igp_table_ip6) + rt_lock_table(p->igp_table_ip6); - case RTDX_RECURSIVE: - return ipa_equal(x->via, y->via); + p->event = ev_new(p->p.pool); + p->event->hook = static_announce_marked; + p->event->data = p; - default: - return 1; - } + BUFFER_INIT(p->marked, p->p.pool, 4); + + /* We have to go UP before routes could be installed */ + proto_notify_state(P, PS_UP); + + WALK_LIST(r, cf->routes) + static_add_rte(p, r); + + return PS_UP; } -static inline int -static_same_rte(struct static_route *x, struct static_route *y) +static int +static_shutdown(struct proto *P) { - /* Note that i_same() requires arguments in (new, old) order */ - return static_same_dest(x, y) && i_same(y->cmds, x->cmds); -} + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; + /* Just reset the flag, the routes will be flushed by the nest */ + WALK_LIST(r, cf->routes) + static_reset_rte(p, r); + + return PS_DOWN; +} static void -static_match(struct proto *p, struct static_route *r, struct static_config *n) +static_cleanup(struct proto *P) { - struct static_route *t; - - /* - * For given old route *r we find whether a route to the same 
- * network is also in the new route list. In that case, we keep the - * route and possibly update the route later if destination changed. - * Otherwise, we remove the route. - */ + struct static_proto *p = (void *) P; - if (r->neigh) - r->neigh->data = NULL; + if (p->igp_table_ip4) + rt_unlock_table(p->igp_table_ip4); - WALK_LIST(t, n->iface_routes) - if (static_same_net(r, t)) - goto found; + if (p->igp_table_ip6) + rt_unlock_table(p->igp_table_ip6); +} - WALK_LIST(t, n->other_routes) - if (static_same_net(r, t)) - goto found; +static void +static_dump_rte(struct static_route *r) +{ + debug("%-1N: ", r->net); + if (r->dest == RTD_UNICAST) + if (r->iface && ipa_zero(r->via)) + debug("dev %s\n", r->iface->name); + else + debug("via %I%J\n", r->via, r->iface); + else + debug("rtd %d\n", r->dest); +} - static_remove(p, r); - return; +static void +static_dump(struct proto *P) +{ + struct static_config *c = (void *) P->cf; + struct static_route *r; - found: - /* If destination is different, force reinstall */ - if ((r->installed > 0) && !static_same_rte(r, t)) - t->installed = -1; - else - t->installed = r->installed; + debug("Static routes:\n"); + WALK_LIST(r, c->routes) + static_dump_rte(r); } -static inline rtable * -cf_igp_table(struct static_config *cf) +#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) + +static inline int +static_cmp_rte(const void *X, const void *Y) { - return cf->igp_table ? cf->igp_table->table : NULL; + struct static_route *x = *(void **)X, *y = *(void **)Y; + return net_compare(x->net, y->net); } static int -static_reconfigure(struct proto *p, struct proto_config *new) +static_reconfigure(struct proto *P, struct proto_config *CF) { - struct static_config *o = (void *) p->cf; - struct static_config *n = (void *) new; - struct static_route *r; + struct static_proto *p = (void *) P; + struct static_config *o = (void *) P->cf; + struct static_config *n = (void *) CF; + struct static_route *r, *r2, *or, *nr; + + /* Check change in IGP tables */ + if ((IGP_TABLE(o, ip4) != IGP_TABLE(n, ip4)) || + (IGP_TABLE(o, ip6) != IGP_TABLE(n, ip6))) + return 0; - if (cf_igp_table(o) != cf_igp_table(n)) + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) return 0; - /* Delete all obsolete routes and reset neighbor entries */ - WALK_LIST(r, o->iface_routes) - static_match(p, r, n); - WALK_LIST(r, o->other_routes) - static_match(p, r, n); + p->p.cf = CF; - /* Now add all new routes, those not changed will be ignored by static_install() */ - WALK_LIST(r, n->iface_routes) - { - struct iface *ifa; - if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP)) - static_install(p, r, ifa); - } - WALK_LIST(r, n->other_routes) - static_add(p, n, r); + /* Reset route lists in neighbor entries */ + WALK_LIST(r, o->routes) + for (r2 = r; r2; r2 = r2->mp_next) + if (r2->neigh) + r2->neigh->data = NULL; - WALK_LIST(r, o->other_routes) - static_rte_cleanup(p, r); + /* Reconfigure initial matching sequence */ + for (or = HEAD(o->routes), nr = HEAD(n->routes); + NODE_VALID(or) && NODE_VALID(nr) && net_equal(or->net, nr->net); + or = NODE_NEXT(or), nr = NODE_NEXT(nr)) + static_reconfigure_rte(p, or, nr); - return 1; -} + if (!NODE_VALID(or) && !NODE_VALID(nr)) + return 1; -static void -static_copy_routes(list *dlst, list *slst) -{ - struct static_route *dr, *sr; + /* Reconfigure remaining routes, sort them to find matching pairs */ + struct static_route *or2, *nr2, **orbuf, **nrbuf; + uint ornum = 0, nrnum = 0, orpos = 0, nrpos = 
0, i; - init_list(dlst); - WALK_LIST(sr, *slst) - { - /* copy one route */ - dr = cfg_alloc(sizeof(struct static_route)); - memcpy(dr, sr, sizeof(struct static_route)); - - /* This fn is supposed to be called on fresh src routes, which have 'live' - fields (like .chain, .neigh or .installed) zero, so no need to zero them */ - - /* We need to copy multipath chain, because there are backptrs in 'if_name' */ - if (dr->dest == RTD_MULTIPATH) - { - struct static_route *md, *ms, **mp_last; - - mp_last = &(dr->mp_next); - for (ms = sr->mp_next; ms; ms = ms->mp_next) - { - md = cfg_alloc(sizeof(struct static_route)); - memcpy(md, ms, sizeof(struct static_route)); - md->if_name = (void *) dr; /* really */ - - *mp_last = md; - mp_last = &(md->mp_next); - } - *mp_last = NULL; - } - - add_tail(dlst, (node *) dr); - } + for (or2 = or; NODE_VALID(or2); or2 = NODE_NEXT(or2)) + ornum++; + + for (nr2 = nr; NODE_VALID(nr2); nr2 = NODE_NEXT(nr2)) + nrnum++; + + orbuf = xmalloc(ornum * sizeof(void *)); + nrbuf = xmalloc(nrnum * sizeof(void *)); + + for (i = 0, or2 = or; i < ornum; i++, or2 = NODE_NEXT(or2)) + orbuf[i] = or2; + + for (i = 0, nr2 = nr; i < nrnum; i++, nr2 = NODE_NEXT(nr2)) + nrbuf[i] = nr2; + + qsort(orbuf, ornum, sizeof(struct static_route *), static_cmp_rte); + qsort(nrbuf, nrnum, sizeof(struct static_route *), static_cmp_rte); + + while ((orpos < ornum) && (nrpos < nrnum)) + { + int x = net_compare(orbuf[orpos]->net, nrbuf[nrpos]->net); + if (x < 0) + static_remove_rte(p, orbuf[orpos++]); + else if (x > 0) + static_add_rte(p, nrbuf[nrpos++]); + else + static_reconfigure_rte(p, orbuf[orpos++], nrbuf[nrpos++]); + } + + while (orpos < ornum) + static_remove_rte(p, orbuf[orpos++]); + + while (nrpos < nrnum) + static_add_rte(p, nrbuf[nrpos++]); + + xfree(orbuf); + xfree(nrbuf); + + return 1; } static void @@ -618,53 +579,66 @@ static_copy_config(struct proto_config *dest, struct proto_config *src) struct static_config *d = (struct static_config *) dest; struct static_config *s = (struct static_config *) src; - /* Shallow copy of everything */ - proto_copy_rest(dest, src, sizeof(struct static_config)); + struct static_route *srt, *snh; - /* Copy route lists */ - static_copy_routes(&d->iface_routes, &s->iface_routes); - static_copy_routes(&d->other_routes, &s->other_routes); -} + /* Copy route list */ + init_list(&d->routes); + WALK_LIST(srt, s->routes) + { + struct static_route *drt = NULL, *dnh = NULL, **dnp = &drt; + for (snh = srt; snh; snh = snh->mp_next) + { + dnh = cfg_alloc(sizeof(struct static_route)); + memcpy(dnh, snh, sizeof(struct static_route)); -struct protocol proto_static = { - .name = "Static", - .template = "static%d", - .preference = DEF_PREF_STATIC, - .config_size = sizeof(struct static_config), - .init = static_init, - .dump = static_dump, - .start = static_start, - .shutdown = static_shutdown, - .cleanup = static_cleanup, - .reconfigure = static_reconfigure, - .copy_config = static_copy_config -}; + if (!drt) + add_tail(&d->routes, &(dnh->n)); + + *dnp = dnh; + dnp = &(dnh->mp_next); + + if (snh->mp_head) + dnh->mp_head = drt; + } + } +} static void static_show_rt(struct static_route *r) { - byte via[STD_ADDRESS_P_LENGTH + 16]; - switch (r->dest) + { + case RTD_UNICAST: + { + struct static_route *r2; + + cli_msg(-1009, "%N", r->net); + for (r2 = r; r2; r2 = r2->mp_next) { - case RTD_ROUTER: bsprintf(via, "via %I%J", r->via, r->via_if); break; - case RTD_DEVICE: bsprintf(via, "dev %s", r->if_name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case 
RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break; - default: bsprintf(via, "???"); + if (r2->iface && ipa_zero(r2->via)) + cli_msg(-1009, "\tdev %s%s", r2->iface->name, + r2->active ? "" : " (dormant)"); + else + cli_msg(-1009, "\tvia %I%J%s%s%s", r2->via, r2->iface, + r2->onlink ? " onlink" : "", + r2->bfd_req ? " (bfd)" : "", + r2->active ? "" : " (dormant)"); } - cli_msg(-1009, "%I/%d %s%s%s", r->net, r->masklen, via, - r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)"); + break; + } - struct static_route *r2; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->masklen + 1, /* really */ - r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)"); + case RTD_NONE: + case RTD_BLACKHOLE: + case RTD_UNREACHABLE: + case RTD_PROHIBIT: + cli_msg(-1009, "%N\t%s", r->net, rta_dest_names[r->dest]); + break; + + case RTDX_RECURSIVE: + cli_msg(-1009, "%N\trecursive %I", r->net, r->via); + break; + } } void @@ -673,9 +647,25 @@ static_show(struct proto *P) struct static_config *c = (void *) P->cf; struct static_route *r; - WALK_LIST(r, c->other_routes) - static_show_rt(r); - WALK_LIST(r, c->iface_routes) + WALK_LIST(r, c->routes) static_show_rt(r); cli_msg(0, ""); } + + +struct protocol proto_static = { + .name = "Static", + .template = "static%d", + .preference = DEF_PREF_STATIC, + .channel_mask = NB_ANY, + .proto_size = sizeof(struct static_proto), + .config_size = sizeof(struct static_config), + .postconfig = static_postconfig, + .init = static_init, + .dump = static_dump, + .start = static_start, + .shutdown = static_shutdown, + .cleanup = static_cleanup, + .reconfigure = static_reconfigure, + .copy_config = static_copy_config +}; diff --git a/proto/static/static.h b/proto/static/static.h index 6b047234..b202c0b1 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -11,41 +11,61 @@ #include "nest/route.h" #include "nest/bfd.h" +#include "lib/buffer.h" struct static_config { struct proto_config c; - list iface_routes; /* Routes to search on interface events */ - list other_routes; /* Routes hooked to neighbor cache and reject routes */ + list routes; /* List of static routes (struct static_route) */ int check_link; /* Whether iface link state is used */ - struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ + struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ }; +struct static_proto { + struct proto p; -void static_init_config(struct static_config *); + struct event *event; /* Event for announcing updated routes */ + BUFFER(struct static_route *) marked; /* Routes marked for reannouncement */ + rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ + rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ +}; struct static_route { node n; - struct static_route *chain; /* Next for the same neighbor */ - ip_addr net; /* Network we route */ - int masklen; /* Mask length */ - int dest; /* Destination type (RTD_*) */ + net_addr *net; /* Network we route */ ip_addr via; /* Destination router */ - struct iface *via_if; /* Destination iface, for link-local vias */ - struct neighbor *neigh; - byte *if_name; /* Name for 
RTD_DEVICE routes */ - struct static_route *mp_next; /* Nexthops for RTD_MULTIPATH routes */ + struct iface *iface; /* Destination iface, for link-local vias or device routes */ + struct neighbor *neigh; /* Associated neighbor entry */ + struct static_route *chain; /* Next for the same neighbor */ + struct static_route *mp_head; /* First nexthop of this route */ + struct static_route *mp_next; /* Nexthops for multipath routes */ struct f_inst *cmds; /* List of commands for setting attributes */ - int installed; /* Installed in rt table, -1 for reinstall */ - int use_bfd; /* Configured to use BFD */ + byte dest; /* Destination type (RTD_*) */ + byte state; /* State of route announcement (SRS_*) */ + byte active; /* Next hop is active (nbr/iface/BFD available) */ + byte onlink; /* Gateway is onlink regardless of IP ranges */ + byte weight; /* Multipath next hop weight */ + byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ + mpls_label_stack *mls; /* MPLS label stack; may be NULL */ }; -/* Dummy nodes (parts of multipath route) abuses masklen field for weight - and if_name field for a ptr to the master (RTD_MULTIPATH) node. */ - +/* + * Note that data fields neigh, chain, state, active and bfd_req are runtime + * data, not real configuration data. Must be handled carefully. + * + * Regular (i.e. dest == RTD_UNICAST) routes use static_route structure for + * additional next hops (fields mp_head, mp_next). Note that 'state' is for + * whole route, while 'active' is for each next hop. Also note that fields + * mp_head, mp_next, active are zero for other kinds of routes. + */ #define RTDX_RECURSIVE 0x7f /* Phony dest value for recursive routes */ +#define SRS_DOWN 0 /* Route is not announced */ +#define SRS_CLEAN 1 /* Route is active and announced */ +#define SRS_DIRTY 2 /* Route changed since announcement */ + void static_show(struct proto *); #endif |
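A hypothetical illustration (not taken from the patch) of how the fields described in the comment above fit together for a unicast route with two next hops; cfg_allocz() and the field names come from this patch, the rest is made up for the example and mirrors what static_nexthop_new() in config.Y builds:

static struct static_route *
example_route_with_two_nexthops(net_addr *net)
{
  /* master node: carries the prefix and doubles as the first next hop */
  struct static_route *r = cfg_allocz(sizeof(struct static_route));
  r->net = net;
  r->dest = RTD_UNICAST;
  r->mp_head = r;

  /* dummy node for the additional next hop, sharing the prefix */
  struct static_route *nh = cfg_allocz(sizeof(struct static_route));
  nh->net = net;
  nh->dest = RTD_UNICAST;
  nh->mp_head = r;

  r->mp_next = nh;    /* chain of next hops; nh->mp_next stays NULL */
  return r;
}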