diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2013-06-24 16:37:30 +0200 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2013-06-24 16:37:30 +0200 |
commit | ef4a50be10c6dd0abffd957132cd146029c3d79d (patch) | |
tree | f01df1b69d1d5f495dcad82e2f0e30478be55cb8 | |
parent | fad04c750ca6906fb095f1b45958dec0ac8e210c (diff) |
Better packet priority and traffic class handling.
Implements support for IPv6 traffic class, sets higher priority for OSPF
and RIP outgoing packets by default and allows configuring the ToS/DS/TClass
IP header field and the local priority of outgoing packets.
-rw-r--r-- | doc/bird.sgml | 48 | ||||
-rw-r--r-- | lib/ipv6.h | 7 | ||||
-rw-r--r-- | lib/socket.h | 4 | ||||
-rw-r--r-- | nest/config.Y | 8 | ||||
-rw-r--r-- | proto/ospf/config.Y | 6 | ||||
-rw-r--r-- | proto/ospf/iface.c | 8 | ||||
-rw-r--r-- | proto/ospf/ospf.h | 2 | ||||
-rw-r--r-- | proto/rip/config.Y | 6 | ||||
-rw-r--r-- | proto/rip/rip.c | 7 | ||||
-rw-r--r-- | proto/rip/rip.h | 2 | ||||
-rw-r--r-- | sysdep/bsd/sysio.h | 9 | ||||
-rw-r--r-- | sysdep/linux/sysio.h | 19 | ||||
-rw-r--r-- | sysdep/unix/io.c | 11 |
13 files changed, 113 insertions, 24 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml index 0681bd53..7277b2b9 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -567,6 +567,22 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/ <cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all ethernet interfaces that have address from 192.168.1.0/24. + <tag><label id="dsc-prio">tx class|dscp <m/num/</tag> + This option specifies the value of ToS/DS/Class field in IP + headers of the outgoing protocol packets. This may affect how the + protocol packets are processed by the network relative to the + other network traffic. With <cf/class/ keyword, the value + (0-255) is used for the whole ToS/Class octet (but two bits + reserved for ECN are ignored). With <cf/dscp/ keyword, the + value (0-63) is used just for the DS field in the + octet. Default value is 0xc0 (DSCP 0x30 - CS6). + + <tag>tx priority <m/num/</tag> + This option specifies the local packet priority. This may + affect how the protocol packets are processed in the local TX + queues. This option is Linux specific. Default value is 7 + (highest priority, privileged traffic). + <tag><label id="dsc-pass">password "<m/password/" [ { id <m/num/; generate from <m/time/; generate to <m/time/; accept from <m/time/; accept to <m/time/; } ]</tag> Specifies a password that can be used by the protocol. Password option can be used more times to specify more passwords. If more passwords are @@ -2220,6 +2236,11 @@ protocol ospf <name> { prefix) is propagated. It is possible that some hardware drivers or platforms do not implement this feature. Default value is no. + <tag>tx class|dscp|priority <m/num/</tag> + These options specify the ToS/DiffServ/Traffic class/Priority + of the outgoing OSPF packets. See <ref id="dsc-prio" name="tx + class"> common option for detailed description. 
+ <tag>ecmp weight <M>num</M></tag> When ECMP (multipath) routes are allowed, this value specifies a relative weight used for nexthops going through the iface. @@ -2748,13 +2769,26 @@ makes it pretty much obsolete. (It is still usable on very small networks.) neighbors, that is not configurable. Default: never. </descrip> -<p>There are two options that can be specified per-interface. First is <cf>metric</cf>, with -default one. Second is <cf>mode multicast|broadcast|quiet|nolisten|version1</cf>, it selects mode for -rip to work in. If nothing is specified, rip runs in multicast mode. <cf>version1</cf> is -currently equivalent to <cf>broadcast</cf>, and it makes RIP talk to a broadcast address even -through multicast mode is possible. <cf>quiet</cf> option means that RIP will not transmit -any periodic messages to this interface and <cf>nolisten</cf> means that RIP will send to this -interface but not listen to it. +<p>There are some options that can be specified per-interface: + +<descrip> + <tag>metric <m/num/</tag> + This option specifies the metric of the interface. Default value is 1. + + <tag>mode multicast|broadcast|quiet|nolisten|version1</tag> + This option selects the mode for RIP to work in. If nothing is + specified, RIP runs in multicast mode. <cf/version1/ is + currently equivalent to <cf/broadcast/, and it makes RIP talk + to a broadcast address even though multicast mode is + possible. <cf/quiet/ option means that RIP will not transmit + any periodic messages to this interface and <cf/nolisten/ + means that RIP will send to this interface but not listen to it. + + <tag>tx class|dscp|priority <m/num/</tag> + These options specify the ToS/DiffServ/Traffic class/Priority + of the outgoing RIP packets. See <ref id="dsc-prio" name="tx + class"> common option for detailed description. +</descrip> <p>The following options generally override behavior specified in RFC. 
If you use any of these options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything diff --git a/lib/ipv6.h b/lib/ipv6.h --- a/lib/ipv6.h +++ b/lib/ipv6.h @@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a) return buf+16; } -/* - * RFC 1883 defines packet precendece, but RFC 2460 replaces it - * by generic Traffic Class ID with no defined semantics. Better - * not use it yet. - */ -#define IP_PREC_INTERNET_CONTROL -1 +#define IP_PREC_INTERNET_CONTROL 0xc0 #endif diff --git a/lib/socket.h b/lib/socket.h index 0ee43b52..fbddfb4c 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -20,7 +20,8 @@ typedef struct birdsock { void *data; /* User data */ ip_addr saddr, daddr; /* IPA_NONE = unspecified */ unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */ - int tos; /* TOS and priority, -1 = default */ + int tos; /* TOS / traffic class, -1 = default */ + int priority; /* Local socket priority, -1 = default */ int ttl; /* Time To Live, -1 = default */ u32 flags; struct iface *iface; /* Interface; specify this for broad/multicast sockets */ @@ -81,6 +82,7 @@ sk_send_buffer_empty(sock *sk) return sk->tbuf == sk->tpos; } +extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */ /* Socket flags */ diff --git a/nest/config.Y b/nest/config.Y index 183059e8..b85a5733 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -48,7 +48,7 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH) CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, 
OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE) @@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <ro> roa_args %type <rot> roa_table_arg %type <sd> sym_args -%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted +%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos %type <ps> proto_patt proto_patt2 %type <g> limit_spec @@ -277,6 +277,10 @@ iface_patt: iface_patt_init iface_patt_list ; +tos: + CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); } + | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); } + ; /* Direct device route protocol */ diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index ba050d85..d9379a7c 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) -CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK) +CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY) %type <t> opttext %type <ld> lsadb_args @@ -305,6 +305,8 @@ ospf_iface_item: | RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; } | RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; } | RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); } + | TX tos { OSPF_PATT->tx_tos = $2; } + | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; } | password_list ; @@ -367,6 +369,8 @@ 
ospf_iface_start: init_list(&OSPF_PATT->nbma_list); OSPF_PATT->autype = OSPF_AUTH_NONE; OSPF_PATT->ptp_netmask = 2; /* not specified */ + OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL; + OSPF_PATT->tx_priority = sk_priority_control; reset_passwords(); } ; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 3da8f56c..bc3b1ef6 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa) sk->dport = OSPF_PROTO; sk->saddr = IPA_NONE; - sk->tos = IP_PREC_INTERNET_CONTROL; + sk->tos = ifa->cf->tx_tos; + sk->priority = ifa->cf->tx_priority; sk->rx_hook = ospf_rx_hook; sk->tx_hook = ospf_tx_hook; sk->err_hook = ospf_err_hook; @@ -659,7 +660,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) if (ifa->stub != new_stub) return 0; - if (new->real_bcast != ifa->cf->real_bcast) + /* Change of these options would require to reset the iface socket */ + if ((new->real_bcast != ifa->cf->real_bcast) || + (new->tx_tos != ifa->cf->tx_tos) || + (new->tx_priority != ifa->cf->tx_priority)) return 0; ifa->cf = new; diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 7608225f..56ebcd31 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -800,6 +800,8 @@ struct ospf_iface_patt u32 priority; u32 voa; u32 vid; + int tx_tos; + int tx_priority; u16 rxbuf; #define OSPF_RXBUF_NORMAL 0 #define OSPF_RXBUF_LARGE 1 diff --git a/proto/rip/config.Y b/proto/rip/config.Y index cd4f30e7..ec82aa3d 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -27,7 +27,7 @@ CF_DECLS CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT, MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1, AUTHENTICATION, NONE, PLAINTEXT, MD5, - HONOR, NEVER, NEIGHBOR, ALWAYS, + HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY, RIP_METRIC, RIP_TAG) %type <i> rip_mode rip_auth @@ -76,6 +76,8 @@ rip_mode: rip_iface_item: | METRIC expr { RIP_IPATT->metric = $2; } | MODE rip_mode { RIP_IPATT->mode |= $2; } + | TX tos { 
RIP_IPATT->tx_tos = $2; } + | TX PRIORITY expr { RIP_IPATT->tx_priority = $3; } ; rip_iface_opts: @@ -94,6 +96,8 @@ rip_iface_init: add_tail(&RIP_CFG->iface_list, NODE this_ipatt); init_list(&this_ipatt->ipn_list); RIP_IPATT->metric = 1; + RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL; + RIP_IPATT->tx_priority = sk_priority_control; } ; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 341df7eb..c09eae79 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -707,7 +707,8 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_ if (new) { rif->sock->ttl = 1; - rif->sock->tos = IP_PREC_INTERNET_CONTROL; + rif->sock->tos = PATT->tx_tos; + rif->sock->priority = PATT->tx_priority; rif->sock->flags = SKF_LADDR_RX; } @@ -1007,7 +1008,9 @@ static int rip_pat_compare(struct rip_patt *a, struct rip_patt *b) { return ((a->metric == b->metric) && - (a->mode == b->mode)); + (a->mode == b->mode) && + (a->tx_tos == b->tx_tos) && + (a->tx_priority == b->tx_priority)); } static int diff --git a/proto/rip/rip.h b/proto/rip/rip.h index e0816d0e..2cce8c81 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -128,6 +128,8 @@ struct rip_patt { #define IM_QUIET 4 #define IM_NOLISTEN 8 #define IM_VERSION1 16 + int tx_tos; + int tx_priority; }; struct rip_proto_config { diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index 4f91def5..085f16fa 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -284,3 +284,12 @@ sk_set_min_ttl6(sock *s, int ttl) #endif + +int sk_priority_control = -1; + +static int +sk_set_priority(sock *s, int prio UNUSED) +{ + log(L_WARN "Socket priority not supported"); + return -1; +} diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 90b3ebd9..41287e71 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -310,3 +310,22 @@ sk_set_min_ttl6(sock *s, int ttl) } #endif + + +#ifndef IPV6_TCLASS +#define IPV6_TCLASS 67 +#endif + +int sk_priority_control = 7; + +static int +sk_set_priority(sock *s, int prio) +{ + 
if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) + { + log(L_WARN "sk_set_priority: setsockopt: %m"); + return -1; + } + + return 0; +} diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 80914afe..434a05be 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -598,7 +598,7 @@ sock_new(pool *p) sock *s = ralloc(p, &sk_class); s->pool = p; // s->saddr = s->daddr = IPA_NONE; - s->tos = s->ttl = -1; + s->tos = s->priority = s->ttl = -1; s->fd = -1; return s; } @@ -783,11 +783,18 @@ sk_setup(sock *s) ERR("fcntl(O_NONBLOCK)"); if (s->type == SK_UNIX) return NULL; -#ifndef IPV6 + +#ifdef IPV6 + if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) + WARN("IPV6_TCLASS"); +#else if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) WARN("IP_TOS"); #endif + if (s->priority >= 0) + sk_set_priority(s, s->priority); + #ifdef IPV6 int v = 1; if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) |