summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2013-06-24 16:37:30 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2013-06-24 16:37:30 +0200
commitef4a50be10c6dd0abffd957132cd146029c3d79d (patch)
treef01df1b69d1d5f495dcad82e2f0e30478be55cb8
parentfad04c750ca6906fb095f1b45958dec0ac8e210c (diff)
Better packet priority and traffic class handling.
Implements support for IPv6 traffic class, sets higher priority for OSPF and RIP outgoing packets by default and allows to configure ToS/DS/TClass IP header field and the local priority of outgoing packets.
-rw-r--r--doc/bird.sgml48
-rw-r--r--lib/ipv6.h7
-rw-r--r--lib/socket.h4
-rw-r--r--nest/config.Y8
-rw-r--r--proto/ospf/config.Y6
-rw-r--r--proto/ospf/iface.c8
-rw-r--r--proto/ospf/ospf.h2
-rw-r--r--proto/rip/config.Y6
-rw-r--r--proto/rip/rip.c7
-rw-r--r--proto/rip/rip.h2
-rw-r--r--sysdep/bsd/sysio.h9
-rw-r--r--sysdep/linux/sysio.h19
-rw-r--r--sysdep/unix/io.c11
13 files changed, 113 insertions, 24 deletions
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 0681bd53..7277b2b9 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -567,6 +567,22 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
<cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all
ethernet interfaces that have address from 192.168.1.0/24.
+ <tag><label id="dsc-prio">tx class|dscp <m/num/</tag>
+ This option specifies the value of ToS/DS/Class field in IP
+ headers of the outgoing protocol packets. This may affect how the
+ protocol packets are processed by the network relative to the
+ other network traffic. With <cf/class/ keyword, the value
+ (0-255) is used for the whole ToS/Class octet (but two bits
+ reserved for ECN are ignored). With <cf/dscp/ keyword, the
+ value (0-63) is used just for the DS field in the
+ octet. Default value is 0xc0 (DSCP 0x30 - CS6).
+
+ <tag>tx priority <m/num/</tag>
+ This option specifies the local packet priority. This may
+ affect how the protocol packets are processed in the local TX
+ queues. This option is Linux specific. Default value is 7
+ (highest priority, privileged traffic).
+
<tag><label id="dsc-pass">password "<m/password/" [ { id <m/num/; generate from <m/time/; generate to <m/time/; accept from <m/time/; accept to <m/time/; } ]</tag>
Specifies a password that can be used by the protocol. Password option can
be used more times to specify more passwords. If more passwords are
@@ -2220,6 +2236,11 @@ protocol ospf &lt;name&gt; {
prefix) is propagated. It is possible that some hardware
drivers or platforms do not implement this feature. Default value is no.
+ <tag>tx class|dscp|priority <m/num/</tag>
+ These options specify the ToS/DiffServ/Traffic class/Priority
+ of the outgoing OSPF packets. See <ref id="dsc-prio" name="tx
+ class"> common option for detailed description.
+
<tag>ecmp weight <M>num</M></tag>
When ECMP (multipath) routes are allowed, this value specifies
a relative weight used for nexthops going through the iface.
@@ -2748,13 +2769,26 @@ makes it pretty much obsolete. (It is still usable on very small networks.)
neighbors, that is not configurable. Default: never.
</descrip>
-<p>There are two options that can be specified per-interface. First is <cf>metric</cf>, with
-default one. Second is <cf>mode multicast|broadcast|quiet|nolisten|version1</cf>, it selects mode for
-rip to work in. If nothing is specified, rip runs in multicast mode. <cf>version1</cf> is
-currently equivalent to <cf>broadcast</cf>, and it makes RIP talk to a broadcast address even
-through multicast mode is possible. <cf>quiet</cf> option means that RIP will not transmit
-any periodic messages to this interface and <cf>nolisten</cf> means that RIP will send to this
-interface but not listen to it.
+<p>There are some options that can be specified per-interface:
+
+<descrip>
+ <tag>metric <m/num/</tag>
+ This option specifies the metric of the interface. Default value is 1.
+
+ <tag>mode multicast|broadcast|quiet|nolisten|version1</tag>
+ This option selects the mode for RIP to work in. If nothing is
+ specified, RIP runs in multicast mode. <cf/version1/ is
+ currently equivalent to <cf/broadcast/, and it makes RIP talk
+ to a broadcast address even though multicast mode is
+ possible. <cf/quiet/ option means that RIP will not transmit
+ any periodic messages to this interface and <cf/nolisten/
+ means that RIP will send to this interface but not listen to it.
+
+ <tag>tx class|dscp|priority <m/num/</tag>
+ These options specify the ToS/DiffServ/Traffic class/Priority
+ of the outgoing RIP packets. See <ref id="dsc-prio" name="tx
+ class"> common option for detailed description.
+</descrip>
<p>The following options generally override behavior specified in RFC. If you use any of these
options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything
diff --git a/lib/ipv6.h b/lib/ipv6.h
index 6f8e7b3c..2247d3fd 100644
--- a/lib/ipv6.h
+++ b/lib/ipv6.h
@@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a)
return buf+16;
}
-/*
- * RFC 1883 defines packet precendece, but RFC 2460 replaces it
- * by generic Traffic Class ID with no defined semantics. Better
- * not use it yet.
- */
-#define IP_PREC_INTERNET_CONTROL -1
+#define IP_PREC_INTERNET_CONTROL 0xc0
#endif
diff --git a/lib/socket.h b/lib/socket.h
index 0ee43b52..fbddfb4c 100644
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -20,7 +20,8 @@ typedef struct birdsock {
void *data; /* User data */
ip_addr saddr, daddr; /* IPA_NONE = unspecified */
unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */
- int tos; /* TOS and priority, -1 = default */
+ int tos; /* TOS / traffic class, -1 = default */
+ int priority; /* Local socket priority, -1 = default */
int ttl; /* Time To Live, -1 = default */
u32 flags;
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
@@ -81,6 +82,7 @@ sk_send_buffer_empty(sock *sk)
return sk->tbuf == sk->tpos;
}
+extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */
/* Socket flags */
diff --git a/nest/config.Y b/nest/config.Y
index 183059e8..b85a5733 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -48,7 +48,7 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE
CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES)
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
-CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC)
+CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
@@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID)
%type <ro> roa_args
%type <rot> roa_table_arg
%type <sd> sym_args
-%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted
+%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos
%type <ps> proto_patt proto_patt2
%type <g> limit_spec
@@ -277,6 +277,10 @@ iface_patt:
iface_patt_init iface_patt_list
;
+tos:
+ CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); }
+ | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); }
+ ;
/* Direct device route protocol */
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index ba050d85..d9379a7c 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC)
CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
-CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK)
+CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY)
%type <t> opttext
%type <ld> lsadb_args
@@ -305,6 +305,8 @@ ospf_iface_item:
| RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; }
| RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; }
| RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); }
+ | TX tos { OSPF_PATT->tx_tos = $2; }
+ | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; }
| password_list
;
@@ -367,6 +369,8 @@ ospf_iface_start:
init_list(&OSPF_PATT->nbma_list);
OSPF_PATT->autype = OSPF_AUTH_NONE;
OSPF_PATT->ptp_netmask = 2; /* not specified */
+ OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL;
+ OSPF_PATT->tx_priority = sk_priority_control;
reset_passwords();
}
;
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 3da8f56c..bc3b1ef6 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->dport = OSPF_PROTO;
sk->saddr = IPA_NONE;
- sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->tos = ifa->cf->tx_tos;
+ sk->priority = ifa->cf->tx_priority;
sk->rx_hook = ospf_rx_hook;
sk->tx_hook = ospf_tx_hook;
sk->err_hook = ospf_err_hook;
@@ -659,7 +660,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
if (ifa->stub != new_stub)
return 0;
- if (new->real_bcast != ifa->cf->real_bcast)
+ /* Change of these options would require to reset the iface socket */
+ if ((new->real_bcast != ifa->cf->real_bcast) ||
+ (new->tx_tos != ifa->cf->tx_tos) ||
+ (new->tx_priority != ifa->cf->tx_priority))
return 0;
ifa->cf = new;
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index 7608225f..56ebcd31 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -800,6 +800,8 @@ struct ospf_iface_patt
u32 priority;
u32 voa;
u32 vid;
+ int tx_tos;
+ int tx_priority;
u16 rxbuf;
#define OSPF_RXBUF_NORMAL 0
#define OSPF_RXBUF_LARGE 1
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index cd4f30e7..ec82aa3d 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -27,7 +27,7 @@ CF_DECLS
CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT,
MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1,
AUTHENTICATION, NONE, PLAINTEXT, MD5,
- HONOR, NEVER, NEIGHBOR, ALWAYS,
+ HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY,
RIP_METRIC, RIP_TAG)
%type <i> rip_mode rip_auth
@@ -76,6 +76,8 @@ rip_mode:
rip_iface_item:
| METRIC expr { RIP_IPATT->metric = $2; }
| MODE rip_mode { RIP_IPATT->mode |= $2; }
+ | TX tos { RIP_IPATT->tx_tos = $2; }
+ | TX PRIORITY expr { RIP_IPATT->tx_priority = $3; }
;
rip_iface_opts:
@@ -94,6 +96,8 @@ rip_iface_init:
add_tail(&RIP_CFG->iface_list, NODE this_ipatt);
init_list(&this_ipatt->ipn_list);
RIP_IPATT->metric = 1;
+ RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL;
+ RIP_IPATT->tx_priority = sk_priority_control;
}
;
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 341df7eb..c09eae79 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -707,7 +707,8 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_
if (new)
{
rif->sock->ttl = 1;
- rif->sock->tos = IP_PREC_INTERNET_CONTROL;
+ rif->sock->tos = PATT->tx_tos;
+ rif->sock->priority = PATT->tx_priority;
rif->sock->flags = SKF_LADDR_RX;
}
@@ -1007,7 +1008,9 @@ static int
rip_pat_compare(struct rip_patt *a, struct rip_patt *b)
{
return ((a->metric == b->metric) &&
- (a->mode == b->mode));
+ (a->mode == b->mode) &&
+ (a->tx_tos == b->tx_tos) &&
+ (a->tx_priority == b->tx_priority));
}
static int
diff --git a/proto/rip/rip.h b/proto/rip/rip.h
index e0816d0e..2cce8c81 100644
--- a/proto/rip/rip.h
+++ b/proto/rip/rip.h
@@ -128,6 +128,8 @@ struct rip_patt {
#define IM_QUIET 4
#define IM_NOLISTEN 8
#define IM_VERSION1 16
+ int tx_tos;
+ int tx_priority;
};
struct rip_proto_config {
diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h
index 4f91def5..085f16fa 100644
--- a/sysdep/bsd/sysio.h
+++ b/sysdep/bsd/sysio.h
@@ -284,3 +284,12 @@ sk_set_min_ttl6(sock *s, int ttl)
#endif
+
+int sk_priority_control = -1;
+
+static int
+sk_set_priority(sock *s, int prio UNUSED)
+{
+ log(L_WARN "Socket priority not supported");
+ return -1;
+}
diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h
index 90b3ebd9..41287e71 100644
--- a/sysdep/linux/sysio.h
+++ b/sysdep/linux/sysio.h
@@ -310,3 +310,22 @@ sk_set_min_ttl6(sock *s, int ttl)
}
#endif
+
+
+#ifndef IPV6_TCLASS
+#define IPV6_TCLASS 67
+#endif
+
+int sk_priority_control = 7;
+
+static int
+sk_set_priority(sock *s, int prio)
+{
+ if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0)
+ {
+ log(L_WARN "sk_set_priority: setsockopt: %m");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 80914afe..434a05be 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -598,7 +598,7 @@ sock_new(pool *p)
sock *s = ralloc(p, &sk_class);
s->pool = p;
// s->saddr = s->daddr = IPA_NONE;
- s->tos = s->ttl = -1;
+ s->tos = s->priority = s->ttl = -1;
s->fd = -1;
return s;
}
@@ -783,11 +783,18 @@ sk_setup(sock *s)
ERR("fcntl(O_NONBLOCK)");
if (s->type == SK_UNIX)
return NULL;
-#ifndef IPV6
+
+#ifdef IPV6
+ if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
+ WARN("IPV6_TCLASS");
+#else
if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
WARN("IP_TOS");
#endif
+ if (s->priority >= 0)
+ sk_set_priority(s, s->priority);
+
#ifdef IPV6
int v = 1;
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)