From 7fb23041a52d01754c53ba963e2282e524813364 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 13 Jan 2023 13:17:46 +0100 Subject: BSD: Add support for kernel route metric Add support for kernel route metric/priority, exported as krt_metric attribute, like in Linux. This should also fix issues with overwriting or removing system routes. --- sysdep/bsd/krt-sock.Y | 10 +++++++++- sysdep/bsd/krt-sock.c | 39 +++++++++++++++++++++++++++++++++++++-- sysdep/bsd/krt-sys.h | 2 ++ 3 files changed, 48 insertions(+), 3 deletions(-) (limited to 'sysdep') diff --git a/sysdep/bsd/krt-sock.Y b/sysdep/bsd/krt-sock.Y index 8581bd43..a03d6df5 100644 --- a/sysdep/bsd/krt-sock.Y +++ b/sysdep/bsd/krt-sock.Y @@ -10,7 +10,7 @@ CF_HDR CF_DECLS -CF_KEYWORDS(KERNEL, TABLE) +CF_KEYWORDS(KERNEL, TABLE, METRIC) CF_GRAMMAR @@ -25,6 +25,14 @@ kern_sys_item: THIS_KRT->sys.table_id = $3; } + | METRIC expr { + if ($2 && !krt_max_metric) + cf_error("Kernel route metric not supported"); + if ($2 > krt_max_metric) + cf_error("Kernel table id must be in range 0-%u", krt_max_metric); + + THIS_KRT->sys.metric = $2; + } ; CF_CODE diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 47f5cf59..540c246f 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -47,6 +47,11 @@ const int rt_default_ecmp = 0; * table_id is specified explicitly as sysctl scan argument, while in FreeBSD it * is handled implicitly by changing default table using setfib() syscall. * + * OpenBSD allows the use of a route metric. The behavior is controlled by the macro + * KRT_USE_METRIC, which enables use of rtm_priority in route send/receive. + * There are also KRT_DEFAULT_METRIC and KRT_MAX_METRIC for default and maximum + * metric values. 
+ * * KRT_SHARED_SOCKET - use shared kernel socked instead of one for each krt_proto * KRT_USE_SETFIB_SCAN - use setfib() for sysctl() route scan * KRT_USE_SETFIB_SOCK - use SO_SETFIB socket option for kernel sockets @@ -63,6 +68,9 @@ const int rt_default_ecmp = 0; #ifdef __OpenBSD__ #define KRT_MAX_TABLES (RT_TABLEID_MAX+1) +#define KRT_USE_METRIC +#define KRT_MAX_METRIC 255 +#define KRT_DEFAULT_METRIC 56 #define KRT_SHARED_SOCKET #define KRT_USE_SYSCTL_7 #endif @@ -71,6 +79,14 @@ const int rt_default_ecmp = 0; #define KRT_MAX_TABLES 1 #endif +#ifndef KRT_MAX_METRIC +#define KRT_MAX_METRIC 0 +#endif + +#ifndef KRT_DEFAULT_METRIC +#define KRT_DEFAULT_METRIC 0 +#endif + /* Dynamic max number of tables */ @@ -143,6 +159,10 @@ static struct krt_proto *krt_table_map[KRT_MAX_TABLES][2]; #endif +/* Make it available to parser code */ +const uint krt_max_metric = KRT_MAX_METRIC; + + /* Route socket message processing */ int @@ -231,6 +251,10 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_tableid = KRT_CF->sys.table_id; #endif +#ifdef KRT_USE_METRIC + msg.rtm.rtm_priority = KRT_CF->sys.metric; +#endif + #ifdef RTF_REJECT if(a->dest == RTD_UNREACHABLE) msg.rtm.rtm_flags |= RTF_REJECT; @@ -586,7 +610,7 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) e = rte_get_temp(&a, p->p.main_source); e->net = net; - ea_list *ea = alloca(sizeof(ea_list) + 1 * sizeof(eattr)); + ea_list *ea = alloca(sizeof(ea_list) + 2 * sizeof(eattr)); *ea = (ea_list) { .count = 1, .next = e->attrs->eattrs }; e->attrs->eattrs = ea; @@ -596,6 +620,15 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) .u.data = src2, }; +#ifdef KRT_USE_METRIC + ea->count++; + ea->attrs[1] = (eattr) { + .id = EA_KRT_METRIC, + .type = EAF_TYPE_INT, + .u.data = msg->rtm.rtm_priority, + }; +#endif + if (scan) krt_got_route(p, e, src); else @@ -1155,7 +1188,7 @@ krt_sys_shutdown(struct krt_proto *p) int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config 
*n, struct krt_config *o) { - return n->sys.table_id == o->sys.table_id; + return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); } void @@ -1168,11 +1201,13 @@ krt_sys_preconfig(struct config *c UNUSED) void krt_sys_init_config(struct krt_config *c) { c->sys.table_id = 0; /* Default table */ + c->sys.metric = KRT_DEFAULT_METRIC; } void krt_sys_copy_config(struct krt_config *d, struct krt_config *s) { d->sys.table_id = s->sys.table_id; + d->sys.metric = s->sys.metric; } diff --git a/sysdep/bsd/krt-sys.h b/sysdep/bsd/krt-sys.h index 57501884..198373c0 100644 --- a/sysdep/bsd/krt-sys.h +++ b/sysdep/bsd/krt-sys.h @@ -32,9 +32,11 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c /* Kernel routes */ extern uint krt_max_tables; +extern const uint krt_max_metric; struct krt_params { int table_id; /* Kernel table ID we sync with */ + u32 metric; /* Kernel metric used for all routes */ }; struct krt_state { -- cgit v1.2.3 From f8276812e6bf3aaefe22cdf9135b06e344298273 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 13 Jan 2023 13:32:29 +0100 Subject: Minor cleanups --- filter/decl.m4 | 2 +- proto/mrt/mrt.c | 4 ++-- sysdep/bsd/krt-sock.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'sysdep') diff --git a/filter/decl.m4 b/filter/decl.m4 index d0c86912..b6026867 100644 --- a/filter/decl.m4 +++ b/filter/decl.m4 @@ -589,7 +589,7 @@ f_linearize_concat(const struct f_inst * const inst[], uint count, uint results) for (uint i=0; ilen = linearize(out, inst[i], out->len); - out->results = results; + out->results = results; #ifdef LOCAL_DEBUG f_dump_line(out, 0); diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 321c6395..d1c334e1 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + static inline void UNUSED mrt_put_##S##_(buffer *b, T x) \ { \ 
put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + static inline void UNUSED mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 540c246f..1f793293 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -206,7 +206,7 @@ struct ks_msg memcpy(p, body, (l > sizeof(*p) ? sizeof(*p) : l));\ body += l;} -static inline void +static inline void UNUSED sockaddr_fill_dl(struct sockaddr_dl *sa, struct iface *ifa) { uint len = OFFSETOF(struct sockaddr_dl, sdl_data); -- cgit v1.2.3 From 928a1cb034e6f9e8edcdd1dc07264cd703e00827 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 17 Jan 2023 17:13:50 +0100 Subject: Alloc: Disable transparent huge pages The usage pattern implemented in allocator seems to be incompatible with transparent huge pages, as memory released using madvise(MADV_DONTNEED) with regular page size and alignment does not seem to trigger demotion of huge pages back to regular pages, even when significant number of pages is released. Even if demotion is triggered when system memory is low, it still breaks memory accounting. --- sysdep/cf/README | 3 +++ sysdep/cf/linux.h | 1 + sysdep/unix/alloc.c | 10 ++++++++++ 3 files changed, 14 insertions(+) (limited to 'sysdep') diff --git a/sysdep/cf/README b/sysdep/cf/README index 68078bbe..af65aaec 100644 --- a/sysdep/cf/README +++ b/sysdep/cf/README @@ -14,3 +14,6 @@ CONFIG_DONTROUTE_UNICAST Use MSG_DONTROUTE flag for unicast packets (def for Fre CONFIG_USE_HDRINCL Use IP_HDRINCL instead of control messages for source address on raw IP sockets. 
CONFIG_RESTRICTED_PRIVILEGES Implements restricted privileges using drop_uid() + +CONFIG_MADV_DONTNEED_TO_FREE To free pages, use MADV_DONTNEED instead of MADV_FREE (linux) +CONFIG_DISABLE_THP Disable transparent huge pages (linux) diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index 9c37dd8a..5edc4969 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -24,6 +24,7 @@ #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" #define CONFIG_MADV_DONTNEED_TO_FREE +#define CONFIG_DISABLE_THP #ifndef AF_MPLS #define AF_MPLS 28 diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index e7c4e6b0..5d9c6bcd 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -19,6 +19,10 @@ #include #endif +#ifdef CONFIG_DISABLE_THP +#include +#endif + long page_size = 0; #ifdef HAVE_MMAP @@ -218,6 +222,12 @@ global_free_pages_cleanup_event(void *data UNUSED) void resource_sys_init(void) { +#ifdef CONFIG_DISABLE_THP + /* Disable transparent huge pages, they do not work properly with madvise(MADV_DONTNEED) */ + if (prctl(PR_SET_THP_DISABLE, (unsigned long) 1, (unsigned long) 0, (unsigned long) 0, (unsigned long) 0) < 0) + die("prctl(PR_SET_THP_DISABLE) failed: %m"); +#endif + #ifdef HAVE_MMAP ASSERT_DIE(global_free_pages.cnt == 0); -- cgit v1.2.3