summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJianfeng Tan <henry.tjf@antfin.com>2020-01-28 12:31:58 -0800
committergVisor bot <gvisor-bot@google.com>2020-01-28 12:32:59 -0800
commitd99329e58492ef91b44a0bac346f757e8af2a7ec (patch)
tree0188f72f1edd543040041bf3377db4756fdf7e03
parent1119644080ae57c206b9b0d8d127cf48423af7f2 (diff)
netlink: add support for RTM_F_LOOKUP_TABLE
Test command: $ ip route get 1.1.1.1 Fixes: #1099 Signed-off-by: Jianfeng Tan <henry.tjf@antfin.com> COPYBARA_INTEGRATE_REVIEW=https://github.com/google/gvisor/pull/1121 from tanjianfeng:fix-1099 e6919f3d4ede5aa51a48b3d2be0d7a4b482dd53d PiperOrigin-RevId: 291990716
-rw-r--r--pkg/abi/linux/netlink_route.go13
-rw-r--r--pkg/sentry/socket/netlink/route/BUILD6
-rw-r--r--pkg/sentry/socket/netlink/route/protocol.go158
-rw-r--r--test/syscalls/linux/socket_netlink_route.cc84
-rw-r--r--test/syscalls/linux/socket_netlink_util.cc38
-rw-r--r--test/syscalls/linux/socket_netlink_util.h11
6 files changed, 295 insertions, 15 deletions
diff --git a/pkg/abi/linux/netlink_route.go b/pkg/abi/linux/netlink_route.go
index 0e3582ab6..40bec566c 100644
--- a/pkg/abi/linux/netlink_route.go
+++ b/pkg/abi/linux/netlink_route.go
@@ -205,6 +205,9 @@ type RouteMessage struct {
Flags uint32
}
+// SizeOfRouteMessage is the size of RouteMessage.
+const SizeOfRouteMessage = 12
+
// Route types, from uapi/linux/rtnetlink.h.
const (
// RTN_UNSPEC represents an unspecified route type.
@@ -331,3 +334,13 @@ const (
RTF_GATEWAY = 0x2
RTF_UP = 0x1
)
+
+// RtAttr is the header of optional addition route information, as a netlink
+// attribute. From include/uapi/linux/rtnetlink.h.
+type RtAttr struct {
+ Len uint16
+ Type uint16
+}
+
+// SizeOfRtAttr is the size of RtAttr.
+const SizeOfRtAttr = 4
diff --git a/pkg/sentry/socket/netlink/route/BUILD b/pkg/sentry/socket/netlink/route/BUILD
index 0234aadde..622a1eafc 100644
--- a/pkg/sentry/socket/netlink/route/BUILD
+++ b/pkg/sentry/socket/netlink/route/BUILD
@@ -4,15 +4,19 @@ package(licenses = ["notice"])
go_library(
name = "route",
- srcs = ["protocol.go"],
+ srcs = [
+ "protocol.go",
+ ],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/context",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/socket/netlink",
"//pkg/syserr",
+ "//pkg/usermem",
],
)
diff --git a/pkg/sentry/socket/netlink/route/protocol.go b/pkg/sentry/socket/netlink/route/protocol.go
index 80a15d6cb..2b3c7f5b3 100644
--- a/pkg/sentry/socket/netlink/route/protocol.go
+++ b/pkg/sentry/socket/netlink/route/protocol.go
@@ -19,12 +19,14 @@ import (
"bytes"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/socket/netlink"
"gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// commandKind describes the operational class of a message type.
@@ -66,8 +68,14 @@ func (p *Protocol) CanSend() bool {
return true
}
-// dumpLinks handles RTM_GETLINK + NLM_F_DUMP requests.
+// dumpLinks handles RTM_GETLINK dump requests.
func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+ // TODO(b/68878065): Only the dump variant of the types below are
+ // supported.
+ if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
+ return syserr.ErrNotSupported
+ }
+
// NLM_F_DUMP + RTM_GETLINK messages are supposed to include an
// ifinfomsg. However, Linux <3.9 only checked for rtgenmsg, and some
// userspace applications (including glibc) still include rtgenmsg.
@@ -121,8 +129,14 @@ func (p *Protocol) dumpLinks(ctx context.Context, hdr linux.NetlinkMessageHeader
return nil
}
-// dumpAddrs handles RTM_GETADDR + NLM_F_DUMP requests.
+// dumpAddrs handles RTM_GETADDR dump requests.
func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
+ // TODO(b/68878065): Only the dump variant of the types below are
+ // supported.
+ if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
+ return syserr.ErrNotSupported
+ }
+
// RTM_GETADDR dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
@@ -163,22 +177,146 @@ func (p *Protocol) dumpAddrs(ctx context.Context, hdr linux.NetlinkMessageHeader
return nil
}
-// dumpRoutes handles RTM_GETROUTE + NLM_F_DUMP requests.
+// commonPrefixLen reports the length of the longest IP address prefix.
+// This is a simplied version from Golang's src/net/addrselect.go.
+func commonPrefixLen(a, b []byte) (cpl int) {
+ for len(a) > 0 {
+ if a[0] == b[0] {
+ cpl += 8
+ a = a[1:]
+ b = b[1:]
+ continue
+ }
+ bits := 8
+ ab, bb := a[0], b[0]
+ for {
+ ab >>= 1
+ bb >>= 1
+ bits--
+ if ab == bb {
+ cpl += bits
+ return
+ }
+ }
+ }
+ return
+}
+
+// fillRoute returns the Route using LPM algorithm. Refer to Linux's
+// net/ipv4/route.c:rt_fill_info().
+func fillRoute(routes []inet.Route, addr []byte) (inet.Route, *syserr.Error) {
+ family := uint8(linux.AF_INET)
+ if len(addr) != 4 {
+ family = linux.AF_INET6
+ }
+
+ idx := -1 // Index of the Route rule to be returned.
+ idxDef := -1 // Index of the default route rule.
+ prefix := 0 // Current longest prefix.
+ for i, route := range routes {
+ if route.Family != family {
+ continue
+ }
+
+ if len(route.GatewayAddr) > 0 && route.DstLen == 0 {
+ idxDef = i
+ continue
+ }
+
+ cpl := commonPrefixLen(addr, route.DstAddr)
+ if cpl < int(route.DstLen) {
+ continue
+ }
+ cpl = int(route.DstLen)
+ if cpl > prefix {
+ idx = i
+ prefix = cpl
+ }
+ }
+ if idx == -1 {
+ idx = idxDef
+ }
+ if idx == -1 {
+ return inet.Route{}, syserr.ErrNoRoute
+ }
+
+ route := routes[idx]
+ if family == linux.AF_INET {
+ route.DstLen = 32
+ } else {
+ route.DstLen = 128
+ }
+ route.DstAddr = addr
+ route.Flags |= linux.RTM_F_CLONED // This route is cloned.
+ return route, nil
+}
+
+// parseForDestination parses a message as format of RouteMessage-RtAttr-dst.
+func parseForDestination(data []byte) ([]byte, *syserr.Error) {
+ var rtMsg linux.RouteMessage
+ if len(data) < linux.SizeOfRouteMessage {
+ return nil, syserr.ErrInvalidArgument
+ }
+ binary.Unmarshal(data[:linux.SizeOfRouteMessage], usermem.ByteOrder, &rtMsg)
+ // iproute2 added the RTM_F_LOOKUP_TABLE flag in version v4.4.0. See
+ // commit bc234301af12. Note we don't check this flag for backward
+ // compatibility.
+ if rtMsg.Flags != 0 && rtMsg.Flags != linux.RTM_F_LOOKUP_TABLE {
+ return nil, syserr.ErrNotSupported
+ }
+
+ data = data[linux.SizeOfRouteMessage:]
+
+ // TODO(gvisor.dev/issue/1611): Add generic attribute parsing.
+ var rtAttr linux.RtAttr
+ if len(data) < linux.SizeOfRtAttr {
+ return nil, syserr.ErrInvalidArgument
+ }
+ binary.Unmarshal(data[:linux.SizeOfRtAttr], usermem.ByteOrder, &rtAttr)
+ if rtAttr.Type != linux.RTA_DST {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ if len(data) < int(rtAttr.Len) {
+ return nil, syserr.ErrInvalidArgument
+ }
+ return data[linux.SizeOfRtAttr:rtAttr.Len], nil
+}
+
+// dumpRoutes handles RTM_GETROUTE requests.
func (p *Protocol) dumpRoutes(ctx context.Context, hdr linux.NetlinkMessageHeader, data []byte, ms *netlink.MessageSet) *syserr.Error {
// RTM_GETROUTE dump requests need not contain anything more than the
// netlink header and 1 byte protocol family common to all
// NETLINK_ROUTE requests.
- // We always send back an NLMSG_DONE.
- ms.Multi = true
-
stack := inet.StackFromContext(ctx)
if stack == nil {
// No network routes.
return nil
}
- for _, rt := range stack.RouteTable() {
+ routeTables := stack.RouteTable()
+
+ if hdr.Flags == linux.NLM_F_REQUEST {
+ dst, err := parseForDestination(data)
+ if err != nil {
+ return err
+ }
+ route, err := fillRoute(routeTables, dst)
+ if err != nil {
+ // TODO(gvisor.dev/issue/1237): return NLMSG_ERROR with ENETUNREACH.
+ return syserr.ErrNotSupported
+ }
+ routeTables = append([]inet.Route{}, route)
+ } else if hdr.Flags&linux.NLM_F_DUMP == linux.NLM_F_DUMP {
+ // We always send back an NLMSG_DONE.
+ ms.Multi = true
+ } else {
+ // TODO(b/68878065): Only above cases are supported.
+ return syserr.ErrNotSupported
+ }
+
+ for _, rt := range routeTables {
m := ms.AddMessage(linux.NetlinkMessageHeader{
Type: linux.RTM_NEWROUTE,
})
@@ -236,12 +374,6 @@ func (p *Protocol) ProcessMessage(ctx context.Context, hdr linux.NetlinkMessageH
}
}
- // TODO(b/68878065): Only the dump variant of the types below are
- // supported.
- if hdr.Flags&linux.NLM_F_DUMP != linux.NLM_F_DUMP {
- return syserr.ErrNotSupported
- }
-
switch hdr.Type {
case linux.RTM_GETLINK:
return p.dumpLinks(ctx, hdr, data, ms)
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
index ef567f512..1e28e658d 100644
--- a/test/syscalls/linux/socket_netlink_route.cc
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -442,6 +442,90 @@ TEST(NetlinkRouteTest, GetRouteDump) {
EXPECT_TRUE(dstFound);
}
+// GetRouteRequest tests a RTM_GETROUTE request with RTM_F_LOOKUP_TABLE flag.
+TEST(NetlinkRouteTest, GetRouteRequest) {
+ FileDescriptor fd =
+ ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
+ uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
+
+ struct __attribute__((__packed__)) request {
+ struct nlmsghdr hdr;
+ struct rtmsg rtm;
+ struct nlattr nla;
+ struct in_addr sin_addr;
+ };
+
+ constexpr uint32_t kSeq = 12345;
+
+ struct request req = {};
+ req.hdr.nlmsg_len = sizeof(req);
+ req.hdr.nlmsg_type = RTM_GETROUTE;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST;
+ req.hdr.nlmsg_seq = kSeq;
+
+ req.rtm.rtm_family = AF_INET;
+ req.rtm.rtm_dst_len = 32;
+ req.rtm.rtm_src_len = 0;
+ req.rtm.rtm_tos = 0;
+ req.rtm.rtm_table = RT_TABLE_UNSPEC;
+ req.rtm.rtm_protocol = RTPROT_UNSPEC;
+ req.rtm.rtm_scope = RT_SCOPE_UNIVERSE;
+ req.rtm.rtm_type = RTN_UNSPEC;
+ req.rtm.rtm_flags = RTM_F_LOOKUP_TABLE;
+
+ req.nla.nla_len = 8;
+ req.nla.nla_type = RTA_DST;
+ inet_aton("127.0.0.2", &req.sin_addr);
+
+ bool rtDstFound = false;
+ ASSERT_NO_ERRNO(NetlinkRequestResponseSingle(
+ fd, &req, sizeof(req), [&](const struct nlmsghdr* hdr) {
+ // Validate the reponse to RTM_GETROUTE request with RTM_F_LOOKUP_TABLE
+ // flag.
+ EXPECT_THAT(hdr->nlmsg_type, RTM_NEWROUTE);
+
+ EXPECT_TRUE(hdr->nlmsg_flags == 0) << std::hex << hdr->nlmsg_flags;
+
+ EXPECT_EQ(hdr->nlmsg_seq, kSeq);
+ EXPECT_EQ(hdr->nlmsg_pid, port);
+
+ // RTM_NEWROUTE contains at least the header and rtmsg.
+ ASSERT_GE(hdr->nlmsg_len, NLMSG_SPACE(sizeof(struct rtmsg)));
+ const struct rtmsg* msg =
+ reinterpret_cast<const struct rtmsg*>(NLMSG_DATA(hdr));
+
+ // NOTE: rtmsg fields are char fields.
+ std::cout << "Found route table=" << static_cast<int>(msg->rtm_table)
+ << ", protocol=" << static_cast<int>(msg->rtm_protocol)
+ << ", scope=" << static_cast<int>(msg->rtm_scope)
+ << ", type=" << static_cast<int>(msg->rtm_type);
+
+ EXPECT_EQ(msg->rtm_family, AF_INET);
+ EXPECT_EQ(msg->rtm_dst_len, 32);
+ EXPECT_TRUE((msg->rtm_flags & RTM_F_CLONED) == RTM_F_CLONED)
+ << std::hex << msg->rtm_flags;
+
+ int len = RTM_PAYLOAD(hdr);
+ std::cout << ", len=" << len;
+ for (struct rtattr* attr = RTM_RTA(msg); RTA_OK(attr, len);
+ attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == RTA_DST) {
+ char address[INET_ADDRSTRLEN] = {};
+ inet_ntop(AF_INET, RTA_DATA(attr), address, sizeof(address));
+ std::cout << ", dst=" << address;
+ rtDstFound = true;
+ } else if (attr->rta_type == RTA_OIF) {
+ const char* oif = reinterpret_cast<const char*>(RTA_DATA(attr));
+ std::cout << ", oif=" << oif;
+ }
+ }
+
+ std::cout << std::endl;
+ }));
+ // Found RTA_DST for RTM_F_LOOKUP_TABLE.
+ EXPECT_TRUE(rtDstFound);
+}
+
// RecvmsgTrunc tests the recvmsg MSG_TRUNC flag with zero length output
// buffer. MSG_TRUNC with a zero length buffer should consume subsequent
// messages off the socket.
diff --git a/test/syscalls/linux/socket_netlink_util.cc b/test/syscalls/linux/socket_netlink_util.cc
index 723f5d728..cd2212a1a 100644
--- a/test/syscalls/linux/socket_netlink_util.cc
+++ b/test/syscalls/linux/socket_netlink_util.cc
@@ -108,5 +108,43 @@ PosixError NetlinkRequestResponse(
return NoError();
}
+PosixError NetlinkRequestResponseSingle(
+ const FileDescriptor& fd, void* request, size_t len,
+ const std::function<void(const struct nlmsghdr* hdr)>& fn) {
+ struct iovec iov = {};
+ iov.iov_base = request;
+ iov.iov_len = len;
+
+ struct msghdr msg = {};
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ // No destination required; it defaults to pid 0, the kernel.
+
+ RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(sendmsg)(fd.get(), &msg, 0));
+
+ constexpr size_t kBufferSize = 4096;
+ std::vector<char> buf(kBufferSize);
+ iov.iov_base = buf.data();
+ iov.iov_len = buf.size();
+
+ int ret;
+ RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(recvmsg)(fd.get(), &msg, 0));
+
+ // We don't bother with the complexity of dealing with truncated messages.
+ // We must allocate a large enough buffer up front.
+ if ((msg.msg_flags & MSG_TRUNC) == MSG_TRUNC) {
+ return PosixError(
+ EIO,
+ absl::StrCat("Received truncated message with flags: ", msg.msg_flags));
+ }
+
+ for (struct nlmsghdr* hdr = reinterpret_cast<struct nlmsghdr*>(buf.data());
+ NLMSG_OK(hdr, ret); hdr = NLMSG_NEXT(hdr, ret)) {
+ fn(hdr);
+ }
+
+ return NoError();
+}
+
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/socket_netlink_util.h b/test/syscalls/linux/socket_netlink_util.h
index 76e772c48..3678c0599 100644
--- a/test/syscalls/linux/socket_netlink_util.h
+++ b/test/syscalls/linux/socket_netlink_util.h
@@ -32,12 +32,21 @@ PosixErrorOr<FileDescriptor> NetlinkBoundSocket(int protocol);
// Returns the port ID of the passed socket.
PosixErrorOr<uint32_t> NetlinkPortID(int fd);
-// Send the passed request and call fn will all response netlink messages.
+// Send the passed request and call fn on all response netlink messages.
+//
+// To be used on requests with NLM_F_MULTI reponses.
PosixError NetlinkRequestResponse(
const FileDescriptor& fd, void* request, size_t len,
const std::function<void(const struct nlmsghdr* hdr)>& fn,
bool expect_nlmsgerr);
+// Send the passed request and call fn on all response netlink messages.
+//
+// To be used on requests without NLM_F_MULTI reponses.
+PosixError NetlinkRequestResponseSingle(
+ const FileDescriptor& fd, void* request, size_t len,
+ const std::function<void(const struct nlmsghdr* hdr)>& fn);
+
} // namespace testing
} // namespace gvisor