From b1d57877264c2b94e3024375efc9914881f0bbe8 Mon Sep 17 00:00:00 2001 From: Kevin Krakauer Date: Mon, 15 Mar 2021 18:47:41 -0700 Subject: Make netstack (//pkg/tcpip) buildable for 32 bit Doing so involved breaking dependencies between //pkg/tcpip and the rest of gVisor, which are discouraged anyways. Tested on the Go branch via: gvisor.dev/gvisor/pkg/tcpip/... Addresses #1446. PiperOrigin-RevId: 363081778 --- pkg/iovec/BUILD | 10 +++--- pkg/iovec/iovec.go | 18 ++++------ pkg/iovec/iovec_max.go | 19 +++++++++++ pkg/sentry/devices/tundev/tundev.go | 15 ++++---- pkg/sentry/fs/dev/net_tun.go | 15 ++++---- pkg/sentry/socket/netstack/BUILD | 2 ++ pkg/sentry/socket/netstack/tun.go | 51 ++++++++++++++++++++++++++++ pkg/tcpip/link/fdbased/endpoint.go | 2 +- pkg/tcpip/link/fdbased/packet_dispatchers.go | 14 +++----- pkg/tcpip/link/tun/device.go | 37 ++++++++++---------- pkg/tcpip/transport/tcp/snd.go | 4 ++- 11 files changed, 124 insertions(+), 63 deletions(-) create mode 100644 pkg/iovec/iovec_max.go create mode 100644 pkg/sentry/socket/netstack/tun.go diff --git a/pkg/iovec/BUILD b/pkg/iovec/BUILD index b92a58556..e0c016fa3 100644 --- a/pkg/iovec/BUILD +++ b/pkg/iovec/BUILD @@ -4,12 +4,12 @@ package(licenses = ["notice"]) go_library( name = "iovec", - srcs = ["iovec.go"], - visibility = ["//:sandbox"], - deps = [ - "//pkg/abi/linux", - "@org_golang_x_sys//unix:go_default_library", + srcs = [ + "iovec.go", + "iovec_max.go", ], + visibility = ["//:sandbox"], + deps = ["@org_golang_x_sys//unix:go_default_library"], ) go_test( diff --git a/pkg/iovec/iovec.go b/pkg/iovec/iovec.go index 0789c74bf..f6791060f 100644 --- a/pkg/iovec/iovec.go +++ b/pkg/iovec/iovec.go @@ -20,12 +20,8 @@ package iovec import ( "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/abi/linux" ) -// MaxIovs is the maximum number of iovecs host platform can accept. -var MaxIovs = linux.UIO_MAXIOV - // Builder is a builder for slice of unix.Iovec. 
type Builder struct { iovec []unix.Iovec @@ -47,10 +43,10 @@ func (b *Builder) Add(buf []byte) { b.addByAppend(buf) return } - b.iovec = append(b.iovec, unix.Iovec{ - Base: &buf[0], - Len: uint64(len(buf)), - }) + + b.iovec = append(b.iovec, unix.Iovec{Base: &buf[0]}) + b.iovec[len(b.iovec)-1].SetLen(len(buf)) + // Keep the last buf if iovec is at max capacity. We will need to append to it // for later bufs. if len(b.iovec) == MaxIovs { @@ -61,10 +57,8 @@ func (b *Builder) Add(buf []byte) { func (b *Builder) addByAppend(buf []byte) { b.overflow = append(b.overflow, buf...) - b.iovec[len(b.iovec)-1] = unix.Iovec{ - Base: &b.overflow[0], - Len: uint64(len(b.overflow)), - } + b.iovec[len(b.iovec)-1] = unix.Iovec{Base: &b.overflow[0]} + b.iovec[len(b.iovec)-1].SetLen(len(b.overflow)) } // Build returns the final Iovec slice. The length of returned iovec will not diff --git a/pkg/iovec/iovec_max.go b/pkg/iovec/iovec_max.go new file mode 100644 index 000000000..724b256e8 --- /dev/null +++ b/pkg/iovec/iovec_max.go @@ -0,0 +1,19 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package iovec + +// MaxIovs is the maximum number of iovecs host platform can accept. It +// corresponds to Linux's UIO_MAXIOV, which is not in the unix package. 
+const MaxIovs = 1024 diff --git a/pkg/sentry/devices/tundev/tundev.go b/pkg/sentry/devices/tundev/tundev.go index d8f4e1d35..c43158aa4 100644 --- a/pkg/sentry/devices/tundev/tundev.go +++ b/pkg/sentry/devices/tundev/tundev.go @@ -87,19 +87,18 @@ func (fd *tunFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArg if _, err := req.CopyIn(t, data); err != nil { return 0, err } - flags := usermem.ByteOrder.Uint16(req.Data[:]) + + // Validate flags. + flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:])) + if err != nil { + return 0, err + } return 0, fd.device.SetIff(stack.Stack, req.Name(), flags) case linux.TUNGETIFF: var req linux.IFReq - copy(req.IFName[:], fd.device.Name()) - - // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when - // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. - flags := fd.device.Flags() | linux.IFF_NOFILTER - usermem.ByteOrder.PutUint16(req.Data[:], flags) - + usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(fd.device.Flags())) _, err := req.CopyOut(t, data) return 0, err diff --git a/pkg/sentry/fs/dev/net_tun.go b/pkg/sentry/fs/dev/net_tun.go index 5227ef652..11a2984d8 100644 --- a/pkg/sentry/fs/dev/net_tun.go +++ b/pkg/sentry/fs/dev/net_tun.go @@ -108,19 +108,18 @@ func (n *netTunFileOperations) Ioctl(ctx context.Context, file *fs.File, io user if _, err := req.CopyIn(t, data); err != nil { return 0, err } - flags := usermem.ByteOrder.Uint16(req.Data[:]) + + // Validate flags. + flags, err := netstack.LinuxToTUNFlags(usermem.ByteOrder.Uint16(req.Data[:])) + if err != nil { + return 0, err + } return 0, n.device.SetIff(stack.Stack, req.Name(), flags) case linux.TUNGETIFF: var req linux.IFReq - copy(req.IFName[:], n.device.Name()) - - // Linux adds IFF_NOFILTER (the same value as IFF_NO_PI unfortunately) when - // there is no sk_filter. See __tun_chr_ioctl() in net/drivers/tun.c. 
- flags := n.device.Flags() | linux.IFF_NOFILTER - usermem.ByteOrder.PutUint16(req.Data[:], flags) - + usermem.ByteOrder.PutUint16(req.Data[:], netstack.TUNFlagsToLinux(n.device.Flags())) _, err := req.CopyOut(t, data) return 0, err diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD index 915134b41..244d99436 100644 --- a/pkg/sentry/socket/netstack/BUILD +++ b/pkg/sentry/socket/netstack/BUILD @@ -12,6 +12,7 @@ go_library( "provider_vfs2.go", "save_restore.go", "stack.go", + "tun.go", ], visibility = [ "//pkg/sentry:internal", @@ -42,6 +43,7 @@ go_library( "//pkg/syserror", "//pkg/tcpip", "//pkg/tcpip/header", + "//pkg/tcpip/link/tun", "//pkg/tcpip/network/ipv4", "//pkg/tcpip/network/ipv6", "//pkg/tcpip/stack", diff --git a/pkg/sentry/socket/netstack/tun.go b/pkg/sentry/socket/netstack/tun.go new file mode 100644 index 000000000..288dd0c9e --- /dev/null +++ b/pkg/sentry/socket/netstack/tun.go @@ -0,0 +1,51 @@ +// Copyright 2021 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package netstack + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/tcpip/link/tun" +) + +// TUNFlagsToLinux converts a tun.Flags to Linux TUN flags. 
+func TUNFlagsToLinux(flags tun.Flags) uint16 { + ret := uint16(linux.IFF_NOFILTER) + if flags.TAP { + ret |= linux.IFF_TAP + } + if flags.TUN { + ret |= linux.IFF_TUN + } + if flags.NoPacketInfo { + ret |= linux.IFF_NO_PI + } + return ret +} + +// LinuxToTUNFlags converts Linux TUN flags to a tun.Flags. +func LinuxToTUNFlags(flags uint16) (tun.Flags, error) { + // Reject any flag outside the supported set. IFF_NO_PI shares its value + // with IFF_NOFILTER, which Linux reports from TUNGETIFF when there is no + // sk_filter. See __tun_chr_ioctl() in drivers/net/tun.c. + if flags&^uint16(linux.IFF_TUN|linux.IFF_TAP|linux.IFF_NO_PI) != 0 { + return tun.Flags{}, syserror.EINVAL + } + return tun.Flags{ + TUN: flags&linux.IFF_TUN != 0, + TAP: flags&linux.IFF_TAP != 0, + NoPacketInfo: flags&linux.IFF_NO_PI != 0, + }, nil +} diff --git a/pkg/tcpip/link/fdbased/endpoint.go b/pkg/tcpip/link/fdbased/endpoint.go index e17e2085c..2bb1be5d6 100644 --- a/pkg/tcpip/link/fdbased/endpoint.go +++ b/pkg/tcpip/link/fdbased/endpoint.go @@ -492,7 +492,7 @@ func (e *endpoint) sendBatch(batchFD int, batch []*stack.PacketBuffer) (int, tcp var mmsgHdr rawfile.MMsgHdr mmsgHdr.Msg.Iov = &iovecs[0] - mmsgHdr.Msg.Iovlen = uint64(len(iovecs)) + mmsgHdr.Msg.SetIovlen((len(iovecs))) mmsgHdrs = append(mmsgHdrs, mmsgHdr) } diff --git a/pkg/tcpip/link/fdbased/packet_dispatchers.go b/pkg/tcpip/link/fdbased/packet_dispatchers.go index 46df87f44..a7adf822b 100644 --- a/pkg/tcpip/link/fdbased/packet_dispatchers.go +++ b/pkg/tcpip/link/fdbased/packet_dispatchers.go @@ -68,10 +68,8 @@ func (b *iovecBuffer) nextIovecs() []unix.Iovec { // The kernel adds virtioNetHdr before each packet, but // we don't use it, so so we allocate a buffer for it, // add it in iovecs but don't add it in a view. 
- b.iovecs[0] = unix.Iovec{ - Base: &vnetHdr[0], - Len: uint64(virtioNetHdrSize), - } + b.iovecs[0] = unix.Iovec{Base: &vnetHdr[0]} + b.iovecs[0].SetLen(virtioNetHdrSize) vnetHdrOff++ } for i := range b.views { @@ -80,10 +78,8 @@ func (b *iovecBuffer) nextIovecs() []unix.Iovec { } v := buffer.NewView(b.sizes[i]) b.views[i] = v - b.iovecs[i+vnetHdrOff] = unix.Iovec{ - Base: &v[0], - Len: uint64(len(v)), - } + b.iovecs[i+vnetHdrOff] = unix.Iovec{Base: &v[0]} + b.iovecs[i+vnetHdrOff].SetLen(len(v)) } return b.iovecs } @@ -235,7 +231,7 @@ func (d *recvMMsgDispatcher) dispatch() (bool, tcpip.Error) { iovLen := len(iovecs) d.msgHdrs[k].Len = 0 d.msgHdrs[k].Msg.Iov = &iovecs[0] - d.msgHdrs[k].Msg.Iovlen = uint64(iovLen) + d.msgHdrs[k].Msg.SetIovlen(iovLen) } nMsgs, err := rawfile.BlockingRecvMMsg(d.fd, d.msgHdrs) diff --git a/pkg/tcpip/link/tun/device.go b/pkg/tcpip/link/tun/device.go index c1678c4f4..80fb343c5 100644 --- a/pkg/tcpip/link/tun/device.go +++ b/pkg/tcpip/link/tun/device.go @@ -17,7 +17,6 @@ package tun import ( "fmt" - "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/sync" "gvisor.dev/gvisor/pkg/syserror" @@ -49,7 +48,14 @@ type Device struct { mu sync.RWMutex `state:"nosave"` endpoint *tunEndpoint notifyHandle *channel.NotificationHandle - flags uint16 + flags Flags +} + +// Flags set properties of a Device +type Flags struct { + TUN bool + TAP bool + NoPacketInfo bool } // beforeSave is invoked by stateify. @@ -77,7 +83,7 @@ func (d *Device) Release(ctx context.Context) { } // SetIff services TUNSETIFF ioctl(2) request. -func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { +func (d *Device) SetIff(s *stack.Stack, name string, flags Flags) error { d.mu.Lock() defer d.mu.Unlock() @@ -85,21 +91,18 @@ func (d *Device) SetIff(s *stack.Stack, name string, flags uint16) error { return syserror.EINVAL } - // Input validations. 
- isTun := flags&linux.IFF_TUN != 0 - isTap := flags&linux.IFF_TAP != 0 - supportedFlags := uint16(linux.IFF_TUN | linux.IFF_TAP | linux.IFF_NO_PI) - if isTap && isTun || !isTap && !isTun || flags&^supportedFlags != 0 { + // Input validation. + if flags.TAP && flags.TUN || !flags.TAP && !flags.TUN { return syserror.EINVAL } prefix := "tun" - if isTap { + if flags.TAP { prefix = "tap" } linkCaps := stack.CapabilityNone - if isTap { + if flags.TAP { linkCaps |= stack.CapabilityResolutionRequired } @@ -177,7 +180,7 @@ func (d *Device) Write(data []byte) (int64, error) { // Packet information. var pktInfoHdr PacketInfoHeader - if !d.hasFlags(linux.IFF_NO_PI) { + if !d.flags.NoPacketInfo { if len(data) < PacketInfoHeaderSize { // Ignore bad packet. return dataLen, nil @@ -188,7 +191,7 @@ func (d *Device) Write(data []byte) (int64, error) { // Ethernet header (TAP only). var ethHdr header.Ethernet - if d.hasFlags(linux.IFF_TAP) { + if d.flags.TAP { if len(data) < header.EthernetMinimumSize { // Ignore bad packet. return dataLen, nil @@ -253,7 +256,7 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { var vv buffer.VectorisedView // Packet information. - if !d.hasFlags(linux.IFF_NO_PI) { + if !d.flags.NoPacketInfo { hdr := make(PacketInfoHeader, PacketInfoHeaderSize) hdr.Encode(&PacketInfoFields{ Protocol: info.Proto, @@ -269,7 +272,7 @@ func (d *Device) encodePkt(info *channel.PacketInfo) (buffer.View, bool) { } // Ethernet header (TAP only). - if d.hasFlags(linux.IFF_TAP) { + if d.flags.TAP { // Add ethernet header if not provided. if info.Pkt.LinkHeader().View().IsEmpty() { d.endpoint.AddHeader(info.Route.LocalLinkAddress, info.Route.RemoteLinkAddress, info.Proto, info.Pkt) @@ -298,16 +301,12 @@ func (d *Device) Name() string { } // Flags returns the flags set for d. Zero value if unset. 
-func (d *Device) Flags() uint16 { +func (d *Device) Flags() Flags { d.mu.RLock() defer d.mu.RUnlock() return d.flags } -func (d *Device) hasFlags(flags uint16) bool { - return d.flags&flags == flags -} - // Readiness implements watier.Waitable.Readiness. func (d *Device) Readiness(mask waiter.EventMask) waiter.EventMask { if mask&waiter.EventIn != 0 { diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go index 18817029d..faca35892 100644 --- a/pkg/tcpip/transport/tcp/snd.go +++ b/pkg/tcpip/transport/tcp/snd.go @@ -323,7 +323,9 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint // their initial values. func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionControlOption) congestionControl { s.sndCwnd = InitialCwnd - s.sndSsthresh = math.MaxInt64 + // Set sndSsthresh to the maximum int value, which depends on the + // platform. + s.sndSsthresh = int(^uint(0) >> 1) switch congestionControlName { case ccCubic: -- cgit v1.2.3