summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/abi/linux/BUILD2
-rw-r--r--pkg/abi/linux/ptrace_amd64.go52
-rw-r--r--pkg/abi/linux/ptrace_arm64.go (renamed from pkg/sentry/arch/arch_state_aarch64.go)25
-rw-r--r--pkg/sentry/arch/BUILD4
-rw-r--r--pkg/sentry/arch/arch_aarch64.go26
-rw-r--r--pkg/sentry/arch/arch_amd64.go13
-rw-r--r--pkg/sentry/arch/arch_arm64.go2
-rw-r--r--pkg/sentry/arch/arch_state_x86.go42
-rw-r--r--pkg/sentry/arch/arch_x86.go25
-rw-r--r--pkg/sentry/arch/arch_x86_impl.go4
-rw-r--r--pkg/sentry/arch/signal.go3
-rw-r--r--pkg/sentry/arch/signal_act.go4
-rw-r--r--pkg/sentry/arch/signal_stack.go3
-rw-r--r--pkg/sentry/fs/host/control.go6
-rw-r--r--pkg/sentry/fs/host/socket.go4
-rw-r--r--pkg/sentry/fs/host/socket_iovec.go4
-rw-r--r--pkg/sentry/fs/host/socket_unsafe.go4
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go2
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go2
-rw-r--r--pkg/sentry/fsimpl/devpts/slave.go2
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go1
-rw-r--r--pkg/sentry/fsimpl/host/BUILD13
-rw-r--r--pkg/sentry/fsimpl/host/control.go96
-rw-r--r--pkg/sentry/fsimpl/host/host.go67
-rw-r--r--pkg/sentry/fsimpl/host/socket.go397
-rw-r--r--pkg/sentry/fsimpl/host/socket_iovec.go113
-rw-r--r--pkg/sentry/fsimpl/host/socket_unsafe.go101
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go8
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go4
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go5
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go2
-rw-r--r--pkg/sentry/fsimpl/proc/task.go2
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go4
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go2
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go2
-rw-r--r--pkg/sentry/fsimpl/sockfs/sockfs.go22
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go2
-rw-r--r--pkg/sentry/kernel/task_signals.go8
-rw-r--r--pkg/sentry/kernel/task_start.go3
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go30
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go14
-rw-r--r--pkg/sentry/platform/kvm/kvm_arm64.go5
-rw-r--r--pkg/sentry/platform/kvm/kvm_const_arm64.go41
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go36
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go5
-rw-r--r--pkg/sentry/platform/kvm/testutil/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/testutil/testutil_amd64.go17
-rw-r--r--pkg/sentry/platform/kvm/testutil/testutil_arm64.go13
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_amd64.go7
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_arm64.go5
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go4
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go8
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_amd64.go16
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_arm64.go16
-rw-r--r--pkg/sentry/platform/ring0/BUILD1
-rw-r--r--pkg/sentry/platform/ring0/defs.go9
-rw-r--r--pkg/sentry/platform/ring0/defs_arm64.go6
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.go6
-rw-r--r--pkg/sentry/platform/ring0/gen_offsets/BUILD1
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.go4
-rw-r--r--pkg/sentry/platform/ring0/offsets_amd64.go5
-rw-r--r--pkg/sentry/platform/ring0/offsets_arm64.go5
-rw-r--r--pkg/sentry/socket/control/BUILD6
-rw-r--r--pkg/sentry/socket/control/control.go24
-rw-r--r--pkg/sentry/socket/control/control_vfs2.go131
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go5
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go5
-rw-r--r--pkg/sentry/socket/unix/BUILD1
-rw-r--r--pkg/sentry/socket/unix/unix_vfs2.go42
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go10
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/socket.go2
-rw-r--r--pkg/sentry/watchdog/watchdog.go20
-rw-r--r--pkg/tcpip/buffer/view.go55
-rw-r--r--pkg/tcpip/buffer/view_test.go113
-rw-r--r--pkg/tcpip/header/ipv6.go10
-rw-r--r--pkg/tcpip/link/loopback/loopback.go10
-rw-r--r--pkg/tcpip/link/rawfile/BUILD9
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_test.go46
-rw-r--r--pkg/tcpip/link/rawfile/rawfile_unsafe.go6
-rw-r--r--pkg/tcpip/link/sharedmem/sharedmem_test.go2
-rw-r--r--pkg/tcpip/link/sniffer/sniffer.go65
-rw-r--r--pkg/tcpip/network/arp/arp.go5
-rw-r--r--pkg/tcpip/network/ipv4/icmp.go20
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go12
-rw-r--r--pkg/tcpip/network/ipv6/icmp.go74
-rw-r--r--pkg/tcpip/network/ipv6/icmp_test.go3
-rw-r--r--pkg/tcpip/network/ipv6/ipv6.go6
-rw-r--r--pkg/tcpip/stack/forwarder_test.go13
-rw-r--r--pkg/tcpip/stack/iptables.go22
-rw-r--r--pkg/tcpip/stack/iptables_targets.go23
-rw-r--r--pkg/tcpip/stack/ndp.go31
-rw-r--r--pkg/tcpip/stack/nic.go34
-rw-r--r--pkg/tcpip/stack/packet_buffer.go8
-rw-r--r--pkg/tcpip/stack/stack_test.go10
-rw-r--r--pkg/tcpip/stack/transport_test.go5
-rw-r--r--pkg/tcpip/timer.go25
-rw-r--r--pkg/tcpip/timer_test.go16
-rw-r--r--pkg/tcpip/transport/icmp/endpoint.go8
-rw-r--r--pkg/tcpip/transport/tcp/segment.go29
-rw-r--r--pkg/tcpip/transport/tcp/tcp_sack_test.go68
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go16
-rw-r--r--pkg/tcpip/transport/udp/endpoint.go6
-rw-r--r--pkg/tcpip/transport/udp/protocol.go9
-rw-r--r--pkg/test/dockerutil/dockerutil.go5
-rw-r--r--pkg/test/testutil/testutil.go38
109 files changed, 1549 insertions, 847 deletions
diff --git a/pkg/abi/linux/BUILD b/pkg/abi/linux/BUILD
index 59b0e138a..114b516e2 100644
--- a/pkg/abi/linux/BUILD
+++ b/pkg/abi/linux/BUILD
@@ -44,6 +44,8 @@ go_library(
"poll.go",
"prctl.go",
"ptrace.go",
+ "ptrace_amd64.go",
+ "ptrace_arm64.go",
"rseq.go",
"rusage.go",
"sched.go",
diff --git a/pkg/abi/linux/ptrace_amd64.go b/pkg/abi/linux/ptrace_amd64.go
new file mode 100644
index 000000000..ed3881e27
--- /dev/null
+++ b/pkg/abi/linux/ptrace_amd64.go
@@ -0,0 +1,52 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package linux
+
+// PtraceRegs is the set of CPU registers exposed by ptrace. Source:
+// syscall.PtraceRegs.
+//
+// +marshal
+// +stateify savable
+type PtraceRegs struct {
+ R15 uint64
+ R14 uint64
+ R13 uint64
+ R12 uint64
+ Rbp uint64
+ Rbx uint64
+ R11 uint64
+ R10 uint64
+ R9 uint64
+ R8 uint64
+ Rax uint64
+ Rcx uint64
+ Rdx uint64
+ Rsi uint64
+ Rdi uint64
+ Orig_rax uint64
+ Rip uint64
+ Cs uint64
+ Eflags uint64
+ Rsp uint64
+ Ss uint64
+ Fs_base uint64
+ Gs_base uint64
+ Ds uint64
+ Es uint64
+ Fs uint64
+ Gs uint64
+}
diff --git a/pkg/sentry/arch/arch_state_aarch64.go b/pkg/abi/linux/ptrace_arm64.go
index 0136a85ad..6147738b3 100644
--- a/pkg/sentry/arch/arch_state_aarch64.go
+++ b/pkg/abi/linux/ptrace_arm64.go
@@ -1,4 +1,4 @@
-// Copyright 2020 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,25 +14,16 @@
// +build arm64
-package arch
+package linux
-import (
- "syscall"
-)
-
-type syscallPtraceRegs struct {
+// PtraceRegs is the set of CPU registers exposed by ptrace. Source:
+// syscall.PtraceRegs.
+//
+// +marshal
+// +stateify savable
+type PtraceRegs struct {
Regs [31]uint64
Sp uint64
Pc uint64
Pstate uint64
}
-
-// saveRegs is invoked by stateify.
-func (s *State) saveRegs() syscallPtraceRegs {
- return syscallPtraceRegs(s.Regs)
-}
-
-// loadRegs is invoked by stateify.
-func (s *State) loadRegs(r syscallPtraceRegs) {
- s.Regs = syscall.PtraceRegs(r)
-}
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index e27f21e5e..901e0f320 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -11,7 +11,6 @@ go_library(
"arch_amd64.go",
"arch_amd64.s",
"arch_arm64.go",
- "arch_state_aarch64.go",
"arch_state_x86.go",
"arch_x86.go",
"arch_x86_impl.go",
@@ -26,11 +25,11 @@ go_library(
"syscalls_amd64.go",
"syscalls_arm64.go",
],
+ marshal = True,
visibility = ["//:sandbox"],
deps = [
":registers_go_proto",
"//pkg/abi/linux",
- "//pkg/binary",
"//pkg/context",
"//pkg/cpuid",
"//pkg/log",
@@ -38,6 +37,7 @@ go_library(
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
+ "//tools/go_marshal/marshal",
],
)
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index c29e1b841..529980267 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -17,18 +17,20 @@
package arch
import (
+ "encoding/binary"
"fmt"
"io"
- "syscall"
- "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
"gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
)
+// Registers represents the CPU registers for this architecture.
+type Registers = linux.PtraceRegs
+
const (
// SyscallWidth is the width of insturctions.
SyscallWidth = 4
@@ -90,7 +92,7 @@ func NewFloatingPointData() *FloatingPointData {
// file ensures it's only built on aarch64).
type State struct {
// The system registers.
- Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
+ Regs Registers
// Our floating point state.
aarch64FPState `state:"wait"`
@@ -226,25 +228,27 @@ func (s *State) RegisterMap() (map[string]uintptr, error) {
// PtraceGetRegs implements Context.PtraceGetRegs.
func (s *State) PtraceGetRegs(dst io.Writer) (int, error) {
- return dst.Write(binary.Marshal(nil, usermem.ByteOrder, s.ptraceGetRegs()))
+ regs := s.ptraceGetRegs()
+ n, err := regs.WriteTo(dst)
+ return int(n), err
}
-func (s *State) ptraceGetRegs() syscall.PtraceRegs {
+func (s *State) ptraceGetRegs() Registers {
return s.Regs
}
-var ptraceRegsSize = int(binary.Size(syscall.PtraceRegs{}))
+var registersSize = (*Registers)(nil).SizeBytes()
// PtraceSetRegs implements Context.PtraceSetRegs.
func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
- var regs syscall.PtraceRegs
- buf := make([]byte, ptraceRegsSize)
+ var regs Registers
+ buf := make([]byte, registersSize)
if _, err := io.ReadFull(src, buf); err != nil {
return 0, err
}
- binary.Unmarshal(buf, usermem.ByteOrder, &regs)
+ regs.UnmarshalUnsafe(buf)
s.Regs = regs
- return ptraceRegsSize, nil
+ return registersSize, nil
}
// PtraceGetFPRegs implements Context.PtraceGetFPRegs.
diff --git a/pkg/sentry/arch/arch_amd64.go b/pkg/sentry/arch/arch_amd64.go
index 85d6acc0f..3b3a0a272 100644
--- a/pkg/sentry/arch/arch_amd64.go
+++ b/pkg/sentry/arch/arch_amd64.go
@@ -22,7 +22,6 @@ import (
"math/rand"
"syscall"
- "gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/usermem"
@@ -301,8 +300,10 @@ func (c *context64) PtracePeekUser(addr uintptr) (interface{}, error) {
// PTRACE_PEEKUSER and PTRACE_POKEUSER are only effective on regs and
// u_debugreg, returning 0 or silently no-oping for other fields
// respectively.
- if addr < uintptr(ptraceRegsSize) {
- buf := binary.Marshal(nil, usermem.ByteOrder, c.ptraceGetRegs())
+ if addr < uintptr(registersSize) {
+ regs := c.ptraceGetRegs()
+ buf := make([]byte, regs.SizeBytes())
+ regs.MarshalUnsafe(buf)
return c.Native(uintptr(usermem.ByteOrder.Uint64(buf[addr:]))), nil
}
// Note: x86 debug registers are missing.
@@ -314,8 +315,10 @@ func (c *context64) PtracePokeUser(addr, data uintptr) error {
if addr&7 != 0 || addr >= userStructSize {
return syscall.EIO
}
- if addr < uintptr(ptraceRegsSize) {
- buf := binary.Marshal(nil, usermem.ByteOrder, c.ptraceGetRegs())
+ if addr < uintptr(registersSize) {
+ regs := c.ptraceGetRegs()
+ buf := make([]byte, regs.SizeBytes())
+ regs.MarshalUnsafe(buf)
usermem.ByteOrder.PutUint64(buf[addr:], uint64(data))
_, err := c.PtraceSetRegs(bytes.NewBuffer(buf))
return err
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index db99c5acb..ada7ac7b8 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build arm64
+
package arch
import (
diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go
index aa31169e0..19ce99d25 100644
--- a/pkg/sentry/arch/arch_state_x86.go
+++ b/pkg/sentry/arch/arch_state_x86.go
@@ -18,7 +18,6 @@ package arch
import (
"fmt"
- "syscall"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/usermem"
@@ -90,44 +89,3 @@ func (s *State) afterLoadFPState() {
// Copy to the new, aligned location.
copy(s.x86FPState, old)
}
-
-// +stateify savable
-type syscallPtraceRegs struct {
- R15 uint64
- R14 uint64
- R13 uint64
- R12 uint64
- Rbp uint64
- Rbx uint64
- R11 uint64
- R10 uint64
- R9 uint64
- R8 uint64
- Rax uint64
- Rcx uint64
- Rdx uint64
- Rsi uint64
- Rdi uint64
- Orig_rax uint64
- Rip uint64
- Cs uint64
- Eflags uint64
- Rsp uint64
- Ss uint64
- Fs_base uint64
- Gs_base uint64
- Ds uint64
- Es uint64
- Fs uint64
- Gs uint64
-}
-
-// saveRegs is invoked by stateify.
-func (s *State) saveRegs() syscallPtraceRegs {
- return syscallPtraceRegs(s.Regs)
-}
-
-// loadRegs is invoked by stateify.
-func (s *State) loadRegs(r syscallPtraceRegs) {
- s.Regs = syscall.PtraceRegs(r)
-}
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 7fc4c0473..dc458b37f 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -21,7 +21,7 @@ import (
"io"
"syscall"
- "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/log"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
@@ -30,6 +30,9 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
+// Registers represents the CPU registers for this architecture.
+type Registers = linux.PtraceRegs
+
// System-related constants for x86.
const (
// SyscallWidth is the width of syscall, sysenter, and int 80 insturctions.
@@ -267,10 +270,12 @@ func (s *State) RegisterMap() (map[string]uintptr, error) {
// PtraceGetRegs implements Context.PtraceGetRegs.
func (s *State) PtraceGetRegs(dst io.Writer) (int, error) {
- return dst.Write(binary.Marshal(nil, usermem.ByteOrder, s.ptraceGetRegs()))
+ regs := s.ptraceGetRegs()
+ n, err := regs.WriteTo(dst)
+ return int(n), err
}
-func (s *State) ptraceGetRegs() syscall.PtraceRegs {
+func (s *State) ptraceGetRegs() Registers {
regs := s.Regs
// These may not be initialized.
if regs.Cs == 0 || regs.Ss == 0 || regs.Eflags == 0 {
@@ -306,16 +311,16 @@ func (s *State) ptraceGetRegs() syscall.PtraceRegs {
return regs
}
-var ptraceRegsSize = int(binary.Size(syscall.PtraceRegs{}))
+var registersSize = (*Registers)(nil).SizeBytes()
// PtraceSetRegs implements Context.PtraceSetRegs.
func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
- var regs syscall.PtraceRegs
- buf := make([]byte, ptraceRegsSize)
+ var regs Registers
+ buf := make([]byte, registersSize)
if _, err := io.ReadFull(src, buf); err != nil {
return 0, err
}
- binary.Unmarshal(buf, usermem.ByteOrder, &regs)
+ regs.UnmarshalUnsafe(buf)
// Truncate segment registers to 16 bits.
regs.Cs = uint64(uint16(regs.Cs))
regs.Ds = uint64(uint16(regs.Ds))
@@ -369,7 +374,7 @@ func (s *State) PtraceSetRegs(src io.Reader) (int, error) {
}
regs.Eflags = (s.Regs.Eflags &^ eflagsPtraceMutable) | (regs.Eflags & eflagsPtraceMutable)
s.Regs = regs
- return ptraceRegsSize, nil
+ return registersSize, nil
}
// isUserSegmentSelector returns true if the given segment selector specifies a
@@ -538,7 +543,7 @@ const (
func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) {
switch regset {
case _NT_PRSTATUS:
- if maxlen < ptraceRegsSize {
+ if maxlen < registersSize {
return 0, syserror.EFAULT
}
return s.PtraceGetRegs(dst)
@@ -558,7 +563,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int,
func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) {
switch regset {
case _NT_PRSTATUS:
- if maxlen < ptraceRegsSize {
+ if maxlen < registersSize {
return 0, syserror.EFAULT
}
return s.PtraceSetRegs(src)
diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go
index 3edf40764..0c73fcbfb 100644
--- a/pkg/sentry/arch/arch_x86_impl.go
+++ b/pkg/sentry/arch/arch_x86_impl.go
@@ -17,8 +17,6 @@
package arch
import (
- "syscall"
-
"gvisor.dev/gvisor/pkg/cpuid"
)
@@ -28,7 +26,7 @@ import (
// +stateify savable
type State struct {
// The system registers.
- Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
+ Regs Registers
// Our floating point state.
x86FPState `state:"wait"`
diff --git a/pkg/sentry/arch/signal.go b/pkg/sentry/arch/signal.go
index 8b03d0187..c9fb55d00 100644
--- a/pkg/sentry/arch/signal.go
+++ b/pkg/sentry/arch/signal.go
@@ -22,6 +22,7 @@ import (
// SignalAct represents the action that should be taken when a signal is
// delivered, and is equivalent to struct sigaction.
//
+// +marshal
// +stateify savable
type SignalAct struct {
Handler uint64
@@ -43,6 +44,7 @@ func (s *SignalAct) DeserializeTo(other *SignalAct) {
// SignalStack represents information about a user stack, and is equivalent to
// stack_t.
//
+// +marshal
// +stateify savable
type SignalStack struct {
Addr uint64
@@ -64,6 +66,7 @@ func (s *SignalStack) DeserializeTo(other *SignalStack) {
// SignalInfo represents information about a signal being delivered, and is
// equivalent to struct siginfo in linux kernel(linux/include/uapi/asm-generic/siginfo.h).
//
+// +marshal
// +stateify savable
type SignalInfo struct {
Signo int32 // Signal number
diff --git a/pkg/sentry/arch/signal_act.go b/pkg/sentry/arch/signal_act.go
index f9ca2e74e..32173aa20 100644
--- a/pkg/sentry/arch/signal_act.go
+++ b/pkg/sentry/arch/signal_act.go
@@ -14,6 +14,8 @@
package arch
+import "gvisor.dev/gvisor/tools/go_marshal/marshal"
+
// Special values for SignalAct.Handler.
const (
// SignalActDefault is SIG_DFL and specifies that the default behavior for
@@ -71,6 +73,8 @@ func (s SignalAct) HasRestorer() bool {
// NativeSignalAct is a type that is equivalent to struct sigaction in the
// guest architecture.
type NativeSignalAct interface {
+ marshal.Marshallable
+
// SerializeFrom copies the data in the host SignalAct s into this object.
SerializeFrom(s *SignalAct)
diff --git a/pkg/sentry/arch/signal_stack.go b/pkg/sentry/arch/signal_stack.go
index e58f055c7..0fa738a1d 100644
--- a/pkg/sentry/arch/signal_stack.go
+++ b/pkg/sentry/arch/signal_stack.go
@@ -18,6 +18,7 @@ package arch
import (
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/tools/go_marshal/marshal"
)
const (
@@ -55,6 +56,8 @@ func (s *SignalStack) Contains(sp usermem.Addr) bool {
// NativeSignalStack is a type that is equivalent to stack_t in the guest
// architecture.
type NativeSignalStack interface {
+ marshal.Marshallable
+
// SerializeFrom copies the data in the host SignalStack s into this
// object.
SerializeFrom(s *SignalStack)
diff --git a/pkg/sentry/fs/host/control.go b/pkg/sentry/fs/host/control.go
index 52c0504b6..39299b7e4 100644
--- a/pkg/sentry/fs/host/control.go
+++ b/pkg/sentry/fs/host/control.go
@@ -23,6 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
)
+// LINT.IfChange
+
type scmRights struct {
fds []int
}
@@ -32,8 +34,6 @@ func newSCMRights(fds []int) control.SCMRights {
}
// Files implements control.SCMRights.Files.
-//
-// TODO(gvisor.dev/issue/2017): Port to VFS2.
func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFiles, bool) {
n := max
var trunc bool
@@ -93,3 +93,5 @@ func fdsToFiles(ctx context.Context, fds []int) []*fs.File {
}
return files
}
+
+// LINT.ThenChange(../../fsimpl/host/control.go)
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index 06fc2d80a..b6e94583e 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -37,6 +37,8 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
+// LINT.IfChange
+
// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
//
// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
@@ -388,3 +390,5 @@ func (c *ConnectedEndpoint) Release() {
// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
func (c *ConnectedEndpoint) CloseUnread() {}
+
+// LINT.ThenChange(../../fsimpl/host/socket.go)
diff --git a/pkg/sentry/fs/host/socket_iovec.go b/pkg/sentry/fs/host/socket_iovec.go
index af6955675..5c18dbd5e 100644
--- a/pkg/sentry/fs/host/socket_iovec.go
+++ b/pkg/sentry/fs/host/socket_iovec.go
@@ -21,6 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
+// LINT.IfChange
+
// maxIovs is the maximum number of iovecs to pass to the host.
var maxIovs = linux.UIO_MAXIOV
@@ -111,3 +113,5 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
return total, iovecs, nil, err
}
+
+// LINT.ThenChange(../../fsimpl/host/socket_iovec.go)
diff --git a/pkg/sentry/fs/host/socket_unsafe.go b/pkg/sentry/fs/host/socket_unsafe.go
index f3bbed7ea..5d4f312cf 100644
--- a/pkg/sentry/fs/host/socket_unsafe.go
+++ b/pkg/sentry/fs/host/socket_unsafe.go
@@ -19,6 +19,8 @@ import (
"unsafe"
)
+// LINT.IfChange
+
// fdReadVec receives from fd to bufs.
//
// If the total length of bufs is > maxlen, fdReadVec will do a partial read
@@ -99,3 +101,5 @@ func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int6
return int64(n), length, err
}
+
+// LINT.ThenChange(../../fsimpl/host/socket_unsafe.go)
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index f36bf50fc..181d765d3 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -160,7 +160,7 @@ func (i *rootInode) masterClose(t *Terminal) {
}
// Open implements kernfs.Inode.Open.
-func (i *rootInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 60340c28e..04a292927 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -46,7 +46,7 @@ type masterInode struct {
var _ kernfs.Inode = (*masterInode)(nil)
// Open implements kernfs.Inode.Open.
-func (mi *masterInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
t, err := mi.root.allocateTerminal(rp.Credentials())
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index e7e50d51e..0a98dc896 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -48,7 +48,7 @@ type slaveInode struct {
var _ kernfs.Inode = (*slaveInode)(nil)
// Open implements kernfs.Inode.Open.
-func (si *slaveInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (si *slaveInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
si.IncRef()
fd := &slaveFileDescription{
inode: si,
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index c67766ab2..55f9ed911 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -75,6 +75,7 @@ func (d *dentry) createSyntheticDirectoryLocked(name string, mode linux.FileMode
handle: handle{
fd: -1,
},
+ nlink: uint32(2),
}
d2.pf.dentry = d2
d2.vfsd.Init(d2)
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 44dd9f672..e1c56d89b 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -5,8 +5,12 @@ licenses(["notice"])
go_library(
name = "host",
srcs = [
+ "control.go",
"host.go",
"ioctl_unsafe.go",
+ "socket.go",
+ "socket_iovec.go",
+ "socket_unsafe.go",
"tty.go",
"util.go",
"util_unsafe.go",
@@ -15,6 +19,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/fdnotifier",
"//pkg/log",
"//pkg/refs",
"//pkg/sentry/arch",
@@ -23,11 +28,19 @@ go_library(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
+ "//pkg/sentry/socket/control",
+ "//pkg/sentry/socket/unix",
+ "//pkg/sentry/socket/unix/transport",
"//pkg/sentry/unimpl",
+ "//pkg/sentry/uniqueid",
"//pkg/sentry/vfs",
"//pkg/sync",
+ "//pkg/syserr",
"//pkg/syserror",
+ "//pkg/tcpip",
+ "//pkg/unet",
"//pkg/usermem",
+ "//pkg/waiter",
"@org_golang_x_sys//unix:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/host/control.go b/pkg/sentry/fsimpl/host/control.go
new file mode 100644
index 000000000..b9082a20f
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/control.go
@@ -0,0 +1,96 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+type scmRights struct {
+ fds []int
+}
+
+func newSCMRights(fds []int) control.SCMRightsVFS2 {
+ return &scmRights{fds}
+}
+
+// Files implements control.SCMRights.Files.
+func (c *scmRights) Files(ctx context.Context, max int) (control.RightsFilesVFS2, bool) {
+ n := max
+ var trunc bool
+ if l := len(c.fds); n > l {
+ n = l
+ } else if n < l {
+ trunc = true
+ }
+
+ rf := control.RightsFilesVFS2(fdsToFiles(ctx, c.fds[:n]))
+
+ // Only consume converted FDs (fdsToFiles may convert fewer than n FDs).
+ c.fds = c.fds[len(rf):]
+ return rf, trunc
+}
+
+// Clone implements transport.RightsControlMessage.Clone.
+func (c *scmRights) Clone() transport.RightsControlMessage {
+ // Host rights never need to be cloned.
+ return nil
+}
+
+// Release implements transport.RightsControlMessage.Release.
+func (c *scmRights) Release() {
+ for _, fd := range c.fds {
+ syscall.Close(fd)
+ }
+ c.fds = nil
+}
+
+// If an error is encountered, only files created before the error will be
+// returned. This is what Linux does.
+func fdsToFiles(ctx context.Context, fds []int) []*vfs.FileDescription {
+ files := make([]*vfs.FileDescription, 0, len(fds))
+ for _, fd := range fds {
+ // Get flags. We do it here because they may be modified
+ // by subsequent functions.
+ fileFlags, _, errno := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_GETFL, 0)
+ if errno != 0 {
+ ctx.Warningf("Error retrieving host FD flags: %v", error(errno))
+ break
+ }
+
+ // Create the file backed by hostFD.
+ file, err := ImportFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fd, false /* isTTY */)
+ if err != nil {
+ ctx.Warningf("Error creating file from host FD: %v", err)
+ break
+ }
+
+ if err := file.SetStatusFlags(ctx, auth.CredentialsFromContext(ctx), uint32(fileFlags&linux.O_NONBLOCK)); err != nil {
+ ctx.Warningf("Error setting flags on host FD file: %v", err)
+ break
+ }
+
+ files = append(files, file)
+ }
+ return files
+}
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index a26b13067..05538ea42 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -17,7 +17,6 @@
package host
import (
- "errors"
"fmt"
"math"
"syscall"
@@ -31,6 +30,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
+ unixsocket "gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
@@ -66,7 +66,7 @@ func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem {
}
// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
+func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
fs, ok := mnt.Filesystem().Impl().(*kernfs.Filesystem)
if !ok {
return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl())
@@ -108,7 +108,7 @@ func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, err
// i.open will take a reference on d.
defer d.DecRef()
- return i.open(d.VFSDentry(), mnt)
+ return i.open(ctx, d.VFSDentry(), mnt)
}
// inode implements kernfs.Inode.
@@ -156,7 +156,7 @@ type inode struct {
// Note that these flags may become out of date, since they can be modified
// on the host, e.g. with fcntl.
-func fileFlagsFromHostFD(fd int) (int, error) {
+func fileFlagsFromHostFD(fd int) (uint32, error) {
flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0)
if err != nil {
log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err)
@@ -164,7 +164,7 @@ func fileFlagsFromHostFD(fd int) (int, error) {
}
// TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
- return flags, nil
+ return uint32(flags), nil
}
// CheckPermissions implements kernfs.Inode.
@@ -360,52 +360,59 @@ func (i *inode) Destroy() {
}
// Open implements kernfs.Inode.
-func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- return i.open(vfsd, rp.Mount())
+func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ // Once created, we cannot re-open a socket fd through /proc/[pid]/fd/.
+ if i.Mode().FileType() == linux.S_IFSOCK {
+ return nil, syserror.ENXIO
+ }
+ return i.open(ctx, vfsd, rp.Mount())
}
-func (i *inode) open(d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) {
+func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) {
var s syscall.Stat_t
if err := syscall.Fstat(i.hostFD, &s); err != nil {
return nil, err
}
fileType := s.Mode & linux.FileTypeMask
+ flags, err := fileFlagsFromHostFD(i.hostFD)
+ if err != nil {
+ return nil, err
+ }
+
if fileType == syscall.S_IFSOCK {
if i.isTTY {
- return nil, errors.New("cannot use host socket as TTY")
+ log.Warningf("cannot use host socket fd %d as TTY", i.hostFD)
+ return nil, syserror.ENOTTY
}
- // TODO(gvisor.dev/issue/1672): support importing sockets.
- return nil, errors.New("importing host sockets not supported")
+
+ ep, err := newEndpoint(ctx, i.hostFD)
+ if err != nil {
+ return nil, err
+ }
+ // Currently, we only allow Unix sockets to be imported.
+ return unixsocket.NewFileDescription(ep, ep.Type(), flags, mnt, d)
}
// TODO(gvisor.dev/issue/1672): Whitelist specific file types here, so that
// we don't allow importing arbitrary file types without proper support.
- var (
- vfsfd *vfs.FileDescription
- fdImpl vfs.FileDescriptionImpl
- )
if i.isTTY {
fd := &ttyFD{
fileDescription: fileDescription{inode: i},
termios: linux.DefaultSlaveTermios,
}
- vfsfd = &fd.vfsfd
- fdImpl = fd
- } else {
- // For simplicity, set offset to 0. Technically, we should
- // only set to 0 on files that are not seekable (sockets, pipes, etc.),
- // and use the offset from the host fd otherwise.
- fd := &fileDescription{inode: i}
- vfsfd = &fd.vfsfd
- fdImpl = fd
- }
-
- flags, err := fileFlagsFromHostFD(i.hostFD)
- if err != nil {
- return nil, err
+ vfsfd := &fd.vfsfd
+ if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ return vfsfd, nil
}
- if err := vfsfd.Init(fdImpl, uint32(flags), mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
+ // For simplicity, set offset to 0. Technically, we should
+ // only set to 0 on files that are not seekable (sockets, pipes, etc.),
+ // and use the offset from the host fd otherwise.
+ fd := &fileDescription{inode: i}
+ vfsfd := &fd.vfsfd
+ if err := vfsfd.Init(fd, flags, mnt, d, &vfs.FileDescriptionOptions{}); err != nil {
return nil, err
}
return vfsfd, nil
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
new file mode 100644
index 000000000..39dd624a0
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -0,0 +1,397 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "fmt"
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fdnotifier"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/socket/control"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip"
+ "gvisor.dev/gvisor/pkg/unet"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+// Create a new host-backed endpoint from the given fd.
+func newEndpoint(ctx context.Context, hostFD int) (transport.Endpoint, error) {
+ // Set up an external transport.Endpoint using the host fd.
+ addr := fmt.Sprintf("hostfd:[%d]", hostFD)
+ var q waiter.Queue
+ e, err := NewConnectedEndpoint(ctx, hostFD, &q, addr, true /* saveable */)
+ if err != nil {
+ return nil, err.ToError()
+ }
+ e.Init()
+ ep := transport.NewExternal(ctx, e.stype, uniqueid.GlobalProviderFromContext(ctx), &q, e, e)
+ return ep, nil
+}
+
+// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
+//
+// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
+const maxSendBufferSize = 8 << 20
+
+// ConnectedEndpoint is an implementation of transport.ConnectedEndpoint and
+// transport.Receiver. It is backed by a host fd that was imported at sentry
+// startup. This fd is shared with a hostfs inode, which retains ownership of
+// it.
+//
+// ConnectedEndpoint is saveable, since we expect that the host will provide
+// the same fd upon restore.
+//
+// As of this writing, we only allow Unix sockets to be imported.
+//
+// +stateify savable
+type ConnectedEndpoint struct {
+ // ref keeps track of references to a ConnectedEndpoint.
+ ref refs.AtomicRefCount
+
+ // mu protects fd below.
+ mu sync.RWMutex `state:"nosave"`
+
+ // fd is the host fd backing this endpoint.
+ fd int
+
+ // addr is the address at which this endpoint is bound.
+ addr string
+
+ queue *waiter.Queue
+
+ // sndbuf is the size of the send buffer.
+ //
+ // N.B. When this is smaller than the host size, we present it via
+ // GetSockOpt and message splitting/rejection in SendMsg, but do not
+ // prevent lots of small messages from filling the real send buffer
+ // size on the host.
+ sndbuf int64 `state:"nosave"`
+
+ // stype is the type of Unix socket.
+ stype linux.SockType
+}
+
+// init performs initialization required for creating new ConnectedEndpoints and
+// for restoring them.
+func (c *ConnectedEndpoint) init() *syserr.Error {
+ family, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_DOMAIN)
+ if err != nil {
+ return syserr.FromError(err)
+ }
+
+ if family != syscall.AF_UNIX {
+ // We only allow Unix sockets.
+ return syserr.ErrInvalidEndpointState
+ }
+
+ stype, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_TYPE)
+ if err != nil {
+ return syserr.FromError(err)
+ }
+
+ if err := syscall.SetNonblock(c.fd, true); err != nil {
+ return syserr.FromError(err)
+ }
+
+ sndbuf, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF)
+ if err != nil {
+ return syserr.FromError(err)
+ }
+ if sndbuf > maxSendBufferSize {
+ log.Warningf("Socket send buffer too large: %d", sndbuf)
+ return syserr.ErrInvalidEndpointState
+ }
+
+ c.stype = linux.SockType(stype)
+ c.sndbuf = int64(sndbuf)
+
+ return nil
+}
+
+// NewConnectedEndpoint creates a new ConnectedEndpoint backed by a host fd
+// imported at sentry startup,
+//
+// The caller is responsible for calling Init(). Additionaly, Release needs to
+// be called twice because ConnectedEndpoint is both a transport.Receiver and
+// transport.ConnectedEndpoint.
+func NewConnectedEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string, saveable bool) (*ConnectedEndpoint, *syserr.Error) {
+ e := ConnectedEndpoint{
+ fd: hostFD,
+ addr: addr,
+ queue: queue,
+ }
+
+ if err := e.init(); err != nil {
+ return nil, err
+ }
+
+ // AtomicRefCounters start off with a single reference. We need two.
+ e.ref.IncRef()
+ e.ref.EnableLeakCheck("host.ConnectedEndpoint")
+ return &e, nil
+}
+
+// Init will do the initialization required without holding other locks.
+func (c *ConnectedEndpoint) Init() {
+ if err := fdnotifier.AddFD(int32(c.fd), c.queue); err != nil {
+ panic(err)
+ }
+}
+
+// Send implements transport.ConnectedEndpoint.Send.
+func (c *ConnectedEndpoint) Send(data [][]byte, controlMessages transport.ControlMessages, from tcpip.FullAddress) (int64, bool, *syserr.Error) {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+
+ if !controlMessages.Empty() {
+ return 0, false, syserr.ErrInvalidEndpointState
+ }
+
+ // Since stream sockets don't preserve message boundaries, we can write
+ // only as much of the message as fits in the send buffer.
+ truncate := c.stype == linux.SOCK_STREAM
+
+ n, totalLen, err := fdWriteVec(c.fd, data, c.sndbuf, truncate)
+ if n < totalLen && err == nil {
+ // The host only returns a short write if it would otherwise
+ // block (and only for stream sockets).
+ err = syserror.EAGAIN
+ }
+ if n > 0 && err != syserror.EAGAIN {
+ // The caller may need to block to send more data, but
+ // otherwise there isn't anything that can be done about an
+ // error with a partial write.
+ err = nil
+ }
+
+ // There is no need for the callee to call SendNotify because fdWriteVec
+ // uses the host's sendmsg(2) and the host kernel's queue.
+ return n, false, syserr.FromError(err)
+}
+
+// SendNotify implements transport.ConnectedEndpoint.SendNotify.
+func (c *ConnectedEndpoint) SendNotify() {}
+
+// CloseSend implements transport.ConnectedEndpoint.CloseSend.
+func (c *ConnectedEndpoint) CloseSend() {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ if err := syscall.Shutdown(c.fd, syscall.SHUT_WR); err != nil {
+ // A well-formed UDS shutdown can't fail. See
+ // net/unix/af_unix.c:unix_shutdown.
+ panic(fmt.Sprintf("failed write shutdown on host socket %+v: %v", c, err))
+ }
+}
+
+// CloseNotify implements transport.ConnectedEndpoint.CloseNotify.
+func (c *ConnectedEndpoint) CloseNotify() {}
+
+// Writable implements transport.ConnectedEndpoint.Writable.
+func (c *ConnectedEndpoint) Writable() bool {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+
+ return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventOut)&waiter.EventOut != 0
+}
+
+// Passcred implements transport.ConnectedEndpoint.Passcred.
+func (c *ConnectedEndpoint) Passcred() bool {
+ // We don't support credential passing for host sockets.
+ return false
+}
+
+// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+ return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil
+}
+
+// EventUpdate implements transport.ConnectedEndpoint.EventUpdate.
+func (c *ConnectedEndpoint) EventUpdate() {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+ if c.fd != -1 {
+ fdnotifier.UpdateFD(int32(c.fd))
+ }
+}
+
+// Recv implements transport.Receiver.Recv.
+func (c *ConnectedEndpoint) Recv(data [][]byte, creds bool, numRights int, peek bool) (int64, int64, transport.ControlMessages, bool, tcpip.FullAddress, bool, *syserr.Error) {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+
+ var cm unet.ControlMessage
+ if numRights > 0 {
+ cm.EnableFDs(int(numRights))
+ }
+
+ // N.B. Unix sockets don't have a receive buffer, the send buffer
+ // serves both purposes.
+ rl, ml, cl, cTrunc, err := fdReadVec(c.fd, data, []byte(cm), peek, c.sndbuf)
+ if rl > 0 && err != nil {
+ // We got some data, so all we need to do on error is return
+ // the data that we got. Short reads are fine, no need to
+ // block.
+ err = nil
+ }
+ if err != nil {
+ return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err)
+ }
+
+ // There is no need for the callee to call RecvNotify because fdReadVec uses
+ // the host's recvmsg(2) and the host kernel's queue.
+
+ // Trim the control data if we received less than the full amount.
+ if cl < uint64(len(cm)) {
+ cm = cm[:cl]
+ }
+
+ // Avoid extra allocations in the case where there isn't any control data.
+ if len(cm) == 0 {
+ return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+ }
+
+ fds, err := cm.ExtractFDs()
+ if err != nil {
+ return 0, 0, transport.ControlMessages{}, false, tcpip.FullAddress{}, false, syserr.FromError(err)
+ }
+
+ if len(fds) == 0 {
+ return rl, ml, transport.ControlMessages{}, cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+ }
+ return rl, ml, control.NewVFS2(nil, nil, newSCMRights(fds)), cTrunc, tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, false, nil
+}
+
+// RecvNotify implements transport.Receiver.RecvNotify.
+func (c *ConnectedEndpoint) RecvNotify() {}
+
+// CloseRecv implements transport.Receiver.CloseRecv.
+func (c *ConnectedEndpoint) CloseRecv() {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ if err := syscall.Shutdown(c.fd, syscall.SHUT_RD); err != nil {
+ // A well-formed UDS shutdown can't fail. See
+ // net/unix/af_unix.c:unix_shutdown.
+ panic(fmt.Sprintf("failed read shutdown on host socket %+v: %v", c, err))
+ }
+}
+
+// Readable implements transport.Receiver.Readable.
+func (c *ConnectedEndpoint) Readable() bool {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+
+ return fdnotifier.NonBlockingPoll(int32(c.fd), waiter.EventIn)&waiter.EventIn != 0
+}
+
+// SendQueuedSize implements transport.Receiver.SendQueuedSize.
+func (c *ConnectedEndpoint) SendQueuedSize() int64 {
+ // TODO(gvisor.dev/issue/273): SendQueuedSize isn't supported for host
+ // sockets because we don't allow the sentry to call ioctl(2).
+ return -1
+}
+
+// RecvQueuedSize implements transport.Receiver.RecvQueuedSize.
+func (c *ConnectedEndpoint) RecvQueuedSize() int64 {
+ // TODO(gvisor.dev/issue/273): RecvQueuedSize isn't supported for host
+ // sockets because we don't allow the sentry to call ioctl(2).
+ return -1
+}
+
+// SendMaxQueueSize implements transport.Receiver.SendMaxQueueSize.
+func (c *ConnectedEndpoint) SendMaxQueueSize() int64 {
+ return int64(c.sndbuf)
+}
+
+// RecvMaxQueueSize implements transport.Receiver.RecvMaxQueueSize.
+func (c *ConnectedEndpoint) RecvMaxQueueSize() int64 {
+ // N.B. Unix sockets don't use the receive buffer. We'll claim it is
+ // the same size as the send buffer.
+ return int64(c.sndbuf)
+}
+
+func (c *ConnectedEndpoint) destroyLocked() {
+ fdnotifier.RemoveFD(int32(c.fd))
+ c.fd = -1
+}
+
+// Release implements transport.ConnectedEndpoint.Release and
+// transport.Receiver.Release.
+func (c *ConnectedEndpoint) Release() {
+ c.ref.DecRefWithDestructor(func() {
+ c.mu.Lock()
+ c.destroyLocked()
+ c.mu.Unlock()
+ })
+}
+
+// CloseUnread implements transport.ConnectedEndpoint.CloseUnread.
+func (c *ConnectedEndpoint) CloseUnread() {}
+
+// SCMConnectedEndpoint represents an endpoint backed by a host fd that was
+// passed through a gofer Unix socket. It is almost the same as
+// ConnectedEndpoint, with the following differences:
+// - SCMConnectedEndpoint is not saveable, because the host cannot guarantee
+// the same descriptor number across S/R.
+// - SCMConnectedEndpoint holds ownership of its fd and is responsible for
+// closing it.
+type SCMConnectedEndpoint struct {
+ ConnectedEndpoint
+}
+
+// Release implements transport.ConnectedEndpoint.Release and
+// transport.Receiver.Release.
+func (e *SCMConnectedEndpoint) Release() {
+ e.ref.DecRefWithDestructor(func() {
+ e.mu.Lock()
+ if err := syscall.Close(e.fd); err != nil {
+ log.Warningf("Failed to close host fd %d: %v", err)
+ }
+ e.destroyLocked()
+ e.mu.Unlock()
+ })
+}
+
+// NewSCMEndpoint creates a new SCMConnectedEndpoint backed by a host fd that
+// was passed through a Unix socket.
+//
+// The caller is responsible for calling Init(). Additionaly, Release needs to
+// be called twice because ConnectedEndpoint is both a transport.Receiver and
+// transport.ConnectedEndpoint.
+func NewSCMEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue, addr string) (*SCMConnectedEndpoint, *syserr.Error) {
+ e := SCMConnectedEndpoint{ConnectedEndpoint{
+ fd: hostFD,
+ addr: addr,
+ queue: queue,
+ }}
+
+ if err := e.init(); err != nil {
+ return nil, err
+ }
+
+ // AtomicRefCounters start off with a single reference. We need two.
+ e.ref.IncRef()
+ e.ref.EnableLeakCheck("host.SCMConnectedEndpoint")
+ return &e, nil
+}
diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go
new file mode 100644
index 000000000..584c247d2
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket_iovec.go
@@ -0,0 +1,113 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// maxIovs is the maximum number of iovecs to pass to the host.
+var maxIovs = linux.UIO_MAXIOV
+
+// copyToMulti copies as many bytes from src to dst as possible.
+func copyToMulti(dst [][]byte, src []byte) {
+ for _, d := range dst {
+ done := copy(d, src)
+ src = src[done:]
+ if len(src) == 0 {
+ break
+ }
+ }
+}
+
+// copyFromMulti copies as many bytes from src to dst as possible.
+func copyFromMulti(dst []byte, src [][]byte) {
+ for _, s := range src {
+ done := copy(dst, s)
+ dst = dst[done:]
+ if len(dst) == 0 {
+ break
+ }
+ }
+}
+
+// buildIovec builds an iovec slice from the given []byte slice.
+//
+// If truncate, truncate bufs > maxlen. Otherwise, immediately return an error.
+//
+// If length < the total length of bufs, err indicates why, even when returning
+// a truncated iovec.
+//
+// If intermediate != nil, iovecs references intermediate rather than bufs and
+// the caller must copy to/from bufs as necessary.
+func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovecs []syscall.Iovec, intermediate []byte, err error) {
+ var iovsRequired int
+ for _, b := range bufs {
+ length += int64(len(b))
+ if len(b) > 0 {
+ iovsRequired++
+ }
+ }
+
+ stopLen := length
+ if length > maxlen {
+ if truncate {
+ stopLen = maxlen
+ err = syserror.EAGAIN
+ } else {
+ return 0, nil, nil, syserror.EMSGSIZE
+ }
+ }
+
+ if iovsRequired > maxIovs {
+ // The kernel will reject our call if we pass this many iovs.
+ // Use a single intermediate buffer instead.
+ b := make([]byte, stopLen)
+
+ return stopLen, []syscall.Iovec{{
+ Base: &b[0],
+ Len: uint64(stopLen),
+ }}, b, err
+ }
+
+ var total int64
+ iovecs = make([]syscall.Iovec, 0, iovsRequired)
+ for i := range bufs {
+ l := len(bufs[i])
+ if l == 0 {
+ continue
+ }
+
+ stop := int64(l)
+ if total+stop > stopLen {
+ stop = stopLen - total
+ }
+
+ iovecs = append(iovecs, syscall.Iovec{
+ Base: &bufs[i][0],
+ Len: uint64(stop),
+ })
+
+ total += stop
+ if total >= stopLen {
+ break
+ }
+ }
+
+ return total, iovecs, nil, err
+}
diff --git a/pkg/sentry/fsimpl/host/socket_unsafe.go b/pkg/sentry/fsimpl/host/socket_unsafe.go
new file mode 100644
index 000000000..35ded24bc
--- /dev/null
+++ b/pkg/sentry/fsimpl/host/socket_unsafe.go
@@ -0,0 +1,101 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package host
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+// fdReadVec receives from fd to bufs.
+//
+// If the total length of bufs is > maxlen, fdReadVec will do a partial read
+// and err will indicate why the message was truncated.
+func fdReadVec(fd int, bufs [][]byte, control []byte, peek bool, maxlen int64) (readLen int64, msgLen int64, controlLen uint64, controlTrunc bool, err error) {
+ flags := uintptr(syscall.MSG_DONTWAIT | syscall.MSG_TRUNC)
+ if peek {
+ flags |= syscall.MSG_PEEK
+ }
+
+ // Always truncate the receive buffer. All socket types will truncate
+ // received messages.
+ length, iovecs, intermediate, err := buildIovec(bufs, maxlen, true)
+ if err != nil && len(iovecs) == 0 {
+ // No partial write to do, return error immediately.
+ return 0, 0, 0, false, err
+ }
+
+ var msg syscall.Msghdr
+ if len(control) != 0 {
+ msg.Control = &control[0]
+ msg.Controllen = uint64(len(control))
+ }
+
+ if len(iovecs) != 0 {
+ msg.Iov = &iovecs[0]
+ msg.Iovlen = uint64(len(iovecs))
+ }
+
+ rawN, _, e := syscall.RawSyscall(syscall.SYS_RECVMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), flags)
+ if e != 0 {
+ // N.B. prioritize the syscall error over the buildIovec error.
+ return 0, 0, 0, false, e
+ }
+ n := int64(rawN)
+
+ // Copy data back to bufs.
+ if intermediate != nil {
+ copyToMulti(bufs, intermediate)
+ }
+
+ controlTrunc = msg.Flags&syscall.MSG_CTRUNC == syscall.MSG_CTRUNC
+
+ if n > length {
+ return length, n, msg.Controllen, controlTrunc, err
+ }
+
+ return n, n, msg.Controllen, controlTrunc, err
+}
+
+// fdWriteVec sends from bufs to fd.
+//
+// If the total length of bufs is > maxlen && truncate, fdWriteVec will do a
+// partial write and err will indicate why the message was truncated.
+func fdWriteVec(fd int, bufs [][]byte, maxlen int64, truncate bool) (int64, int64, error) {
+ length, iovecs, intermediate, err := buildIovec(bufs, maxlen, truncate)
+ if err != nil && len(iovecs) == 0 {
+ // No partial write to do, return error immediately.
+ return 0, length, err
+ }
+
+ // Copy data to intermediate buf.
+ if intermediate != nil {
+ copyFromMulti(intermediate, bufs)
+ }
+
+ var msg syscall.Msghdr
+ if len(iovecs) > 0 {
+ msg.Iov = &iovecs[0]
+ msg.Iovlen = uint64(len(iovecs))
+ }
+
+ n, _, e := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msg)), syscall.MSG_DONTWAIT|syscall.MSG_NOSIGNAL)
+ if e != 0 {
+ // N.B. prioritize the syscall error over the buildIovec error.
+ return 0, length, e
+ }
+
+ return int64(n), length, err
+}
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index d8bddbafa..c7779fc11 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -53,7 +53,7 @@ func (f *DynamicBytesFile) Init(creds *auth.Credentials, ino uint64, data vfs.Dy
}
// Open implements Inode.Open.
-func (f *DynamicBytesFile) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (f *DynamicBytesFile) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd := &DynamicBytesFD{}
if err := fd.Init(rp.Mount(), vfsd, f.data, opts.Flags); err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 3ccd92fc5..9e8d80414 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -406,7 +406,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- return inode.Open(rp, vfsd, opts)
+ return inode.Open(ctx, rp, vfsd, opts)
}
// May create new file.
@@ -425,7 +425,7 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- return inode.Open(rp, vfsd, opts)
+ return inode.Open(ctx, rp, vfsd, opts)
}
afterTrailingSymlink:
parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp)
@@ -466,7 +466,7 @@ afterTrailingSymlink:
}
child := childVFSD.Impl().(*Dentry)
parentVFSD.Impl().(*Dentry).InsertChild(pc, child)
- return child.inode.Open(rp, childVFSD, opts)
+ return child.inode.Open(ctx, rp, childVFSD, opts)
}
if err != nil {
return nil, err
@@ -499,7 +499,7 @@ afterTrailingSymlink:
if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- return child.inode.Open(rp, &child.vfsd, opts)
+ return child.inode.Open(ctx, rp, &child.vfsd, opts)
}
// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 02f35a675..615592d5f 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -525,7 +525,7 @@ type InodeSymlink struct {
}
// Open implements Inode.Open.
-func (InodeSymlink) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
return nil, syserror.ELOOP
}
@@ -567,7 +567,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, ino uint64, perm linux.F
}
// Open implements kernfs.Inode.
-func (s *StaticDirectory) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := NewGenericDirectoryFD(rp.Mount(), vfsd, &s.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 95cf6dc24..732837933 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -308,7 +308,7 @@ type Inode interface {
//
// Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing
// the inode on which Open() is being called.
- Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error)
+ Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error)
}
type inodeRefs interface {
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 0964d5456..a9f671bc8 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -116,7 +116,7 @@ func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMod
return &dir.dentry
}
-func (d *readonlyDir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *readonlyDir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts)
if err != nil {
return nil, err
@@ -146,7 +146,7 @@ func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, conte
return &dir.dentry
}
-func (d *dir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index faf3179bc..d6bd67467 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -129,9 +129,8 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.
}
// Open implements kernfs.Inode.Open.
-func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- // FIXME(b/38173783): kernfs does not plumb Context here.
- return i.pipe.Open(context.Background(), rp.Mount(), vfsd, opts.Flags)
+func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags)
}
// NewConnectedPipeFDs returns a pair of FileDescriptions representing the read
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 28ec2484a..a5cfa8333 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -151,7 +151,7 @@ func (fd *subtasksFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) erro
}
// Open implements kernfs.Inode.
-func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *subtasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd := &subtasksFD{task: i.task}
if err := fd.Init(&i.OrderedChildren, &opts); err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index e2790d35b..66419d91b 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -102,7 +102,7 @@ func (i *taskInode) Valid(ctx context.Context) bool {
}
// Open implements kernfs.Inode.
-func (i *taskInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index a7622f1b6..8ad976073 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -142,7 +142,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
}
// Open implements kernfs.Inode.
-func (i *fdDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *fdDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
if err != nil {
return nil, err
@@ -269,7 +269,7 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry,
}
// Open implements kernfs.Inode.
-func (i *fdInfoDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 410cc3552..515f25327 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -788,7 +788,7 @@ func (i *namespaceInode) Init(creds *auth.Credentials, ino uint64, perm linux.Fi
}
// Open implements Inode.Open.
-func (i *namespaceInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *namespaceInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd := &namespaceFD{inode: i}
i.IncRef()
if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), vfsd, &vfs.FileDescriptionOptions{}); err != nil {
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 26518ed03..5aeda8c9b 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -201,7 +201,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback
}
// Open implements kernfs.Inode.
-func (i *tasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index 632cfde88..271134af8 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -69,30 +69,18 @@ type inode struct {
}
// Open implements kernfs.Inode.Open.
-func (i *inode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
return nil, syserror.ENXIO
}
-// InitSocket initializes a socket FileDescription, with a corresponding
-// Dentry in mnt.
-//
-// fd should be the FileDescription associated with socketImpl, i.e. its first
-// field. mnt should be the global socket mount, Kernel.socketMount.
-func InitSocket(socketImpl vfs.FileDescriptionImpl, fd *vfs.FileDescription, mnt *vfs.Mount, creds *auth.Credentials) error {
- fsimpl := mnt.Filesystem().Impl()
- fs := fsimpl.(*kernfs.Filesystem)
-
+// NewDentry constructs and returns a sockfs dentry.
+func NewDentry(creds *auth.Credentials, ino uint64) *vfs.Dentry {
// File mode matches net/socket.c:sock_alloc.
filemode := linux.FileMode(linux.S_IFSOCK | 0600)
i := &inode{}
- i.Init(creds, fs.NextIno(), filemode)
+ i.Init(creds, ino, filemode)
d := &kernfs.Dentry{}
d.Init(i)
-
- opts := &vfs.FileDescriptionOptions{UseDentryMetadata: true}
- if err := fd.Init(socketImpl, linux.O_RDWR, mnt, d.VFSDentry(), opts); err != nil {
- return err
- }
- return nil
+ return d.VFSDentry()
}
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 34e8e0cbe..f8d25d35e 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -105,7 +105,7 @@ func (*dir) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.Set
}
// Open implements kernfs.Inode.Open.
-func (d *dir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &d.OrderedChildren, &opts)
if err != nil {
return nil, err
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 7d25e98f7..79766cafe 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -716,7 +716,7 @@ func (tg *ThreadGroup) SetSignalAct(sig linux.Signal, actptr *arch.SignalAct) (a
func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error {
n := t.Arch().NewSignalAct()
n.SerializeFrom(s)
- _, err := t.CopyOut(addr, n)
+ _, err := n.CopyOut(t, addr)
return err
}
@@ -725,7 +725,7 @@ func (t *Task) CopyOutSignalAct(addr usermem.Addr, s *arch.SignalAct) error {
func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) {
n := t.Arch().NewSignalAct()
var s arch.SignalAct
- if _, err := t.CopyIn(addr, n); err != nil {
+ if _, err := n.CopyIn(t, addr); err != nil {
return s, err
}
n.DeserializeTo(&s)
@@ -737,7 +737,7 @@ func (t *Task) CopyInSignalAct(addr usermem.Addr) (arch.SignalAct, error) {
func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error {
n := t.Arch().NewSignalStack()
n.SerializeFrom(s)
- _, err := t.CopyOut(addr, n)
+ _, err := n.CopyOut(t, addr)
return err
}
@@ -746,7 +746,7 @@ func (t *Task) CopyOutSignalStack(addr usermem.Addr, s *arch.SignalStack) error
func (t *Task) CopyInSignalStack(addr usermem.Addr) (arch.SignalStack, error) {
n := t.Arch().NewSignalStack()
var s arch.SignalStack
- if _, err := t.CopyIn(addr, n); err != nil {
+ if _, err := n.CopyIn(t, addr); err != nil {
return s, err
}
n.DeserializeTo(&s)
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index a5035bb7f..8485fb4b6 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -104,6 +104,9 @@ func (ts *TaskSet) NewTask(cfg *TaskConfig) (*Task, error) {
cfg.TaskContext.release()
cfg.FSContext.DecRef()
cfg.FDTable.DecRef()
+ if cfg.MountNamespaceVFS2 != nil {
+ cfg.MountNamespaceVFS2.DecRef()
+ }
return nil, err
}
return t, nil
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index 552341721..c215d443c 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -54,6 +54,14 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
context.Pstate = regs.Pstate
context.Pstate &^= uint64(ring0.UserFlagsClear)
context.Pstate |= ring0.UserFlagsSet
+
+ lazyVfp := c.GetLazyVFP()
+ if lazyVfp != 0 {
+ fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ context.Fpsimd64.Fpsr = fpsimd.Fpsr
+ context.Fpsimd64.Fpcr = fpsimd.Fpcr
+ context.Fpsimd64.Vregs = fpsimd.Vregs
+ }
}
// KernelSyscall handles kernel syscalls.
@@ -64,6 +72,17 @@ func (c *vCPU) KernelSyscall() {
if regs.Regs[8] != ^uint64(0) {
regs.Pc -= 4 // Rewind.
}
+
+ vfpEnable := ring0.CPACREL1()
+ if vfpEnable != 0 {
+ fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpcr := ring0.GetFPCR()
+ fpsr := ring0.GetFPSR()
+ fpsimd.Fpcr = uint32(fpcr)
+ fpsimd.Fpsr = uint32(fpsr)
+ ring0.SaveVRegs((*byte)(c.floatingPointState))
+ }
+
ring0.Halt()
}
@@ -75,5 +94,16 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
if vector == ring0.Vector(bounce) {
regs.Pc = 0
}
+
+ vfpEnable := ring0.CPACREL1()
+ if vfpEnable != 0 {
+ fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpcr := ring0.GetFPCR()
+ fpsr := ring0.GetFPSR()
+ fpsimd.Fpcr = uint32(fpcr)
+ fpsimd.Fpsr = uint32(fpsr)
+ ring0.SaveVRegs((*byte)(c.floatingPointState))
+ }
+
ring0.Halt()
}
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
index eb5ed574e..4ca2b7717 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
@@ -23,6 +23,13 @@ import (
"gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
+// fpsimdPtr returns a fpsimd64 for the given address.
+//
+//go:nosplit
+func fpsimdPtr(addr *byte) *arch.FpsimdContext {
+ return (*arch.FpsimdContext)(unsafe.Pointer(addr))
+}
+
// dieArchSetup initialies the state for dieTrampoline.
//
// The arm64 dieTrampoline requires the vCPU to be set in R1, and the last PC
@@ -47,3 +54,10 @@ func dieArchSetup(c *vCPU, context *arch.SignalContext64, guestRegs *userRegs) {
context.Regs[1] = uint64(uintptr(unsafe.Pointer(c)))
context.Pc = uint64(dieTrampolineAddr)
}
+
+// bluepillArchFpContext returns the arch-specific fpsimd context.
+//
+//go:nosplit
+func bluepillArchFpContext(context unsafe.Pointer) *arch.FpsimdContext {
+ return &((*arch.SignalContext64)(context).Fpsimd64)
+}
diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go
index 716198712..29d457a7e 100644
--- a/pkg/sentry/platform/kvm/kvm_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_arm64.go
@@ -17,8 +17,7 @@
package kvm
import (
- "syscall"
-
+ "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
@@ -37,7 +36,7 @@ type userFpsimdState struct {
}
type userRegs struct {
- Regs syscall.PtraceRegs
+ Regs arch.Registers
sp_el1 uint64
elr_el1 uint64
spsr [KVM_NR_SPSR]uint64
diff --git a/pkg/sentry/platform/kvm/kvm_const_arm64.go b/pkg/sentry/platform/kvm/kvm_const_arm64.go
index 5a74c6e36..531ae8b1e 100644
--- a/pkg/sentry/platform/kvm/kvm_const_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_const_arm64.go
@@ -19,30 +19,31 @@ const (
_KVM_GET_ONE_REG = 0x4010aeab
_KVM_SET_ONE_REG = 0x4010aeac
- _KVM_ARM_PREFERRED_TARGET = 0x8020aeaf
- _KVM_ARM_VCPU_INIT = 0x4020aeae
- _KVM_ARM64_REGS_PSTATE = 0x6030000000100042
- _KVM_ARM64_REGS_SP_EL1 = 0x6030000000100044
- _KVM_ARM64_REGS_R0 = 0x6030000000100000
- _KVM_ARM64_REGS_R1 = 0x6030000000100002
- _KVM_ARM64_REGS_R2 = 0x6030000000100004
- _KVM_ARM64_REGS_R3 = 0x6030000000100006
- _KVM_ARM64_REGS_R8 = 0x6030000000100010
- _KVM_ARM64_REGS_R18 = 0x6030000000100024
- _KVM_ARM64_REGS_PC = 0x6030000000100040
- _KVM_ARM64_REGS_MAIR_EL1 = 0x603000000013c510
- _KVM_ARM64_REGS_TCR_EL1 = 0x603000000013c102
- _KVM_ARM64_REGS_TTBR0_EL1 = 0x603000000013c100
- _KVM_ARM64_REGS_TTBR1_EL1 = 0x603000000013c101
- _KVM_ARM64_REGS_SCTLR_EL1 = 0x603000000013c080
- _KVM_ARM64_REGS_CPACR_EL1 = 0x603000000013c082
- _KVM_ARM64_REGS_VBAR_EL1 = 0x603000000013c600
+ _KVM_ARM_TARGET_GENERIC_V8 = 5
+ _KVM_ARM_PREFERRED_TARGET = 0x8020aeaf
+ _KVM_ARM_VCPU_INIT = 0x4020aeae
+ _KVM_ARM64_REGS_PSTATE = 0x6030000000100042
+ _KVM_ARM64_REGS_SP_EL1 = 0x6030000000100044
+ _KVM_ARM64_REGS_R0 = 0x6030000000100000
+ _KVM_ARM64_REGS_R1 = 0x6030000000100002
+ _KVM_ARM64_REGS_R2 = 0x6030000000100004
+ _KVM_ARM64_REGS_R3 = 0x6030000000100006
+ _KVM_ARM64_REGS_R8 = 0x6030000000100010
+ _KVM_ARM64_REGS_R18 = 0x6030000000100024
+ _KVM_ARM64_REGS_PC = 0x6030000000100040
+ _KVM_ARM64_REGS_MAIR_EL1 = 0x603000000013c510
+ _KVM_ARM64_REGS_TCR_EL1 = 0x603000000013c102
+ _KVM_ARM64_REGS_TTBR0_EL1 = 0x603000000013c100
+ _KVM_ARM64_REGS_TTBR1_EL1 = 0x603000000013c101
+ _KVM_ARM64_REGS_SCTLR_EL1 = 0x603000000013c080
+ _KVM_ARM64_REGS_CPACR_EL1 = 0x603000000013c082
+ _KVM_ARM64_REGS_VBAR_EL1 = 0x603000000013c600
)
// Arm64: Architectural Feature Access Control Register EL1.
const (
- _FPEN_NOTRAP = 0x3
- _FPEN_SHIFT = 0x20
+ _FPEN_NOTRAP = 3
+ _FPEN_SHIFT = 20
)
// Arm64: System Control Register EL1.
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index c42752d50..6c8f4fa28 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -117,10 +117,10 @@ func TestKernelFloatingPoint(t *testing.T) {
})
}
-func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *syscall.PtraceRegs, *pagetables.PageTables) bool) {
+func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) {
// Initialize registers & page tables.
var (
- regs syscall.PtraceRegs
+ regs arch.Registers
pt *pagetables.PageTables
)
testutil.SetTestTarget(&regs, target)
@@ -154,7 +154,7 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
}
func TestApplicationSyscall(t *testing.T) {
- applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -168,7 +168,7 @@ func TestApplicationSyscall(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -184,7 +184,7 @@ func TestApplicationSyscall(t *testing.T) {
}
func TestApplicationFault(t *testing.T) {
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
@@ -199,7 +199,7 @@ func TestApplicationFault(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
@@ -216,7 +216,7 @@ func TestApplicationFault(t *testing.T) {
}
func TestRegistersSyscall(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
var si arch.SignalInfo
@@ -239,7 +239,7 @@ func TestRegistersSyscall(t *testing.T) {
}
func TestRegistersFault(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
var si arch.SignalInfo
@@ -263,7 +263,7 @@ func TestRegistersFault(t *testing.T) {
}
func TestSegments(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestSegments(regs)
for {
var si arch.SignalInfo
@@ -287,7 +287,7 @@ func TestSegments(t *testing.T) {
}
func TestBounce(t *testing.T) {
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
go func() {
time.Sleep(time.Millisecond)
c.BounceToKernel()
@@ -302,7 +302,7 @@ func TestBounce(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
go func() {
time.Sleep(time.Millisecond)
c.BounceToKernel()
@@ -321,7 +321,7 @@ func TestBounce(t *testing.T) {
}
func TestBounceStress(t *testing.T) {
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
randomSleep := func() {
// O(hundreds of microseconds) is appropriate to ensure
// different overlaps and different schedules.
@@ -357,7 +357,7 @@ func TestBounceStress(t *testing.T) {
func TestInvalidate(t *testing.T) {
var data uintptr // Used below.
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, &data) // Read legitimate value.
for {
var si arch.SignalInfo
@@ -398,7 +398,7 @@ func IsFault(err error, si *arch.SignalInfo) bool {
}
func TestEmptyAddressSpace(t *testing.T) {
- applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -412,7 +412,7 @@ func TestEmptyAddressSpace(t *testing.T) {
}
return false
})
- applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -471,7 +471,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
i int // Iteration includes machine.Get() / machine.Put().
a int // Count for ErrContextInterrupt.
)
- applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -493,7 +493,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
func BenchmarkKernelSyscall(b *testing.B) {
// Note that the target passed here is irrelevant, we never execute SwitchToUser.
- applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
// iteration does not include machine.Get() / machine.Put().
for i := 0; i < b.N; i++ {
testutil.Getpid()
@@ -508,7 +508,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) {
i int
a int
)
- applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si arch.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 09552837a..e42505542 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -28,6 +28,10 @@ type vCPUArchState struct {
//
// This starts above fixedKernelPCID.
PCIDs *pagetables.PCIDs
+
+ // floatingPointState is the floating point state buffer used in guest
+ // to host transitions. See usage in bluepill_arm64.go.
+ floatingPointState *arch.FloatingPointData
}
const (
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 3b35858ae..3c02cef7c 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -60,6 +60,7 @@ func (c *vCPU) initArchState() error {
reg.addr = uint64(reflect.ValueOf(&data).Pointer())
regGet.addr = uint64(reflect.ValueOf(&dataGet).Pointer())
+ vcpuInit.target = _KVM_ARM_TARGET_GENERIC_V8
vcpuInit.features[0] |= (1 << _KVM_ARM_VCPU_PSCI_0_2)
if _, _, errno := syscall.RawSyscall(
syscall.SYS_IOCTL,
@@ -71,7 +72,8 @@ func (c *vCPU) initArchState() error {
// cpacr_el1
reg.id = _KVM_ARM64_REGS_CPACR_EL1
- data = (_FPEN_NOTRAP << _FPEN_SHIFT)
+ // It is off by default, and it is turned on only when in use.
+ data = 0 // Disable fpsimd.
if err := c.setOneRegister(&reg); err != nil {
return err
}
@@ -163,6 +165,7 @@ func (c *vCPU) initArchState() error {
return err
}
+ c.floatingPointState = arch.NewFloatingPointData()
return nil
}
diff --git a/pkg/sentry/platform/kvm/testutil/BUILD b/pkg/sentry/platform/kvm/testutil/BUILD
index f7605df8a..f7feb8683 100644
--- a/pkg/sentry/platform/kvm/testutil/BUILD
+++ b/pkg/sentry/platform/kvm/testutil/BUILD
@@ -13,4 +13,5 @@ go_library(
"testutil_arm64.s",
],
visibility = ["//pkg/sentry/platform/kvm:__pkg__"],
+ deps = ["//pkg/sentry/arch"],
)
diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
index 4c108abbf..8048eedec 100644
--- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
+++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
@@ -18,19 +18,20 @@ package testutil
import (
"reflect"
- "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// TwiddleSegments reads segments into known registers.
func TwiddleSegments()
// SetTestTarget sets the rip appropriately.
-func SetTestTarget(regs *syscall.PtraceRegs, fn func()) {
+func SetTestTarget(regs *arch.Registers, fn func()) {
regs.Rip = uint64(reflect.ValueOf(fn).Pointer())
}
// SetTouchTarget sets rax appropriately.
-func SetTouchTarget(regs *syscall.PtraceRegs, target *uintptr) {
+func SetTouchTarget(regs *arch.Registers, target *uintptr) {
if target != nil {
regs.Rax = uint64(reflect.ValueOf(target).Pointer())
} else {
@@ -39,12 +40,12 @@ func SetTouchTarget(regs *syscall.PtraceRegs, target *uintptr) {
}
// RewindSyscall rewinds a syscall RIP.
-func RewindSyscall(regs *syscall.PtraceRegs) {
+func RewindSyscall(regs *arch.Registers) {
regs.Rip -= 2
}
// SetTestRegs initializes registers to known values.
-func SetTestRegs(regs *syscall.PtraceRegs) {
+func SetTestRegs(regs *arch.Registers) {
regs.R15 = 0x15
regs.R14 = 0x14
regs.R13 = 0x13
@@ -64,7 +65,7 @@ func SetTestRegs(regs *syscall.PtraceRegs) {
}
// CheckTestRegs checks that registers were twiddled per TwiddleRegs.
-func CheckTestRegs(regs *syscall.PtraceRegs, full bool) (err error) {
+func CheckTestRegs(regs *arch.Registers, full bool) (err error) {
if need := ^uint64(0x15); regs.R15 != need {
err = addRegisterMismatch(err, "R15", regs.R15, need)
}
@@ -121,13 +122,13 @@ var fsData uint64 = 0x55
var gsData uint64 = 0x85
// SetTestSegments initializes segments to known values.
-func SetTestSegments(regs *syscall.PtraceRegs) {
+func SetTestSegments(regs *arch.Registers) {
regs.Fs_base = uint64(reflect.ValueOf(&fsData).Pointer())
regs.Gs_base = uint64(reflect.ValueOf(&gsData).Pointer())
}
// CheckTestSegments checks that registers were twiddled per TwiddleSegments.
-func CheckTestSegments(regs *syscall.PtraceRegs) (err error) {
+func CheckTestSegments(regs *arch.Registers) (err error) {
if regs.Rax != fsData {
err = addRegisterMismatch(err, "Rax", regs.Rax, fsData)
}
diff --git a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go
index 40b2e4acc..ca902c8c1 100644
--- a/pkg/sentry/platform/kvm/testutil/testutil_arm64.go
+++ b/pkg/sentry/platform/kvm/testutil/testutil_arm64.go
@@ -19,16 +19,17 @@ package testutil
import (
"fmt"
"reflect"
- "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// SetTestTarget sets the rip appropriately.
-func SetTestTarget(regs *syscall.PtraceRegs, fn func()) {
+func SetTestTarget(regs *arch.Registers, fn func()) {
regs.Pc = uint64(reflect.ValueOf(fn).Pointer())
}
// SetTouchTarget sets rax appropriately.
-func SetTouchTarget(regs *syscall.PtraceRegs, target *uintptr) {
+func SetTouchTarget(regs *arch.Registers, target *uintptr) {
if target != nil {
regs.Regs[8] = uint64(reflect.ValueOf(target).Pointer())
} else {
@@ -37,19 +38,19 @@ func SetTouchTarget(regs *syscall.PtraceRegs, target *uintptr) {
}
// RewindSyscall rewinds a syscall RIP.
-func RewindSyscall(regs *syscall.PtraceRegs) {
+func RewindSyscall(regs *arch.Registers) {
regs.Pc -= 4
}
// SetTestRegs initializes registers to known values.
-func SetTestRegs(regs *syscall.PtraceRegs) {
+func SetTestRegs(regs *arch.Registers) {
for i := 0; i <= 30; i++ {
regs.Regs[i] = uint64(i) + 1
}
}
// CheckTestRegs checks that registers were twiddled per TwiddleRegs.
-func CheckTestRegs(regs *syscall.PtraceRegs, full bool) (err error) {
+func CheckTestRegs(regs *arch.Registers, full bool) (err error) {
for i := 0; i <= 30; i++ {
if need := ^uint64(i + 1); regs.Regs[i] != need {
err = addRegisterMismatch(err, fmt.Sprintf("R%d", i), regs.Regs[i], need)
diff --git a/pkg/sentry/platform/ptrace/ptrace_amd64.go b/pkg/sentry/platform/ptrace/ptrace_amd64.go
index 24fc5dc62..3b9a870a5 100644
--- a/pkg/sentry/platform/ptrace/ptrace_amd64.go
+++ b/pkg/sentry/platform/ptrace/ptrace_amd64.go
@@ -15,9 +15,8 @@
package ptrace
import (
- "syscall"
-
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// fpRegSet returns the GETREGSET/SETREGSET register set type to be used.
@@ -28,12 +27,12 @@ func fpRegSet(useXsave bool) uintptr {
return linux.NT_PRFPREG
}
-func stackPointer(r *syscall.PtraceRegs) uintptr {
+func stackPointer(r *arch.Registers) uintptr {
return uintptr(r.Rsp)
}
// x86 use the fs_base register to store the TLS pointer which can be
-// get/set in "func (t *thread) get/setRegs(regs *syscall.PtraceRegs)".
+// get/set in "func (t *thread) get/setRegs(regs *arch.Registers)".
// So both of the get/setTLS() operations are noop here.
// getTLS gets the thread local storage register.
diff --git a/pkg/sentry/platform/ptrace/ptrace_arm64.go b/pkg/sentry/platform/ptrace/ptrace_arm64.go
index 4db28c534..5c869926a 100644
--- a/pkg/sentry/platform/ptrace/ptrace_arm64.go
+++ b/pkg/sentry/platform/ptrace/ptrace_arm64.go
@@ -15,9 +15,8 @@
package ptrace
import (
- "syscall"
-
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// fpRegSet returns the GETREGSET/SETREGSET register set type to be used.
@@ -25,6 +24,6 @@ func fpRegSet(_ bool) uintptr {
return linux.NT_PRFPREG
}
-func stackPointer(r *syscall.PtraceRegs) uintptr {
+func stackPointer(r *arch.Registers) uintptr {
return uintptr(r.Sp)
}
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 6c0ed7b3e..8b72d24e8 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -24,7 +24,7 @@ import (
)
// getRegs gets the general purpose register set.
-func (t *thread) getRegs(regs *syscall.PtraceRegs) error {
+func (t *thread) getRegs(regs *arch.Registers) error {
iovec := syscall.Iovec{
Base: (*byte)(unsafe.Pointer(regs)),
Len: uint64(unsafe.Sizeof(*regs)),
@@ -43,7 +43,7 @@ func (t *thread) getRegs(regs *syscall.PtraceRegs) error {
}
// setRegs sets the general purpose register set.
-func (t *thread) setRegs(regs *syscall.PtraceRegs) error {
+func (t *thread) setRegs(regs *arch.Registers) error {
iovec := syscall.Iovec{
Base: (*byte)(unsafe.Pointer(regs)),
Len: uint64(unsafe.Sizeof(*regs)),
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index 773ddb1ed..2389423b0 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -63,7 +63,7 @@ type thread struct {
// initRegs are the initial registers for the first thread.
//
// These are used for the register set for system calls.
- initRegs syscall.PtraceRegs
+ initRegs arch.Registers
}
// threadPool is a collection of threads.
@@ -317,7 +317,7 @@ const (
)
func (t *thread) dumpAndPanic(message string) {
- var regs syscall.PtraceRegs
+ var regs arch.Registers
message += "\n"
if err := t.getRegs(&regs); err == nil {
message += dumpRegs(&regs)
@@ -423,7 +423,7 @@ func (t *thread) init() {
// This is _not_ for use by application system calls, rather it is for use when
// a system call must be injected into the remote context (e.g. mmap, munmap).
// Note that clones are handled separately.
-func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
+func (t *thread) syscall(regs *arch.Registers) (uintptr, error) {
// Set registers.
if err := t.setRegs(regs); err != nil {
panic(fmt.Sprintf("ptrace set regs failed: %v", err))
@@ -461,7 +461,7 @@ func (t *thread) syscall(regs *syscall.PtraceRegs) (uintptr, error) {
// syscallIgnoreInterrupt ignores interrupts on the system call thread and
// restarts the syscall if the kernel indicates that should happen.
func (t *thread) syscallIgnoreInterrupt(
- initRegs *syscall.PtraceRegs,
+ initRegs *arch.Registers,
sysno uintptr,
args ...arch.SyscallArgument) (uintptr, error) {
for {
diff --git a/pkg/sentry/platform/ptrace/subprocess_amd64.go b/pkg/sentry/platform/ptrace/subprocess_amd64.go
index cd74945e7..84b699f0d 100644
--- a/pkg/sentry/platform/ptrace/subprocess_amd64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_amd64.go
@@ -41,7 +41,7 @@ const (
// resetSysemuRegs sets up emulation registers.
//
// This should be called prior to calling sysemu.
-func (t *thread) resetSysemuRegs(regs *syscall.PtraceRegs) {
+func (t *thread) resetSysemuRegs(regs *arch.Registers) {
regs.Cs = t.initRegs.Cs
regs.Ss = t.initRegs.Ss
regs.Ds = t.initRegs.Ds
@@ -53,7 +53,7 @@ func (t *thread) resetSysemuRegs(regs *syscall.PtraceRegs) {
// createSyscallRegs sets up syscall registers.
//
// This should be called to generate registers for a system call.
-func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch.SyscallArgument) syscall.PtraceRegs {
+func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers {
// Copy initial registers.
regs := *initRegs
@@ -82,18 +82,18 @@ func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch
}
// isSingleStepping determines if the registers indicate single-stepping.
-func isSingleStepping(regs *syscall.PtraceRegs) bool {
+func isSingleStepping(regs *arch.Registers) bool {
return (regs.Eflags & arch.X86TrapFlag) != 0
}
// updateSyscallRegs updates registers after finishing sysemu.
-func updateSyscallRegs(regs *syscall.PtraceRegs) {
+func updateSyscallRegs(regs *arch.Registers) {
// Ptrace puts -ENOSYS in rax on syscall-enter-stop.
regs.Rax = regs.Orig_rax
}
// syscallReturnValue extracts a sensible return from registers.
-func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
+func syscallReturnValue(regs *arch.Registers) (uintptr, error) {
rval := int64(regs.Rax)
if rval < 0 {
return 0, syscall.Errno(-rval)
@@ -101,7 +101,7 @@ func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
return uintptr(rval), nil
}
-func dumpRegs(regs *syscall.PtraceRegs) string {
+func dumpRegs(regs *arch.Registers) string {
var m strings.Builder
fmt.Fprintf(&m, "Registers:\n")
@@ -143,7 +143,7 @@ func (t *thread) adjustInitRegsRip() {
}
// Pass the expected PPID to the child via R15 when creating stub process.
-func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
+func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
initregs.R15 = uint64(ppid)
// Rbx has to be set to 1 when creating stub process.
initregs.Rbx = 1
@@ -156,7 +156,7 @@ func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
//
// Note that this should only be called after verifying that the signalInfo has
// been generated by the kernel.
-func patchSignalInfo(regs *syscall.PtraceRegs, signalInfo *arch.SignalInfo) {
+func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
signalInfo.Signo = int32(linux.SIGSEGV)
diff --git a/pkg/sentry/platform/ptrace/subprocess_arm64.go b/pkg/sentry/platform/ptrace/subprocess_arm64.go
index 7f5c393f0..bd618fae8 100644
--- a/pkg/sentry/platform/ptrace/subprocess_arm64.go
+++ b/pkg/sentry/platform/ptrace/subprocess_arm64.go
@@ -41,13 +41,13 @@ const (
// resetSysemuRegs sets up emulation registers.
//
// This should be called prior to calling sysemu.
-func (t *thread) resetSysemuRegs(regs *syscall.PtraceRegs) {
+func (t *thread) resetSysemuRegs(regs *arch.Registers) {
}
// createSyscallRegs sets up syscall registers.
//
// This should be called to generate registers for a system call.
-func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch.SyscallArgument) syscall.PtraceRegs {
+func createSyscallRegs(initRegs *arch.Registers, sysno uintptr, args ...arch.SyscallArgument) arch.Registers {
// Copy initial registers (Pc, Sp, etc.).
regs := *initRegs
@@ -78,7 +78,7 @@ func createSyscallRegs(initRegs *syscall.PtraceRegs, sysno uintptr, args ...arch
}
// isSingleStepping determines if the registers indicate single-stepping.
-func isSingleStepping(regs *syscall.PtraceRegs) bool {
+func isSingleStepping(regs *arch.Registers) bool {
// Refer to the ARM SDM D2.12.3: software step state machine
// return (regs.Pstate.SS == 1) && (MDSCR_EL1.SS == 1).
//
@@ -89,13 +89,13 @@ func isSingleStepping(regs *syscall.PtraceRegs) bool {
}
// updateSyscallRegs updates registers after finishing sysemu.
-func updateSyscallRegs(regs *syscall.PtraceRegs) {
+func updateSyscallRegs(regs *arch.Registers) {
// No special work is necessary.
return
}
// syscallReturnValue extracts a sensible return from registers.
-func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
+func syscallReturnValue(regs *arch.Registers) (uintptr, error) {
rval := int64(regs.Regs[0])
if rval < 0 {
return 0, syscall.Errno(-rval)
@@ -103,7 +103,7 @@ func syscallReturnValue(regs *syscall.PtraceRegs) (uintptr, error) {
return uintptr(rval), nil
}
-func dumpRegs(regs *syscall.PtraceRegs) string {
+func dumpRegs(regs *arch.Registers) string {
var m strings.Builder
fmt.Fprintf(&m, "Registers:\n")
@@ -125,7 +125,7 @@ func (t *thread) adjustInitRegsRip() {
}
// Pass the expected PPID to the child via X7 when creating stub process
-func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
+func initChildProcessPPID(initregs *arch.Registers, ppid int32) {
initregs.Regs[7] = uint64(ppid)
// R9 has to be set to 1 when creating stub process.
initregs.Regs[9] = 1
@@ -138,7 +138,7 @@ func initChildProcessPPID(initregs *syscall.PtraceRegs, ppid int32) {
//
// Note that this should only be called after verifying that the signalInfo has
// been generated by the kernel.
-func patchSignalInfo(regs *syscall.PtraceRegs, signalInfo *arch.SignalInfo) {
+func patchSignalInfo(regs *arch.Registers, signalInfo *arch.SignalInfo) {
if linux.Signal(signalInfo.Signo) == linux.SIGSYS {
signalInfo.Signo = int32(linux.SIGSEGV)
diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD
index b69520030..679b287c3 100644
--- a/pkg/sentry/platform/ring0/BUILD
+++ b/pkg/sentry/platform/ring0/BUILD
@@ -79,6 +79,7 @@ go_library(
deps = [
"//pkg/cpuid",
"//pkg/safecopy",
+ "//pkg/sentry/arch",
"//pkg/sentry/platform/ring0/pagetables",
"//pkg/usermem",
],
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
index 86fd5ed58..e6daf24df 100644
--- a/pkg/sentry/platform/ring0/defs.go
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -15,8 +15,7 @@
package ring0
import (
- "syscall"
-
+ "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
)
@@ -72,7 +71,7 @@ type CPU struct {
// registers is a set of registers; these may be used on kernel system
// calls and exceptions via the Registers function.
- registers syscall.PtraceRegs
+ registers arch.Registers
// hooks are kernel hooks.
hooks Hooks
@@ -83,14 +82,14 @@ type CPU struct {
// This is explicitly safe to call during KernelException and KernelSyscall.
//
//go:nosplit
-func (c *CPU) Registers() *syscall.PtraceRegs {
+func (c *CPU) Registers() *arch.Registers {
return &c.registers
}
// SwitchOpts are passed to the Switch function.
type SwitchOpts struct {
// Registers are the user register state.
- Registers *syscall.PtraceRegs
+ Registers *arch.Registers
// FloatingPointState is a byte pointer where floating point state is
// saved and restored.
diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/sentry/platform/ring0/defs_arm64.go
index 1583dda12..0e2ab716c 100644
--- a/pkg/sentry/platform/ring0/defs_arm64.go
+++ b/pkg/sentry/platform/ring0/defs_arm64.go
@@ -124,6 +124,12 @@ func (c *CPU) SetAppAddr(value uintptr) {
c.appAddr = value
}
+// GetLazyVFP returns the value of cpacr_el1.
+//go:nosplit
+func (c *CPU) GetLazyVFP() (value uintptr) {
+ return c.lazyVFP
+}
+
// SwitchArchOpts are embedded in SwitchOpts.
type SwitchArchOpts struct {
// UserASID indicates that the application ASID to be used on switch,
diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/sentry/platform/ring0/entry_amd64.go
index a5ce67885..7fa43c2f5 100644
--- a/pkg/sentry/platform/ring0/entry_amd64.go
+++ b/pkg/sentry/platform/ring0/entry_amd64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "syscall"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// This is an assembly function.
@@ -41,7 +41,7 @@ func swapgs()
// The return code is the vector that interrupted execution.
//
// See stubs.go for a note regarding the frame size of this function.
-func sysret(*CPU, *syscall.PtraceRegs) Vector
+func sysret(*CPU, *arch.Registers) Vector
// "iret is the cadillac of CPL switching."
//
@@ -50,7 +50,7 @@ func sysret(*CPU, *syscall.PtraceRegs) Vector
// iret is nearly identical to sysret, except an iret is used to fully restore
// all user state. This must be called in cases where all registers need to be
// restored.
-func iret(*CPU, *syscall.PtraceRegs) Vector
+func iret(*CPU, *arch.Registers) Vector
// exception is the generic exception entry.
//
diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD
index 4cae10459..549f3d228 100644
--- a/pkg/sentry/platform/ring0/gen_offsets/BUILD
+++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD
@@ -27,6 +27,7 @@ go_binary(
visibility = ["//pkg/sentry/platform/ring0:__pkg__"],
deps = [
"//pkg/cpuid",
+ "//pkg/sentry/arch",
"//pkg/sentry/platform/ring0/pagetables",
"//pkg/usermem",
],
diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go
index 242b9305c..444a83913 100644
--- a/pkg/sentry/platform/ring0/lib_arm64.go
+++ b/pkg/sentry/platform/ring0/lib_arm64.go
@@ -20,13 +20,13 @@ package ring0
func CPACREL1() (value uintptr)
// FPCR returns the value of FPCR register.
-func FPCR() (value uintptr)
+func GetFPCR() (value uintptr)
// SetFPCR writes the FPCR value.
func SetFPCR(value uintptr)
// FPSR returns the value of FPSR register.
-func FPSR() (value uintptr)
+func GetFPSR() (value uintptr)
// SetFPSR writes the FPSR value.
func SetFPSR(value uintptr)
diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go
index 85cc3fdad..b8ab120a0 100644
--- a/pkg/sentry/platform/ring0/offsets_amd64.go
+++ b/pkg/sentry/platform/ring0/offsets_amd64.go
@@ -20,7 +20,8 @@ import (
"fmt"
"io"
"reflect"
- "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// Emit prints architecture-specific offsets.
@@ -64,7 +65,7 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80)
fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
- p := &syscall.PtraceRegs{}
+ p := &arch.Registers{}
fmt.Fprintf(w, "\n// Ptrace registers.\n")
fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer())
fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer())
diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go
index 057fb5c69..f3de962f0 100644
--- a/pkg/sentry/platform/ring0/offsets_arm64.go
+++ b/pkg/sentry/platform/ring0/offsets_arm64.go
@@ -20,7 +20,8 @@ import (
"fmt"
"io"
"reflect"
- "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// Emit prints architecture-specific offsets.
@@ -87,7 +88,7 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
- p := &syscall.PtraceRegs{}
+ p := &arch.Registers{}
fmt.Fprintf(w, "\n// Ptrace registers.\n")
fmt.Fprintf(w, "#define PTRACE_R0 0x%02x\n", reflect.ValueOf(&p.Regs[0]).Pointer()-reflect.ValueOf(p).Pointer())
fmt.Fprintf(w, "#define PTRACE_R1 0x%02x\n", reflect.ValueOf(&p.Regs[1]).Pointer()-reflect.ValueOf(p).Pointer())
diff --git a/pkg/sentry/socket/control/BUILD b/pkg/sentry/socket/control/BUILD
index 4d42d29cb..ca16d0381 100644
--- a/pkg/sentry/socket/control/BUILD
+++ b/pkg/sentry/socket/control/BUILD
@@ -4,7 +4,10 @@ package(licenses = ["notice"])
go_library(
name = "control",
- srcs = ["control.go"],
+ srcs = [
+ "control.go",
+ "control_vfs2.go",
+ ],
imports = [
"gvisor.dev/gvisor/pkg/sentry/fs",
],
@@ -18,6 +21,7 @@ go_library(
"//pkg/sentry/kernel/auth",
"//pkg/sentry/socket",
"//pkg/sentry/socket/unix/transport",
+ "//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/tcpip",
"//pkg/usermem",
diff --git a/pkg/sentry/socket/control/control.go b/pkg/sentry/socket/control/control.go
index 8834a1e1a..8b439a078 100644
--- a/pkg/sentry/socket/control/control.go
+++ b/pkg/sentry/socket/control/control.go
@@ -41,6 +41,8 @@ type SCMCredentials interface {
Credentials(t *kernel.Task) (kernel.ThreadID, auth.UID, auth.GID)
}
+// LINT.IfChange
+
// SCMRights represents a SCM_RIGHTS socket control message.
type SCMRights interface {
transport.RightsControlMessage
@@ -142,6 +144,8 @@ func PackRights(t *kernel.Task, rights SCMRights, cloexec bool, buf []byte, flag
return putCmsg(buf, flags, linux.SCM_RIGHTS, align, fds)
}
+// LINT.ThenChange(./control_vfs2.go)
+
// scmCredentials represents an SCM_CREDENTIALS socket control message.
//
// +stateify savable
@@ -537,11 +541,19 @@ func Parse(t *kernel.Task, socketOrEndpoint interface{}, buf []byte) (socket.Con
}
if len(fds) > 0 {
- rights, err := NewSCMRights(t, fds)
- if err != nil {
- return socket.ControlMessages{}, err
+ if kernel.VFS2Enabled {
+ rights, err := NewSCMRightsVFS2(t, fds)
+ if err != nil {
+ return socket.ControlMessages{}, err
+ }
+ cmsgs.Unix.Rights = rights
+ } else {
+ rights, err := NewSCMRights(t, fds)
+ if err != nil {
+ return socket.ControlMessages{}, err
+ }
+ cmsgs.Unix.Rights = rights
}
- cmsgs.Unix.Rights = rights
}
return cmsgs, nil
@@ -566,6 +578,8 @@ func MakeCreds(t *kernel.Task) SCMCredentials {
return &scmCredentials{t, tcred.EffectiveKUID, tcred.EffectiveKGID}
}
+// LINT.IfChange
+
// New creates default control messages if needed.
func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) transport.ControlMessages {
return transport.ControlMessages{
@@ -573,3 +587,5 @@ func New(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRights) transpo
Rights: rights,
}
}
+
+// LINT.ThenChange(./control_vfs2.go)
diff --git a/pkg/sentry/socket/control/control_vfs2.go b/pkg/sentry/socket/control/control_vfs2.go
new file mode 100644
index 000000000..fd08179be
--- /dev/null
+++ b/pkg/sentry/socket/control/control_vfs2.go
@@ -0,0 +1,131 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package control
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// SCMRightsVFS2 represents a SCM_RIGHTS socket control message.
+type SCMRightsVFS2 interface {
+ transport.RightsControlMessage
+
+ // Files returns up to max RightsFiles.
+ //
+ // Returned files are consumed and ownership is transferred to the caller.
+ // Subsequent calls to Files will return the next files.
+ Files(ctx context.Context, max int) (rf RightsFilesVFS2, truncated bool)
+}
+
+// RightsFiles represents a SCM_RIGHTS socket control message. A reference is
+// maintained for each vfs.FileDescription and is release either when an FD is created or
+// when the Release method is called.
+type RightsFilesVFS2 []*vfs.FileDescription
+
+// NewSCMRightsVFS2 creates a new SCM_RIGHTS socket control message
+// representation using local sentry FDs.
+func NewSCMRightsVFS2(t *kernel.Task, fds []int32) (SCMRightsVFS2, error) {
+ files := make(RightsFilesVFS2, 0, len(fds))
+ for _, fd := range fds {
+ file := t.GetFileVFS2(fd)
+ if file == nil {
+ files.Release()
+ return nil, syserror.EBADF
+ }
+ files = append(files, file)
+ }
+ return &files, nil
+}
+
+// Files implements SCMRights.Files.
+func (fs *RightsFilesVFS2) Files(ctx context.Context, max int) (RightsFilesVFS2, bool) {
+ n := max
+ var trunc bool
+ if l := len(*fs); n > l {
+ n = l
+ } else if n < l {
+ trunc = true
+ }
+ rf := (*fs)[:n]
+ *fs = (*fs)[n:]
+ return rf, trunc
+}
+
+// Clone implements transport.RightsControlMessage.Clone.
+func (fs *RightsFilesVFS2) Clone() transport.RightsControlMessage {
+ nfs := append(RightsFilesVFS2(nil), *fs...)
+ for _, nf := range nfs {
+ nf.IncRef()
+ }
+ return &nfs
+}
+
+// Release implements transport.RightsControlMessage.Release.
+func (fs *RightsFilesVFS2) Release() {
+ for _, f := range *fs {
+ f.DecRef()
+ }
+ *fs = nil
+}
+
+// rightsFDsVFS2 gets up to the specified maximum number of FDs.
+func rightsFDsVFS2(t *kernel.Task, rights SCMRightsVFS2, cloexec bool, max int) ([]int32, bool) {
+ files, trunc := rights.Files(t, max)
+ fds := make([]int32, 0, len(files))
+ for i := 0; i < max && len(files) > 0; i++ {
+ fd, err := t.NewFDFromVFS2(0, files[0], kernel.FDFlags{
+ CloseOnExec: cloexec,
+ })
+ files[0].DecRef()
+ files = files[1:]
+ if err != nil {
+ t.Warningf("Error inserting FD: %v", err)
+ // This is what Linux does.
+ break
+ }
+
+ fds = append(fds, int32(fd))
+ }
+ return fds, trunc
+}
+
+// PackRightsVFS2 packs as many FDs as will fit into the unused capacity of buf.
+func PackRightsVFS2(t *kernel.Task, rights SCMRightsVFS2, cloexec bool, buf []byte, flags int) ([]byte, int) {
+ maxFDs := (cap(buf) - len(buf) - linux.SizeOfControlMessageHeader) / 4
+ // Linux does not return any FDs if none fit.
+ if maxFDs <= 0 {
+ flags |= linux.MSG_CTRUNC
+ return buf, flags
+ }
+ fds, trunc := rightsFDsVFS2(t, rights, cloexec, maxFDs)
+ if trunc {
+ flags |= linux.MSG_CTRUNC
+ }
+ align := t.Arch().Width()
+ return putCmsg(buf, flags, linux.SCM_RIGHTS, align, fds)
+}
+
+// NewVFS2 creates default control messages if needed.
+func NewVFS2(t *kernel.Task, socketOrEndpoint interface{}, rights SCMRightsVFS2) transport.ControlMessages {
+ return transport.ControlMessages{
+ Credentials: makeCreds(t, socketOrEndpoint),
+ Rights: rights,
+ }
+}
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 55c0f04f3..ff1cfd8f6 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -121,13 +121,12 @@ func (tm *TCPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa
tcpHeader = header.TCP(pkt.TransportHeader)
} else {
// The TCP header hasn't been parsed yet. We have to do it here.
- hdr, ok := pkt.Data.PullUp(header.TCPMinimumSize)
- if !ok {
+ if len(pkt.Data.First()) < header.TCPMinimumSize {
// There's no valid TCP header here, so we hotdrop the
// packet.
return false, true
}
- tcpHeader = header.TCP(hdr)
+ tcpHeader = header.TCP(pkt.Data.First())
}
// Check whether the source and destination ports are within the
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index 04d03d494..3359418c1 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -120,13 +120,12 @@ func (um *UDPMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interfaceNa
udpHeader = header.UDP(pkt.TransportHeader)
} else {
// The UDP header hasn't been parsed yet. We have to do it here.
- hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize)
- if !ok {
+ if len(pkt.Data.First()) < header.UDPMinimumSize {
// There's no valid UDP header here, so we hotdrop the
// packet.
return false, true
}
- udpHeader = header.UDP(hdr)
+ udpHeader = header.UDP(pkt.Data.First())
}
// Check whether the source and destination ports are within the
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index de2cc4bdf..941a91097 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -21,6 +21,7 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
+ "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/time",
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 3e54d49c4..315dc274f 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -19,6 +19,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket/control"
@@ -42,30 +43,44 @@ type SocketVFS2 struct {
socketOpsCommon
}
-// NewVFS2File creates and returns a new vfs.FileDescription for a unix socket.
-func NewVFS2File(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
- sock := NewFDImpl(ep, stype)
- vfsfd := &sock.vfsfd
- if err := sockfs.InitSocket(sock, vfsfd, t.Kernel().SocketMount(), t.Credentials()); err != nil {
+// NewSockfsFile creates a new socket file in the global sockfs mount and
+// returns a corresponding file description.
+func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
+ mnt := t.Kernel().SocketMount()
+ fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
+ d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+
+ fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d)
+ if err != nil {
return nil, syserr.FromError(err)
}
- return vfsfd, nil
+ return fd, nil
}
-// NewFDImpl creates and returns a new SocketVFS2.
-func NewFDImpl(ep transport.Endpoint, stype linux.SockType) *SocketVFS2 {
+// NewFileDescription creates and returns a socket file description
+// corresponding to the given mount and dentry.
+func NewFileDescription(ep transport.Endpoint, stype linux.SockType, flags uint32, mnt *vfs.Mount, d *vfs.Dentry) (*vfs.FileDescription, error) {
// You can create AF_UNIX, SOCK_RAW sockets. They're the same as
// SOCK_DGRAM and don't require CAP_NET_RAW.
if stype == linux.SOCK_RAW {
stype = linux.SOCK_DGRAM
}
- return &SocketVFS2{
+ sock := &SocketVFS2{
socketOpsCommon: socketOpsCommon{
ep: ep,
stype: stype,
},
}
+ vfsfd := &sock.vfsfd
+ if err := vfsfd.Init(sock, flags, mnt, d, &vfs.FileDescriptionOptions{
+ DenyPRead: true,
+ DenyPWrite: true,
+ UseDentryMetadata: true,
+ }); err != nil {
+ return nil, err
+ }
+ return vfsfd, nil
}
// GetSockOpt implements the linux syscall getsockopt(2) for sockets backed by
@@ -112,8 +127,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
}
}
- // We expect this to be a FileDescription here.
- ns, err := NewVFS2File(t, ep, s.stype)
+ ns, err := NewSockfsFile(t, ep, s.stype)
if err != nil {
return 0, nil, 0, err
}
@@ -307,7 +321,7 @@ func (*providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int)
return nil, syserr.ErrInvalidArgument
}
- f, err := NewVFS2File(t, ep, stype)
+ f, err := NewSockfsFile(t, ep, stype)
if err != nil {
ep.Close()
return nil, err
@@ -331,13 +345,13 @@ func (*providerVFS2) Pair(t *kernel.Task, stype linux.SockType, protocol int) (*
// Create the endpoints and sockets.
ep1, ep2 := transport.NewPair(t, stype, t.Kernel())
- s1, err := NewVFS2File(t, ep1, stype)
+ s1, err := NewSockfsFile(t, ep1, stype)
if err != nil {
ep1.Close()
ep2.Close()
return nil, nil, err
}
- s2, err := NewVFS2File(t, ep2, stype)
+ s2, err := NewSockfsFile(t, ep2, stype)
if err != nil {
s1.DecRef()
ep2.Close()
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 7e1747a0c..582d37e03 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -355,7 +355,7 @@ func Pause(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
func RtSigpending(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
addr := args[0].Pointer()
pending := t.PendingSignals()
- _, err := t.CopyOut(addr, pending)
+ _, err := pending.CopyOut(t, addr)
return 0, nil, err
}
@@ -392,7 +392,7 @@ func RtSigtimedwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
if siginfo != 0 {
si.FixSignalCodeForUser()
- if _, err := t.CopyOut(siginfo, si); err != nil {
+ if _, err := si.CopyOut(t, siginfo); err != nil {
return 0, nil, err
}
}
@@ -411,7 +411,7 @@ func RtSigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// same way), and that the code is in the allowed set. This same logic
// appears below in RtSigtgqueueinfo and should be kept in sync.
var info arch.SignalInfo
- if _, err := t.CopyIn(infoAddr, &info); err != nil {
+ if _, err := info.CopyIn(t, infoAddr); err != nil {
return 0, nil, err
}
info.Signo = int32(sig)
@@ -455,7 +455,7 @@ func RtTgsigqueueinfo(t *kernel.Task, args arch.SyscallArguments) (uintptr, *ker
// Copy in the info. See RtSigqueueinfo above.
var info arch.SignalInfo
- if _, err := t.CopyIn(infoAddr, &info); err != nil {
+ if _, err := info.CopyIn(t, infoAddr); err != nil {
return 0, nil, err
}
info.Signo = int32(sig)
@@ -485,7 +485,7 @@ func RtSigsuspend(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.
// Copy in the signal mask.
var mask linux.SignalSet
- if _, err := t.CopyIn(sigset, &mask); err != nil {
+ if _, err := mask.CopyIn(t, sigset); err != nil {
return 0, nil, err
}
mask &^= kernel.UnblockableSignals
diff --git a/pkg/sentry/syscalls/linux/vfs2/socket.go b/pkg/sentry/syscalls/linux/vfs2/socket.go
index b1ede32f0..10b668477 100644
--- a/pkg/sentry/syscalls/linux/vfs2/socket.go
+++ b/pkg/sentry/syscalls/linux/vfs2/socket.go
@@ -804,7 +804,7 @@ func recvSingleMsg(t *kernel.Task, s socket.SocketVFS2, msgPtr usermem.Addr, fla
}
if cms.Unix.Rights != nil {
- controlData, mflags = control.PackRights(t, cms.Unix.Rights.(control.SCMRights), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags)
+ controlData, mflags = control.PackRightsVFS2(t, cms.Unix.Rights.(control.SCMRightsVFS2), flags&linux.MSG_CMSG_CLOEXEC != 0, controlData, mflags)
}
// Copy the address to the caller.
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index fcc46420f..101497ed6 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -255,7 +255,7 @@ func (w *Watchdog) runTurn() {
case <-done:
case <-time.After(w.TaskTimeout):
// Report if the watchdog is not making progress.
- // No one is wathching the watchdog watcher though.
+ // No one is watching the watchdog watcher though.
w.reportStuckWatchdog()
<-done
}
@@ -317,10 +317,8 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo
buf.WriteString("Search for '(*Task).run(0x..., 0x<tid>)' in the stack dump to find the offending goroutine")
- // Dump stack only if a new task is detected or if it sometime has
- // passed since the last time a stack dump was generated.
- showStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod
- w.doAction(w.TaskTimeoutAction, showStack, &buf)
+ // Force stack dump only if a new task is detected.
+ w.doAction(w.TaskTimeoutAction, newTaskFound, &buf)
}
func (w *Watchdog) reportStuckWatchdog() {
@@ -329,12 +327,15 @@ func (w *Watchdog) reportStuckWatchdog() {
w.doAction(w.TaskTimeoutAction, false, &buf)
}
-// doAction will take the given action. If the action is LogWarning and
-// showStack is false, then the stack printing will be skipped.
-func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) {
+// doAction will take the given action. If the action is LogWarning, the stack
+// is not always dumpped to the log to prevent log flooding. "forceStack"
+// guarantees that the stack will be dumped regarless.
+func (w *Watchdog) doAction(action Action, forceStack bool, msg *bytes.Buffer) {
switch action {
case LogWarning:
- if !showStack {
+ // Dump stack only if forced or sometime has passed since the last time a
+ // stack dump was generated.
+ if !forceStack && time.Since(w.lastStackDump) < stackDumpSameTaskPeriod {
msg.WriteString("\n...[stack dump skipped]...")
log.Warningf(msg.String())
return
@@ -359,6 +360,7 @@ func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) {
case <-time.After(1 * time.Second):
}
panic(fmt.Sprintf("%s\nStack for running G's are skipped while panicking.", msg.String()))
+
default:
panic(fmt.Sprintf("Unknown watchdog action %v", action))
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index f01217c91..8ec5d5d5c 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -77,8 +77,7 @@ func NewVectorisedView(size int, views []View) VectorisedView {
return VectorisedView{views: views, size: size}
}
-// TrimFront removes the first "count" bytes of the vectorised view. It panics
-// if count > vv.Size().
+// TrimFront removes the first "count" bytes of the vectorised view.
func (vv *VectorisedView) TrimFront(count int) {
for count > 0 && len(vv.views) > 0 {
if count < len(vv.views[0]) {
@@ -87,7 +86,7 @@ func (vv *VectorisedView) TrimFront(count int) {
return
}
count -= len(vv.views[0])
- vv.removeFirst()
+ vv.RemoveFirst()
}
}
@@ -105,7 +104,7 @@ func (vv *VectorisedView) Read(v View) (copied int, err error) {
count -= len(vv.views[0])
copy(v[copied:], vv.views[0])
copied += len(vv.views[0])
- vv.removeFirst()
+ vv.RemoveFirst()
}
if copied == 0 {
return 0, io.EOF
@@ -127,7 +126,7 @@ func (vv *VectorisedView) ReadToVV(dstVV *VectorisedView, count int) (copied int
count -= len(vv.views[0])
dstVV.AppendView(vv.views[0])
copied += len(vv.views[0])
- vv.removeFirst()
+ vv.RemoveFirst()
}
return copied
}
@@ -163,37 +162,22 @@ func (vv *VectorisedView) Clone(buffer []View) VectorisedView {
return VectorisedView{views: append(buffer[:0], vv.views...), size: vv.size}
}
-// PullUp returns the first "count" bytes of the vectorised view. If those
-// bytes aren't already contiguous inside the vectorised view, PullUp will
-// reallocate as needed to make them contiguous. PullUp fails and returns false
-// when count > vv.Size().
-func (vv *VectorisedView) PullUp(count int) (View, bool) {
+// First returns the first view of the vectorised view.
+func (vv *VectorisedView) First() View {
if len(vv.views) == 0 {
- return nil, count == 0
- }
- if count <= len(vv.views[0]) {
- return vv.views[0][:count], true
- }
- if count > vv.size {
- return nil, false
+ return nil
}
+ return vv.views[0]
+}
- newFirst := NewView(count)
- i := 0
- for offset := 0; offset < count; i++ {
- copy(newFirst[offset:], vv.views[i])
- if count-offset < len(vv.views[i]) {
- vv.views[i].TrimFront(count - offset)
- break
- }
- offset += len(vv.views[i])
- vv.views[i] = nil
+// RemoveFirst removes the first view of the vectorised view.
+func (vv *VectorisedView) RemoveFirst() {
+ if len(vv.views) == 0 {
+ return
}
- // We're guaranteed that i > 0, since count is too large for the first
- // view.
- vv.views[i-1] = newFirst
- vv.views = vv.views[i-1:]
- return newFirst, true
+ vv.size -= len(vv.views[0])
+ vv.views[0] = nil
+ vv.views = vv.views[1:]
}
// Size returns the size in bytes of the entire content stored in the vectorised view.
@@ -241,10 +225,3 @@ func (vv *VectorisedView) Readers() []bytes.Reader {
}
return readers
}
-
-// removeFirst panics when len(vv.views) < 1.
-func (vv *VectorisedView) removeFirst() {
- vv.size -= len(vv.views[0])
- vv.views[0] = nil
- vv.views = vv.views[1:]
-}
diff --git a/pkg/tcpip/buffer/view_test.go b/pkg/tcpip/buffer/view_test.go
index c56795c7b..106e1994c 100644
--- a/pkg/tcpip/buffer/view_test.go
+++ b/pkg/tcpip/buffer/view_test.go
@@ -16,7 +16,6 @@
package buffer
import (
- "bytes"
"reflect"
"testing"
)
@@ -371,115 +370,3 @@ func TestVVRead(t *testing.T) {
})
}
}
-
-var pullUpTestCases = []struct {
- comment string
- in VectorisedView
- count int
- want []byte
- result VectorisedView
- ok bool
-}{
- {
- comment: "simple case",
- in: vv(2, "12"),
- count: 1,
- want: []byte("1"),
- result: vv(2, "12"),
- ok: true,
- },
- {
- comment: "entire View",
- in: vv(2, "1", "2"),
- count: 1,
- want: []byte("1"),
- result: vv(2, "1", "2"),
- ok: true,
- },
- {
- comment: "spanning across two Views",
- in: vv(3, "1", "23"),
- count: 2,
- want: []byte("12"),
- result: vv(3, "12", "3"),
- ok: true,
- },
- {
- comment: "spanning across all Views",
- in: vv(5, "1", "23", "45"),
- count: 5,
- want: []byte("12345"),
- result: vv(5, "12345"),
- ok: true,
- },
- {
- comment: "count = 0",
- in: vv(1, "1"),
- count: 0,
- want: []byte{},
- result: vv(1, "1"),
- ok: true,
- },
- {
- comment: "count = size",
- in: vv(1, "1"),
- count: 1,
- want: []byte("1"),
- result: vv(1, "1"),
- ok: true,
- },
- {
- comment: "count too large",
- in: vv(3, "1", "23"),
- count: 4,
- want: nil,
- result: vv(3, "1", "23"),
- ok: false,
- },
- {
- comment: "empty vv",
- in: vv(0, ""),
- count: 1,
- want: nil,
- result: vv(0, ""),
- ok: false,
- },
- {
- comment: "empty vv, count = 0",
- in: vv(0, ""),
- count: 0,
- want: nil,
- result: vv(0, ""),
- ok: true,
- },
- {
- comment: "empty views",
- in: vv(3, "", "1", "", "23"),
- count: 2,
- want: []byte("12"),
- result: vv(3, "12", "3"),
- ok: true,
- },
-}
-
-func TestPullUp(t *testing.T) {
- for _, c := range pullUpTestCases {
- got, ok := c.in.PullUp(c.count)
-
- // Is the return value right?
- if ok != c.ok {
- t.Errorf("Test %q failed when calling PullUp(%d) on %v. Got an ok of %t. Want %t",
- c.comment, c.count, c.in, ok, c.ok)
- }
- if bytes.Compare(got, View(c.want)) != 0 {
- t.Errorf("Test %q failed when calling PullUp(%d) on %v. Got %v. Want %v",
- c.comment, c.count, c.in, got, c.want)
- }
-
- // Is the underlying structure right?
- if !reflect.DeepEqual(c.in, c.result) {
- t.Errorf("Test %q failed when calling PullUp(%d). Got vv with structure %v. Wanted %v",
- c.comment, c.count, c.in, c.result)
- }
- }
-}
diff --git a/pkg/tcpip/header/ipv6.go b/pkg/tcpip/header/ipv6.go
index 76e88e9b3..ba80b64a8 100644
--- a/pkg/tcpip/header/ipv6.go
+++ b/pkg/tcpip/header/ipv6.go
@@ -27,7 +27,9 @@ const (
// IPv6PayloadLenOffset is the offset of the PayloadLength field in
// IPv6 header.
IPv6PayloadLenOffset = 4
- nextHdr = 6
+ // IPv6NextHeaderOffset is the offset of the NextHeader field in
+ // IPv6 header.
+ IPv6NextHeaderOffset = 6
hopLimit = 7
v6SrcAddr = 8
v6DstAddr = v6SrcAddr + IPv6AddressSize
@@ -163,7 +165,7 @@ func (b IPv6) HopLimit() uint8 {
// NextHeader returns the value of the "next header" field of the ipv6 header.
func (b IPv6) NextHeader() uint8 {
- return b[nextHdr]
+ return b[IPv6NextHeaderOffset]
}
// TransportProtocol implements Network.TransportProtocol.
@@ -223,7 +225,7 @@ func (b IPv6) SetDestinationAddress(addr tcpip.Address) {
// SetNextHeader sets the value of the "next header" field of the ipv6 header.
func (b IPv6) SetNextHeader(v uint8) {
- b[nextHdr] = v
+ b[IPv6NextHeaderOffset] = v
}
// SetChecksum implements Network.SetChecksum. Given that IPv6 doesn't have a
@@ -235,7 +237,7 @@ func (IPv6) SetChecksum(uint16) {
func (b IPv6) Encode(i *IPv6Fields) {
b.SetTOS(i.TrafficClass, i.FlowLabel)
b.SetPayloadLength(i.PayloadLength)
- b[nextHdr] = i.NextHeader
+ b[IPv6NextHeaderOffset] = i.NextHeader
b[hopLimit] = i.HopLimit
b.SetSourceAddress(i.SrcAddr)
b.SetDestinationAddress(i.DstAddr)
diff --git a/pkg/tcpip/link/loopback/loopback.go b/pkg/tcpip/link/loopback/loopback.go
index 073c84ef9..1e2255bfa 100644
--- a/pkg/tcpip/link/loopback/loopback.go
+++ b/pkg/tcpip/link/loopback/loopback.go
@@ -98,13 +98,13 @@ func (e *endpoint) WritePackets(*stack.Route, *stack.GSO, stack.PacketBufferList
// WriteRawPacket implements stack.LinkEndpoint.WriteRawPacket.
func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
- // There should be an ethernet header at the beginning of vv.
- hdr, ok := vv.PullUp(header.EthernetMinimumSize)
- if !ok {
- // Reject the packet if it's shorter than an ethernet header.
+ // Reject the packet if it's shorter than an ethernet header.
+ if vv.Size() < header.EthernetMinimumSize {
return tcpip.ErrBadAddress
}
- linkHeader := header.Ethernet(hdr)
+
+ // There should be an ethernet header at the beginning of vv.
+ linkHeader := header.Ethernet(vv.First()[:header.EthernetMinimumSize])
vv.TrimFront(len(linkHeader))
e.dispatcher.DeliverNetworkPacket(e, "" /* remote */, "" /* local */, linkHeader.Type(), stack.PacketBuffer{
Data: vv,
diff --git a/pkg/tcpip/link/rawfile/BUILD b/pkg/tcpip/link/rawfile/BUILD
index 9cc08d0e2..14b527bc2 100644
--- a/pkg/tcpip/link/rawfile/BUILD
+++ b/pkg/tcpip/link/rawfile/BUILD
@@ -1,4 +1,4 @@
-load("//tools:defs.bzl", "go_library", "go_test")
+load("//tools:defs.bzl", "go_library")
package(licenses = ["notice"])
@@ -18,10 +18,3 @@ go_library(
"@org_golang_x_sys//unix:go_default_library",
],
)
-
-go_test(
- name = "rawfile_test",
- size = "small",
- srcs = ["rawfile_test.go"],
- library = ":rawfile",
-)
diff --git a/pkg/tcpip/link/rawfile/rawfile_test.go b/pkg/tcpip/link/rawfile/rawfile_test.go
deleted file mode 100644
index 8f14ba761..000000000
--- a/pkg/tcpip/link/rawfile/rawfile_test.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build linux
-
-package rawfile
-
-import (
- "syscall"
- "testing"
-)
-
-func TestNonBlockingWrite3ZeroLength(t *testing.T) {
- fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
- if err != nil {
- t.Fatalf("failed to open /dev/null: %v", err)
- }
- defer syscall.Close(fd)
-
- if err := NonBlockingWrite3(fd, []byte{}, []byte{0}, nil); err != nil {
- t.Fatalf("failed to write: %v", err)
- }
-}
-
-func TestNonBlockingWrite3Nil(t *testing.T) {
- fd, err := syscall.Open("/dev/null", syscall.O_WRONLY, 0)
- if err != nil {
- t.Fatalf("failed to open /dev/null: %v", err)
- }
- defer syscall.Close(fd)
-
- if err := NonBlockingWrite3(fd, nil, []byte{0}, nil); err != nil {
- t.Fatalf("failed to write: %v", err)
- }
-}
diff --git a/pkg/tcpip/link/rawfile/rawfile_unsafe.go b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
index 92efd0bf8..44e25d475 100644
--- a/pkg/tcpip/link/rawfile/rawfile_unsafe.go
+++ b/pkg/tcpip/link/rawfile/rawfile_unsafe.go
@@ -76,13 +76,9 @@ func NonBlockingWrite3(fd int, b1, b2, b3 []byte) *tcpip.Error {
// We have two buffers. Build the iovec that represents them and issue
// a writev syscall.
- var base *byte
- if len(b1) > 0 {
- base = &b1[0]
- }
iovec := [3]syscall.Iovec{
{
- Base: base,
+ Base: &b1[0],
Len: uint64(len(b1)),
},
{
diff --git a/pkg/tcpip/link/sharedmem/sharedmem_test.go b/pkg/tcpip/link/sharedmem/sharedmem_test.go
index 33f640b85..27ea3f531 100644
--- a/pkg/tcpip/link/sharedmem/sharedmem_test.go
+++ b/pkg/tcpip/link/sharedmem/sharedmem_test.go
@@ -674,7 +674,7 @@ func TestSimpleReceive(t *testing.T) {
// Wait for packet to be received, then check it.
c.waitForPackets(1, time.After(5*time.Second), "Timeout waiting for packet")
c.mu.Lock()
- rcvd := []byte(c.packets[0].vv.ToView())
+ rcvd := []byte(c.packets[0].vv.First())
c.packets = c.packets[:0]
c.mu.Unlock()
diff --git a/pkg/tcpip/link/sniffer/sniffer.go b/pkg/tcpip/link/sniffer/sniffer.go
index 0799c8f4d..be2537a82 100644
--- a/pkg/tcpip/link/sniffer/sniffer.go
+++ b/pkg/tcpip/link/sniffer/sniffer.go
@@ -171,7 +171,11 @@ func (e *endpoint) GSOMaxSize() uint32 {
func (e *endpoint) dumpPacket(prefix string, gso *stack.GSO, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
writer := e.writer
if writer == nil && atomic.LoadUint32(&LogPackets) == 1 {
- logPacket(prefix, protocol, pkt, gso)
+ first := pkt.Header.View()
+ if len(first) == 0 {
+ first = pkt.Data.First()
+ }
+ logPacket(prefix, protocol, first, gso)
}
if writer != nil && atomic.LoadUint32(&LogPacketsToPCAP) == 1 {
totalLength := pkt.Header.UsedLength() + pkt.Data.Size()
@@ -234,7 +238,7 @@ func (e *endpoint) WriteRawPacket(vv buffer.VectorisedView) *tcpip.Error {
// Wait implements stack.LinkEndpoint.Wait.
func (e *endpoint) Wait() { e.lower.Wait() }
-func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer, gso *stack.GSO) {
+func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, b buffer.View, gso *stack.GSO) {
// Figure out the network layer info.
var transProto uint8
src := tcpip.Address("unknown")
@@ -243,49 +247,28 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
size := uint16(0)
var fragmentOffset uint16
var moreFragments bool
-
- // Create a clone of pkt, including any headers if present. Avoid allocating
- // backing memory for the clone.
- views := [8]buffer.View{}
- vv := buffer.NewVectorisedView(0, views[:0])
- vv.AppendView(pkt.Header.View())
- vv.Append(pkt.Data)
-
switch protocol {
case header.IPv4ProtocolNumber:
- hdr, ok := vv.PullUp(header.IPv4MinimumSize)
- if !ok {
- return
- }
- ipv4 := header.IPv4(hdr)
+ ipv4 := header.IPv4(b)
fragmentOffset = ipv4.FragmentOffset()
moreFragments = ipv4.Flags()&header.IPv4FlagMoreFragments == header.IPv4FlagMoreFragments
src = ipv4.SourceAddress()
dst = ipv4.DestinationAddress()
transProto = ipv4.Protocol()
size = ipv4.TotalLength() - uint16(ipv4.HeaderLength())
- vv.TrimFront(int(ipv4.HeaderLength()))
+ b = b[ipv4.HeaderLength():]
id = int(ipv4.ID())
case header.IPv6ProtocolNumber:
- hdr, ok := vv.PullUp(header.IPv6MinimumSize)
- if !ok {
- return
- }
- ipv6 := header.IPv6(hdr)
+ ipv6 := header.IPv6(b)
src = ipv6.SourceAddress()
dst = ipv6.DestinationAddress()
transProto = ipv6.NextHeader()
size = ipv6.PayloadLength()
- vv.TrimFront(header.IPv6MinimumSize)
+ b = b[header.IPv6MinimumSize:]
case header.ARPProtocolNumber:
- hdr, ok := vv.PullUp(header.ARPSize)
- if !ok {
- return
- }
- vv.TrimFront(header.ARPSize)
- arp := header.ARP(hdr)
+ arp := header.ARP(b)
log.Infof(
"%s arp %v (%v) -> %v (%v) valid:%v",
prefix,
@@ -301,7 +284,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
// We aren't guaranteed to have a transport header - it's possible for
// writes via raw endpoints to contain only network headers.
- if minSize, ok := transportProtocolMinSizes[tcpip.TransportProtocolNumber(transProto)]; ok && vv.Size() < minSize {
+ if minSize, ok := transportProtocolMinSizes[tcpip.TransportProtocolNumber(transProto)]; ok && len(b) < minSize {
log.Infof("%s %v -> %v transport protocol: %d, but no transport header found (possible raw packet)", prefix, src, dst, transProto)
return
}
@@ -314,11 +297,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
switch tcpip.TransportProtocolNumber(transProto) {
case header.ICMPv4ProtocolNumber:
transName = "icmp"
- hdr, ok := vv.PullUp(header.ICMPv4MinimumSize)
- if !ok {
- break
- }
- icmp := header.ICMPv4(hdr)
+ icmp := header.ICMPv4(b)
icmpType := "unknown"
if fragmentOffset == 0 {
switch icmp.Type() {
@@ -351,11 +330,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
case header.ICMPv6ProtocolNumber:
transName = "icmp"
- hdr, ok := vv.PullUp(header.ICMPv6MinimumSize)
- if !ok {
- break
- }
- icmp := header.ICMPv6(hdr)
+ icmp := header.ICMPv6(b)
icmpType := "unknown"
switch icmp.Type() {
case header.ICMPv6DstUnreachable:
@@ -386,11 +361,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
case header.UDPProtocolNumber:
transName = "udp"
- hdr, ok := vv.PullUp(header.UDPMinimumSize)
- if !ok {
- break
- }
- udp := header.UDP(hdr)
+ udp := header.UDP(b)
if fragmentOffset == 0 && len(udp) >= header.UDPMinimumSize {
srcPort = udp.SourcePort()
dstPort = udp.DestinationPort()
@@ -400,11 +371,7 @@ func logPacket(prefix string, protocol tcpip.NetworkProtocolNumber, pkt *stack.P
case header.TCPProtocolNumber:
transName = "tcp"
- hdr, ok := vv.PullUp(header.TCPMinimumSize)
- if !ok {
- break
- }
- tcp := header.TCP(hdr)
+ tcp := header.TCP(b)
if fragmentOffset == 0 && len(tcp) >= header.TCPMinimumSize {
offset := int(tcp.DataOffset())
if offset < header.TCPMinimumSize {
diff --git a/pkg/tcpip/network/arp/arp.go b/pkg/tcpip/network/arp/arp.go
index cf73a939e..7acbfa0a8 100644
--- a/pkg/tcpip/network/arp/arp.go
+++ b/pkg/tcpip/network/arp/arp.go
@@ -93,10 +93,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuf
}
func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
- v, ok := pkt.Data.PullUp(header.ARPSize)
- if !ok {
- return
- }
+ v := pkt.Data.First()
h := header.ARP(v)
if !h.IsValid() {
return
diff --git a/pkg/tcpip/network/ipv4/icmp.go b/pkg/tcpip/network/ipv4/icmp.go
index 4cbefe5ab..c4bf1ba5c 100644
--- a/pkg/tcpip/network/ipv4/icmp.go
+++ b/pkg/tcpip/network/ipv4/icmp.go
@@ -25,11 +25,7 @@ import (
// used to find out which transport endpoint must be notified about the ICMP
// packet.
func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
- h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- return
- }
- hdr := header.IPv4(h)
+ h := header.IPv4(pkt.Data.First())
// We don't use IsValid() here because ICMP only requires that the IP
// header plus 8 bytes of the transport header be included. So it's
@@ -38,12 +34,12 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
//
// Drop packet if it doesn't have the basic IPv4 header or if the
// original source address doesn't match the endpoint's address.
- if hdr.SourceAddress() != e.id.LocalAddress {
+ if len(h) < header.IPv4MinimumSize || h.SourceAddress() != e.id.LocalAddress {
return
}
- hlen := int(hdr.HeaderLength())
- if pkt.Data.Size() < hlen || hdr.FragmentOffset() != 0 {
+ hlen := int(h.HeaderLength())
+ if pkt.Data.Size() < hlen || h.FragmentOffset() != 0 {
// We won't be able to handle this if it doesn't contain the
// full IPv4 header, or if it's a fragment not at offset 0
// (because it won't have the transport header).
@@ -52,15 +48,15 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
// Skip the ip header, then deliver control message.
pkt.Data.TrimFront(hlen)
- p := hdr.TransportProtocol()
- e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+ p := h.TransportProtocol()
+ e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
}
func (e *endpoint) handleICMP(r *stack.Route, pkt stack.PacketBuffer) {
stats := r.Stats()
received := stats.ICMP.V4PacketsReceived
- v, ok := pkt.Data.PullUp(header.ICMPv4MinimumSize)
- if !ok {
+ v := pkt.Data.First()
+ if len(v) < header.ICMPv4MinimumSize {
received.Invalid.Increment()
return
}
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 17202cc7a..104aafbed 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -328,11 +328,7 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffer) *tcpip.Error {
// The packet already has an IP header, but there are a few required
// checks.
- h, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- return tcpip.ErrInvalidOptionValue
- }
- ip := header.IPv4(h)
+ ip := header.IPv4(pkt.Data.First())
if !ip.IsValid(pkt.Data.Size()) {
return tcpip.ErrInvalidOptionValue
}
@@ -382,11 +378,7 @@ func (e *endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuf
// HandlePacket is called by the link layer when new ipv4 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
- headerView, ok := pkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- r.Stats().IP.MalformedPacketsReceived.Increment()
- return
- }
+ headerView := pkt.Data.First()
h := header.IPv4(headerView)
if !h.IsValid(pkt.Data.Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/network/ipv6/icmp.go b/pkg/tcpip/network/ipv6/icmp.go
index bdf3a0d25..b68983d10 100644
--- a/pkg/tcpip/network/ipv6/icmp.go
+++ b/pkg/tcpip/network/ipv6/icmp.go
@@ -28,11 +28,7 @@ import (
// used to find out which transport endpoint must be notified about the ICMP
// packet.
func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.PacketBuffer) {
- h, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
- if !ok {
- return
- }
- hdr := header.IPv6(h)
+ h := header.IPv6(pkt.Data.First())
// We don't use IsValid() here because ICMP only requires that up to
// 1280 bytes of the original packet be included. So it's likely that it
@@ -40,21 +36,17 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
//
// Drop packet if it doesn't have the basic IPv6 header or if the
// original source address doesn't match the endpoint's address.
- if hdr.SourceAddress() != e.id.LocalAddress {
+ if len(h) < header.IPv6MinimumSize || h.SourceAddress() != e.id.LocalAddress {
return
}
// Skip the IP header, then handle the fragmentation header if there
// is one.
pkt.Data.TrimFront(header.IPv6MinimumSize)
- p := hdr.TransportProtocol()
+ p := h.TransportProtocol()
if p == header.IPv6FragmentHeader {
- f, ok := pkt.Data.PullUp(header.IPv6FragmentHeaderSize)
- if !ok {
- return
- }
- fragHdr := header.IPv6Fragment(f)
- if !fragHdr.IsValid() || fragHdr.FragmentOffset() != 0 {
+ f := header.IPv6Fragment(pkt.Data.First())
+ if !f.IsValid() || f.FragmentOffset() != 0 {
// We can't handle fragments that aren't at offset 0
// because they don't have the transport headers.
return
@@ -63,19 +55,19 @@ func (e *endpoint) handleControl(typ stack.ControlType, extra uint32, pkt stack.
// Skip fragmentation header and find out the actual protocol
// number.
pkt.Data.TrimFront(header.IPv6FragmentHeaderSize)
- p = fragHdr.TransportProtocol()
+ p = f.TransportProtocol()
}
// Deliver the control packet to the transport endpoint.
- e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, hdr.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
+ e.dispatcher.DeliverTransportControlPacket(e.id.LocalAddress, h.DestinationAddress(), ProtocolNumber, p, typ, extra, pkt)
}
func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.PacketBuffer, hasFragmentHeader bool) {
stats := r.Stats().ICMP
sent := stats.V6PacketsSent
received := stats.V6PacketsReceived
- v, ok := pkt.Data.PullUp(header.ICMPv6HeaderSize)
- if !ok {
+ v := pkt.Data.First()
+ if len(v) < header.ICMPv6MinimumSize {
received.Invalid.Increment()
return
}
@@ -84,9 +76,11 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
// Validate ICMPv6 checksum before processing the packet.
//
+ // Only the first view in vv is accounted for by h. To account for the
+ // rest of vv, a shallow copy is made and the first view is removed.
// This copy is used as extra payload during the checksum calculation.
payload := pkt.Data.Clone(nil)
- payload.TrimFront(len(h))
+ payload.RemoveFirst()
if got, want := h.Checksum(), header.ICMPv6Checksum(h, iph.SourceAddress(), iph.DestinationAddress(), payload); got != want {
received.Invalid.Increment()
return
@@ -107,40 +101,34 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
switch h.Type() {
case header.ICMPv6PacketTooBig:
received.PacketTooBig.Increment()
- hdr, ok := pkt.Data.PullUp(header.ICMPv6PacketTooBigMinimumSize)
- if !ok {
+ if len(v) < header.ICMPv6PacketTooBigMinimumSize {
received.Invalid.Increment()
return
}
pkt.Data.TrimFront(header.ICMPv6PacketTooBigMinimumSize)
- mtu := header.ICMPv6(hdr).MTU()
+ mtu := h.MTU()
e.handleControl(stack.ControlPacketTooBig, calculateMTU(mtu), pkt)
case header.ICMPv6DstUnreachable:
received.DstUnreachable.Increment()
- hdr, ok := pkt.Data.PullUp(header.ICMPv6DstUnreachableMinimumSize)
- if !ok {
+ if len(v) < header.ICMPv6DstUnreachableMinimumSize {
received.Invalid.Increment()
return
}
pkt.Data.TrimFront(header.ICMPv6DstUnreachableMinimumSize)
- switch header.ICMPv6(hdr).Code() {
+ switch h.Code() {
case header.ICMPv6PortUnreachable:
e.handleControl(stack.ControlPortUnreachable, 0, pkt)
}
case header.ICMPv6NeighborSolicit:
received.NeighborSolicit.Increment()
- if pkt.Data.Size() < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() {
+ if len(v) < header.ICMPv6NeighborSolicitMinimumSize || !isNDPValid() {
received.Invalid.Increment()
return
}
- // The remainder of payload must be only the neighbor solicitation, so
- // payload.ToView() always returns the solicitation. Per RFC 6980 section 5,
- // NDP messages cannot be fragmented. Also note that in the common case NDP
- // datagrams are very small and ToView() will not incur allocations.
- ns := header.NDPNeighborSolicit(payload.ToView())
+ ns := header.NDPNeighborSolicit(h.NDPPayload())
it, err := ns.Options().Iter(true)
if err != nil {
// If we have a malformed NDP NS option, drop the packet.
@@ -298,16 +286,12 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
case header.ICMPv6NeighborAdvert:
received.NeighborAdvert.Increment()
- if pkt.Data.Size() < header.ICMPv6NeighborAdvertSize || !isNDPValid() {
+ if len(v) < header.ICMPv6NeighborAdvertSize || !isNDPValid() {
received.Invalid.Increment()
return
}
- // The remainder of payload must be only the neighbor advertisement, so
- // payload.ToView() always returns the advertisement. Per RFC 6980 section
- // 5, NDP messages cannot be fragmented. Also note that in the common case
- // NDP datagrams are very small and ToView() will not incur allocations.
- na := header.NDPNeighborAdvert(payload.ToView())
+ na := header.NDPNeighborAdvert(h.NDPPayload())
it, err := na.Options().Iter(true)
if err != nil {
// If we have a malformed NDP NA option, drop the packet.
@@ -379,15 +363,14 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
case header.ICMPv6EchoRequest:
received.EchoRequest.Increment()
- icmpHdr, ok := pkt.Data.PullUp(header.ICMPv6EchoMinimumSize)
- if !ok {
+ if len(v) < header.ICMPv6EchoMinimumSize {
received.Invalid.Increment()
return
}
pkt.Data.TrimFront(header.ICMPv6EchoMinimumSize)
hdr := buffer.NewPrependable(int(r.MaxHeaderLength()) + header.ICMPv6EchoMinimumSize)
packet := header.ICMPv6(hdr.Prepend(header.ICMPv6EchoMinimumSize))
- copy(packet, icmpHdr)
+ copy(packet, h)
packet.SetType(header.ICMPv6EchoReply)
packet.SetChecksum(header.ICMPv6Checksum(packet, r.LocalAddress, r.RemoteAddress, pkt.Data))
if err := r.WritePacket(nil /* gso */, stack.NetworkHeaderParams{Protocol: header.ICMPv6ProtocolNumber, TTL: r.DefaultTTL(), TOS: stack.DefaultTOS}, stack.PacketBuffer{
@@ -401,7 +384,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
case header.ICMPv6EchoReply:
received.EchoReply.Increment()
- if pkt.Data.Size() < header.ICMPv6EchoMinimumSize {
+ if len(v) < header.ICMPv6EchoMinimumSize {
received.Invalid.Increment()
return
}
@@ -423,9 +406,8 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
case header.ICMPv6RouterAdvert:
received.RouterAdvert.Increment()
- // Is the NDP payload of sufficient size to hold a Router
- // Advertisement?
- if pkt.Data.Size()-header.ICMPv6HeaderSize < header.NDPRAMinimumSize || !isNDPValid() {
+ p := h.NDPPayload()
+ if len(p) < header.NDPRAMinimumSize || !isNDPValid() {
received.Invalid.Increment()
return
}
@@ -443,11 +425,7 @@ func (e *endpoint) handleICMP(r *stack.Route, netHeader buffer.View, pkt stack.P
return
}
- // The remainder of payload must be only the router advertisement, so
- // payload.ToView() always returns the advertisement. Per RFC 6980 section
- // 5, NDP messages cannot be fragmented. Also note that in the common case
- // NDP datagrams are very small and ToView() will not incur allocations.
- ra := header.NDPRouterAdvert(payload.ToView())
+ ra := header.NDPRouterAdvert(p)
opts := ra.Options()
// Are options valid as per the wire format?
diff --git a/pkg/tcpip/network/ipv6/icmp_test.go b/pkg/tcpip/network/ipv6/icmp_test.go
index d412ff688..bd099a7f8 100644
--- a/pkg/tcpip/network/ipv6/icmp_test.go
+++ b/pkg/tcpip/network/ipv6/icmp_test.go
@@ -166,8 +166,7 @@ func TestICMPCounts(t *testing.T) {
},
{
typ: header.ICMPv6NeighborSolicit,
- size: header.ICMPv6NeighborSolicitMinimumSize,
- },
+ size: header.ICMPv6NeighborSolicitMinimumSize},
{
typ: header.ICMPv6NeighborAdvert,
size: header.ICMPv6NeighborAdvertMinimumSize,
diff --git a/pkg/tcpip/network/ipv6/ipv6.go b/pkg/tcpip/network/ipv6/ipv6.go
index 486725131..331b0817b 100644
--- a/pkg/tcpip/network/ipv6/ipv6.go
+++ b/pkg/tcpip/network/ipv6/ipv6.go
@@ -171,11 +171,7 @@ func (*endpoint) WriteHeaderIncludedPacket(r *stack.Route, pkt stack.PacketBuffe
// HandlePacket is called by the link layer when new ipv6 packets arrive for
// this endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
- headerView, ok := pkt.Data.PullUp(header.IPv6MinimumSize)
- if !ok {
- r.Stats().IP.MalformedPacketsReceived.Increment()
- return
- }
+ headerView := pkt.Data.First()
h := header.IPv6(headerView)
if !h.IsValid(pkt.Data.Size()) {
r.Stats().IP.MalformedPacketsReceived.Increment()
diff --git a/pkg/tcpip/stack/forwarder_test.go b/pkg/tcpip/stack/forwarder_test.go
index c7c663498..e9c652042 100644
--- a/pkg/tcpip/stack/forwarder_test.go
+++ b/pkg/tcpip/stack/forwarder_test.go
@@ -70,10 +70,7 @@ func (f *fwdTestNetworkEndpoint) ID() *NetworkEndpointID {
func (f *fwdTestNetworkEndpoint) HandlePacket(r *Route, pkt PacketBuffer) {
// Consume the network header.
- b, ok := pkt.Data.PullUp(fwdTestNetHeaderLen)
- if !ok {
- return
- }
+ b := pkt.Data.First()
pkt.Data.TrimFront(fwdTestNetHeaderLen)
// Dispatch the packet to the transport protocol.
@@ -476,7 +473,7 @@ func TestForwardingWithFakeResolverPartialTimeout(t *testing.T) {
t.Fatal("packet not forwarded")
}
- b := p.Pkt.Data.ToView()
+ b := p.Pkt.Header.View()
if b[0] != 3 {
t.Fatalf("got b[0] = %d, want = 3", b[0])
}
@@ -520,7 +517,7 @@ func TestForwardingWithFakeResolverTwoPackets(t *testing.T) {
t.Fatal("packet not forwarded")
}
- b := p.Pkt.Data.ToView()
+ b := p.Pkt.Header.View()
if b[0] != 3 {
t.Fatalf("got b[0] = %d, want = 3", b[0])
}
@@ -567,7 +564,7 @@ func TestForwardingWithFakeResolverManyPackets(t *testing.T) {
t.Fatal("packet not forwarded")
}
- b := p.Pkt.Data.ToView()
+ b := p.Pkt.Header.View()
if b[0] != 3 {
t.Fatalf("got b[0] = %d, want = 3", b[0])
}
@@ -622,7 +619,7 @@ func TestForwardingWithFakeResolverManyResolutions(t *testing.T) {
// The first 5 packets (address 3 to 7) should not be forwarded
// because their address resolutions are interrupted.
- b := p.Pkt.Data.ToView()
+ b := p.Pkt.Header.View()
if b[0] < 8 {
t.Fatalf("got b[0] = %d, want b[0] >= 8", b[0])
}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 6b91159d4..6c0a4b24d 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -212,11 +212,6 @@ func (it *IPTables) Check(hook Hook, pkt PacketBuffer) bool {
// CheckPackets runs pkts through the rules for hook and returns a map of packets that
// should not go forward.
//
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-//
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
-//
// NOTE: unlike the Check API the returned map contains packets that should be
// dropped.
func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList) (drop map[*PacketBuffer]struct{}) {
@@ -231,9 +226,7 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList) (drop map[*Pa
return drop
}
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
+// Precondition: pkt.NetworkHeader is set.
func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) chainVerdict {
// Start from ruleIdx and walk the list of rules until a rule gives us
// a verdict.
@@ -278,21 +271,14 @@ func (it *IPTables) checkChain(hook Hook, pkt PacketBuffer, table Table, ruleIdx
return chainDrop
}
-// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
-// TODO(gvisor.dev/issue/170): pk.NetworkHeader will always be set as a
-// precondition.
+// Precondition: pk.NetworkHeader is set.
func (it *IPTables) checkRule(hook Hook, pkt PacketBuffer, table Table, ruleIdx int) (RuleVerdict, int) {
rule := table.Rules[ruleIdx]
// If pkt.NetworkHeader hasn't been set yet, it will be contained in
- // pkt.Data.
+ // pkt.Data.First().
if pkt.NetworkHeader == nil {
- var ok bool
- pkt.NetworkHeader, ok = pkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- // Precondition has been violated.
- panic(fmt.Sprintf("iptables checks require IPv4 headers of at least %d bytes", header.IPv4MinimumSize))
- }
+ pkt.NetworkHeader = pkt.Data.First()
}
// Check whether the packet matches the IP header filter.
diff --git a/pkg/tcpip/stack/iptables_targets.go b/pkg/tcpip/stack/iptables_targets.go
index 8be61f4b1..7b4543caf 100644
--- a/pkg/tcpip/stack/iptables_targets.go
+++ b/pkg/tcpip/stack/iptables_targets.go
@@ -96,12 +96,9 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
newPkt := pkt.Clone()
// Set network header.
- headerView, ok := newPkt.Data.PullUp(header.IPv4MinimumSize)
- if !ok {
- return RuleDrop, 0
- }
+ headerView := newPkt.Data.First()
netHeader := header.IPv4(headerView)
- newPkt.NetworkHeader = headerView
+ newPkt.NetworkHeader = headerView[:header.IPv4MinimumSize]
hlen := int(netHeader.HeaderLength())
tlen := int(netHeader.TotalLength())
@@ -120,14 +117,10 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
if newPkt.TransportHeader != nil {
udpHeader = header.UDP(newPkt.TransportHeader)
} else {
- if pkt.Data.Size() < header.UDPMinimumSize {
- return RuleDrop, 0
- }
- hdr, ok := newPkt.Data.PullUp(header.UDPMinimumSize)
- if !ok {
+ if len(pkt.Data.First()) < header.UDPMinimumSize {
return RuleDrop, 0
}
- udpHeader = header.UDP(hdr)
+ udpHeader = header.UDP(newPkt.Data.First())
}
udpHeader.SetDestinationPort(rt.MinPort)
case header.TCPProtocolNumber:
@@ -135,14 +128,10 @@ func (rt RedirectTarget) Action(pkt PacketBuffer) (RuleVerdict, int) {
if newPkt.TransportHeader != nil {
tcpHeader = header.TCP(newPkt.TransportHeader)
} else {
- if pkt.Data.Size() < header.TCPMinimumSize {
+ if len(pkt.Data.First()) < header.TCPMinimumSize {
return RuleDrop, 0
}
- hdr, ok := newPkt.Data.PullUp(header.TCPMinimumSize)
- if !ok {
- return RuleDrop, 0
- }
- tcpHeader = header.TCP(hdr)
+ tcpHeader = header.TCP(newPkt.TransportHeader)
}
// TODO(gvisor.dev/issue/170): Need to recompute checksum
// and implement nat connection tracking to support TCP.
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 193a9dfde..c11d62f97 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -412,20 +412,33 @@ type dadState struct {
// defaultRouterState holds data associated with a default router discovered by
// a Router Advertisement (RA).
type defaultRouterState struct {
- invalidationTimer tcpip.CancellableTimer
+ // Timer to invalidate the default router.
+ //
+ // May not be nil.
+ invalidationTimer *tcpip.CancellableTimer
}
// onLinkPrefixState holds data associated with an on-link prefix discovered by
// a Router Advertisement's Prefix Information option (PI) when the NDP
// configurations was configured to do so.
type onLinkPrefixState struct {
- invalidationTimer tcpip.CancellableTimer
+ // Timer to invalidate the on-link prefix.
+ //
+ // May not be nil.
+ invalidationTimer *tcpip.CancellableTimer
}
// slaacPrefixState holds state associated with a SLAAC prefix.
type slaacPrefixState struct {
- deprecationTimer tcpip.CancellableTimer
- invalidationTimer tcpip.CancellableTimer
+ // Timer to deprecate the prefix.
+ //
+ // May not be nil.
+ deprecationTimer *tcpip.CancellableTimer
+
+ // Timer to invalidate the prefix.
+ //
+ // May not be nil.
+ invalidationTimer *tcpip.CancellableTimer
// Nonzero only when the address is not valid forever.
validUntil time.Time
@@ -775,7 +788,6 @@ func (ndp *ndpState) invalidateDefaultRouter(ip tcpip.Address) {
}
rtr.invalidationTimer.StopLocked()
-
delete(ndp.defaultRouters, ip)
// Let the integrator know a discovered default router is invalidated.
@@ -804,7 +816,7 @@ func (ndp *ndpState) rememberDefaultRouter(ip tcpip.Address, rl time.Duration) {
}
state := defaultRouterState{
- invalidationTimer: tcpip.MakeCancellableTimer(&ndp.nic.mu, func() {
+ invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
ndp.invalidateDefaultRouter(ip)
}),
}
@@ -834,7 +846,7 @@ func (ndp *ndpState) rememberOnLinkPrefix(prefix tcpip.Subnet, l time.Duration)
}
state := onLinkPrefixState{
- invalidationTimer: tcpip.MakeCancellableTimer(&ndp.nic.mu, func() {
+ invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
ndp.invalidateOnLinkPrefix(prefix)
}),
}
@@ -859,7 +871,6 @@ func (ndp *ndpState) invalidateOnLinkPrefix(prefix tcpip.Subnet) {
}
s.invalidationTimer.StopLocked()
-
delete(ndp.onLinkPrefixes, prefix)
// Let the integrator know a discovered on-link prefix is invalidated.
@@ -979,7 +990,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
}
state := slaacPrefixState{
- deprecationTimer: tcpip.MakeCancellableTimer(&ndp.nic.mu, func() {
+ deprecationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the deprecated SLAAC prefix %s", prefix))
@@ -987,7 +998,7 @@ func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
ndp.deprecateSLAACAddress(state.ref)
}),
- invalidationTimer: tcpip.MakeCancellableTimer(&ndp.nic.mu, func() {
+ invalidationTimer: tcpip.NewCancellableTimer(&ndp.nic.mu, func() {
state, ok := ndp.slaacPrefixes[prefix]
if !ok {
panic(fmt.Sprintf("ndp: must have a slaacPrefixes entry for the invalidated SLAAC prefix %s", prefix))
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 0c2b1f36a..016dbe15e 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1203,12 +1203,12 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
n.stack.stats.IP.PacketsReceived.Increment()
}
- netHeader, ok := pkt.Data.PullUp(netProto.MinimumPacketSize())
- if !ok {
+ if len(pkt.Data.First()) < netProto.MinimumPacketSize() {
n.stack.stats.MalformedRcvdPackets.Increment()
return
}
- src, dst := netProto.ParseAddresses(netHeader)
+
+ src, dst := netProto.ParseAddresses(pkt.Data.First())
if n.stack.handleLocal && !n.isLoopback() && n.getRef(protocol, src) != nil {
// The source address is one of our own, so we never should have gotten a
@@ -1289,8 +1289,22 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
func (n *NIC) forwardPacket(r *Route, protocol tcpip.NetworkProtocolNumber, pkt PacketBuffer) {
// TODO(b/143425874) Decrease the TTL field in forwarded packets.
- if linkHeaderLen := int(n.linkEP.MaxHeaderLength()); linkHeaderLen != 0 {
- pkt.Header = buffer.NewPrependable(linkHeaderLen)
+
+ firstData := pkt.Data.First()
+ pkt.Data.RemoveFirst()
+
+ if linkHeaderLen := int(n.linkEP.MaxHeaderLength()); linkHeaderLen == 0 {
+ pkt.Header = buffer.NewPrependableFromView(firstData)
+ } else {
+ firstDataLen := len(firstData)
+
+ // pkt.Header should have enough capacity to hold n.linkEP's headers.
+ pkt.Header = buffer.NewPrependable(firstDataLen + linkHeaderLen)
+
+ // TODO(b/151227689): avoid copying the packet when forwarding
+ if n := copy(pkt.Header.Prepend(firstDataLen), firstData); n != firstDataLen {
+ panic(fmt.Sprintf("copied %d bytes, expected %d", n, firstDataLen))
+ }
}
if err := n.linkEP.WritePacket(r, nil /* gso */, protocol, pkt); err != nil {
@@ -1318,13 +1332,12 @@ func (n *NIC) DeliverTransportPacket(r *Route, protocol tcpip.TransportProtocolN
// validly formed.
n.stack.demux.deliverRawPacket(r, protocol, pkt)
- transHeader, ok := pkt.Data.PullUp(transProto.MinimumPacketSize())
- if !ok {
+ if len(pkt.Data.First()) < transProto.MinimumPacketSize() {
n.stack.stats.MalformedRcvdPackets.Increment()
return
}
- srcPort, dstPort, err := transProto.ParsePorts(transHeader)
+ srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
if err != nil {
n.stack.stats.MalformedRcvdPackets.Increment()
return
@@ -1362,12 +1375,11 @@ func (n *NIC) DeliverTransportControlPacket(local, remote tcpip.Address, net tcp
// ICMPv4 only guarantees that 8 bytes of the transport protocol will
// be present in the payload. We know that the ports are within the
// first 8 bytes for all known transport protocols.
- transHeader, ok := pkt.Data.PullUp(8)
- if !ok {
+ if len(pkt.Data.First()) < 8 {
return
}
- srcPort, dstPort, err := transProto.ParsePorts(transHeader)
+ srcPort, dstPort, err := transProto.ParsePorts(pkt.Data.First())
if err != nil {
return
}
diff --git a/pkg/tcpip/stack/packet_buffer.go b/pkg/tcpip/stack/packet_buffer.go
index 7d36f8e84..dc125f25e 100644
--- a/pkg/tcpip/stack/packet_buffer.go
+++ b/pkg/tcpip/stack/packet_buffer.go
@@ -37,13 +37,7 @@ type PacketBuffer struct {
Data buffer.VectorisedView
// Header holds the headers of outbound packets. As a packet is passed
- // down the stack, each layer adds to Header. Note that forwarded
- // packets don't populate Headers on their way out -- their headers and
- // payload are never parsed out and remain in Data.
- //
- // TODO(gvisor.dev/issue/170): Forwarded packets don't currently
- // populate Header, but should. This will be doable once early parsing
- // (https://github.com/google/gvisor/pull/1995) is supported.
+ // down the stack, each layer adds to Header.
Header buffer.Prependable
// These fields are used by both inbound and outbound packets. They
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index d45d2cc1f..c7634ceb1 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -95,18 +95,16 @@ func (f *fakeNetworkEndpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffe
f.proto.packetCount[int(f.id.LocalAddress[0])%len(f.proto.packetCount)]++
// Consume the network header.
- b, ok := pkt.Data.PullUp(fakeNetHeaderLen)
- if !ok {
- return
- }
+ b := pkt.Data.First()
pkt.Data.TrimFront(fakeNetHeaderLen)
// Handle control packets.
if b[2] == uint8(fakeControlProtocol) {
- nb, ok := pkt.Data.PullUp(fakeNetHeaderLen)
- if !ok {
+ nb := pkt.Data.First()
+ if len(nb) < fakeNetHeaderLen {
return
}
+
pkt.Data.TrimFront(fakeNetHeaderLen)
f.dispatcher.DeliverTransportControlPacket(tcpip.Address(nb[1:2]), tcpip.Address(nb[0:1]), fakeNetNumber, tcpip.TransportProtocolNumber(nb[2]), stack.ControlPortUnreachable, 0, pkt)
return
diff --git a/pkg/tcpip/stack/transport_test.go b/pkg/tcpip/stack/transport_test.go
index a611e44ab..3084e6593 100644
--- a/pkg/tcpip/stack/transport_test.go
+++ b/pkg/tcpip/stack/transport_test.go
@@ -642,11 +642,10 @@ func TestTransportForwarding(t *testing.T) {
t.Fatal("Response packet not forwarded")
}
- hdrs := p.Pkt.Data.ToView()
- if dst := hdrs[0]; dst != 3 {
+ if dst := p.Pkt.Header.View()[0]; dst != 3 {
t.Errorf("Response packet has incorrect destination addresss: got = %d, want = 3", dst)
}
- if src := hdrs[1]; src != 1 {
+ if src := p.Pkt.Header.View()[1]; src != 1 {
t.Errorf("Response packet has incorrect source addresss: got = %d, want = 3", src)
}
}
diff --git a/pkg/tcpip/timer.go b/pkg/tcpip/timer.go
index 67f66fc72..59f3b391f 100644
--- a/pkg/tcpip/timer.go
+++ b/pkg/tcpip/timer.go
@@ -88,6 +88,9 @@ func (t *cancellableTimerInstance) stop() {
//
// The term "related work" is defined as some work that needs to be done while
// holding some lock that the timer must also hold while doing some work.
+//
+// Note, it is not safe to copy a CancellableTimer as its timer instance creates
+// a closure over the address of the CancellableTimer.
type CancellableTimer struct {
// The active instance of a cancellable timer.
instance cancellableTimerInstance
@@ -154,12 +157,28 @@ func (t *CancellableTimer) Reset(d time.Duration) {
}
}
-// MakeCancellableTimer returns an unscheduled CancellableTimer with the given
+// Lock is a no-op used by the copylocks checker from go vet.
+//
+// See CancellableTimer for details about why it shouldn't be copied.
+//
+// See https://github.com/golang/go/issues/8005#issuecomment-190753527 for more
+// details about the copylocks checker.
+func (*CancellableTimer) Lock() {}
+
+// Unlock is a no-op used by the copylocks checker from go vet.
+//
+// See CancellableTimer for details about why it shouldn't be copied.
+//
+// See https://github.com/golang/go/issues/8005#issuecomment-190753527 for more
+// details about the copylocks checker.
+func (*CancellableTimer) Unlock() {}
+
+// NewCancellableTimer returns an unscheduled CancellableTimer with the given
// locker and fn.
//
// fn MUST NOT attempt to lock locker.
//
// Callers must call Reset to schedule the timer to fire.
-func MakeCancellableTimer(locker sync.Locker, fn func()) CancellableTimer {
- return CancellableTimer{locker: locker, fn: fn}
+func NewCancellableTimer(locker sync.Locker, fn func()) *CancellableTimer {
+ return &CancellableTimer{locker: locker, fn: fn}
}
diff --git a/pkg/tcpip/timer_test.go b/pkg/tcpip/timer_test.go
index 730134906..b4940e397 100644
--- a/pkg/tcpip/timer_test.go
+++ b/pkg/tcpip/timer_test.go
@@ -43,7 +43,7 @@ func TestCancellableTimerReassignment(t *testing.T) {
// that has an active timer (even if it has been stopped as a stopped
// timer may be blocked on a lock before it can check if it has been
// stopped while another goroutine holds the same lock).
- timer = tcpip.MakeCancellableTimer(&lock, func() {
+ timer = *tcpip.NewCancellableTimer(&lock, func() {
wg.Done()
})
timer.Reset(shortDuration)
@@ -59,7 +59,7 @@ func TestCancellableTimerFire(t *testing.T) {
ch := make(chan struct{})
var lock sync.Mutex
- timer := tcpip.MakeCancellableTimer(&lock, func() {
+ timer := tcpip.NewCancellableTimer(&lock, func() {
ch <- struct{}{}
})
timer.Reset(shortDuration)
@@ -85,7 +85,7 @@ func TestCancellableTimerResetFromLongDuration(t *testing.T) {
ch := make(chan struct{})
var lock sync.Mutex
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(middleDuration)
lock.Lock()
@@ -116,7 +116,7 @@ func TestCancellableTimerResetFromShortDuration(t *testing.T) {
var lock sync.Mutex
lock.Lock()
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(shortDuration)
timer.StopLocked()
lock.Unlock()
@@ -153,7 +153,7 @@ func TestCancellableTimerImmediatelyStop(t *testing.T) {
for i := 0; i < 1000; i++ {
lock.Lock()
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(shortDuration)
timer.StopLocked()
lock.Unlock()
@@ -174,7 +174,7 @@ func TestCancellableTimerStoppedResetWithoutLock(t *testing.T) {
var lock sync.Mutex
lock.Lock()
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(shortDuration)
timer.StopLocked()
lock.Unlock()
@@ -205,7 +205,7 @@ func TestManyCancellableTimerResetAfterBlockedOnLock(t *testing.T) {
var lock sync.Mutex
lock.Lock()
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(shortDuration)
for i := 0; i < 10; i++ {
// Sleep until the timer fires and gets blocked trying to take the lock.
@@ -237,7 +237,7 @@ func TestManyCancellableTimerResetUnderLock(t *testing.T) {
var lock sync.Mutex
lock.Lock()
- timer := tcpip.MakeCancellableTimer(&lock, func() { ch <- struct{}{} })
+ timer := tcpip.NewCancellableTimer(&lock, func() { ch <- struct{}{} })
timer.Reset(shortDuration)
for i := 0; i < 10; i++ {
timer.StopLocked()
diff --git a/pkg/tcpip/transport/icmp/endpoint.go b/pkg/tcpip/transport/icmp/endpoint.go
index b1d820372..feef8dca0 100644
--- a/pkg/tcpip/transport/icmp/endpoint.go
+++ b/pkg/tcpip/transport/icmp/endpoint.go
@@ -747,15 +747,15 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
// Only accept echo replies.
switch e.NetProto {
case header.IPv4ProtocolNumber:
- h, ok := pkt.Data.PullUp(header.ICMPv4MinimumSize)
- if !ok || header.ICMPv4(h).Type() != header.ICMPv4EchoReply {
+ h := header.ICMPv4(pkt.Data.First())
+ if h.Type() != header.ICMPv4EchoReply {
e.stack.Stats().DroppedPackets.Increment()
e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
return
}
case header.IPv6ProtocolNumber:
- h, ok := pkt.Data.PullUp(header.ICMPv6MinimumSize)
- if !ok || header.ICMPv6(h).Type() != header.ICMPv6EchoReply {
+ h := header.ICMPv6(pkt.Data.First())
+ if h.Type() != header.ICMPv6EchoReply {
e.stack.Stats().DroppedPackets.Increment()
e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
return
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 7712ce652..40461fd31 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -144,11 +144,7 @@ func (s *segment) logicalLen() seqnum.Size {
// TCP checksum and stores the checksum and result of checksum verification in
// the csum and csumValid fields of the segment.
func (s *segment) parse() bool {
- h, ok := s.data.PullUp(header.TCPMinimumSize)
- if !ok {
- return false
- }
- hdr := header.TCP(h)
+ h := header.TCP(s.data.First())
// h is the header followed by the payload. We check that the offset to
// the data respects the following constraints:
@@ -160,16 +156,12 @@ func (s *segment) parse() bool {
// N.B. The segment has already been validated as having at least the
// minimum TCP size before reaching here, so it's safe to read the
// fields.
- offset := int(hdr.DataOffset())
- if offset < header.TCPMinimumSize {
- return false
- }
- hdrWithOpts, ok := s.data.PullUp(offset)
- if !ok {
+ offset := int(h.DataOffset())
+ if offset < header.TCPMinimumSize || offset > len(h) {
return false
}
- s.options = []byte(hdrWithOpts[header.TCPMinimumSize:])
+ s.options = []byte(h[header.TCPMinimumSize:offset])
s.parsedOptions = header.ParseTCPOptions(s.options)
// Query the link capabilities to decide if checksum validation is
@@ -181,19 +173,18 @@ func (s *segment) parse() bool {
s.data.TrimFront(offset)
}
if verifyChecksum {
- hdr = header.TCP(hdrWithOpts)
- s.csum = hdr.Checksum()
+ s.csum = h.Checksum()
xsum := s.route.PseudoHeaderChecksum(ProtocolNumber, uint16(s.data.Size()))
- xsum = hdr.CalculateChecksum(xsum)
+ xsum = h.CalculateChecksum(xsum)
s.data.TrimFront(offset)
xsum = header.ChecksumVV(s.data, xsum)
s.csumValid = xsum == 0xffff
}
- s.sequenceNumber = seqnum.Value(hdr.SequenceNumber())
- s.ackNumber = seqnum.Value(hdr.AckNumber())
- s.flags = hdr.Flags()
- s.window = seqnum.Size(hdr.WindowSize())
+ s.sequenceNumber = seqnum.Value(h.SequenceNumber())
+ s.ackNumber = seqnum.Value(h.AckNumber())
+ s.flags = h.Flags()
+ s.window = seqnum.Size(h.WindowSize())
return true
}
diff --git a/pkg/tcpip/transport/tcp/tcp_sack_test.go b/pkg/tcpip/transport/tcp/tcp_sack_test.go
index 1dd63dd61..ace79b7b2 100644
--- a/pkg/tcpip/transport/tcp/tcp_sack_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_sack_test.go
@@ -28,6 +28,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/test/testutil"
)
// createConnectedWithSACKPermittedOption creates and connects c.ep with the
@@ -439,21 +440,28 @@ func TestSACKRecovery(t *testing.T) {
// Receive the retransmitted packet.
c.ReceiveAndCheckPacketWithOptions(data, rtxOffset, maxPayload, tsOptionSize)
- tcpStats := c.Stack().Stats().TCP
- stats := []struct {
- stat *tcpip.StatCounter
- name string
- want uint64
- }{
- {tcpStats.FastRetransmit, "stats.TCP.FastRetransmit", 1},
- {tcpStats.Retransmits, "stats.TCP.Retransmits", 1},
- {tcpStats.SACKRecovery, "stats.TCP.SACKRecovery", 1},
- {tcpStats.FastRecovery, "stats.TCP.FastRecovery", 0},
- }
- for _, s := range stats {
- if got, want := s.stat.Value(), s.want; got != want {
- t.Errorf("got %s.Value() = %v, want = %v", s.name, got, want)
+ metricPollFn := func() error {
+ tcpStats := c.Stack().Stats().TCP
+ stats := []struct {
+ stat *tcpip.StatCounter
+ name string
+ want uint64
+ }{
+ {tcpStats.FastRetransmit, "stats.TCP.FastRetransmit", 1},
+ {tcpStats.Retransmits, "stats.TCP.Retransmits", 1},
+ {tcpStats.SACKRecovery, "stats.TCP.SACKRecovery", 1},
+ {tcpStats.FastRecovery, "stats.TCP.FastRecovery", 0},
+ }
+ for _, s := range stats {
+ if got, want := s.stat.Value(), s.want; got != want {
+ return fmt.Errorf("got %s.Value() = %v, want = %v", s.name, got, want)
+ }
}
+ return nil
+ }
+
+ if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+ t.Error(err)
}
// Now send 7 mode duplicate ACKs. In SACK TCP dupAcks do not cause
@@ -517,22 +525,28 @@ func TestSACKRecovery(t *testing.T) {
bytesRead += maxPayload
}
- // In SACK recovery only the first segment is fast retransmitted when
- // entering recovery.
- if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
- t.Errorf("got stats.TCP.FastRetransmit.Value = %v, want = %v", got, want)
- }
+ metricPollFn = func() error {
+ // In SACK recovery only the first segment is fast retransmitted when
+ // entering recovery.
+ if got, want := c.Stack().Stats().TCP.FastRetransmit.Value(), uint64(1); got != want {
+ return fmt.Errorf("got stats.TCP.FastRetransmit.Value = %v, want = %v", got, want)
+ }
- if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.FastRetransmit.Value(), uint64(1); got != want {
- t.Errorf("got EP stats SendErrors.FastRetransmit = %v, want = %v", got, want)
- }
+ if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.FastRetransmit.Value(), uint64(1); got != want {
+ return fmt.Errorf("got EP stats SendErrors.FastRetransmit = %v, want = %v", got, want)
+ }
- if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(4); got != want {
- t.Errorf("got stats.TCP.Retransmits.Value = %v, want = %v", got, want)
- }
+ if got, want := c.Stack().Stats().TCP.Retransmits.Value(), uint64(4); got != want {
+ return fmt.Errorf("got stats.TCP.Retransmits.Value = %v, want = %v", got, want)
+ }
- if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(4); got != want {
- t.Errorf("got EP stats Stats.SendErrors.Retransmits = %v, want = %v", got, want)
+ if got, want := c.EP.Stats().(*tcp.Stats).SendErrors.Retransmits.Value(), uint64(4); got != want {
+ return fmt.Errorf("got EP stats Stats.SendErrors.Retransmits = %v, want = %v", got, want)
+ }
+ return nil
+ }
+ if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+ t.Error(err)
}
c.CheckNoPacketTimeout("More packets received than expected during recovery after partial ack for this cwnd.", 50*time.Millisecond)
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 286c66cf5..7e574859b 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -35,6 +35,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp/testing/context"
+ "gvisor.dev/gvisor/pkg/test/testutil"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -209,8 +210,15 @@ func TestTCPResetsSentIncrement(t *testing.T) {
c.SendPacket(nil, ackHeaders)
c.GetPacket()
- if got := stats.TCP.ResetsSent.Value(); got != want {
- t.Errorf("got stats.TCP.ResetsSent.Value() = %v, want = %v", got, want)
+
+ metricPollFn := func() error {
+ if got := stats.TCP.ResetsSent.Value(); got != want {
+ return fmt.Errorf("got stats.TCP.ResetsSent.Value() = %v, want = %v", got, want)
+ }
+ return nil
+ }
+ if err := testutil.Poll(metricPollFn, 1*time.Second); err != nil {
+ t.Error(err)
}
}
@@ -3548,7 +3556,7 @@ func TestReceivedInvalidSegmentCountIncrement(t *testing.T) {
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
- tcpbuf := vv.ToView()[header.IPv4MinimumSize:]
+ tcpbuf := vv.First()[header.IPv4MinimumSize:]
tcpbuf[header.TCPDataOffset] = ((header.TCPMinimumSize - 1) / 4) << 4
c.SendSegment(vv)
@@ -3575,7 +3583,7 @@ func TestReceivedIncorrectChecksumIncrement(t *testing.T) {
AckNum: c.IRS.Add(1),
RcvWnd: 30000,
})
- tcpbuf := vv.ToView()[header.IPv4MinimumSize:]
+ tcpbuf := vv.First()[header.IPv4MinimumSize:]
// Overwrite a byte in the payload which should cause checksum
// verification to fail.
tcpbuf[(tcpbuf[header.TCPDataOffset]>>4)*4] = 0x4
diff --git a/pkg/tcpip/transport/udp/endpoint.go b/pkg/tcpip/transport/udp/endpoint.go
index 756ab913a..edb54f0be 100644
--- a/pkg/tcpip/transport/udp/endpoint.go
+++ b/pkg/tcpip/transport/udp/endpoint.go
@@ -1250,8 +1250,8 @@ func (e *endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
// endpoint.
func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) {
// Get the header then trim it from the view.
- hdr, ok := pkt.Data.PullUp(header.UDPMinimumSize)
- if !ok || int(header.UDP(hdr).Length()) > pkt.Data.Size() {
+ hdr := header.UDP(pkt.Data.First())
+ if int(hdr.Length()) > pkt.Data.Size() {
// Malformed packet.
e.stack.Stats().UDP.MalformedPacketsReceived.Increment()
e.stats.ReceiveErrors.MalformedPacketsReceived.Increment()
@@ -1286,7 +1286,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, id stack.TransportEndpointID, pk
senderAddress: tcpip.FullAddress{
NIC: r.NICID(),
Addr: id.RemoteAddress,
- Port: header.UDP(hdr).SourcePort(),
+ Port: hdr.SourcePort(),
},
}
packet.data = pkt.Data
diff --git a/pkg/tcpip/transport/udp/protocol.go b/pkg/tcpip/transport/udp/protocol.go
index 52af6de22..6e31a9bac 100644
--- a/pkg/tcpip/transport/udp/protocol.go
+++ b/pkg/tcpip/transport/udp/protocol.go
@@ -68,13 +68,8 @@ func (*protocol) ParsePorts(v buffer.View) (src, dst uint16, err *tcpip.Error) {
// that don't match any existing endpoint.
func (p *protocol) HandleUnknownDestinationPacket(r *stack.Route, id stack.TransportEndpointID, pkt stack.PacketBuffer) bool {
// Get the header then trim it from the view.
- h, ok := pkt.Data.PullUp(header.UDPMinimumSize)
- if !ok {
- // Malformed packet.
- r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
- return true
- }
- if int(header.UDP(h).Length()) > pkt.Data.Size() {
+ hdr := header.UDP(pkt.Data.First())
+ if int(hdr.Length()) > pkt.Data.Size() {
// Malformed packet.
r.Stack().Stats().UDP.MalformedPacketsReceived.Increment()
return true
diff --git a/pkg/test/dockerutil/dockerutil.go b/pkg/test/dockerutil/dockerutil.go
index baa8fc2f2..5f2af9f3b 100644
--- a/pkg/test/dockerutil/dockerutil.go
+++ b/pkg/test/dockerutil/dockerutil.go
@@ -353,7 +353,10 @@ func (d *Docker) run(r RunOpts, command string, p ...string) (string, error) {
// Create calls 'docker create' with the arguments provided.
func (d *Docker) Create(r RunOpts, args ...string) error {
- _, err := d.run(r, "create", args...)
+ out, err := d.run(r, "create", args...)
+ if strings.Contains(out, "Unable to find image") {
+ return fmt.Errorf("unable to find image, did you remember to `make load-%s`: %w", r.Image, err)
+ }
return err
}
diff --git a/pkg/test/testutil/testutil.go b/pkg/test/testutil/testutil.go
index d75ceca3d..ee8c78014 100644
--- a/pkg/test/testutil/testutil.go
+++ b/pkg/test/testutil/testutil.go
@@ -57,42 +57,10 @@ func IsCheckpointSupported() bool {
return *checkpoint
}
-// nameToActual is used by ImageByName (for now).
-var nameToActual = map[string]string{
- "basic/alpine": "alpine",
- "basic/busybox": "busybox:1.31.1",
- "basic/httpd": "httpd",
- "basic/mysql": "mysql",
- "basic/nginx": "nginx",
- "basic/python": "gcr.io/gvisor-presubmit/python-hello",
- "basic/resolv": "k8s.gcr.io/busybox",
- "basic/ruby": "ruby",
- "basic/tomcat": "tomcat:8.0",
- "basic/ubuntu": "ubuntu:trusty",
- "iptables": "gcr.io/gvisor-presubmit/iptables-test",
- "packetdrill": "gcr.io/gvisor-presubmit/packetdrill",
- "packetimpact": "gcr.io/gvisor-presubmit/packetimpact",
- "runtimes/go1.12": "gcr.io/gvisor-presubmit/go1.12",
- "runtimes/java11": "gcr.io/gvisor-presubmit/java11",
- "runtimes/nodejs12.4.0": "gcr.io/gvisor-presubmit/nodejs12.4.0",
- "runtimes/php7.3.6": "gcr.io/gvisor-presubmit/php7.3.6",
- "runtimes/python3.7.3": "gcr.io/gvisor-presubmit/python3.7.3",
-}
-
-// ImageByName mangles the image name used locally.
-//
-// For now, this is implemented as a static lookup table. In a subsequent
-// change, this will be used to reference a locally-generated image.
+// ImageByName mangles the image name used locally. This depends on the image
+// build infrastructure in images/ and tools/vm.
func ImageByName(name string) string {
- actual, ok := nameToActual[name]
- if !ok {
- panic(fmt.Sprintf("unknown image: %v", name))
- }
- // A terrible hack, for now execute a manual pull.
- if out, err := exec.Command("docker", "pull", actual).CombinedOutput(); err != nil {
- panic(fmt.Sprintf("error pulling image %q -> %q: %v, out: %s", name, actual, err, string(out)))
- }
- return actual
+ return fmt.Sprintf("gvisor.dev/images/%s", name)
}
// ConfigureExePath configures the executable for runsc in the test environment.