summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fs/proc/BUILD1
-rw-r--r--pkg/sentry/fs/proc/net.go195
-rw-r--r--pkg/sentry/inet/BUILD5
-rw-r--r--pkg/sentry/inet/inet.go32
-rw-r--r--pkg/sentry/inet/test_stack.go16
-rw-r--r--pkg/sentry/kernel/fd_table.go22
-rw-r--r--pkg/sentry/kernel/fd_table_test.go36
-rw-r--r--pkg/sentry/kernel/kernel.go15
-rw-r--r--pkg/sentry/kernel/task.go4
-rw-r--r--pkg/sentry/kernel/task_context.go27
-rw-r--r--pkg/sentry/loader/elf.go19
-rw-r--r--pkg/sentry/loader/loader.go263
-rw-r--r--pkg/sentry/platform/kvm/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/address_space.go2
-rw-r--r--pkg/sentry/platform/kvm/allocator.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_fault.go10
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm_const.go7
-rw-r--r--pkg/sentry/platform/kvm/machine.go26
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go10
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64_unsafe.go24
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go61
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go26
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go3
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_unsafe.go2
-rw-r--r--pkg/sentry/socket/hostinet/BUILD1
-rw-r--r--pkg/sentry/socket/hostinet/stack.go116
-rw-r--r--pkg/sentry/socket/netstack/netstack.go12
-rw-r--r--pkg/sentry/socket/netstack/stack.go108
-rw-r--r--pkg/sentry/socket/rpcinet/BUILD1
-rw-r--r--pkg/sentry/socket/rpcinet/stack.go10
-rw-r--r--pkg/sentry/syscalls/linux/BUILD1
-rw-r--r--pkg/sentry/syscalls/linux/linux64_amd64.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_thread.go55
-rw-r--r--pkg/sentry/vfs/mount_unsafe.go2
35 files changed, 926 insertions, 195 deletions
diff --git a/pkg/sentry/fs/proc/BUILD b/pkg/sentry/fs/proc/BUILD
index 497475db5..75cbb0622 100644
--- a/pkg/sentry/fs/proc/BUILD
+++ b/pkg/sentry/fs/proc/BUILD
@@ -52,6 +52,7 @@ go_library(
"//pkg/sentry/usage",
"//pkg/sentry/usermem",
"//pkg/syserror",
+ "//pkg/tcpip/header",
"//pkg/waiter",
],
)
diff --git a/pkg/sentry/fs/proc/net.go b/pkg/sentry/fs/proc/net.go
index f70239449..402919924 100644
--- a/pkg/sentry/fs/proc/net.go
+++ b/pkg/sentry/fs/proc/net.go
@@ -18,6 +18,7 @@ import (
"bytes"
"fmt"
"io"
+ "reflect"
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -33,6 +34,8 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
)
// newNet creates a new proc net entry.
@@ -40,7 +43,8 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
var contents map[string]*fs.Inode
if s := p.k.NetworkStack(); s != nil {
contents = map[string]*fs.Inode{
- "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc),
+ "dev": seqfile.NewSeqFileInode(ctx, &netDev{s: s}, msrc),
+ "snmp": seqfile.NewSeqFileInode(ctx, &netSnmp{s: s}, msrc),
// The following files are simple stubs until they are
// implemented in netstack, if the file contains a
@@ -57,7 +61,7 @@ func (p *proc) newNetDir(ctx context.Context, k *kernel.Kernel, msrc *fs.MountSo
// (ClockGetres returns 1ns resolution).
"psched": newStaticProcInode(ctx, msrc, []byte(fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)))),
"ptype": newStaticProcInode(ctx, msrc, []byte("Type Device Function")),
- "route": newStaticProcInode(ctx, msrc, []byte("Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT")),
+ "route": seqfile.NewSeqFileInode(ctx, &netRoute{s: s}, msrc),
"tcp": seqfile.NewSeqFileInode(ctx, &netTCP{k: k}, msrc),
"udp": seqfile.NewSeqFileInode(ctx, &netUDP{k: k}, msrc),
"unix": seqfile.NewSeqFileInode(ctx, &netUnix{k: k}, msrc),
@@ -195,6 +199,193 @@ func (n *netDev) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]se
return data, 0
}
+// netSnmp implements seqfile.SeqSource for /proc/net/snmp.
+//
+// +stateify savable
+type netSnmp struct {
+ s inet.Stack
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (n *netSnmp) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+type snmpLine struct {
+ prefix string
+ header string
+}
+
+var snmp = []snmpLine{
+ {
+ prefix: "Ip",
+ header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
+ },
+ {
+ prefix: "Icmp",
+ header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
+ },
+ {
+ prefix: "IcmpMsg",
+ },
+ {
+ prefix: "Tcp",
+ header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
+ },
+ {
+ prefix: "Udp",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+ {
+ prefix: "UdpLite",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+}
+
+func toSlice(a interface{}) []uint64 {
+ v := reflect.Indirect(reflect.ValueOf(a))
+ return v.Slice(0, v.Len()).Interface().([]uint64)
+}
+
+func sprintSlice(s []uint64) string {
+ if len(s) == 0 {
+ return ""
+ }
+ r := fmt.Sprint(s)
+ return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice.
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData. See Linux's
+// net/core/net-procfs.c:dev_seq_show.
+func (n *netSnmp) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ contents := make([]string, 0, len(snmp)*2)
+ types := []interface{}{
+ &inet.StatSNMPIP{},
+ &inet.StatSNMPICMP{},
+ nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats.
+ &inet.StatSNMPTCP{},
+ &inet.StatSNMPUDP{},
+ &inet.StatSNMPUDPLite{},
+ }
+ for i, stat := range types {
+ line := snmp[i]
+ if stat == nil {
+ contents = append(
+ contents,
+ fmt.Sprintf("%s:\n", line.prefix),
+ fmt.Sprintf("%s:\n", line.prefix),
+ )
+ continue
+ }
+ if err := n.s.Statistics(stat, line.prefix); err != nil {
+ if err == syserror.EOPNOTSUPP {
+ log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ } else {
+ log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ }
+ }
+ var values string
+ if line.prefix == "Tcp" {
+ tcp := stat.(*inet.StatSNMPTCP)
+ // "Tcp" needs special processing because MaxConn is signed. RFC 2012.
+ values = fmt.Sprintf("%s %d %s", sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
+ } else {
+ values = sprintSlice(toSlice(stat))
+ }
+ contents = append(
+ contents,
+ fmt.Sprintf("%s: %s\n", line.prefix, line.header),
+ fmt.Sprintf("%s: %s\n", line.prefix, values),
+ )
+ }
+
+ data := make([]seqfile.SeqData, 0, len(snmp)*2)
+ for _, l := range contents {
+ data = append(data, seqfile.SeqData{Buf: []byte(l), Handle: (*netSnmp)(nil)})
+ }
+
+ return data, 0
+}
+
+// netRoute implements seqfile.SeqSource for /proc/net/route.
+//
+// +stateify savable
+type netRoute struct {
+ s inet.Stack
+}
+
+// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
+func (n *netRoute) NeedsUpdate(generation int64) bool {
+ return true
+}
+
+// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
+// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
+func (n *netRoute) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
+ if h != nil {
+ return nil, 0
+ }
+
+ interfaces := n.s.Interfaces()
+ contents := []string{"Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT"}
+ for _, rt := range n.s.RouteTable() {
+ // /proc/net/route only includes ipv4 routes.
+ if rt.Family != linux.AF_INET {
+ continue
+ }
+
+ // /proc/net/route does not include broadcast or multicast routes.
+ if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST {
+ continue
+ }
+
+ iface, ok := interfaces[rt.OutputInterface]
+ if !ok || iface.Name == "lo" {
+ continue
+ }
+
+ var (
+ gw uint32
+ prefix uint32
+ flags = linux.RTF_UP
+ )
+ if len(rt.GatewayAddr) == header.IPv4AddressSize {
+ flags |= linux.RTF_GATEWAY
+ gw = usermem.ByteOrder.Uint32(rt.GatewayAddr)
+ }
+ if len(rt.DstAddr) == header.IPv4AddressSize {
+ prefix = usermem.ByteOrder.Uint32(rt.DstAddr)
+ }
+ l := fmt.Sprintf(
+ "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
+ iface.Name,
+ prefix,
+ gw,
+ flags,
+ 0, // RefCnt.
+ 0, // Use.
+ 0, // Metric.
+ (uint32(1)<<rt.DstLen)-1,
+ 0, // MTU.
+ 0, // Window.
+ 0, // RTT.
+ )
+ contents = append(contents, l)
+ }
+
+ var data []seqfile.SeqData
+ for _, l := range contents {
+ l = fmt.Sprintf("%-127s\n", l)
+ data = append(data, seqfile.SeqData{Buf: []byte(l), Handle: (*netRoute)(nil)})
+ }
+
+ return data, 0
+}
+
// netUnix implements seqfile.SeqSource for /proc/net/unix.
//
// +stateify savable
diff --git a/pkg/sentry/inet/BUILD b/pkg/sentry/inet/BUILD
index d5284f0d9..8d60ad4ad 100644
--- a/pkg/sentry/inet/BUILD
+++ b/pkg/sentry/inet/BUILD
@@ -13,5 +13,8 @@ go_library(
"test_stack.go",
],
importpath = "gvisor.dev/gvisor/pkg/sentry/inet",
- deps = ["//pkg/sentry/context"],
+ deps = [
+ "//pkg/sentry/context",
+ "//pkg/tcpip/stack",
+ ],
)
diff --git a/pkg/sentry/inet/inet.go b/pkg/sentry/inet/inet.go
index 80f227dbe..a7dfb78a7 100644
--- a/pkg/sentry/inet/inet.go
+++ b/pkg/sentry/inet/inet.go
@@ -15,6 +15,8 @@
// Package inet defines semantics for IP stacks.
package inet
+import "gvisor.dev/gvisor/pkg/tcpip/stack"
+
// Stack represents a TCP/IP stack.
type Stack interface {
// Interfaces returns all network interfaces as a mapping from interface
@@ -58,6 +60,16 @@ type Stack interface {
// Resume restarts the network stack after restore.
Resume()
+
+ // RegisteredEndpoints returns all endpoints which are currently registered.
+ RegisteredEndpoints() []stack.TransportEndpoint
+
+ // CleanupEndpoints returns endpoints currently in the cleanup state.
+ CleanupEndpoints() []stack.TransportEndpoint
+
+ // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful
+ // for restoring a stack after a save.
+ RestoreCleanupEndpoints([]stack.TransportEndpoint)
}
// Interface contains information about a network interface.
@@ -153,3 +165,23 @@ type Route struct {
// GatewayAddr is the route gateway address (RTA_GATEWAY).
GatewayAddr []byte
}
+
+// Below SNMP metrics are from Linux/usr/include/linux/snmp.h.
+
+// StatSNMPIP describes Ip line of /proc/net/snmp.
+type StatSNMPIP [19]uint64
+
+// StatSNMPICMP describes Icmp line of /proc/net/snmp.
+type StatSNMPICMP [27]uint64
+
+// StatSNMPICMPMSG describes IcmpMsg line of /proc/net/snmp.
+type StatSNMPICMPMSG [512]uint64
+
+// StatSNMPTCP describes Tcp line of /proc/net/snmp.
+type StatSNMPTCP [15]uint64
+
+// StatSNMPUDP describes Udp line of /proc/net/snmp.
+type StatSNMPUDP [8]uint64
+
+// StatSNMPUDPLite describes UdpLite line of /proc/net/snmp.
+type StatSNMPUDPLite [8]uint64
diff --git a/pkg/sentry/inet/test_stack.go b/pkg/sentry/inet/test_stack.go
index b9eed7c3a..dcfcbd97e 100644
--- a/pkg/sentry/inet/test_stack.go
+++ b/pkg/sentry/inet/test_stack.go
@@ -14,6 +14,8 @@
package inet
+import "gvisor.dev/gvisor/pkg/tcpip/stack"
+
// TestStack is a dummy implementation of Stack for tests.
type TestStack struct {
InterfacesMap map[int32]Interface
@@ -94,5 +96,17 @@ func (s *TestStack) RouteTable() []Route {
}
// Resume implements Stack.Resume.
-func (s *TestStack) Resume() {
+func (s *TestStack) Resume() {}
+
+// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
+func (s *TestStack) RegisteredEndpoints() []stack.TransportEndpoint {
+ return nil
}
+
+// CleanupEndpoints implements inet.Stack.CleanupEndpoints.
+func (s *TestStack) CleanupEndpoints() []stack.TransportEndpoint {
+ return nil
+}
+
+// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
+func (s *TestStack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index cc3f43a45..11f613a11 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -81,6 +81,9 @@ type FDTable struct {
// mu protects below.
mu sync.Mutex `state:"nosave"`
+ // next is start position to find fd.
+ next int32
+
// used contains the number of non-nil entries. It must be accessed
// atomically. It may be read atomically without holding mu (but not
// written).
@@ -226,6 +229,11 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags
f.mu.Lock()
defer f.mu.Unlock()
+ // From f.next to find available fd.
+ if fd < f.next {
+ fd = f.next
+ }
+
// Install all entries.
for i := fd; i < end && len(fds) < len(files); i++ {
if d, _, _ := f.get(i); d == nil {
@@ -242,6 +250,11 @@ func (f *FDTable) NewFDs(ctx context.Context, fd int32, files []*fs.File, flags
return nil, syscall.EMFILE
}
+ if fd == f.next {
+ // Update next search start position.
+ f.next = fds[len(fds)-1] + 1
+ }
+
return fds, nil
}
@@ -361,6 +374,11 @@ func (f *FDTable) Remove(fd int32) *fs.File {
f.mu.Lock()
defer f.mu.Unlock()
+ // Update current available position.
+ if fd < f.next {
+ f.next = fd
+ }
+
orig, _, _ := f.get(fd)
if orig != nil {
orig.IncRef() // Reference for caller.
@@ -377,6 +395,10 @@ func (f *FDTable) RemoveIf(cond func(*fs.File, FDFlags) bool) {
f.forEach(func(fd int32, file *fs.File, flags FDFlags) {
if cond(file, flags) {
f.set(fd, nil, FDFlags{}) // Clear from table.
+ // Update current available position.
+ if fd < f.next {
+ f.next = fd
+ }
}
})
}
diff --git a/pkg/sentry/kernel/fd_table_test.go b/pkg/sentry/kernel/fd_table_test.go
index 2413788e7..2bcb6216a 100644
--- a/pkg/sentry/kernel/fd_table_test.go
+++ b/pkg/sentry/kernel/fd_table_test.go
@@ -70,6 +70,42 @@ func TestFDTableMany(t *testing.T) {
if err := fdTable.NewFDAt(ctx, 1, file, FDFlags{}); err != nil {
t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
}
+
+ i := int32(2)
+ fdTable.Remove(i)
+ if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != i {
+ t.Fatalf("Allocated %v FDs but wanted to allocate %v: %v", i, maxFD, err)
+ }
+ })
+}
+
+func TestFDTableOverLimit(t *testing.T) {
+ runTest(t, func(ctx context.Context, fdTable *FDTable, file *fs.File, _ *limits.LimitSet) {
+ if _, err := fdTable.NewFDs(ctx, maxFD, []*fs.File{file}, FDFlags{}); err == nil {
+ t.Fatalf("fdTable.NewFDs(maxFD, f): got nil, wanted error")
+ }
+
+ if _, err := fdTable.NewFDs(ctx, maxFD-2, []*fs.File{file, file, file}, FDFlags{}); err == nil {
+ t.Fatalf("fdTable.NewFDs(maxFD-2, {f,f,f}): got nil, wanted error")
+ }
+
+ if fds, err := fdTable.NewFDs(ctx, maxFD-3, []*fs.File{file, file, file}, FDFlags{}); err != nil {
+ t.Fatalf("fdTable.NewFDs(maxFD-3, {f,f,f}): got %v, wanted nil", err)
+ } else {
+ for _, fd := range fds {
+ fdTable.Remove(fd)
+ }
+ }
+
+ if fds, err := fdTable.NewFDs(ctx, maxFD-1, []*fs.File{file}, FDFlags{}); err != nil || fds[0] != maxFD-1 {
+ t.Fatalf("fdTable.NewFDAt(1, r, FDFlags{}): got %v, wanted nil", err)
+ }
+
+ if fds, err := fdTable.NewFDs(ctx, 0, []*fs.File{file}, FDFlags{}); err != nil {
+ t.Fatalf("Adding an FD to a resized map: got %v, want nil", err)
+ } else if len(fds) != 1 || fds[0] != 0 {
+ t.Fatalf("Added an FD to a resized map: got %v, want {1}", fds)
+ }
})
}
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 3cda03891..e64d648e2 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -804,8 +804,21 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
// Create a fresh task context.
remainingTraversals = uint(args.MaxSymlinkTraversals)
+ loadArgs := loader.LoadArgs{
+ Mounts: mounts,
+ Root: root,
+ WorkingDirectory: wd,
+ RemainingTraversals: &remainingTraversals,
+ ResolveFinal: true,
+ Filename: args.Filename,
+ File: args.File,
+ CloseOnExec: false,
+ Argv: args.Argv,
+ Envv: args.Envv,
+ Features: k.featureSet,
+ }
- tc, se := k.LoadTaskImage(ctx, mounts, root, wd, &remainingTraversals, args.Filename, args.File, args.Argv, args.Envv, k.featureSet)
+ tc, se := k.LoadTaskImage(ctx, loadArgs)
if se != nil {
return nil, 0, errors.New(se.String())
}
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index c82ef5486..9be3dae3c 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -709,9 +709,9 @@ func (t *Task) FDTable() *FDTable {
return t.fdTable
}
-// GetFile is a convenience wrapper t.FDTable().GetFile.
+// GetFile is a convenience wrapper for t.FDTable().Get.
//
-// Precondition: same as FDTable.
+// Precondition: same as FDTable.Get.
func (t *Task) GetFile(fd int32) *fs.File {
f, _ := t.fdTable.Get(fd)
return f
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index 8639d379f..bb5560acf 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -18,10 +18,8 @@ import (
"fmt"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/cpuid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/context"
- "gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/loader"
"gvisor.dev/gvisor/pkg/sentry/mm"
@@ -132,30 +130,21 @@ func (t *Task) Stack() *arch.Stack {
return &arch.Stack{t.Arch(), t.MemoryManager(), usermem.Addr(t.Arch().Stack())}
}
-// LoadTaskImage loads filename into a new TaskContext.
+// LoadTaskImage loads a specified file into a new TaskContext.
//
-// It takes several arguments:
-// * mounts: MountNamespace to lookup filename in
-// * root: Root to lookup filename under
-// * wd: Working directory to lookup filename under
-// * maxTraversals: maximum number of symlinks to follow
-// * filename: path to binary to load
-// * file: an open fs.File object of the binary to load. If set,
-// file will be loaded and not filename.
-// * argv: Binary argv
-// * envv: Binary envv
-// * fs: Binary FeatureSet
-func (k *Kernel) LoadTaskImage(ctx context.Context, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, filename string, file *fs.File, argv, envv []string, fs *cpuid.FeatureSet) (*TaskContext, *syserr.Error) {
- // If File is not nil, we should load that instead of resolving filename.
- if file != nil {
- filename = file.MappedName(ctx)
+// args.MemoryManager does not need to be set by the caller.
+func (k *Kernel) LoadTaskImage(ctx context.Context, args loader.LoadArgs) (*TaskContext, *syserr.Error) {
+ // If File is not nil, we should load that instead of resolving Filename.
+ if args.File != nil {
+ args.Filename = args.File.MappedName(ctx)
}
// Prepare a new user address space to load into.
m := mm.NewMemoryManager(k, k)
defer m.DecUsers(ctx)
+ args.MemoryManager = m
- os, ac, name, err := loader.Load(ctx, m, mounts, root, wd, maxTraversals, fs, filename, file, argv, envv, k.extraAuxv, k.vdso)
+ os, ac, name, err := loader.Load(ctx, args, k.extraAuxv, k.vdso)
if err != nil {
return nil, err
}
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
index 2d9251e92..c2c3ec06e 100644
--- a/pkg/sentry/loader/elf.go
+++ b/pkg/sentry/loader/elf.go
@@ -624,15 +624,15 @@ func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, in
return loadParsedELF(ctx, m, f, info, 0)
}
-// loadELF loads f into the Task address space.
+// loadELF loads args.File into the Task address space.
//
// If loadELF returns ErrSwitchFile it should be called again with the returned
// path and argv.
//
// Preconditions:
-// * f is an ELF file
-func loadELF(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, fs *cpuid.FeatureSet, f *fs.File) (loadedELF, arch.Context, error) {
- bin, ac, err := loadInitialELF(ctx, m, fs, f)
+// * args.File is an ELF file
+func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error) {
+ bin, ac, err := loadInitialELF(ctx, args.MemoryManager, args.Features, args.File)
if err != nil {
ctx.Infof("Error loading binary: %v", err)
return loadedELF{}, nil, err
@@ -640,7 +640,14 @@ func loadELF(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace
var interp loadedELF
if bin.interpreter != "" {
- d, i, err := openPath(ctx, mounts, root, wd, maxTraversals, bin.interpreter)
+ // Even if we do not allow the final link of the script to be
+ // resolved, the interpreter should still be resolved if it is
+ // a symlink.
+ args.ResolveFinal = true
+ // Refresh the traversal limit.
+ *args.RemainingTraversals = linux.MaxSymlinkTraversals
+ args.Filename = bin.interpreter
+ d, i, err := openPath(ctx, args)
if err != nil {
ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err)
return loadedELF{}, nil, err
@@ -649,7 +656,7 @@ func loadELF(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace
// We don't need the Dirent.
d.DecRef()
- interp, err = loadInterpreterELF(ctx, m, i, bin)
+ interp, err = loadInterpreterELF(ctx, args.MemoryManager, i, bin)
if err != nil {
ctx.Infof("Error loading interpreter: %v", err)
return loadedELF{}, nil, err
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
index 089d1635b..b03eeb005 100644
--- a/pkg/sentry/loader/loader.go
+++ b/pkg/sentry/loader/loader.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Package loader loads a binary into a MemoryManager.
+// Package loader loads an executable file into a MemoryManager.
package loader
import (
@@ -20,6 +20,7 @@ import (
"fmt"
"io"
"path"
+ "strings"
"gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/abi/linux"
@@ -35,6 +36,54 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
+// LoadArgs holds specifications for an executable file to be loaded.
+type LoadArgs struct {
+ // MemoryManager is the memory manager to load the executable into.
+ MemoryManager *mm.MemoryManager
+
+ // Mounts is the mount namespace in which to look up Filename.
+ Mounts *fs.MountNamespace
+
+ // Root is the root directory under which to look up Filename.
+ Root *fs.Dirent
+
+ // WorkingDirectory is the working directory under which to look up
+ // Filename.
+ WorkingDirectory *fs.Dirent
+
+ // RemainingTraversals is the maximum number of symlinks to follow to
+ // resolve Filename. This counter is passed by reference to keep it
+ // updated throughout the call stack.
+ RemainingTraversals *uint
+
+ // ResolveFinal indicates whether the final link of Filename should be
+ // resolved, if it is a symlink.
+ ResolveFinal bool
+
+ // Filename is the path for the executable.
+ Filename string
+
+ // File is an open fs.File object of the executable. If File is not
+ // nil, then File will be loaded and Filename will be ignored.
+ File *fs.File
+
+ // CloseOnExec indicates that the executable (or one of its parent
+ // directories) was opened with O_CLOEXEC. If the executable is an
+ // interpreter script, then cause an ENOENT error to occur, since the
+ // script would otherwise be inaccessible to the interpreter.
+ CloseOnExec bool
+
+ // Argv is the vector of arguments to pass to the executable.
+ Argv []string
+
+ // Envv is the vector of environment variables to pass to the
+ // executable.
+ Envv []string
+
+ // Features specifies the CPU feature set for the executable.
+ Features *cpuid.FeatureSet
+}
+
// readFull behaves like io.ReadFull for an *fs.File.
func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
var total int64
@@ -51,80 +100,82 @@ func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset in
return total, nil
}
-// openPath opens name for loading.
+// openPath opens args.Filename and checks that it is valid for loading.
//
-// openPath returns the fs.Dirent and an *fs.File for name, which is not
+// openPath returns an *fs.Dirent and *fs.File for args.Filename, which is not
// installed in the Task FDTable. The caller takes ownership of both.
//
-// name must be a readable, executable, regular file.
-func openPath(ctx context.Context, mm *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, name string) (*fs.Dirent, *fs.File, error) {
- if name == "" {
+// args.Filename must be a readable, executable, regular file.
+func openPath(ctx context.Context, args LoadArgs) (*fs.Dirent, *fs.File, error) {
+ if args.Filename == "" {
ctx.Infof("cannot open empty name")
return nil, nil, syserror.ENOENT
}
- d, err := mm.FindInode(ctx, root, wd, name, maxTraversals)
+ var d *fs.Dirent
+ var err error
+ if args.ResolveFinal {
+ d, err = args.Mounts.FindInode(ctx, args.Root, args.WorkingDirectory, args.Filename, args.RemainingTraversals)
+ } else {
+ d, err = args.Mounts.FindLink(ctx, args.Root, args.WorkingDirectory, args.Filename, args.RemainingTraversals)
+ }
if err != nil {
return nil, nil, err
}
-
- // Open file will take a reference to Dirent, so destroy this one.
+ // Defer a DecRef for the sake of failure cases.
defer d.DecRef()
- return openFile(ctx, nil, d, name)
-}
+ if !args.ResolveFinal && fs.IsSymlink(d.Inode.StableAttr) {
+ return nil, nil, syserror.ELOOP
+ }
-// openFile performs checks on a file to be executed. If provided a *fs.File,
-// openFile takes that file's Dirent and performs checks on it. If provided a
-// *fs.Dirent and not a *fs.File, it creates a *fs.File object from the Dirent's
-// Inode and performs checks on that.
-//
-// openFile returns an *fs.File and *fs.Dirent, and the caller takes ownership
-// of both.
-//
-// "dirent" and "file" must not both be nil and point to a readable, executable, regular file.
-func openFile(ctx context.Context, file *fs.File, dirent *fs.Dirent, name string) (*fs.Dirent, *fs.File, error) {
- // file and dirent must not be nil.
- if dirent == nil && file == nil {
- ctx.Infof("dirent and file cannot both be nil.")
- return nil, nil, syserror.ENOENT
+ if err := checkPermission(ctx, d); err != nil {
+ return nil, nil, err
}
- if file != nil {
- dirent = file.Dirent
+ // If they claim it's a directory, then make sure.
+ //
+ // N.B. we reject directories below, but we must first reject
+ // non-directories passed as directories.
+ if strings.HasSuffix(args.Filename, "/") && !fs.IsDir(d.Inode.StableAttr) {
+ return nil, nil, syserror.ENOTDIR
}
- // Perform permissions checks on the file.
- if err := checkFile(ctx, dirent, name); err != nil {
+ if err := checkIsRegularFile(ctx, d, args.Filename); err != nil {
return nil, nil, err
}
- if file == nil {
- var ferr error
- if file, ferr = dirent.Inode.GetFile(ctx, dirent, fs.FileFlags{Read: true}); ferr != nil {
- return nil, nil, ferr
- }
- } else {
- // GetFile takes a reference to the created file, so make one in the case
- // that the file reference already existed.
- file.IncRef()
+ f, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true})
+ if err != nil {
+ return nil, nil, err
+ }
+ // Defer a DecRef for the sake of failure cases.
+ defer f.DecRef()
+
+ if err := checkPread(ctx, f, args.Filename); err != nil {
+ return nil, nil, err
+ }
+
+ d.IncRef()
+ f.IncRef()
+ return d, f, err
+}
+
+// checkFile performs checks on a file to be executed.
+func checkFile(ctx context.Context, f *fs.File, filename string) error {
+ if err := checkPermission(ctx, f.Dirent); err != nil {
+ return err
}
- // We must be able to read at arbitrary offsets.
- if !file.Flags().Pread {
- file.DecRef()
- ctx.Infof("%s cannot be read at an offset: %+v", file.MappedName(ctx), file.Flags())
- return nil, nil, syserror.EACCES
+ if err := checkIsRegularFile(ctx, f.Dirent, filename); err != nil {
+ return err
}
- // Grab reference for caller.
- dirent.IncRef()
- return dirent, file, nil
+ return checkPread(ctx, f, filename)
}
-// checkFile performs file permissions checks for binaries called in openPath
-// and openFile
-func checkFile(ctx context.Context, d *fs.Dirent, name string) error {
+// checkPermission checks whether the file is readable and executable.
+func checkPermission(ctx context.Context, d *fs.Dirent) error {
perms := fs.PermMask{
// TODO(gvisor.dev/issue/160): Linux requires only execute
// permission, not read. However, our backing filesystems may
@@ -135,26 +186,26 @@ func checkFile(ctx context.Context, d *fs.Dirent, name string) error {
Read: true,
Execute: true,
}
- if err := d.Inode.CheckPermission(ctx, perms); err != nil {
- return err
- }
+ return d.Inode.CheckPermission(ctx, perms)
+}
- // If they claim it's a directory, then make sure.
- //
- // N.B. we reject directories below, but we must first reject
- // non-directories passed as directories.
- if len(name) > 0 && name[len(name)-1] == '/' && !fs.IsDir(d.Inode.StableAttr) {
- return syserror.ENOTDIR
+// checkIsRegularFile prevents us from trying to execute a directory, pipe, etc.
+func checkIsRegularFile(ctx context.Context, d *fs.Dirent, filename string) error {
+ attr := d.Inode.StableAttr
+ if !fs.IsRegular(attr) {
+ ctx.Infof("%s is not regular: %v", filename, attr)
+ return syserror.EACCES
}
+ return nil
+}
- // No exec-ing directories, pipes, etc!
- if !fs.IsRegular(d.Inode.StableAttr) {
- ctx.Infof("%s is not regular: %v", name, d.Inode.StableAttr)
+// checkPread checks whether we can read the file at arbitrary offsets.
+func checkPread(ctx context.Context, f *fs.File, filename string) error {
+ if !f.Flags().Pread {
+ ctx.Infof("%s cannot be read at an offset: %+v", filename, f.Flags())
return syserror.EACCES
}
-
return nil
-
}
// allocStack allocates and maps a stack in to any available part of the address space.
@@ -173,45 +224,49 @@ const (
maxLoaderAttempts = 6
)
-// loadBinary loads a binary that is pointed to by "file". If nil, the path
-// "filename" is resolved and loaded.
+// loadExecutable loads an executable that is pointed to by args.File. If nil,
+// the path args.Filename is resolved and loaded. If the executable is an
+// interpreter script rather than an ELF, the binary of the corresponding
+// interpreter will be loaded.
//
// It returns:
// * loadedELF, description of the loaded binary
// * arch.Context matching the binary arch
// * fs.Dirent of the binary file
-// * Possibly updated argv
-func loadBinary(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, remainingTraversals *uint, features *cpuid.FeatureSet, filename string, passedFile *fs.File, argv []string) (loadedELF, arch.Context, *fs.Dirent, []string, error) {
+// * Possibly updated args.Argv
+func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, *fs.Dirent, []string, error) {
for i := 0; i < maxLoaderAttempts; i++ {
var (
d *fs.Dirent
- f *fs.File
err error
)
- if passedFile == nil {
- d, f, err = openPath(ctx, mounts, root, wd, remainingTraversals, filename)
-
+ if args.File == nil {
+ d, args.File, err = openPath(ctx, args)
+ // We will return d in the successful case, but defer a DecRef for the
+ // sake of intermediate loops and failure cases.
+ if d != nil {
+ defer d.DecRef()
+ }
+ if args.File != nil {
+ defer args.File.DecRef()
+ }
} else {
- d, f, err = openFile(ctx, passedFile, nil, "")
- // Set to nil in case we loop on a Interpreter Script.
- passedFile = nil
+ d = args.File.Dirent
+ d.IncRef()
+ defer d.DecRef()
+ err = checkFile(ctx, args.File, args.Filename)
}
-
if err != nil {
- ctx.Infof("Error opening %s: %v", filename, err)
+ ctx.Infof("Error opening %s: %v", args.Filename, err)
return loadedELF{}, nil, nil, nil, err
}
- defer f.DecRef()
- // We will return d in the successful case, but defer a DecRef
- // for intermediate loops and failure cases.
- defer d.DecRef()
// Check the header. Is this an ELF or interpreter script?
var hdr [4]uint8
// N.B. We assume that reading from a regular file cannot block.
- _, err = readFull(ctx, f, usermem.BytesIOSequence(hdr[:]), 0)
- // Allow unexpected EOF, as a valid executable could be only three
- // bytes (e.g., #!a).
+ _, err = readFull(ctx, args.File, usermem.BytesIOSequence(hdr[:]), 0)
+ // Allow unexpected EOF, as a valid executable could be only three bytes
+ // (e.g., #!a).
if err != nil && err != io.ErrUnexpectedEOF {
if err == io.EOF {
err = syserror.ENOEXEC
@@ -221,33 +276,38 @@ func loadBinary(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamesp
switch {
case bytes.Equal(hdr[:], []byte(elfMagic)):
- loaded, ac, err := loadELF(ctx, m, mounts, root, wd, remainingTraversals, features, f)
+ loaded, ac, err := loadELF(ctx, args)
if err != nil {
ctx.Infof("Error loading ELF: %v", err)
return loadedELF{}, nil, nil, nil, err
}
// An ELF is always terminal. Hold on to d.
d.IncRef()
- return loaded, ac, d, argv, err
+ return loaded, ac, d, args.Argv, err
case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)):
- newpath, newargv, err := parseInterpreterScript(ctx, filename, f, argv)
+ if args.CloseOnExec {
+ return loadedELF{}, nil, nil, nil, syserror.ENOENT
+ }
+ args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv)
if err != nil {
ctx.Infof("Error loading interpreter script: %v", err)
return loadedELF{}, nil, nil, nil, err
}
- filename = newpath
- argv = newargv
+ // Refresh the traversal limit for the interpreter.
+ *args.RemainingTraversals = linux.MaxSymlinkTraversals
default:
ctx.Infof("Unknown magic: %v", hdr)
return loadedELF{}, nil, nil, nil, syserror.ENOEXEC
}
+ // Set to nil in case we loop on a Interpreter Script.
+ args.File = nil
}
return loadedELF{}, nil, nil, nil, syserror.ELOOP
}
-// Load loads "file" into a MemoryManager. If file is nil, the path "filename"
-// is resolved and loaded instead.
+// Load loads args.File into a MemoryManager. If args.File is nil, the path
+// args.Filename is resolved and loaded instead.
//
// If Load returns ErrSwitchFile it should be called again with the returned
// path and argv.
@@ -255,37 +315,37 @@ func loadBinary(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamesp
// Preconditions:
// * The Task MemoryManager is empty.
// * Load is called on the Task goroutine.
-func Load(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals *uint, fs *cpuid.FeatureSet, filename string, file *fs.File, argv, envv []string, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) {
- // Load the binary itself.
- loaded, ac, d, argv, err := loadBinary(ctx, m, mounts, root, wd, maxTraversals, fs, filename, file, argv)
+func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) {
+ // Load the executable itself.
+ loaded, ac, d, newArgv, err := loadExecutable(ctx, args)
if err != nil {
- return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load %s: %v", filename, err), syserr.FromError(err).ToLinux())
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
}
defer d.DecRef()
// Load the VDSO.
- vdsoAddr, err := loadVDSO(ctx, m, vdso, loaded)
+ vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded)
if err != nil {
return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Error loading VDSO: %v", err), syserr.FromError(err).ToLinux())
}
// Setup the heap. brk starts at the next page after the end of the
- // binary. Userspace can assume that the remainer of the page after
+ // executable. Userspace can assume that the remainer of the page after
// loaded.end is available for its use.
e, ok := loaded.end.RoundUp()
if !ok {
return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("brk overflows: %#x", loaded.end), linux.ENOEXEC)
}
- m.BrkSetup(ctx, e)
+ args.MemoryManager.BrkSetup(ctx, e)
// Allocate our stack.
- stack, err := allocStack(ctx, m, ac)
+ stack, err := allocStack(ctx, args.MemoryManager, ac)
if err != nil {
return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to allocate stack: %v", err), syserr.FromError(err).ToLinux())
}
// Push the original filename to the stack, for AT_EXECFN.
- execfn, err := stack.Push(filename)
+ execfn, err := stack.Push(args.Filename)
if err != nil {
return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to push exec filename: %v", err), syserr.FromError(err).ToLinux())
}
@@ -319,11 +379,12 @@ func Load(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, r
}...)
auxv = append(auxv, extraAuxv...)
- sl, err := stack.Load(argv, envv, auxv)
+ sl, err := stack.Load(newArgv, args.Envv, auxv)
if err != nil {
return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load stack: %v", err), syserr.FromError(err).ToLinux())
}
+ m := args.MemoryManager
m.SetArgvStart(sl.ArgvStart)
m.SetArgvEnd(sl.ArgvEnd)
m.SetEnvvStart(sl.EnvvStart)
@@ -334,7 +395,7 @@ func Load(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, r
ac.SetIP(uintptr(loaded.entry))
ac.SetStack(uintptr(stack.Bottom))
- name := path.Base(filename)
+ name := path.Base(args.Filename)
if len(name) > linux.TASK_COMM_LEN-1 {
name = name[:linux.TASK_COMM_LEN-1]
}
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 31fa48ec5..6803d488c 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -23,6 +23,7 @@ go_library(
"machine.go",
"machine_amd64.go",
"machine_amd64_unsafe.go",
+ "machine_arm64.go",
"machine_unsafe.go",
"physical_map.go",
"virtual_map.go",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index acd41f73d..ea8b9632e 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -127,7 +127,7 @@ func (as *addressSpace) mapHost(addr usermem.Addr, m hostMapEntry, at usermem.Ac
// not have physical mappings, the KVM module may inject
// spurious exceptions when emulation fails (i.e. it tries to
// emulate because the RIP is pointed at those pages).
- as.machine.mapPhysical(physical, length)
+ as.machine.mapPhysical(physical, length, physicalRegions, _KVM_MEM_FLAGS_NONE)
// Install the page table mappings. Note that the ordering is
// important; if the pagetable mappings were installed before
diff --git a/pkg/sentry/platform/kvm/allocator.go b/pkg/sentry/platform/kvm/allocator.go
index 80942e9c9..3f35414bb 100644
--- a/pkg/sentry/platform/kvm/allocator.go
+++ b/pkg/sentry/platform/kvm/allocator.go
@@ -54,7 +54,7 @@ func (a allocator) PhysicalFor(ptes *pagetables.PTEs) uintptr {
//
//go:nosplit
func (a allocator) LookupPTEs(physical uintptr) *pagetables.PTEs {
- virtualStart, physicalStart, _, ok := calculateBluepillFault(physical)
+ virtualStart, physicalStart, _, ok := calculateBluepillFault(physical, physicalRegions)
if !ok {
panic(fmt.Sprintf("LookupPTEs failed for 0x%x", physical))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index b97476053..f6459cda9 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -46,9 +46,9 @@ func yield() {
// calculateBluepillFault calculates the fault address range.
//
//go:nosplit
-func calculateBluepillFault(physical uintptr) (virtualStart, physicalStart, length uintptr, ok bool) {
+func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, ok bool) {
alignedPhysical := physical &^ uintptr(usermem.PageSize-1)
- for _, pr := range physicalRegions {
+ for _, pr := range phyRegions {
end := pr.physical + pr.length
if physical < pr.physical || physical >= end {
continue
@@ -77,12 +77,12 @@ func calculateBluepillFault(physical uintptr) (virtualStart, physicalStart, leng
// The corresponding virtual address is returned. This may throw on error.
//
//go:nosplit
-func handleBluepillFault(m *machine, physical uintptr) (uintptr, bool) {
+func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegion, flags uint32) (uintptr, bool) {
// Paging fault: we need to map the underlying physical pages for this
// fault. This all has to be done in this function because we're in a
// signal handler context. (We can't call any functions that might
// split the stack.)
- virtualStart, physicalStart, length, ok := calculateBluepillFault(physical)
+ virtualStart, physicalStart, length, ok := calculateBluepillFault(physical, phyRegions)
if !ok {
return 0, false
}
@@ -96,7 +96,7 @@ func handleBluepillFault(m *machine, physical uintptr) (uintptr, bool) {
yield() // Race with another call.
slot = atomic.SwapUint32(&m.nextSlot, ^uint32(0))
}
- errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart)
+ errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags)
if errno == 0 {
// Successfully added region; we can increment nextSlot and
// allow another set to proceed here.
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 7e8e9f42a..3734bfb7a 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.
@@ -162,7 +162,7 @@ func bluepillHandler(context unsafe.Pointer) {
// For MMIO, the physical address is the first data item.
physical := uintptr(c.runData.data[0])
- virtual, ok := handleBluepillFault(c.machine, physical)
+ virtual, ok := handleBluepillFault(c.machine, physical, physicalRegions, _KVM_MEM_FLAGS_NONE)
if !ok {
c.die(bluepillArchContext(context), "invalid physical address")
return
diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go
index d05f05c29..766131d60 100644
--- a/pkg/sentry/platform/kvm/kvm_const.go
+++ b/pkg/sentry/platform/kvm/kvm_const.go
@@ -62,3 +62,10 @@ const (
_KVM_NR_INTERRUPTS = 0x100
_KVM_NR_CPUID_ENTRIES = 0x100
)
+
+// KVM kvm_memory_region::flags.
+const (
+ _KVM_MEM_LOG_DIRTY_PAGES = uint32(1) << 0
+ _KVM_MEM_READONLY = uint32(1) << 1
+ _KVM_MEM_FLAGS_NONE = 0
+)
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index cc6c138b2..7d02ebf19 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -215,6 +215,17 @@ func newMachine(vm int) (*machine, error) {
return true // Keep iterating.
})
+ var physicalRegionsReadOnly []physicalRegion
+ var physicalRegionsAvailable []physicalRegion
+
+ physicalRegionsReadOnly = rdonlyRegionsForSetMem()
+ physicalRegionsAvailable = availableRegionsForSetMem()
+
+ // Map all read-only regions.
+ for _, r := range physicalRegionsReadOnly {
+ m.mapPhysical(r.physical, r.length, physicalRegionsReadOnly, _KVM_MEM_READONLY)
+ }
+
// Ensure that the currently mapped virtual regions are actually
// available in the VM. Note that this doesn't guarantee no future
// faults, however it should guarantee that everything is available to
@@ -223,6 +234,13 @@ func newMachine(vm int) (*machine, error) {
if excludeVirtualRegion(vr) {
return // skip region.
}
+
+ for _, r := range physicalRegionsReadOnly {
+ if vr.virtual == r.virtual {
+ return
+ }
+ }
+
for virtual := vr.virtual; virtual < vr.virtual+vr.length; {
physical, length, ok := translateToPhysical(virtual)
if !ok {
@@ -236,7 +254,7 @@ func newMachine(vm int) (*machine, error) {
}
// Ensure the physical range is mapped.
- m.mapPhysical(physical, length)
+ m.mapPhysical(physical, length, physicalRegionsAvailable, _KVM_MEM_FLAGS_NONE)
virtual += length
}
})
@@ -256,9 +274,9 @@ func newMachine(vm int) (*machine, error) {
// not available. This attempts to be efficient for calls in the hot path.
//
// This panics on error.
-func (m *machine) mapPhysical(physical, length uintptr) {
+func (m *machine) mapPhysical(physical, length uintptr, phyRegions []physicalRegion, flags uint32) {
for end := physical + length; physical < end; {
- _, physicalStart, length, ok := calculateBluepillFault(physical)
+ _, physicalStart, length, ok := calculateBluepillFault(physical, phyRegions)
if !ok {
// Should never happen.
panic("mapPhysical on unknown physical address")
@@ -266,7 +284,7 @@ func (m *machine) mapPhysical(physical, length uintptr) {
if _, ok := m.mappingCache.LoadOrStore(physicalStart, true); !ok {
// Not present in the cache; requires setting the slot.
- if _, ok := handleBluepillFault(m, physical); !ok {
+ if _, ok := handleBluepillFault(m, physical, phyRegions, flags); !ok {
panic("handleBluepillFault failed")
}
}
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index c1cbe33be..b99fe425e 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -355,3 +355,13 @@ func (m *machine) retryInGuest(fn func()) {
}
}
}
+
+// On x86 platform, the flags for "setMemoryRegion" can always be set as 0.
+// There is no need to return read-only physicalRegions.
+func rdonlyRegionsForSetMem() (phyRegions []physicalRegion) {
+ return nil
+}
+
+func availableRegionsForSetMem() (phyRegions []physicalRegion) {
+ return physicalRegions
+}
diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
index 506ec9af1..61227cafb 100644
--- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
@@ -26,30 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/time"
)
-// setMemoryRegion initializes a region.
-//
-// This may be called from bluepillHandler, and therefore returns an errno
-// directly (instead of wrapping in an error) to avoid allocations.
-//
-//go:nosplit
-func (m *machine) setMemoryRegion(slot int, physical, length, virtual uintptr) syscall.Errno {
- userRegion := userMemoryRegion{
- slot: uint32(slot),
- flags: 0,
- guestPhysAddr: uint64(physical),
- memorySize: uint64(length),
- userspaceAddr: uint64(virtual),
- }
-
- // Set the region.
- _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(m.fd),
- _KVM_SET_USER_MEMORY_REGION,
- uintptr(unsafe.Pointer(&userRegion)))
- return errno
-}
-
// loadSegments copies the current segments.
//
// This may be called from within the signal context and throws on error.
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
new file mode 100644
index 000000000..b7e2cfb9d
--- /dev/null
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -0,0 +1,61 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+// Get all read-only physicalRegions.
+func rdonlyRegionsForSetMem() (phyRegions []physicalRegion) {
+ var rdonlyRegions []region
+
+ applyVirtualRegions(func(vr virtualRegion) {
+ if excludeVirtualRegion(vr) {
+ return
+ }
+
+ if !vr.accessType.Write && vr.accessType.Read {
+ rdonlyRegions = append(rdonlyRegions, vr.region)
+ }
+ })
+
+ for _, r := range rdonlyRegions {
+ physical, _, ok := translateToPhysical(r.virtual)
+ if !ok {
+ continue
+ }
+
+ phyRegions = append(phyRegions, physicalRegion{
+ region: region{
+ virtual: r.virtual,
+ length: r.length,
+ },
+ physical: physical,
+ })
+ }
+
+ return phyRegions
+}
+
+// Get all available physicalRegions.
+func availableRegionsForSetMem() (phyRegions []physicalRegion) {
+ var excludeRegions []region
+ applyVirtualRegions(func(vr virtualRegion) {
+ if !vr.accessType.Write {
+ excludeRegions = append(excludeRegions, vr.region)
+ }
+ })
+
+ phyRegions = computePhysicalRegions(excludeRegions)
+
+ return phyRegions
+}
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index 405e00292..ed9433311 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.
@@ -35,6 +35,30 @@ func entersyscall()
//go:linkname exitsyscall runtime.exitsyscall
func exitsyscall()
+// setMemoryRegion initializes a region.
+//
+// This may be called from bluepillHandler, and therefore returns an errno
+// directly (instead of wrapping in an error) to avoid allocations.
+//
+//go:nosplit
+func (m *machine) setMemoryRegion(slot int, physical, length, virtual uintptr, flags uint32) syscall.Errno {
+ userRegion := userMemoryRegion{
+ slot: uint32(slot),
+ flags: uint32(flags),
+ guestPhysAddr: uint64(physical),
+ memorySize: uint64(length),
+ userspaceAddr: uint64(virtual),
+ }
+
+ // Set the region.
+ _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(m.fd),
+ _KVM_SET_USER_MEMORY_REGION,
+ uintptr(unsafe.Pointer(&userRegion)))
+ return errno
+}
+
// mapRunData maps the vCPU run data.
func mapRunData(fd int) (*runData, error) {
r, _, errno := syscall.RawSyscall6(
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index b699b057d..ddb1f41e3 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -335,7 +335,8 @@ func (t *thread) unexpectedStubExit() {
// these cases, we don't need to panic. There is no reasons to
// think that something wrong in gVisor.
log.Warningf("The ptrace stub process %v has been killed by SIGKILL.", t.tgid)
- syscall.Kill(os.Getpid(), syscall.SIGKILL)
+ pid := os.Getpid()
+ syscall.Tgkill(pid, pid, syscall.Signal(syscall.SIGKILL))
}
t.dumpAndPanic(fmt.Sprintf("wait failed: the process %d:%d exited: %x (err %v)", t.tgid, t.tid, msg, err))
}
diff --git a/pkg/sentry/platform/ptrace/subprocess_unsafe.go b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
index b80a3604d..2ae6b9f9d 100644
--- a/pkg/sentry/platform/ptrace/subprocess_unsafe.go
+++ b/pkg/sentry/platform/ptrace/subprocess_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index 4d174dda4..8b66a719d 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -32,6 +32,7 @@ go_library(
"//pkg/sentry/usermem",
"//pkg/syserr",
"//pkg/syserror",
+ "//pkg/tcpip/stack",
"//pkg/waiter",
],
)
diff --git a/pkg/sentry/socket/hostinet/stack.go b/pkg/sentry/socket/hostinet/stack.go
index 3a4fdec47..e67b46c9e 100644
--- a/pkg/sentry/socket/hostinet/stack.go
+++ b/pkg/sentry/socket/hostinet/stack.go
@@ -16,8 +16,11 @@ package hostinet
import (
"fmt"
+ "io"
"io/ioutil"
"os"
+ "reflect"
+ "strconv"
"strings"
"syscall"
@@ -26,7 +29,9 @@ import (
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/syserr"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
)
var defaultRecvBufSize = inet.TCPBufferSize{
@@ -51,6 +56,8 @@ type Stack struct {
tcpRecvBufSize inet.TCPBufferSize
tcpSendBufSize inet.TCPBufferSize
tcpSACKEnabled bool
+ netDevFile *os.File
+ netSNMPFile *os.File
}
// NewStack returns an empty Stack containing no configuration.
@@ -98,6 +105,18 @@ func (s *Stack) Configure() error {
log.Warningf("Failed to read if TCP SACK if enabled, setting to true")
}
+ if f, err := os.Open("/proc/net/dev"); err != nil {
+ log.Warningf("Failed to open /proc/net/dev: %v", err)
+ } else {
+ s.netDevFile = f
+ }
+
+ if f, err := os.Open("/proc/net/snmp"); err != nil {
+ log.Warningf("Failed to open /proc/net/snmp: %v", err)
+ } else {
+ s.netSNMPFile = f
+ }
+
return nil
}
@@ -326,9 +345,95 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
return syserror.EACCES
}
+// getLine reads one line from proc file, with specified prefix.
+// The last argument, withHeader, specifies if it contains line header.
+func getLine(f *os.File, prefix string, withHeader bool) string {
+ data := make([]byte, 4096)
+
+ if _, err := f.Seek(0, 0); err != nil {
+ return ""
+ }
+
+ if _, err := io.ReadFull(f, data); err != io.ErrUnexpectedEOF {
+ return ""
+ }
+
+ prefix = prefix + ":"
+ lines := strings.Split(string(data), "\n")
+ for _, l := range lines {
+ l = strings.TrimSpace(l)
+ if strings.HasPrefix(l, prefix) {
+ if withHeader {
+ withHeader = false
+ continue
+ }
+ return l
+ }
+ }
+ return ""
+}
+
+func toSlice(i interface{}) []uint64 {
+ v := reflect.Indirect(reflect.ValueOf(i))
+ return v.Slice(0, v.Len()).Interface().([]uint64)
+}
+
// Statistics implements inet.Stack.Statistics.
func (s *Stack) Statistics(stat interface{}, arg string) error {
- return syserror.EOPNOTSUPP
+ var (
+ snmpTCP bool
+ rawLine string
+ sliceStat []uint64
+ )
+
+ switch stat.(type) {
+ case *inet.StatDev:
+ if s.netDevFile == nil {
+ return fmt.Errorf("/proc/net/dev is not opened for hostinet")
+ }
+ rawLine = getLine(s.netDevFile, arg, false /* with no header */)
+ case *inet.StatSNMPIP, *inet.StatSNMPICMP, *inet.StatSNMPICMPMSG, *inet.StatSNMPTCP, *inet.StatSNMPUDP, *inet.StatSNMPUDPLite:
+ if s.netSNMPFile == nil {
+ return fmt.Errorf("/proc/net/snmp is not opened for hostinet")
+ }
+ rawLine = getLine(s.netSNMPFile, arg, true)
+ default:
+ return syserr.ErrEndpointOperation.ToError()
+ }
+
+ if rawLine == "" {
+ return fmt.Errorf("Failed to get raw line")
+ }
+
+ parts := strings.SplitN(rawLine, ":", 2)
+ if len(parts) != 2 {
+ return fmt.Errorf("Failed to get prefix from: %q", rawLine)
+ }
+
+ sliceStat = toSlice(stat)
+ fields := strings.Fields(strings.TrimSpace(parts[1]))
+ if len(fields) != len(sliceStat) {
+ return fmt.Errorf("Failed to parse fields: %q", rawLine)
+ }
+ if _, ok := stat.(*inet.StatSNMPTCP); ok {
+ snmpTCP = true
+ }
+ for i := 0; i < len(sliceStat); i++ {
+ var err error
+ if snmpTCP && i == 3 {
+ var tmp int64
+ // MaxConn field is signed, RFC 2012.
+ tmp, err = strconv.ParseInt(fields[i], 10, 64)
+ sliceStat[i] = uint64(tmp) // Convert back to int before use.
+ } else {
+ sliceStat[i], err = strconv.ParseUint(fields[i], 10, 64)
+ }
+ if err != nil {
+ return fmt.Errorf("Failed to parse field %d from: %q, %v", i, rawLine, err)
+ }
+ }
+
+ return nil
}
// RouteTable implements inet.Stack.RouteTable.
@@ -338,3 +443,12 @@ func (s *Stack) RouteTable() []inet.Route {
// Resume implements inet.Stack.Resume.
func (s *Stack) Resume() {}
+
+// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
+func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil }
+
+// CleanupEndpoints implements inet.Stack.CleanupEndpoints.
+func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil }
+
+// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
+func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index debf9bce0..27c6692c4 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -149,6 +149,8 @@ var Metrics = tcpip.Stats{
TCP: tcpip.TCPStats{
ActiveConnectionOpenings: mustCreateMetric("/netstack/tcp/active_connection_openings", "Number of connections opened successfully via Connect."),
PassiveConnectionOpenings: mustCreateMetric("/netstack/tcp/passive_connection_openings", "Number of connections opened successfully via Listen."),
+ CurrentEstablished: mustCreateMetric("/netstack/tcp/current_established", "Number of connections in either ESTABLISHED or CLOSE-WAIT state now."),
+ EstablishedResets: mustCreateMetric("/netstack/tcp/established_resets", "Number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state"),
ListenOverflowSynDrop: mustCreateMetric("/netstack/tcp/listen_overflow_syn_drop", "Number of times the listen queue overflowed and a SYN was dropped."),
ListenOverflowAckDrop: mustCreateMetric("/netstack/tcp/listen_overflow_ack_drop", "Number of times the listen queue overflowed and the final ACK in the handshake was dropped."),
ListenOverflowSynCookieSent: mustCreateMetric("/netstack/tcp/listen_overflow_syn_cookie_sent", "Number of times a SYN cookie was sent."),
@@ -279,7 +281,7 @@ type SocketOperations struct {
// New creates a new endpoint socket.
func New(t *kernel.Task, family int, skType linux.SockType, protocol int, queue *waiter.Queue, endpoint tcpip.Endpoint) (*fs.File, *syserr.Error) {
if skType == linux.SOCK_STREAM {
- if err := endpoint.SetSockOpt(tcpip.DelayOption(1)); err != nil {
+ if err := endpoint.SetSockOptInt(tcpip.DelayOption, 1); err != nil {
return nil, syserr.TranslateNetstackError(err)
}
}
@@ -1053,8 +1055,8 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return nil, syserr.ErrInvalidArgument
}
- var v tcpip.DelayOption
- if err := ep.GetSockOpt(&v); err != nil {
+ v, err := ep.GetSockOptInt(tcpip.DelayOption)
+ if err != nil {
return nil, syserr.TranslateNetstackError(err)
}
@@ -1495,11 +1497,11 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
v := usermem.ByteOrder.Uint32(optVal)
- var o tcpip.DelayOption
+ var o int
if v == 0 {
o = 1
}
- return syserr.TranslateNetstackError(ep.SetSockOpt(o))
+ return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.DelayOption, o))
case linux.TCP_CORK:
if len(optVal) < sizeOfInt32 {
diff --git a/pkg/sentry/socket/netstack/stack.go b/pkg/sentry/socket/netstack/stack.go
index fda0156e5..a0db2d4fd 100644
--- a/pkg/sentry/socket/netstack/stack.go
+++ b/pkg/sentry/socket/netstack/stack.go
@@ -144,7 +144,98 @@ func (s *Stack) SetTCPSACKEnabled(enabled bool) error {
// Statistics implements inet.Stack.Statistics.
func (s *Stack) Statistics(stat interface{}, arg string) error {
- return syserr.ErrEndpointOperation.ToError()
+ switch stats := stat.(type) {
+ case *inet.StatSNMPIP:
+ ip := Metrics.IP
+ *stats = inet.StatSNMPIP{
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/Forwarding.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/DefaultTTL.
+ ip.PacketsReceived.Value(), // InReceives.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/InHdrErrors.
+ ip.InvalidAddressesReceived.Value(), // InAddrErrors.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/ForwDatagrams.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/InUnknownProtos.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/InDiscards.
+ ip.PacketsDelivered.Value(), // InDelivers.
+ ip.PacketsSent.Value(), // OutRequests.
+ ip.OutgoingPacketErrors.Value(), // OutDiscards.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/OutNoRoutes.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmTimeout.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmReqds.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmOKs.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/ReasmFails.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/FragOKs.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/FragFails.
+ 0, // TODO(gvisor.dev/issue/969): Support Ip/FragCreates.
+ }
+ case *inet.StatSNMPICMP:
+ in := Metrics.ICMP.V4PacketsReceived.ICMPv4PacketStats
+ out := Metrics.ICMP.V4PacketsSent.ICMPv4PacketStats
+ *stats = inet.StatSNMPICMP{
+ 0, // TODO(gvisor.dev/issue/969): Support Icmp/InMsgs.
+ Metrics.ICMP.V4PacketsSent.Dropped.Value(), // InErrors.
+ 0, // TODO(gvisor.dev/issue/969): Support Icmp/InCsumErrors.
+ in.DstUnreachable.Value(), // InDestUnreachs.
+ in.TimeExceeded.Value(), // InTimeExcds.
+ in.ParamProblem.Value(), // InParmProbs.
+ in.SrcQuench.Value(), // InSrcQuenchs.
+ in.Redirect.Value(), // InRedirects.
+ in.Echo.Value(), // InEchos.
+ in.EchoReply.Value(), // InEchoReps.
+ in.Timestamp.Value(), // InTimestamps.
+ in.TimestampReply.Value(), // InTimestampReps.
+ in.InfoRequest.Value(), // InAddrMasks.
+ in.InfoReply.Value(), // InAddrMaskReps.
+ 0, // TODO(gvisor.dev/issue/969): Support Icmp/OutMsgs.
+ Metrics.ICMP.V4PacketsReceived.Invalid.Value(), // OutErrors.
+ out.DstUnreachable.Value(), // OutDestUnreachs.
+ out.TimeExceeded.Value(), // OutTimeExcds.
+ out.ParamProblem.Value(), // OutParmProbs.
+ out.SrcQuench.Value(), // OutSrcQuenchs.
+ out.Redirect.Value(), // OutRedirects.
+ out.Echo.Value(), // OutEchos.
+ out.EchoReply.Value(), // OutEchoReps.
+ out.Timestamp.Value(), // OutTimestamps.
+ out.TimestampReply.Value(), // OutTimestampReps.
+ out.InfoRequest.Value(), // OutAddrMasks.
+ out.InfoReply.Value(), // OutAddrMaskReps.
+ }
+ case *inet.StatSNMPTCP:
+ tcp := Metrics.TCP
+ // RFC 2012 (updates 1213): SNMPv2-MIB-TCP.
+ *stats = inet.StatSNMPTCP{
+ 1, // RtoAlgorithm.
+ 200, // RtoMin.
+ 120000, // RtoMax.
+ (1<<64 - 1), // MaxConn.
+ tcp.ActiveConnectionOpenings.Value(), // ActiveOpens.
+ tcp.PassiveConnectionOpenings.Value(), // PassiveOpens.
+ tcp.FailedConnectionAttempts.Value(), // AttemptFails.
+ tcp.EstablishedResets.Value(), // EstabResets.
+ tcp.CurrentEstablished.Value(), // CurrEstab.
+ tcp.ValidSegmentsReceived.Value(), // InSegs.
+ tcp.SegmentsSent.Value(), // OutSegs.
+ tcp.Retransmits.Value(), // RetransSegs.
+ tcp.InvalidSegmentsReceived.Value(), // InErrs.
+ tcp.ResetsSent.Value(), // OutRsts.
+ tcp.ChecksumErrors.Value(), // InCsumErrors.
+ }
+ case *inet.StatSNMPUDP:
+ udp := Metrics.UDP
+ *stats = inet.StatSNMPUDP{
+ udp.PacketsReceived.Value(), // InDatagrams.
+ udp.UnknownPortErrors.Value(), // NoPorts.
+ 0, // TODO(gvisor.dev/issue/969): Support Udp/InErrors.
+ udp.PacketsSent.Value(), // OutDatagrams.
+ udp.ReceiveBufferErrors.Value(), // RcvbufErrors.
+ 0, // TODO(gvisor.dev/issue/969): Support Udp/SndbufErrors.
+ 0, // TODO(gvisor.dev/issue/969): Support Udp/InCsumErrors.
+ 0, // TODO(gvisor.dev/issue/969): Support Udp/IgnoredMulti.
+ }
+ default:
+ return syserr.ErrEndpointOperation.ToError()
+ }
+ return nil
}
// RouteTable implements inet.Stack.RouteTable.
@@ -200,3 +291,18 @@ func (s *Stack) FillDefaultIPTables() {
func (s *Stack) Resume() {
s.Stack.Resume()
}
+
+// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
+func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint {
+ return s.Stack.RegisteredEndpoints()
+}
+
+// CleanupEndpoints implements inet.Stack.CleanupEndpoints.
+func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint {
+ return s.Stack.CleanupEndpoints()
+}
+
+// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
+func (s *Stack) RestoreCleanupEndpoints(es []stack.TransportEndpoint) {
+ s.Stack.RestoreCleanupEndpoints(es)
+}
diff --git a/pkg/sentry/socket/rpcinet/BUILD b/pkg/sentry/socket/rpcinet/BUILD
index 3a6baa308..4668b87d1 100644
--- a/pkg/sentry/socket/rpcinet/BUILD
+++ b/pkg/sentry/socket/rpcinet/BUILD
@@ -37,6 +37,7 @@ go_library(
"//pkg/syserror",
"//pkg/tcpip",
"//pkg/tcpip/buffer",
+ "//pkg/tcpip/stack",
"//pkg/unet",
"//pkg/waiter",
],
diff --git a/pkg/sentry/socket/rpcinet/stack.go b/pkg/sentry/socket/rpcinet/stack.go
index 5dcb6b455..f7878a760 100644
--- a/pkg/sentry/socket/rpcinet/stack.go
+++ b/pkg/sentry/socket/rpcinet/stack.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/conn"
"gvisor.dev/gvisor/pkg/sentry/socket/rpcinet/notifier"
"gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/unet"
)
@@ -165,3 +166,12 @@ func (s *Stack) RouteTable() []inet.Route {
// Resume implements inet.Stack.Resume.
func (s *Stack) Resume() {}
+
+// RegisteredEndpoints implements inet.Stack.RegisteredEndpoints.
+func (s *Stack) RegisteredEndpoints() []stack.TransportEndpoint { return nil }
+
+// CleanupEndpoints implements inet.Stack.CleanupEndpoints.
+func (s *Stack) CleanupEndpoints() []stack.TransportEndpoint { return nil }
+
+// RestoreCleanupEndpoints implements inet.Stack.RestoreCleanupEndpoints.
+func (s *Stack) RestoreCleanupEndpoints([]stack.TransportEndpoint) {}
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index fb2c1777f..4c0bf96e4 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -79,6 +79,7 @@ go_library(
"//pkg/sentry/kernel/signalfd",
"//pkg/sentry/kernel/time",
"//pkg/sentry/limits",
+ "//pkg/sentry/loader",
"//pkg/sentry/memmap",
"//pkg/sentry/mm",
"//pkg/sentry/safemem",
diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go
index aedb6d774..3021440ed 100644
--- a/pkg/sentry/syscalls/linux/linux64_amd64.go
+++ b/pkg/sentry/syscalls/linux/linux64_amd64.go
@@ -362,7 +362,7 @@ var AMD64 = &kernel.SyscallTable{
319: syscalls.Supported("memfd_create", MemfdCreate),
320: syscalls.CapError("kexec_file_load", linux.CAP_SYS_BOOT, "", nil),
321: syscalls.CapError("bpf", linux.CAP_SYS_ADMIN, "", nil),
- 322: syscalls.PartiallySupported("execveat", Execveat, "No support for AT_EMPTY_PATH, AT_SYMLINK_FOLLOW.", nil),
+ 322: syscalls.Supported("execveat", Execveat),
323: syscalls.ErrorWithEvent("userfaultfd", syserror.ENOSYS, "", []string{"gvisor.dev/issue/266"}), // TODO(b/118906345)
324: syscalls.ErrorWithEvent("membarrier", syserror.ENOSYS, "", []string{"gvisor.dev/issue/267"}), // TODO(b/118904897)
325: syscalls.PartiallySupported("mlock2", Mlock2, "Stub implementation. The sandbox lacks appropriate permissions.", nil),
diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go
index 6e425f1ec..4115116ff 100644
--- a/pkg/sentry/syscalls/linux/sys_thread.go
+++ b/pkg/sentry/syscalls/linux/sys_thread.go
@@ -23,6 +23,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
+ "gvisor.dev/gvisor/pkg/sentry/loader"
"gvisor.dev/gvisor/pkg/sentry/usermem"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -105,38 +106,66 @@ func execveat(t *kernel.Task, dirFD int32, pathnameAddr, argvAddr, envvAddr user
}
}
- if flags != 0 {
- // TODO(b/128449944): Handle AT_EMPTY_PATH and AT_SYMLINK_NOFOLLOW.
- t.Kernel().EmitUnimplementedEvent(t)
- return 0, nil, syserror.ENOSYS
+ if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+ atEmptyPath := flags&linux.AT_EMPTY_PATH != 0
+ if !atEmptyPath && len(pathname) == 0 {
+ return 0, nil, syserror.ENOENT
}
+ resolveFinal := flags&linux.AT_SYMLINK_NOFOLLOW == 0
root := t.FSContext().RootDirectory()
defer root.DecRef()
var wd *fs.Dirent
+ var executable *fs.File
+ var closeOnExec bool
if dirFD == linux.AT_FDCWD || path.IsAbs(pathname) {
- // If pathname is absolute, LoadTaskImage() will ignore the wd.
+ // Even if the pathname is absolute, we may still need the wd
+ // for interpreter scripts if the path of the interpreter is
+ // relative.
wd = t.FSContext().WorkingDirectory()
} else {
// Need to extract the given FD.
- f := t.GetFile(dirFD)
+ f, fdFlags := t.FDTable().Get(dirFD)
if f == nil {
return 0, nil, syserror.EBADF
}
defer f.DecRef()
+ closeOnExec = fdFlags.CloseOnExec
- wd = f.Dirent
- wd.IncRef()
- if !fs.IsDir(wd.Inode.StableAttr) {
- return 0, nil, syserror.ENOTDIR
+ if atEmptyPath && len(pathname) == 0 {
+ executable = f
+ } else {
+ wd = f.Dirent
+ wd.IncRef()
+ if !fs.IsDir(wd.Inode.StableAttr) {
+ return 0, nil, syserror.ENOTDIR
+ }
}
}
- defer wd.DecRef()
+ if wd != nil {
+ defer wd.DecRef()
+ }
// Load the new TaskContext.
- maxTraversals := uint(linux.MaxSymlinkTraversals)
- tc, se := t.Kernel().LoadTaskImage(t, t.MountNamespace(), root, wd, &maxTraversals, pathname, nil, argv, envv, t.Arch().FeatureSet())
+ remainingTraversals := uint(linux.MaxSymlinkTraversals)
+ loadArgs := loader.LoadArgs{
+ Mounts: t.MountNamespace(),
+ Root: root,
+ WorkingDirectory: wd,
+ RemainingTraversals: &remainingTraversals,
+ ResolveFinal: resolveFinal,
+ Filename: pathname,
+ File: executable,
+ CloseOnExec: closeOnExec,
+ Argv: argv,
+ Envv: envv,
+ Features: t.Arch().FeatureSet(),
+ }
+
+ tc, se := t.Kernel().LoadTaskImage(t, loadArgs)
if se != nil {
return 0, nil, se.ToError()
}
diff --git a/pkg/sentry/vfs/mount_unsafe.go b/pkg/sentry/vfs/mount_unsafe.go
index b0511aa40..75e6c7dfa 100644
--- a/pkg/sentry/vfs/mount_unsafe.go
+++ b/pkg/sentry/vfs/mount_unsafe.go
@@ -13,7 +13,7 @@
// limitations under the License.
// +build go1.12
-// +build !go1.14
+// +build !go1.15
// Check go:linkname function signatures when updating Go version.