summaryrefslogtreecommitdiffhomepage
path: root/runsc/boot
diff options
context:
space:
mode:
Diffstat (limited to 'runsc/boot')
-rw-r--r--runsc/boot/BUILD9
-rw-r--r--runsc/boot/compat.go61
-rw-r--r--runsc/boot/compat_amd64.go85
-rw-r--r--runsc/boot/compat_arm64.go91
-rw-r--r--runsc/boot/compat_test.go45
-rw-r--r--runsc/boot/config.go53
-rw-r--r--runsc/boot/controller.go30
-rw-r--r--runsc/boot/filter/BUILD2
-rw-r--r--runsc/boot/filter/config.go91
-rw-r--r--runsc/boot/filter/config_amd64.go31
-rw-r--r--runsc/boot/filter/config_arm64.go21
-rw-r--r--runsc/boot/fs.go152
-rw-r--r--runsc/boot/fs_test.go131
-rw-r--r--runsc/boot/loader.go119
-rw-r--r--runsc/boot/loader_amd64.go27
-rw-r--r--runsc/boot/loader_arm64.go27
-rw-r--r--runsc/boot/network.go47
-rw-r--r--runsc/boot/user.go28
-rw-r--r--runsc/boot/user_test.go3
19 files changed, 789 insertions, 264 deletions
diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index 588bb8851..6226b63f8 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -7,6 +7,7 @@ go_library(
srcs = [
"compat.go",
"compat_amd64.go",
+ "compat_arm64.go",
"config.go",
"controller.go",
"debug.go",
@@ -15,6 +16,8 @@ go_library(
"fs.go",
"limits.go",
"loader.go",
+ "loader_amd64.go",
+ "loader_arm64.go",
"network.go",
"pprof.go",
"strace.go",
@@ -57,10 +60,11 @@ go_library(
"//pkg/sentry/pgalloc",
"//pkg/sentry/platform",
"//pkg/sentry/sighandling",
- "//pkg/sentry/socket/epsocket",
"//pkg/sentry/socket/hostinet",
"//pkg/sentry/socket/netlink",
"//pkg/sentry/socket/netlink/route",
+ "//pkg/sentry/socket/netlink/uevent",
+ "//pkg/sentry/socket/netstack",
"//pkg/sentry/socket/unix",
"//pkg/sentry/state",
"//pkg/sentry/strace",
@@ -80,6 +84,7 @@ go_library(
"//pkg/tcpip/network/ipv6",
"//pkg/tcpip/stack",
"//pkg/tcpip/transport/icmp",
+ "//pkg/tcpip/transport/raw",
"//pkg/tcpip/transport/tcp",
"//pkg/tcpip/transport/udp",
"//pkg/urpc",
@@ -106,9 +111,9 @@ go_test(
"//pkg/control/server",
"//pkg/log",
"//pkg/p9",
- "//pkg/sentry/arch:registers_go_proto",
"//pkg/sentry/context/contexttest",
"//pkg/sentry/fs",
+ "//pkg/sentry/kernel/auth",
"//pkg/unet",
"//runsc/fsgofer",
"@com_github_opencontainers_runtime-spec//specs-go:go_default_library",
diff --git a/runsc/boot/compat.go b/runsc/boot/compat.go
index 07e35ab10..352e710d2 100644
--- a/runsc/boot/compat.go
+++ b/runsc/boot/compat.go
@@ -21,10 +21,8 @@ import (
"syscall"
"github.com/golang/protobuf/proto"
- "gvisor.dev/gvisor/pkg/abi"
"gvisor.dev/gvisor/pkg/eventchannel"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/arch"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
ucspb "gvisor.dev/gvisor/pkg/sentry/kernel/uncaught_signal_go_proto"
"gvisor.dev/gvisor/pkg/sentry/strace"
@@ -53,9 +51,9 @@ type compatEmitter struct {
}
func newCompatEmitter(logFD int) (*compatEmitter, error) {
- nameMap, ok := strace.Lookup(abi.Linux, arch.AMD64)
+ nameMap, ok := getSyscallNameMap()
if !ok {
- return nil, fmt.Errorf("amd64 Linux syscall table not found")
+ return nil, fmt.Errorf("Linux syscall table not found")
}
c := &compatEmitter{
@@ -86,16 +84,16 @@ func (c *compatEmitter) Emit(msg proto.Message) (bool, error) {
}
func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
- regs := us.Registers.GetArch().(*rpb.Registers_Amd64).Amd64
+ regs := us.Registers
c.mu.Lock()
defer c.mu.Unlock()
- sysnr := regs.OrigRax
+ sysnr := syscallNum(regs)
tr := c.trackers[sysnr]
if tr == nil {
switch sysnr {
- case syscall.SYS_PRCTL, syscall.SYS_ARCH_PRCTL:
+ case syscall.SYS_PRCTL:
// args: cmd, ...
tr = newArgsTracker(0)
@@ -112,10 +110,14 @@ func (c *compatEmitter) emitUnimplementedSyscall(us *spb.UnimplementedSyscall) {
tr = newArgsTracker(2)
default:
- tr = &onceTracker{}
+ tr = newArchArgsTracker(sysnr)
+ if tr == nil {
+ tr = &onceTracker{}
+ }
}
c.trackers[sysnr] = tr
}
+
if tr.shouldReport(regs) {
c.sink.Infof("Unsupported syscall: %s, regs: %+v", c.nameMap.Name(uintptr(sysnr)), regs)
tr.onReported(regs)
@@ -139,10 +141,10 @@ func (c *compatEmitter) Close() error {
// the syscall and arguments.
type syscallTracker interface {
// shouldReport returns true is the syscall should be reported.
- shouldReport(regs *rpb.AMD64Registers) bool
+ shouldReport(regs *rpb.Registers) bool
// onReported marks the syscall as reported.
- onReported(regs *rpb.AMD64Registers)
+ onReported(regs *rpb.Registers)
}
// onceTracker reports only a single time, used for most syscalls.
@@ -150,10 +152,45 @@ type onceTracker struct {
reported bool
}
-func (o *onceTracker) shouldReport(_ *rpb.AMD64Registers) bool {
+func (o *onceTracker) shouldReport(_ *rpb.Registers) bool {
return !o.reported
}
-func (o *onceTracker) onReported(_ *rpb.AMD64Registers) {
+func (o *onceTracker) onReported(_ *rpb.Registers) {
o.reported = true
}
+
+// argsTracker reports only once for each different combination of arguments.
+// It's used for generic syscalls like ioctl to report once per 'cmd'.
+type argsTracker struct {
+ // argsIdx is the syscall arguments to use as unique ID.
+ argsIdx []int
+ reported map[string]struct{}
+ count int
+}
+
+func newArgsTracker(argIdx ...int) *argsTracker {
+ return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
+}
+
+// key returns the command based on the syscall argument index.
+func (a *argsTracker) key(regs *rpb.Registers) string {
+ var rv string
+ for _, idx := range a.argsIdx {
+ rv += fmt.Sprintf("%d|", argVal(idx, regs))
+ }
+ return rv
+}
+
+func (a *argsTracker) shouldReport(regs *rpb.Registers) bool {
+ if a.count >= reportLimit {
+ return false
+ }
+ _, ok := a.reported[a.key(regs)]
+ return !ok
+}
+
+func (a *argsTracker) onReported(regs *rpb.Registers) {
+ a.count++
+ a.reported[a.key(regs)] = struct{}{}
+}
diff --git a/runsc/boot/compat_amd64.go b/runsc/boot/compat_amd64.go
index 43cd0db94..42b0ca8b0 100644
--- a/runsc/boot/compat_amd64.go
+++ b/runsc/boot/compat_amd64.go
@@ -16,62 +16,81 @@ package boot
import (
"fmt"
+ "syscall"
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
)
// reportLimit is the max number of events that should be reported per tracker.
const reportLimit = 100
-// argsTracker reports only once for each different combination of arguments.
-// It's used for generic syscalls like ioctl to report once per 'cmd'.
-type argsTracker struct {
- // argsIdx is the syscall arguments to use as unique ID.
- argsIdx []int
- reported map[string]struct{}
- count int
+// newRegs create a empty Registers instance.
+func newRegs() *rpb.Registers {
+ return &rpb.Registers{
+ Arch: &rpb.Registers_Amd64{
+ Amd64: &rpb.AMD64Registers{},
+ },
+ }
}
-func newArgsTracker(argIdx ...int) *argsTracker {
- return &argsTracker{argsIdx: argIdx, reported: make(map[string]struct{})}
-}
+func argVal(argIdx int, regs *rpb.Registers) uint32 {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
-// cmd returns the command based on the syscall argument index.
-func (a *argsTracker) key(regs *rpb.AMD64Registers) string {
- var rv string
- for _, idx := range a.argsIdx {
- rv += fmt.Sprintf("%d|", argVal(idx, regs))
+ switch argIdx {
+ case 0:
+ return uint32(amd64Regs.Rdi)
+ case 1:
+ return uint32(amd64Regs.Rsi)
+ case 2:
+ return uint32(amd64Regs.Rdx)
+ case 3:
+ return uint32(amd64Regs.R10)
+ case 4:
+ return uint32(amd64Regs.R8)
+ case 5:
+ return uint32(amd64Regs.R9)
}
- return rv
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
-func argVal(argIdx int, regs *rpb.AMD64Registers) uint32 {
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+
switch argIdx {
case 0:
- return uint32(regs.Rdi)
+ amd64Regs.Rdi = argVal
case 1:
- return uint32(regs.Rsi)
+ amd64Regs.Rsi = argVal
case 2:
- return uint32(regs.Rdx)
+ amd64Regs.Rdx = argVal
case 3:
- return uint32(regs.R10)
+ amd64Regs.R10 = argVal
case 4:
- return uint32(regs.R8)
+ amd64Regs.R8 = argVal
case 5:
- return uint32(regs.R9)
+ amd64Regs.R9 = argVal
+ default:
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
- panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
}
-func (a *argsTracker) shouldReport(regs *rpb.AMD64Registers) bool {
- if a.count >= reportLimit {
- return false
- }
- _, ok := a.reported[a.key(regs)]
- return !ok
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+ return strace.Lookup(abi.Linux, arch.AMD64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+ amd64Regs := regs.GetArch().(*rpb.Registers_Amd64).Amd64
+ return amd64Regs.OrigRax
}
-func (a *argsTracker) onReported(regs *rpb.AMD64Registers) {
- a.count++
- a.reported[a.key(regs)] = struct{}{}
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+ switch sysnr {
+ case syscall.SYS_ARCH_PRCTL:
+ // args: cmd, ...
+ return newArgsTracker(0)
+ }
+ return nil
}
diff --git a/runsc/boot/compat_arm64.go b/runsc/boot/compat_arm64.go
new file mode 100644
index 000000000..f784cd237
--- /dev/null
+++ b/runsc/boot/compat_arm64.go
@@ -0,0 +1,91 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package boot
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
+ "gvisor.dev/gvisor/pkg/sentry/strace"
+)
+
+// reportLimit is the max number of events that should be reported per tracker.
+const reportLimit = 100
+
+// newRegs create a empty Registers instance.
+func newRegs() *rpb.Registers {
+ return &rpb.Registers{
+ Arch: &rpb.Registers_Arm64{
+ Arm64: &rpb.ARM64Registers{},
+ },
+ }
+}
+
+func argVal(argIdx int, regs *rpb.Registers) uint32 {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+ switch argIdx {
+ case 0:
+ return uint32(arm64Regs.R0)
+ case 1:
+ return uint32(arm64Regs.R1)
+ case 2:
+ return uint32(arm64Regs.R2)
+ case 3:
+ return uint32(arm64Regs.R3)
+ case 4:
+ return uint32(arm64Regs.R4)
+ case 5:
+ return uint32(arm64Regs.R5)
+ }
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+}
+
+func setArgVal(argIdx int, argVal uint64, regs *rpb.Registers) {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+
+ switch argIdx {
+ case 0:
+ arm64Regs.R0 = argVal
+ case 1:
+ arm64Regs.R1 = argVal
+ case 2:
+ arm64Regs.R2 = argVal
+ case 3:
+ arm64Regs.R3 = argVal
+ case 4:
+ arm64Regs.R4 = argVal
+ case 5:
+ arm64Regs.R5 = argVal
+ default:
+ panic(fmt.Sprintf("invalid syscall argument index %d", argIdx))
+ }
+}
+
+func getSyscallNameMap() (strace.SyscallMap, bool) {
+ return strace.Lookup(abi.Linux, arch.ARM64)
+}
+
+func syscallNum(regs *rpb.Registers) uint64 {
+ arm64Regs := regs.GetArch().(*rpb.Registers_Arm64).Arm64
+ return arm64Regs.R8
+}
+
+func newArchArgsTracker(sysnr uint64) syscallTracker {
+ // currently, no arch specific syscalls need to be handled here.
+ return nil
+}
diff --git a/runsc/boot/compat_test.go b/runsc/boot/compat_test.go
index 388298d8d..839c5303b 100644
--- a/runsc/boot/compat_test.go
+++ b/runsc/boot/compat_test.go
@@ -16,8 +16,6 @@ package boot
import (
"testing"
-
- rpb "gvisor.dev/gvisor/pkg/sentry/arch/registers_go_proto"
)
func TestOnceTracker(t *testing.T) {
@@ -35,31 +33,34 @@ func TestOnceTracker(t *testing.T) {
func TestArgsTracker(t *testing.T) {
for _, tc := range []struct {
- name string
- idx []int
- rdi1 uint64
- rdi2 uint64
- rsi1 uint64
- rsi2 uint64
- want bool
+ name string
+ idx []int
+ arg1_1 uint64
+ arg1_2 uint64
+ arg2_1 uint64
+ arg2_2 uint64
+ want bool
}{
- {name: "same rdi", idx: []int{0}, rdi1: 123, rdi2: 123, want: false},
- {name: "same rsi", idx: []int{1}, rsi1: 123, rsi2: 123, want: false},
- {name: "diff rdi", idx: []int{0}, rdi1: 123, rdi2: 321, want: true},
- {name: "diff rsi", idx: []int{1}, rsi1: 123, rsi2: 321, want: true},
- {name: "cmd is uint32", idx: []int{0}, rsi1: 0xdead00000123, rsi2: 0xbeef00000123, want: false},
- {name: "same 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 123, rdi2: 321, want: false},
- {name: "diff 2 args", idx: []int{0, 1}, rsi1: 123, rdi1: 321, rsi2: 789, rdi2: 987, want: true},
+ {name: "same arg1", idx: []int{0}, arg1_1: 123, arg1_2: 123, want: false},
+ {name: "same arg2", idx: []int{1}, arg2_1: 123, arg2_2: 123, want: false},
+ {name: "diff arg1", idx: []int{0}, arg1_1: 123, arg1_2: 321, want: true},
+ {name: "diff arg2", idx: []int{1}, arg2_1: 123, arg2_2: 321, want: true},
+ {name: "cmd is uint32", idx: []int{0}, arg2_1: 0xdead00000123, arg2_2: 0xbeef00000123, want: false},
+ {name: "same 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 123, arg1_2: 321, want: false},
+ {name: "diff 2 args", idx: []int{0, 1}, arg2_1: 123, arg1_1: 321, arg2_2: 789, arg1_2: 987, want: true},
} {
t.Run(tc.name, func(t *testing.T) {
c := newArgsTracker(tc.idx...)
- regs := &rpb.AMD64Registers{Rdi: tc.rdi1, Rsi: tc.rsi1}
+ regs := newRegs()
+ setArgVal(0, tc.arg1_1, regs)
+ setArgVal(1, tc.arg2_1, regs)
if !c.shouldReport(regs) {
t.Error("first call to shouldReport, got: false, want: true")
}
c.onReported(regs)
- regs.Rdi, regs.Rsi = tc.rdi2, tc.rsi2
+ setArgVal(0, tc.arg1_2, regs)
+ setArgVal(1, tc.arg2_2, regs)
if got := c.shouldReport(regs); tc.want != got {
t.Errorf("second call to shouldReport, got: %t, want: %t", got, tc.want)
}
@@ -70,7 +71,9 @@ func TestArgsTracker(t *testing.T) {
func TestArgsTrackerLimit(t *testing.T) {
c := newArgsTracker(0, 1)
for i := 0; i < reportLimit; i++ {
- regs := &rpb.AMD64Registers{Rdi: 123, Rsi: uint64(i)}
+ regs := newRegs()
+ setArgVal(0, 123, regs)
+ setArgVal(1, uint64(i), regs)
if !c.shouldReport(regs) {
t.Error("shouldReport before limit was reached, got: false, want: true")
}
@@ -78,7 +81,9 @@ func TestArgsTrackerLimit(t *testing.T) {
}
// Should hit the count limit now.
- regs := &rpb.AMD64Registers{Rdi: 123, Rsi: 123456}
+ regs := newRegs()
+ setArgVal(0, 123, regs)
+ setArgVal(1, 123456, regs)
if c.shouldReport(regs) {
t.Error("shouldReport after limit was reached, got: true, want: false")
}
diff --git a/runsc/boot/config.go b/runsc/boot/config.go
index 05b8f8761..a878bc2ce 100644
--- a/runsc/boot/config.go
+++ b/runsc/boot/config.go
@@ -167,6 +167,9 @@ type Config struct {
// Overlay is whether to wrap the root filesystem in an overlay.
Overlay bool
+ // FSGoferHostUDS enables the gofer to mount a host UDS.
+ FSGoferHostUDS bool
+
// Network indicates what type of network to use.
Network NetworkType
@@ -175,8 +178,11 @@ type Config struct {
// capabilities.
EnableRaw bool
- // GSO indicates that generic segmentation offload is enabled.
- GSO bool
+ // HardwareGSO indicates that hardware segmentation offload is enabled.
+ HardwareGSO bool
+
+ // SoftwareGSO indicates that software segmentation offload is enabled.
+ SoftwareGSO bool
// LogPackets indicates that all network packets should be logged.
LogPackets bool
@@ -211,12 +217,6 @@ type Config struct {
// RestoreFile is the path to the saved container image
RestoreFile string
- // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
- // tests. It allows runsc to start the sandbox process as the current
- // user, and without chrooting the sandbox process. This can be
- // necessary in test environments that have limited capabilities.
- TestOnlyAllowRunAsCurrentUserWithoutChroot bool
-
// NumNetworkChannels controls the number of AF_PACKET sockets that map
// to the same underlying network device. This allows netstack to better
// scale for high throughput use cases.
@@ -233,6 +233,29 @@ type Config struct {
// ReferenceLeakMode sets reference leak check mode
ReferenceLeakMode refs.LeakMode
+
+ // OverlayfsStaleRead causes cached FDs to reopen after a file is opened for
+ // write to workaround overlayfs limitation on kernels before 4.19.
+ OverlayfsStaleRead bool
+
+ // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
+ // tests. It allows runsc to start the sandbox process as the current
+ // user, and without chrooting the sandbox process. This can be
+ // necessary in test environments that have limited capabilities.
+ TestOnlyAllowRunAsCurrentUserWithoutChroot bool
+
+ // TestOnlyTestNameEnv should only be used in tests. It looks up for the
+ // test name in the container environment variables and adds it to the debug
+ // log file name. This is done to help identify the log with the test when
+ // multiple tests are run in parallel, since there is no way to pass
+ // parameters to the runtime from docker.
+ TestOnlyTestNameEnv string
+
+ // CPUNumFromQuota sets CPU number count to available CPU quota, using
+ // least integer value greater than or equal to quota.
+ //
+ // E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
+ CPUNumFromQuota bool
}
// ToFlags returns a slice of flags that correspond to the given Config.
@@ -246,6 +269,7 @@ func (c *Config) ToFlags() []string {
"--debug-log-format=" + c.DebugLogFormat,
"--file-access=" + c.FileAccess.String(),
"--overlay=" + strconv.FormatBool(c.Overlay),
+ "--fsgofer-host-uds=" + strconv.FormatBool(c.FSGoferHostUDS),
"--network=" + c.Network.String(),
"--log-packets=" + strconv.FormatBool(c.LogPackets),
"--platform=" + c.Platform,
@@ -260,10 +284,19 @@ func (c *Config) ToFlags() []string {
"--rootless=" + strconv.FormatBool(c.Rootless),
"--alsologtostderr=" + strconv.FormatBool(c.AlsoLogToStderr),
"--ref-leak-mode=" + refsLeakModeToString(c.ReferenceLeakMode),
+ "--gso=" + strconv.FormatBool(c.HardwareGSO),
+ "--software-gso=" + strconv.FormatBool(c.SoftwareGSO),
+ "--overlayfs-stale-read=" + strconv.FormatBool(c.OverlayfsStaleRead),
}
+ if c.CPUNumFromQuota {
+ f = append(f, "--cpu-num-from-quota")
+ }
+ // Only include these if set since it is never to be used by users.
if c.TestOnlyAllowRunAsCurrentUserWithoutChroot {
- // Only include if set since it is never to be used by users.
- f = append(f, "-TESTONLY-unsafe-nonroot=true")
+ f = append(f, "--TESTONLY-unsafe-nonroot=true")
+ }
+ if len(c.TestOnlyTestNameEnv) != 0 {
+ f = append(f, "--TESTONLY-test-name-env="+c.TestOnlyTestNameEnv)
}
return f
}
diff --git a/runsc/boot/controller.go b/runsc/boot/controller.go
index 72cbabd16..9c9e94864 100644
--- a/runsc/boot/controller.go
+++ b/runsc/boot/controller.go
@@ -18,7 +18,6 @@ import (
"errors"
"fmt"
"os"
- "path"
"syscall"
specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -27,12 +26,13 @@ import (
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/fs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netstack"
"gvisor.dev/gvisor/pkg/sentry/state"
"gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/urpc"
+ "gvisor.dev/gvisor/runsc/specutils"
)
const (
@@ -51,7 +51,7 @@ const (
ContainerEvent = "containerManager.Event"
// ContainerExecuteAsync is the URPC endpoint for executing a command in a
- // container..
+ // container.
ContainerExecuteAsync = "containerManager.ExecuteAsync"
// ContainerPause pauses the container.
@@ -142,7 +142,7 @@ func newController(fd int, l *Loader) (*controller, error) {
}
srv.Register(manager)
- if eps, ok := l.k.NetworkStack().(*epsocket.Stack); ok {
+ if eps, ok := l.k.NetworkStack().(*netstack.Stack); ok {
net := &Network{
Stack: eps.Stack,
}
@@ -152,7 +152,9 @@ func newController(fd int, l *Loader) (*controller, error) {
srv.Register(&debug{})
srv.Register(&control.Logging{})
if l.conf.ProfileEnable {
- srv.Register(&control.Profile{})
+ srv.Register(&control.Profile{
+ Kernel: l.k,
+ })
}
return &controller{
@@ -161,7 +163,7 @@ func newController(fd int, l *Loader) (*controller, error) {
}, nil
}
-// containerManager manages sandboes containers.
+// containerManager manages sandbox containers.
type containerManager struct {
// startChan is used to signal when the root container process should
// be started.
@@ -234,17 +236,13 @@ func (cm *containerManager) Start(args *StartArgs, _ *struct{}) error {
if args.CID == "" {
return errors.New("start argument missing container ID")
}
- // Prevent CIDs containing ".." from confusing the sentry when creating
- // /containers/<cid> directory.
- // TODO(b/129293409): Once we have multiple independent roots, this
- // check won't be necessary.
- if path.Clean(args.CID) != args.CID {
- return fmt.Errorf("container ID shouldn't contain directory traversals such as \"..\": %q", args.CID)
- }
if len(args.FilePayload.Files) < 4 {
return fmt.Errorf("start arguments must contain stdin, stderr, and stdout followed by at least one file for the container root gofer")
}
+ // All validation passed, logs the spec for debugging.
+ specutils.LogSpec(args.Spec)
+
err := cm.l.startContainer(args.Spec, args.Conf, args.CID, args.FilePayload.Files)
if err != nil {
log.Debugf("containerManager.Start failed %q: %+v: %v", args.CID, args, err)
@@ -355,7 +353,7 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
fs.SetRestoreEnvironment(*renv)
// Prepare to load from the state file.
- if eps, ok := networkStack.(*epsocket.Stack); ok {
+ if eps, ok := networkStack.(*netstack.Stack); ok {
stack.StackFromEnv = eps.Stack // FIXME(b/36201077)
}
info, err := specFile.Stat()
@@ -384,7 +382,9 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
}
// Since we have a new kernel we also must make a new watchdog.
- dog := watchdog.New(k, watchdog.DefaultTimeout, cm.l.conf.WatchdogAction)
+ dogOpts := watchdog.DefaultOpts
+ dogOpts.TaskTimeoutAction = cm.l.conf.WatchdogAction
+ dog := watchdog.New(k, dogOpts)
// Change the loader fields to reflect the changes made when restoring.
cm.l.k = k
diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD
index f5509b6b7..3a9dcfc04 100644
--- a/runsc/boot/filter/BUILD
+++ b/runsc/boot/filter/BUILD
@@ -6,6 +6,8 @@ go_library(
name = "filter",
srcs = [
"config.go",
+ "config_amd64.go",
+ "config_arm64.go",
"extra_filters.go",
"extra_filters_msan.go",
"extra_filters_race.go",
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go
index 7ca776b3a..4fb9adca6 100644
--- a/runsc/boot/filter/config.go
+++ b/runsc/boot/filter/config.go
@@ -26,10 +26,6 @@ import (
// allowedSyscalls is the set of syscalls executed by the Sentry to the host OS.
var allowedSyscalls = seccomp.SyscallRules{
- syscall.SYS_ARCH_PRCTL: []seccomp.Rule{
- {seccomp.AllowValue(linux.ARCH_GET_FS)},
- {seccomp.AllowValue(linux.ARCH_SET_FS)},
- },
syscall.SYS_CLOCK_GETTIME: {},
syscall.SYS_CLONE: []seccomp.Rule{
{
@@ -42,8 +38,15 @@ var allowedSyscalls = seccomp.SyscallRules{
syscall.CLONE_THREAD),
},
},
- syscall.SYS_CLOSE: {},
- syscall.SYS_DUP: {},
+ syscall.SYS_CLOSE: {},
+ syscall.SYS_DUP: {},
+ syscall.SYS_DUP3: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(0),
+ },
+ },
syscall.SYS_EPOLL_CREATE1: {},
syscall.SYS_EPOLL_CTL: {},
syscall.SYS_EPOLL_PWAIT: []seccomp.Rule{
@@ -88,14 +91,24 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowValue(linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG),
seccomp.AllowAny{},
seccomp.AllowAny{},
- seccomp.AllowValue(0),
},
{
seccomp.AllowAny{},
seccomp.AllowValue(linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG),
seccomp.AllowAny{},
+ },
+ // Non-private variants are included for flipcall support. They are otherwise
+ // unncessary, as the sentry will use only private futexes internally.
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(linux.FUTEX_WAIT),
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(linux.FUTEX_WAKE),
seccomp.AllowAny{},
- seccomp.AllowValue(0),
},
},
syscall.SYS_GETPID: {},
@@ -121,11 +134,6 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowValue(syscall.SOL_SOCKET),
seccomp.AllowValue(syscall.SO_SNDBUF),
},
- {
- seccomp.AllowAny{},
- seccomp.AllowValue(syscall.SOL_SOCKET),
- seccomp.AllowValue(syscall.SO_REUSEADDR),
- },
},
syscall.SYS_GETTID: {},
syscall.SYS_GETTIMEOFDAY: {},
@@ -232,6 +240,15 @@ var allowedSyscalls = seccomp.SyscallRules{
seccomp.AllowValue(0),
},
},
+ unix.SYS_SENDMMSG: []seccomp.Rule{
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.MSG_DONTWAIT),
+ seccomp.AllowValue(0),
+ },
+ },
syscall.SYS_RESTART_SYSCALL: {},
syscall.SYS_RT_SIGACTION: {},
syscall.SYS_RT_SIGPROCMASK: {},
@@ -295,6 +312,26 @@ func hostInetFilters() seccomp.SyscallRules {
syscall.SYS_GETSOCKOPT: []seccomp.Rule{
{
seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_TOS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_RECVTOS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_TCLASS),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+ },
+ {
+ seccomp.AllowAny{},
seccomp.AllowValue(syscall.SOL_IPV6),
seccomp.AllowValue(syscall.IPV6_V6ONLY),
},
@@ -396,6 +433,34 @@ func hostInetFilters() seccomp.SyscallRules {
seccomp.AllowAny{},
seccomp.AllowValue(4),
},
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_TOS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IP),
+ seccomp.AllowValue(syscall.IP_RECVTOS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_TCLASS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
+ {
+ seccomp.AllowAny{},
+ seccomp.AllowValue(syscall.SOL_IPV6),
+ seccomp.AllowValue(syscall.IPV6_RECVTCLASS),
+ seccomp.AllowAny{},
+ seccomp.AllowValue(4),
+ },
},
syscall.SYS_SHUTDOWN: []seccomp.Rule{
{
diff --git a/runsc/boot/filter/config_amd64.go b/runsc/boot/filter/config_amd64.go
new file mode 100644
index 000000000..5335ff82c
--- /dev/null
+++ b/runsc/boot/filter/config_amd64.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package filter
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/seccomp"
+)
+
+func init() {
+ allowedSyscalls[syscall.SYS_ARCH_PRCTL] = append(allowedSyscalls[syscall.SYS_ARCH_PRCTL],
+ seccomp.Rule{seccomp.AllowValue(linux.ARCH_GET_FS)},
+ seccomp.Rule{seccomp.AllowValue(linux.ARCH_SET_FS)},
+ )
+}
diff --git a/runsc/boot/filter/config_arm64.go b/runsc/boot/filter/config_arm64.go
new file mode 100644
index 000000000..7fa9bbda3
--- /dev/null
+++ b/runsc/boot/filter/config_arm64.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package filter
+
+// Reserve for future customization.
+func init() {
+}
diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go
index 34c674840..421ccd255 100644
--- a/runsc/boot/fs.go
+++ b/runsc/boot/fs.go
@@ -16,7 +16,6 @@ package boot
import (
"fmt"
- "path"
"path/filepath"
"sort"
"strconv"
@@ -52,7 +51,7 @@ const (
rootDevice = "9pfs-/"
// MountPrefix is the annotation prefix for mount hints.
- MountPrefix = "gvisor.dev/spec/mount"
+ MountPrefix = "dev.gvisor.spec.mount."
// Filesystems that runsc supports.
bind = "bind"
@@ -64,6 +63,9 @@ const (
nonefs = "none"
)
+// tmpfs has some extra supported options that we must pass through.
+var tmpfsAllowedOptions = []string{"mode", "uid", "gid"}
+
func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) {
// Upper layer uses the same flags as lower, but it must be read-write.
upperFlags := lowerFlags
@@ -172,27 +174,25 @@ func p9MountOptions(fd int, fa FileAccessType) []string {
func parseAndFilterOptions(opts []string, allowedKeys ...string) ([]string, error) {
var out []string
for _, o := range opts {
- kv := strings.Split(o, "=")
- switch len(kv) {
- case 1:
- if specutils.ContainsStr(allowedKeys, o) {
- out = append(out, o)
- continue
- }
- log.Warningf("ignoring unsupported key %q", kv)
- case 2:
- if specutils.ContainsStr(allowedKeys, kv[0]) {
- out = append(out, o)
- continue
- }
- log.Warningf("ignoring unsupported key %q", kv[0])
- default:
- return nil, fmt.Errorf("invalid option %q", o)
+ ok, err := parseMountOption(o, allowedKeys...)
+ if err != nil {
+ return nil, err
+ }
+ if ok {
+ out = append(out, o)
}
}
return out, nil
}
+func parseMountOption(opt string, allowedKeys ...string) (bool, error) {
+ kv := strings.SplitN(opt, "=", 3)
+ if len(kv) > 2 {
+ return false, fmt.Errorf("invalid option %q", opt)
+ }
+ return specutils.ContainsStr(allowedKeys, kv[0]), nil
+}
+
// mountDevice returns a device string based on the fs type and target
// of the mount.
func mountDevice(m specs.Mount) string {
@@ -207,6 +207,8 @@ func mountDevice(m specs.Mount) string {
func mountFlags(opts []string) fs.MountSourceFlags {
mf := fs.MountSourceFlags{}
+ // Note: changes to supported options must be reflected in
+ // isSupportedMountFlag() as well.
for _, o := range opts {
switch o {
case "rw":
@@ -224,6 +226,18 @@ func mountFlags(opts []string) fs.MountSourceFlags {
return mf
}
+func isSupportedMountFlag(fstype, opt string) bool {
+ switch opt {
+ case "rw", "ro", "noatime", "noexec":
+ return true
+ }
+ if fstype == tmpfs {
+ ok, err := parseMountOption(opt, tmpfsAllowedOptions...)
+ return ok && err == nil
+ }
+ return false
+}
+
func mustFindFilesystem(name string) fs.Filesystem {
fs, ok := fs.FindFilesystem(name)
if !ok {
@@ -427,6 +441,46 @@ func (m *mountHint) isSupported() bool {
return m.mount.Type == tmpfs && m.share == pod
}
+// checkCompatible verifies that shared mount is compatible with master.
+// For now enforce that all options are the same. Once bind mount is properly
+// supported, then we should ensure the master is less restrictive than the
+// container, e.g. master can be 'rw' while container mounts as 'ro'.
+func (m *mountHint) checkCompatible(mount specs.Mount) error {
+ // Remove options that don't affect to mount's behavior.
+ masterOpts := filterUnsupportedOptions(m.mount)
+ slaveOpts := filterUnsupportedOptions(mount)
+
+ if len(masterOpts) != len(slaveOpts) {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+ }
+
+ sort.Strings(masterOpts)
+ sort.Strings(slaveOpts)
+ for i, opt := range masterOpts {
+ if opt != slaveOpts[i] {
+ return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", masterOpts, slaveOpts)
+ }
+ }
+ return nil
+}
+
+func (m *mountHint) fileAccessType() FileAccessType {
+ if m.share == container {
+ return FileAccessExclusive
+ }
+ return FileAccessShared
+}
+
+func filterUnsupportedOptions(mount specs.Mount) []string {
+ rv := make([]string, 0, len(mount.Options))
+ for _, o := range mount.Options {
+ if isSupportedMountFlag(mount.Type, o) {
+ rv = append(rv, o)
+ }
+ }
+ return rv
+}
+
// podMountHints contains a collection of mountHints for the pod.
type podMountHints struct {
mounts map[string]*mountHint
@@ -435,14 +489,15 @@ type podMountHints struct {
func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
mnts := make(map[string]*mountHint)
for k, v := range spec.Annotations {
- // Look for 'gvisor.dev/spec/mount' annotations and parse them.
+ // Look for 'dev.gvisor.spec.mount' annotations and parse them.
if strings.HasPrefix(k, MountPrefix) {
- parts := strings.Split(k, "/")
- if len(parts) != 5 {
+ // Remove the prefix and split the rest.
+ parts := strings.Split(k[len(MountPrefix):], ".")
+ if len(parts) != 2 {
return nil, fmt.Errorf("invalid mount annotation: %s=%s", k, v)
}
- name := parts[3]
- if len(name) == 0 || path.Clean(name) != name {
+ name := parts[0]
+ if len(name) == 0 {
return nil, fmt.Errorf("invalid mount name: %s", name)
}
mnt := mnts[name]
@@ -450,7 +505,7 @@ func newPodMountHints(spec *specs.Spec) (*podMountHints, error) {
mnt = &mountHint{name: name}
mnts[name] = mnt
}
- if err := mnt.setField(parts[4], v); err != nil {
+ if err := mnt.setField(parts[1], v); err != nil {
return nil, err
}
}
@@ -520,6 +575,11 @@ func newContainerMounter(spec *specs.Spec, goferFDs []int, k *kernel.Kernel, hin
func (c *containerMounter) processHints(conf *Config) error {
ctx := c.k.SupervisorContext()
for _, hint := range c.hints.mounts {
+ // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a
+ // common gofer to mount all shared volumes.
+ if hint.mount.Type != tmpfs {
+ continue
+ }
log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type)
inode, err := c.mountSharedMaster(ctx, conf, hint)
if err != nil {
@@ -655,6 +715,14 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (*
log.Infof("Mounting root over 9P, ioFD: %d", fd)
p9FS := mustFindFilesystem("9p")
opts := p9MountOptions(fd, conf.FileAccess)
+
+ if conf.OverlayfsStaleRead {
+ // We can't check for overlayfs here because sandbox is chroot'ed and gofer
+ // can only send mount options for specs.Mounts (specs.Root is missing
+ // Options field). So assume root is always on top of overlayfs.
+ opts = append(opts, "overlayfs_stale_read")
+ }
+
rootInode, err := p9FS.Mount(ctx, rootDevice, mf, strings.Join(opts, ","), nil)
if err != nil {
return nil, fmt.Errorf("creating root mount point: %v", err)
@@ -689,7 +757,6 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
fsName string
opts []string
useOverlay bool
- err error
)
switch m.Type {
@@ -700,14 +767,16 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
case tmpfs:
fsName = m.Type
- // tmpfs has some extra supported options that we must pass through.
- opts, err = parseAndFilterOptions(m.Options, "mode", "uid", "gid")
+ var err error
+ opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...)
+ if err != nil {
+ return "", nil, false, err
+ }
case bind:
fd := c.fds.remove()
fsName = "9p"
- // Non-root bind mounts are always shared.
- opts = p9MountOptions(fd, FileAccessShared)
+ opts = p9MountOptions(fd, c.getMountAccessType(m))
// If configured, add overlay to all writable mounts.
useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly
@@ -717,7 +786,15 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) (
// for now.
log.Warningf("ignoring unknown filesystem type %q", m.Type)
}
- return fsName, opts, useOverlay, err
+ return fsName, opts, useOverlay, nil
+}
+
+func (c *containerMounter) getMountAccessType(mount specs.Mount) FileAccessType {
+ if hint := c.hints.findMount(mount); hint != nil {
+ return hint.fileAccessType()
+ }
+ // Non-root bind mounts are always shared if no hints were provided.
+ return FileAccessShared
}
// mountSubmount mounts volumes inside the container's root. Because mounts may
@@ -779,24 +856,15 @@ func (c *containerMounter) mountSubmount(ctx context.Context, conf *Config, mns
return fmt.Errorf("mount %q error: %v", m.Destination, err)
}
- log.Infof("Mounted %q to %q type %s", m.Source, m.Destination, m.Type)
+ log.Infof("Mounted %q to %q type: %s, internal-options: %q", m.Source, m.Destination, m.Type, opts)
return nil
}
// mountSharedSubmount binds mount to a previously mounted volume that is shared
// among containers in the same pod.
func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.MountNamespace, root *fs.Dirent, mount specs.Mount, source *mountHint) error {
- // For now enforce that all options are the same. Once bind mount is properly
- // supported, then we should ensure the master is less restrictive than the
- // container, e.g. master can be 'rw' while container mounts as 'ro'.
- if len(mount.Options) != len(source.mount.Options) {
- return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
- }
- sort.Strings(mount.Options)
- for i, opt := range mount.Options {
- if opt != source.mount.Options[i] {
- return fmt.Errorf("mount options in annotations differ from container mount, annotation: %s, mount: %s", source.mount.Options, mount.Options)
- }
+ if err := source.checkCompatible(mount); err != nil {
+ return err
}
maxTraversals := uint(0)
diff --git a/runsc/boot/fs_test.go b/runsc/boot/fs_test.go
index 49ab34b33..912037075 100644
--- a/runsc/boot/fs_test.go
+++ b/runsc/boot/fs_test.go
@@ -15,7 +15,6 @@
package boot
import (
- "path"
"reflect"
"strings"
"testing"
@@ -26,19 +25,19 @@ import (
func TestPodMountHintsHappy(t *testing.T) {
spec := &specs.Spec{
Annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
- path.Join(MountPrefix, "mount2", "source"): "bar",
- path.Join(MountPrefix, "mount2", "type"): "bind",
- path.Join(MountPrefix, "mount2", "share"): "container",
- path.Join(MountPrefix, "mount2", "options"): "rw,private",
+ MountPrefix + "mount2.source": "bar",
+ MountPrefix + "mount2.type": "bind",
+ MountPrefix + "mount2.share": "container",
+ MountPrefix + "mount2.options": "rw,private",
},
}
podHints, err := newPodMountHints(spec)
if err != nil {
- t.Errorf("newPodMountHints failed: %v", err)
+ t.Fatalf("newPodMountHints failed: %v", err)
}
// Check that fields were set correctly.
@@ -86,95 +85,95 @@ func TestPodMountHintsErrors(t *testing.T) {
{
name: "too short",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1"): "foo",
+ MountPrefix + "mount1": "foo",
},
error: "invalid mount annotation",
},
{
name: "no name",
annotations: map[string]string{
- MountPrefix + "//source": "foo",
+ MountPrefix + ".source": "foo",
},
error: "invalid mount name",
},
{
name: "missing source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
},
error: "source field",
},
{
name: "missing type",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.share": "pod",
},
error: "type field",
},
{
name: "missing share",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
},
error: "share field",
},
{
name: "invalid field name",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "invalid"): "foo",
+ MountPrefix + "mount1.invalid": "foo",
},
error: "invalid mount annotation",
},
{
name: "invalid source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
},
error: "source cannot be empty",
},
{
name: "invalid type",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "invalid-type",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "invalid-type",
+ MountPrefix + "mount1.share": "pod",
},
error: "invalid type",
},
{
name: "invalid share",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "invalid-share",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "invalid-share",
},
error: "invalid share",
},
{
name: "invalid options",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
- path.Join(MountPrefix, "mount1", "options"): "invalid-option",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
+ MountPrefix + "mount1.options": "invalid-option",
},
error: "unknown mount option",
},
{
name: "duplicate source",
annotations: map[string]string{
- path.Join(MountPrefix, "mount1", "source"): "foo",
- path.Join(MountPrefix, "mount1", "type"): "tmpfs",
- path.Join(MountPrefix, "mount1", "share"): "pod",
+ MountPrefix + "mount1.source": "foo",
+ MountPrefix + "mount1.type": "tmpfs",
+ MountPrefix + "mount1.share": "pod",
- path.Join(MountPrefix, "mount2", "source"): "foo",
- path.Join(MountPrefix, "mount2", "type"): "bind",
- path.Join(MountPrefix, "mount2", "share"): "container",
+ MountPrefix + "mount2.source": "foo",
+ MountPrefix + "mount2.type": "bind",
+ MountPrefix + "mount2.share": "container",
},
error: "have the same mount source",
},
@@ -191,3 +190,61 @@ func TestPodMountHintsErrors(t *testing.T) {
})
}
}
+
+func TestGetMountAccessType(t *testing.T) {
+ const source = "foo"
+ for _, tst := range []struct {
+ name string
+ annotations map[string]string
+ want FileAccessType
+ }{
+ {
+ name: "container=exclusive",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "container",
+ },
+ want: FileAccessExclusive,
+ },
+ {
+ name: "pod=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "pod",
+ },
+ want: FileAccessShared,
+ },
+ {
+ name: "shared=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source,
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "shared",
+ },
+ want: FileAccessShared,
+ },
+ {
+ name: "default=shared",
+ annotations: map[string]string{
+ MountPrefix + "mount1.source": source + "mismatch",
+ MountPrefix + "mount1.type": "bind",
+ MountPrefix + "mount1.share": "container",
+ },
+ want: FileAccessShared,
+ },
+ } {
+ t.Run(tst.name, func(t *testing.T) {
+ spec := &specs.Spec{Annotations: tst.annotations}
+ podHints, err := newPodMountHints(spec)
+ if err != nil {
+ t.Fatalf("newPodMountHints failed: %v", err)
+ }
+ mounter := containerMounter{hints: podHints}
+ if got := mounter.getMountAccessType(specs.Mount{Source: source}); got != tst.want {
+ t.Errorf("getMountAccessType(), want: %v, got: %v", tst.want, got)
+ }
+ })
+ }
+}
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 823a34619..bc1d0c1bb 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -20,7 +20,6 @@ import (
mrand "math/rand"
"os"
"runtime"
- "strings"
"sync"
"sync/atomic"
"syscall"
@@ -44,7 +43,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/sighandling"
- slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
"gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/watchdog"
@@ -55,6 +53,7 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
+ "gvisor.dev/gvisor/pkg/tcpip/transport/raw"
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
"gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"gvisor.dev/gvisor/runsc/boot/filter"
@@ -62,10 +61,11 @@ import (
"gvisor.dev/gvisor/runsc/specutils"
// Include supported socket providers.
- "gvisor.dev/gvisor/pkg/sentry/socket/epsocket"
"gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
_ "gvisor.dev/gvisor/pkg/sentry/socket/netlink"
_ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/route"
+ _ "gvisor.dev/gvisor/pkg/sentry/socket/netlink/uevent"
+ "gvisor.dev/gvisor/pkg/sentry/socket/netstack"
_ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
)
@@ -93,10 +93,6 @@ type Loader struct {
// spec is the base configuration for the root container.
spec *specs.Spec
- // startSignalForwarding enables forwarding of signals to the sandboxed
- // container. It should be called after the init process is loaded.
- startSignalForwarding func() func()
-
// stopSignalForwarding disables forwarding of signals to the sandboxed
// container. It should be called when a sandbox is destroyed.
stopSignalForwarding func()
@@ -146,9 +142,6 @@ type execProcess struct {
func init() {
// Initialize the random number generator.
mrand.Seed(gtime.Now().UnixNano())
-
- // Register the global syscall table.
- kernel.RegisterSyscallTable(slinux.AMD64)
}
// Args are the arguments for New().
@@ -232,7 +225,7 @@ func New(args Args) (*Loader, error) {
// this point. Netns is configured before Run() is called. Netstack is
// configured using a control uRPC message. Host network is configured inside
// Run().
- networkStack, err := newEmptyNetworkStack(args.Conf, k)
+ networkStack, err := newEmptyNetworkStack(args.Conf, k, k)
if err != nil {
return nil, fmt.Errorf("creating network: %v", err)
}
@@ -300,7 +293,9 @@ func New(args Args) (*Loader, error) {
}
// Create a watchdog.
- dog := watchdog.New(k, watchdog.DefaultTimeout, args.Conf.WatchdogAction)
+ dogOpts := watchdog.DefaultOpts
+ dogOpts.TaskTimeoutAction = args.Conf.WatchdogAction
+ dog := watchdog.New(k, dogOpts)
procArgs, err := newProcess(args.ID, args.Spec, creds, k, k.RootPIDNamespace())
if err != nil {
@@ -337,29 +332,6 @@ func New(args Args) (*Loader, error) {
return nil, fmt.Errorf("ignore child stop signals failed: %v", err)
}
- // Handle signals by forwarding them to the root container process
- // (except for panic signal, which should cause a panic).
- l.startSignalForwarding = sighandling.PrepareHandler(func(sig linux.Signal) {
- // Panic signal should cause a panic.
- if args.Conf.PanicSignal != -1 && sig == linux.Signal(args.Conf.PanicSignal) {
- panic("Signal-induced panic")
- }
-
- // Otherwise forward to root container.
- deliveryMode := DeliverToProcess
- if args.Console {
- // Since we are running with a console, we should
- // forward the signal to the foreground process group
- // so that job control signals like ^C can be handled
- // properly.
- deliveryMode = DeliverToForegroundProcessGroup
- }
- log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
- if err := l.signal(args.ID, 0, int32(sig), deliveryMode); err != nil {
- log.Warningf("error sending signal %v to container %q: %v", sig, args.ID, err)
- }
- })
-
// Create the control server using the provided FD.
//
// This must be done *after* we have initialized the kernel since the
@@ -535,23 +507,12 @@ func (l *Loader) run() error {
return err
}
- // Read /etc/passwd for the user's HOME directory and set the HOME
- // environment variable as required by POSIX if it is not overridden by
- // the user.
- hasHomeEnvv := false
- for _, envv := range l.rootProcArgs.Envv {
- if strings.HasPrefix(envv, "HOME=") {
- hasHomeEnvv = true
- }
- }
- if !hasHomeEnvv {
- homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID))
- if err != nil {
- return fmt.Errorf("error reading exec user: %v", err)
- }
-
- l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir)
+ // Add the HOME enviroment variable if it is not already set.
+ envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv)
+ if err != nil {
+ return err
}
+ l.rootProcArgs.Envv = envv
// Create the root container init task. It will begin running
// when the kernel is started.
@@ -578,8 +539,27 @@ func (l *Loader) run() error {
ep.tty.InitForegroundProcessGroup(ep.tg.ProcessGroup())
}
- // Start signal forwarding only after an init process is created.
- l.stopSignalForwarding = l.startSignalForwarding()
+ // Handle signals by forwarding them to the root container process
+ // (except for panic signal, which should cause a panic).
+ l.stopSignalForwarding = sighandling.StartSignalForwarding(func(sig linux.Signal) {
+ // Panic signal should cause a panic.
+ if l.conf.PanicSignal != -1 && sig == linux.Signal(l.conf.PanicSignal) {
+ panic("Signal-induced panic")
+ }
+
+ // Otherwise forward to root container.
+ deliveryMode := DeliverToProcess
+ if l.console {
+ // Since we are running with a console, we should forward the signal to
+ // the foreground process group so that job control signals like ^C can
+ // be handled properly.
+ deliveryMode = DeliverToForegroundProcessGroup
+ }
+ log.Infof("Received external signal %d, mode: %v", sig, deliveryMode)
+ if err := l.signal(l.sandboxID, 0, int32(sig), deliveryMode); err != nil {
+ log.Warningf("error sending signal %v to container %q: %v", sig, l.sandboxID, err)
+ }
+ })
log.Infof("Process should have started...")
l.watchdog.Start()
@@ -815,6 +795,16 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {
})
defer args.MountNamespace.DecRef()
+ // Add the HOME enviroment varible if it is not already set.
+ root := args.MountNamespace.Root()
+ defer root.DecRef()
+ ctx := fs.WithRoot(l.k.SupervisorContext(), root)
+ envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv)
+ if err != nil {
+ return 0, err
+ }
+ args.Envv = envv
+
// Start the process.
proc := control.Proc{Kernel: l.k}
args.PIDNamespace = tg.PIDNamespace()
@@ -906,22 +896,25 @@ func (l *Loader) WaitExit() kernel.ExitStatus {
return l.k.GlobalInit().ExitStatus()
}
-func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
+func newEmptyNetworkStack(conf *Config, clock tcpip.Clock, uniqueID stack.UniqueID) (inet.Stack, error) {
switch conf.Network {
case NetworkHost:
return hostinet.NewStack(), nil
case NetworkNone, NetworkSandbox:
// NetworkNone sets up loopback using netstack.
- netProtos := []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName}
- protoNames := []string{tcp.ProtocolName, udp.ProtocolName, icmp.ProtocolName4}
- s := epsocket.Stack{stack.New(netProtos, protoNames, stack.Options{
- Clock: clock,
- Stats: epsocket.Metrics,
- HandleLocal: true,
+ netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()}
+ transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()}
+ s := netstack.Stack{stack.New(stack.Options{
+ NetworkProtocols: netProtos,
+ TransportProtocols: transProtos,
+ Clock: clock,
+ Stats: netstack.Metrics,
+ HandleLocal: true,
// Enable raw sockets for users with sufficient
// privileges.
- Raw: true,
+ RawFactory: raw.EndpointFactory{},
+ UniqueID: uniqueID,
})}
// Enable SACK Recovery.
@@ -929,6 +922,10 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) {
return nil, fmt.Errorf("failed to enable SACK: %v", err)
}
+ // Set default TTLs as required by socket/netstack.
+ s.Stack.SetNetworkProtocolOption(ipv4.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+ s.Stack.SetNetworkProtocolOption(ipv6.ProtocolNumber, tcpip.DefaultTTLOption(netstack.DefaultTTL))
+
// Enable Receive Buffer Auto-Tuning.
if err := s.Stack.SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.ModerateReceiveBufferOption(true)); err != nil {
return nil, fmt.Errorf("SetTransportProtocolOption failed: %v", err)
diff --git a/runsc/boot/loader_amd64.go b/runsc/boot/loader_amd64.go
new file mode 100644
index 000000000..b9669f2ac
--- /dev/null
+++ b/runsc/boot/loader_amd64.go
@@ -0,0 +1,27 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package boot
+
+import (
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+)
+
+func init() {
+ // Register the global syscall table.
+ kernel.RegisterSyscallTable(linux.AMD64)
+}
diff --git a/runsc/boot/loader_arm64.go b/runsc/boot/loader_arm64.go
new file mode 100644
index 000000000..cf64d28c8
--- /dev/null
+++ b/runsc/boot/loader_arm64.go
@@ -0,0 +1,27 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package boot
+
+import (
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+)
+
+func init() {
+ // Register the global syscall table.
+ kernel.RegisterSyscallTable(linux.ARM64)
+}
diff --git a/runsc/boot/network.go b/runsc/boot/network.go
index ea0d9f790..dd4926bb9 100644
--- a/runsc/boot/network.go
+++ b/runsc/boot/network.go
@@ -50,12 +50,13 @@ type DefaultRoute struct {
// FDBasedLink configures an fd-based link.
type FDBasedLink struct {
- Name string
- MTU int
- Addresses []net.IP
- Routes []Route
- GSOMaxSize uint32
- LinkAddress net.HardwareAddr
+ Name string
+ MTU int
+ Addresses []net.IP
+ Routes []Route
+ GSOMaxSize uint32
+ SoftwareGSOEnabled bool
+ LinkAddress net.HardwareAddr
// NumChannels controls how many underlying FD's are to be used to
// create this endpoint.
@@ -79,7 +80,8 @@ type CreateLinksAndRoutesArgs struct {
LoopbackLinks []LoopbackLink
FDBasedLinks []FDBasedLink
- DefaultGateway DefaultRoute
+ Defaultv4Gateway DefaultRoute
+ Defaultv6Gateway DefaultRoute
}
// Empty returns true if route hasn't been set.
@@ -163,6 +165,7 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
Address: mac,
PacketDispatchMode: fdbased.RecvMMsg,
GSOMaxSize: link.GSOMaxSize,
+ SoftwareGSOEnabled: link.SoftwareGSOEnabled,
RXChecksumOffload: true,
})
if err != nil {
@@ -184,12 +187,24 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
}
}
- if !args.DefaultGateway.Route.Empty() {
- nicID, ok := nicids[args.DefaultGateway.Name]
+ if !args.Defaultv4Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv4Gateway.Name]
if !ok {
- return fmt.Errorf("invalid interface name %q for default route", args.DefaultGateway.Name)
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv4Gateway.Name)
}
- route, err := args.DefaultGateway.Route.toTcpipRoute(nicID)
+ route, err := args.Defaultv4Gateway.Route.toTcpipRoute(nicID)
+ if err != nil {
+ return err
+ }
+ routes = append(routes, route)
+ }
+
+ if !args.Defaultv6Gateway.Route.Empty() {
+ nicID, ok := nicids[args.Defaultv6Gateway.Name]
+ if !ok {
+ return fmt.Errorf("invalid interface name %q for default route", args.Defaultv6Gateway.Name)
+ }
+ route, err := args.Defaultv6Gateway.Route.toTcpipRoute(nicID)
if err != nil {
return err
}
@@ -203,14 +218,14 @@ func (n *Network) CreateLinksAndRoutes(args *CreateLinksAndRoutesArgs, _ *struct
// createNICWithAddrs creates a NIC in the network stack and adds the given
// addresses.
-func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, linkEP tcpip.LinkEndpointID, addrs []net.IP, loopback bool) error {
+func (n *Network) createNICWithAddrs(id tcpip.NICID, name string, ep stack.LinkEndpoint, addrs []net.IP, loopback bool) error {
if loopback {
- if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(linkEP)); err != nil {
- return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
+ if err := n.Stack.CreateNamedLoopbackNIC(id, name, sniffer.New(ep)); err != nil {
+ return fmt.Errorf("CreateNamedLoopbackNIC(%v, %v, %v) failed: %v", id, name, ep, err)
}
} else {
- if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(linkEP)); err != nil {
- return fmt.Errorf("CreateNamedNIC(%v, %v, %v) failed: %v", id, name, linkEP, err)
+ if err := n.Stack.CreateNamedNIC(id, name, sniffer.New(ep)); err != nil {
+ return fmt.Errorf("CreateNamedNIC(%v, %v, %v) failed: %v", id, name, ep, err)
}
}
diff --git a/runsc/boot/user.go b/runsc/boot/user.go
index d1d423a5c..56cc12ee0 100644
--- a/runsc/boot/user.go
+++ b/runsc/boot/user.go
@@ -16,6 +16,7 @@ package boot
import (
"bufio"
+ "fmt"
"io"
"strconv"
"strings"
@@ -23,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/context"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/usermem"
)
@@ -42,7 +44,7 @@ func (r *fileReader) Read(buf []byte) (int, error) {
// getExecUserHome returns the home directory of the executing user read from
// /etc/passwd as read from the container filesystem.
-func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) {
+func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) {
// The default user home directory to return if no user matching the user
// if found in the /etc/passwd found in the image.
const defaultHome = "/"
@@ -82,7 +84,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
File: f,
}
- homeDir, err := findHomeInPasswd(uid, r, defaultHome)
+ homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome)
if err != nil {
return "", err
}
@@ -90,6 +92,28 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32
return homeDir, nil
}
+// maybeAddExecUserHome returns a new slice with the HOME enviroment variable
+// set if the slice does not already contain it, otherwise it returns the
+// original slice unmodified.
+func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) {
+ // Check if the envv already contains HOME.
+ for _, env := range envv {
+ if strings.HasPrefix(env, "HOME=") {
+ // We have it. Return the original slice unmodified.
+ return envv, nil
+ }
+ }
+
+ // Read /etc/passwd for the user's HOME directory and set the HOME
+ // environment variable as required by POSIX if it is not overridden by
+ // the user.
+ homeDir, err := getExecUserHome(ctx, mns, uid)
+ if err != nil {
+ return nil, fmt.Errorf("error reading exec user: %v", err)
+ }
+ return append(envv, "HOME="+homeDir), nil
+}
+
// findHomeInPasswd parses a passwd file and returns the given user's home
// directory. This function does it's best to replicate the runc's behavior.
func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) {
diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go
index 906baf3e5..9aee2ad07 100644
--- a/runsc/boot/user_test.go
+++ b/runsc/boot/user_test.go
@@ -25,6 +25,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/pkg/sentry/context/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
)
func setupTempDir() (string, error) {
@@ -68,7 +69,7 @@ func setupPasswd(contents string, perms os.FileMode) func() (string, error) {
// TestGetExecUserHome tests the getExecUserHome function.
func TestGetExecUserHome(t *testing.T) {
tests := map[string]struct {
- uid uint32
+ uid auth.KUID
createRoot func() (string, error)
expected string
}{