diff options
Diffstat (limited to 'runsc')
37 files changed, 252 insertions, 171 deletions
diff --git a/runsc/BUILD b/runsc/BUILD index 0e4cf8e09..6b8c92706 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -16,6 +16,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", @@ -47,6 +48,7 @@ go_binary( x_defs = {"main.version": "{VERSION}"}, deps = [ "//pkg/log", + "//pkg/sentry/platform", "//runsc/boot", "//runsc/cmd", "//runsc/specutils", diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 7ec15a524..5025401dd 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -34,6 +34,7 @@ go_library( "//pkg/log", "//pkg/memutil", "//pkg/rand", + "//pkg/refs", "//pkg/sentry/arch", "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/context", @@ -51,13 +52,10 @@ go_library( "//pkg/sentry/kernel", "//pkg/sentry/kernel:uncaught_signal_go_proto", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/kdefs", "//pkg/sentry/limits", "//pkg/sentry/loader", "//pkg/sentry/pgalloc", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/sentry/sighandling", "//pkg/sentry/socket/epsocket", "//pkg/sentry/socket/hostinet", @@ -86,6 +84,7 @@ go_library( "//pkg/tcpip/transport/udp", "//pkg/urpc", "//runsc/boot/filter", + "//runsc/boot/platforms", "//runsc/specutils", "@com_github_golang_protobuf//proto:go_default_library", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 6d276f207..6f1eb9a41 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -22,40 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/watchdog" ) -// PlatformType tells which platform to use. -type PlatformType int - -const ( - // PlatformPtrace runs the sandbox with the ptrace platform. - PlatformPtrace PlatformType = iota - - // PlatformKVM runs the sandbox with the KVM platform. - PlatformKVM -) - -// MakePlatformType converts type from string. -func MakePlatformType(s string) (PlatformType, error) { - switch s { - case "ptrace": - return PlatformPtrace, nil - case "kvm": - return PlatformKVM, nil - default: - return 0, fmt.Errorf("invalid platform type %q", s) - } -} - -func (p PlatformType) String() string { - switch p { - case PlatformPtrace: - return "ptrace" - case PlatformKVM: - return "kvm" - default: - return fmt.Sprintf("unknown(%d)", p) - } -} - // FileAccessType tells how the filesystem is accessed. type FileAccessType int @@ -187,7 +153,7 @@ type Config struct { LogPackets bool // Platform is the platform to run on. - Platform PlatformType + Platform string // Strace indicates that strace should be enabled. Strace bool @@ -247,7 +213,7 @@ func (c *Config) ToFlags() []string { "--overlay=" + strconv.FormatBool(c.Overlay), "--network=" + c.Network.String(), "--log-packets=" + strconv.FormatBool(c.LogPackets), - "--platform=" + c.Platform.String(), + "--platform=" + c.Platform, "--strace=" + strconv.FormatBool(c.Strace), "--strace-syscalls=" + strings.Join(c.StraceSyscalls, ","), "--strace-log-size=" + strconv.Itoa(int(c.StraceLogSize)), diff --git a/runsc/boot/fds.go b/runsc/boot/fds.go index 59e1b46ec..e5de1f3d7 100644 --- a/runsc/boot/fds.go +++ b/runsc/boot/fds.go @@ -21,32 +21,23 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/fs/host" "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/kdefs" - "gvisor.dev/gvisor/pkg/sentry/limits" ) -// createFDMap creates an FD map that contains stdin, stdout, and stderr. If -// console is true, then ioctl calls will be passed through to the host FD. +// createFDTable creates an FD table that contains stdin, stdout, and stderr. +// If console is true, then ioctl calls will be passed through to the host FD. // Upon success, createFDMap dups then closes stdioFDs. -func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs []int) (*kernel.FDMap, error) { +func createFDTable(ctx context.Context, console bool, stdioFDs []int) (*kernel.FDTable, error) { if len(stdioFDs) != 3 { return nil, fmt.Errorf("stdioFDs should contain exactly 3 FDs (stdin, stdout, and stderr), but %d FDs received", len(stdioFDs)) } k := kernel.KernelFromContext(ctx) - fdm := k.NewFDMap() - defer fdm.DecRef() + fdTable := k.NewFDTable() + defer fdTable.DecRef() mounter := fs.FileOwnerFromContext(ctx) - // Maps sandbox FD to host FD. - fdMap := map[int]int{ - 0: stdioFDs[0], - 1: stdioFDs[1], - 2: stdioFDs[2], - } - var ttyFile *fs.File - for appFD, hostFD := range fdMap { + for appFD, hostFD := range stdioFDs { var appFile *fs.File if console && appFD < 3 { @@ -80,11 +71,11 @@ func createFDMap(ctx context.Context, l *limits.LimitSet, console bool, stdioFDs } // Add the file to the FD map. - if err := fdm.NewFDAt(kdefs.FD(appFD), appFile, kernel.FDFlags{}, l); err != nil { + if err := fdTable.NewFDAt(ctx, int32(appFD), appFile, kernel.FDFlags{}); err != nil { return nil, err } } - fdm.IncRef() - return fdm, nil + fdTable.IncRef() + return fdTable, nil } diff --git a/runsc/boot/filter/BUILD b/runsc/boot/filter/BUILD index 07898f3de..f5509b6b7 100644 --- a/runsc/boot/filter/BUILD +++ b/runsc/boot/filter/BUILD @@ -20,8 +20,6 @@ go_library( "//pkg/log", "//pkg/seccomp", "//pkg/sentry/platform", - "//pkg/sentry/platform/kvm", - "//pkg/sentry/platform/ptrace", "//pkg/tcpip/link/fdbased", "@org_golang_x_sys//unix:go_default_library", ], diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index e4ccb40d9..0ee5b8bbd 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -437,29 +437,6 @@ func hostInetFilters() seccomp.SyscallRules { } } -// ptraceFilters returns syscalls made exclusively by the ptrace platform. -func ptraceFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - unix.SYS_GETCPU: {}, - unix.SYS_SCHED_SETAFFINITY: {}, - syscall.SYS_PTRACE: {}, - syscall.SYS_TGKILL: {}, - syscall.SYS_WAIT4: {}, - } -} - -// kvmFilters returns syscalls made exclusively by the KVM platform. -func kvmFilters() seccomp.SyscallRules { - return seccomp.SyscallRules{ - syscall.SYS_ARCH_PRCTL: {}, - syscall.SYS_IOCTL: {}, - syscall.SYS_MMAP: {}, - syscall.SYS_RT_SIGSUSPEND: {}, - syscall.SYS_RT_SIGTIMEDWAIT: {}, - 0xffffffffffffffff: {}, // KVM uses syscall -1 to transition to host. - } -} - func controlServerFilters(fd int) seccomp.SyscallRules { return seccomp.SyscallRules{ syscall.SYS_ACCEPT: []seccomp.Rule{ diff --git a/runsc/boot/filter/extra_filters.go b/runsc/boot/filter/extra_filters.go index 1056cd314..e28d4b8d6 100644 --- a/runsc/boot/filter/extra_filters.go +++ b/runsc/boot/filter/extra_filters.go @@ -21,7 +21,7 @@ import ( ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/boot/filter/extra_filters_race.go b/runsc/boot/filter/extra_filters_race.go index d5bee4453..9ff80276a 100644 --- a/runsc/boot/filter/extra_filters_race.go +++ b/runsc/boot/filter/extra_filters_race.go @@ -33,6 +33,7 @@ func instrumentationFilters() seccomp.SyscallRules { syscall.SYS_MUNLOCK: {}, syscall.SYS_NANOSLEEP: {}, syscall.SYS_OPEN: {}, + syscall.SYS_OPENAT: {}, syscall.SYS_SET_ROBUST_LIST: {}, // Used within glibc's malloc. syscall.SYS_TIME: {}, diff --git a/runsc/boot/filter/filter.go b/runsc/boot/filter/filter.go index 468481f29..e80c171b3 100644 --- a/runsc/boot/filter/filter.go +++ b/runsc/boot/filter/filter.go @@ -18,13 +18,9 @@ package filter import ( - "fmt" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/seccomp" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm" - "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" ) // Options are seccomp filter related options. @@ -53,14 +49,7 @@ func Install(opt Options) error { s.Merge(profileFilters()) } - switch p := opt.Platform.(type) { - case *ptrace.PTrace: - s.Merge(ptraceFilters()) - case *kvm.KVM: - s.Merge(kvmFilters()) - default: - return fmt.Errorf("unknown platform type %T", p) - } + s.Merge(opt.Platform.SyscallFilters()) return seccomp.Install(s) } diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 67a286212..d3e3196fd 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -25,8 +25,10 @@ import ( // Include filesystem types that OCI spec might mount. _ "gvisor.dev/gvisor/pkg/sentry/fs/dev" + "gvisor.dev/gvisor/pkg/sentry/fs/gofer" _ "gvisor.dev/gvisor/pkg/sentry/fs/host" _ "gvisor.dev/gvisor/pkg/sentry/fs/proc" + "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" _ "gvisor.dev/gvisor/pkg/sentry/fs/sys" _ "gvisor.dev/gvisor/pkg/sentry/fs/tmpfs" _ "gvisor.dev/gvisor/pkg/sentry/fs/tty" @@ -36,8 +38,6 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/fs/gofer" - "gvisor.dev/gvisor/pkg/sentry/fs/ramfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/syserror" @@ -226,7 +226,11 @@ func mustFindFilesystem(name string) fs.Filesystem { // addSubmountOverlay overlays the inode over a ramfs tree containing the given // paths. func addSubmountOverlay(ctx context.Context, inode *fs.Inode, submounts []string) (*fs.Inode, error) { - msrc := fs.NewPseudoMountSource(ctx) + // Construct a ramfs tree of mount points. The contents never + // change, so this can be fully caching. There's no real + // filesystem backing this tree, so we set the filesystem to + // nil. + msrc := fs.NewCachingMountSource(ctx, nil, fs.MountSourceFlags{}) mountTree, err := ramfs.MakeDirectoryTree(ctx, msrc, submounts) if err != nil { return nil, fmt.Errorf("creating mount tree: %v", err) @@ -902,7 +906,10 @@ func (c *containerMounter) mountSharedSubmount(ctx context.Context, mns *fs.Moun } defer target.DecRef() + // Take a ref on the inode that is about to be (re)-mounted. + source.root.IncRef() if err := mns.Mount(ctx, target, source.root); err != nil { + source.root.DecRef() return fmt.Errorf("bind mount %q error: %v", mount.Destination, err) } diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 41027416d..8e8c6105b 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -32,6 +32,7 @@ import ( "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/memutil" "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/fs/host" @@ -41,8 +42,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/loader" "gvisor.dev/gvisor/pkg/sentry/pgalloc" "gvisor.dev/gvisor/pkg/sentry/platform" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm" - "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" "gvisor.dev/gvisor/pkg/sentry/sighandling" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" "gvisor.dev/gvisor/pkg/sentry/time" @@ -58,6 +57,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/runsc/boot/filter" + _ "gvisor.dev/gvisor/runsc/boot/platforms" // register all platforms. "gvisor.dev/gvisor/runsc/specutils" // Include supported socket providers. @@ -262,7 +262,7 @@ func New(args Args) (*Loader, error) { // Adjust the total memory returned by the Sentry so that applications that // use /proc/meminfo can make allocations based on this limit. usage.MinimumTotalMemoryBytes = args.TotalMem - log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(2^30)) + log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } // Initiate the Kernel object, which is required by the Context passed @@ -415,19 +415,12 @@ func (l *Loader) Destroy() { } func createPlatform(conf *Config, deviceFile *os.File) (platform.Platform, error) { - switch conf.Platform { - case PlatformPtrace: - log.Infof("Platform: ptrace") - return ptrace.New() - case PlatformKVM: - log.Infof("Platform: kvm") - if deviceFile == nil { - return nil, fmt.Errorf("kvm device file must be provided") - } - return kvm.New(deviceFile) - default: - return nil, fmt.Errorf("invalid platform %v", conf.Platform) + p, err := platform.Lookup(conf.Platform) + if err != nil { + panic(fmt.Sprintf("invalid platform %v: %v", conf.Platform, err)) } + log.Infof("Platform: %s", conf.Platform) + return p.New(deviceFile) } func createMemoryFile() (*pgalloc.MemoryFile, error) { @@ -516,13 +509,13 @@ func (l *Loader) run() error { // Create the FD map, which will set stdin, stdout, and stderr. If console // is true, then ioctl calls will be passed through to the host fd. ctx := l.rootProcArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, l.rootProcArgs.Limits, l.console, l.stdioFDs) + fdTable, err := createFDTable(ctx, l.console, l.stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } // CreateProcess takes a reference on FDMap if successful. We won't need // ours either way. - l.rootProcArgs.FDMap = fdm + l.rootProcArgs.FDTable = fdTable // cid for root container can be empty. Only subcontainers need it to set // the mount location. @@ -561,13 +554,13 @@ func (l *Loader) run() error { return fmt.Errorf("creating init process: %v", err) } - // CreateProcess takes a reference on FDMap if successful. - l.rootProcArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + l.rootProcArgs.FDTable.DecRef() } ep.tg = l.k.GlobalInit() if l.console { - ttyFile := l.rootProcArgs.FDMap.GetFile(0) + ttyFile, _ := l.rootProcArgs.FDTable.Get(0) defer ttyFile.DecRef() ep.tty = ttyFile.FileOperations.(*host.TTYFileOperations) @@ -647,13 +640,13 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file // Create the FD map, which will set stdin, stdout, and stderr. ctx := procArgs.NewContext(l.k) - fdm, err := createFDMap(ctx, procArgs.Limits, false, stdioFDs) + fdTable, err := createFDTable(ctx, false, stdioFDs) if err != nil { return fmt.Errorf("importing fds: %v", err) } - // CreateProcess takes a reference on FDMap if successful. We won't need ours - // either way. - procArgs.FDMap = fdm + // CreateProcess takes a reference on fdTable if successful. We won't + // need ours either way. + procArgs.FDTable = fdTable // Can't take ownership away from os.File. dup them to get a new FDs. var goferFDs []int @@ -682,8 +675,8 @@ func (l *Loader) startContainer(spec *specs.Spec, conf *Config, cid string, file } l.k.StartProcess(tg) - // CreateProcess takes a reference on FDMap if successful. - procArgs.FDMap.DecRef() + // CreateProcess takes a reference on FDTable if successful. + procArgs.FDTable.DecRef() l.processes[eid].tg = tg return nil @@ -1006,3 +999,8 @@ func (l *Loader) threadGroupFromIDLocked(key execID) (*kernel.ThreadGroup, *host } return ep.tg, ep.tty, nil } + +func init() { + // TODO(gvisor.dev/issue/365): Make this configurable. + refs.SetLeakMode(refs.NoLeakChecking) +} diff --git a/runsc/boot/loader_test.go b/runsc/boot/loader_test.go index 4af45bfcc..ff713660d 100644 --- a/runsc/boot/loader_test.go +++ b/runsc/boot/loader_test.go @@ -37,6 +37,9 @@ import ( func init() { log.SetLevel(log.Debug) rand.Seed(time.Now().UnixNano()) + if err := fsgofer.OpenProcSelfFD(); err != nil { + panic(err) + } } func testConfig() *Config { @@ -44,6 +47,7 @@ func testConfig() *Config { RootDir: "unused_root_dir", Network: NetworkNone, DisableSeccomp: true, + Platform: "ptrace", } } diff --git a/runsc/boot/platforms/BUILD b/runsc/boot/platforms/BUILD new file mode 100644 index 000000000..03391cdca --- /dev/null +++ b/runsc/boot/platforms/BUILD @@ -0,0 +1,16 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +package(licenses = ["notice"]) + +go_library( + name = "platforms", + srcs = ["platforms.go"], + importpath = "gvisor.dev/gvisor/runsc/boot/platforms", + visibility = [ + "//runsc:__subpackages__", + ], + deps = [ + "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform/ptrace", + ], +) diff --git a/runsc/boot/platforms/platforms.go b/runsc/boot/platforms/platforms.go new file mode 100644 index 000000000..056b46ad5 --- /dev/null +++ b/runsc/boot/platforms/platforms.go @@ -0,0 +1,30 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package platforms imports all available platform packages. +package platforms + +import ( + // Import platforms that runsc might use. + _ "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + _ "gvisor.dev/gvisor/pkg/sentry/platform/ptrace" +) + +const ( + // Ptrace runs the sandbox with the ptrace platform. + Ptrace = "ptrace" + + // KVM runs the sandbox with the KVM platform. + KVM = "kvm" +) diff --git a/runsc/cmd/BUILD b/runsc/cmd/BUILD index 2c8b84252..5223b9972 100644 --- a/runsc/cmd/BUILD +++ b/runsc/cmd/BUILD @@ -46,6 +46,7 @@ go_library( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/console", "//runsc/container", "//runsc/fsgofer", diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 272eb14d3..b40fded5b 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -26,6 +26,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" ) @@ -172,7 +173,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if caps == nil { caps = &specs.LinuxCapabilities{} } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // Ptrace platform requires extra capabilities. const c = "CAP_SYS_PTRACE" caps.Bounding = append(caps.Bounding, c) diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index 7adc23a77..e817eff77 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -235,7 +235,11 @@ func (ex *Exec) execChildAndWait(waitStatus *syscall.WaitStatus) subcommands.Exi cmd.SysProcAttr = &syscall.SysProcAttr{ Setsid: true, Setctty: true, - Ctty: int(tty.Fd()), + // The Ctty FD must be the FD in the child process's FD + // table. Since we set cmd.Stdin/Stdout/Stderr to the + // tty FD, we can use any of 0, 1, or 2 here. + // See https://github.com/golang/go/issues/29458. + Ctty: 0, } } diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 52609a57a..9faabf494 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -152,6 +152,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // modes exactly as sent by the sandbox, which will have applied its own umask. syscall.Umask(0) + if err := fsgofer.OpenProcSelfFD(); err != nil { + Fatalf("failed to open /proc/self/fd: %v", err) + } + if err := syscall.Chroot(root); err != nil { Fatalf("failed to chroot to %q: %v", root, err) } diff --git a/runsc/cmd/syscalls.go b/runsc/cmd/syscalls.go index df92c126a..fb6c1ab29 100644 --- a/runsc/cmd/syscalls.go +++ b/runsc/cmd/syscalls.go @@ -41,7 +41,7 @@ type Syscalls struct { // Maps operating system to architecture to ArchInfo. type CompatibilityInfo map[string]map[string]ArchInfo -// ArchInfo is compatbility doc for an architecture. +// ArchInfo is compatibility doc for an architecture. type ArchInfo struct { // Syscalls maps syscall number for the architecture to the doc. Syscalls map[uintptr]SyscallDoc `json:"syscalls"` diff --git a/runsc/console/BUILD b/runsc/console/BUILD index 2d71cd371..e623c1a0f 100644 --- a/runsc/console/BUILD +++ b/runsc/console/BUILD @@ -4,7 +4,9 @@ package(licenses = ["notice"]) go_library( name = "console", - srcs = ["console.go"], + srcs = [ + "console.go", + ], importpath = "gvisor.dev/gvisor/runsc/console", visibility = [ "//runsc:__subpackages__", diff --git a/runsc/container/BUILD b/runsc/container/BUILD index ebe77165e..e246c38ae 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -53,6 +53,7 @@ go_test( "//pkg/unet", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/specutils", "//runsc/test/testutil", "@com_github_cenkalti_backoff//:go_default_library", diff --git a/runsc/container/container.go b/runsc/container/container.go index bde46fb9b..8320bb2ca 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -206,7 +206,7 @@ func findContainerRoot(rootDir, partialID string) (string, error) { } // Now see whether id could be an abbreviation of exactly 1 of the - // container ids. If id is ambigious (it could match more than 1 + // container ids. If id is ambiguous (it could match more than 1 // container), it is an error. cRoot = "" ids, err := List(rootDir) @@ -273,7 +273,7 @@ type Args struct { Attached bool } -// Create creates the container in a new Sandbox process, unless the metadata +// New creates the container in a new Sandbox process, unless the metadata // indicates that an existing Sandbox should be used. The caller must call // Destroy() on the container. func New(conf *boot.Config, args Args) (*Container, error) { @@ -329,7 +329,7 @@ func New(conf *boot.Config, args Args) (*Container, error) { log.Debugf("Creating new sandbox for container %q", args.ID) // Create and join cgroup before processes are created to ensure they are - // part of the cgroup from the start (and all tneir children processes). + // part of the cgroup from the start (and all their children processes). cg, err := cgroup.New(args.Spec) if err != nil { return nil, err @@ -446,7 +446,7 @@ func (c *Container) Start(conf *boot.Config) error { } } else { // Join cgroup to strt gofer process to ensure it's part of the cgroup from - // the start (and all tneir children processes). + // the start (and all their children processes). if err := runInCgroup(c.Sandbox.Cgroup, func() error { // Create the gofer process. ioFiles, mountsFile, err := c.createGoferProcess(c.Spec, conf, c.BundleDir) @@ -514,7 +514,7 @@ func Run(conf *boot.Config, args Args) (syscall.WaitStatus, error) { if err != nil { return 0, fmt.Errorf("creating container: %v", err) } - // Clean up partially created container if an error ocurrs. + // Clean up partially created container if an error occurs. // Any errors returned by Destroy() itself are ignored. cu := specutils.MakeCleanup(func() { c.Destroy() diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index e0786866b..c1d6ca7b8 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -17,6 +17,7 @@ package container import ( "bytes" "fmt" + "io" "io/ioutil" "os" "path" @@ -36,6 +37,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/control" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/specutils" "gvisor.dev/gvisor/runsc/test/testutil" ) @@ -256,7 +258,7 @@ func configs(opts ...configOption) []*boot.Config { if testutil.RaceEnabled { continue } - c.Platform = boot.PlatformKVM + c.Platform = platforms.KVM case nonExclusiveFS: c.FileAccess = boot.FileAccessShared default: @@ -408,6 +410,46 @@ func TestLifecycle(t *testing.T) { // Test the we can execute the application with different path formats. func TestExePath(t *testing.T) { + // Create two directories that will be prepended to PATH. + firstPath, err := ioutil.TempDir(testutil.TmpDir(), "first") + if err != nil { + t.Fatal(err) + } + secondPath, err := ioutil.TempDir(testutil.TmpDir(), "second") + if err != nil { + t.Fatal(err) + } + + // Create two minimal executables in the second path, two of which + // will be masked by files in first path. + for _, p := range []string{"unmasked", "masked1", "masked2"} { + path := filepath.Join(secondPath, p) + f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0777) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if _, err := io.WriteString(f, "#!/bin/true\n"); err != nil { + t.Fatal(err) + } + } + + // Create a non-executable file in the first path which masks a healthy + // executable in the second. + nonExecutable := filepath.Join(firstPath, "masked1") + f2, err := os.OpenFile(nonExecutable, os.O_CREATE|os.O_EXCL, 0666) + if err != nil { + t.Fatal(err) + } + f2.Close() + + // Create a non-regular file in the first path which masks a healthy + // executable in the second. + nonRegular := filepath.Join(firstPath, "masked2") + if err := os.Mkdir(nonRegular, 0777); err != nil { + t.Fatal(err) + } + for _, conf := range configs(overlay) { t.Logf("Running test with conf: %+v", conf) for _, test := range []struct { @@ -420,8 +462,24 @@ func TestExePath(t *testing.T) { {path: "thisfiledoesntexit", success: false}, {path: "bin/thisfiledoesntexit", success: false}, {path: "/bin/thisfiledoesntexit", success: false}, + + {path: "unmasked", success: true}, + {path: filepath.Join(firstPath, "unmasked"), success: false}, + {path: filepath.Join(secondPath, "unmasked"), success: true}, + + {path: "masked1", success: true}, + {path: filepath.Join(firstPath, "masked1"), success: false}, + {path: filepath.Join(secondPath, "masked1"), success: true}, + + {path: "masked2", success: true}, + {path: filepath.Join(firstPath, "masked2"), success: false}, + {path: filepath.Join(secondPath, "masked2"), success: true}, } { spec := testutil.NewSpecWithArgs(test.path) + spec.Process.Env = []string{ + fmt.Sprintf("PATH=%s:%s:%s", firstPath, secondPath, os.Getenv("PATH")), + } + rootDir, bundleDir, err := testutil.SetupContainer(spec, conf) if err != nil { t.Fatalf("exec: %s, error setting up container: %v", test.path, err) @@ -831,7 +889,7 @@ func TestUnixDomainSockets(t *testing.T) { t.Logf("Running test with conf: %+v", conf) // UDS path is limited to 108 chars for compatibility with older systems. - // Use '/tmp' (instead of testutil.TmpDir) to to ensure the size limit is + // Use '/tmp' (instead of testutil.TmpDir) to ensure the size limit is // not exceeded. Assumes '/tmp' exists in the system. dir, err := ioutil.TempDir("/tmp", "uds-test") if err != nil { diff --git a/runsc/fsgofer/filter/extra_filters.go b/runsc/fsgofer/filter/extra_filters.go index 1056cd314..e28d4b8d6 100644 --- a/runsc/fsgofer/filter/extra_filters.go +++ b/runsc/fsgofer/filter/extra_filters.go @@ -21,7 +21,7 @@ import ( ) // instrumentationFilters returns additional filters for syscalls used by -// Go intrumentation tools, e.g. -race, -msan. +// Go instrumentation tools, e.g. -race, -msan. // Returns empty when disabled. func instrumentationFilters() seccomp.SyscallRules { return nil diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 8f50af780..fe450c64f 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -28,6 +28,7 @@ import ( "path" "path/filepath" "runtime" + "strconv" "sync" "syscall" @@ -223,6 +224,28 @@ type localFile struct { lastDirentOffset uint64 } +var procSelfFD *fd.FD + +// OpenProcSelfFD opens the /proc/self/fd directory, which will be used to +// reopen file descriptors. +func OpenProcSelfFD() error { + d, err := syscall.Open("/proc/self/fd", syscall.O_RDONLY|syscall.O_DIRECTORY, 0) + if err != nil { + return fmt.Errorf("error opening /proc/self/fd: %v", err) + } + procSelfFD = fd.New(d) + return nil +} + +func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) { + d, err := syscall.Openat(int(procSelfFD.FD()), strconv.Itoa(f.FD()), mode&^syscall.O_NOFOLLOW, 0) + if err != nil { + return nil, err + } + + return fd.New(d), nil +} + func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, error) { path := path.Join(parent.hostPath, name) f, err := openAnyFile(path, func(mode int) (*fd.FD, error) { @@ -348,7 +371,7 @@ func (l *localFile) Open(mode p9.OpenFlags) (*fd.FD, p9.QID, uint32, error) { // name_to_handle_at and open_by_handle_at aren't supported by overlay2. log.Debugf("Open reopening file, mode: %v, %q", mode, l.hostPath) var err error - newFile, err = fd.Open(l.hostPath, openFlags|mode.OSFlags(), 0) + newFile, err = reopenProcFd(l.file, openFlags|mode.OSFlags()) if err != nil { return nil, p9.QID{}, 0, extractErrno(err) } @@ -477,7 +500,7 @@ func (l *localFile) Walk(names []string) ([]p9.QID, p9.File, error) { // Duplicate current file if 'names' is empty. if len(names) == 0 { newFile, err := openAnyFile(l.hostPath, func(mode int) (*fd.FD, error) { - return fd.Open(l.hostPath, openFlags|mode, 0) + return reopenProcFd(l.file, openFlags|mode) }) if err != nil { return nil, nil, extractErrno(err) @@ -596,7 +619,7 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) } // SetAttr implements p9.File. Due to mismatch in file API, options -// cannot be changed atomicaly and user may see partial changes when +// cannot be changed atomically and user may see partial changes when // an error happens. func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { conf := l.attachPoint.conf @@ -635,7 +658,7 @@ func (l *localFile) SetAttr(valid p9.SetAttrMask, attr p9.SetAttr) error { f := l.file if l.ft == regular && l.mode != p9.WriteOnly && l.mode != p9.ReadWrite { var err error - f, err = fd.Open(l.hostPath, openFlags|syscall.O_WRONLY, 0) + f, err = reopenProcFd(l.file, openFlags|os.O_WRONLY) if err != nil { return extractErrno(err) } diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index 68267df1b..0a162bb8a 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -31,6 +31,10 @@ func init() { allConfs = append(allConfs, rwConfs...) allConfs = append(allConfs, roConfs...) + + if err := OpenProcSelfFD(); err != nil { + panic(err) + } } func assertPanic(t *testing.T, f func()) { diff --git a/runsc/main.go b/runsc/main.go index 135061cd3..bc83c57a2 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -30,6 +30,7 @@ import ( "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/runsc/boot" "gvisor.dev/gvisor/runsc/cmd" "gvisor.dev/gvisor/runsc/specutils" @@ -61,7 +62,7 @@ var ( straceLogSize = flag.Uint("strace-log-size", 1024, "default size (in bytes) to log data argument blobs") // Flags that control sandbox runtime behavior. - platform = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") + platformName = flag.String("platform", "ptrace", "specifies which platform to use: ptrace (default), kvm") network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") gso = flag.Bool("gso", true, "enable generic segmenation offload") fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") @@ -139,8 +140,8 @@ func main() { } cmd.ErrorLogger = errorLogger - platformType, err := boot.MakePlatformType(*platform) - if err != nil { + platformType := *platformName + if _, err := platform.Lookup(platformType); err != nil { cmd.Fatalf("%v", err) } diff --git a/runsc/sandbox/BUILD b/runsc/sandbox/BUILD index f32da45c1..7fdceaab6 100644 --- a/runsc/sandbox/BUILD +++ b/runsc/sandbox/BUILD @@ -18,9 +18,10 @@ go_library( "//pkg/control/server", "//pkg/log", "//pkg/sentry/control", - "//pkg/sentry/platform/kvm", + "//pkg/sentry/platform", "//pkg/urpc", "//runsc/boot", + "//runsc/boot/platforms", "//runsc/cgroup", "//runsc/console", "//runsc/specutils", diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 52a5dfd77..4a11f617d 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -32,9 +32,10 @@ import ( "gvisor.dev/gvisor/pkg/control/server" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/control" - "gvisor.dev/gvisor/pkg/sentry/platform/kvm" + "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/urpc" "gvisor.dev/gvisor/runsc/boot" + "gvisor.dev/gvisor/runsc/boot/platforms" "gvisor.dev/gvisor/runsc/cgroup" "gvisor.dev/gvisor/runsc/console" "gvisor.dev/gvisor/runsc/specutils" @@ -437,10 +438,10 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF defer tty.Close() // Set the TTY as a controlling TTY on the sandbox process. - // Note that the Ctty field must be the FD of the TTY in the - // *new* process, not this process. Since we are about to - // assign the TTY to nextFD, we can use that value here. cmd.SysProcAttr.Setctty = true + // The Ctty FD must be the FD in the child process's FD table, + // which will be nextFD in this case. + // See https://github.com/golang/go/issues/29458. cmd.SysProcAttr.Ctty = nextFD // Pass the tty as all stdio fds to sandbox. @@ -491,7 +492,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF {Type: specs.UTSNamespace}, } - if conf.Platform == boot.PlatformPtrace { + if conf.Platform == platforms.Ptrace { // TODO(b/75837838): Also set a new PID namespace so that we limit // access to other host processes. log.Infof("Sandbox will be started in the current PID namespace") @@ -1046,19 +1047,15 @@ func (s *Sandbox) waitForStopped() error { // deviceFileForPlatform opens the device file for the given platform. If the // platform does not need a device file, then nil is returned. -func deviceFileForPlatform(p boot.PlatformType) (*os.File, error) { - var ( - f *os.File - err error - ) - switch p { - case boot.PlatformKVM: - f, err = kvm.OpenDevice() - default: - return nil, nil +func deviceFileForPlatform(name string) (*os.File, error) { + p, err := platform.Lookup(name) + if err != nil { + return nil, err } + + f, err := p.OpenDevice() if err != nil { return nil, fmt.Errorf("opening device file for platform %q: %v", p, err) } - return f, err + return f, nil } diff --git a/runsc/specutils/fs.go b/runsc/specutils/fs.go index 6e6902e9f..138aa4dd1 100644 --- a/runsc/specutils/fs.go +++ b/runsc/specutils/fs.go @@ -87,7 +87,7 @@ func OptionsToFlags(opts []string) uint32 { // PropOptionsToFlags converts propagation mount options to syscall flags. // Propagation options cannot be set other with other options and must be -// handled separatedly. +// handled separately. func PropOptionsToFlags(opts []string) uint32 { return optionsToFlags(opts, propOptionsMap) } diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index c9ef606cb..d441419cb 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -204,7 +204,7 @@ func SetUIDGIDMappings(cmd *exec.Cmd, s *specs.Spec) { } } -// HasCapabilities returns true if the user has all capabilties in 'cs'. +// HasCapabilities returns true if the user has all capabilities in 'cs'. func HasCapabilities(cs ...capability.Cap) bool { caps, err := capability.NewPid2(os.Getpid()) if err != nil { diff --git a/runsc/specutils/specutils.go b/runsc/specutils/specutils.go index 215828120..0b40e38a3 100644 --- a/runsc/specutils/specutils.go +++ b/runsc/specutils/specutils.go @@ -394,7 +394,7 @@ func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) er // DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern' // ends with '/', it's used as a directory with default file name. -// 'logPattern' can contain variables that are substitued: +// 'logPattern' can contain variables that are substituted: // - %TIMESTAMP%: is replaced with a timestamp using the following format: // <yyyymmdd-hhmmss.uuuuuu> // - %COMMAND%: is replaced with 'command' diff --git a/runsc/test/image/image_test.go b/runsc/test/image/image_test.go index 14cbd30c4..ddaa2c13b 100644 --- a/runsc/test/image/image_test.go +++ b/runsc/test/image/image_test.go @@ -209,11 +209,14 @@ func TestMysql(t *testing.T) { } func TestPythonHello(t *testing.T) { - if err := testutil.Pull("google/python-hello"); err != nil { + // TODO(b/136503277): Once we have more complete python runtime tests, + // we can drop this one. + const img = "gcr.io/gvisor-presubmit/python-hello" + if err := testutil.Pull(img); err != nil { t.Fatalf("docker pull failed: %v", err) } d := testutil.MakeDocker("python-hello-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/integration_test.go b/runsc/test/integration/integration_test.go index 55ebc2f5d..7cef4b9dd 100644 --- a/runsc/test/integration/integration_test.go +++ b/runsc/test/integration/integration_test.go @@ -86,16 +86,17 @@ func TestLifeCycle(t *testing.T) { } func TestPauseResume(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("pause-resume-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() @@ -149,15 +150,16 @@ func TestPauseResume(t *testing.T) { } func TestCheckpointRestore(t *testing.T) { + const img = "gcr.io/gvisor-presubmit/python-hello" if !testutil.IsPauseResumeSupported() { t.Log("Pause/resume is not supported, skipping test.") return } - if err := testutil.Pull("google/python-hello"); err != nil { + if err := testutil.Pull(img); err != nil { t.Fatal("docker pull failed:", err) } d := testutil.MakeDocker("save-restore-test") - if err := d.Run("-p", "8080", "google/python-hello"); err != nil { + if err := d.Run("-p", "8080", img); err != nil { t.Fatalf("docker run failed: %v", err) } defer d.CleanUp() diff --git a/runsc/test/integration/regression_test.go b/runsc/test/integration/regression_test.go index 39b30e757..fb68dda99 100644 --- a/runsc/test/integration/regression_test.go +++ b/runsc/test/integration/regression_test.go @@ -32,7 +32,7 @@ func TestBindOverlay(t *testing.T) { } d := testutil.MakeDocker("bind-overlay-test") - cmd := "nc -l -U /var/run/sock& sleep 1 && echo foobar-asdf | nc -U /var/run/sock" + cmd := "nc -l -U /var/run/sock & p=$! && sleep 1 && echo foobar-asdf | nc -U /var/run/sock && wait $p" got, err := d.RunFg("ubuntu:trusty", "bash", "-c", cmd) if err != nil { t.Fatal("docker run failed:", err) diff --git a/runsc/test/testutil/docker.go b/runsc/test/testutil/docker.go index b20f8e783..3f3e191b0 100644 --- a/runsc/test/testutil/docker.go +++ b/runsc/test/testutil/docker.go @@ -203,7 +203,7 @@ func (d *Docker) Stop() error { } // Run calls 'docker run' with the arguments provided. The container starts -// running in the backgroud and the call returns immediately. +// running in the background and the call returns immediately. func (d *Docker) Run(args ...string) error { a := []string{"run", "--runtime", d.Runtime, "--name", d.Name, "-d"} a = append(a, args...) diff --git a/runsc/test/testutil/testutil.go b/runsc/test/testutil/testutil.go index ecab6871d..a98675bfc 100644 --- a/runsc/test/testutil/testutil.go +++ b/runsc/test/testutil/testutil.go @@ -132,6 +132,7 @@ func TestConfig() *boot.Config { LogPackets: true, Network: boot.NetworkNone, Strace: true, + Platform: "ptrace", FileAccess: boot.FileAccessExclusive, TestOnlyAllowRunAsCurrentUserWithoutChroot: true, NumNetworkChannels: 1, |