diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/BUILD | 2 | ||||
-rw-r--r-- | runsc/boot/BUILD | 2 | ||||
-rw-r--r-- | runsc/boot/config.go | 4 | ||||
-rw-r--r-- | runsc/boot/loader.go | 49 | ||||
-rw-r--r-- | runsc/boot/user.go | 28 | ||||
-rw-r--r-- | runsc/boot/user_test.go | 3 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 1 | ||||
-rw-r--r-- | runsc/cmd/gofer.go | 5 | ||||
-rw-r--r-- | runsc/dockerutil/dockerutil.go | 8 | ||||
-rw-r--r-- | runsc/fsgofer/filter/config.go | 13 | ||||
-rw-r--r-- | runsc/fsgofer/filter/filter.go | 13 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 70 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer_test.go | 2 | ||||
-rw-r--r-- | runsc/main.go | 4 | ||||
-rw-r--r-- | runsc/sandbox/sandbox.go | 2 |
15 files changed, 157 insertions, 49 deletions
diff --git a/runsc/BUILD b/runsc/BUILD index 5e7dacb87..a3a0d6730 100644 --- a/runsc/BUILD +++ b/runsc/BUILD @@ -1,7 +1,7 @@ package(licenses = ["notice"]) # Apache 2.0 load("@io_bazel_rules_go//go:def.bzl", "go_binary") -load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_deb", "pkg_tar") +load("@rules_pkg//:pkg.bzl", "pkg_deb", "pkg_tar") go_binary( name = "runsc", diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD index 588bb8851..d90381c0f 100644 --- a/runsc/boot/BUILD +++ b/runsc/boot/BUILD @@ -80,6 +80,7 @@ go_library( "//pkg/tcpip/network/ipv6", "//pkg/tcpip/stack", "//pkg/tcpip/transport/icmp", + "//pkg/tcpip/transport/raw", "//pkg/tcpip/transport/tcp", "//pkg/tcpip/transport/udp", "//pkg/urpc", @@ -109,6 +110,7 @@ go_test( "//pkg/sentry/arch:registers_go_proto", "//pkg/sentry/context/contexttest", "//pkg/sentry/fs", + "//pkg/sentry/kernel/auth", "//pkg/unet", "//runsc/fsgofer", "@com_github_opencontainers_runtime-spec//specs-go:go_default_library", diff --git a/runsc/boot/config.go b/runsc/boot/config.go index 31103367d..38278d0a2 100644 --- a/runsc/boot/config.go +++ b/runsc/boot/config.go @@ -167,6 +167,9 @@ type Config struct { // Overlay is whether to wrap the root filesystem in an overlay. Overlay bool + // FSGoferHostUDS enables the gofer to mount a host UDS. + FSGoferHostUDS bool + // Network indicates what type of network to use. 
Network NetworkType @@ -253,6 +256,7 @@ func (c *Config) ToFlags() []string { "--debug-log-format=" + c.DebugLogFormat, "--file-access=" + c.FileAccess.String(), "--overlay=" + strconv.FormatBool(c.Overlay), + "--fsgofer-host-uds=" + strconv.FormatBool(c.FSGoferHostUDS), "--network=" + c.Network.String(), "--log-packets=" + strconv.FormatBool(c.LogPackets), "--platform=" + c.Platform, diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 823a34619..adf345490 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -20,7 +20,6 @@ import ( mrand "math/rand" "os" "runtime" - "strings" "sync" "sync/atomic" "syscall" @@ -55,6 +54,7 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/network/ipv6" "gvisor.dev/gvisor/pkg/tcpip/stack" "gvisor.dev/gvisor/pkg/tcpip/transport/icmp" + "gvisor.dev/gvisor/pkg/tcpip/transport/raw" "gvisor.dev/gvisor/pkg/tcpip/transport/tcp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/runsc/boot/filter" @@ -535,23 +535,12 @@ func (l *Loader) run() error { return err } - // Read /etc/passwd for the user's HOME directory and set the HOME - // environment variable as required by POSIX if it is not overridden by - // the user. - hasHomeEnvv := false - for _, envv := range l.rootProcArgs.Envv { - if strings.HasPrefix(envv, "HOME=") { - hasHomeEnvv = true - } - } - if !hasHomeEnvv { - homeDir, err := getExecUserHome(ctx, l.rootProcArgs.MountNamespace, uint32(l.rootProcArgs.Credentials.RealKUID)) - if err != nil { - return fmt.Errorf("error reading exec user: %v", err) - } - - l.rootProcArgs.Envv = append(l.rootProcArgs.Envv, "HOME="+homeDir) + // Add the HOME environment variable if it is not already set. + envv, err := maybeAddExecUserHome(ctx, l.rootProcArgs.MountNamespace, l.rootProcArgs.Credentials.RealKUID, l.rootProcArgs.Envv) + if err != nil { + return err } + l.rootProcArgs.Envv = envv // Create the root container init task. It will begin running // when the kernel is started. 
@@ -815,6 +804,16 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) { }) defer args.MountNamespace.DecRef() + // Add the HOME environment variable if it is not already set. + root := args.MountNamespace.Root() + defer root.DecRef() + ctx := fs.WithRoot(l.k.SupervisorContext(), root) + envv, err := maybeAddExecUserHome(ctx, args.MountNamespace, args.KUID, args.Envv) + if err != nil { + return 0, err + } + args.Envv = envv + // Start the process. proc := control.Proc{Kernel: l.k} args.PIDNamespace = tg.PIDNamespace() @@ -913,15 +912,17 @@ func newEmptyNetworkStack(conf *Config, clock tcpip.Clock) (inet.Stack, error) { case NetworkNone, NetworkSandbox: // NetworkNone sets up loopback using netstack. - netProtos := []string{ipv4.ProtocolName, ipv6.ProtocolName, arp.ProtocolName} - protoNames := []string{tcp.ProtocolName, udp.ProtocolName, icmp.ProtocolName4} - s := epsocket.Stack{stack.New(netProtos, protoNames, stack.Options{ - Clock: clock, - Stats: epsocket.Metrics, - HandleLocal: true, + netProtos := []stack.NetworkProtocol{ipv4.NewProtocol(), ipv6.NewProtocol(), arp.NewProtocol()} + transProtos := []stack.TransportProtocol{tcp.NewProtocol(), udp.NewProtocol(), icmp.NewProtocol4()} + s := epsocket.Stack{stack.New(stack.Options{ + NetworkProtocols: netProtos, + TransportProtocols: transProtos, + Clock: clock, + Stats: epsocket.Metrics, + HandleLocal: true, // Enable raw sockets for users with sufficient // privileges. - Raw: true, + UnassociatedFactory: raw.EndpointFactory{}, })} // Enable SACK Recovery. 
diff --git a/runsc/boot/user.go b/runsc/boot/user.go index d1d423a5c..56cc12ee0 100644 --- a/runsc/boot/user.go +++ b/runsc/boot/user.go @@ -16,6 +16,7 @@ package boot import ( "bufio" + "fmt" "io" "strconv" "strings" @@ -23,6 +24,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/usermem" ) @@ -42,7 +44,7 @@ func (r *fileReader) Read(buf []byte) (int, error) { // getExecUserHome returns the home directory of the executing user read from // /etc/passwd as read from the container filesystem. -func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32) (string, error) { +func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid auth.KUID) (string, error) { // The default user home directory to return if no user matching the user // if found in the /etc/passwd found in the image. const defaultHome = "/" @@ -82,7 +84,7 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32 File: f, } - homeDir, err := findHomeInPasswd(uid, r, defaultHome) + homeDir, err := findHomeInPasswd(uint32(uid), r, defaultHome) if err != nil { return "", err } @@ -90,6 +92,28 @@ func getExecUserHome(ctx context.Context, rootMns *fs.MountNamespace, uid uint32 return homeDir, nil } +// maybeAddExecUserHome returns a new slice with the HOME environment variable +// set if the slice does not already contain it, otherwise it returns the +// original slice unmodified. +func maybeAddExecUserHome(ctx context.Context, mns *fs.MountNamespace, uid auth.KUID, envv []string) ([]string, error) { + // Check if the envv already contains HOME. + for _, env := range envv { + if strings.HasPrefix(env, "HOME=") { + // We have it. Return the original slice unmodified. 
+ return envv, nil + } + } + + // Read /etc/passwd for the user's HOME directory and set the HOME + // environment variable as required by POSIX if it is not overridden by + // the user. + homeDir, err := getExecUserHome(ctx, mns, uid) + if err != nil { + return nil, fmt.Errorf("error reading exec user: %v", err) + } + return append(envv, "HOME="+homeDir), nil +} + // findHomeInPasswd parses a passwd file and returns the given user's home // directory. This function does it's best to replicate the runc's behavior. func findHomeInPasswd(uid uint32, passwd io.Reader, defaultHome string) (string, error) { diff --git a/runsc/boot/user_test.go b/runsc/boot/user_test.go index 906baf3e5..9aee2ad07 100644 --- a/runsc/boot/user_test.go +++ b/runsc/boot/user_test.go @@ -25,6 +25,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" ) func setupTempDir() (string, error) { @@ -68,7 +69,7 @@ func setupPasswd(contents string, perms os.FileMode) func() (string, error) { // TestGetExecUserHome tests the getExecUserHome function. func TestGetExecUserHome(t *testing.T) { tests := map[string]struct { - uid uint32 + uid auth.KUID createRoot func() (string, error) expected string }{ diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index e817eff77..bf1225e1c 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -127,6 +127,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("getting environment variables: %v", err) } } + if e.Capabilities == nil { // enableRaw is set to true to prevent the filtering out of // CAP_NET_RAW. 
This is the opposite of Create() because exec diff --git a/runsc/cmd/gofer.go b/runsc/cmd/gofer.go index 9faabf494..fbd579fb8 100644 --- a/runsc/cmd/gofer.go +++ b/runsc/cmd/gofer.go @@ -182,6 +182,7 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) cfg := fsgofer.Config{ ROMount: isReadonlyMount(m.Options), PanicOnWrite: g.panicOnWrite, + HostUDS: conf.FSGoferHostUDS, } ap, err := fsgofer.NewAttachPoint(m.Destination, cfg) if err != nil { @@ -200,6 +201,10 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs)) } + if conf.FSGoferHostUDS { + filter.InstallUDSFilters() + } + if err := filter.Install(); err != nil { Fatalf("installing seccomp filters: %v", err) } diff --git a/runsc/dockerutil/dockerutil.go b/runsc/dockerutil/dockerutil.go index c073d8f75..e37ec0ffd 100644 --- a/runsc/dockerutil/dockerutil.go +++ b/runsc/dockerutil/dockerutil.go @@ -287,6 +287,14 @@ func (d *Docker) Exec(args ...string) (string, error) { return do(a...) } +// ExecAsUser calls 'docker exec' as the given user with the arguments +// provided. +func (d *Docker) ExecAsUser(user string, args ...string) (string, error) { + a := []string{"exec", "--user", user, d.Name} + a = append(a, args...) + return do(a...) +} + // ExecWithTerminal calls 'docker exec -it' with the arguments provided and // attaches a pty to stdio. 
func (d *Docker) ExecWithTerminal(args ...string) (*exec.Cmd, *os.File, error) { diff --git a/runsc/fsgofer/filter/config.go b/runsc/fsgofer/filter/config.go index 2f3f2039a..c7922b54f 100644 --- a/runsc/fsgofer/filter/config.go +++ b/runsc/fsgofer/filter/config.go @@ -214,3 +214,16 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_UTIMENSAT: {}, syscall.SYS_WRITE: {}, } + +var udsSyscalls = seccomp.SyscallRules{ + syscall.SYS_SOCKET: []seccomp.Rule{ + { + seccomp.AllowValue(syscall.AF_UNIX), + }, + }, + syscall.SYS_CONNECT: []seccomp.Rule{ + { + seccomp.AllowAny{}, + }, + }, +} diff --git a/runsc/fsgofer/filter/filter.go b/runsc/fsgofer/filter/filter.go index 65053415f..289886720 100644 --- a/runsc/fsgofer/filter/filter.go +++ b/runsc/fsgofer/filter/filter.go @@ -23,11 +23,16 @@ import ( // Install installs seccomp filters. func Install() error { - s := allowedSyscalls - // Set of additional filters used by -race and -msan. Returns empty // when not enabled. - s.Merge(instrumentationFilters()) + allowedSyscalls.Merge(instrumentationFilters()) + + return seccomp.Install(allowedSyscalls) +} - return seccomp.Install(s) +// InstallUDSFilters extends the allowed syscalls to include those necessary for +// connecting to a host UDS. +func InstallUDSFilters() { + // Add additional filters required for connecting to the host's sockets. + allowedSyscalls.Merge(udsSyscalls) } diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 7c4d2b94e..29a82138e 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -21,6 +21,7 @@ package fsgofer import ( + "errors" "fmt" "io" "math" @@ -54,6 +55,7 @@ const ( regular fileType = iota directory symlink + socket unknown ) @@ -66,6 +68,8 @@ func (f fileType) String() string { return "directory" case symlink: return "symlink" + case socket: + return "socket" } return "unknown" } @@ -82,6 +86,9 @@ type Config struct { // PanicOnWrite panics on attempts to write to RO mounts. 
PanicOnWrite bool + + // HostUDS signals whether the gofer can mount a host's UDS. + HostUDS bool } type attachPoint struct { @@ -124,24 +131,50 @@ func (a *attachPoint) Attach() (p9.File, error) { if err != nil { return nil, fmt.Errorf("stat file %q, err: %v", a.prefix, err) } - mode := syscall.O_RDWR - if a.conf.ROMount || (stat.Mode&syscall.S_IFMT) == syscall.S_IFDIR { - mode = syscall.O_RDONLY - } - - // Open the root directory. - f, err := fd.Open(a.prefix, openFlags|mode, 0) - if err != nil { - return nil, fmt.Errorf("unable to open file %q, err: %v", a.prefix, err) - } + // Acquire the attach point lock. a.attachedMu.Lock() defer a.attachedMu.Unlock() + if a.attached { - f.Close() return nil, fmt.Errorf("attach point already attached, prefix: %s", a.prefix) } + // Hold the file descriptor we are converting into a p9.File. + var f *fd.FD + + // Apply the S_IFMT bitmask so we can detect file type appropriately. + switch fmtStat := stat.Mode & syscall.S_IFMT; fmtStat { + case syscall.S_IFSOCK: + // Check to see if the CLI option has been set to allow the UDS mount. + if !a.conf.HostUDS { + return nil, errors.New("host UDS support is disabled") + } + + // Attempt to open a connection. Bubble up the failures. + f, err = fd.DialUnix(a.prefix) + if err != nil { + return nil, err + } + + default: + // Default to Read/Write permissions. + mode := syscall.O_RDWR + + // If the configuration is Read Only or the mount point is a directory, + // set the mode to Read Only. + if a.conf.ROMount || fmtStat == syscall.S_IFDIR { + mode = syscall.O_RDONLY + } + + // Open the mount point & capture the FD. + f, err = fd.Open(a.prefix, openFlags|mode, 0) + if err != nil { + return nil, fmt.Errorf("unable to open file %q, err: %v", a.prefix, err) + } + } + + // Return a localFile object to the caller with the UDS FD included. 
rv, err := newLocalFile(a, f, a.prefix, stat) if err != nil { return nil, err @@ -295,7 +328,7 @@ func openAnyFile(path string, fn func(mode int) (*fd.FD, error)) (*fd.FD, error) return file, nil } -func getSupportedFileType(stat syscall.Stat_t) (fileType, error) { +func getSupportedFileType(stat syscall.Stat_t, permitSocket bool) (fileType, error) { var ft fileType switch stat.Mode & syscall.S_IFMT { case syscall.S_IFREG: @@ -304,6 +337,11 @@ func getSupportedFileType(stat syscall.Stat_t) (fileType, error) { ft = directory case syscall.S_IFLNK: ft = symlink + case syscall.S_IFSOCK: + if !permitSocket { + return unknown, syscall.EPERM + } + ft = socket default: return unknown, syscall.EPERM } @@ -311,7 +349,7 @@ func getSupportedFileType(stat syscall.Stat_t) (fileType, error) { } func newLocalFile(a *attachPoint, file *fd.FD, path string, stat syscall.Stat_t) (*localFile, error) { - ft, err := getSupportedFileType(stat) + ft, err := getSupportedFileType(stat, a.conf.HostUDS) if err != nil { return nil, err } @@ -1026,7 +1064,11 @@ func (l *localFile) Flush() error { // Connect implements p9.File. func (l *localFile) Connect(p9.ConnectFlags) (*fd.FD, error) { - return nil, syscall.ECONNREFUSED + // Check to see if the CLI option has been set to allow the UDS mount. + if !l.attachPoint.conf.HostUDS { + return nil, syscall.ECONNREFUSED + } + return fd.DialUnix(l.hostPath) } // Close implements p9.File. 
diff --git a/runsc/fsgofer/fsgofer_test.go b/runsc/fsgofer/fsgofer_test.go index cbbe71019..05af7e397 100644 --- a/runsc/fsgofer/fsgofer_test.go +++ b/runsc/fsgofer/fsgofer_test.go @@ -665,7 +665,7 @@ func TestAttachInvalidType(t *testing.T) { } f, err := a.Attach() if f != nil || err == nil { - t.Fatalf("Attach should have failed, got (%v, nil)", f) + t.Fatalf("Attach should have failed, got (%v, %v)", f, err) } }) } diff --git a/runsc/main.go b/runsc/main.go index ff74c0a3d..7dce9dc00 100644 --- a/runsc/main.go +++ b/runsc/main.go @@ -68,6 +68,7 @@ var ( network = flag.String("network", "sandbox", "specifies which network to use: sandbox (default), host, none. Using network inside the sandbox is more secure because it's isolated from the host network.") gso = flag.Bool("gso", true, "enable generic segmenation offload") fileAccess = flag.String("file-access", "exclusive", "specifies which filesystem to use for the root mount: exclusive (default), shared. Volume mounts are always shared.") + fsGoferHostUDS = flag.Bool("fsgofer-host-uds", false, "Allow the gofer to mount Unix Domain Sockets.") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable overlay. All modifications are stored in memory inside the sandbox.") watchdogAction = flag.String("watchdog-action", "log", "sets what action the watchdog takes when triggered: log (default), panic.") panicSignal = flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.") @@ -195,6 +196,7 @@ func main() { DebugLog: *debugLog, DebugLogFormat: *debugLogFormat, FileAccess: fsAccess, + FSGoferHostUDS: *fsGoferHostUDS, Overlay: *overlay, Network: netType, GSO: *gso, @@ -239,7 +241,7 @@ func main() { // want with them. Since Docker and Containerd both eat boot's stderr, we // dup our stderr to the provided log FD so that panics will appear in the // logs, rather than just disappear. 
- if err := syscall.Dup2(int(f.Fd()), int(os.Stderr.Fd())); err != nil { + if err := syscall.Dup3(int(f.Fd()), int(os.Stderr.Fd()), 0); err != nil { cmd.Fatalf("error dup'ing fd %d to stderr: %v", f.Fd(), err) } diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 4c6c83fbd..ee9327fc8 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -352,7 +352,7 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF } if conf.DebugLog != "" { test := "" - if len(conf.TestOnlyTestNameEnv) == 0 { + if len(conf.TestOnlyTestNameEnv) != 0 { // Fetch test name if one is provided and the test only flag was set. if t, ok := specutils.EnvVar(args.Spec.Process.Env, conf.TestOnlyTestNameEnv); ok { test = t |