diff options
Diffstat (limited to 'runsc')
-rw-r--r-- | runsc/boot/filter/config.go | 38 | ||||
-rw-r--r-- | runsc/boot/loader.go | 2 | ||||
-rw-r--r-- | runsc/cmd/checkpoint.go | 2 | ||||
-rw-r--r-- | runsc/cmd/debug.go | 11 | ||||
-rw-r--r-- | runsc/cmd/delete.go | 2 | ||||
-rw-r--r-- | runsc/cmd/events.go | 2 | ||||
-rw-r--r-- | runsc/cmd/exec.go | 2 | ||||
-rw-r--r-- | runsc/cmd/kill.go | 2 | ||||
-rw-r--r-- | runsc/cmd/list.go | 8 | ||||
-rw-r--r-- | runsc/cmd/pause.go | 2 | ||||
-rw-r--r-- | runsc/cmd/ps.go | 2 | ||||
-rw-r--r-- | runsc/cmd/resume.go | 2 | ||||
-rw-r--r-- | runsc/cmd/start.go | 2 | ||||
-rw-r--r-- | runsc/cmd/state.go | 2 | ||||
-rw-r--r-- | runsc/cmd/wait.go | 2 | ||||
-rw-r--r-- | runsc/config/config.go | 2 | ||||
-rw-r--r-- | runsc/container/container.go | 182 | ||||
-rw-r--r-- | runsc/container/state_file.go | 236 | ||||
-rw-r--r-- | runsc/fsgofer/fsgofer.go | 79 |
19 files changed, 348 insertions, 232 deletions
diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 4e3bb9ac7..eacd73531 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -353,6 +353,11 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVERR), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IPV6), seccomp.EqualTo(syscall.IPV6_TCLASS), }, @@ -364,6 +369,11 @@ func hostInetFilters() seccomp.SyscallRules { { seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVERR), + }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), seccomp.EqualTo(syscall.IPV6_V6ONLY), }, { @@ -446,13 +456,6 @@ func hostInetFilters() seccomp.SyscallRules { syscall.SYS_SETSOCKOPT: []seccomp.Rule{ { seccomp.MatchAny{}, - seccomp.EqualTo(syscall.SOL_IPV6), - seccomp.EqualTo(syscall.IPV6_V6ONLY), - seccomp.MatchAny{}, - seccomp.EqualTo(4), - }, - { - seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_SOCKET), seccomp.EqualTo(syscall.SO_SNDBUF), seccomp.MatchAny{}, @@ -523,6 +526,13 @@ func hostInetFilters() seccomp.SyscallRules { }, { seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IP), + seccomp.EqualTo(syscall.IP_RECVERR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, seccomp.EqualTo(syscall.SOL_IPV6), seccomp.EqualTo(syscall.IPV6_TCLASS), seccomp.MatchAny{}, @@ -542,6 +552,20 @@ func hostInetFilters() seccomp.SyscallRules { seccomp.MatchAny{}, seccomp.EqualTo(4), }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_RECVERR), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, + { + seccomp.MatchAny{}, + seccomp.EqualTo(syscall.SOL_IPV6), + seccomp.EqualTo(syscall.IPV6_V6ONLY), + seccomp.MatchAny{}, + seccomp.EqualTo(4), + }, }, syscall.SYS_SHUTDOWN: []seccomp.Rule{ { diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go index 3df013d34..98ea8db64 100644 --- a/runsc/boot/loader.go +++ b/runsc/boot/loader.go @@ -294,7 +294,7 @@ func New(args Args) (*Loader, error) { if args.TotalMem > 0 { // Adjust the total memory returned by the Sentry so that applications that // use /proc/meminfo can make allocations based on this limit. - usage.MinimumTotalMemoryBytes = args.TotalMem + usage.MaximumTotalMemoryBytes = args.TotalMem log.Infof("Setting total memory to %.2f GB", float64(args.TotalMem)/(1<<30)) } diff --git a/runsc/cmd/checkpoint.go b/runsc/cmd/checkpoint.go index c0bc8f064..124198239 100644 --- a/runsc/cmd/checkpoint.go +++ b/runsc/cmd/checkpoint.go @@ -75,7 +75,7 @@ func (c *Checkpoint) Execute(_ context.Context, f *flag.FlagSet, args ...interfa conf := args[0].(*config.Config) waitStatus := args[1].(*syscall.WaitStatus) - cont, err := container.LoadAndCheck(conf.RootDir, id) + cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/debug.go b/runsc/cmd/debug.go index 609e8231c..1e5a7471a 100644 --- a/runsc/cmd/debug.go +++ b/runsc/cmd/debug.go @@ -90,8 +90,10 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) f.Usage() return subcommands.ExitUsageError } + id := f.Arg(0) + var err error - c, err = container.LoadAndCheck(conf.RootDir, f.Arg(0)) + c, err = container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { return Errorf("loading container %q: %v", f.Arg(0), err) } @@ -106,9 +108,10 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) return Errorf("listing containers: %v", err) } for _, id := range ids { - candidate, err := container.LoadAndCheck(conf.RootDir, id) + candidate, err := container.Load(conf.RootDir, id, container.LoadOpts{Exact: true, SkipCheck: true}) if err != nil { - return Errorf("loading container %q: %v", id, err) + log.Warningf("Skipping container %q: %v", id, err) + continue } if candidate.SandboxPid() == d.pid { c = candidate @@ -120,7 +123,7 @@ func (d *Debug) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } } - if c.Sandbox == nil || !c.Sandbox.IsRunning() { + if c.IsSandboxRunning() { return Errorf("container sandbox is not running") } log.Infof("Found sandbox %q, PID: %d", c.Sandbox.ID, c.Sandbox.Pid) diff --git a/runsc/cmd/delete.go b/runsc/cmd/delete.go index a25637265..a750be131 100644 --- a/runsc/cmd/delete.go +++ b/runsc/cmd/delete.go @@ -68,7 +68,7 @@ func (d *Delete) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} func (d *Delete) execute(ids []string, conf *config.Config) error { for _, id := range ids { - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { if os.IsNotExist(err) && d.force { log.Warningf("couldn't find container %q: %v", id, err) diff --git a/runsc/cmd/events.go b/runsc/cmd/events.go index 3836b7b4e..75b0aac8d 100644 --- a/runsc/cmd/events.go +++ b/runsc/cmd/events.go @@ -74,7 +74,7 @@ func (evs *Events) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading sandbox: %v", err) } diff --git a/runsc/cmd/exec.go b/runsc/cmd/exec.go index eafd6285c..8558d34ae 100644 --- a/runsc/cmd/exec.go +++ b/runsc/cmd/exec.go @@ -112,7 +112,7 @@ func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } waitStatus := args[1].(*syscall.WaitStatus) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading sandbox: %v", err) } diff --git a/runsc/cmd/kill.go b/runsc/cmd/kill.go index fe69e2a08..aecf0b7ab 100644 --- a/runsc/cmd/kill.go +++ b/runsc/cmd/kill.go @@ -69,7 +69,7 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) Fatalf("it is invalid to specify both --all and --pid") } - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/list.go b/runsc/cmd/list.go index 6907eb16a..9f9a47bd8 100644 --- a/runsc/cmd/list.go +++ b/runsc/cmd/list.go @@ -24,6 +24,7 @@ import ( "github.com/google/subcommands" specs "github.com/opencontainers/runtime-spec/specs-go" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/runsc/config" "gvisor.dev/gvisor/runsc/container" "gvisor.dev/gvisor/runsc/flag" @@ -71,7 +72,7 @@ func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) if l.quiet { for _, id := range ids { - fmt.Println(id) + fmt.Println(id.ContainerID) } return subcommands.ExitSuccess } @@ -79,9 +80,10 @@ func (l *List) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) // Collect the containers. var containers []*container.Container for _, id := range ids { - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, id, container.LoadOpts{Exact: true}) if err != nil { - Fatalf("loading container %q: %v", id, err) + log.Warningf("Skipping container %q: %v", id, err) + continue } containers = append(containers, c) } diff --git a/runsc/cmd/pause.go b/runsc/cmd/pause.go index fe7d4e257..15ef7b577 100644 --- a/runsc/cmd/pause.go +++ b/runsc/cmd/pause.go @@ -55,7 +55,7 @@ func (*Pause) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - cont, err := container.LoadAndCheck(conf.RootDir, id) + cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/ps.go b/runsc/cmd/ps.go index 18d7a1436..04e3e0bdd 100644 --- a/runsc/cmd/ps.go +++ b/runsc/cmd/ps.go @@ -60,7 +60,7 @@ func (ps *PS) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading sandbox: %v", err) } diff --git a/runsc/cmd/resume.go b/runsc/cmd/resume.go index a00928204..856469252 100644 --- a/runsc/cmd/resume.go +++ b/runsc/cmd/resume.go @@ -56,7 +56,7 @@ func (r *Resume) Execute(_ context.Context, f *flag.FlagSet, args ...interface{} id := f.Arg(0) conf := args[0].(*config.Config) - cont, err := container.LoadAndCheck(conf.RootDir, id) + cont, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/start.go b/runsc/cmd/start.go index f6499cc44..964a65064 100644 --- a/runsc/cmd/start.go +++ b/runsc/cmd/start.go @@ -55,7 +55,7 @@ func (*Start) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/state.go b/runsc/cmd/state.go index d8a70dd7f..1f7913d5a 100644 --- a/runsc/cmd/state.go +++ b/runsc/cmd/state.go @@ -57,7 +57,7 @@ func (*State) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) s id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/cmd/wait.go b/runsc/cmd/wait.go index c1d6aeae2..5d55422c7 100644 --- a/runsc/cmd/wait.go +++ b/runsc/cmd/wait.go @@ -72,7 +72,7 @@ func (wt *Wait) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) id := f.Arg(0) conf := args[0].(*config.Config) - c, err := container.LoadAndCheck(conf.RootDir, id) + c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{}) if err != nil { Fatalf("loading container: %v", err) } diff --git a/runsc/config/config.go b/runsc/config/config.go index b02d8e2e1..e9fd7708f 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -131,7 +131,7 @@ type Config struct { NumNetworkChannels int `flag:"num-network-channels"` // Rootless allows the sandbox to be started with a user that is not root. - // Defense is depth measures are weaker with rootless. Specifically, the + // Defense in depth measures are weaker in rootless mode. Specifically, the // sandbox and Gofer process run as root inside a user namespace with root // mapped to the caller's user. Rootless bool `flag:"rootless"` diff --git a/runsc/container/container.go b/runsc/container/container.go index 418a27beb..8b78660f7 100644 --- a/runsc/container/container.go +++ b/runsc/container/container.go @@ -128,125 +128,6 @@ type Container struct { goferIsChild bool } -// loadSandbox loads all containers that belong to the sandbox with the given -// ID. -func loadSandbox(rootDir, id string) ([]*Container, error) { - cids, err := List(rootDir) - if err != nil { - return nil, err - } - - // Load the container metadata. - var containers []*Container - for _, cid := range cids { - container, err := Load(rootDir, cid) - if err != nil { - // Container file may not exist if it raced with creation/deletion or - // directory was left behind. Load provides a snapshot in time, so it's - // fine to skip it. - if os.IsNotExist(err) { - continue - } - return nil, fmt.Errorf("loading container %q: %v", id, err) - } - if container.Sandbox.ID == id { - containers = append(containers, container) - } - } - return containers, nil -} - -// Load loads a container with the given id from a metadata file. partialID may -// be an abbreviation of the full container id, in which case Load loads the -// container to which id unambiguously refers to. Returns ErrNotExist if -// container doesn't exist. -func Load(rootDir, partialID string) (*Container, error) { - log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID) - if err := validateID(partialID); err != nil { - return nil, fmt.Errorf("invalid container id: %v", err) - } - - id, err := findContainerID(rootDir, partialID) - if err != nil { - // Preserve error so that callers can distinguish 'not found' errors. - return nil, err - } - - state := StateFile{ - RootDir: rootDir, - ID: id, - } - defer state.close() - - c := &Container{} - if err := state.load(c); err != nil { - if os.IsNotExist(err) { - // Preserve error so that callers can distinguish 'not found' errors. - return nil, err - } - return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err) - } - return c, nil -} - -// LoadAndCheck is similar to Load(), but also checks if the container is still -// running to get an error earlier to the caller. -func LoadAndCheck(rootDir, partialID string) (*Container, error) { - c, err := Load(rootDir, partialID) - if err != nil { - // Preserve error so that callers can distinguish 'not found' errors. - return nil, err - } - - // If the status is "Running" or "Created", check that the sandbox/container - // is still running, setting it to Stopped if not. - // - // This is inherently racy. - switch c.Status { - case Created: - if !c.isSandboxRunning() { - // Sandbox no longer exists, so this container definitely does not exist. - c.changeStatus(Stopped) - } - case Running: - if err := c.SignalContainer(syscall.Signal(0), false); err != nil { - c.changeStatus(Stopped) - } - } - - return c, nil -} - -func findContainerID(rootDir, partialID string) (string, error) { - // Check whether the id fully specifies an existing container. - stateFile := buildStatePath(rootDir, partialID) - if _, err := os.Stat(stateFile); err == nil { - return partialID, nil - } - - // Now see whether id could be an abbreviation of exactly 1 of the - // container ids. If id is ambiguous (it could match more than 1 - // container), it is an error. - ids, err := List(rootDir) - if err != nil { - return "", err - } - rv := "" - for _, id := range ids { - if strings.HasPrefix(id, partialID) { - if rv != "" { - return "", fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id) - } - rv = id - } - } - if rv == "" { - return "", os.ErrNotExist - } - log.Debugf("abbreviated id %q resolves to full id %q", partialID, rv) - return rv, nil -} - // Args is used to configure a new container. type Args struct { // ID is the container unique identifier. @@ -291,6 +172,15 @@ func New(conf *config.Config, args Args) (*Container, error) { return nil, fmt.Errorf("creating container root directory %q: %v", conf.RootDir, err) } + sandboxID := args.ID + if !isRoot(args.Spec) { + var ok bool + sandboxID, ok = specutils.SandboxID(args.Spec) + if !ok { + return nil, fmt.Errorf("no sandbox ID found when creating container") + } + } + c := &Container{ ID: args.ID, Spec: args.Spec, @@ -301,7 +191,10 @@ func New(conf *config.Config, args Args) (*Container, error) { Owner: os.Getenv("USER"), Saver: StateFile{ RootDir: conf.RootDir, - ID: args.ID, + ID: FullID{ + SandboxID: sandboxID, + ContainerID: args.ID, + }, }, } // The Cleanup object cleans up partially created containers when an error @@ -316,10 +209,17 @@ func New(conf *config.Config, args Args) (*Container, error) { } defer c.Saver.unlock() - // If the metadata annotations indicate that this container should be - // started in an existing sandbox, we must do so. The metadata will - // indicate the ID of the sandbox, which is the same as the ID of the - // init container in the sandbox. + // If the metadata annotations indicate that this container should be started + // in an existing sandbox, we must do so. These are the possible metadata + // annotation states: + // 1. No annotations: it means that there is a single container and this + // container is obviously the root. Both container and sandbox share the + // ID. + // 2. Container type == sandbox: it means this is the root container + // starting the sandbox. Both container and sandbox share the same ID. + // 3. Container type == container: it means this is a subcontainer of an + // already started sandbox. In this case, container ID is different than + // the sandbox ID. if isRoot(args.Spec) { log.Debugf("Creating new sandbox for container, cid: %s", args.ID) @@ -358,7 +258,7 @@ func New(conf *config.Config, args Args) (*Container, error) { // Start a new sandbox for this container. Any errors after this point // must destroy the container. sandArgs := &sandbox.Args{ - ID: args.ID, + ID: sandboxID, Spec: args.Spec, BundleDir: args.BundleDir, ConsoleSocket: args.ConsoleSocket, @@ -379,22 +279,14 @@ func New(conf *config.Config, args Args) (*Container, error) { return nil, err } } else { - // This is sort of confusing. For a sandbox with a root - // container and a child container in it, runsc sees: - // * A container struct whose sandbox ID is equal to the - // container ID. This is the root container that is tied to - // the creation of the sandbox. - // * A container struct whose sandbox ID is equal to the above - // container/sandbox ID, but that has a different container - // ID. This is the child container. - sbid, ok := specutils.SandboxID(args.Spec) - if !ok { - return nil, fmt.Errorf("no sandbox ID found when creating container") - } - log.Debugf("Creating new container, cid: %s, sandbox: %s", c.ID, sbid) + log.Debugf("Creating new container, cid: %s, sandbox: %s", c.ID, sandboxID) // Find the sandbox associated with this ID. - sb, err := LoadAndCheck(conf.RootDir, sbid) + fullID := FullID{ + SandboxID: sandboxID, + ContainerID: sandboxID, + } + sb, err := Load(conf.RootDir, fullID, LoadOpts{Exact: true}) if err != nil { return nil, err } @@ -628,7 +520,7 @@ func (c *Container) Wait() (syscall.WaitStatus, error) { // returns its WaitStatus. func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { log.Debugf("Wait on process %d in sandbox, cid: %s", pid, c.Sandbox.ID) - if !c.isSandboxRunning() { + if !c.IsSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") } return c.Sandbox.WaitPID(c.Sandbox.ID, pid) @@ -638,7 +530,7 @@ func (c *Container) WaitRootPID(pid int32) (syscall.WaitStatus, error) { // its WaitStatus. func (c *Container) WaitPID(pid int32) (syscall.WaitStatus, error) { log.Debugf("Wait on process %d in container, cid: %s", pid, c.ID) - if !c.isSandboxRunning() { + if !c.IsSandboxRunning() { return 0, fmt.Errorf("sandbox is not running") } return c.Sandbox.WaitPID(c.ID, pid) @@ -658,7 +550,7 @@ func (c *Container) SignalContainer(sig syscall.Signal, all bool) error { if err := c.requireStatus("signal", Running, Stopped); err != nil { return err } - if !c.isSandboxRunning() { + if !c.IsSandboxRunning() { return fmt.Errorf("sandbox is not running") } return c.Sandbox.SignalContainer(c.ID, sig, all) @@ -670,7 +562,7 @@ func (c *Container) SignalProcess(sig syscall.Signal, pid int32) error { if err := c.requireStatus("signal a process inside", Running); err != nil { return err } - if !c.isSandboxRunning() { + if !c.IsSandboxRunning() { return fmt.Errorf("sandbox is not running") } return c.Sandbox.SignalProcess(c.ID, int32(pid), sig, false) @@ -889,7 +781,7 @@ func (c *Container) waitForStopped() error { defer cancel() b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) op := func() error { - if c.isSandboxRunning() { + if c.IsSandboxRunning() { if err := c.SignalContainer(syscall.Signal(0), false); err == nil { return fmt.Errorf("container is still running") } @@ -1091,7 +983,7 @@ func (c *Container) changeStatus(s Status) { c.Status = s } -func (c *Container) isSandboxRunning() bool { +func (c *Container) IsSandboxRunning() bool { return c.Sandbox != nil && c.Sandbox.IsRunning() } diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go index 17a251530..dfbf1f2d3 100644 --- a/runsc/container/state_file.go +++ b/runsc/container/state_file.go @@ -20,58 +20,228 @@ import ( "io/ioutil" "os" "path/filepath" + "regexp" + "strings" + "syscall" "github.com/gofrs/flock" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sync" ) -const stateFileExtension = ".state" +const stateFileExtension = "state" -// StateFile handles load from/save to container state safely from multiple -// processes. It uses a lock file to provide synchronization between operations. +// LoadOpts provides options for Load()ing a container. +type LoadOpts struct { + // Exact tells whether the search should be exact. See Load() for more. + Exact bool + + // SkipCheck tells Load() to skip checking if container is runnning. + SkipCheck bool +} + +// Load loads a container with the given id from a metadata file. "id" may +// be an abbreviation of the full container id in case LoadOpts.Exact if not +// set. It also checks if the container is still running, in order to return +// an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck +// is set. // -// The lock file is located at: "${s.RootDir}/${s.ID}.lock". -// The state file is located at: "${s.RootDir}/${s.ID}.state". -type StateFile struct { - // RootDir is the directory containing the container metadata file. - RootDir string `json:"rootDir"` +// Returns ErrNotExist if no container is found. Returns error in case more than +// one containers matching the ID prefix is found. +func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) { + //log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID) + if !opts.Exact { + var err error + id, err = findContainerID(rootDir, id.ContainerID) + if err != nil { + // Preserve error so that callers can distinguish 'not found' errors. + return nil, err + } + } - // ID is the container ID. - ID string `json:"id"` + if err := id.validate(); err != nil { + return nil, fmt.Errorf("invalid container id: %v", err) + } + state := StateFile{ + RootDir: rootDir, + ID: id, + } + defer state.close() - // - // Fields below this line are not saved in the state file and will not - // be preserved across commands. - // + c := &Container{} + if err := state.load(c); err != nil { + if os.IsNotExist(err) { + // Preserve error so that callers can distinguish 'not found' errors. + return nil, err + } + return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err) + } - once sync.Once - flock *flock.Flock + if !opts.SkipCheck { + // If the status is "Running" or "Created", check that the sandbox/container + // is still running, setting it to Stopped if not. + // + // This is inherently racy. + switch c.Status { + case Created: + if !c.IsSandboxRunning() { + // Sandbox no longer exists, so this container definitely does not exist. + c.changeStatus(Stopped) + } + case Running: + if err := c.SignalContainer(syscall.Signal(0), false); err != nil { + c.changeStatus(Stopped) + } + } + } + + return c, nil } // List returns all container ids in the given root directory. -func List(rootDir string) ([]string, error) { +func List(rootDir string) ([]FullID, error) { log.Debugf("List containers %q", rootDir) - list, err := filepath.Glob(filepath.Join(rootDir, "*"+stateFileExtension)) + return listMatch(rootDir, FullID{}) +} + +// listMatch returns all container ids that match the provided id. +func listMatch(rootDir string, id FullID) ([]FullID, error) { + id.SandboxID += "*" + id.ContainerID += "*" + pattern := buildPath(rootDir, id, stateFileExtension) + list, err := filepath.Glob(pattern) if err != nil { return nil, err } - var out []string + var out []FullID for _, path := range list { - // Filter out files that do no belong to a container. - fileName := filepath.Base(path) - if len(fileName) < len(stateFileExtension) { - panic(fmt.Sprintf("invalid file match %q", path)) - } - // Remove the extension. - cid := fileName[:len(fileName)-len(stateFileExtension)] - if validateID(cid) == nil { - out = append(out, cid) + id, err := parseFileName(filepath.Base(path)) + if err == nil { + out = append(out, id) } } return out, nil } +// loadSandbox loads all containers that belong to the sandbox with the given +// ID. +func loadSandbox(rootDir, id string) ([]*Container, error) { + cids, err := listMatch(rootDir, FullID{SandboxID: id}) + if err != nil { + return nil, err + } + + // Load the container metadata. + var containers []*Container + for _, cid := range cids { + container, err := Load(rootDir, cid, LoadOpts{Exact: true, SkipCheck: true}) + if err != nil { + // Container file may not exist if it raced with creation/deletion or + // directory was left behind. Load provides a snapshot in time, so it's + // fine to skip it. + if os.IsNotExist(err) { + continue + } + return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err) + } + containers = append(containers, container) + } + return containers, nil +} + +func findContainerID(rootDir, partialID string) (FullID, error) { + // Check whether the id fully specifies an existing container. + pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension) + list, err := filepath.Glob(pattern) + if err != nil { + return FullID{}, err + } + switch len(list) { + case 0: + return FullID{}, os.ErrNotExist + case 1: + return parseFileName(filepath.Base(list[0])) + } + + // Now see whether id could be an abbreviation of exactly 1 of the + // container ids. If id is ambiguous (it could match more than 1 + // container), it is an error. + ids, err := List(rootDir) + if err != nil { + return FullID{}, err + } + var rv *FullID + for _, id := range ids { + if strings.HasPrefix(id.ContainerID, partialID) { + if rv != nil { + return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id) + } + rv = &id + } + } + if rv == nil { + return FullID{}, os.ErrNotExist + } + log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv) + return *rv, nil +} + +func parseFileName(name string) (FullID, error) { + re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension) + groups := re.FindStringSubmatch(name) + if len(groups) != 3 { + return FullID{}, fmt.Errorf("invalid state file name format: %q", name) + } + id := FullID{ + SandboxID: groups[2], + ContainerID: groups[1], + } + if err := id.validate(); err != nil { + return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err) + } + return id, nil +} + +// FullID combines sandbox and container ID to identify a container. Sandbox ID +// is used to allow all containers for a given sandbox to be loaded by matching +// sandbox ID in the file name. +type FullID struct { + SandboxID string `json:"sandboxId"` + ContainerID string `json:"containerId"` +} + +func (f *FullID) String() string { + return f.SandboxID + "/" + f.ContainerID +} + +func (f *FullID) validate() error { + if err := validateID(f.SandboxID); err != nil { + return err + } + return validateID(f.ContainerID) +} + +// StateFile handles load from/save to container state safely from multiple +// processes. It uses a lock file to provide synchronization between operations. +// +// The lock file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.lock". +// The state file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.state". +type StateFile struct { + // RootDir is the directory containing the container metadata file. + RootDir string `json:"rootDir"` + + // ID is the sandbox+container ID. + ID FullID `json:"id"` + + // + // Fields below this line are not saved in the state file and will not + // be preserved across commands. + // + + once sync.Once + flock *flock.Flock +} + // lock globally locks all locking operations for the container. func (s *StateFile) lock() error { s.once.Do(func() { @@ -157,18 +327,20 @@ func (s *StateFile) close() error { return s.flock.Close() } -func buildStatePath(rootDir, id string) string { - return filepath.Join(rootDir, id+stateFileExtension) +func buildPath(rootDir string, id FullID, extension string) string { + // Note: "_" and ":" are not valid in IDs. + name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension) + return filepath.Join(rootDir, name) } // statePath is the full path to the state file. func (s *StateFile) statePath() string { - return buildStatePath(s.RootDir, s.ID) + return buildPath(s.RootDir, s.ID, stateFileExtension) } // lockPath is the full path to the lock file. func (s *StateFile) lockPath() string { - return filepath.Join(s.RootDir, s.ID+".lock") + return buildPath(s.RootDir, s.ID, "lock") } // destroy deletes all state created by the stateFile. It may be called with the diff --git a/runsc/fsgofer/fsgofer.go b/runsc/fsgofer/fsgofer.go index 0b628c8ce..3d94ffeb4 100644 --- a/runsc/fsgofer/fsgofer.go +++ b/runsc/fsgofer/fsgofer.go @@ -49,6 +49,21 @@ const ( allowedOpenFlags = unix.O_TRUNC ) +var ( + // Remember the process uid/gid to skip chown calls when file owner/group + // doesn't need to be changed. + processUID = p9.UID(os.Getuid()) + processGID = p9.GID(os.Getgid()) +) + +// join is equivalent to path.Join() but skips path.Clean() which is expensive. +func join(parent, child string) string { + if child == "." || child == ".." { + panic(fmt.Sprintf("invalid child path %q", child)) + } + return parent + "/" + child +} + // Config sets configuration options for each attach point. type Config struct { // ROMount is set to true if this is a readonly mount. @@ -115,7 +130,7 @@ func (a *attachPoint) Attach() (p9.File, error) { return nil, fmt.Errorf("unable to stat %q: %v", a.prefix, err) } - lf, err := newLocalFile(a, f, a.prefix, readable, stat) + lf, err := newLocalFile(a, f, a.prefix, readable, &stat) if err != nil { return nil, fmt.Errorf("unable to create localFile %q: %v", a.prefix, err) } @@ -124,7 +139,7 @@ func (a *attachPoint) Attach() (p9.File, error) { } // makeQID returns a unique QID for the given stat buffer. -func (a *attachPoint) makeQID(stat unix.Stat_t) p9.QID { +func (a *attachPoint) makeQID(stat *unix.Stat_t) p9.QID { a.deviceMu.Lock() defer a.deviceMu.Unlock() @@ -245,7 +260,7 @@ func reopenProcFd(f *fd.FD, mode int) (*fd.FD, error) { } func openAnyFileFromParent(parent *localFile, name string) (*fd.FD, string, bool, error) { - pathDebug := path.Join(parent.hostPath, name) + pathDebug := join(parent.hostPath, name) f, readable, err := openAnyFile(pathDebug, func(mode int) (*fd.FD, error) { return fd.OpenAt(parent.file, name, openFlags|mode, 0) }) @@ -297,8 +312,8 @@ func openAnyFile(pathDebug string, fn func(mode int) (*fd.FD, error)) (*fd.FD, b return nil, false, extractErrno(err) } -func checkSupportedFileType(stat unix.Stat_t, permitSocket bool) error { - switch stat.Mode & unix.S_IFMT { +func checkSupportedFileType(mode uint32, permitSocket bool) error { + switch mode & unix.S_IFMT { case unix.S_IFREG, unix.S_IFDIR, unix.S_IFLNK: return nil @@ -313,8 +328,8 @@ func checkSupportedFileType(stat unix.Stat_t, permitSocket bool) error { } } -func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat unix.Stat_t) (*localFile, error) { - if err := checkSupportedFileType(stat, a.conf.HostUDS); err != nil { +func newLocalFile(a *attachPoint, file *fd.FD, path string, readable bool, stat *unix.Stat_t) (*localFile, error) { + if err := checkSupportedFileType(stat.Mode, a.conf.HostUDS); err != nil { return nil, err } @@ -442,8 +457,10 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode, }) defer cu.Clean() - if err := fchown(child.FD(), uid, gid); err != nil { - return nil, nil, p9.QID{}, 0, extractErrno(err) + if uid != processUID || gid != processGID { + if err := fchown(child.FD(), uid, gid); err != nil { + return nil, nil, p9.QID{}, 0, extractErrno(err) + } } stat, err := fstat(child.FD()) if err != nil { @@ -452,11 +469,11 @@ func (l *localFile) Create(name string, p9Flags p9.OpenFlags, perm p9.FileMode, c := &localFile{ attachPoint: l.attachPoint, - hostPath: path.Join(l.hostPath, name), + hostPath: join(l.hostPath, name), file: child, mode: mode, fileType: unix.S_IFREG, - qid: l.attachPoint.makeQID(stat), + qid: l.attachPoint.makeQID(&stat), } cu.Release() @@ -488,8 +505,10 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) } defer f.Close() - if err := fchown(f.FD(), uid, gid); err != nil { - return p9.QID{}, extractErrno(err) + if uid != processUID || gid != processGID { + if err := fchown(f.FD(), uid, gid); err != nil { + return p9.QID{}, extractErrno(err) + } } stat, err := fstat(f.FD()) if err != nil { @@ -497,7 +516,7 @@ func (l *localFile) Mkdir(name string, perm p9.FileMode, uid p9.UID, gid p9.GID) } cu.Release() - return l.attachPoint.makeQID(stat), nil + return l.attachPoint.makeQID(&stat), nil } // Walk implements p9.File. @@ -512,7 +531,7 @@ func (l *localFile) WalkGetAttr(names []string) ([]p9.QID, p9.File, p9.AttrMask, if err != nil { return nil, nil, p9.AttrMask{}, p9.Attr{}, err } - mask, attr := l.fillAttr(stat) + mask, attr := l.fillAttr(&stat) return qids, file, mask, attr, nil } @@ -538,13 +557,13 @@ func (l *localFile) walk(names []string) ([]p9.QID, p9.File, unix.Stat_t, error) file: newFile, mode: invalidMode, fileType: l.fileType, - qid: l.attachPoint.makeQID(stat), + qid: l.attachPoint.makeQID(&stat), controlReadable: readable, } return []p9.QID{c.qid}, c, stat, nil } - var qids []p9.QID + qids := make([]p9.QID, 0, len(names)) var lastStat unix.Stat_t last := l for _, name := range names { @@ -560,7 +579,7 @@ func (l *localFile) walk(names []string) ([]p9.QID, p9.File, unix.Stat_t, error) _ = f.Close() return nil, nil, unix.Stat_t{}, extractErrno(err) } - c, err := newLocalFile(last.attachPoint, f, path, readable, lastStat) + c, err := newLocalFile(last.attachPoint, f, path, readable, &lastStat) if err != nil { _ = f.Close() return nil, nil, unix.Stat_t{}, extractErrno(err) @@ -609,11 +628,11 @@ func (l *localFile) GetAttr(_ p9.AttrMask) (p9.QID, p9.AttrMask, p9.Attr, error) if err != nil { return p9.QID{}, p9.AttrMask{}, p9.Attr{}, extractErrno(err) } - mask, attr := l.fillAttr(stat) + mask, attr := l.fillAttr(&stat) return l.qid, mask, attr, nil } -func (l *localFile) fillAttr(stat unix.Stat_t) (p9.AttrMask, p9.Attr) { +func (l *localFile) fillAttr(stat *unix.Stat_t) (p9.AttrMask, p9.Attr) { attr := p9.Attr{ Mode: p9.FileMode(stat.Mode), UID: p9.UID(stat.Uid), @@ -881,8 +900,10 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. } defer f.Close() - if err := fchown(f.FD(), uid, gid); err != nil { - return p9.QID{}, extractErrno(err) + if uid != processUID || gid != processGID { + if err := fchown(f.FD(), uid, gid); err != nil { + return p9.QID{}, extractErrno(err) + } } stat, err := fstat(f.FD()) if err != nil { @@ -890,7 +911,7 @@ func (l *localFile) Symlink(target, newName string, uid p9.UID, gid p9.GID) (p9. } cu.Release() - return l.attachPoint.makeQID(stat), nil + return l.attachPoint.makeQID(&stat), nil } // Link implements p9.File. @@ -938,8 +959,10 @@ func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid } defer child.Close() - if err := fchown(child.FD(), uid, gid); err != nil { - return p9.QID{}, extractErrno(err) + if uid != processUID || gid != processGID { + if err := fchown(child.FD(), uid, gid); err != nil { + return p9.QID{}, extractErrno(err) + } } stat, err := fstat(child.FD()) if err != nil { @@ -947,7 +970,7 @@ func (l *localFile) Mknod(name string, mode p9.FileMode, _ uint32, _ uint32, uid } cu.Release() - return l.attachPoint.makeQID(stat), nil + return l.attachPoint.makeQID(&stat), nil } // UnlinkAt implements p9.File. @@ -1045,7 +1068,7 @@ func (l *localFile) readDirent(f int, offset uint64, count uint32, skip uint64) log.Warningf("Readdir is skipping file with failed stat %q, err: %v", l.hostPath, err) continue } - qid := l.attachPoint.makeQID(stat) + qid := l.attachPoint.makeQID(&stat) offset++ dirents = append(dirents, p9.Dirent{ QID: qid, @@ -1139,7 +1162,7 @@ func (l *localFile) isOpen() bool { // Renamed implements p9.Renamed. func (l *localFile) Renamed(newDir p9.File, newName string) { - l.hostPath = path.Join(newDir.(*localFile).hostPath, newName) + l.hostPath = join(newDir.(*localFile).hostPath, newName) } // extractErrno tries to determine the errno. |