summaryrefslogtreecommitdiffhomepage
path: root/runsc/container/state_file.go
diff options
context:
space:
mode:
authorFabricio Voznika <fvoznika@google.com>2020-12-17 10:44:44 -0800
committergVisor bot <gvisor-bot@google.com>2020-12-17 10:52:44 -0800
commit8ea19b5818d0c1e9b798bd0bd288c7f51a46261d (patch)
treed7f9da9c969f460fc6267e6fe212e37bfdf12185 /runsc/container/state_file.go
parente7493a9e23325c00ad9a0db341d5887afe3ae5eb (diff)
Add sandbox ID to state file name
This allows to find all containers inside a sandbox more efficiently. This operation is required every time a container starts and stops, and previously required loading *all* container state files to check whether the container belonged to the sandbox. Apert from being inneficient, it has caused problems when state files are stale or corrupt, causing inavalability to create any container. Also adjust commands `list` and `debug` to skip over files that fail to load. Resolves #5052 PiperOrigin-RevId: 348050637
Diffstat (limited to 'runsc/container/state_file.go')
-rw-r--r--runsc/container/state_file.go236
1 files changed, 204 insertions, 32 deletions
diff --git a/runsc/container/state_file.go b/runsc/container/state_file.go
index 17a251530..dfbf1f2d3 100644
--- a/runsc/container/state_file.go
+++ b/runsc/container/state_file.go
@@ -20,58 +20,228 @@ import (
"io/ioutil"
"os"
"path/filepath"
+ "regexp"
+ "strings"
+ "syscall"
"github.com/gofrs/flock"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sync"
)
-const stateFileExtension = ".state"
+const stateFileExtension = "state"
-// StateFile handles load from/save to container state safely from multiple
-// processes. It uses a lock file to provide synchronization between operations.
+// LoadOpts provides options for Load()ing a container.
+type LoadOpts struct {
+ // Exact tells whether the search should be exact. See Load() for more.
+ Exact bool
+
+ // SkipCheck tells Load() to skip checking if container is runnning.
+ SkipCheck bool
+}
+
+// Load loads a container with the given id from a metadata file. "id" may
+// be an abbreviation of the full container id in case LoadOpts.Exact if not
+// set. It also checks if the container is still running, in order to return
+// an error to the caller earlier. This check is skipped if LoadOpts.SkipCheck
+// is set.
//
-// The lock file is located at: "${s.RootDir}/${s.ID}.lock".
-// The state file is located at: "${s.RootDir}/${s.ID}.state".
-type StateFile struct {
- // RootDir is the directory containing the container metadata file.
- RootDir string `json:"rootDir"`
+// Returns ErrNotExist if no container is found. Returns error in case more than
+// one containers matching the ID prefix is found.
+func Load(rootDir string, id FullID, opts LoadOpts) (*Container, error) {
+ //log.Debugf("Load container, rootDir: %q, partial cid: %s", rootDir, partialID)
+ if !opts.Exact {
+ var err error
+ id, err = findContainerID(rootDir, id.ContainerID)
+ if err != nil {
+ // Preserve error so that callers can distinguish 'not found' errors.
+ return nil, err
+ }
+ }
- // ID is the container ID.
- ID string `json:"id"`
+ if err := id.validate(); err != nil {
+ return nil, fmt.Errorf("invalid container id: %v", err)
+ }
+ state := StateFile{
+ RootDir: rootDir,
+ ID: id,
+ }
+ defer state.close()
- //
- // Fields below this line are not saved in the state file and will not
- // be preserved across commands.
- //
+ c := &Container{}
+ if err := state.load(c); err != nil {
+ if os.IsNotExist(err) {
+ // Preserve error so that callers can distinguish 'not found' errors.
+ return nil, err
+ }
+ return nil, fmt.Errorf("reading container metadata file %q: %v", state.statePath(), err)
+ }
- once sync.Once
- flock *flock.Flock
+ if !opts.SkipCheck {
+ // If the status is "Running" or "Created", check that the sandbox/container
+ // is still running, setting it to Stopped if not.
+ //
+ // This is inherently racy.
+ switch c.Status {
+ case Created:
+ if !c.IsSandboxRunning() {
+ // Sandbox no longer exists, so this container definitely does not exist.
+ c.changeStatus(Stopped)
+ }
+ case Running:
+ if err := c.SignalContainer(syscall.Signal(0), false); err != nil {
+ c.changeStatus(Stopped)
+ }
+ }
+ }
+
+ return c, nil
}
// List returns all container ids in the given root directory.
-func List(rootDir string) ([]string, error) {
+func List(rootDir string) ([]FullID, error) {
log.Debugf("List containers %q", rootDir)
- list, err := filepath.Glob(filepath.Join(rootDir, "*"+stateFileExtension))
+ return listMatch(rootDir, FullID{})
+}
+
+// listMatch returns all container ids that match the provided id.
+func listMatch(rootDir string, id FullID) ([]FullID, error) {
+ id.SandboxID += "*"
+ id.ContainerID += "*"
+ pattern := buildPath(rootDir, id, stateFileExtension)
+ list, err := filepath.Glob(pattern)
if err != nil {
return nil, err
}
- var out []string
+ var out []FullID
for _, path := range list {
- // Filter out files that do no belong to a container.
- fileName := filepath.Base(path)
- if len(fileName) < len(stateFileExtension) {
- panic(fmt.Sprintf("invalid file match %q", path))
- }
- // Remove the extension.
- cid := fileName[:len(fileName)-len(stateFileExtension)]
- if validateID(cid) == nil {
- out = append(out, cid)
+ id, err := parseFileName(filepath.Base(path))
+ if err == nil {
+ out = append(out, id)
}
}
return out, nil
}
+// loadSandbox loads all containers that belong to the sandbox with the given
+// ID.
+func loadSandbox(rootDir, id string) ([]*Container, error) {
+ cids, err := listMatch(rootDir, FullID{SandboxID: id})
+ if err != nil {
+ return nil, err
+ }
+
+ // Load the container metadata.
+ var containers []*Container
+ for _, cid := range cids {
+ container, err := Load(rootDir, cid, LoadOpts{Exact: true, SkipCheck: true})
+ if err != nil {
+ // Container file may not exist if it raced with creation/deletion or
+ // directory was left behind. Load provides a snapshot in time, so it's
+ // fine to skip it.
+ if os.IsNotExist(err) {
+ continue
+ }
+ return nil, fmt.Errorf("loading sandbox %q, failed to load container %q: %v", id, cid, err)
+ }
+ containers = append(containers, container)
+ }
+ return containers, nil
+}
+
+func findContainerID(rootDir, partialID string) (FullID, error) {
+ // Check whether the id fully specifies an existing container.
+ pattern := buildPath(rootDir, FullID{SandboxID: "*", ContainerID: partialID + "*"}, stateFileExtension)
+ list, err := filepath.Glob(pattern)
+ if err != nil {
+ return FullID{}, err
+ }
+ switch len(list) {
+ case 0:
+ return FullID{}, os.ErrNotExist
+ case 1:
+ return parseFileName(filepath.Base(list[0]))
+ }
+
+ // Now see whether id could be an abbreviation of exactly 1 of the
+ // container ids. If id is ambiguous (it could match more than 1
+ // container), it is an error.
+ ids, err := List(rootDir)
+ if err != nil {
+ return FullID{}, err
+ }
+ var rv *FullID
+ for _, id := range ids {
+ if strings.HasPrefix(id.ContainerID, partialID) {
+ if rv != nil {
+ return FullID{}, fmt.Errorf("id %q is ambiguous and could refer to multiple containers: %q, %q", partialID, rv, id)
+ }
+ rv = &id
+ }
+ }
+ if rv == nil {
+ return FullID{}, os.ErrNotExist
+ }
+ log.Debugf("abbreviated id %q resolves to full id %v", partialID, *rv)
+ return *rv, nil
+}
+
+func parseFileName(name string) (FullID, error) {
+ re := regexp.MustCompile(`([\w+-\.]+)_sandbox:([\w+-\.]+)\.` + stateFileExtension)
+ groups := re.FindStringSubmatch(name)
+ if len(groups) != 3 {
+ return FullID{}, fmt.Errorf("invalid state file name format: %q", name)
+ }
+ id := FullID{
+ SandboxID: groups[2],
+ ContainerID: groups[1],
+ }
+ if err := id.validate(); err != nil {
+ return FullID{}, fmt.Errorf("invalid state file name %q: %w", name, err)
+ }
+ return id, nil
+}
+
+// FullID combines sandbox and container ID to identify a container. Sandbox ID
+// is used to allow all containers for a given sandbox to be loaded by matching
+// sandbox ID in the file name.
+type FullID struct {
+ SandboxID string `json:"sandboxId"`
+ ContainerID string `json:"containerId"`
+}
+
+func (f *FullID) String() string {
+ return f.SandboxID + "/" + f.ContainerID
+}
+
+func (f *FullID) validate() error {
+ if err := validateID(f.SandboxID); err != nil {
+ return err
+ }
+ return validateID(f.ContainerID)
+}
+
+// StateFile handles load from/save to container state safely from multiple
+// processes. It uses a lock file to provide synchronization between operations.
+//
+// The lock file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.lock".
+// The state file is located at: "${s.RootDir}/${containerd-id}_sand:{sandbox-id}.state".
+type StateFile struct {
+ // RootDir is the directory containing the container metadata file.
+ RootDir string `json:"rootDir"`
+
+ // ID is the sandbox+container ID.
+ ID FullID `json:"id"`
+
+ //
+ // Fields below this line are not saved in the state file and will not
+ // be preserved across commands.
+ //
+
+ once sync.Once
+ flock *flock.Flock
+}
+
// lock globally locks all locking operations for the container.
func (s *StateFile) lock() error {
s.once.Do(func() {
@@ -157,18 +327,20 @@ func (s *StateFile) close() error {
return s.flock.Close()
}
-func buildStatePath(rootDir, id string) string {
- return filepath.Join(rootDir, id+stateFileExtension)
+func buildPath(rootDir string, id FullID, extension string) string {
+ // Note: "_" and ":" are not valid in IDs.
+ name := fmt.Sprintf("%s_sandbox:%s.%s", id.ContainerID, id.SandboxID, extension)
+ return filepath.Join(rootDir, name)
}
// statePath is the full path to the state file.
func (s *StateFile) statePath() string {
- return buildStatePath(s.RootDir, s.ID)
+ return buildPath(s.RootDir, s.ID, stateFileExtension)
}
// lockPath is the full path to the lock file.
func (s *StateFile) lockPath() string {
- return filepath.Join(s.RootDir, s.ID+".lock")
+ return buildPath(s.RootDir, s.ID, "lock")
}
// destroy deletes all state created by the stateFile. It may be called with the