diff options
Diffstat (limited to 'vendor/github.com')
37 files changed, 1140 insertions, 150 deletions
diff --git a/vendor/github.com/containerd/cgroups/README.md b/vendor/github.com/containerd/cgroups/README.md index 69e932a9f..81ad11cc7 100644 --- a/vendor/github.com/containerd/cgroups/README.md +++ b/vendor/github.com/containerd/cgroups/README.md @@ -1,8 +1,9 @@ # cgroups [![Build Status](https://travis-ci.org/containerd/cgroups.svg?branch=master)](https://travis-ci.org/containerd/cgroups) - [![codecov](https://codecov.io/gh/containerd/cgroups/branch/master/graph/badge.svg)](https://codecov.io/gh/containerd/cgroups) +[![GoDoc](https://godoc.org/github.com/containerd/cgroups?status.svg)](https://godoc.org/github.com/containerd/cgroups) +[![Go Report Card](https://goreportcard.com/badge/github.com/containerd/cgroups)](https://goreportcard.com/report/github.com/containerd/cgroups) Go package for creating, managing, inspecting, and destroying cgroups. The resources format for settings on the cgroup uses the OCI runtime-spec found @@ -110,3 +111,14 @@ err := control.MoveTo(destination) ```go subCgroup, err := control.New("child", resources) ``` + +## Project details + +Cgroups is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE). +As a containerd sub-project, you will find the: + + * [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md), + * [Maintainers](https://github.com/containerd/project/blob/master/MAINTAINERS), + * and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md) + +information in our [`containerd/project`](https://github.com/containerd/project) repository. diff --git a/vendor/github.com/containerd/cgroups/blkio.go b/vendor/github.com/containerd/cgroups/blkio.go index fc1e689cb..875fb5546 100644 --- a/vendor/github.com/containerd/cgroups/blkio.go +++ b/vendor/github.com/containerd/cgroups/blkio.go @@ -191,31 +191,42 @@ func (b *blkioController) readEntry(devices map[deviceKey]string, path, name str } func createBlkioSettings(blkio *specs.LinuxBlockIO) []blkioSettings { - settings := []blkioSettings{ - { - name: "weight", - value: blkio.Weight, - format: uintf, - }, - { - name: "leaf_weight", - value: blkio.LeafWeight, - format: uintf, - }, - } - for _, wd := range blkio.WeightDevice { + settings := []blkioSettings{} + + if blkio.Weight != nil { settings = append(settings, blkioSettings{ - name: "weight_device", - value: wd, - format: weightdev, - }, + name: "weight", + value: blkio.Weight, + format: uintf, + }) + } + if blkio.LeafWeight != nil { + settings = append(settings, blkioSettings{ - name: "leaf_weight_device", - value: wd, - format: weightleafdev, + name: "leaf_weight", + value: blkio.LeafWeight, + format: uintf, }) } + for _, wd := range blkio.WeightDevice { + if wd.Weight != nil { + settings = append(settings, + blkioSettings{ + name: "weight_device", + value: wd, + format: weightdev, + }) + } + if wd.LeafWeight != nil { + settings = append(settings, + blkioSettings{ + name: "leaf_weight_device", + value: wd, + format: weightleafdev, + }) + } + } for _, t := range []struct { name string list []specs.LinuxThrottleDevice @@ -265,12 +276,12 @@ func uintf(v interface{}) []byte { func weightdev(v interface{}) []byte { wd := v.(specs.LinuxWeightDevice) - return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.Weight)) } func weightleafdev(v interface{}) []byte { wd := v.(specs.LinuxWeightDevice) - return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)) + return []byte(fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, *wd.LeafWeight)) } func throttleddev(v interface{}) []byte { diff --git a/vendor/github.com/containerd/cgroups/cgroup.go b/vendor/github.com/containerd/cgroups/cgroup.go index 7959feb49..e3ef07651 100644 --- a/vendor/github.com/containerd/cgroups/cgroup.go +++ b/vendor/github.com/containerd/cgroups/cgroup.go @@ -30,47 +30,88 @@ import ( ) // New returns a new control via the cgroup cgroups interface -func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources) (Cgroup, error) { +func New(hierarchy Hierarchy, path Path, resources *specs.LinuxResources, opts ...InitOpts) (Cgroup, error) { + config := newInitConfig() + for _, o := range opts { + if err := o(config); err != nil { + return nil, err + } + } subsystems, err := hierarchy() if err != nil { return nil, err } + var active []Subsystem for _, s := range subsystems { + // check if subsystem exists if err := initializeSubsystem(s, path, resources); err != nil { + if err == ErrControllerNotActive { + if config.InitCheck != nil { + if skerr := config.InitCheck(s, path, err); skerr != nil { + if skerr != ErrIgnoreSubsystem { + return nil, skerr + } + } + } + continue + } return nil, err } + active = append(active, s) } return &cgroup{ path: path, - subsystems: subsystems, + subsystems: active, }, nil } // Load will load an existing cgroup and allow it to be controlled -func Load(hierarchy Hierarchy, path Path) (Cgroup, error) { +func Load(hierarchy Hierarchy, path Path, opts ...InitOpts) (Cgroup, error) { + config := newInitConfig() + for _, o := range opts { + if err := o(config); err != nil { + return nil, err + } + } + var activeSubsystems []Subsystem subsystems, err := hierarchy() if err != nil { return nil, err } - // check the the subsystems still exist + // check that the subsystems still exist, and keep only those that actually exist for _, s := range pathers(subsystems) { p, err := path(s.Name()) if err != nil { if os.IsNotExist(errors.Cause(err)) { return nil, ErrCgroupDeleted } + if err == ErrControllerNotActive { + if config.InitCheck != nil { + if skerr := config.InitCheck(s, path, err); skerr != nil { + if skerr != ErrIgnoreSubsystem { + return nil, skerr + } + } + } + continue + } return nil, err } if _, err := os.Lstat(s.Path(p)); err != nil { if os.IsNotExist(err) { - return nil, ErrCgroupDeleted + continue } return nil, err } + activeSubsystems = append(activeSubsystems, s) + } + // if we do not have any active systems then the cgroup is deleted + if len(activeSubsystems) == 0 { + return nil, ErrCgroupDeleted } return &cgroup{ path: path, - subsystems: subsystems, + subsystems: activeSubsystems, }, nil } @@ -319,6 +360,49 @@ func (c *cgroup) processes(subsystem Name, recursive bool) ([]Process, error) { return processes, err } +// Tasks returns the tasks running inside the cgroup along +// with the subsystem used, pid, and path +func (c *cgroup) Tasks(subsystem Name, recursive bool) ([]Task, error) { + c.mu.Lock() + defer c.mu.Unlock() + if c.err != nil { + return nil, c.err + } + return c.tasks(subsystem, recursive) +} + +func (c *cgroup) tasks(subsystem Name, recursive bool) ([]Task, error) { + s := c.getSubsystem(subsystem) + sp, err := c.path(subsystem) + if err != nil { + return nil, err + } + path := s.(pather).Path(sp) + var tasks []Task + err = filepath.Walk(path, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !recursive && info.IsDir() { + if p == path { + return nil + } + return filepath.SkipDir + } + dir, name := filepath.Split(p) + if name != cgroupTasks { + return nil + } + procs, err := readTasksPids(dir, subsystem) + if err != nil { + return err + } + tasks = append(tasks, procs...) + return nil + }) + return tasks, err +} + // Freeze freezes the entire cgroup and all the processes inside it func (c *cgroup) Freeze() error { c.mu.Lock() diff --git a/vendor/github.com/containerd/cgroups/control.go b/vendor/github.com/containerd/cgroups/control.go index 63e2df93d..1f62c54f3 100644 --- a/vendor/github.com/containerd/cgroups/control.go +++ b/vendor/github.com/containerd/cgroups/control.go @@ -44,6 +44,15 @@ type Process struct { Path string } +type Task struct { + // Subsystem is the name of the subsystem that the task is in + Subsystem Name + // Pid is the process id of the task + Pid int + // Path is the full path of the subsystem and location that the task is in + Path string +} + // Cgroup handles interactions with the individual groups to perform // actions on them as them main interface to this cgroup package type Cgroup interface { @@ -64,6 +73,8 @@ type Cgroup interface { Update(resources *specs.LinuxResources) error // Processes returns all the processes in a select subsystem for the cgroup Processes(Name, bool) ([]Process, error) + // Tasks returns all the tasks in a select subsystem for the cgroup + Tasks(Name, bool) ([]Task, error) // Freeze freezes or pauses all processes inside the cgroup Freeze() error // Thaw thaw or resumes all processes inside the cgroup diff --git a/vendor/github.com/containerd/cgroups/cpuset.go b/vendor/github.com/containerd/cgroups/cpuset.go index f182aa68c..30208515e 100644 --- a/vendor/github.com/containerd/cgroups/cpuset.go +++ b/vendor/github.com/containerd/cgroups/cpuset.go @@ -57,21 +57,21 @@ func (c *cpusetController) Create(path string, resources *specs.LinuxResources) if resources.CPU != nil { for _, t := range []struct { name string - value *string + value string }{ { name: "cpus", - value: &resources.CPU.Cpus, + value: resources.CPU.Cpus, }, { name: "mems", - value: &resources.CPU.Mems, + value: resources.CPU.Mems, }, } { - if t.value != nil { + if t.value != "" { if err := ioutil.WriteFile( filepath.Join(c.Path(path), fmt.Sprintf("cpuset.%s", t.name)), - []byte(*t.value), + []byte(t.value), defaultFilePerm, ); err != nil { return err diff --git a/vendor/github.com/containerd/cgroups/devices.go b/vendor/github.com/containerd/cgroups/devices.go index f9a118b22..f6a3b1947 100644 --- a/vendor/github.com/containerd/cgroups/devices.go +++ b/vendor/github.com/containerd/cgroups/devices.go @@ -58,6 +58,9 @@ func (d *devicesController) Create(path string, resources *specs.LinuxResources) if device.Allow { file = allowDeviceFile } + if device.Type == "" { + device.Type = "a" + } if err := ioutil.WriteFile( filepath.Join(d.Path(path), file), []byte(deviceString(device)), diff --git a/vendor/github.com/containerd/cgroups/net_prio.go b/vendor/github.com/containerd/cgroups/net_prio.go index c77169215..612e1bcd2 100644 --- a/vendor/github.com/containerd/cgroups/net_prio.go +++ b/vendor/github.com/containerd/cgroups/net_prio.go @@ -50,7 +50,7 @@ func (n *netprioController) Create(path string, resources *specs.LinuxResources) if resources.Network != nil { for _, prio := range resources.Network.Priorities { if err := ioutil.WriteFile( - filepath.Join(n.Path(path), "net_prio_ifpriomap"), + filepath.Join(n.Path(path), "net_prio.ifpriomap"), formatPrio(prio.Name, prio.Priority), defaultFilePerm, ); err != nil { diff --git a/vendor/github.com/containerd/cgroups/opts.go b/vendor/github.com/containerd/cgroups/opts.go new file mode 100644 index 000000000..7c5d9fb9c --- /dev/null +++ b/vendor/github.com/containerd/cgroups/opts.go @@ -0,0 +1,61 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package cgroups + +import ( + "github.com/pkg/errors" +) + +var ( + // ErrIgnoreSubsystem allows the specific subsystem to be skipped + ErrIgnoreSubsystem = errors.New("skip subsystem") + // ErrDevicesRequired is returned when the devices subsystem is required but + // does not exist or is not active + ErrDevicesRequired = errors.New("devices subsystem is required") +) + +// InitOpts allows configuration for the creation or loading of a cgroup +type InitOpts func(*InitConfig) error + +// InitConfig provides configuration options for the creation +// or loading of a cgroup and its subsystems +type InitConfig struct { + // InitCheck can be used to check initialization errors from the subsystem + InitCheck InitCheck +} + +func newInitConfig() *InitConfig { + return &InitConfig{ + InitCheck: RequireDevices, + } +} + +// InitCheck allows subsystems errors to be checked when initialized or loaded +type InitCheck func(Subsystem, Path, error) error + +// AllowAny allows any subsystem errors to be skipped +func AllowAny(s Subsystem, p Path, err error) error { + return ErrIgnoreSubsystem +} + +// RequireDevices requires the device subsystem but no others +func RequireDevices(s Subsystem, p Path, err error) error { + if s.Name() == Devices { + return ErrDevicesRequired + } + return ErrIgnoreSubsystem +} diff --git a/vendor/github.com/containerd/cgroups/paths.go b/vendor/github.com/containerd/cgroups/paths.go index 455ce857f..f45fd4256 100644 --- a/vendor/github.com/containerd/cgroups/paths.go +++ b/vendor/github.com/containerd/cgroups/paths.go @@ -57,6 +57,9 @@ func PidPath(pid int) Path { return existingPath(paths, "") } +// ErrControllerNotActive is returned when a controller is not supported or enabled +var ErrControllerNotActive = errors.New("controller is not supported") + func existingPath(paths map[string]string, suffix string) Path { // localize the paths based on the root mount dest for nested cgroups for n, p := range paths { @@ -77,7 +80,7 @@ func existingPath(paths map[string]string, suffix string) Path { root, ok := paths[string(name)] if !ok { if root, ok = paths[fmt.Sprintf("name=%s", name)]; !ok { - return "", fmt.Errorf("unable to find %q in controller set", name) + return "", ErrControllerNotActive } } if suffix != "" { diff --git a/vendor/github.com/containerd/cgroups/subsystem.go b/vendor/github.com/containerd/cgroups/subsystem.go index 933a6c38d..23de04d49 100644 --- a/vendor/github.com/containerd/cgroups/subsystem.go +++ b/vendor/github.com/containerd/cgroups/subsystem.go @@ -42,7 +42,7 @@ const ( ) // Subsystems returns a complete list of the default cgroups -// avaliable on most linux systems +// available on most linux systems func Subsystems() []Name { n := []Name{ Hugetlb, diff --git a/vendor/github.com/containerd/cgroups/systemd.go b/vendor/github.com/containerd/cgroups/systemd.go index 8153d744c..c5d4e3081 100644 --- a/vendor/github.com/containerd/cgroups/systemd.go +++ b/vendor/github.com/containerd/cgroups/systemd.go @@ -32,6 +32,11 @@ const ( defaultSlice = "system.slice" ) +var ( + canDelegate bool + once sync.Once +) + func Systemd() ([]Subsystem, error) { root, err := v1MountPoint() if err != nil { @@ -54,7 +59,7 @@ func Slice(slice, name string) Path { slice = defaultSlice } return func(subsystem Name) (string, error) { - return filepath.Join(slice, unitName(name)), nil + return filepath.Join(slice, name), nil } } @@ -80,15 +85,39 @@ func (s *SystemdController) Create(path string, resources *specs.LinuxResources) } defer conn.Close() slice, name := splitName(path) + // We need to see if systemd can handle the delegate property + // Systemd will return an error if it cannot handle delegate regardless + // of its bool setting. + checkDelegate := func() { + canDelegate = true + dlSlice := newProperty("Delegate", true) + if _, err := conn.StartTransientUnit(slice, "testdelegate", []systemdDbus.Property{dlSlice}, nil); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + // Starting with systemd v237, Delegate is not even a property of slices anymore, + // so the D-Bus call fails with "InvalidArgs" error. + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") || strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.InvalidArgs") { + canDelegate = false + } + } + } + + conn.StopUnit(slice, "testDelegate", nil) + } + once.Do(checkDelegate) properties := []systemdDbus.Property{ systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", name)), systemdDbus.PropWants(slice), newProperty("DefaultDependencies", false), - newProperty("Delegate", true), newProperty("MemoryAccounting", true), newProperty("CPUAccounting", true), newProperty("BlockIOAccounting", true), } + + // If we can delegate, we add the property back in + if canDelegate { + properties = append(properties, newProperty("Delegate", true)) + } + ch := make(chan string) _, err = conn.StartTransientUnit(name, "replace", properties, ch) if err != nil { diff --git a/vendor/github.com/containerd/cgroups/utils.go b/vendor/github.com/containerd/cgroups/utils.go index 345be4e46..f3129b1a3 100644 --- a/vendor/github.com/containerd/cgroups/utils.go +++ b/vendor/github.com/containerd/cgroups/utils.go @@ -111,7 +111,7 @@ func remove(path string) error { return fmt.Errorf("cgroups: unable to remove path %q", path) } -// readPids will read all the pids in a cgroup by the provided path +// readPids will read all the pids of processes in a cgroup by the provided path func readPids(path string, subsystem Name) ([]Process, error) { f, err := os.Open(filepath.Join(path, cgroupProcs)) if err != nil { @@ -138,6 +138,33 @@ func readPids(path string, subsystem Name) ([]Process, error) { return out, nil } +// readTasksPids will read all the pids of tasks in a cgroup by the provided path +func readTasksPids(path string, subsystem Name) ([]Task, error) { + f, err := os.Open(filepath.Join(path, cgroupTasks)) + if err != nil { + return nil, err + } + defer f.Close() + var ( + out []Task + s = bufio.NewScanner(f) + ) + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, Task{ + Pid: pid, + Subsystem: subsystem, + Path: path, + }) + } + } + return out, nil +} + func hugePageSizes() ([]string, error) { var ( pageSizes []string diff --git a/vendor/github.com/containerd/containerd/api/types/descriptor.pb.go b/vendor/github.com/containerd/containerd/api/types/descriptor.pb.go index 93e88c0dc..4f5f6c61f 100644 --- a/vendor/github.com/containerd/containerd/api/types/descriptor.pb.go +++ b/vendor/github.com/containerd/containerd/api/types/descriptor.pb.go @@ -28,6 +28,7 @@ import github_com_opencontainers_go_digest "github.com/opencontainers/go-digest" import strings "strings" import reflect "reflect" +import sortkeys "github.com/gogo/protobuf/sortkeys" import io "io" @@ -48,9 +49,10 @@ const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package // oci descriptor found in a manifest. // See https://godoc.org/github.com/opencontainers/image-spec/specs-go/v1#Descriptor type Descriptor struct { - MediaType string `protobuf:"bytes,1,opt,name=media_type,json=mediaType,proto3" json:"media_type,omitempty"` - Digest github_com_opencontainers_go_digest.Digest `protobuf:"bytes,2,opt,name=digest,proto3,customtype=github.com/opencontainers/go-digest.Digest" json:"digest"` - Size_ int64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"` + MediaType string `protobuf:"bytes,1,opt,name=media_type,json=mediaType,proto3" json:"media_type,omitempty"` + Digest github_com_opencontainers_go_digest.Digest `protobuf:"bytes,2,opt,name=digest,proto3,customtype=github.com/opencontainers/go-digest.Digest" json:"digest"` + Size_ int64 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"` + Annotations map[string]string `protobuf:"bytes,5,rep,name=annotations" json:"annotations,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` } func (m *Descriptor) Reset() { *m = Descriptor{} } @@ -92,6 +94,23 @@ func (m *Descriptor) MarshalTo(dAtA []byte) (int, error) { i++ i = encodeVarintDescriptor(dAtA, i, uint64(m.Size_)) } + if len(m.Annotations) > 0 { + for k, _ := range m.Annotations { + dAtA[i] = 0x2a + i++ + v := m.Annotations[k] + mapSize := 1 + len(k) + sovDescriptor(uint64(len(k))) + 1 + len(v) + sovDescriptor(uint64(len(v))) + i = encodeVarintDescriptor(dAtA, i, uint64(mapSize)) + dAtA[i] = 0xa + i++ + i = encodeVarintDescriptor(dAtA, i, uint64(len(k))) + i += copy(dAtA[i:], k) + dAtA[i] = 0x12 + i++ + i = encodeVarintDescriptor(dAtA, i, uint64(len(v))) + i += copy(dAtA[i:], v) + } + } return i, nil } @@ -118,6 +137,14 @@ func (m *Descriptor) Size() (n int) { if m.Size_ != 0 { n += 1 + sovDescriptor(uint64(m.Size_)) } + if len(m.Annotations) > 0 { + for k, v := range m.Annotations { + _ = k + _ = v + mapEntrySize := 1 + len(k) + sovDescriptor(uint64(len(k))) + 1 + len(v) + sovDescriptor(uint64(len(v))) + n += mapEntrySize + 1 + sovDescriptor(uint64(mapEntrySize)) + } + } return n } @@ -138,10 +165,21 @@ func (this *Descriptor) String() string { if this == nil { return "nil" } + keysForAnnotations := make([]string, 0, len(this.Annotations)) + for k, _ := range this.Annotations { + keysForAnnotations = append(keysForAnnotations, k) + } + sortkeys.Strings(keysForAnnotations) + mapStringForAnnotations := "map[string]string{" + for _, k := range keysForAnnotations { + mapStringForAnnotations += fmt.Sprintf("%v: %v,", k, this.Annotations[k]) + } + mapStringForAnnotations += "}" s := strings.Join([]string{`&Descriptor{`, `MediaType:` + fmt.Sprintf("%v", this.MediaType) + `,`, `Digest:` + fmt.Sprintf("%v", this.Digest) + `,`, `Size_:` + fmt.Sprintf("%v", this.Size_) + `,`, + `Annotations:` + mapStringForAnnotations + `,`, `}`, }, "") return s @@ -260,6 +298,124 @@ func (m *Descriptor) Unmarshal(dAtA []byte) error { break } } + case 5: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Annotations", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDescriptor + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthDescriptor + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.Annotations == nil { + m.Annotations = make(map[string]string) + } + var mapkey string + var mapvalue string + for iNdEx < postIndex { + entryPreIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDescriptor + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + if fieldNum == 1 { + var stringLenmapkey uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDescriptor + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLenmapkey |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLenmapkey := int(stringLenmapkey) + if intStringLenmapkey < 0 { + return ErrInvalidLengthDescriptor + } + postStringIndexmapkey := iNdEx + intStringLenmapkey + if postStringIndexmapkey > l { + return io.ErrUnexpectedEOF + } + mapkey = string(dAtA[iNdEx:postStringIndexmapkey]) + iNdEx = postStringIndexmapkey + } else if fieldNum == 2 { + var stringLenmapvalue uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDescriptor + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLenmapvalue |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLenmapvalue := int(stringLenmapvalue) + if intStringLenmapvalue < 0 { + return ErrInvalidLengthDescriptor + } + postStringIndexmapvalue := iNdEx + intStringLenmapvalue + if postStringIndexmapvalue > l { + return io.ErrUnexpectedEOF + } + mapvalue = string(dAtA[iNdEx:postStringIndexmapvalue]) + iNdEx = postStringIndexmapvalue + } else { + iNdEx = entryPreIndex + skippy, err := skipDescriptor(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthDescriptor + } + if (iNdEx + skippy) > postIndex { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + m.Annotations[mapkey] = mapvalue + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipDescriptor(dAtA[iNdEx:]) @@ -391,20 +547,25 @@ func init() { } var fileDescriptorDescriptor = []byte{ - // 234 bytes of a gzipped FileDescriptorProto + // 311 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xb2, 0x4e, 0xcf, 0x2c, 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xce, 0xcf, 0x2b, 0x49, 0xcc, 0xcc, 0x4b, 0x2d, 0x4a, 0x41, 0x66, 0x26, 0x16, 0x64, 0xea, 0x97, 0x54, 0x16, 0xa4, 0x16, 0xeb, 0xa7, 0xa4, 0x16, 0x27, 0x17, 0x65, 0x16, 0x94, 0xe4, 0x17, 0xe9, 0x15, 0x14, 0xe5, 0x97, 0xe4, 0x0b, 0x09, 0x20, 0x94, 0xe9, 0x81, 0x95, 0x48, 0x89, 0xa4, 0xe7, 0xa7, 0xe7, 0x83, 0x25, 0xf5, 0x41, 0x2c, 0x88, - 0x3a, 0xa5, 0x6e, 0x46, 0x2e, 0x2e, 0x17, 0xb8, 0x66, 0x21, 0x59, 0x2e, 0xae, 0xdc, 0xd4, 0x94, - 0xcc, 0xc4, 0x78, 0x90, 0x1e, 0x09, 0x46, 0x05, 0x46, 0x0d, 0xce, 0x20, 0x4e, 0xb0, 0x48, 0x48, - 0x65, 0x41, 0xaa, 0x90, 0x17, 0x17, 0x5b, 0x4a, 0x66, 0x7a, 0x6a, 0x71, 0x89, 0x04, 0x13, 0x48, - 0xca, 0xc9, 0xe8, 0xc4, 0x3d, 0x79, 0x86, 0x5b, 0xf7, 0xe4, 0xb5, 0x90, 0x9c, 0x9a, 0x5f, 0x90, - 0x9a, 0x07, 0xb7, 0xbc, 0x58, 0x3f, 0x3d, 0x5f, 0x17, 0xa2, 0x45, 0xcf, 0x05, 0x4c, 0x05, 0x41, - 0x4d, 0x10, 0x12, 0xe2, 0x62, 0x29, 0xce, 0xac, 0x4a, 0x95, 0x60, 0x56, 0x60, 0xd4, 0x60, 0x0e, - 0x02, 0xb3, 0x9d, 0xbc, 0x4e, 0x3c, 0x94, 0x63, 0xb8, 0xf1, 0x50, 0x8e, 0xa1, 0xe1, 0x91, 0x1c, - 0xe3, 0x89, 0x47, 0x72, 0x8c, 0x17, 0x1e, 0xc9, 0x31, 0x3e, 0x78, 0x24, 0xc7, 0x18, 0x65, 0x40, - 0x7c, 0x60, 0x58, 0x83, 0xc9, 0x08, 0x86, 0x24, 0x36, 0xb0, 0x17, 0x8d, 0x01, 0x01, 0x00, 0x00, - 0xff, 0xff, 0xea, 0xac, 0x78, 0x9a, 0x49, 0x01, 0x00, 0x00, + 0x3a, 0xa5, 0x39, 0x4c, 0x5c, 0x5c, 0x2e, 0x70, 0xcd, 0x42, 0xb2, 0x5c, 0x5c, 0xb9, 0xa9, 0x29, + 0x99, 0x89, 0xf1, 0x20, 0x3d, 0x12, 0x8c, 0x0a, 0x8c, 0x1a, 0x9c, 0x41, 0x9c, 0x60, 0x91, 0x90, + 0xca, 0x82, 0x54, 0x21, 0x2f, 0x2e, 0xb6, 0x94, 0xcc, 0xf4, 0xd4, 0xe2, 0x12, 0x09, 0x26, 0x90, + 0x94, 0x93, 0xd1, 0x89, 0x7b, 0xf2, 0x0c, 0xb7, 0xee, 0xc9, 0x6b, 0x21, 0x39, 0x35, 0xbf, 0x20, + 0x35, 0x0f, 0x6e, 0x79, 0xb1, 0x7e, 0x7a, 0xbe, 0x2e, 0x44, 0x8b, 0x9e, 0x0b, 0x98, 0x0a, 0x82, + 0x9a, 0x20, 0x24, 0xc4, 0xc5, 0x52, 0x9c, 0x59, 0x95, 0x2a, 0xc1, 0xac, 0xc0, 0xa8, 0xc1, 0x1c, + 0x04, 0x66, 0x0b, 0xf9, 0x73, 0x71, 0x27, 0xe6, 0xe5, 0xe5, 0x97, 0x24, 0x96, 0x64, 0xe6, 0xe7, + 0x15, 0x4b, 0xb0, 0x2a, 0x30, 0x6b, 0x70, 0x1b, 0xe9, 0xea, 0xa1, 0xfb, 0x45, 0x0f, 0xe1, 0x62, + 0x3d, 0x47, 0x84, 0x7a, 0xd7, 0xbc, 0x92, 0xa2, 0xca, 0x20, 0x64, 0x13, 0xa4, 0xec, 0xb8, 0x04, + 0xd0, 0x15, 0x08, 0x09, 0x70, 0x31, 0x67, 0xa7, 0x56, 0x42, 0x3d, 0x07, 0x62, 0x0a, 0x89, 0x70, + 0xb1, 0x96, 0x25, 0xe6, 0x94, 0xa6, 0x42, 0x7c, 0x15, 0x04, 0xe1, 0x58, 0x31, 0x59, 0x30, 0x3a, + 0x79, 0x9d, 0x78, 0x28, 0xc7, 0x70, 0xe3, 0xa1, 0x1c, 0x43, 0xc3, 0x23, 0x39, 0xc6, 0x13, 0x8f, + 0xe4, 0x18, 0x2f, 0x3c, 0x92, 0x63, 0x7c, 0xf0, 0x48, 0x8e, 0x31, 0xca, 0x80, 0xf8, 0xd8, 0xb1, + 0x06, 0x93, 0x11, 0x0c, 0x49, 0x6c, 0xe0, 0x30, 0x37, 0x06, 0x04, 0x00, 0x00, 0xff, 0xff, 0x22, + 0x8a, 0x20, 0x4a, 0xda, 0x01, 0x00, 0x00, } diff --git a/vendor/github.com/containerd/containerd/api/types/descriptor.proto b/vendor/github.com/containerd/containerd/api/types/descriptor.proto index 5c00dca4f..6d90a1626 100644 --- a/vendor/github.com/containerd/containerd/api/types/descriptor.proto +++ b/vendor/github.com/containerd/containerd/api/types/descriptor.proto @@ -15,4 +15,5 @@ message Descriptor { string media_type = 1; string digest = 2 [(gogoproto.customtype) = "github.com/opencontainers/go-digest.Digest", (gogoproto.nullable) = false]; int64 size = 3; + map<string, string> annotations = 5; } diff --git a/vendor/github.com/containerd/containerd/mount/mountinfo_linux.go b/vendor/github.com/containerd/containerd/mount/mountinfo_linux.go index a986f8f4e..a7407c50e 100644 --- a/vendor/github.com/containerd/containerd/mount/mountinfo_linux.go +++ b/vendor/github.com/containerd/containerd/mount/mountinfo_linux.go @@ -25,6 +25,8 @@ import ( "os" "strconv" "strings" + + "github.com/pkg/errors" ) // Self retrieves a list of mounts for the current running process. @@ -41,13 +43,15 @@ func Self() ([]Info, error) { func parseInfoFile(r io.Reader) ([]Info, error) { s := bufio.NewScanner(r) out := []Info{} - + var err error for s.Scan() { - if err := s.Err(); err != nil { + if err = s.Err(); err != nil { return nil, err } /* + See http://man7.org/linux/man-pages/man5/proc.5.html + 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) (1) mount ID: unique identifier of the mount (may be reused after umount) @@ -68,7 +72,7 @@ func parseInfoFile(r io.Reader) ([]Info, error) { numFields := len(fields) if numFields < 10 { // should be at least 10 fields - return nil, fmt.Errorf("Parsing '%s' failed: not enough fields (%d)", text, numFields) + return nil, errors.Errorf("parsing '%s' failed: not enough fields (%d)", text, numFields) } p := Info{} // ignore any numbers parsing errors, as there should not be any @@ -76,13 +80,19 @@ func parseInfoFile(r io.Reader) ([]Info, error) { p.Parent, _ = strconv.Atoi(fields[1]) mm := strings.Split(fields[2], ":") if len(mm) != 2 { - return nil, fmt.Errorf("Parsing '%s' failed: unexpected minor:major pair %s", text, mm) + return nil, errors.Errorf("parsing '%s' failed: unexpected minor:major pair %s", text, mm) } p.Major, _ = strconv.Atoi(mm[0]) p.Minor, _ = strconv.Atoi(mm[1]) - p.Root = fields[3] - p.Mountpoint = fields[4] + p.Root, err = strconv.Unquote(`"` + fields[3] + `"`) + if err != nil { + return nil, errors.Wrapf(err, "parsing '%s' failed: unable to unquote root field", fields[3]) + } + p.Mountpoint, err = strconv.Unquote(`"` + fields[4] + `"`) + if err != nil { + return nil, errors.Wrapf(err, "parsing '%s' failed: unable to unquote mount point field", fields[4]) + } p.Options = fields[5] // one or more optional fields, when a separator (-) @@ -101,11 +111,11 @@ func parseInfoFile(r io.Reader) ([]Info, error) { } } if i == numFields { - return nil, fmt.Errorf("Parsing '%s' failed: missing separator ('-')", text) + return nil, errors.Errorf("parsing '%s' failed: missing separator ('-')", text) } // There should be 3 fields after the separator... if i+4 > numFields { - return nil, fmt.Errorf("Parsing '%s' failed: not enough fields after a separator", text) + return nil, errors.Errorf("parsing '%s' failed: not enough fields after a separator", text) } // ... but in Linux <= 3.9 mounting a cifs with spaces in a share name // (like "//serv/My Documents") _may_ end up having a space in the last field diff --git a/vendor/github.com/containerd/containerd/runtime/runtime.go b/vendor/github.com/containerd/containerd/runtime/runtime.go index 1b3f87c15..5a3f654bd 100644 --- a/vendor/github.com/containerd/containerd/runtime/runtime.go +++ b/vendor/github.com/containerd/containerd/runtime/runtime.go @@ -69,4 +69,8 @@ type PlatformRuntime interface { // Tasks returns all the current tasks for the runtime. // Any container runs at most one task at a time. Tasks(context.Context, bool) ([]Task, error) + // Add adds a task into runtime. + Add(context.Context, Task) error + // Delete remove a task. + Delete(context.Context, string) } diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/deleted_state.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/deleted_state.go index bc9aefb7f..fe9d7bf55 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/deleted_state.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/deleted_state.go @@ -22,6 +22,7 @@ import ( "context" "github.com/containerd/console" + "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime/proc" google_protobuf "github.com/gogo/protobuf/types" "github.com/pkg/errors" @@ -55,11 +56,11 @@ func (s *deletedState) Start(ctx context.Context) error { } func (s *deletedState) Delete(ctx context.Context) error { - return errors.Errorf("cannot delete a deleted process") + return errors.Wrap(errdefs.ErrNotFound, "cannot delete a deleted process") } func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error { - return errors.Errorf("cannot kill a deleted process") + return errors.Wrap(errdefs.ErrNotFound, "cannot kill a deleted process") } func (s *deletedState) SetExited(status int) { @@ -69,7 +70,3 @@ func (s *deletedState) SetExited(status int) { func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) { return nil, errors.Errorf("cannot exec in a deleted state") } - -func (s *deletedState) Pid() int { - return -1 -} diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec.go index cefce6cc3..5dbfbaf62 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec.go @@ -49,7 +49,7 @@ type execProcess struct { io runc.IO status int exited time.Time - pid int + pid *safePid closers []io.Closer stdin io.Closer stdio proc.Stdio @@ -69,11 +69,7 @@ func (e *execProcess) ID() string { } func (e *execProcess) Pid() int { - return e.execState.Pid() -} - -func (e *execProcess) pidv() int { - return e.pid + return e.pid.get() } func (e *execProcess) ExitStatus() int { @@ -145,7 +141,7 @@ func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error { } func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error { - pid := e.pid + pid := e.pid.get() if pid != 0 { if err := unix.Kill(pid, syscall.Signal(sig)); err != nil { return errors.Wrapf(checkKillError(err), "exec kill error") @@ -170,6 +166,12 @@ func (e *execProcess) Start(ctx context.Context) error { } func (e *execProcess) start(ctx context.Context) (err error) { + // The reaper may receive exit signal right after + // the container is started, before the e.pid is updated. + // In that case, we want to block the signal handler to + // access e.pid until it is updated. + e.pid.Lock() + defer e.pid.Unlock() var ( socket *runc.Socket pidfile = filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id)) @@ -201,7 +203,7 @@ func (e *execProcess) start(ctx context.Context) (err error) { return e.parent.runtimeError(err, "OCI runtime exec failed") } if e.stdio.Stdin != "" { - sc, err := fifo.OpenFifo(ctx, e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + sc, err := fifo.OpenFifo(context.Background(), e.stdio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) if err != nil { return errors.Wrapf(err, "failed to open stdin fifo %s", e.stdio.Stdin) } @@ -210,29 +212,26 @@ func (e *execProcess) start(ctx context.Context) (err error) { } var copyWaitGroup sync.WaitGroup ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() if socket != nil { console, err := socket.ReceiveMaster() if err != nil { - cancel() return errors.Wrap(err, "failed to retrieve console master") } if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - cancel() return errors.Wrap(err, "failed to start console copy") } } else if !e.stdio.IsNull() { if err := copyPipes(ctx, e.io, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg, ©WaitGroup); err != nil { - cancel() return errors.Wrap(err, "failed to start io pipe copy") } } copyWaitGroup.Wait() pid, err := runc.ReadPidFile(opts.PidFile) if err != nil { - cancel() return errors.Wrap(err, "failed to retrieve OCI runtime exec pid") } - e.pid = pid + e.pid.pid = pid return nil } @@ -250,11 +249,11 @@ func (e *execProcess) Status(ctx context.Context) (string, error) { e.mu.Lock() defer e.mu.Unlock() // if we don't have a pid then the exec process has just been created - if e.pid == 0 { + if e.pid.get() == 0 { return "created", nil } // if we have a pid and it can be signaled, the process is running - if err := unix.Kill(e.pid, 0); err == nil { + if err := unix.Kill(e.pid.get(), 0); err == nil { return "running", nil } // else if we have a pid but it can nolonger be signaled, it has stopped diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec_state.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec_state.go index 0e0bc3fcf..12489501b 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec_state.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/exec_state.go @@ -31,7 +31,6 @@ type execState interface { Delete(context.Context) error Kill(context.Context, uint32, bool) error SetExited(int) - Pid() int } type execCreatedState struct { @@ -83,12 +82,6 @@ func (s *execCreatedState) SetExited(status int) { } } -func (s *execCreatedState) Pid() int { - s.p.mu.Lock() - defer s.p.mu.Unlock() - return s.p.pidv() -} - type execRunningState struct { p *execProcess } @@ -127,12 +120,6 @@ func (s *execRunningState) SetExited(status int) { } } -func (s *execRunningState) Pid() int { - s.p.mu.Lock() - defer s.p.mu.Unlock() - return s.p.pidv() -} - type execStoppedState struct { p *execProcess } @@ -170,7 +157,3 @@ func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error func (s *execStoppedState) SetExited(status int) { // no op } - -func (s *execStoppedState) Pid() int { - return s.p.pidv() -} diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init.go index c76bcfe43..b72f03bd1 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init.go @@ -160,7 +160,7 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) error { return p.runtimeError(err, "OCI runtime create failed") } if r.Stdin != "" { - sc, err := fifo.OpenFifo(ctx, r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) if err != nil { return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin) } @@ -169,21 +169,19 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) error { } var copyWaitGroup sync.WaitGroup ctx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() if socket != nil { console, err := socket.ReceiveMaster() if err != nil { - cancel() return errors.Wrap(err, "failed to retrieve console master") } console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup) if err != nil { - cancel() return errors.Wrap(err, "failed to start console copy") } p.console = console } else if !hasNoIO(r) { if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, ©WaitGroup); err != nil { - cancel() return errors.Wrap(err, "failed to start io pipe copy") } } @@ -191,7 +189,6 @@ func (p *Init) Create(ctx context.Context, r *CreateConfig) error { copyWaitGroup.Wait() pid, err := runc.ReadPidFile(pidFile) if err != nil { - cancel() return errors.Wrap(err, "failed to retrieve OCI runtime container pid") } p.pid = pid @@ -409,6 +406,7 @@ func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Proce Terminal: r.Terminal, }, waitBlock: make(chan struct{}), + pid: &safePid{}, } e.execState = &execCreatedState{p: e} return e, nil diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init_state.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init_state.go index 37798de20..62cbe38cd 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init_state.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/init_state.go @@ -172,7 +172,7 @@ func (s *createdCheckpointState) Start(ctx context.Context) error { return p.runtimeError(err, "OCI runtime restore failed") } if sio.Stdin != "" { - sc, err := fifo.OpenFifo(ctx, sio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) + sc, err := fifo.OpenFifo(context.Background(), sio.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0) if err != nil { return errors.Wrapf(err, "failed to open stdin fifo %s", sio.Stdin) } diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/io.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/io.go index 71f6ee1bb..7091e869c 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/io.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/io.go @@ -24,8 +24,10 @@ import ( "io" "os" "sync" + "sync/atomic" "syscall" + "github.com/containerd/containerd/log" "github.com/containerd/fifo" runc "github.com/containerd/go-runc" ) @@ -38,7 +40,7 @@ var bufPool = sync.Pool{ } func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error { - var sameFile io.WriteCloser + var sameFile *countingWriteCloser for _, i := range []struct { name string dest func(wc io.WriteCloser, rc io.Closer) @@ -52,7 +54,9 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w cwg.Done() p := bufPool.Get().(*[]byte) defer bufPool.Put(p) - io.CopyBuffer(wc, rio.Stdout(), *p) + if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil { + log.G(ctx).Warn("error copying stdout") + } wg.Done() wc.Close() if rc != nil { @@ -69,7 +73,9 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w cwg.Done() p := bufPool.Get().(*[]byte) defer bufPool.Put(p) - io.CopyBuffer(wc, rio.Stderr(), *p) + if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil { + log.G(ctx).Warn("error copying stderr") + } wg.Done() wc.Close() if rc != nil { @@ -96,6 +102,7 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w } } else { if sameFile != nil { + sameFile.count++ i.dest(sameFile, nil) continue } @@ -103,7 +110,10 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w return fmt.Errorf("containerd-shim: opening %s failed: %s", i.name, err) } if stdout == stderr { - sameFile = fw + sameFile = &countingWriteCloser{ + WriteCloser: fw, + count: 1, + } } } i.dest(fw, fr) @@ -111,7 +121,7 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w if stdin == "" { return nil } - f, err := fifo.OpenFifo(ctx, stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) + f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0) if err != nil { return fmt.Errorf("containerd-shim: opening %s failed: %s", stdin, err) } @@ -128,6 +138,19 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w return nil } +// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times. +type countingWriteCloser struct { + io.WriteCloser + count int64 +} + +func (c *countingWriteCloser) Close() error { + if atomic.AddInt64(&c.count, -1) > 0 { + return nil + } + return c.WriteCloser.Close() +} + // isFifo checks if a file is a fifo // if the file does not exist then it returns false func isFifo(path string) (bool, error) { diff --git a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/utils.go b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/utils.go index 04baae0f7..075ef2617 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/utils.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/linux/proc/utils.go @@ -23,6 +23,7 @@ import ( "io" "os" "strings" + "sync" "time" "github.com/containerd/containerd/errdefs" @@ -31,6 +32,18 @@ import ( "golang.org/x/sys/unix" ) +// safePid is a thread safe wrapper for pid. +type safePid struct { + sync.Mutex + pid int +} + +func (s *safePid) get() int { + s.Lock() + defer s.Unlock() + return s.pid +} + // TODO(mlaventure): move to runc package? func getLastRuntimeError(r *runc.Runc) (string, error) { if r.Log == "" { diff --git a/vendor/github.com/containerd/containerd/runtime/v1/shim/service.go b/vendor/github.com/containerd/containerd/runtime/v1/shim/service.go index df6d8b64e..3f89b41a3 100644 --- a/vendor/github.com/containerd/containerd/runtime/v1/shim/service.go +++ b/vendor/github.com/containerd/containerd/runtime/v1/shim/service.go @@ -545,7 +545,7 @@ func shouldKillAllOnExit(bundlePath string) (bool, error) { if bundleSpec.Linux != nil { for _, ns := range bundleSpec.Linux.Namespaces { - if ns.Type == specs.PIDNamespace { + if ns.Type == specs.PIDNamespace && ns.Path == "" { return false, nil } } diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go index 937aaaf0d..db639139f 100644 --- a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_unix.go @@ -72,9 +72,9 @@ func serveListener(path string) (net.Listener, error) { func handleSignals(logger *logrus.Entry, signals chan os.Signal) error { logger.Info("starting signal loop") + for { - select { - case s := <-signals: + for s := range signals { switch s { case unix.SIGCHLD: if err := Reap(); err != nil { diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go index 6e5642620..614e62974 100644 --- a/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/shim_windows.go @@ -27,7 +27,6 @@ import ( "os" "os/exec" "sync" - "syscall" "unsafe" winio "github.com/Microsoft/go-winio" @@ -40,10 +39,6 @@ import ( "golang.org/x/sys/windows" ) -const ( - errorConnectionAborted syscall.Errno = 1236 -) - // setupSignals creates a new signal handler for all signals func setupSignals() (chan os.Signal, error) { signals := make(chan os.Signal, 32) @@ -113,12 +108,11 @@ func serveListener(path string) (net.Listener, error) { func handleSignals(logger *logrus.Entry, signals chan os.Signal) error { logger.Info("starting signal loop") + for { - select { - case s := <-signals: + for s := range signals { switch s { case os.Interrupt: - break } } } @@ -209,7 +203,7 @@ func (dswl *deferredShimWriteLogger) beginAccept() { dswl.mu.Unlock() c, err := dswl.l.Accept() - if err == errorConnectionAborted { + if err == winio.ErrPipeListenerClosed { dswl.mu.Lock() dswl.aborted = true dswl.l.Close() diff --git a/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go index 986fc754b..594a0f75b 100644 --- a/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go +++ b/vendor/github.com/containerd/containerd/runtime/v2/shim/util_windows.go @@ -54,7 +54,7 @@ func AnonDialer(address string, timeout time.Duration) (net.Conn, error) { timedOutError := errors.Errorf("timed out waiting for npipe %s", address) start := time.Now() for { - remaining := timeout - time.Now().Sub(start) + remaining := timeout - time.Since(start) if remaining <= 0 { lastError = timedOutError break @@ -71,7 +71,7 @@ func AnonDialer(address string, timeout time.Duration) (net.Conn, error) { // serve it within 5 seconds. We use the passed in timeout for the // `DialPipe` timeout if the pipe exists however to give the pipe time // to `Accept` the connection. - if time.Now().Sub(start) >= 5*time.Second { + if time.Since(start) >= 5*time.Second { lastError = timedOutError break } diff --git a/vendor/github.com/containerd/containerd/vendor.conf b/vendor/github.com/containerd/containerd/vendor.conf index 25b1a01bd..f61ba048a 100644 --- a/vendor/github.com/containerd/containerd/vendor.conf +++ b/vendor/github.com/containerd/containerd/vendor.conf @@ -1,6 +1,6 @@ github.com/containerd/go-runc 5a6d9f37cfa36b15efba46dc7ea349fa9b7143c3 github.com/containerd/console c12b1e7919c14469339a5d38f2f8ed9b64a9de23 -github.com/containerd/cgroups 5e610833b72089b37d0e615de9a92dfc043757c2 +github.com/containerd/cgroups 4994991857f9b0ae8dc439551e8bebdbb4bf66c1 github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40 github.com/containerd/fifo 3d5202aec260678c48179c56f40e6f38a095738c github.com/containerd/btrfs 2e1aa0ddf94f91fa282b6ed87c23bf0d64911244 @@ -20,14 +20,14 @@ github.com/gogo/protobuf v1.0.0 github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef github.com/golang/protobuf v1.1.0 github.com/opencontainers/runtime-spec eba862dc2470385a233c7507392675cbeadf7353 # v1.0.1-45-geba862d -github.com/opencontainers/runc 96ec2177ae841256168fcf76954f7177af9446eb +github.com/opencontainers/runc v1.0.0-rc8 github.com/sirupsen/logrus v1.0.0 github.com/urfave/cli 7bc6a0acffa589f415f88aca16cc1de5ffd66f9c golang.org/x/net b3756b4b77d7b13260a0a2ec658753cf48922eac google.golang.org/grpc v1.12.0 github.com/pkg/errors v0.8.0 github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7 -golang.org/x/sys 1b2967e3c290b7c545b3db0deeda16e9be4f98a2 https://github.com/golang/sys +golang.org/x/sys 41f3e6584952bb034a481797859f6ab34b6803bd https://github.com/golang/sys github.com/opencontainers/image-spec v1.0.1 golang.org/x/sync 450f422ab23cf9881c94e2db30cac0eb1b7cf80c github.com/BurntSushi/toml a368813c5e648fee92e5f6c30e3944ff9d5e8895 @@ -36,20 +36,20 @@ github.com/Microsoft/go-winio v0.4.11 github.com/Microsoft/hcsshim v0.8.1 google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944 golang.org/x/text 19e51611da83d6be54ddafce4a4af510cb3e9ea4 -github.com/containerd/ttrpc 2a805f71863501300ae1976d29f0454ae003e85a +github.com/containerd/ttrpc 699c4e40d1e7416e08bf7019c7ce2e9beced4636 github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 gotest.tools v2.1.0 github.com/google/go-cmp v0.1.0 go.etcd.io/bbolt v1.3.1-etcd.8 # cri dependencies -github.com/containerd/cri 0d5cabd006cb5319dc965046067b8432d9fa5ef8 # release/1.2 branch +github.com/containerd/cri eb926cd79d3bac188dcc4ed7694fc9298f8831be # release/1.2 branch github.com/containerd/go-cni 40bcf8ec8acd7372be1d77031d585d5d8e561c90 github.com/blang/semver v3.1.0 github.com/containernetworking/cni v0.6.0 -github.com/containernetworking/plugins v0.7.0 +github.com/containernetworking/plugins v0.7.5 github.com/davecgh/go-spew v1.1.0 -github.com/docker/distribution b38e5838b7b2f2ad48e06ec4b500011976080621 +github.com/docker/distribution 0d3efadf0154c2b8a4e7b6621fff9809655cc580 github.com/docker/docker 86f080cff0914e9694068ed78d503701667c4c00 github.com/docker/spdystream 449fdfce4d962303d702fec724ef0ad181c92528 github.com/emicklei/go-restful v2.2.1 @@ -62,7 +62,7 @@ github.com/json-iterator/go 1.1.5 github.com/modern-go/reflect2 1.0.1 github.com/modern-go/concurrent 1.0.3 github.com/opencontainers/runtime-tools v0.6.0 -github.com/opencontainers/selinux b6fa367ed7f534f9ba25391cc2d467085dbb445a +github.com/opencontainers/selinux v1.2.2 github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 github.com/tchap/go-patricia v2.2.6 github.com/xeipuuv/gojsonpointer 4e3ac2762d5f479393488629ee9370b50873b3a6 diff --git a/vendor/github.com/containerd/ttrpc/README.md b/vendor/github.com/containerd/ttrpc/README.md index d1eed6b12..c345c844e 100644 --- a/vendor/github.com/containerd/ttrpc/README.md +++ b/vendor/github.com/containerd/ttrpc/README.md @@ -50,3 +50,13 @@ TODO: - [ ] Document protocol layout - [ ] Add testing under concurrent load to ensure - [ ] Verify connection error handling + +# Project details + +ttrpc is a containerd sub-project, licensed under the [Apache 2.0 license](./LICENSE). +As a containerd sub-project, you will find the: + * [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md), + * [Maintainers](https://github.com/containerd/project/blob/master/MAINTAINERS), + * and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md) + +information in our [`containerd/project`](https://github.com/containerd/project) repository. diff --git a/vendor/github.com/containerd/ttrpc/client.go b/vendor/github.com/containerd/ttrpc/client.go index e40592dd7..35ca91fba 100644 --- a/vendor/github.com/containerd/ttrpc/client.go +++ b/vendor/github.com/containerd/ttrpc/client.go @@ -24,6 +24,7 @@ import ( "strings" "sync" "syscall" + "time" "github.com/gogo/protobuf/proto" "github.com/pkg/errors" @@ -48,7 +49,15 @@ type Client struct { err error } -func NewClient(conn net.Conn) *Client { +type ClientOpts func(c *Client) + +func WithOnClose(onClose func()) ClientOpts { + return func(c *Client) { + c.closeFunc = onClose + } +} + +func NewClient(conn net.Conn, opts ...ClientOpts) *Client { c := &Client{ codec: codec{}, conn: conn, @@ -59,6 +68,10 @@ func NewClient(conn net.Conn) *Client { closeFunc: func() {}, } + for _, o := range opts { + o(c) + } + go c.run() return c } @@ -86,6 +99,10 @@ func (c *Client) Call(ctx context.Context, service, method string, req, resp int cresp = &Response{} ) + if dl, ok := ctx.Deadline(); ok { + creq.TimeoutNano = dl.Sub(time.Now()).Nanoseconds() + } + if err := c.dispatch(ctx, creq, cresp); err != nil { return err } @@ -104,6 +121,7 @@ func (c *Client) Call(ctx context.Context, service, method string, req, resp int func (c *Client) dispatch(ctx context.Context, req *Request, resp *Response) error { errs := make(chan error, 1) call := &callRequest{ + ctx: ctx, req: req, resp: resp, errs: errs, @@ -135,11 +153,6 @@ func (c *Client) Close() error { return nil } -// OnClose allows a close func to be called when the server is closed -func (c *Client) OnClose(closer func()) { - c.closeFunc = closer -} - type message struct { messageHeader p []byte @@ -249,7 +262,7 @@ func (c *Client) recv(resp *Response, msg *message) error { } if msg.Type != messageTypeResponse { - return errors.New("unkown message type received") + return errors.New("unknown message type received") } defer c.channel.putmbuf(msg.p) diff --git a/vendor/github.com/containerd/ttrpc/server.go b/vendor/github.com/containerd/ttrpc/server.go index 263cb4583..40804eac0 100644 --- a/vendor/github.com/containerd/ttrpc/server.go +++ b/vendor/github.com/containerd/ttrpc/server.go @@ -414,6 +414,9 @@ func (c *serverConn) run(sctx context.Context) { case request := <-requests: active++ go func(id uint32) { + ctx, cancel := getRequestContext(ctx, request.req) + defer cancel() + p, status := c.server.services.call(ctx, request.req.Service, request.req.Method, request.req.Payload) resp := &Response{ Status: status.Proto(), @@ -454,3 +457,15 @@ func (c *serverConn) run(sctx context.Context) { } } } + +var noopFunc = func() {} + +func getRequestContext(ctx context.Context, req *Request) (retCtx context.Context, cancel func()) { + cancel = noopFunc + if req.TimeoutNano == 0 { + return ctx, cancel + } + + ctx, cancel = context.WithTimeout(ctx, time.Duration(req.TimeoutNano)) + return ctx, cancel +} diff --git a/vendor/github.com/containerd/ttrpc/services.go b/vendor/github.com/containerd/ttrpc/services.go index e90963825..fe1cade5a 100644 --- a/vendor/github.com/containerd/ttrpc/services.go +++ b/vendor/github.com/containerd/ttrpc/services.go @@ -76,7 +76,7 @@ func (s *serviceSet) dispatch(ctx context.Context, serviceName, methodName strin switch v := obj.(type) { case proto.Message: if err := proto.Unmarshal(p, v); err != nil { - return status.Errorf(codes.Internal, "ttrpc: error unmarshaling payload: %v", err.Error()) + return status.Errorf(codes.Internal, "ttrpc: error unmarshalling payload: %v", err.Error()) } default: return status.Errorf(codes.Internal, "ttrpc: error unsupported request type: %T", v) diff --git a/vendor/github.com/containerd/ttrpc/types.go b/vendor/github.com/containerd/ttrpc/types.go index 1f7969e5c..a6b3b818e 100644 --- a/vendor/github.com/containerd/ttrpc/types.go +++ b/vendor/github.com/containerd/ttrpc/types.go @@ -23,9 +23,10 @@ import ( ) type Request struct { - Service string `protobuf:"bytes,1,opt,name=service,proto3"` - Method string `protobuf:"bytes,2,opt,name=method,proto3"` - Payload []byte `protobuf:"bytes,3,opt,name=payload,proto3"` + Service string `protobuf:"bytes,1,opt,name=service,proto3"` + Method string `protobuf:"bytes,2,opt,name=method,proto3"` + Payload []byte `protobuf:"bytes,3,opt,name=payload,proto3"` + TimeoutNano int64 `protobuf:"varint,4,opt,name=timeout_nano,proto3"` } func (r *Request) Reset() { *r = Request{} } diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md index e755fb7bc..11fa4138b 100644 --- a/vendor/github.com/opencontainers/runc/README.md +++ b/vendor/github.com/opencontainers/runc/README.md @@ -16,10 +16,9 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. -### Security +## Security -If you wish to report a security issue, please disclose the issue responsibly -to security@opencontainers.org. +Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/) ## Building diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c new file mode 100644 index 000000000..ad10f1406 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c @@ -0,0 +1,516 @@ +/* + * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2019 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <limits.h> +#include <fcntl.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/vfs.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/sendfile.h> +#include <sys/syscall.h> + +/* Use our own wrapper for memfd_create. */ +#if !defined(SYS_memfd_create) && defined(__NR_memfd_create) +# define SYS_memfd_create __NR_memfd_create +#endif +/* memfd_create(2) flags -- copied from <linux/memfd.h>. */ +#ifndef MFD_CLOEXEC +# define MFD_CLOEXEC 0x0001U +# define MFD_ALLOW_SEALING 0x0002U +#endif +int memfd_create(const char *name, unsigned int flags) +{ +#ifdef SYS_memfd_create + return syscall(SYS_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + + +/* This comes directly from <linux/fcntl.h>. */ +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif +#ifndef F_ADD_SEALS +# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif +#ifndef F_SEAL_SEAL +# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +# define F_SEAL_GROW 0x0004 /* prevent file from growing */ +# define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY" +#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe" +#define RUNC_MEMFD_SEALS \ + (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) + +static void *must_realloc(void *ptr, size_t size) +{ + void *old = ptr; + do { + ptr = realloc(old, size); + } while(!ptr); + return ptr; +} + +/* + * Verify whether we are currently in a self-cloned program (namely, is + * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather + * for shmem files), and we want to be sure it's actually sealed. + */ +static int is_self_cloned(void) +{ + int fd, ret, is_cloned = 0; + struct stat statbuf = {}; + struct statfs fsbuf = {}; + + fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -ENOTRECOVERABLE; + + /* + * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for + * this, because you cannot write to a sealed memfd no matter what (so + * sharing it isn't a bad thing -- and an admin could bind-mount a sealed + * memfd to /usr/bin/runc to allow re-use). + */ + ret = fcntl(fd, F_GET_SEALS); + if (ret >= 0) { + is_cloned = (ret == RUNC_MEMFD_SEALS); + goto out; + } + + /* + * All other forms require CLONED_BINARY_ENV, since they are potentially + * writeable (or we can't tell if they're fully safe) and thus we must + * check the environment as an extra layer of defence. + */ + if (!getenv(CLONED_BINARY_ENV)) { + is_cloned = false; + goto out; + } + + /* + * Is the binary on a read-only filesystem? We can't detect bind-mounts in + * particular (in-kernel they are identical to regular mounts) but we can + * at least be sure that it's read-only. In addition, to make sure that + * it's *our* bind-mount we check CLONED_BINARY_ENV. + */ + if (fstatfs(fd, &fsbuf) >= 0) + is_cloned |= (fsbuf.f_flags & MS_RDONLY); + + /* + * Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6 + * which appears to have a borked backport of F_GET_SEALS. Either way, + * having a file which has no hardlinks indicates that we aren't using + * a host-side "runc" binary and this is something that a container + * cannot fake (because unlinking requires being able to resolve the + * path that you want to unlink). + */ + if (fstat(fd, &statbuf) >= 0) + is_cloned |= (statbuf.st_nlink == 0); + +out: + close(fd); + return is_cloned; +} + +/* Read a given file into a new buffer, and providing the length. */ +static char *read_file(char *path, size_t *length) +{ + int fd; + char buf[4096], *copy = NULL; + + if (!length) + return NULL; + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return NULL; + + *length = 0; + for (;;) { + ssize_t n; + + n = read(fd, buf, sizeof(buf)); + if (n < 0) + goto error; + if (!n) + break; + + copy = must_realloc(copy, (*length + n) * sizeof(*copy)); + memcpy(copy + *length, buf, n); + *length += n; + } + close(fd); + return copy; + +error: + close(fd); + free(copy); + return NULL; +} + +/* + * A poor-man's version of "xargs -0". Basically parses a given block of + * NUL-delimited data, within the given length and adds a pointer to each entry + * to the array of pointers. + */ +static int parse_xargs(char *data, int data_length, char ***output) +{ + int num = 0; + char *cur = data; + + if (!data || *output != NULL) + return -1; + + while (cur < data + data_length) { + num++; + *output = must_realloc(*output, (num + 1) * sizeof(**output)); + (*output)[num - 1] = cur; + cur += strlen(cur) + 1; + } + (*output)[num] = NULL; + return num; +} + +/* + * "Parse" out argv from /proc/self/cmdline. + * This is necessary because we are running in a context where we don't have a + * main() that we can just get the arguments from. + */ +static int fetchve(char ***argv) +{ + char *cmdline = NULL; + size_t cmdline_size; + + cmdline = read_file("/proc/self/cmdline", &cmdline_size); + if (!cmdline) + goto error; + + if (parse_xargs(cmdline, cmdline_size, argv) <= 0) + goto error; + + return 0; + +error: + free(cmdline); + return -EINVAL; +} + +enum { + EFD_NONE = 0, + EFD_MEMFD, + EFD_FILE, +}; + +/* + * This comes from <linux/fcntl.h>. We can't hard-code __O_TMPFILE because it + * changes depending on the architecture. If we don't have O_TMPFILE we always + * have the mkostemp(3) fallback. + */ +#ifndef O_TMPFILE +# if defined(__O_TMPFILE) && defined(O_DIRECTORY) +# define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +# endif +#endif + +static int make_execfd(int *fdtype) +{ + int fd = -1; + char template[PATH_MAX] = {0}; + char *prefix = getenv("_LIBCONTAINER_STATEDIR"); + + if (!prefix || *prefix != '/') + prefix = "/tmp"; + if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) + return -1; + + /* + * Now try memfd, it's much nicer than actually creating a file in STATEDIR + * since it's easily detected thanks to sealing and also doesn't require + * assumptions about STATEDIR. + */ + *fdtype = EFD_MEMFD; + fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING); + if (fd >= 0) + return fd; + if (errno != ENOSYS && errno != EINVAL) + goto error; + +#ifdef O_TMPFILE + /* + * Try O_TMPFILE to avoid races where someone might snatch our file. Note + * that O_EXCL isn't actually a security measure here (since you can just + * fd re-open it and clear O_EXCL). + */ + *fdtype = EFD_FILE; + fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700); + if (fd >= 0) { + struct stat statbuf = {}; + bool working_otmpfile = false; + + /* + * open(2) ignores unknown O_* flags -- yeah, I was surprised when I + * found this out too. As a result we can't check for EINVAL. However, + * if we get nlink != 0 (or EISDIR) then we know that this kernel + * doesn't support O_TMPFILE. + */ + if (fstat(fd, &statbuf) >= 0) + working_otmpfile = (statbuf.st_nlink == 0); + + if (working_otmpfile) + return fd; + + /* Pretend that we got EISDIR since O_TMPFILE failed. */ + close(fd); + errno = EISDIR; + } + if (errno != EISDIR) + goto error; +#endif /* defined(O_TMPFILE) */ + + /* + * Our final option is to create a temporary file the old-school way, and + * then unlink it so that nothing else sees it by accident. + */ + *fdtype = EFD_FILE; + fd = mkostemp(template, O_CLOEXEC); + if (fd >= 0) { + if (unlink(template) >= 0) + return fd; + close(fd); + } + +error: + *fdtype = EFD_NONE; + return -1; +} + +static int seal_execfd(int *fd, int fdtype) +{ + switch (fdtype) { + case EFD_MEMFD: + return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS); + case EFD_FILE: { + /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */ + int newfd; + char fdpath[PATH_MAX] = {0}; + + if (fchmod(*fd, 0100) < 0) + return -1; + + if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0) + return -1; + + newfd = open(fdpath, O_PATH | O_CLOEXEC); + if (newfd < 0) + return -1; + + close(*fd); + *fd = newfd; + return 0; + } + default: + break; + } + return -1; +} + +static int try_bindfd(void) +{ + int fd, ret = -1; + char template[PATH_MAX] = {0}; + char *prefix = getenv("_LIBCONTAINER_STATEDIR"); + + if (!prefix || *prefix != '/') + prefix = "/tmp"; + if (snprintf(template, sizeof(template), "%s/runc.XXXXXX", prefix) < 0) + return ret; + + /* + * We need somewhere to mount it, mounting anything over /proc/self is a + * BAD idea on the host -- even if we do it temporarily. + */ + fd = mkstemp(template); + if (fd < 0) + return ret; + close(fd); + + /* + * For obvious reasons this won't work in rootless mode because we haven't + * created a userns+mntns -- but getting that to work will be a bit + * complicated and it's only worth doing if someone actually needs it. + */ + ret = -EPERM; + if (mount("/proc/self/exe", template, "", MS_BIND, "") < 0) + goto out; + if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0) + goto out_umount; + + + /* Get read-only handle that we're sure can't be made read-write. */ + ret = open(template, O_PATH | O_CLOEXEC); + +out_umount: + /* + * Make sure the MNT_DETACH works, otherwise we could get remounted + * read-write and that would be quite bad (the fd would be made read-write + * too, invalidating the protection). + */ + if (umount2(template, MNT_DETACH) < 0) { + if (ret >= 0) + close(ret); + ret = -ENOTRECOVERABLE; + } + +out: + /* + * We don't care about unlink errors, the worst that happens is that + * there's an empty file left around in STATEDIR. + */ + unlink(template); + return ret; +} + +static ssize_t fd_to_fd(int outfd, int infd) +{ + ssize_t total = 0; + char buffer[4096]; + + for (;;) { + ssize_t nread, nwritten = 0; + + nread = read(infd, buffer, sizeof(buffer)); + if (nread < 0) + return -1; + if (!nread) + break; + + do { + ssize_t n = write(outfd, buffer + nwritten, nread - nwritten); + if (n < 0) + return -1; + nwritten += n; + } while(nwritten < nread); + + total += nwritten; + } + + return total; +} + +static int clone_binary(void) +{ + int binfd, execfd; + struct stat statbuf = {}; + size_t sent = 0; + int fdtype = EFD_NONE; + + /* + * Before we resort to copying, let's try creating an ro-binfd in one shot + * by getting a handle for a read-only bind-mount of the execfd. + */ + execfd = try_bindfd(); + if (execfd >= 0) + return execfd; + + /* + * Dammit, that didn't work -- time to copy the binary to a safe place we + * can seal the contents. + */ + execfd = make_execfd(&fdtype); + if (execfd < 0 || fdtype == EFD_NONE) + return -ENOTRECOVERABLE; + + binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); + if (binfd < 0) + goto error; + + if (fstat(binfd, &statbuf) < 0) + goto error_binfd; + + while (sent < statbuf.st_size) { + int n = sendfile(execfd, binfd, NULL, statbuf.st_size - sent); + if (n < 0) { + /* sendfile can fail so we fallback to a dumb user-space copy. */ + n = fd_to_fd(execfd, binfd); + if (n < 0) + goto error_binfd; + } + sent += n; + } + close(binfd); + if (sent != statbuf.st_size) + goto error; + + if (seal_execfd(&execfd, fdtype) < 0) + goto error; + + return execfd; + +error_binfd: + close(binfd); +error: + close(execfd); + return -EIO; +} + +/* Get cheap access to the environment. */ +extern char **environ; + +int ensure_cloned_binary(void) +{ + int execfd; + char **argv = NULL; + + /* Check that we're not self-cloned, and if we are then bail. */ + int cloned = is_self_cloned(); + if (cloned > 0 || cloned == -ENOTRECOVERABLE) + return cloned; + + if (fetchve(&argv) < 0) + return -EINVAL; + + execfd = clone_binary(); + if (execfd < 0) + return -EIO; + + if (putenv(CLONED_BINARY_ENV "=1")) + goto error; + + fexecve(execfd, argv, environ); +error: + close(execfd); + return -ENOEXEC; +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 28269dfc0..7750af35e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -534,6 +534,9 @@ void join_namespaces(char *nslist) free(namespaces); } +/* Defined in cloned_binary.c. */ +extern int ensure_cloned_binary(void); + void nsexec(void) { int pipenum; @@ -549,6 +552,14 @@ void nsexec(void) if (pipenum == -1) return; + /* + * We need to re-exec if we are not in a cloned binary. This is necessary + * to ensure that containers won't be able to access the host binary + * through /proc/self/exe. See CVE-2019-5736. + */ + if (ensure_cloned_binary() < 0) + bail("could not ensure we are a cloned binary"); + /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); diff --git a/vendor/github.com/opencontainers/runc/vendor.conf b/vendor/github.com/opencontainers/runc/vendor.conf index fadbe0707..22cba0f1b 100644 --- a/vendor/github.com/opencontainers/runc/vendor.conf +++ b/vendor/github.com/opencontainers/runc/vendor.conf @@ -1,10 +1,11 @@ # OCI runtime-spec. When updating this, make sure you use a version tag rather # than a commit ID so it's much more obvious what version of the spec we are # using. -github.com/opencontainers/runtime-spec 5684b8af48c1ac3b1451fa499724e30e3c20a294 +github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # Core libcontainer functionality. +github.com/checkpoint-restore/go-criu v3.11 github.com/mrunalp/fileutils ed869b029674c0e9ce4c0dfa781405c2d9946d08 -github.com/opencontainers/selinux v1.0.0-rc1 +github.com/opencontainers/selinux v1.2.2 github.com/seccomp/libseccomp-golang 84e90a91acea0f4e51e62bc1a75de18b1fc0790f github.com/sirupsen/logrus a3f95b5c423586578a4e099b11a46c2479628cac github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 @@ -18,7 +19,7 @@ github.com/golang/protobuf 18c9bb3261723cd5401db4d0c9fbc5c3b6c70fe8 github.com/cyphar/filepath-securejoin v0.2.1 github.com/docker/go-units v0.2.0 github.com/urfave/cli d53eb991652b1d438abdd34ce4bfa3ef1539108e -golang.org/x/sys 7ddbeae9ae08c6a06a59597f0c9edbc5ff2444ce https://github.com/golang/sys +golang.org/x/sys 41f3e6584952bb034a481797859f6ab34b6803bd https://github.com/golang/sys # console dependencies github.com/containerd/console 2748ece16665b45a47f884001d5831ec79703880 |