diff options
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/bitmap.go | 150 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go | 64 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cpuset.go | 114 |
4 files changed, 326 insertions, 4 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/bitmap.go b/pkg/sentry/fsimpl/cgroupfs/bitmap.go
new file mode 100644
index 000000000..8074641db
--- /dev/null
+++ b/pkg/sentry/fsimpl/cgroupfs/bitmap.go
@@ -0,0 +1,150 @@
+// Copyright 2021 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cgroupfs
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"gvisor.dev/gvisor/pkg/bitmap"
+)
+
+// formatBitmap produces a string representation of b, which lists the indices
+// of set bits in the bitmap. Indices are separated by commas and ranges of
+// set bits are abbreviated. Example outputs: "0,2,4", "0,3-7,10", "0-10".
+//
+// Inverse of parseBitmap.
+func formatBitmap(b *bitmap.Bitmap) string {
+	ones := b.ToSlice()
+	if len(ones) == 0 {
+		return ""
+	}
+
+	elems := make([]string, 0, len(ones))
+	runStart := ones[0]
+	lastVal := ones[0]
+	inRun := false
+
+	for _, v := range ones[1:] {
+		last := lastVal
+		lastVal = v
+
+		if last+1 == v {
+			// In a contiguous block of ones.
+			if !inRun {
+				runStart = last
+				inRun = true
+			}
+
+			continue
+		}
+
+		// Non-contiguous bit.
+		if inRun {
+			// Render a run
+			elems = append(elems, fmt.Sprintf("%d-%d", runStart, last))
+			inRun = false
+			continue
+		}
+
+		// Lone non-contiguous bit.
+		elems = append(elems, fmt.Sprintf("%d", last))
+
+	}
+
+	// Process potential final run
+	if inRun {
+		elems = append(elems, fmt.Sprintf("%d-%d", runStart, lastVal))
+	} else {
+		elems = append(elems, fmt.Sprintf("%d", lastVal))
+	}
+
+	return strings.Join(elems, ",")
+}
+
+// parseToken parses a single element of a bitmap list: either one index
+// ("3") or an inclusive range ("3-7"). A lone index yields start == end.
+// Indices must be base-10 and fit in 32 bits.
+func parseToken(token string) (start, end uint32, err error) {
+	// SplitN with a non-empty separator always yields at least one element,
+	// and at most two because of the limit.
+	ts := strings.SplitN(token, "-", 2)
+	first, err := strconv.ParseUint(ts[0], 10, 32)
+	if err != nil {
+		return 0, 0, err
+	}
+	if len(ts) == 1 {
+		return uint32(first), uint32(first), nil
+	}
+	last, err := strconv.ParseUint(ts[1], 10, 32)
+	if err != nil {
+		return 0, 0, err
+	}
+	// A degenerate range such as "3-3" denotes a single bit and is accepted,
+	// matching Linux's bitmap list parser; only inverted ranges are invalid.
+	if first > last {
+		return 0, 0, fmt.Errorf("start (%v) must not be greater than end (%v)", first, last)
+	}
+	return uint32(first), uint32(last), nil
+}
+
+// parseBitmap parses input as a bitmap. input should be a comma separated list
+// of indices, and ranges of set bits may be abbreviated. Examples: "0,2,4",
+// "0,3-7,10", "0-10". Input after the first newline or null byte is discarded.
+//
+// sizeHint sets the initial size of the bitmap, which may prevent reallocation
+// when growing the bitmap during parsing. Ideally sizeHint should be at least
+// as large as the bitmap represented by input, but this is not required.
+//
+// Inverse of formatBitmap.
+func parseBitmap(input string, sizeHint uint32) (*bitmap.Bitmap, error) {
+	// Every uint32 index is below 1<<32, so this imposes no bound.
+	return parseBitmapWithLimit(input, sizeHint, 1<<32)
+}
+
+// parseBitmapWithLimit is like parseBitmap, but fails if input names any index
+// that is not in [0, limit). Each token is validated before any of its bits
+// are set, so a token such as "0-4294967295" is rejected up front instead of
+// growing the bitmap.
+func parseBitmapWithLimit(input string, sizeHint uint32, limit uint64) (*bitmap.Bitmap, error) {
+	b := bitmap.New(sizeHint)
+
+	if termIdx := strings.IndexAny(input, "\n\000"); termIdx != -1 {
+		input = input[:termIdx]
+	}
+	input = strings.TrimSpace(input)
+
+	if len(input) == 0 {
+		return &b, nil
+	}
+	tokens := strings.Split(input, ",")
+
+	for _, t := range tokens {
+		start, end, err := parseToken(strings.TrimSpace(t))
+		if err != nil {
+			return nil, err
+		}
+		if uint64(end) >= limit {
+			return nil, fmt.Errorf("index %d is out of range, must be less than %d", end, limit)
+		}
+		// Iterate in 64 bits: with a uint32 counter, i <= end is always true
+		// when end is the maximum uint32 and the loop would never terminate.
+		for i := uint64(start); i <= uint64(end); i++ {
+			b.Add(uint32(i))
+		}
+	}
+	return &b, nil
+}
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index edc3b50b9..e089b2c28 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -269,7 +269,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
 		case controllerCPUAcct:
 			c = newCPUAcctController(fs)
 		case controllerCPUSet:
-			c = newCPUSetController(fs)
+			c = newCPUSetController(k, fs)
 		case controllerJob:
 			c = newJobController(fs)
 		case controllerMemory:
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
index d390050d7..47ddb54c5 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
@@ -524,6 +524,10 @@ func (c *cpusetController) StateTypeName() string {
 func (c *cpusetController) StateFields() []string {
 	return []string{
 		"controllerCommon",
+		"maxCpus",
+		"maxMems",
+		"cpus",
+		"mems",
 	}
 }
 
@@ -533,6 +537,10 @@ func (c *cpusetController) beforeSave() {}
 func (c *cpusetController) StateSave(stateSinkObject state.Sink) {
 	c.beforeSave()
 	stateSinkObject.Save(0, &c.controllerCommon)
+	stateSinkObject.Save(1, &c.maxCpus)
+	stateSinkObject.Save(2, &c.maxMems)
+	stateSinkObject.Save(3, &c.cpus)
+	stateSinkObject.Save(4, &c.mems)
 }
 
 func (c *cpusetController) afterLoad() {}
@@ -540,6 +548,60 @@ func (c *cpusetController) afterLoad() {}
 // +checklocksignore
 func (c *cpusetController) StateLoad(stateSourceObject state.Source) {
 	stateSourceObject.Load(0, &c.controllerCommon)
+	stateSourceObject.Load(1, &c.maxCpus)
+	stateSourceObject.Load(2, &c.maxMems)
+	stateSourceObject.Load(3, &c.cpus)
+	stateSourceObject.Load(4, &c.mems)
+}
+
+func (d *cpusData) StateTypeName() string {
+	return "pkg/sentry/fsimpl/cgroupfs.cpusData"
+}
+
+func (d *cpusData) StateFields() []string {
+	return []string{
+		"c",
+	}
+}
+
+func (d *cpusData) beforeSave() {}
+
+// +checklocksignore
+func (d *cpusData) StateSave(stateSinkObject state.Sink) {
+	d.beforeSave()
+	stateSinkObject.Save(0, &d.c)
+}
+
+func (d *cpusData) afterLoad() {}
+
+// +checklocksignore
+func (d *cpusData) StateLoad(stateSourceObject state.Source) {
+	stateSourceObject.Load(0, &d.c)
+}
+
+func (d *memsData) StateTypeName() string {
+	return "pkg/sentry/fsimpl/cgroupfs.memsData"
+}
+
+func (d *memsData) StateFields() []string {
+	return []string{
+		"c",
+	}
+}
+
+func (d *memsData) beforeSave() {}
+
+// +checklocksignore
+func (d *memsData) StateSave(stateSinkObject state.Sink) {
+	d.beforeSave()
+	stateSinkObject.Save(0, &d.c)
+}
+
+func (d *memsData) afterLoad() {}
+
+// +checklocksignore
+func (d *memsData) StateLoad(stateSourceObject state.Source) {
+	stateSourceObject.Load(0, &d.c)
 }
 
 func (r *dirRefs) StateTypeName() string {
@@ -694,6 +756,8 @@ func init() {
 	state.Register((*cpuacctUsageUserData)(nil))
 	state.Register((*cpuacctUsageSysData)(nil))
 	state.Register((*cpusetController)(nil))
+	state.Register((*cpusData)(nil))
+	state.Register((*memsData)(nil))
 	state.Register((*dirRefs)(nil))
 	state.Register((*jobController)(nil))
 	state.Register((*jobIDData)(nil))
diff --git a/pkg/sentry/fsimpl/cgroupfs/cpuset.go b/pkg/sentry/fsimpl/cgroupfs/cpuset.go
index ac547f8e2..62e7029da 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cpuset.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cpuset.go
@@ -15,25 +15,133 @@
 package cgroupfs
 
 import (
+	"bytes"
+	"fmt"
+
+	"gvisor.dev/gvisor/pkg/bitmap"
 	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/errors/linuxerr"
+	"gvisor.dev/gvisor/pkg/hostarch"
+	"gvisor.dev/gvisor/pkg/log"
 	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+	"gvisor.dev/gvisor/pkg/sentry/kernel"
 	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+	"gvisor.dev/gvisor/pkg/usermem"
 )
 
 // +stateify savable
 type cpusetController struct {
 	controllerCommon
+
+	// maxCpus and maxMems are the number of CPUs and memory nodes. Valid
+	// indices in cpus and mems are [0, maxCpus) and [0, maxMems).
+	maxCpus uint32
+	maxMems uint32
+
+	// cpus and mems are the values of cpuset.cpus and cpuset.mems.
+	cpus *bitmap.Bitmap
+	mems *bitmap.Bitmap
 }
 
 var _ controller = (*cpusetController)(nil)
 
-func newCPUSetController(fs *filesystem) *cpusetController {
-	c := &cpusetController{}
+// newCPUSetController creates a cpuset controller in which every application
+// core and the single memory node are initially set.
+func newCPUSetController(k *kernel.Kernel, fs *filesystem) *cpusetController {
+	cores := uint32(k.ApplicationCores())
+	cpus := bitmap.New(cores)
+	cpus.FlipRange(0, cores)
+	mems := bitmap.New(1)
+	mems.FlipRange(0, 1)
+	c := &cpusetController{
+		cpus:    &cpus,
+		mems:    &mems,
+		maxCpus: cores,
+		maxMems: 1, // We always report a single NUMA node.
+	}
 	c.controllerCommon.init(controllerCPUSet, fs)
 	return c
 }
 
 // AddControlFiles implements controller.AddControlFiles.
 func (c *cpusetController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
-	// This controller is currently intentionally empty.
+	contents["cpuset.cpus"] = c.fs.newControllerWritableFile(ctx, creds, &cpusData{c: c})
+	contents["cpuset.mems"] = c.fs.newControllerWritableFile(ctx, creds, &memsData{c: c})
+}
+
+// copyInBitmap copies in and parses a bitmap list written to the control file
+// name, accepting only indices in [0, limit). It returns the parsed bitmap
+// and the number of bytes copied from src. Malformed or out-of-range input
+// fails with EINVAL.
+func copyInBitmap(ctx context.Context, src usermem.IOSequence, offset int64, name string, limit uint32) (*bitmap.Bitmap, int64, error) {
+	src = src.DropFirst64(offset)
+	if src.NumBytes() > hostarch.PageSize {
+		return nil, 0, linuxerr.EINVAL
+	}
+
+	// NOTE(review): TaskFromContext is assumed to return nil when ctx does not
+	// belong to a task (TODO confirm); allocate a buffer in that case instead
+	// of dereferencing nil.
+	var buf []byte
+	if t := kernel.TaskFromContext(ctx); t != nil {
+		buf = t.CopyScratchBuffer(hostarch.PageSize)
+	} else {
+		buf = make([]byte, hostarch.PageSize)
+	}
+	n, err := src.CopyIn(ctx, buf)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	// limit is a count, so the highest valid index is limit-1. Enforcing it
+	// during parsing rejects an oversized range before any of its bits are
+	// set.
+	b, err := parseBitmapWithLimit(string(buf[:n]), limit, uint64(limit))
+	if err != nil {
+		log.Warningf("cgroupfs cpuset controller: Failed to parse bitmap for %s: %v", name, err)
+		return nil, 0, linuxerr.EINVAL
+	}
+	return b, int64(n), nil
+}
+
+// +stateify savable
+type cpusData struct {
+	c *cpusetController
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *cpusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "%s\n", formatBitmap(d.c.cpus))
+	return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (d *cpusData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	b, n, err := copyInBitmap(ctx, src, offset, "cpuset.cpus", d.c.maxCpus)
+	if err != nil {
+		return 0, err
+	}
+	d.c.cpus = b
+	return n, nil
+}
+
+// +stateify savable
+type memsData struct {
+	c *cpusetController
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *memsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+	fmt.Fprintf(buf, "%s\n", formatBitmap(d.c.mems))
+	return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (d *memsData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+	b, n, err := copyInBitmap(ctx, src, offset, "cpuset.mems", d.c.maxMems)
+	if err != nil {
+		return 0, err
+	}
+	d.c.mems = b
+	return n, nil
 }