diff options
-rw-r--r-- | pkg/sentry/fsimpl/devpts/devpts.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/fuse/fuse_state_autogen.go | 23 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/fuse/fusefs.go | 10 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/filesystem.go | 57 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 110 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/overlay/filesystem.go | 12 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/pipefs/pipefs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/filesystem.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/sockfs/sockfs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/sys/sys.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go | 11 | ||||
-rw-r--r-- | pkg/sentry/vfs/anonfs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/vfs/filesystem.go | 9 | ||||
-rw-r--r-- | pkg/sentry/vfs/mount.go | 8 | ||||
-rw-r--r-- | runsc/cmd/mitigate.go | 122 | ||||
-rw-r--r-- | runsc/mitigate/cpu.go | 423 | ||||
-rw-r--r-- | runsc/mitigate/mitigate.go | 467 | ||||
-rw-r--r-- | runsc/mitigate/mitigate_conf.go | 37 |
21 files changed, 735 insertions, 599 deletions
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index d8c237753..e75954105 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -137,6 +137,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // rootInode is the root directory inode for the devpts mounts. // // +stateify savable diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go index f59e82755..0a75d80a5 100644 --- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go +++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go @@ -200,6 +200,7 @@ func (f *filesystemOptions) StateTypeName() string { func (f *filesystemOptions) StateFields() []string { return []string{ + "mopts", "userID", "groupID", "rootMode", @@ -212,21 +213,23 @@ func (f *filesystemOptions) beforeSave() {} func (f *filesystemOptions) StateSave(stateSinkObject state.Sink) { f.beforeSave() - stateSinkObject.Save(0, &f.userID) - stateSinkObject.Save(1, &f.groupID) - stateSinkObject.Save(2, &f.rootMode) - stateSinkObject.Save(3, &f.maxActiveRequests) - stateSinkObject.Save(4, &f.maxRead) + stateSinkObject.Save(0, &f.mopts) + stateSinkObject.Save(1, &f.userID) + stateSinkObject.Save(2, &f.groupID) + stateSinkObject.Save(3, &f.rootMode) + stateSinkObject.Save(4, &f.maxActiveRequests) + stateSinkObject.Save(5, &f.maxRead) } func (f *filesystemOptions) afterLoad() {} func (f *filesystemOptions) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &f.userID) - stateSourceObject.Load(1, &f.groupID) - stateSourceObject.Load(2, &f.rootMode) - stateSourceObject.Load(3, &f.maxActiveRequests) - stateSourceObject.Load(4, &f.maxRead) + stateSourceObject.Load(0, &f.mopts) + stateSourceObject.Load(1, &f.userID) + stateSourceObject.Load(2, &f.groupID) + stateSourceObject.Load(3, &f.rootMode) + stateSourceObject.Load(4, &f.maxActiveRequests) + stateSourceObject.Load(5, &f.maxRead) } func (fs *filesystem) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 204d8d143..fb0ba2c6d 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -47,6 +47,9 @@ type FilesystemType struct{} // +stateify savable type filesystemOptions struct { + // mopts contains the raw, unparsed mount options passed to this filesystem. + mopts string + // userID specifies the numeric uid of the mount owner. // This option should not be specified by the filesystem owner. // It is set by libfuse (or, if libfuse is not used, must be set @@ -108,7 +111,7 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt return nil, nil, err } - var fsopts filesystemOptions + fsopts := filesystemOptions{mopts: opts.Data} mopts := vfs.GenericParseMountOptions(opts.Data) deviceDescriptorStr, ok := mopts["fd"] if !ok { @@ -260,6 +263,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fs.opts.mopts +} + // inode implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 8f95473b6..c34451269 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -15,7 +15,9 @@ package gofer import ( + "fmt" "math" + "strings" "sync" "sync/atomic" @@ -1608,3 +1610,58 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.renameMu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) } + +type mopt struct { + key string + value interface{} +} + +func (m mopt) String() string { + if m.value == nil { + return fmt.Sprintf("%s", m.key) + } + return fmt.Sprintf("%s=%v", m.key, m.value) +} + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + optsKV := []mopt{ + {moptTransport, transportModeFD}, // Only valid value, currently. + {moptReadFD, fs.opts.fd}, // Currently, read and write FD are the same. + {moptWriteFD, fs.opts.fd}, // Currently, read and write FD are the same. + {moptAname, fs.opts.aname}, + {moptDfltUID, fs.opts.dfltuid}, + {moptDfltGID, fs.opts.dfltgid}, + {moptMsize, fs.opts.msize}, + {moptVersion, fs.opts.version}, + {moptDentryCacheLimit, fs.opts.maxCachedDentries}, + } + + switch fs.opts.interop { + case InteropModeExclusive: + optsKV = append(optsKV, mopt{moptCache, cacheFSCache}) + case InteropModeWritethrough: + optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough}) + case InteropModeShared: + if fs.opts.regularFilesUseSpecialFileFD { + optsKV = append(optsKV, mopt{moptCache, cacheNone}) + } else { + optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating}) + } + } + if fs.opts.forcePageCache { + optsKV = append(optsKV, mopt{moptForcePageCache, nil}) + } + if fs.opts.limitHostFDTranslation { + optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil}) + } + if fs.opts.overlayfsStaleRead { + optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil}) + } + + opts := make([]string, 0, len(optsKV)) + for _, opt := range optsKV { + opts = append(opts, opt.String()) + } + return strings.Join(opts, ",") +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1508cbdf1..71569dc65 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -66,6 +66,34 @@ import ( // Name is the default filesystem name. const Name = "9p" +// Mount option names for goferfs. +const ( + moptTransport = "trans" + moptReadFD = "rfdno" + moptWriteFD = "wfdno" + moptAname = "aname" + moptDfltUID = "dfltuid" + moptDfltGID = "dfltgid" + moptMsize = "msize" + moptVersion = "version" + moptDentryCacheLimit = "dentry_cache_limit" + moptCache = "cache" + moptForcePageCache = "force_page_cache" + moptLimitHostFDTranslation = "limit_host_fd_translation" + moptOverlayfsStaleRead = "overlayfs_stale_read" +) + +// Valid values for the "cache" mount option. +const ( + cacheNone = "none" + cacheFSCache = "fscache" + cacheFSCacheWritethrough = "fscache_writethrough" + cacheRemoteRevalidating = "remote_revalidating" +) + +// Valid values for "trans" mount option. +const transportModeFD = "fd" + // FilesystemType implements vfs.FilesystemType. // // +stateify savable @@ -301,39 +329,39 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Get the attach name. fsopts.aname = "/" - if aname, ok := mopts["aname"]; ok { - delete(mopts, "aname") + if aname, ok := mopts[moptAname]; ok { + delete(mopts, moptAname) fsopts.aname = aname } // Parse the cache policy. For historical reasons, this defaults to the // least generally-applicable option, InteropModeExclusive. fsopts.interop = InteropModeExclusive - if cache, ok := mopts["cache"]; ok { - delete(mopts, "cache") + if cache, ok := mopts[moptCache]; ok { + delete(mopts, moptCache) switch cache { - case "fscache": + case cacheFSCache: fsopts.interop = InteropModeExclusive - case "fscache_writethrough": + case cacheFSCacheWritethrough: fsopts.interop = InteropModeWritethrough - case "none": + case cacheNone: fsopts.regularFilesUseSpecialFileFD = true fallthrough - case "remote_revalidating": + case cacheRemoteRevalidating: fsopts.interop = InteropModeShared default: - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: cache=%s", cache) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid cache policy: %s=%s", moptCache, cache) return nil, nil, syserror.EINVAL } } // Parse the default UID and GID. fsopts.dfltuid = _V9FS_DEFUID - if dfltuidstr, ok := mopts["dfltuid"]; ok { - delete(mopts, "dfltuid") + if dfltuidstr, ok := mopts[moptDfltUID]; ok { + delete(mopts, moptDfltUID) dfltuid, err := strconv.ParseUint(dfltuidstr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltuid=%s", dfltuidstr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltUID, dfltuidstr) return nil, nil, syserror.EINVAL } // In Linux, dfltuid is interpreted as a UID and is converted to a KUID @@ -342,11 +370,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fsopts.dfltuid = auth.KUID(dfltuid) } fsopts.dfltgid = _V9FS_DEFGID - if dfltgidstr, ok := mopts["dfltgid"]; ok { - delete(mopts, "dfltgid") + if dfltgidstr, ok := mopts[moptDfltGID]; ok { + delete(mopts, moptDfltGID) dfltgid, err := strconv.ParseUint(dfltgidstr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltgid=%s", dfltgidstr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: %s=%s", moptDfltGID, dfltgidstr) return nil, nil, syserror.EINVAL } fsopts.dfltgid = auth.KGID(dfltgid) @@ -354,11 +382,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Parse the 9P message size. fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M - if msizestr, ok := mopts["msize"]; ok { - delete(mopts, "msize") + if msizestr, ok := mopts[moptMsize]; ok { + delete(mopts, moptMsize) msize, err := strconv.ParseUint(msizestr, 10, 32) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: msize=%s", msizestr) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid message size: %s=%s", moptMsize, msizestr) return nil, nil, syserror.EINVAL } fsopts.msize = uint32(msize) @@ -366,34 +394,34 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt // Parse the 9P protocol version. fsopts.version = p9.HighestVersionString() - if version, ok := mopts["version"]; ok { - delete(mopts, "version") + if version, ok := mopts[moptVersion]; ok { + delete(mopts, moptVersion) fsopts.version = version } // Parse the dentry cache limit. fsopts.maxCachedDentries = 1000 - if str, ok := mopts["dentry_cache_limit"]; ok { - delete(mopts, "dentry_cache_limit") + if str, ok := mopts[moptDentryCacheLimit]; ok { + delete(mopts, moptDentryCacheLimit) maxCachedDentries, err := strconv.ParseUint(str, 10, 64) if err != nil { - ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: dentry_cache_limit=%s", str) + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid dentry cache limit: %s=%s", moptDentryCacheLimit, str) return nil, nil, syserror.EINVAL } fsopts.maxCachedDentries = maxCachedDentries } // Handle simple flags. - if _, ok := mopts["force_page_cache"]; ok { - delete(mopts, "force_page_cache") + if _, ok := mopts[moptForcePageCache]; ok { + delete(mopts, moptForcePageCache) fsopts.forcePageCache = true } - if _, ok := mopts["limit_host_fd_translation"]; ok { - delete(mopts, "limit_host_fd_translation") + if _, ok := mopts[moptLimitHostFDTranslation]; ok { + delete(mopts, moptLimitHostFDTranslation) fsopts.limitHostFDTranslation = true } - if _, ok := mopts["overlayfs_stale_read"]; ok { - delete(mopts, "overlayfs_stale_read") + if _, ok := mopts[moptOverlayfsStaleRead]; ok { + delete(mopts, moptOverlayfsStaleRead) fsopts.overlayfsStaleRead = true } // fsopts.regularFilesUseSpecialFileFD can only be enabled by specifying @@ -469,34 +497,34 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt func getFDFromMountOptionsMap(ctx context.Context, mopts map[string]string) (int, error) { // Check that the transport is "fd". - trans, ok := mopts["trans"] - if !ok || trans != "fd" { - ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as 'trans=fd'") + trans, ok := mopts[moptTransport] + if !ok || trans != transportModeFD { + ctx.Warningf("gofer.getFDFromMountOptionsMap: transport must be specified as '%s=%s'", moptTransport, transportModeFD) return -1, syserror.EINVAL } - delete(mopts, "trans") + delete(mopts, moptTransport) // Check that read and write FDs are provided and identical. - rfdstr, ok := mopts["rfdno"] + rfdstr, ok := mopts[moptReadFD] if !ok { - ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as 'rfdno=<file descriptor>'") + ctx.Warningf("gofer.getFDFromMountOptionsMap: read FD must be specified as '%s=<file descriptor>'", moptReadFD) return -1, syserror.EINVAL } - delete(mopts, "rfdno") + delete(mopts, moptReadFD) rfd, err := strconv.Atoi(rfdstr) if err != nil { - ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: rfdno=%s", rfdstr) + ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid read FD: %s=%s", moptReadFD, rfdstr) return -1, syserror.EINVAL } - wfdstr, ok := mopts["wfdno"] + wfdstr, ok := mopts[moptWriteFD] if !ok { - ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as 'wfdno=<file descriptor>'") + ctx.Warningf("gofer.getFDFromMountOptionsMap: write FD must be specified as '%s=<file descriptor>'", moptWriteFD) return -1, syserror.EINVAL } - delete(mopts, "wfdno") + delete(mopts, moptWriteFD) wfd, err := strconv.Atoi(wfdstr) if err != nil { - ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: wfdno=%s", wfdstr) + ctx.Warningf("gofer.getFDFromMountOptionsMap: invalid write FD: %s=%s", moptWriteFD, wfdstr) return -1, syserror.EINVAL } if rfd != wfd { diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index ad5de80dc..b9cce4181 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -260,6 +260,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // CheckPermissions implements kernfs.Inode.CheckPermissions. func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { var s unix.Stat_t diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index 917709d75..84e37f793 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -1764,3 +1764,15 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.renameMu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) } + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + // Return the mount options from the topmost layer. + var vd vfs.VirtualDentry + if fs.opts.UpperRoot.Ok() { + vd = fs.opts.UpperRoot + } else { + vd = fs.opts.LowerRoots[0] + } + return vd.Mount().Filesystem().Impl().MountOptions() +} diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 429733c10..3f05e444e 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -80,6 +80,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // inode implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 8716d0a3c..254a8b062 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -104,6 +104,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) +} + // dynamicInode is an overfitted interface for common Inodes with // dynamicByteSource types used in procfs. // diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index fda1fa942..735756280 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -85,6 +85,11 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe return vfs.PrependPathSyntheticError{} } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return "" +} + // inode implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index dbd9ebdda..1d9280dae 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -143,6 +143,11 @@ func (fs *filesystem) Release(ctx context.Context) { fs.Filesystem.Release(ctx) } +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fmt.Sprintf("dentry_cache_limit=%d", fs.MaxCachedDentries) +} + // dir implements kernfs.Inode. // // +stateify savable diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 4f675c21e..5fdca1d46 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -898,3 +898,8 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe d = d.parent } } + +// MountOptions implements vfs.FilesystemImpl.MountOptions. +func (fs *filesystem) MountOptions() string { + return fs.mopts +} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a01e413e0..8df81f589 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -70,6 +70,10 @@ type filesystem struct { // devMinor is the filesystem's minor device number. devMinor is immutable. devMinor uint32 + // mopts contains the tmpfs-specific mount options passed to this + // filesystem. Immutable. + mopts string + // mu serializes changes to the Dentry tree. mu sync.RWMutex `state:"nosave"` @@ -184,6 +188,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt mfp: mfp, clock: clock, devMinor: devMinor, + mopts: opts.Data, } fs.vfsfs.Init(vfsObj, newFSType, &fs) diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go index 21c53afaf..bd0b0bf97 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_state_autogen.go @@ -352,6 +352,7 @@ func (fs *filesystem) StateFields() []string { "mfp", "clock", "devMinor", + "mopts", "nextInoMinusOne", "root", } @@ -365,8 +366,9 @@ func (fs *filesystem) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(1, &fs.mfp) stateSinkObject.Save(2, &fs.clock) stateSinkObject.Save(3, &fs.devMinor) - stateSinkObject.Save(4, &fs.nextInoMinusOne) - stateSinkObject.Save(5, &fs.root) + stateSinkObject.Save(4, &fs.mopts) + stateSinkObject.Save(5, &fs.nextInoMinusOne) + stateSinkObject.Save(6, &fs.root) } func (fs *filesystem) afterLoad() {} @@ -376,8 +378,9 @@ func (fs *filesystem) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(1, &fs.mfp) stateSourceObject.Load(2, &fs.clock) stateSourceObject.Load(3, &fs.devMinor) - stateSourceObject.Load(4, &fs.nextInoMinusOne) - stateSourceObject.Load(5, &fs.root) + stateSourceObject.Load(4, &fs.mopts) + stateSourceObject.Load(5, &fs.nextInoMinusOne) + stateSourceObject.Load(6, &fs.root) } func (f *FilesystemOpts) StateTypeName() string { diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go index 7ad0eaf86..3caf417ca 100644 --- a/pkg/sentry/vfs/anonfs.go +++ b/pkg/sentry/vfs/anonfs.go @@ -291,6 +291,11 @@ func (fs *anonFilesystem) PrependPath(ctx context.Context, vfsroot, vd VirtualDe return PrependPathSyntheticError{} } +// MountOptions implements FilesystemImpl.MountOptions. +func (fs *anonFilesystem) MountOptions() string { + return "" +} + // IncRef implements DentryImpl.IncRef. func (d *anonDentry) IncRef() { // no-op diff --git a/pkg/sentry/vfs/filesystem.go b/pkg/sentry/vfs/filesystem.go index 2c4b81e78..059939010 100644 --- a/pkg/sentry/vfs/filesystem.go +++ b/pkg/sentry/vfs/filesystem.go @@ -502,6 +502,15 @@ type FilesystemImpl interface { // // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error + + // MountOptions returns mount options for the current filesystem. This + // should only return options specific to the filesystem (i.e. don't return + // "ro", "rw", etc). Options should be returned as a comma-separated string, + // similar to the input to the 5th argument to mount. + // + // If the implementation has no filesystem-specific options, it should + // return the empty string. + MountOptions() string } // PrependPathAtVFSRootError is returned by implementations of diff --git a/pkg/sentry/vfs/mount.go b/pkg/sentry/vfs/mount.go index bac9eb905..922f9e697 100644 --- a/pkg/sentry/vfs/mount.go +++ b/pkg/sentry/vfs/mount.go @@ -959,13 +959,17 @@ func manglePath(p string) string { // superBlockOpts returns the super block options string for the the mount at // the given path. func superBlockOpts(mountPath string, mnt *Mount) string { - // gVisor doesn't (yet) have a concept of super block options, so we - // use the ro/rw bit from the mount flag. + // Compose super block options by combining global mount flags with + // FS-specific mount options. opts := "rw" if mnt.ReadOnly() { opts = "ro" } + if mopts := mnt.fs.Impl().MountOptions(); mopts != "" { + opts += "," + mopts + } + // NOTE(b/147673608): If the mount is a cgroup, we also need to include // the cgroup name in the options. For now we just read that from the // path. diff --git a/runsc/cmd/mitigate.go b/runsc/cmd/mitigate.go index 822af1917..fddf0e0dd 100644 --- a/runsc/cmd/mitigate.go +++ b/runsc/cmd/mitigate.go @@ -16,6 +16,8 @@ package cmd import ( "context" + "fmt" + "io/ioutil" "github.com/google/subcommands" "gvisor.dev/gvisor/pkg/log" @@ -23,9 +25,23 @@ import ( "gvisor.dev/gvisor/runsc/mitigate" ) +const ( + // cpuInfo is the path used to parse CPU info. + cpuInfo = "/proc/cpuinfo" + // allPossibleCPUs is the path used to enable CPUs. + allPossibleCPUs = "/sys/devices/system/cpu/possible" +) + // Mitigate implements subcommands.Command for the "mitigate" command. type Mitigate struct { - mitigate mitigate.Mitigate + // Run the command without changing the underlying system. + dryRun bool + // Reverse mitigate by turning on all CPU cores. + reverse bool + // Path to file to read to create CPUSet. + path string + // Callback to check if a given thread is vulnerable. + vulnerable func(other mitigate.Thread) bool } // Name implements subcommands.command.name. @@ -38,14 +54,19 @@ func (*Mitigate) Synopsis() string { return "mitigate mitigates the underlying system against side channel attacks" } -// Usage implements subcommands.Command.Usage. -func (m *Mitigate) Usage() string { - return m.mitigate.Usage() +// Usage implments Usage for cmd.Mitigate. +func (m Mitigate) Usage() string { + return `mitigate [flags] + +mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. + +The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online.` } -// SetFlags implements subcommands.Command.SetFlags. +// SetFlags sets flags for the command Mitigate. func (m *Mitigate) SetFlags(f *flag.FlagSet) { - m.mitigate.SetFlags(f) + f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system") + f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs") } // Execute implements subcommands.Command.Execute. @@ -55,10 +76,97 @@ func (m *Mitigate) Execute(_ context.Context, f *flag.FlagSet, args ...interface return subcommands.ExitUsageError } - if err := m.mitigate.Execute(); err != nil { + m.path = cpuInfo + if m.reverse { + m.path = allPossibleCPUs + } + + m.vulnerable = func(other mitigate.Thread) bool { + return other.IsVulnerable() + } + + if _, err := m.doExecute(); err != nil { log.Warningf("Execute failed: %v", err) return subcommands.ExitFailure } return subcommands.ExitSuccess } + +// Execute executes the Mitigate command. +func (m *Mitigate) doExecute() (mitigate.CPUSet, error) { + if m.dryRun { + log.Infof("Running with DryRun. No cpu settings will be changed.") + } + if m.reverse { + data, err := ioutil.ReadFile(m.path) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %v", m.path, err) + } + + set, err := m.doReverse(data) + if err != nil { + return nil, fmt.Errorf("reverse operation failed: %v", err) + } + return set, nil + } + + data, err := ioutil.ReadFile(m.path) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %v", m.path, err) + } + set, err := m.doMitigate(data) + if err != nil { + return nil, fmt.Errorf("mitigate operation failed: %v", err) + } + return set, nil +} + +func (m *Mitigate) doMitigate(data []byte) (mitigate.CPUSet, error) { + set, err := mitigate.NewCPUSet(data, m.vulnerable) + if err != nil { + return nil, err + } + + log.Infof("Mitigate found the following CPUs...") + log.Infof("%s", set) + + disableList := set.GetShutdownList() + log.Infof("Disabling threads on thread pairs.") + for _, t := range disableList { + log.Infof("Disable thread: %s", t) + if m.dryRun { + continue + } + if err := t.Disable(); err != nil { + return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err) + } + } + log.Infof("Shutdown successful.") + return set, nil +} + +func (m *Mitigate) doReverse(data []byte) (mitigate.CPUSet, error) { + set, err := mitigate.NewCPUSetFromPossible(data) + if err != nil { + return nil, err + } + + log.Infof("Reverse mitigate found the following CPUs...") + log.Infof("%s", set) + + enableList := set.GetRemainingList() + + log.Infof("Enabling all CPUs...") + for _, t := range enableList { + log.Infof("Enabling thread: %s", t) + if m.dryRun { + continue + } + if err := t.Enable(); err != nil { + return nil, fmt.Errorf("error enabling thread: %s err: %v", t, err) + } + } + log.Infof("Enable successful.") + return set, nil +} diff --git a/runsc/mitigate/cpu.go b/runsc/mitigate/cpu.go deleted file mode 100644 index 4b2aa351f..000000000 --- a/runsc/mitigate/cpu.go +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mitigate - -import ( - "fmt" - "io/ioutil" - "regexp" - "strconv" - "strings" -) - -const ( - // mds is the only bug we care about. - mds = "mds" - - // Constants for parsing /proc/cpuinfo. - processorKey = "processor" - vendorIDKey = "vendor_id" - cpuFamilyKey = "cpu family" - modelKey = "model" - physicalIDKey = "physical id" - coreIDKey = "core id" - bugsKey = "bugs" - - // Path to shutdown a CPU. - cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" -) - -// cpuSet contains a map of all CPUs on the system, mapped -// by Physical ID and CoreIDs. threads with the same -// Core and Physical ID are Hyperthread pairs. -type cpuSet map[cpuID]*threadGroup - -// newCPUSet creates a CPUSet from data read from /proc/cpuinfo. -func newCPUSet(data []byte, vulnerable func(thread) bool) (cpuSet, error) { - processors, err := getThreads(string(data)) - if err != nil { - return nil, err - } - - set := make(cpuSet) - for _, p := range processors { - // Each ID is of the form physicalID:coreID. Hyperthread pairs - // have identical physical and core IDs. We need to match - // Hyperthread pairs so that we can shutdown all but one per - // pair. - core, ok := set[p.id] - if !ok { - core = &threadGroup{} - set[p.id] = core - } - core.isVulnerable = core.isVulnerable || vulnerable(p) - core.threads = append(core.threads, p) - } - return set, nil -} - -// newCPUSetFromPossible makes a cpuSet data read from -// /sys/devices/system/cpu/possible. This is used in enable operations -// where the caller simply wants to enable all CPUS. -func newCPUSetFromPossible(data []byte) (cpuSet, error) { - threads, err := getThreadsFromPossible(data) - if err != nil { - return nil, err - } - - // We don't care if a CPU is vulnerable or not, we just - // want to return a list of all CPUs on the host. - set := cpuSet{ - threads[0].id: &threadGroup{ - threads: threads, - isVulnerable: false, - }, - } - return set, nil -} - -// String implements the String method for CPUSet. -func (c cpuSet) String() string { - ret := "" - for _, tg := range c { - ret += fmt.Sprintf("%s\n", tg) - } - return ret -} - -// getRemainingList returns the list of threads that will remain active -// after mitigation. -func (c cpuSet) getRemainingList() []thread { - threads := make([]thread, 0, len(c)) - for _, core := range c { - // If we're vulnerable, take only one thread from the pair. - if core.isVulnerable { - threads = append(threads, core.threads[0]) - continue - } - // Otherwise don't shutdown anything. - threads = append(threads, core.threads...) - } - return threads -} - -// getShutdownList returns the list of threads that will be shutdown on -// mitigation. -func (c cpuSet) getShutdownList() []thread { - threads := make([]thread, 0) - for _, core := range c { - // Only if we're vulnerable do shutdown anything. In this case, - // shutdown all but the first entry. - if core.isVulnerable && len(core.threads) > 1 { - threads = append(threads, core.threads[1:]...) - } - } - return threads -} - -// threadGroup represents Hyperthread pairs on the same physical/core ID. -type threadGroup struct { - threads []thread - isVulnerable bool -} - -// String implements the String method for threadGroup. -func (c threadGroup) String() string { - ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) - for _, processor := range c.threads { - ret += fmt.Sprintf("%s\n", processor) - } - return ret -} - -// getThreads returns threads structs from reading /proc/cpuinfo. -func getThreads(data string) ([]thread, error) { - // Each processor entry should start with the - // processor key. Find the beginings of each. - r := buildRegex(processorKey, `\d+`) - indices := r.FindAllStringIndex(data, -1) - if len(indices) < 1 { - return nil, fmt.Errorf("no cpus found for: %q", data) - } - - // Add the ending index for last entry. - indices = append(indices, []int{len(data), -1}) - - // Valid cpus are now defined by strings in between - // indexes (e.g. data[index[i], index[i+1]]). - // There should be len(indicies) - 1 CPUs - // since the last index is the end of the string. - cpus := make([]thread, 0, len(indices)) - // Find each string that represents a CPU. These begin "processor". - for i := 1; i < len(indices); i++ { - start := indices[i-1][0] - end := indices[i][0] - // Parse the CPU entry, which should be between start/end. - c, err := newThread(data[start:end]) - if err != nil { - return nil, err - } - cpus = append(cpus, c) - } - return cpus, nil -} - -// getThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. -func getThreadsFromPossible(data []byte) ([]thread, error) { - possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) - matches := possibleRegex.FindStringSubmatch(string(data)) - if len(matches) != 4 { - return nil, fmt.Errorf("mismatch regex from %s: %q", allPossibleCPUs, string(data)) - } - - // If matches[3] is empty, we only have one cpu entry. - if matches[3] == "" { - matches[3] = matches[1] - } - - begin, err := strconv.ParseInt(matches[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse begin: %v", err) - } - end, err := strconv.ParseInt(matches[3], 10, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse end: %v", err) - } - if begin > end || begin < 0 || end < 0 { - return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) - } - - ret := make([]thread, 0, end-begin) - for i := begin; i <= end; i++ { - ret = append(ret, thread{ - processorNumber: i, - id: cpuID{ - physicalID: 0, // we don't care about id for enable ops. - coreID: 0, - }, - }) - } - - return ret, nil -} - -// cpuID for each thread is defined by the physical and -// core IDs. If equal, two threads are Hyperthread pairs. -type cpuID struct { - physicalID int64 - coreID int64 -} - -// type cpu represents pertinent info about a cpu. -type thread struct { - processorNumber int64 // the processor number of this CPU. - vendorID string // the vendorID of CPU (e.g. AuthenticAMD). - cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). - model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). - id cpuID // id for this thread - bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. -} - -// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. -func newThread(data string) (thread, error) { - empty := thread{} - processor, err := parseProcessor(data) - if err != nil { - return empty, err - } - - vendorID, err := parseVendorID(data) - if err != nil { - return empty, err - } - - cpuFamily, err := parseCPUFamily(data) - if err != nil { - return empty, err - } - - model, err := parseModel(data) - if err != nil { - return empty, err - } - - physicalID, err := parsePhysicalID(data) - if err != nil { - return empty, err - } - - coreID, err := parseCoreID(data) - if err != nil { - return empty, err - } - - bugs, err := parseBugs(data) - if err != nil { - return empty, err - } - - return thread{ - processorNumber: processor, - vendorID: vendorID, - cpuFamily: cpuFamily, - model: model, - id: cpuID{ - physicalID: physicalID, - coreID: coreID, - }, - bugs: bugs, - }, nil -} - -// String implements the String method for thread. -func (t thread) String() string { - template := `CPU: %d -CPU ID: %+v -Vendor: %s -Family/Model: %d/%d -Bugs: %s -` - bugs := make([]string, 0) - for bug := range t.bugs { - bugs = append(bugs, bug) - } - - return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) -} - -// enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. -func (t thread) enable() error { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'1'}, 0644) -} - -// disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. -func (t thread) disable() error { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) -} - -// isVulnerable checks if a CPU is vulnerable to mds. -func (t thread) isVulnerable() bool { - _, ok := t.bugs[mds] - return ok -} - -// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online -// If the file does not exist (ioutil returns in error), we assume the CPU is on. -func (t thread) isActive() bool { - cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) - data, err := ioutil.ReadFile(cpuPath) - if err != nil { - return true - } - return len(data) > 0 && data[0] != '0' -} - -// similarTo checks family/model/bugs fields for equality of two -// processors. -func (t thread) similarTo(other thread) bool { - if t.vendorID != other.vendorID { - return false - } - - if other.cpuFamily != t.cpuFamily { - return false - } - - if other.model != t.model { - return false - } - - if len(other.bugs) != len(t.bugs) { - return false - } - - for bug := range t.bugs { - if _, ok := other.bugs[bug]; !ok { - return false - } - } - return true -} - -// parseProcessor grabs the processor field from /proc/cpuinfo output. -func parseProcessor(data string) (int64, error) { - return parseIntegerResult(data, processorKey) -} - -// parseVendorID grabs the vendor_id field from /proc/cpuinfo output. -func parseVendorID(data string) (string, error) { - return parseRegex(data, vendorIDKey, `[\w\d]+`) -} - -// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output. -func parseCPUFamily(data string) (int64, error) { - return parseIntegerResult(data, cpuFamilyKey) -} - -// parseModel grabs the model field from /proc/cpuinfo output. -func parseModel(data string) (int64, error) { - return parseIntegerResult(data, modelKey) -} - -// parsePhysicalID parses the physical id field. -func parsePhysicalID(data string) (int64, error) { - return parseIntegerResult(data, physicalIDKey) -} - -// parseCoreID parses the core id field. -func parseCoreID(data string) (int64, error) { - return parseIntegerResult(data, coreIDKey) -} - -// parseBugs grabs the bugs field from /proc/cpuinfo output. -func parseBugs(data string) (map[string]struct{}, error) { - result, err := parseRegex(data, bugsKey, `[\d\w\s]*`) - if err != nil { - return nil, err - } - bugs := strings.Split(result, " ") - ret := make(map[string]struct{}, len(bugs)) - for _, bug := range bugs { - ret[bug] = struct{}{} - } - return ret, nil -} - -// parseIntegerResult parses fields expecting an integer. -func parseIntegerResult(data, key string) (int64, error) { - result, err := parseRegex(data, key, `\d+`) - if err != nil { - return 0, err - } - return strconv.ParseInt(result, 0, 64) -} - -// buildRegex builds a regex for parsing each CPU field. -func buildRegex(key, match string) *regexp.Regexp { - reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key) - return regexp.MustCompile(reg) -} - -// parseRegex parses data with key inserted into a standard regex template. -func parseRegex(data, key, match string) (string, error) { - r := buildRegex(key, match) - matches := r.FindStringSubmatch(data) - if len(matches) < 2 { - return "", fmt.Errorf("failed to match key %q: %q", key, data) - } - return matches[1], nil -} diff --git a/runsc/mitigate/mitigate.go b/runsc/mitigate/mitigate.go index 91de623e3..24f67414c 100644 --- a/runsc/mitigate/mitigate.go +++ b/runsc/mitigate/mitigate.go @@ -14,121 +14,440 @@ // Package mitigate provides libraries for the mitigate command. The // mitigate command mitigates side channel attacks such as MDS. Mitigate -// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online. In addition, -// the mitigate also handles computing available CPU in kubernetes kube_config -// files. +// shuts down CPUs via /sys/devices/system/cpu/cpu{N}/online. package mitigate import ( "fmt" "io/ioutil" - - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/runsc/flag" + "os" + "regexp" + "sort" + "strconv" + "strings" ) const ( - cpuInfo = "/proc/cpuinfo" - allPossibleCPUs = "/sys/devices/system/cpu/possible" + // mds is the only bug we care about. + mds = "mds" + + // Constants for parsing /proc/cpuinfo. + processorKey = "processor" + vendorIDKey = "vendor_id" + cpuFamilyKey = "cpu family" + modelKey = "model" + physicalIDKey = "physical id" + coreIDKey = "core id" + bugsKey = "bugs" + + // Path to shutdown a CPU. + cpuOnlineTemplate = "/sys/devices/system/cpu/cpu%d/online" ) -// Mitigate handles high level mitigate operations provided to runsc. -type Mitigate struct { - dryRun bool // Run the command without changing the underlying system. - reverse bool // Reverse mitigate by turning on all CPU cores. - other mitigate // Struct holds extra mitigate logic. - path string // path to read for each operation (e.g. /proc/cpuinfo). +// CPUSet contains a map of all CPUs on the system, mapped +// by Physical ID and CoreIDs. threads with the same +// Core and Physical ID are Hyperthread pairs. +type CPUSet map[threadID]*ThreadGroup + +// NewCPUSet creates a CPUSet from data read from /proc/cpuinfo. +func NewCPUSet(data []byte, vulnerable func(Thread) bool) (CPUSet, error) { + processors, err := getThreads(string(data)) + if err != nil { + return nil, err + } + + set := make(CPUSet) + for _, p := range processors { + // Each ID is of the form physicalID:coreID. Hyperthread pairs + // have identical physical and core IDs. We need to match + // Hyperthread pairs so that we can shutdown all but one per + // pair. + core, ok := set[p.id] + if !ok { + core = &ThreadGroup{} + set[p.id] = core + } + core.isVulnerable = core.isVulnerable || vulnerable(p) + core.threads = append(core.threads, p) + } + + // We need to make sure we shutdown the lowest number processor per + // thread group. + for _, tg := range set { + sort.Slice(tg.threads, func(i, j int) bool { + return tg.threads[i].processorNumber < tg.threads[j].processorNumber + }) + } + return set, nil } -// Usage implments Usage for cmd.Mitigate. -func (m Mitigate) Usage() string { - usageString := `mitigate [flags] +// NewCPUSetFromPossible makes a cpuSet data read from +// /sys/devices/system/cpu/possible. This is used in enable operations +// where the caller simply wants to enable all CPUS. +func NewCPUSetFromPossible(data []byte) (CPUSet, error) { + threads, err := GetThreadsFromPossible(data) + if err != nil { + return nil, err + } + + // We don't care if a CPU is vulnerable or not, we just + // want to return a list of all CPUs on the host. + set := CPUSet{ + threads[0].id: &ThreadGroup{ + threads: threads, + isVulnerable: false, + }, + } + return set, nil +} -Mitigate mitigates a system to the "MDS" vulnerability by implementing a manual shutdown of SMT. The command checks /proc/cpuinfo for cpus having the MDS vulnerability, and if found, shutdown all but one CPU per hyperthread pair via /sys/devices/system/cpu/cpu{N}/online. CPUs can be restored by writing "2" to each file in /sys/devices/system/cpu/cpu{N}/online or performing a system reboot. +// String implements the String method for CPUSet. +func (c CPUSet) String() string { + ret := "" + for _, tg := range c { + ret += fmt.Sprintf("%s\n", tg) + } + return ret +} -The command can be reversed with --reverse, which reads the total CPUs from /sys/devices/system/cpu/possible and enables all with /sys/devices/system/cpu/cpu{N}/online. -` - return usageString + m.other.usage() +// GetRemainingList returns the list of threads that will remain active +// after mitigation. +func (c CPUSet) GetRemainingList() []Thread { + threads := make([]Thread, 0, len(c)) + for _, core := range c { + // If we're vulnerable, take only one thread from the pair. + if core.isVulnerable { + threads = append(threads, core.threads[0]) + continue + } + // Otherwise don't shutdown anything. + threads = append(threads, core.threads...) + } + return threads } -// SetFlags sets flags for the command Mitigate. -func (m Mitigate) SetFlags(f *flag.FlagSet) { - f.BoolVar(&m.dryRun, "dryrun", false, "run the command without changing system") - f.BoolVar(&m.reverse, "reverse", false, "reverse mitigate by enabling all CPUs") - m.other.setFlags(f) - m.path = cpuInfo - if m.reverse { - m.path = allPossibleCPUs +// GetShutdownList returns the list of threads that will be shutdown on +// mitigation. +func (c CPUSet) GetShutdownList() []Thread { + threads := make([]Thread, 0) + for _, core := range c { + // Only if we're vulnerable do shutdown anything. In this case, + // shutdown all but the first entry. + if core.isVulnerable && len(core.threads) > 1 { + threads = append(threads, core.threads[1:]...) + } } + return threads } -// Execute executes the Mitigate command. -func (m Mitigate) Execute() error { - data, err := ioutil.ReadFile(m.path) - if err != nil { - return fmt.Errorf("failed to read %s: %v", m.path, err) +// ThreadGroup represents Hyperthread pairs on the same physical/core ID. +type ThreadGroup struct { + threads []Thread + isVulnerable bool +} + +// String implements the String method for threadGroup. +func (c ThreadGroup) String() string { + ret := fmt.Sprintf("ThreadGroup:\nIsVulnerable: %t\n", c.isVulnerable) + for _, processor := range c.threads { + ret += fmt.Sprintf("%s\n", processor) } + return ret +} - if m.reverse { - err := m.doReverse(data) +// getThreads returns threads structs from reading /proc/cpuinfo. +func getThreads(data string) ([]Thread, error) { + // Each processor entry should start with the + // processor key. Find the beginings of each. + r := buildRegex(processorKey, `\d+`) + indices := r.FindAllStringIndex(data, -1) + if len(indices) < 1 { + return nil, fmt.Errorf("no cpus found for: %q", data) + } + + // Add the ending index for last entry. + indices = append(indices, []int{len(data), -1}) + + // Valid cpus are now defined by strings in between + // indexes (e.g. data[index[i], index[i+1]]). + // There should be len(indicies) - 1 CPUs + // since the last index is the end of the string. + cpus := make([]Thread, 0, len(indices)) + // Find each string that represents a CPU. These begin "processor". + for i := 1; i < len(indices); i++ { + start := indices[i-1][0] + end := indices[i][0] + // Parse the CPU entry, which should be between start/end. + c, err := newThread(data[start:end]) if err != nil { - return fmt.Errorf("reverse operation failed: %v", err) + return nil, err } - return nil + cpus = append(cpus, c) + } + return cpus, nil +} + +// GetThreadsFromPossible makes threads from data read from /sys/devices/system/cpu/possible. +func GetThreadsFromPossible(data []byte) ([]Thread, error) { + possibleRegex := regexp.MustCompile(`(?m)^(\d+)(-(\d+))?$`) + matches := possibleRegex.FindStringSubmatch(string(data)) + if len(matches) != 4 { + return nil, fmt.Errorf("mismatch regex from possible: %q", string(data)) + } + + // If matches[3] is empty, we only have one cpu entry. + if matches[3] == "" { + matches[3] = matches[1] } - set, err := m.doMitigate(data) + begin, err := strconv.ParseInt(matches[1], 10, 64) if err != nil { - return fmt.Errorf("mitigate operation failed: %v", err) + return nil, fmt.Errorf("failed to parse begin: %v", err) } - return m.other.execute(set, m.dryRun) + end, err := strconv.ParseInt(matches[3], 10, 64) + if err != nil { + return nil, fmt.Errorf("failed to parse end: %v", err) + } + if begin > end || begin < 0 || end < 0 { + return nil, fmt.Errorf("invalid cpu bounds from possible: begin: %d end: %d", begin, end) + } + + ret := make([]Thread, 0, end-begin) + for i := begin; i <= end; i++ { + ret = append(ret, Thread{ + processorNumber: i, + id: threadID{ + physicalID: 0, // we don't care about id for enable ops. + coreID: 0, + }, + }) + } + + return ret, nil +} + +// threadID for each thread is defined by the physical and +// core IDs. If equal, two threads are Hyperthread pairs. +type threadID struct { + physicalID int64 + coreID int64 } -func (m Mitigate) doMitigate(data []byte) (cpuSet, error) { - set, err := newCPUSet(data, m.other.vulnerable) +// Thread represents pertinent info about a single hyperthread in a pair. +type Thread struct { + processorNumber int64 // the processor number of this CPU. + vendorID string // the vendorID of CPU (e.g. AuthenticAMD). + cpuFamily int64 // CPU family number (e.g. 6 for CascadeLake/Skylake). + model int64 // CPU model number (e.g. 85 for CascadeLake/Skylake). + id threadID // id for this thread + bugs map[string]struct{} // map of vulnerabilities parsed from the 'bugs' field. +} + +// newThread parses a CPU from a single cpu entry from /proc/cpuinfo. +func newThread(data string) (Thread, error) { + empty := Thread{} + processor, err := parseProcessor(data) if err != nil { - return nil, err + return empty, err } - log.Infof("Mitigate found the following CPUs...") - log.Infof("%s", set) + vendorID, err := parseVendorID(data) + if err != nil { + return empty, err + } - disableList := set.getShutdownList() - log.Infof("Disabling threads on thread pairs.") - for _, t := range disableList { - log.Infof("Disable thread: %s", t) - if m.dryRun { - continue - } - if err := t.disable(); err != nil { - return nil, fmt.Errorf("error disabling thread: %s err: %v", t, err) - } + cpuFamily, err := parseCPUFamily(data) + if err != nil { + return empty, err } - log.Infof("Shutdown successful.") - return set, nil + + model, err := parseModel(data) + if err != nil { + return empty, err + } + + physicalID, err := parsePhysicalID(data) + if err != nil { + return empty, err + } + + coreID, err := parseCoreID(data) + if err != nil { + return empty, err + } + + bugs, err := parseBugs(data) + if err != nil { + return empty, err + } + + return Thread{ + processorNumber: processor, + vendorID: vendorID, + cpuFamily: cpuFamily, + model: model, + id: threadID{ + physicalID: physicalID, + coreID: coreID, + }, + bugs: bugs, + }, nil +} + +// String implements the String method for thread. +func (t Thread) String() string { + template := `CPU: %d +CPU ID: %+v +Vendor: %s +Family/Model: %d/%d +Bugs: %s +` + bugs := make([]string, 0) + for bug := range t.bugs { + bugs = append(bugs, bug) + } + + return fmt.Sprintf(template, t.processorNumber, t.id, t.vendorID, t.cpuFamily, t.model, strings.Join(bugs, ",")) +} + +// Enable turns on the CPU by writing 1 to /sys/devices/cpu/cpu{N}/online. +func (t Thread) Enable() error { + // Linux ensures that "cpu0" is always online. + if t.processorNumber == 0 { + return nil + } + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + f, err := os.OpenFile(cpuPath, os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return fmt.Errorf("failed to open file %s: %v", cpuPath, err) + } + if _, err = f.Write([]byte{'1'}); err != nil { + return fmt.Errorf("failed to write '1' to %s: %v", cpuPath, err) + } + return nil +} + +// Disable turns off the CPU by writing 0 to /sys/devices/cpu/cpu{N}/online. +func (t Thread) Disable() error { + // The core labeled "cpu0" can never be taken offline via this method. + // Linux will return EPERM if the user even creates a file at the /sys + // path above. + if t.processorNumber == 0 { + return fmt.Errorf("invalid shutdown operation: cpu0 cannot be disabled") + } + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + return ioutil.WriteFile(cpuPath, []byte{'0'}, 0644) } -func (m Mitigate) doReverse(data []byte) error { - set, err := newCPUSetFromPossible(data) +// IsVulnerable checks if a CPU is vulnerable to mds. +func (t Thread) IsVulnerable() bool { + _, ok := t.bugs[mds] + return ok +} + +// isActive checks if a CPU is active from /sys/devices/system/cpu/cpu{N}/online +// If the file does not exist (ioutil returns in error), we assume the CPU is on. +func (t Thread) isActive() bool { + cpuPath := fmt.Sprintf(cpuOnlineTemplate, t.processorNumber) + data, err := ioutil.ReadFile(cpuPath) if err != nil { - return err + return true } + return len(data) > 0 && data[0] != '0' +} - log.Infof("Reverse mitigate found the following CPUs...") - log.Infof("%s", set) +// SimilarTo checks family/model/bugs fields for equality of two +// processors. +func (t Thread) SimilarTo(other Thread) bool { + if t.vendorID != other.vendorID { + return false + } - enableList := set.getRemainingList() + if other.cpuFamily != t.cpuFamily { + return false + } - log.Infof("Enabling all CPUs...") - for _, t := range enableList { - log.Infof("Enabling thread: %s", t) - if m.dryRun { - continue - } - if err := t.enable(); err != nil { - return fmt.Errorf("error enabling thread: %s err: %v", t, err) + if other.model != t.model { + return false + } + + if len(other.bugs) != len(t.bugs) { + return false + } + + for bug := range t.bugs { + if _, ok := other.bugs[bug]; !ok { + return false } } - log.Infof("Enable successful.") - return nil + return true +} + +// parseProcessor grabs the processor field from /proc/cpuinfo output. +func parseProcessor(data string) (int64, error) { + return parseIntegerResult(data, processorKey) +} + +// parseVendorID grabs the vendor_id field from /proc/cpuinfo output. +func parseVendorID(data string) (string, error) { + return parseRegex(data, vendorIDKey, `[\w\d]+`) +} + +// parseCPUFamily grabs the cpu family field from /proc/cpuinfo output. +func parseCPUFamily(data string) (int64, error) { + return parseIntegerResult(data, cpuFamilyKey) +} + +// parseModel grabs the model field from /proc/cpuinfo output. +func parseModel(data string) (int64, error) { + return parseIntegerResult(data, modelKey) +} + +// parsePhysicalID parses the physical id field. +func parsePhysicalID(data string) (int64, error) { + return parseIntegerResult(data, physicalIDKey) +} + +// parseCoreID parses the core id field. +func parseCoreID(data string) (int64, error) { + return parseIntegerResult(data, coreIDKey) +} + +// parseBugs grabs the bugs field from /proc/cpuinfo output. +func parseBugs(data string) (map[string]struct{}, error) { + result, err := parseRegex(data, bugsKey, `[\d\w\s]*`) + if err != nil { + return nil, err + } + bugs := strings.Split(result, " ") + ret := make(map[string]struct{}, len(bugs)) + for _, bug := range bugs { + ret[bug] = struct{}{} + } + return ret, nil +} + +// parseIntegerResult parses fields expecting an integer. +func parseIntegerResult(data, key string) (int64, error) { + result, err := parseRegex(data, key, `\d+`) + if err != nil { + return 0, err + } + return strconv.ParseInt(result, 0, 64) +} + +// buildRegex builds a regex for parsing each CPU field. +func buildRegex(key, match string) *regexp.Regexp { + reg := fmt.Sprintf(`(?m)^%s\s*:\s*(.*)$`, key) + return regexp.MustCompile(reg) +} + +// parseRegex parses data with key inserted into a standard regex template. +func parseRegex(data, key, match string) (string, error) { + r := buildRegex(key, match) + matches := r.FindStringSubmatch(data) + if len(matches) < 2 { + return "", fmt.Errorf("failed to match key %q: %q", key, data) + } + return matches[1], nil } diff --git a/runsc/mitigate/mitigate_conf.go b/runsc/mitigate/mitigate_conf.go deleted file mode 100644 index ee326324b..000000000 --- a/runsc/mitigate/mitigate_conf.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2021 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package mitigate - -import ( - "gvisor.dev/gvisor/runsc/flag" -) - -type mitigate struct { -} - -// usage returns the usage string portion for the mitigate. -func (m mitigate) usage() string { return "" } - -// setFlags sets additional flags for the Mitigate command. -func (m mitigate) setFlags(f *flag.FlagSet) {} - -// execute performs additional parts of Execute for Mitigate. -func (m mitigate) execute(set cpuSet, dryrun bool) error { - return nil -} - -func (m mitigate) vulnerable(other thread) bool { - return other.isVulnerable() -} |