diff options
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 34 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/benchmark_test.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/BUILD | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids.go | 5 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go | 32 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s | 45 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_x86.go | 20 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_rlimit.go | 2 | ||||
-rw-r--r-- | pkg/sentry/watchdog/watchdog.go | 13 |
11 files changed, 115 insertions, 47 deletions
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index 97fa7f7ab..fe14476f1 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -94,7 +94,6 @@ func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, err isTTY: isTTY, canMap: canMap(uint32(fileType)), ino: fs.NextIno(), - mode: fileMode, // For simplicity, set offset to 0. Technically, we should use the existing // offset on the host if the file is seekable. offset: 0, @@ -149,20 +148,6 @@ type inode struct { // This field is initialized at creation time and is immutable. ino uint64 - // modeMu protects mode. - modeMu sync.Mutex - - // mode is a cached version of the file mode on the host. Note that it may - // become out of date if the mode is changed on the host, e.g. with chmod. - // - // Generally, it is better to retrieve the mode from the host through an - // fstat syscall. We only use this value in inode.Mode(), which cannot - // return an error, if the syscall to host fails. - // - // FIXME(b/152294168): Plumb error into Inode.Mode() return value so we - // can get rid of this. - mode linux.FileMode - // offsetMu protects offset. offsetMu sync.Mutex @@ -195,10 +180,11 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a // Mode implements kernfs.Inode. func (i *inode) Mode() linux.FileMode { mode, _, _, err := i.getPermissions() + // Retrieving the mode from the host fd using fstat(2) should not fail. + // If the syscall does not succeed, something is fundamentally wrong. if err != nil { - return i.mode + panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err)) } - return linux.FileMode(mode) } @@ -208,11 +194,6 @@ func (i *inode) getPermissions() (linux.FileMode, auth.KUID, auth.KGID, error) { if err := syscall.Fstat(i.hostFD, &s); err != nil { return 0, 0, 0, err } - - // Update cached mode. - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() return linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid), nil } @@ -292,12 +273,6 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro ls.Ino = i.ino } - // Update cached mode. - if (mask&linux.STATX_TYPE != 0) && (mask&linux.STATX_MODE != 0) { - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() - } return ls, nil } @@ -364,9 +339,6 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil { return err } - i.modeMu.Lock() - i.mode = linux.FileMode(s.Mode) - i.modeMu.Unlock() } if m&linux.STATX_SIZE != 0 { if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil { diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go index 383133e44..651912169 100644 --- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go @@ -168,7 +168,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) { } } -func BenchmarkVFS2MemfsStat(b *testing.B) { +func BenchmarkVFS2TmpfsStat(b *testing.B) { for _, depth := range depths { b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { ctx := contexttest.Context(b) @@ -362,7 +362,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) { } } -func BenchmarkVFS2MemfsMountStat(b *testing.B) { +func BenchmarkVFS2TmpfsMountStat(b *testing.B) { for _, depth := range depths { b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) { ctx := contexttest.Context(b) diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 660f5a29b..452c4e2e0 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -148,7 +148,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if !dir && rp.MustBeDir() { return syserror.ENOENT } - // In memfs, the only way to cause a dentry to be disowned is by removing + // In tmpfs, the only way to cause a dentry to be disowned is by removing // it from the filesystem, so this check is equivalent to checking if // parent has been removed. if parent.vfsd.IsDisowned() { diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index a59b24d45..82c709b43 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -247,7 +247,7 @@ func (i *inode) incLinksLocked() { panic("tmpfs.inode.incLinksLocked() called with no existing links") } if i.nlink == maxLinks { - panic("memfs.inode.incLinksLocked() called with maximum link count") + panic("tmpfs.inode.incLinksLocked() called with maximum link count") } atomic.AddUint32(&i.nlink, 1) } diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 581841555..16d5f478b 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -81,6 +81,9 @@ go_library( "pagetables_arm64.go", "pagetables_x86.go", "pcids.go", + "pcids_aarch64.go", + "pcids_aarch64.s", + "pcids_x86.go", "walker_amd64.go", "walker_arm64.go", "walker_empty.go", diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/sentry/platform/ring0/pagetables/pcids.go index 9206030bf..964496aac 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids.go +++ b/pkg/sentry/platform/ring0/pagetables/pcids.go @@ -18,9 +18,6 @@ import ( "gvisor.dev/gvisor/pkg/sync" ) -// limitPCID is the number of valid PCIDs. -const limitPCID = 4096 - // PCIDs is a simple PCID database. // // This is not protected by locks and is thus suitable for use only with a @@ -44,7 +41,7 @@ type PCIDs struct { // // Nil is returned iff the start and size are out of range. func NewPCIDs(start, size uint16) *PCIDs { - if start+uint16(size) >= limitPCID { + if start+uint16(size) > limitPCID { return nil // See comment. } p := &PCIDs{ diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go new file mode 100644 index 000000000..fbfd41d83 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go @@ -0,0 +1,32 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package pagetables + +// limitPCID is the maximum value of PCIDs. +// +// In VMSAv8-64, the PCID(ASID) size is an IMPLEMENTATION DEFINED choice +// of 8 bits or 16 bits, and ID_AA64MMFR0_EL1.ASIDBits identifies the +// supported size. When an implementation supports a 16-bit ASID, TCR_ELx.AS +// selects whether the top 8 bits of the ASID are used. +var limitPCID uint16 + +// GetASIDBits return the system ASID bits, 8 or 16 bits. +func GetASIDBits() uint8 + +func init() { + limitPCID = uint16(1)<<GetASIDBits() - 1 +} diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s new file mode 100644 index 000000000..e9d62d768 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s @@ -0,0 +1,45 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +#include "funcdata.h" +#include "textflag.h" + +#define ID_AA64MMFR0_ASIDBITS_SHIFT 4 +#define ID_AA64MMFR0_ASIDBITS_16 2 +#define TCR_EL1_AS_BIT 36 + +// GetASIDBits return the system ASID bits, 8 or 16 bits. +// +// func GetASIDBits() uint8 +TEXT ·GetASIDBits(SB),NOSPLIT,$0-1 + // First, check whether 16bits ASID is supported. + // ID_AA64MMFR0_EL1.ASIDBITS[7:4] == 0010. + WORD $0xd5380700 // MRS ID_AA64MMFR0_EL1, R0 + UBFX $ID_AA64MMFR0_ASIDBITS_SHIFT, R0, $4, R0 + CMPW $ID_AA64MMFR0_ASIDBITS_16, R0 + BNE bits_8 + + // Second, check whether 16bits ASID is enabled. + // TCR_EL1.AS[36] == 1. + WORD $0xd5382040 // MRS TCR_EL1, R0 + TBZ $TCR_EL1_AS_BIT, R0, bits_8 + MOVD $16, R0 + B done +bits_8: + MOVD $8, R0 +done: + MOVB R0, ret+0(FP) + RET diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go new file mode 100644 index 000000000..91fc5e8dd --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go @@ -0,0 +1,20 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build i386 amd64 + +package pagetables + +// limitPCID is the maximum value of valid PCIDs. +const limitPCID = 4095 diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go index e08c333d6..d5d5b6959 100644 --- a/pkg/sentry/syscalls/linux/sys_rlimit.go +++ b/pkg/sentry/syscalls/linux/sys_rlimit.go @@ -197,7 +197,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys // saved set user IDs of the target process must match the real user ID of // the caller and the real, effective, and saved set group IDs of the // target process must match the real group ID of the caller." - if !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) { + if ot != t && !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) { cred, tcred := t.Credentials(), ot.Credentials() if cred.RealKUID != tcred.RealKUID || cred.RealKUID != tcred.EffectiveKUID || diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go index f7d6009a0..fcc46420f 100644 --- a/pkg/sentry/watchdog/watchdog.go +++ b/pkg/sentry/watchdog/watchdog.go @@ -319,8 +319,8 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo // Dump stack only if a new task is detected or if it sometime has // passed since the last time a stack dump was generated. - skipStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod - w.doAction(w.TaskTimeoutAction, skipStack, &buf) + showStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod + w.doAction(w.TaskTimeoutAction, showStack, &buf) } func (w *Watchdog) reportStuckWatchdog() { @@ -329,16 +329,15 @@ func (w *Watchdog) reportStuckWatchdog() { w.doAction(w.TaskTimeoutAction, false, &buf) } -// doAction will take the given action. If the action is LogWarnind and -// skipStack is true, then the stack printing will be skipped. -func (w *Watchdog) doAction(action Action, skipStack bool, msg *bytes.Buffer) { +// doAction will take the given action. If the action is LogWarning and +// showStack is false, then the stack printing will be skipped. +func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) { switch action { case LogWarning: - if skipStack { + if !showStack { msg.WriteString("\n...[stack dump skipped]...") log.Warningf(msg.String()) return - } log.TracebackAll(msg.String()) w.lastStackDump = time.Now() |