summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fsimpl/host/host.go34
-rw-r--r--pkg/sentry/fsimpl/tmpfs/benchmark_test.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/BUILD3
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids.go5
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go32
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s45
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_x86.go20
-rw-r--r--pkg/sentry/syscalls/linux/sys_rlimit.go2
-rw-r--r--pkg/sentry/watchdog/watchdog.go13
11 files changed, 115 insertions, 47 deletions
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 97fa7f7ab..fe14476f1 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -94,7 +94,6 @@ func ImportFD(mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, err
isTTY: isTTY,
canMap: canMap(uint32(fileType)),
ino: fs.NextIno(),
- mode: fileMode,
// For simplicity, set offset to 0. Technically, we should use the existing
// offset on the host if the file is seekable.
offset: 0,
@@ -149,20 +148,6 @@ type inode struct {
// This field is initialized at creation time and is immutable.
ino uint64
- // modeMu protects mode.
- modeMu sync.Mutex
-
- // mode is a cached version of the file mode on the host. Note that it may
- // become out of date if the mode is changed on the host, e.g. with chmod.
- //
- // Generally, it is better to retrieve the mode from the host through an
- // fstat syscall. We only use this value in inode.Mode(), which cannot
- // return an error, if the syscall to host fails.
- //
- // FIXME(b/152294168): Plumb error into Inode.Mode() return value so we
- // can get rid of this.
- mode linux.FileMode
-
// offsetMu protects offset.
offsetMu sync.Mutex
@@ -195,10 +180,11 @@ func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, a
// Mode implements kernfs.Inode.
func (i *inode) Mode() linux.FileMode {
mode, _, _, err := i.getPermissions()
+ // Retrieving the mode from the host fd using fstat(2) should not fail.
+ // If the syscall does not succeed, something is fundamentally wrong.
if err != nil {
- return i.mode
+ panic(fmt.Sprintf("failed to retrieve mode from host fd %d: %v", i.hostFD, err))
}
-
return linux.FileMode(mode)
}
@@ -208,11 +194,6 @@ func (i *inode) getPermissions() (linux.FileMode, auth.KUID, auth.KGID, error) {
if err := syscall.Fstat(i.hostFD, &s); err != nil {
return 0, 0, 0, err
}
-
- // Update cached mode.
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
return linux.FileMode(s.Mode), auth.KUID(s.Uid), auth.KGID(s.Gid), nil
}
@@ -292,12 +273,6 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro
ls.Ino = i.ino
}
- // Update cached mode.
- if (mask&linux.STATX_TYPE != 0) && (mask&linux.STATX_MODE != 0) {
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
- }
return ls, nil
}
@@ -364,9 +339,6 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if err := syscall.Fchmod(i.hostFD, uint32(s.Mode)); err != nil {
return err
}
- i.modeMu.Lock()
- i.mode = linux.FileMode(s.Mode)
- i.modeMu.Unlock()
}
if m&linux.STATX_SIZE != 0 {
if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
diff --git a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
index 383133e44..651912169 100644
--- a/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/benchmark_test.go
@@ -168,7 +168,7 @@ func BenchmarkVFS1TmpfsStat(b *testing.B) {
}
}
-func BenchmarkVFS2MemfsStat(b *testing.B) {
+func BenchmarkVFS2TmpfsStat(b *testing.B) {
for _, depth := range depths {
b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
ctx := contexttest.Context(b)
@@ -362,7 +362,7 @@ func BenchmarkVFS1TmpfsMountStat(b *testing.B) {
}
}
-func BenchmarkVFS2MemfsMountStat(b *testing.B) {
+func BenchmarkVFS2TmpfsMountStat(b *testing.B) {
for _, depth := range depths {
b.Run(fmt.Sprintf("%d", depth), func(b *testing.B) {
ctx := contexttest.Context(b)
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 660f5a29b..452c4e2e0 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -148,7 +148,7 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa
if !dir && rp.MustBeDir() {
return syserror.ENOENT
}
- // In memfs, the only way to cause a dentry to be disowned is by removing
+ // In tmpfs, the only way to cause a dentry to be disowned is by removing
// it from the filesystem, so this check is equivalent to checking if
// parent has been removed.
if parent.vfsd.IsDisowned() {
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index a59b24d45..82c709b43 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -247,7 +247,7 @@ func (i *inode) incLinksLocked() {
panic("tmpfs.inode.incLinksLocked() called with no existing links")
}
if i.nlink == maxLinks {
- panic("memfs.inode.incLinksLocked() called with maximum link count")
+ panic("tmpfs.inode.incLinksLocked() called with maximum link count")
}
atomic.AddUint32(&i.nlink, 1)
}
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 581841555..16d5f478b 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -81,6 +81,9 @@ go_library(
"pagetables_arm64.go",
"pagetables_x86.go",
"pcids.go",
+ "pcids_aarch64.go",
+ "pcids_aarch64.s",
+ "pcids_x86.go",
"walker_amd64.go",
"walker_arm64.go",
"walker_empty.go",
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/sentry/platform/ring0/pagetables/pcids.go
index 9206030bf..964496aac 100644
--- a/pkg/sentry/platform/ring0/pagetables/pcids.go
+++ b/pkg/sentry/platform/ring0/pagetables/pcids.go
@@ -18,9 +18,6 @@ import (
"gvisor.dev/gvisor/pkg/sync"
)
-// limitPCID is the number of valid PCIDs.
-const limitPCID = 4096
-
// PCIDs is a simple PCID database.
//
// This is not protected by locks and is thus suitable for use only with a
@@ -44,7 +41,7 @@ type PCIDs struct {
//
// Nil is returned iff the start and size are out of range.
func NewPCIDs(start, size uint16) *PCIDs {
- if start+uint16(size) >= limitPCID {
+ if start+uint16(size) > limitPCID {
return nil // See comment.
}
p := &PCIDs{
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
new file mode 100644
index 000000000..fbfd41d83
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
@@ -0,0 +1,32 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package pagetables
+
+// limitPCID is the maximum value of PCIDs.
+//
+// In VMSAv8-64, the PCID(ASID) size is an IMPLEMENTATION DEFINED choice
+// of 8 bits or 16 bits, and ID_AA64MMFR0_EL1.ASIDBits identifies the
+// supported size. When an implementation supports a 16-bit ASID, TCR_ELx.AS
+// selects whether the top 8 bits of the ASID are used.
+var limitPCID uint16
+
+// GetASIDBits returns the number of ASID bits in use by the system: 8 or 16.
+func GetASIDBits() uint8
+
+func init() {
+ limitPCID = uint16(1)<<GetASIDBits() - 1
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
new file mode 100644
index 000000000..e9d62d768
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
@@ -0,0 +1,45 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+#include "funcdata.h"
+#include "textflag.h"
+
+#define ID_AA64MMFR0_ASIDBITS_SHIFT 4
+#define ID_AA64MMFR0_ASIDBITS_16 2
+#define TCR_EL1_AS_BIT 36
+
+// GetASIDBits returns the number of ASID bits in use by the system: 8 or 16.
+//
+// func GetASIDBits() uint8
+TEXT ·GetASIDBits(SB),NOSPLIT,$0-1
+ // First, check whether 16bits ASID is supported.
+ // ID_AA64MMFR0_EL1.ASIDBITS[7:4] == 0010.
+ WORD $0xd5380700 // MRS ID_AA64MMFR0_EL1, R0
+ UBFX $ID_AA64MMFR0_ASIDBITS_SHIFT, R0, $4, R0
+ CMPW $ID_AA64MMFR0_ASIDBITS_16, R0
+ BNE bits_8
+
+ // Second, check whether 16bits ASID is enabled.
+ // TCR_EL1.AS[36] == 1.
+ WORD $0xd5382040 // MRS TCR_EL1, R0
+ TBZ $TCR_EL1_AS_BIT, R0, bits_8
+ MOVD $16, R0
+ B done
+bits_8:
+ MOVD $8, R0
+done:
+ MOVB R0, ret+0(FP)
+ RET
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
new file mode 100644
index 000000000..91fc5e8dd
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
@@ -0,0 +1,20 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package pagetables
+
+// limitPCID is the maximum value of valid PCIDs.
+const limitPCID = 4095
diff --git a/pkg/sentry/syscalls/linux/sys_rlimit.go b/pkg/sentry/syscalls/linux/sys_rlimit.go
index e08c333d6..d5d5b6959 100644
--- a/pkg/sentry/syscalls/linux/sys_rlimit.go
+++ b/pkg/sentry/syscalls/linux/sys_rlimit.go
@@ -197,7 +197,7 @@ func Prlimit64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
// saved set user IDs of the target process must match the real user ID of
// the caller and the real, effective, and saved set group IDs of the
// target process must match the real group ID of the caller."
- if !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) {
+ if ot != t && !t.HasCapabilityIn(linux.CAP_SYS_RESOURCE, t.PIDNamespace().UserNamespace()) {
cred, tcred := t.Credentials(), ot.Credentials()
if cred.RealKUID != tcred.RealKUID ||
cred.RealKUID != tcred.EffectiveKUID ||
diff --git a/pkg/sentry/watchdog/watchdog.go b/pkg/sentry/watchdog/watchdog.go
index f7d6009a0..fcc46420f 100644
--- a/pkg/sentry/watchdog/watchdog.go
+++ b/pkg/sentry/watchdog/watchdog.go
@@ -319,8 +319,8 @@ func (w *Watchdog) report(offenders map[*kernel.Task]*offender, newTaskFound boo
// Dump stack only if a new task is detected or if some time has
// passed since the last time a stack dump was generated.
- skipStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod
- w.doAction(w.TaskTimeoutAction, skipStack, &buf)
+ showStack := newTaskFound || time.Since(w.lastStackDump) >= stackDumpSameTaskPeriod
+ w.doAction(w.TaskTimeoutAction, showStack, &buf)
}
func (w *Watchdog) reportStuckWatchdog() {
@@ -329,16 +329,15 @@ func (w *Watchdog) reportStuckWatchdog() {
w.doAction(w.TaskTimeoutAction, false, &buf)
}
-// doAction will take the given action. If the action is LogWarnind and
-// skipStack is true, then the stack printing will be skipped.
-func (w *Watchdog) doAction(action Action, skipStack bool, msg *bytes.Buffer) {
+// doAction will take the given action. If the action is LogWarning and
+// showStack is false, then the stack printing will be skipped.
+func (w *Watchdog) doAction(action Action, showStack bool, msg *bytes.Buffer) {
switch action {
case LogWarning:
- if skipStack {
+ if !showStack {
msg.WriteString("\n...[stack dump skipped]...")
log.Warningf(msg.String())
return
-
}
log.TracebackAll(msg.String())
w.lastStackDump = time.Now()