summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/arch/arch_aarch64.go2
-rw-r--r--pkg/sentry/arch/arch_arm64.go2
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go96
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go17
-rw-r--r--pkg/sentry/fsimpl/host/host.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD13
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go27
-rw-r--r--pkg/sentry/platform/kvm/kvm_const_arm64.go1
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go11
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.s56
-rw-r--r--pkg/sentry/platform/ring0/kernel_arm64.go2
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go9
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go4
-rw-r--r--pkg/sentry/socket/netfilter/udp_matcher.go4
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/aio.go9
-rw-r--r--pkg/sentry/vfs/file_description.go4
17 files changed, 185 insertions, 76 deletions
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index fd95eb2d2..0f433ee79 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -101,6 +101,8 @@ func NewFloatingPointData() *FloatingPointData {
// State contains the common architecture bits for aarch64 (the build tag of this
// file ensures it's only built on aarch64).
+//
+// +stateify savable
type State struct {
// The system registers.
Regs Registers
diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go
index cabbf60e0..550741d8c 100644
--- a/pkg/sentry/arch/arch_arm64.go
+++ b/pkg/sentry/arch/arch_arm64.go
@@ -73,6 +73,8 @@ const (
)
// context64 represents an ARM64 context.
+//
+// +stateify savable
type context64 struct {
State
sigFPState []aarch64FPState // fpstate to be restored on sigreturn.
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index a1405f7c3..83c24ec25 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -226,3 +226,99 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
}
return fd.VFSFileDescription(), nil
}
+
+// statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The
+// opts.Sync attribute is ignored since the synchronization is handled by the
+// FUSE server.
+func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx {
+ var stat linux.Statx
+ stat.Blksize = attr.BlkSize
+ stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor
+
+ rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev)
+ stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor
+
+ if mask&linux.STATX_MODE != 0 {
+ stat.Mode = uint16(attr.Mode)
+ }
+ if mask&linux.STATX_NLINK != 0 {
+ stat.Nlink = attr.Nlink
+ }
+ if mask&linux.STATX_UID != 0 {
+ stat.UID = attr.UID
+ }
+ if mask&linux.STATX_GID != 0 {
+ stat.GID = attr.GID
+ }
+ if mask&linux.STATX_ATIME != 0 {
+ stat.Atime = linux.StatxTimestamp{
+ Sec: int64(attr.Atime),
+ Nsec: attr.AtimeNsec,
+ }
+ }
+ if mask&linux.STATX_MTIME != 0 {
+ stat.Mtime = linux.StatxTimestamp{
+ Sec: int64(attr.Mtime),
+ Nsec: attr.MtimeNsec,
+ }
+ }
+ if mask&linux.STATX_CTIME != 0 {
+ stat.Ctime = linux.StatxTimestamp{
+ Sec: int64(attr.Ctime),
+ Nsec: attr.CtimeNsec,
+ }
+ }
+ if mask&linux.STATX_INO != 0 {
+ stat.Ino = attr.Ino
+ }
+ if mask&linux.STATX_SIZE != 0 {
+ stat.Size = attr.Size
+ }
+ if mask&linux.STATX_BLOCKS != 0 {
+ stat.Blocks = attr.Blocks
+ }
+ return stat
+}
+
+// Stat implements kernfs.Inode.Stat.
+func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ fusefs := fs.Impl().(*filesystem)
+ conn := fusefs.conn
+ task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
+ if task == nil {
+ log.Warningf("couldn't get kernel task from context")
+ return linux.Statx{}, syserror.EINVAL
+ }
+
+ var in linux.FUSEGetAttrIn
+ // We don't set any attribute in the request, because in VFS2 fstat(2) will
+ // finally be translated into vfs.FilesystemImpl.StatAt() (see
+ // pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow
+ // as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2.
+ req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.Ino(), linux.FUSE_GETATTR, &in)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+
+ res, err := conn.Call(task, req)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ if err := res.Error(); err != nil {
+ return linux.Statx{}, err
+ }
+
+ var out linux.FUSEGetAttrOut
+ if err := res.UnmarshalPayload(&out); err != nil {
+ return linux.Statx{}, err
+ }
+
+ // Set all metadata into kernfs.InodeAttrs.
+ if err := i.SetStat(ctx, fs, creds, vfs.SetStatOptions{
+ Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, fusefs.devMinor),
+ }); err != nil {
+ return linux.Statx{}, err
+ }
+
+ return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil
+}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index eaef2594d..610a7ed78 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -844,6 +844,13 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
}
}
if rp.Done() {
+ // Reject attempts to open mount root directory with O_CREAT.
+ if mayCreate && rp.MustBeDir() {
+ return nil, syserror.EISDIR
+ }
+ if mustCreate {
+ return nil, syserror.EEXIST
+ }
return start.openLocked(ctx, rp, &opts)
}
@@ -856,6 +863,10 @@ afterTrailingSymlink:
if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
return nil, err
}
+ // Reject attempts to open directories with O_CREAT.
+ if mayCreate && rp.MustBeDir() {
+ return nil, syserror.EISDIR
+ }
// Determine whether or not we need to create a file.
parent.dirMu.Lock()
child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
@@ -1071,10 +1082,8 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
creds := rp.Credentials()
name := rp.Component()
- // Filter file creation flags and O_LARGEFILE out; the create RPC already
- // has the semantics of O_CREAT|O_EXCL, while some servers will choke on
- // O_LARGEFILE.
- createFlags := p9.OpenFlags(opts.Flags &^ (vfs.FileCreationFlags | linux.O_LARGEFILE))
+ // We only want the access mode for creating the file.
+ createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask
fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
if err != nil {
dirfile.close(ctx)
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index bf922c566..bd6caba06 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -552,7 +552,7 @@ func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uin
return syserror.ESPIPE
}
- // TODO(gvisor.dev/issue/2923): Implement Allocate for non-pipe hostfds.
+ // TODO(gvisor.dev/issue/3589): Implement Allocate for non-pipe hostfds.
return syserror.EOPNOTSUPP
}
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index e73732a6b..5cd428d64 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -26,6 +26,17 @@ go_template_instance(
},
)
+go_template_instance(
+ name = "inode_refs",
+ out = "inode_refs.go",
+ package = "tmpfs",
+ prefix = "inode",
+ template = "//pkg/refs_vfs2:refs_template",
+ types = {
+ "T": "inode",
+ },
+)
+
go_library(
name = "tmpfs",
srcs = [
@@ -34,6 +45,7 @@ go_library(
"directory.go",
"filesystem.go",
"fstree.go",
+ "inode_refs.go",
"named_pipe.go",
"regular_file.go",
"socket_file.go",
@@ -47,6 +59,7 @@ go_library(
"//pkg/context",
"//pkg/fspath",
"//pkg/log",
+ "//pkg/refs",
"//pkg/safemem",
"//pkg/sentry/arch",
"//pkg/sentry/fs",
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 065812065..a4864df53 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -320,7 +320,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
fs.mu.Lock()
defer fs.mu.Unlock()
if rp.Done() {
- // Reject attempts to open directories with O_CREAT.
+ // Reject attempts to open mount root directory with O_CREAT.
if rp.MustBeDir() {
return nil, syserror.EISDIR
}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 4681a2f52..de2af6d01 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -285,13 +285,10 @@ type inode struct {
// fs is the owning filesystem. fs is immutable.
fs *filesystem
- // refs is a reference count. refs is accessed using atomic memory
- // operations.
- //
// A reference is held on all inodes as long as they are reachable in the
// filesystem tree, i.e. nlink is nonzero. This reference is dropped when
// nlink reaches 0.
- refs int64
+ refs inodeRefs
// xattrs implements extended attributes.
//
@@ -327,7 +324,6 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth
panic("file type is required in FileMode")
}
i.fs = fs
- i.refs = 1
i.mode = uint32(mode)
i.uid = uint32(kuid)
i.gid = uint32(kgid)
@@ -339,6 +335,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth
i.mtime = now
// i.nlink initialized by caller
i.impl = impl
+ i.refs.EnableLeakCheck()
}
// incLinksLocked increments i's link count.
@@ -369,25 +366,15 @@ func (i *inode) decLinksLocked(ctx context.Context) {
}
func (i *inode) incRef() {
- if atomic.AddInt64(&i.refs, 1) <= 1 {
- panic("tmpfs.inode.incRef() called without holding a reference")
- }
+ i.refs.IncRef()
}
func (i *inode) tryIncRef() bool {
- for {
- refs := atomic.LoadInt64(&i.refs)
- if refs == 0 {
- return false
- }
- if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) {
- return true
- }
- }
+ return i.refs.TryIncRef()
}
func (i *inode) decRef(ctx context.Context) {
- if refs := atomic.AddInt64(&i.refs, -1); refs == 0 {
+ i.refs.DecRef(func() {
i.watches.HandleDeletion(ctx)
if regFile, ok := i.impl.(*regularFile); ok {
// Release memory used by regFile to store data. Since regFile is
@@ -395,9 +382,7 @@ func (i *inode) decRef(ctx context.Context) {
// metadata.
regFile.data.DropAll(regFile.memFile)
}
- } else if refs < 0 {
- panic("tmpfs.inode.decRef() called without holding a reference")
- }
+ })
}
func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
diff --git a/pkg/sentry/platform/kvm/kvm_const_arm64.go b/pkg/sentry/platform/kvm/kvm_const_arm64.go
index fdc599477..9a7be3655 100644
--- a/pkg/sentry/platform/kvm/kvm_const_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_const_arm64.go
@@ -72,6 +72,7 @@ const (
_TCR_T0SZ_VA48 = 64 - 48 // VA=48
_TCR_T1SZ_VA48 = 64 - 48 // VA=48
+ _TCR_A1 = 1 << 22
_TCR_ASID16 = 1 << 36
_TCR_TBI0 = 1 << 37
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 307a7645f..905712076 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -61,7 +61,6 @@ func (c *vCPU) initArchState() error {
reg.addr = uint64(reflect.ValueOf(&data).Pointer())
regGet.addr = uint64(reflect.ValueOf(&dataGet).Pointer())
- vcpuInit.target = _KVM_ARM_TARGET_GENERIC_V8
vcpuInit.features[0] |= (1 << _KVM_ARM_VCPU_PSCI_0_2)
if _, _, errno := syscall.RawSyscall(
syscall.SYS_IOCTL,
@@ -272,8 +271,16 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
return c.fault(int32(syscall.SIGSEGV), info)
case ring0.Vector(bounce): // ring0.VirtualizationException
return usermem.NoAccess, platform.ErrContextInterrupt
+ case ring0.El0Sync_undef,
+ ring0.El1Sync_undef:
+ *info = arch.SignalInfo{
+ Signo: int32(syscall.SIGILL),
+ Code: 1, // ILL_ILLOPC (illegal opcode).
+ }
+ info.SetAddr(switchOpts.Registers.Pc) // Include address.
+ return usermem.AccessType{}, platform.ErrContextSignal
default:
- return usermem.NoAccess, platform.ErrContextSignal
+ panic(fmt.Sprintf("unexpected vector: 0x%x", vector))
}
}
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
index d8a7bc2f9..9d29b7168 100644
--- a/pkg/sentry/platform/ring0/entry_arm64.s
+++ b/pkg/sentry/platform/ring0/entry_arm64.s
@@ -364,6 +364,9 @@ TEXT ·Halt(SB),NOSPLIT,$0
CMP RSV_REG, R9
BNE mmio_exit
MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG)
+
+ // Flush dcache.
+ WORD $0xd5087e52 // DC CISW
mmio_exit:
// Disable fpsimd.
WORD $0xd5381041 // MRS CPACR_EL1, R1
@@ -381,6 +384,9 @@ mmio_exit:
MRS VBAR_EL1, R9
MOVD R0, 0x0(R9)
+ // Flush dcahce.
+ WORD $0xd5087e52 // DC CISW
+
RET
// HaltAndResume halts execution and point the pointer to the resume function.
@@ -414,6 +420,7 @@ TEXT ·Current(SB),NOSPLIT,$0-8
// Prepare the vcpu environment for container application.
TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
// Step1, save sentry context into memory.
+ MRS TPIDR_EL1, RSV_REG
REGISTERS_SAVE(RSV_REG, CPU_REGISTERS)
MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG)
@@ -425,34 +432,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3
- // Step2, save SP_EL1, PSTATE into kernel temporary stack.
- // switch to temporary stack.
+ // Step2, switch to temporary stack.
LOAD_KERNEL_STACK(RSV_REG)
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- SUB $STACK_FRAME_SIZE, RSP, RSP
- MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R11
- MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R12
- STP (R11, R12), 16*0(RSP)
-
- MOVD CPU_REGISTERS+PTRACE_R11(RSV_REG), R11
- MOVD CPU_REGISTERS+PTRACE_R12(RSV_REG), R12
- // Step3, test user pagetable.
- // If user pagetable is empty, trapped in el1_ia.
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- SWITCH_TO_APP_PAGETABLE(RSV_REG)
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- SWITCH_TO_KVM_PAGETABLE(RSV_REG)
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- // If pagetable is not empty, recovery kernel temporary stack.
- ADD $STACK_FRAME_SIZE, RSP, RSP
-
- // Step4, load app context pointer.
+ // Step3, load app context pointer.
MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP
- // Step5, prepare the environment for container application.
+ // Step4, prepare the environment for container application.
// set sp_el0.
MOVD PTRACE_SP(RSV_REG_APP), R1
WORD $0xd5184101 //MSR R1, SP_EL0
@@ -480,13 +466,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
LDP 16*0(RSP), (RSV_REG, RSV_REG_APP)
ADD $STACK_FRAME_SIZE, RSP, RSP
+ ISB $15
ERET()
// kernelExitToEl1 is the entrypoint for sentry in guest_el1.
// Prepare the vcpu environment for sentry.
TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1
WORD $0xd5184001 //MSR R1, SPSR_EL1
@@ -503,6 +489,8 @@ TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
// Start is the CPU entrypoint.
TEXT ·Start(SB),NOSPLIT,$0
+ // Flush dcache.
+ WORD $0xd5087e52 // DC CISW
// Init.
MOVD $SCTLR_EL1_DEFAULT, R1
MSR R1, SCTLR_EL1
@@ -558,6 +546,7 @@ TEXT ·El1_sync(SB),NOSPLIT,$0
B el1_invalid
el1_da:
+el1_ia:
WORD $0xd538d092 //MRS TPIDR_EL1, R18
WORD $0xd538601a //MRS FAR_EL1, R26
@@ -570,9 +559,6 @@ el1_da:
B ·HaltAndResume(SB)
-el1_ia:
- B ·HaltAndResume(SB)
-
el1_sp_pc:
B ·Shutdown(SB)
@@ -644,9 +630,10 @@ el0_svc:
MOVD $Syscall, R3
MOVD R3, CPU_VECTOR_CODE(RSV_REG)
- B ·HaltAndResume(SB)
+ B ·kernelExitToEl1(SB)
el0_da:
+el0_ia:
WORD $0xd538d092 //MRS TPIDR_EL1, R18
WORD $0xd538601a //MRS FAR_EL1, R26
@@ -658,10 +645,10 @@ el0_da:
MOVD $PageFault, R3
MOVD R3, CPU_VECTOR_CODE(RSV_REG)
- B ·HaltAndResume(SB)
+ MRS ESR_EL1, R3
+ MOVD R3, CPU_ERROR_CODE(RSV_REG)
-el0_ia:
- B ·Shutdown(SB)
+ B ·kernelExitToEl1(SB)
el0_fpsimd_acc:
B ·Shutdown(SB)
@@ -676,7 +663,10 @@ el0_sp_pc:
B ·Shutdown(SB)
el0_undef:
- B ·Shutdown(SB)
+ MOVD $El0Sync_undef, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·kernelExitToEl1(SB)
el0_dbg:
B ·Shutdown(SB)
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
index 42009dac0..d0afa1aaa 100644
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ b/pkg/sentry/platform/ring0/kernel_arm64.go
@@ -53,7 +53,6 @@ func IsCanonical(addr uint64) bool {
//go:nosplit
func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
- // Sanitize registers.
regs := switchOpts.Registers
regs.Pstate &= ^uint64(PsrFlagsClear)
@@ -69,6 +68,5 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
vector = c.vecCode
- // Perform the switch.
return
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
index 78510ebed..6409d1d91 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
@@ -72,13 +72,14 @@ const (
)
const (
- mtNormal = 0x4 << 2
+ mtDevicenGnRE = 0x1 << 2
+ mtNormal = 0x4 << 2
)
const (
executeDisable = xn
optionMask = 0xfff | 0xfff<<48
- protDefault = accessed | shared | mtNormal
+ protDefault = accessed | shared
)
// MapOpts are x86 options.
@@ -184,8 +185,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) {
if opts.User {
v |= user
+ v |= mtNormal
} else {
v = v &^ user
+ v |= mtDevicenGnRE // Strong order for the addresses with ring0.KernelStartAddress.
}
atomic.StoreUintptr((*uintptr)(p), v)
}
@@ -200,7 +203,7 @@ func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
// This should never happen.
panic("unaligned physical address!")
}
- v := addr | typeTable | protDefault
+ v := addr | typeTable | protDefault | mtNormal
atomic.StoreUintptr((*uintptr)(p), v)
}
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 4f98ee2d5..0bfd6c1f4 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -97,7 +97,7 @@ func (*TCPMatcher) Name() string {
// Match implements Matcher.Match.
func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
- netHeader := header.IPv4(pkt.NetworkHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
if netHeader.TransportProtocol() != header.TCPProtocolNumber {
return false, false
@@ -111,7 +111,7 @@ func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceN
return false, false
}
- tcpHeader := header.TCP(pkt.TransportHeader)
+ tcpHeader := header.TCP(pkt.TransportHeader().View())
if len(tcpHeader) < header.TCPMinimumSize {
// There's no valid TCP header here, so we drop the packet immediately.
return false, true
diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go
index 3f20fc891..7ed05461d 100644
--- a/pkg/sentry/socket/netfilter/udp_matcher.go
+++ b/pkg/sentry/socket/netfilter/udp_matcher.go
@@ -94,7 +94,7 @@ func (*UDPMatcher) Name() string {
// Match implements Matcher.Match.
func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) {
- netHeader := header.IPv4(pkt.NetworkHeader)
+ netHeader := header.IPv4(pkt.NetworkHeader().View())
// TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved
// into the stack.Check codepath as matchers are added.
@@ -110,7 +110,7 @@ func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceN
return false, false
}
- udpHeader := header.UDP(pkt.TransportHeader)
+ udpHeader := header.UDP(pkt.TransportHeader().View())
if len(udpHeader) < header.UDPMinimumSize {
// There's no valid UDP header here, so we drop the packet immediately.
return false, true
diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go
index 399b4f60c..42559bf69 100644
--- a/pkg/sentry/syscalls/linux/vfs2/aio.go
+++ b/pkg/sentry/syscalls/linux/vfs2/aio.go
@@ -144,6 +144,12 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user
func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback {
return func(ctx context.Context) {
+ // Release references after completing the callback.
+ defer fd.DecRef(ctx)
+ if eventFD != nil {
+ defer eventFD.DecRef(ctx)
+ }
+
if aioCtx.Dead() {
aioCtx.CancelPendingRequest()
return
@@ -169,8 +175,6 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use
ev.Result = -int64(kernel.ExtractErrno(err, 0))
}
- fd.DecRef(ctx)
-
// Queue the result for delivery.
aioCtx.FinishRequest(ev)
@@ -179,7 +183,6 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use
// wake up.
if eventFD != nil {
eventFD.Impl().(*eventfd.EventFileDescription).Signal(1)
- eventFD.DecRef(ctx)
}
}
}
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index d3c1197e3..dcafffe57 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -289,7 +289,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede
if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO {
return syserror.EINVAL
}
- // TODO(jamieliu): FileDescriptionImpl.SetOAsync()?
+ // TODO(gvisor.dev/issue/1035): FileDescriptionImpl.SetOAsync()?
const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK
fd.flagsMu.Lock()
if fd.asyncHandler != nil {
@@ -301,7 +301,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede
fd.asyncHandler.Unregister(fd)
}
}
- fd.statusFlags = (oldFlags &^ settableFlags) | (flags & settableFlags)
+ atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags))
fd.flagsMu.Unlock()
return nil
}