diff options
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/arch/arch_aarch64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/arch/arch_arm64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/fuse/fusefs.go | 96 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/filesystem.go | 17 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/host/host.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/BUILD | 13 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 27 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_const_arm64.go | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 11 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/entry_arm64.s | 56 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_arm64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go | 9 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/tcp_matcher.go | 4 | ||||
-rw-r--r-- | pkg/sentry/socket/netfilter/udp_matcher.go | 4 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/aio.go | 9 | ||||
-rw-r--r-- | pkg/sentry/vfs/file_description.go | 4 |
17 files changed, 185 insertions, 76 deletions
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go index fd95eb2d2..0f433ee79 100644 --- a/pkg/sentry/arch/arch_aarch64.go +++ b/pkg/sentry/arch/arch_aarch64.go @@ -101,6 +101,8 @@ func NewFloatingPointData() *FloatingPointData { // State contains the common architecture bits for aarch64 (the build tag of this // file ensures it's only built on aarch64). +// +// +stateify savable type State struct { // The system registers. Regs Registers diff --git a/pkg/sentry/arch/arch_arm64.go b/pkg/sentry/arch/arch_arm64.go index cabbf60e0..550741d8c 100644 --- a/pkg/sentry/arch/arch_arm64.go +++ b/pkg/sentry/arch/arch_arm64.go @@ -73,6 +73,8 @@ const ( ) // context64 represents an ARM64 context. +// +// +stateify savable type context64 struct { State sigFPState []aarch64FPState // fpstate to be restored on sigreturn. diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index a1405f7c3..83c24ec25 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -226,3 +226,99 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr } return fd.VFSFileDescription(), nil } + +// statFromFUSEAttr makes attributes from linux.FUSEAttr to linux.Statx. The +// opts.Sync attribute is ignored since the synchronization is handled by the +// FUSE server. +func statFromFUSEAttr(attr linux.FUSEAttr, mask, devMinor uint32) linux.Statx { + var stat linux.Statx + stat.Blksize = attr.BlkSize + stat.DevMajor, stat.DevMinor = linux.UNNAMED_MAJOR, devMinor + + rdevMajor, rdevMinor := linux.DecodeDeviceID(attr.Rdev) + stat.RdevMajor, stat.RdevMinor = uint32(rdevMajor), rdevMinor + + if mask&linux.STATX_MODE != 0 { + stat.Mode = uint16(attr.Mode) + } + if mask&linux.STATX_NLINK != 0 { + stat.Nlink = attr.Nlink + } + if mask&linux.STATX_UID != 0 { + stat.UID = attr.UID + } + if mask&linux.STATX_GID != 0 { + stat.GID = attr.GID + } + if mask&linux.STATX_ATIME != 0 { + stat.Atime = linux.StatxTimestamp{ + Sec: int64(attr.Atime), + Nsec: attr.AtimeNsec, + } + } + if mask&linux.STATX_MTIME != 0 { + stat.Mtime = linux.StatxTimestamp{ + Sec: int64(attr.Mtime), + Nsec: attr.MtimeNsec, + } + } + if mask&linux.STATX_CTIME != 0 { + stat.Ctime = linux.StatxTimestamp{ + Sec: int64(attr.Ctime), + Nsec: attr.CtimeNsec, + } + } + if mask&linux.STATX_INO != 0 { + stat.Ino = attr.Ino + } + if mask&linux.STATX_SIZE != 0 { + stat.Size = attr.Size + } + if mask&linux.STATX_BLOCKS != 0 { + stat.Blocks = attr.Blocks + } + return stat +} + +// Stat implements kernfs.Inode.Stat. +func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { + fusefs := fs.Impl().(*filesystem) + conn := fusefs.conn + task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx) + if task == nil { + log.Warningf("couldn't get kernel task from context") + return linux.Statx{}, syserror.EINVAL + } + + var in linux.FUSEGetAttrIn + // We don't set any attribute in the request, because in VFS2 fstat(2) will + // finally be translated into vfs.FilesystemImpl.StatAt() (see + // pkg/sentry/syscalls/linux/vfs2/stat.go), resulting in the same flow + // as stat(2). Thus GetAttrFlags and Fh variable will never be used in VFS2. + req, err := conn.NewRequest(creds, uint32(task.ThreadID()), i.Ino(), linux.FUSE_GETATTR, &in) + if err != nil { + return linux.Statx{}, err + } + + res, err := conn.Call(task, req) + if err != nil { + return linux.Statx{}, err + } + if err := res.Error(); err != nil { + return linux.Statx{}, err + } + + var out linux.FUSEGetAttrOut + if err := res.UnmarshalPayload(&out); err != nil { + return linux.Statx{}, err + } + + // Set all metadata into kernfs.InodeAttrs. + if err := i.SetStat(ctx, fs, creds, vfs.SetStatOptions{ + Stat: statFromFUSEAttr(out.Attr, linux.STATX_ALL, fusefs.devMinor), + }); err != nil { + return linux.Statx{}, err + } + + return statFromFUSEAttr(out.Attr, opts.Mask, fusefs.devMinor), nil +} diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index eaef2594d..610a7ed78 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -844,6 +844,13 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf } } if rp.Done() { + // Reject attempts to open mount root directory with O_CREAT. + if mayCreate && rp.MustBeDir() { + return nil, syserror.EISDIR + } + if mustCreate { + return nil, syserror.EEXIST + } return start.openLocked(ctx, rp, &opts) } @@ -856,6 +863,10 @@ afterTrailingSymlink: if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { return nil, err } + // Reject attempts to open directories with O_CREAT. + if mayCreate && rp.MustBeDir() { + return nil, syserror.EISDIR + } // Determine whether or not we need to create a file. parent.dirMu.Lock() child, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds) @@ -1071,10 +1082,8 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } creds := rp.Credentials() name := rp.Component() - // Filter file creation flags and O_LARGEFILE out; the create RPC already - // has the semantics of O_CREAT|O_EXCL, while some servers will choke on - // O_LARGEFILE. - createFlags := p9.OpenFlags(opts.Flags &^ (vfs.FileCreationFlags | linux.O_LARGEFILE)) + // We only want the access mode for creating the file. + createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID)) if err != nil { dirfile.close(ctx) diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index bf922c566..bd6caba06 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -552,7 +552,7 @@ func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uin return syserror.ESPIPE } - // TODO(gvisor.dev/issue/2923): Implement Allocate for non-pipe hostfds. + // TODO(gvisor.dev/issue/3589): Implement Allocate for non-pipe hostfds. return syserror.EOPNOTSUPP } diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index e73732a6b..5cd428d64 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -26,6 +26,17 @@ go_template_instance( }, ) +go_template_instance( + name = "inode_refs", + out = "inode_refs.go", + package = "tmpfs", + prefix = "inode", + template = "//pkg/refs_vfs2:refs_template", + types = { + "T": "inode", + }, +) + go_library( name = "tmpfs", srcs = [ @@ -34,6 +45,7 @@ go_library( "directory.go", "filesystem.go", "fstree.go", + "inode_refs.go", "named_pipe.go", "regular_file.go", "socket_file.go", @@ -47,6 +59,7 @@ go_library( "//pkg/context", "//pkg/fspath", "//pkg/log", + "//pkg/refs", "//pkg/safemem", "//pkg/sentry/arch", "//pkg/sentry/fs", diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 065812065..a4864df53 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -320,7 +320,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf fs.mu.Lock() defer fs.mu.Unlock() if rp.Done() { - // Reject attempts to open directories with O_CREAT. + // Reject attempts to open mount root directory with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 4681a2f52..de2af6d01 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -285,13 +285,10 @@ type inode struct { // fs is the owning filesystem. fs is immutable. fs *filesystem - // refs is a reference count. refs is accessed using atomic memory - // operations. - // // A reference is held on all inodes as long as they are reachable in the // filesystem tree, i.e. nlink is nonzero. This reference is dropped when // nlink reaches 0. - refs int64 + refs inodeRefs // xattrs implements extended attributes. // @@ -327,7 +324,6 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth panic("file type is required in FileMode") } i.fs = fs - i.refs = 1 i.mode = uint32(mode) i.uid = uint32(kuid) i.gid = uint32(kgid) @@ -339,6 +335,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, kuid auth.KUID, kgid auth i.mtime = now // i.nlink initialized by caller i.impl = impl + i.refs.EnableLeakCheck() } // incLinksLocked increments i's link count. @@ -369,25 +366,15 @@ func (i *inode) decLinksLocked(ctx context.Context) { } func (i *inode) incRef() { - if atomic.AddInt64(&i.refs, 1) <= 1 { - panic("tmpfs.inode.incRef() called without holding a reference") - } + i.refs.IncRef() } func (i *inode) tryIncRef() bool { - for { - refs := atomic.LoadInt64(&i.refs) - if refs == 0 { - return false - } - if atomic.CompareAndSwapInt64(&i.refs, refs, refs+1) { - return true - } - } + return i.refs.TryIncRef() } func (i *inode) decRef(ctx context.Context) { - if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { + i.refs.DecRef(func() { i.watches.HandleDeletion(ctx) if regFile, ok := i.impl.(*regularFile); ok { // Release memory used by regFile to store data. Since regFile is @@ -395,9 +382,7 @@ func (i *inode) decRef(ctx context.Context) { // metadata. regFile.data.DropAll(regFile.memFile) } - } else if refs < 0 { - panic("tmpfs.inode.decRef() called without holding a reference") - } + }) } func (i *inode) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error { diff --git a/pkg/sentry/platform/kvm/kvm_const_arm64.go b/pkg/sentry/platform/kvm/kvm_const_arm64.go index fdc599477..9a7be3655 100644 --- a/pkg/sentry/platform/kvm/kvm_const_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_const_arm64.go @@ -72,6 +72,7 @@ const ( _TCR_T0SZ_VA48 = 64 - 48 // VA=48 _TCR_T1SZ_VA48 = 64 - 48 // VA=48 + _TCR_A1 = 1 << 22 _TCR_ASID16 = 1 << 36 _TCR_TBI0 = 1 << 37 diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 307a7645f..905712076 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -61,7 +61,6 @@ func (c *vCPU) initArchState() error { reg.addr = uint64(reflect.ValueOf(&data).Pointer()) regGet.addr = uint64(reflect.ValueOf(&dataGet).Pointer()) - vcpuInit.target = _KVM_ARM_TARGET_GENERIC_V8 vcpuInit.features[0] |= (1 << _KVM_ARM_VCPU_PSCI_0_2) if _, _, errno := syscall.RawSyscall( syscall.SYS_IOCTL, @@ -272,8 +271,16 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) return c.fault(int32(syscall.SIGSEGV), info) case ring0.Vector(bounce): // ring0.VirtualizationException return usermem.NoAccess, platform.ErrContextInterrupt + case ring0.El0Sync_undef, + ring0.El1Sync_undef: + *info = arch.SignalInfo{ + Signo: int32(syscall.SIGILL), + Code: 1, // ILL_ILLOPC (illegal opcode). + } + info.SetAddr(switchOpts.Registers.Pc) // Include address. + return usermem.AccessType{}, platform.ErrContextSignal default: - return usermem.NoAccess, platform.ErrContextSignal + panic(fmt.Sprintf("unexpected vector: 0x%x", vector)) } } diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s index d8a7bc2f9..9d29b7168 100644 --- a/pkg/sentry/platform/ring0/entry_arm64.s +++ b/pkg/sentry/platform/ring0/entry_arm64.s @@ -364,6 +364,9 @@ TEXT ·Halt(SB),NOSPLIT,$0 CMP RSV_REG, R9 BNE mmio_exit MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG) + + // Flush dcache. + WORD $0xd5087e52 // DC CISW mmio_exit: // Disable fpsimd. WORD $0xd5381041 // MRS CPACR_EL1, R1 @@ -381,6 +384,9 @@ mmio_exit: MRS VBAR_EL1, R9 MOVD R0, 0x0(R9) + // Flush dcahce. + WORD $0xd5087e52 // DC CISW + RET // HaltAndResume halts execution and point the pointer to the resume function. @@ -414,6 +420,7 @@ TEXT ·Current(SB),NOSPLIT,$0-8 // Prepare the vcpu environment for container application. TEXT ·kernelExitToEl0(SB),NOSPLIT,$0 // Step1, save sentry context into memory. + MRS TPIDR_EL1, RSV_REG REGISTERS_SAVE(RSV_REG, CPU_REGISTERS) MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG) @@ -425,34 +432,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0 MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3 - // Step2, save SP_EL1, PSTATE into kernel temporary stack. - // switch to temporary stack. + // Step2, switch to temporary stack. LOAD_KERNEL_STACK(RSV_REG) - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - - SUB $STACK_FRAME_SIZE, RSP, RSP - MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R11 - MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R12 - STP (R11, R12), 16*0(RSP) - - MOVD CPU_REGISTERS+PTRACE_R11(RSV_REG), R11 - MOVD CPU_REGISTERS+PTRACE_R12(RSV_REG), R12 - // Step3, test user pagetable. - // If user pagetable is empty, trapped in el1_ia. - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - SWITCH_TO_APP_PAGETABLE(RSV_REG) - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - SWITCH_TO_KVM_PAGETABLE(RSV_REG) - WORD $0xd538d092 //MRS TPIDR_EL1, R18 - - // If pagetable is not empty, recovery kernel temporary stack. - ADD $STACK_FRAME_SIZE, RSP, RSP - - // Step4, load app context pointer. + // Step3, load app context pointer. MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP - // Step5, prepare the environment for container application. + // Step4, prepare the environment for container application. // set sp_el0. MOVD PTRACE_SP(RSV_REG_APP), R1 WORD $0xd5184101 //MSR R1, SP_EL0 @@ -480,13 +466,13 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0 LDP 16*0(RSP), (RSV_REG, RSV_REG_APP) ADD $STACK_FRAME_SIZE, RSP, RSP + ISB $15 ERET() // kernelExitToEl1 is the entrypoint for sentry in guest_el1. // Prepare the vcpu environment for sentry. TEXT ·kernelExitToEl1(SB),NOSPLIT,$0 WORD $0xd538d092 //MRS TPIDR_EL1, R18 - MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1 WORD $0xd5184001 //MSR R1, SPSR_EL1 @@ -503,6 +489,8 @@ TEXT ·kernelExitToEl1(SB),NOSPLIT,$0 // Start is the CPU entrypoint. TEXT ·Start(SB),NOSPLIT,$0 + // Flush dcache. + WORD $0xd5087e52 // DC CISW // Init. MOVD $SCTLR_EL1_DEFAULT, R1 MSR R1, SCTLR_EL1 @@ -558,6 +546,7 @@ TEXT ·El1_sync(SB),NOSPLIT,$0 B el1_invalid el1_da: +el1_ia: WORD $0xd538d092 //MRS TPIDR_EL1, R18 WORD $0xd538601a //MRS FAR_EL1, R26 @@ -570,9 +559,6 @@ el1_da: B ·HaltAndResume(SB) -el1_ia: - B ·HaltAndResume(SB) - el1_sp_pc: B ·Shutdown(SB) @@ -644,9 +630,10 @@ el0_svc: MOVD $Syscall, R3 MOVD R3, CPU_VECTOR_CODE(RSV_REG) - B ·HaltAndResume(SB) + B ·kernelExitToEl1(SB) el0_da: +el0_ia: WORD $0xd538d092 //MRS TPIDR_EL1, R18 WORD $0xd538601a //MRS FAR_EL1, R26 @@ -658,10 +645,10 @@ el0_da: MOVD $PageFault, R3 MOVD R3, CPU_VECTOR_CODE(RSV_REG) - B ·HaltAndResume(SB) + MRS ESR_EL1, R3 + MOVD R3, CPU_ERROR_CODE(RSV_REG) -el0_ia: - B ·Shutdown(SB) + B ·kernelExitToEl1(SB) el0_fpsimd_acc: B ·Shutdown(SB) @@ -676,7 +663,10 @@ el0_sp_pc: B ·Shutdown(SB) el0_undef: - B ·Shutdown(SB) + MOVD $El0Sync_undef, R3 + MOVD R3, CPU_VECTOR_CODE(RSV_REG) + + B ·kernelExitToEl1(SB) el0_dbg: B ·Shutdown(SB) diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go index 42009dac0..d0afa1aaa 100644 --- a/pkg/sentry/platform/ring0/kernel_arm64.go +++ b/pkg/sentry/platform/ring0/kernel_arm64.go @@ -53,7 +53,6 @@ func IsCanonical(addr uint64) bool { //go:nosplit func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { - // Sanitize registers. regs := switchOpts.Registers regs.Pstate &= ^uint64(PsrFlagsClear) @@ -69,6 +68,5 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { vector = c.vecCode - // Perform the switch. return } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go index 78510ebed..6409d1d91 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go @@ -72,13 +72,14 @@ const ( ) const ( - mtNormal = 0x4 << 2 + mtDevicenGnRE = 0x1 << 2 + mtNormal = 0x4 << 2 ) const ( executeDisable = xn optionMask = 0xfff | 0xfff<<48 - protDefault = accessed | shared | mtNormal + protDefault = accessed | shared ) // MapOpts are x86 options. @@ -184,8 +185,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { if opts.User { v |= user + v |= mtNormal } else { v = v &^ user + v |= mtDevicenGnRE // Strong order for the addresses with ring0.KernelStartAddress. } atomic.StoreUintptr((*uintptr)(p), v) } @@ -200,7 +203,7 @@ func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) { // This should never happen. panic("unaligned physical address!") } - v := addr | typeTable | protDefault + v := addr | typeTable | protDefault | mtNormal atomic.StoreUintptr((*uintptr)(p), v) } diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go index 4f98ee2d5..0bfd6c1f4 100644 --- a/pkg/sentry/socket/netfilter/tcp_matcher.go +++ b/pkg/sentry/socket/netfilter/tcp_matcher.go @@ -97,7 +97,7 @@ func (*TCPMatcher) Name() string { // Match implements Matcher.Match. func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) { - netHeader := header.IPv4(pkt.NetworkHeader) + netHeader := header.IPv4(pkt.NetworkHeader().View()) if netHeader.TransportProtocol() != header.TCPProtocolNumber { return false, false @@ -111,7 +111,7 @@ func (tm *TCPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceN return false, false } - tcpHeader := header.TCP(pkt.TransportHeader) + tcpHeader := header.TCP(pkt.TransportHeader().View()) if len(tcpHeader) < header.TCPMinimumSize { // There's no valid TCP header here, so we drop the packet immediately. return false, true diff --git a/pkg/sentry/socket/netfilter/udp_matcher.go b/pkg/sentry/socket/netfilter/udp_matcher.go index 3f20fc891..7ed05461d 100644 --- a/pkg/sentry/socket/netfilter/udp_matcher.go +++ b/pkg/sentry/socket/netfilter/udp_matcher.go @@ -94,7 +94,7 @@ func (*UDPMatcher) Name() string { // Match implements Matcher.Match. func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceName string) (bool, bool) { - netHeader := header.IPv4(pkt.NetworkHeader) + netHeader := header.IPv4(pkt.NetworkHeader().View()) // TODO(gvisor.dev/issue/170): Proto checks should ultimately be moved // into the stack.Check codepath as matchers are added. @@ -110,7 +110,7 @@ func (um *UDPMatcher) Match(hook stack.Hook, pkt *stack.PacketBuffer, interfaceN return false, false } - udpHeader := header.UDP(pkt.TransportHeader) + udpHeader := header.UDP(pkt.TransportHeader().View()) if len(udpHeader) < header.UDPMinimumSize { // There's no valid UDP header here, so we drop the packet immediately. return false, true diff --git a/pkg/sentry/syscalls/linux/vfs2/aio.go b/pkg/sentry/syscalls/linux/vfs2/aio.go index 399b4f60c..42559bf69 100644 --- a/pkg/sentry/syscalls/linux/vfs2/aio.go +++ b/pkg/sentry/syscalls/linux/vfs2/aio.go @@ -144,6 +144,12 @@ func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr user func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr usermem.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, aioCtx *mm.AIOContext) kernel.AIOCallback { return func(ctx context.Context) { + // Release references after completing the callback. + defer fd.DecRef(ctx) + if eventFD != nil { + defer eventFD.DecRef(ctx) + } + if aioCtx.Dead() { aioCtx.CancelPendingRequest() return @@ -169,8 +175,6 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use ev.Result = -int64(kernel.ExtractErrno(err, 0)) } - fd.DecRef(ctx) - // Queue the result for delivery. aioCtx.FinishRequest(ev) @@ -179,7 +183,6 @@ func getAIOCallback(t *kernel.Task, fd, eventFD *vfs.FileDescription, cbAddr use // wake up. if eventFD != nil { eventFD.Impl().(*eventfd.EventFileDescription).Signal(1) - eventFD.DecRef(ctx) } } } diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go index d3c1197e3..dcafffe57 100644 --- a/pkg/sentry/vfs/file_description.go +++ b/pkg/sentry/vfs/file_description.go @@ -289,7 +289,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO { return syserror.EINVAL } - // TODO(jamieliu): FileDescriptionImpl.SetOAsync()? + // TODO(gvisor.dev/issue/1035): FileDescriptionImpl.SetOAsync()? const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK fd.flagsMu.Lock() if fd.asyncHandler != nil { @@ -301,7 +301,7 @@ func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Crede fd.asyncHandler.Unregister(fd) } } - fd.statusFlags = (oldFlags &^ settableFlags) | (flags & settableFlags) + atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags)) fd.flagsMu.Unlock() return nil } |