summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/platform/kvm/address_space.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_allocator.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/context.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_arm64.go2
-rw-r--r--pkg/sentry/platform/kvm/machine.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go4
-rw-r--r--pkg/sentry/platform/kvm/physical_map.go2
-rw-r--r--pkg/sentry/platform/ring0/defs_impl_amd64.go598
-rw-r--r--pkg/sentry/platform/ring0/defs_impl_arm64.go428
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.go131
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.go60
-rw-r--r--pkg/sentry/platform/ring0/entry_impl_amd64.s441
-rw-r--r--pkg/sentry/platform/ring0/entry_impl_arm64.s858
-rw-r--r--pkg/sentry/platform/ring0/kernel.go90
-rw-r--r--pkg/sentry/platform/ring0/kernel_amd64.go323
-rw-r--r--pkg/sentry/platform/ring0/kernel_arm64.go85
-rw-r--r--pkg/sentry/platform/ring0/kernel_unsafe.go41
-rw-r--r--pkg/sentry/platform/ring0/lib_amd64.go119
-rw-r--r--pkg/sentry/platform/ring0/lib_amd64.s200
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.go71
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.s180
-rw-r--r--pkg/sentry/platform/ring0/pagetables/allocator.go127
-rw-r--r--pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go53
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables.go324
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go214
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_aarch64_state_autogen.go6
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go77
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_amd64_state_autogen.go5
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go71
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_arm64_state_autogen.go5
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go3
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_unsafe_state_autogen.go3
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_x86.go183
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_x86_state_autogen.go6
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids.go104
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go32
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s45
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_x86.go20
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_amd64.go221
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_arm64.go231
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_empty_amd64.go265
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_empty_arm64.go275
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_generic.go110
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_lookup_amd64.go265
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_lookup_arm64.go275
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_map_amd64.go265
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_map_arm64.go275
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_unmap_amd64.go265
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_unmap_arm64.go275
-rw-r--r--pkg/sentry/platform/ring0/ring0.go16
-rw-r--r--pkg/sentry/platform/ring0/ring0_amd64_state_autogen.go7
-rw-r--r--pkg/sentry/platform/ring0/ring0_arm64_state_autogen.go7
-rw-r--r--pkg/sentry/platform/ring0/ring0_impl_amd64_state_autogen.go7
-rw-r--r--pkg/sentry/platform/ring0/ring0_impl_arm64_state_autogen.go7
-rw-r--r--pkg/sentry/platform/ring0/ring0_state_autogen.go3
-rw-r--r--pkg/sentry/platform/ring0/ring0_unsafe_state_autogen.go3
65 files changed, 21 insertions, 7696 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index af5c5e191..25c21e843 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -18,9 +18,9 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 4b23f7803..2c970162e 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -19,9 +19,9 @@ import (
"reflect"
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/safecopy"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// bluepill enters guest mode.
diff --git a/pkg/sentry/platform/kvm/bluepill_allocator.go b/pkg/sentry/platform/kvm/bluepill_allocator.go
index 9485e1301..1825edc3a 100644
--- a/pkg/sentry/platform/kvm/bluepill_allocator.go
+++ b/pkg/sentry/platform/kvm/bluepill_allocator.go
@@ -17,7 +17,7 @@ package kvm
import (
"fmt"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
)
type allocator struct {
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index ddc1554d5..83a4766fb 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -19,8 +19,8 @@ package kvm
import (
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
var (
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
index f8ccb7430..0063e947b 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
@@ -20,8 +20,8 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// dieArchSetup initializes the state for dieTrampoline.
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index 1f09813ba..35298135a 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -19,8 +19,8 @@ package kvm
import (
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
var (
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
index 4d912769a..dbbf2a897 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
@@ -20,8 +20,8 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// fpsimdPtr returns a fpsimd64 for the given address.
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index 17268d127..aeae01dbd 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -18,10 +18,10 @@ import (
"sync/atomic"
pkgcontext "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 5979aef97..7bdf57436 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -20,9 +20,9 @@ import (
"os"
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go
index 093497bc4..b9ed4a706 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64.go
@@ -18,7 +18,7 @@ package kvm
import (
"gvisor.dev/gvisor/pkg/cpuid"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/ring0"
)
// userRegs represents KVM user registers.
diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go
index 9db1db4e9..b73340f0e 100644
--- a/pkg/sentry/platform/kvm/kvm_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_arm64.go
@@ -17,8 +17,8 @@
package kvm
import (
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
type kvmOneReg struct {
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index e2fffc99b..1ece1b8d8 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -23,8 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 8e03c310d..59c752d73 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -24,10 +24,10 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index aa2d21748..7d7857067 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -17,10 +17,10 @@
package kvm
import (
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index a466acf4d..dca0cdb60 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -23,10 +23,10 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go
index f7fa2f98d..8bdec93ae 100644
--- a/pkg/sentry/platform/kvm/physical_map.go
+++ b/pkg/sentry/platform/kvm/physical_map.go
@@ -20,7 +20,7 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/ring0/defs_impl_amd64.go b/pkg/sentry/platform/ring0/defs_impl_amd64.go
deleted file mode 100644
index 3282254a7..000000000
--- a/pkg/sentry/platform/ring0/defs_impl_amd64.go
+++ /dev/null
@@ -1,598 +0,0 @@
-// +build amd64
-// +build amd64
-// +build 386 amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
-
- "fmt"
- "gvisor.dev/gvisor/pkg/cpuid"
- "gvisor.dev/gvisor/pkg/usermem"
- "io"
- "reflect"
-)
-
-// Kernel is a global kernel object.
-//
-// This contains global state, shared by multiple CPUs.
-type Kernel struct {
- // PageTables are the kernel pagetables; this must be provided.
- PageTables *pagetables.PageTables
-
- KernelArchState
-}
-
-// Hooks are hooks for kernel functions.
-type Hooks interface {
- // KernelSyscall is called for kernel system calls.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelSyscall()
-
- // KernelException handles an exception during kernel execution.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelException(Vector)
-}
-
-// CPU is the per-CPU struct.
-type CPU struct {
- // self is a self reference.
- //
- // This is always guaranteed to be at offset zero.
- self *CPU
-
- // kernel is reference to the kernel that this CPU was initialized
- // with. This reference is kept for garbage collection purposes: CPU
- // registers may refer to objects within the Kernel object that cannot
- // be safely freed.
- kernel *Kernel
-
- // CPUArchState is architecture-specific state.
- CPUArchState
-
- // registers is a set of registers; these may be used on kernel system
- // calls and exceptions via the Registers function.
- registers arch.Registers
-
- // hooks are kernel hooks.
- hooks Hooks
-}
-
-// Registers returns a modifiable-copy of the kernel registers.
-//
-// This is explicitly safe to call during KernelException and KernelSyscall.
-//
-//go:nosplit
-func (c *CPU) Registers() *arch.Registers {
- return &c.registers
-}
-
-// SwitchOpts are passed to the Switch function.
-type SwitchOpts struct {
- // Registers are the user register state.
- Registers *arch.Registers
-
- // FloatingPointState is a byte pointer where floating point state is
- // saved and restored.
- FloatingPointState *byte
-
- // PageTables are the application page tables.
- PageTables *pagetables.PageTables
-
- // Flush indicates that a TLB flush should be forced on switch.
- Flush bool
-
- // FullRestore indicates that an iret-based restore should be used.
- FullRestore bool
-
- // SwitchArchOpts are architecture-specific options.
- SwitchArchOpts
-}
-
-var (
- // UserspaceSize is the total size of userspace.
- UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
-
- // MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
-
- // KernelStartAddress is the starting kernel address.
- KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
-)
-
-// Segment indices and Selectors.
-const (
- // Index into GDT array.
- _ = iota // Null descriptor first.
- _ // Reserved (Linux is kernel 32).
- segKcode // Kernel code (64-bit).
- segKdata // Kernel data.
- segUcode32 // User code (32-bit).
- segUdata // User data.
- segUcode64 // User code (64-bit).
- segTss // Task segment descriptor.
- segTssHi // Upper bits for TSS.
- segLast // Last segment (terminal, not included).
-)
-
-// Selectors.
-const (
- Kcode Selector = segKcode << 3
- Kdata Selector = segKdata << 3
- Ucode32 Selector = (segUcode32 << 3) | 3
- Udata Selector = (segUdata << 3) | 3
- Ucode64 Selector = (segUcode64 << 3) | 3
- Tss Selector = segTss << 3
-)
-
-// Standard segments.
-var (
- UserCodeSegment32 SegmentDescriptor
- UserDataSegment SegmentDescriptor
- UserCodeSegment64 SegmentDescriptor
- KernelCodeSegment SegmentDescriptor
- KernelDataSegment SegmentDescriptor
-)
-
-// KernelArchState contains architecture-specific state.
-type KernelArchState struct {
- // cpuEntries is array of kernelEntry for all cpus.
- cpuEntries []kernelEntry
-
- // globalIDT is our set of interrupt gates.
- globalIDT *idt64
-}
-
-// kernelEntry contains minimal CPU-specific arch state
-// that can be mapped at the upper of the address space.
-// Malicious APP might steal info from it via CPU bugs.
-type kernelEntry struct {
- // stack is the stack used for interrupts on this CPU.
- stack [256]byte
-
- // scratch space for temporary usage.
- scratch0 uint64
-
- // stackTop is the top of the stack.
- stackTop uint64
-
- // cpuSelf is back reference to CPU.
- cpuSelf *CPU
-
- // kernelCR3 is the cr3 used for sentry kernel.
- kernelCR3 uintptr
-
- // gdt is the CPU's descriptor table.
- gdt descriptorTable
-
- // tss is the CPU's task state.
- tss TaskState64
-}
-
-// CPUArchState contains CPU-specific arch state.
-type CPUArchState struct {
- // errorCode is the error code from the last exception.
- errorCode uintptr
-
- // errorType indicates the type of error code here, it is always set
- // along with the errorCode value above.
- //
- // It will either by 1, which indicates a user error, or 0 indicating a
- // kernel error. If the error code below returns false (kernel error),
- // then it cannot provide relevant information about the last
- // exception.
- errorType uintptr
-
- *kernelEntry
-}
-
-// ErrorCode returns the last error code.
-//
-// The returned boolean indicates whether the error code corresponds to the
-// last user error or not. If it does not, then fault information must be
-// ignored. This is generally the result of a kernel fault while servicing a
-// user fault.
-//
-//go:nosplit
-func (c *CPU) ErrorCode() (value uintptr, user bool) {
- return c.errorCode, c.errorType != 0
-}
-
-// ClearErrorCode resets the error code.
-//
-//go:nosplit
-func (c *CPU) ClearErrorCode() {
- c.errorCode = 0
- c.errorType = 1
-}
-
-// SwitchArchOpts are embedded in SwitchOpts.
-type SwitchArchOpts struct {
- // UserPCID indicates that the application PCID to be used on switch,
- // assuming that PCIDs are supported.
- //
- // Per pagetables_x86.go, a zero PCID implies a flush.
- UserPCID uint16
-
- // KernelPCID indicates that the kernel PCID to be used on return,
- // assuming that PCIDs are supported.
- //
- // Per pagetables_x86.go, a zero PCID implies a flush.
- KernelPCID uint16
-}
-
-func init() {
- KernelCodeSegment.setCode64(0, 0, 0)
- KernelDataSegment.setData(0, 0xffffffff, 0)
- UserCodeSegment32.setCode64(0, 0, 3)
- UserDataSegment.setData(0, 0xffffffff, 3)
- UserCodeSegment64.setCode64(0, 0, 3)
-}
-
-// Emit prints architecture-specific offsets.
-func Emit(w io.Writer) {
- fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
-
- c := &CPU{}
- fmt.Fprintf(w, "\n// CPU offsets.\n")
- fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ENTRY 0x%02x\n", reflect.ValueOf(&c.kernelEntry).Pointer()-reflect.ValueOf(c).Pointer())
-
- e := &kernelEntry{}
- fmt.Fprintf(w, "\n// CPU entry offsets.\n")
- fmt.Fprintf(w, "#define ENTRY_SCRATCH0 0x%02x\n", reflect.ValueOf(&e.scratch0).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_STACK_TOP 0x%02x\n", reflect.ValueOf(&e.stackTop).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_CPU_SELF 0x%02x\n", reflect.ValueOf(&e.cpuSelf).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_KERNEL_CR3 0x%02x\n", reflect.ValueOf(&e.kernelCR3).Pointer()-reflect.ValueOf(e).Pointer())
-
- fmt.Fprintf(w, "\n// Bits.\n")
- fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF)
- fmt.Fprintf(w, "#define _RFLAGS_IOPL0 0x%02x\n", _RFLAGS_IOPL0)
- fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
-
- fmt.Fprintf(w, "\n// Vectors.\n")
- fmt.Fprintf(w, "#define DivideByZero 0x%02x\n", DivideByZero)
- fmt.Fprintf(w, "#define Debug 0x%02x\n", Debug)
- fmt.Fprintf(w, "#define NMI 0x%02x\n", NMI)
- fmt.Fprintf(w, "#define Breakpoint 0x%02x\n", Breakpoint)
- fmt.Fprintf(w, "#define Overflow 0x%02x\n", Overflow)
- fmt.Fprintf(w, "#define BoundRangeExceeded 0x%02x\n", BoundRangeExceeded)
- fmt.Fprintf(w, "#define InvalidOpcode 0x%02x\n", InvalidOpcode)
- fmt.Fprintf(w, "#define DeviceNotAvailable 0x%02x\n", DeviceNotAvailable)
- fmt.Fprintf(w, "#define DoubleFault 0x%02x\n", DoubleFault)
- fmt.Fprintf(w, "#define CoprocessorSegmentOverrun 0x%02x\n", CoprocessorSegmentOverrun)
- fmt.Fprintf(w, "#define InvalidTSS 0x%02x\n", InvalidTSS)
- fmt.Fprintf(w, "#define SegmentNotPresent 0x%02x\n", SegmentNotPresent)
- fmt.Fprintf(w, "#define StackSegmentFault 0x%02x\n", StackSegmentFault)
- fmt.Fprintf(w, "#define GeneralProtectionFault 0x%02x\n", GeneralProtectionFault)
- fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
- fmt.Fprintf(w, "#define X87FloatingPointException 0x%02x\n", X87FloatingPointException)
- fmt.Fprintf(w, "#define AlignmentCheck 0x%02x\n", AlignmentCheck)
- fmt.Fprintf(w, "#define MachineCheck 0x%02x\n", MachineCheck)
- fmt.Fprintf(w, "#define SIMDFloatingPointException 0x%02x\n", SIMDFloatingPointException)
- fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
- fmt.Fprintf(w, "#define SecurityException 0x%02x\n", SecurityException)
- fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80)
- fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
-
- p := &arch.Registers{}
- fmt.Fprintf(w, "\n// Ptrace registers.\n")
- fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.R13).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.R12).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RBP 0x%02x\n", reflect.ValueOf(&p.Rbp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RBX 0x%02x\n", reflect.ValueOf(&p.Rbx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.R11).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.R10).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.R9).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.R8).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RAX 0x%02x\n", reflect.ValueOf(&p.Rax).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RCX 0x%02x\n", reflect.ValueOf(&p.Rcx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RDX 0x%02x\n", reflect.ValueOf(&p.Rdx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RSI 0x%02x\n", reflect.ValueOf(&p.Rsi).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RDI 0x%02x\n", reflect.ValueOf(&p.Rdi).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_ORIGRAX 0x%02x\n", reflect.ValueOf(&p.Orig_rax).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RIP 0x%02x\n", reflect.ValueOf(&p.Rip).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_CS 0x%02x\n", reflect.ValueOf(&p.Cs).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer())
-}
-
-// Useful bits.
-const (
- _CR0_PE = 1 << 0
- _CR0_ET = 1 << 4
- _CR0_AM = 1 << 18
- _CR0_PG = 1 << 31
-
- _CR4_PSE = 1 << 4
- _CR4_PAE = 1 << 5
- _CR4_PGE = 1 << 7
- _CR4_OSFXSR = 1 << 9
- _CR4_OSXMMEXCPT = 1 << 10
- _CR4_FSGSBASE = 1 << 16
- _CR4_PCIDE = 1 << 17
- _CR4_OSXSAVE = 1 << 18
- _CR4_SMEP = 1 << 20
-
- _RFLAGS_AC = 1 << 18
- _RFLAGS_NT = 1 << 14
- _RFLAGS_IOPL0 = 1 << 12
- _RFLAGS_IOPL1 = 1 << 13
- _RFLAGS_IOPL = _RFLAGS_IOPL0 | _RFLAGS_IOPL1
- _RFLAGS_DF = 1 << 10
- _RFLAGS_IF = 1 << 9
- _RFLAGS_STEP = 1 << 8
- _RFLAGS_RESERVED = 1 << 1
-
- _EFER_SCE = 0x001
- _EFER_LME = 0x100
- _EFER_LMA = 0x400
- _EFER_NX = 0x800
-
- _MSR_STAR = 0xc0000081
- _MSR_LSTAR = 0xc0000082
- _MSR_CSTAR = 0xc0000083
- _MSR_SYSCALL_MASK = 0xc0000084
- _MSR_PLATFORM_INFO = 0xce
- _MSR_MISC_FEATURES = 0x140
-
- _PLATFORM_INFO_CPUID_FAULT = 1 << 31
-
- _MISC_FEATURE_CPUID_TRAP = 0x1
-)
-
-const (
- // KernelFlagsSet should always be set in the kernel.
- KernelFlagsSet = _RFLAGS_RESERVED
-
- // UserFlagsSet are always set in userspace.
- //
- // _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege
- // level. The Current Privilege Level (CPL) of the task must be less
- // than or equal to the IOPL in order for the task or program to access
- // I/O ports.
- //
- // Here, _RFLAGS_IOPL0 is used only to determine whether the task is
- // running in the kernel or userspace mode. In the user mode, the CPL is
- // always 3 and it doesn't matter what IOPL is set if it is bellow CPL.
- //
- // We need to have one bit which will be always different in user and
- // kernel modes. And we have to remember that even though we have
- // KernelFlagsClear, we still can see some of these flags in the kernel
- // mode. This can happen when the goruntime switches on a goroutine
- // which has been saved in the host mode. On restore, the popf
- // instruction is used to restore flags and this means that all flags
- // what the goroutine has in the host mode will be restored in the
- // kernel mode.
- //
- // _RFLAGS_IOPL0 is never set in host and kernel modes and we always set
- // it in the user mode. So if this flag is set, the task is running in
- // the user mode and if it isn't set, the task is running in the kernel
- // mode.
- UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0
-
- // KernelFlagsClear should always be clear in the kernel.
- KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT
-
- // UserFlagsClear are always cleared in userspace.
- UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1
-)
-
-// IsKernelFlags returns true if rflags coresponds to the kernel mode.
-//
-// go:nosplit
-func IsKernelFlags(rflags uint64) bool {
- return rflags&_RFLAGS_IOPL0 == 0
-}
-
-// Vector is an exception vector.
-type Vector uintptr
-
-// Exception vectors.
-const (
- DivideByZero Vector = iota
- Debug
- NMI
- Breakpoint
- Overflow
- BoundRangeExceeded
- InvalidOpcode
- DeviceNotAvailable
- DoubleFault
- CoprocessorSegmentOverrun
- InvalidTSS
- SegmentNotPresent
- StackSegmentFault
- GeneralProtectionFault
- PageFault
- _
- X87FloatingPointException
- AlignmentCheck
- MachineCheck
- SIMDFloatingPointException
- VirtualizationException
- SecurityException = 0x1e
- SyscallInt80 = 0x80
- _NR_INTERRUPTS = 0x100
-)
-
-// System call vectors.
-const (
- Syscall Vector = _NR_INTERRUPTS
-)
-
-// VirtualAddressBits returns the number bits available for virtual addresses.
-//
-// Note that sign-extension semantics apply to the highest order bit.
-//
-// FIXME(b/69382326): This should use the cpuid passed to Init.
-func VirtualAddressBits() uint32 {
- ax, _, _, _ := cpuid.HostID(0x80000008, 0)
- return (ax >> 8) & 0xff
-}
-
-// PhysicalAddressBits returns the number of bits available for physical addresses.
-//
-// FIXME(b/69382326): This should use the cpuid passed to Init.
-func PhysicalAddressBits() uint32 {
- ax, _, _, _ := cpuid.HostID(0x80000008, 0)
- return ax & 0xff
-}
-
-// Selector is a segment Selector.
-type Selector uint16
-
-// SegmentDescriptor is a segment descriptor.
-type SegmentDescriptor struct {
- bits [2]uint32
-}
-
-// descriptorTable is a collection of descriptors.
-type descriptorTable [32]SegmentDescriptor
-
-// SegmentDescriptorFlags are typed flags within a descriptor.
-type SegmentDescriptorFlags uint32
-
-// SegmentDescriptorFlag declarations.
-const (
- SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set).
- SegmentDescriptorWrite = 1 << 9 // Write permission.
- SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used.
- SegmentDescriptorExecute = 1 << 11 // Execute permission.
- SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data.
- SegmentDescriptorPresent = 1 << 15 // Present.
- SegmentDescriptorAVL = 1 << 20 // Available.
- SegmentDescriptorLong = 1 << 21 // Long mode.
- SegmentDescriptorDB = 1 << 22 // 16 or 32-bit.
- SegmentDescriptorG = 1 << 23 // Granularity: page or byte.
-)
-
-// Base returns the descriptor's base linear address.
-func (d *SegmentDescriptor) Base() uint32 {
- return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16
-}
-
-// Limit returns the descriptor size.
-func (d *SegmentDescriptor) Limit() uint32 {
- l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000
- if d.bits[1]&uint32(SegmentDescriptorG) != 0 {
- l <<= 12
- l |= 0xFFF
- }
- return l
-}
-
-// Flags returns descriptor flags.
-func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags {
- return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00)
-}
-
-// DPL returns the descriptor privilege level.
-func (d *SegmentDescriptor) DPL() int {
- return int((d.bits[1] >> 13) & 3)
-}
-
-func (d *SegmentDescriptor) setNull() {
- d.bits[0] = 0
- d.bits[1] = 0
-}
-
-func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) {
- flags |= SegmentDescriptorPresent
- if limit>>12 != 0 {
- limit >>= 12
- flags |= SegmentDescriptorG
- }
- d.bits[0] = base<<16 | limit&0xFFFF
- d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13
-}
-
-func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorDB|
- SegmentDescriptorExecute|
- SegmentDescriptorSystem)
-}
-
-func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorG|
- SegmentDescriptorLong|
- SegmentDescriptorExecute|
- SegmentDescriptorSystem)
-}
-
-func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorWrite|
- SegmentDescriptorSystem)
-}
-
-// setHi is only used for the TSS segment, which is magically 64-bits.
-func (d *SegmentDescriptor) setHi(base uint32) {
- d.bits[0] = base
- d.bits[1] = 0
-}
-
-// Gate64 is a 64-bit task, trap, or interrupt gate.
-type Gate64 struct {
- bits [4]uint32
-}
-
-// idt64 is a 64-bit interrupt descriptor table.
-type idt64 [_NR_INTERRUPTS]Gate64
-
-func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) {
- g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF
- g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7
- g.bits[2] = uint32(rip >> 32)
-}
-
-func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) {
- g.setInterrupt(cs, rip, dpl, ist)
- g.bits[1] |= 1 << 8
-}
-
-// TaskState64 is a 64-bit task state structure.
-type TaskState64 struct {
- _ uint32
- rsp0Lo, rsp0Hi uint32
- rsp1Lo, rsp1Hi uint32
- rsp2Lo, rsp2Hi uint32
- _ [2]uint32
- ist1Lo, ist1Hi uint32
- ist2Lo, ist2Hi uint32
- ist3Lo, ist3Hi uint32
- ist4Lo, ist4Hi uint32
- ist5Lo, ist5Hi uint32
- ist6Lo, ist6Hi uint32
- ist7Lo, ist7Hi uint32
- _ [2]uint32
- _ uint16
- ioPerm uint16
-}
diff --git a/pkg/sentry/platform/ring0/defs_impl_arm64.go b/pkg/sentry/platform/ring0/defs_impl_arm64.go
deleted file mode 100644
index 92c64fe1f..000000000
--- a/pkg/sentry/platform/ring0/defs_impl_arm64.go
+++ /dev/null
@@ -1,428 +0,0 @@
-// +build arm64
-// +build arm64
-// +build arm64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
-
- "fmt"
- "gvisor.dev/gvisor/pkg/usermem"
- "io"
- "reflect"
-)
-
-// Useful bits.
-const (
- _PGD_PGT_BASE = 0x1000
- _PGD_PGT_SIZE = 0x1000
- _PUD_PGT_BASE = 0x2000
- _PUD_PGT_SIZE = 0x1000
- _PMD_PGT_BASE = 0x3000
- _PMD_PGT_SIZE = 0x4000
- _PTE_PGT_BASE = 0x7000
- _PTE_PGT_SIZE = 0x1000
-)
-
-const (
- // DAIF bits:debug, sError, IRQ, FIQ.
- _PSR_D_BIT = 0x00000200
- _PSR_A_BIT = 0x00000100
- _PSR_I_BIT = 0x00000080
- _PSR_F_BIT = 0x00000040
- _PSR_DAIF_SHIFT = 6
- _PSR_DAIF_MASK = 0xf << _PSR_DAIF_SHIFT
-
- // PSR bits.
- _PSR_MODE_EL0t = 0x00000000
- _PSR_MODE_EL1t = 0x00000004
- _PSR_MODE_EL1h = 0x00000005
- _PSR_MODE_MASK = 0x0000000f
-
- PsrFlagsClear = _PSR_MODE_MASK | _PSR_DAIF_MASK
- PsrModeMask = _PSR_MODE_MASK
-
- // KernelFlagsSet should always be set in the kernel.
- KernelFlagsSet = _PSR_MODE_EL1h | _PSR_D_BIT | _PSR_A_BIT | _PSR_I_BIT | _PSR_F_BIT
-
- // UserFlagsSet are always set in userspace.
- UserFlagsSet = _PSR_MODE_EL0t
-)
-
-// Vector is an exception vector.
-type Vector uintptr
-
-// Exception vectors.
-const (
- El1InvSync = iota
- El1InvIrq
- El1InvFiq
- El1InvError
-
- El1Sync
- El1Irq
- El1Fiq
- El1Err
-
- El0Sync
- El0Irq
- El0Fiq
- El0Err
-
- El0InvSync
- El0InvIrq
- El0InvFiq
- El0InvErr
-
- El1SyncDa
- El1SyncIa
- El1SyncSpPc
- El1SyncUndef
- El1SyncDbg
- El1SyncInv
-
- El0SyncSVC
- El0SyncDa
- El0SyncIa
- El0SyncFpsimdAcc
- El0SyncSveAcc
- El0SyncFpsimdExc
- El0SyncSys
- El0SyncSpPc
- El0SyncUndef
- El0SyncDbg
- El0SyncWfx
- El0SyncInv
-
- El0ErrNMI
- El0ErrBounce
-
- _NR_INTERRUPTS
-)
-
-// System call vectors.
-const (
- Syscall Vector = El0SyncSVC
- PageFault Vector = El0SyncDa
- VirtualizationException Vector = El0ErrBounce
-)
-
-// VirtualAddressBits returns the number of bits available for virtual addresses.
-func VirtualAddressBits() uint32 {
- return 48
-}
-
-// PhysicalAddressBits returns the number of bits available for physical addresses.
-func PhysicalAddressBits() uint32 {
- return 40
-}
-
-// Kernel is a global kernel object.
-//
-// This contains global state, shared by multiple CPUs.
-type Kernel struct {
- // PageTables are the kernel pagetables; this must be provided.
- PageTables *pagetables.PageTables
-
- KernelArchState
-}
-
-// Hooks are hooks for kernel functions.
-type Hooks interface {
- // KernelSyscall is called for kernel system calls.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelSyscall()
-
- // KernelException handles an exception during kernel execution.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelException(Vector)
-}
-
-// CPU is the per-CPU struct.
-type CPU struct {
- // self is a self reference.
- //
- // This is always guaranteed to be at offset zero.
- self *CPU
-
- // kernel is a reference to the kernel that this CPU was initialized
- // with. This reference is kept for garbage collection purposes: CPU
- // registers may refer to objects within the Kernel object that cannot
- // be safely freed.
- kernel *Kernel
-
- // CPUArchState is architecture-specific state.
- CPUArchState
-
- // registers is a set of registers; these may be used on kernel system
- // calls and exceptions via the Registers function.
- registers arch.Registers
-
- // hooks are kernel hooks.
- hooks Hooks
-}
-
-// Registers returns a pointer to this CPU's stored kernel registers; they may be modified in place.
-//
-// This is explicitly safe to call during KernelException and KernelSyscall.
-//
-//go:nosplit
-func (c *CPU) Registers() *arch.Registers {
- return &c.registers
-}
-
-// SwitchOpts are passed to the Switch function.
-type SwitchOpts struct {
- // Registers are the user register state.
- Registers *arch.Registers
-
- // FloatingPointState is a byte pointer where floating point state is
- // saved and restored.
- FloatingPointState *byte
-
- // PageTables are the application page tables.
- PageTables *pagetables.PageTables
-
- // Flush indicates that a TLB flush should be forced on switch.
- Flush bool
-
- // FullRestore indicates that an iret-based restore should be used.
- FullRestore bool
-
- // SwitchArchOpts are architecture-specific options.
- SwitchArchOpts
-}
-
-var (
- // UserspaceSize is the total size of userspace.
- UserspaceSize = uintptr(1) << (VirtualAddressBits())
-
- // MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
-
- // KernelStartAddress is the starting kernel address.
- KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
-)
-
-// KernelArchState contains architecture-specific state.
-type KernelArchState struct {
-}
-
-// CPUArchState contains CPU-specific arch state.
-type CPUArchState struct {
- // stack is the stack used for interrupts on this CPU.
- stack [512]byte
-
- // errorCode is the error code from the last exception.
- errorCode uintptr
-
- // errorType indicates the type of error code here, it is always set
- // along with the errorCode value above.
- //
- // It will either be 1, which indicates a user error, or 0 indicating a
- // kernel error. If ErrorCode below returns false (kernel error),
- // then it cannot provide relevant information about the last
- // exception.
- errorType uintptr
-
- // faultAddr is the value of far_el1.
- faultAddr uintptr
-
- // ttbr0Kvm is the value of ttbr0_el1 for sentry.
- ttbr0Kvm uintptr
-
- // ttbr0App is the value of ttbr0_el1 for application.
- ttbr0App uintptr
-
- // vecCode is the exception vector.
- vecCode Vector
-
- // appAddr is the application context pointer.
- appAddr uintptr
-
- // lazyVFP is the value of cpacr_el1.
- lazyVFP uintptr
-
- // appASID is the asid value of guest application.
- appASID uintptr
-}
-
-// ErrorCode returns the last error code.
-//
-// The returned boolean indicates whether the error code corresponds to the
-// last user error or not. If it does not, then fault information must be
-// ignored. This is generally the result of a kernel fault while servicing a
-// user fault.
-//
-//go:nosplit
-func (c *CPU) ErrorCode() (value uintptr, user bool) {
- return c.errorCode, c.errorType != 0
-}
-
-// ClearErrorCode resets the error code.
-//
-//go:nosplit
-func (c *CPU) ClearErrorCode() {
- c.errorCode = 0
- c.errorType = 1
-}
-
-//go:nosplit
-func (c *CPU) GetFaultAddr() (value uintptr) {
- return c.faultAddr
-}
-
-//go:nosplit
-func (c *CPU) SetTtbr0Kvm(value uintptr) {
- c.ttbr0Kvm = value
-}
-
-//go:nosplit
-func (c *CPU) SetTtbr0App(value uintptr) {
- c.ttbr0App = value
-}
-
-//go:nosplit
-func (c *CPU) GetVector() (value Vector) {
- return c.vecCode
-}
-
-//go:nosplit
-func (c *CPU) SetAppAddr(value uintptr) {
- c.appAddr = value
-}
-
-// GetLazyVFP returns the value of cpacr_el1.
-//go:nosplit
-func (c *CPU) GetLazyVFP() (value uintptr) {
- return c.lazyVFP
-}
-
-// SwitchArchOpts are embedded in SwitchOpts.
-type SwitchArchOpts struct {
- // UserASID is the application ASID to be used on switch.
- UserASID uint16
-
- // KernelASID is the kernel ASID to be used on return.
- KernelASID uint16
-}
-
-func init() {
-}
-
-// Emit prints architecture-specific offsets.
-func Emit(w io.Writer) {
- fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
-
- c := &CPU{}
- fmt.Fprintf(w, "\n// CPU offsets.\n")
- fmt.Fprintf(w, "#define CPU_SELF 0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack)))
- fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_FAULT_ADDR 0x%02x\n", reflect.ValueOf(&c.faultAddr).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_TTBR0_KVM 0x%02x\n", reflect.ValueOf(&c.ttbr0Kvm).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_TTBR0_APP 0x%02x\n", reflect.ValueOf(&c.ttbr0App).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_VECTOR_CODE 0x%02x\n", reflect.ValueOf(&c.vecCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_APP_ADDR 0x%02x\n", reflect.ValueOf(&c.appAddr).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_LAZY_VFP 0x%02x\n", reflect.ValueOf(&c.lazyVFP).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_APP_ASID 0x%02x\n", reflect.ValueOf(&c.appASID).Pointer()-reflect.ValueOf(c).Pointer())
-
- fmt.Fprintf(w, "\n// Bits.\n")
- fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
-
- fmt.Fprintf(w, "\n// Vectors.\n")
-
- fmt.Fprintf(w, "#define El1Sync 0x%02x\n", El1Sync)
- fmt.Fprintf(w, "#define El1Irq 0x%02x\n", El1Irq)
- fmt.Fprintf(w, "#define El1Fiq 0x%02x\n", El1Fiq)
- fmt.Fprintf(w, "#define El1Err 0x%02x\n", El1Err)
-
- fmt.Fprintf(w, "#define El0Sync 0x%02x\n", El0Sync)
- fmt.Fprintf(w, "#define El0Irq 0x%02x\n", El0Irq)
- fmt.Fprintf(w, "#define El0Fiq 0x%02x\n", El0Fiq)
- fmt.Fprintf(w, "#define El0Err 0x%02x\n", El0Err)
-
- fmt.Fprintf(w, "#define El1SyncDa 0x%02x\n", El1SyncDa)
- fmt.Fprintf(w, "#define El1SyncIa 0x%02x\n", El1SyncIa)
- fmt.Fprintf(w, "#define El1SyncSpPc 0x%02x\n", El1SyncSpPc)
- fmt.Fprintf(w, "#define El1SyncUndef 0x%02x\n", El1SyncUndef)
- fmt.Fprintf(w, "#define El1SyncDbg 0x%02x\n", El1SyncDbg)
- fmt.Fprintf(w, "#define El1SyncInv 0x%02x\n", El1SyncInv)
-
- fmt.Fprintf(w, "#define El0SyncSVC 0x%02x\n", El0SyncSVC)
- fmt.Fprintf(w, "#define El0SyncDa 0x%02x\n", El0SyncDa)
- fmt.Fprintf(w, "#define El0SyncIa 0x%02x\n", El0SyncIa)
- fmt.Fprintf(w, "#define El0SyncFpsimdAcc 0x%02x\n", El0SyncFpsimdAcc)
- fmt.Fprintf(w, "#define El0SyncSveAcc 0x%02x\n", El0SyncSveAcc)
- fmt.Fprintf(w, "#define El0SyncFpsimdExc 0x%02x\n", El0SyncFpsimdExc)
- fmt.Fprintf(w, "#define El0SyncSys 0x%02x\n", El0SyncSys)
- fmt.Fprintf(w, "#define El0SyncSpPc 0x%02x\n", El0SyncSpPc)
- fmt.Fprintf(w, "#define El0SyncUndef 0x%02x\n", El0SyncUndef)
- fmt.Fprintf(w, "#define El0SyncDbg 0x%02x\n", El0SyncDbg)
- fmt.Fprintf(w, "#define El0SyncWfx 0x%02x\n", El0SyncWfx)
- fmt.Fprintf(w, "#define El0SyncInv 0x%02x\n", El0SyncInv)
-
- fmt.Fprintf(w, "#define El0ErrNMI 0x%02x\n", El0ErrNMI)
-
- fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
- fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
- fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
-
- p := &arch.Registers{}
- fmt.Fprintf(w, "\n// Ptrace registers.\n")
- fmt.Fprintf(w, "#define PTRACE_R0 0x%02x\n", reflect.ValueOf(&p.Regs[0]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R1 0x%02x\n", reflect.ValueOf(&p.Regs[1]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R2 0x%02x\n", reflect.ValueOf(&p.Regs[2]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R3 0x%02x\n", reflect.ValueOf(&p.Regs[3]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R4 0x%02x\n", reflect.ValueOf(&p.Regs[4]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R5 0x%02x\n", reflect.ValueOf(&p.Regs[5]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R6 0x%02x\n", reflect.ValueOf(&p.Regs[6]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R7 0x%02x\n", reflect.ValueOf(&p.Regs[7]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.Regs[8]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.Regs[9]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.Regs[10]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.Regs[11]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.Regs[12]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.Regs[13]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.Regs[14]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.Regs[15]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R16 0x%02x\n", reflect.ValueOf(&p.Regs[16]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R17 0x%02x\n", reflect.ValueOf(&p.Regs[17]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R18 0x%02x\n", reflect.ValueOf(&p.Regs[18]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R19 0x%02x\n", reflect.ValueOf(&p.Regs[19]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R20 0x%02x\n", reflect.ValueOf(&p.Regs[20]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R21 0x%02x\n", reflect.ValueOf(&p.Regs[21]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R22 0x%02x\n", reflect.ValueOf(&p.Regs[22]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R23 0x%02x\n", reflect.ValueOf(&p.Regs[23]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R24 0x%02x\n", reflect.ValueOf(&p.Regs[24]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R25 0x%02x\n", reflect.ValueOf(&p.Regs[25]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R26 0x%02x\n", reflect.ValueOf(&p.Regs[26]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R27 0x%02x\n", reflect.ValueOf(&p.Regs[27]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R28 0x%02x\n", reflect.ValueOf(&p.Regs[28]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R29 0x%02x\n", reflect.ValueOf(&p.Regs[29]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R30 0x%02x\n", reflect.ValueOf(&p.Regs[30]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_SP 0x%02x\n", reflect.ValueOf(&p.Sp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_PC 0x%02x\n", reflect.ValueOf(&p.Pc).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_PSTATE 0x%02x\n", reflect.ValueOf(&p.Pstate).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_TLS 0x%02x\n", reflect.ValueOf(&p.TPIDR_EL0).Pointer()-reflect.ValueOf(p).Pointer())
-}
diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/sentry/platform/ring0/entry_amd64.go
deleted file mode 100644
index d87b1fd00..000000000
--- a/pkg/sentry/platform/ring0/entry_amd64.go
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
-)
-
-// This is an assembly function.
-//
-// The sysenter function is invoked in two situations:
-//
-// (1) The guest kernel has executed a system call.
-// (2) The guest application has executed a system call.
-//
-// The interrupt flag is examined to determine whether the system call was
-// executed from kernel mode or not and the appropriate stub is called.
-func sysenter()
-
-// swapgs swaps the current GS value.
-//
-// This must be called prior to sysret/iret.
-func swapgs()
-
-// jumpToKernel jumps to the kernel version of the current RIP.
-func jumpToKernel()
-
-// sysret returns to userspace from a system call.
-//
-// The return code is the vector that interrupted execution.
-//
-// See stubs.go for a note regarding the frame size of this function.
-func sysret(cpu *CPU, regs *arch.Registers, userCR3 uintptr) Vector
-
-// "iret is the cadillac of CPL switching."
-//
-// -- Neel Natu
-//
-// iret is nearly identical to sysret, except an iret is used to fully restore
-// all user state. This must be called in cases where all registers need to be
-// restored.
-func iret(cpu *CPU, regs *arch.Registers, userCR3 uintptr) Vector
-
-// exception is the generic exception entry.
-//
-// This is called by the individual stub definitions.
-func exception()
-
-// resume is a stub that restores the CPU kernel registers.
-//
-// This is used when processing kernel exceptions and syscalls.
-func resume()
-
-// Start is the CPU entrypoint.
-//
-// The following start conditions must be satisfied:
-//
-// * AX should contain the CPU pointer.
-// * c.GDT() should be loaded as the GDT.
-// * c.IDT() should be loaded as the IDT.
-// * c.CR0() should be the current CR0 value.
-// * c.CR3() should be set to the kernel PageTables.
-// * c.CR4() should be the current CR4 value.
-// * c.EFER() should be the current EFER value.
-//
-// The CPU state will be set to c.Registers().
-func Start()
-
-// Exception stubs.
-func divideByZero()
-func debug()
-func nmi()
-func breakpoint()
-func overflow()
-func boundRangeExceeded()
-func invalidOpcode()
-func deviceNotAvailable()
-func doubleFault()
-func coprocessorSegmentOverrun()
-func invalidTSS()
-func segmentNotPresent()
-func stackSegmentFault()
-func generalProtectionFault()
-func pageFault()
-func x87FloatingPointException()
-func alignmentCheck()
-func machineCheck()
-func simdFloatingPointException()
-func virtualizationException()
-func securityException()
-func syscallInt80()
-
-// Exception handler index.
-var handlers = map[Vector]func(){
- DivideByZero: divideByZero,
- Debug: debug,
- NMI: nmi,
- Breakpoint: breakpoint,
- Overflow: overflow,
- BoundRangeExceeded: boundRangeExceeded,
- InvalidOpcode: invalidOpcode,
- DeviceNotAvailable: deviceNotAvailable,
- DoubleFault: doubleFault,
- CoprocessorSegmentOverrun: coprocessorSegmentOverrun,
- InvalidTSS: invalidTSS,
- SegmentNotPresent: segmentNotPresent,
- StackSegmentFault: stackSegmentFault,
- GeneralProtectionFault: generalProtectionFault,
- PageFault: pageFault,
- X87FloatingPointException: x87FloatingPointException,
- AlignmentCheck: alignmentCheck,
- MachineCheck: machineCheck,
- SIMDFloatingPointException: simdFloatingPointException,
- VirtualizationException: virtualizationException,
- SecurityException: securityException,
- SyscallInt80: syscallInt80,
-}
diff --git a/pkg/sentry/platform/ring0/entry_arm64.go b/pkg/sentry/platform/ring0/entry_arm64.go
deleted file mode 100644
index 62a93f3d6..000000000
--- a/pkg/sentry/platform/ring0/entry_arm64.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// This is an assembly function.
-//
-// The sysenter function is invoked in two situations:
-//
-// (1) The guest kernel has executed a system call.
-// (2) The guest application has executed a system call.
-//
-// The interrupt flag is examined to determine whether the system call was
-// executed from kernel mode or not and the appropriate stub is called.
-
-func El1_sync_invalid()
-func El1_irq_invalid()
-func El1_fiq_invalid()
-func El1_error_invalid()
-
-func El1_sync()
-func El1_irq()
-func El1_fiq()
-func El1_error()
-
-func El0_sync()
-func El0_irq()
-func El0_fiq()
-func El0_error()
-
-func El0_sync_invalid()
-func El0_irq_invalid()
-func El0_fiq_invalid()
-func El0_error_invalid()
-
-func Vectors()
-
-// Start is the CPU entrypoint.
-//
-// The CPU state will be set to c.Registers().
-func Start()
-func kernelExitToEl1()
-
-func kernelExitToEl0()
-
-// Shutdown execution
-func Shutdown()
diff --git a/pkg/sentry/platform/ring0/entry_impl_amd64.s b/pkg/sentry/platform/ring0/entry_impl_amd64.s
deleted file mode 100644
index 9a5d6c064..000000000
--- a/pkg/sentry/platform/ring0/entry_impl_amd64.s
+++ /dev/null
@@ -1,441 +0,0 @@
-// build +amd64
-
-// Automatically generated, do not edit.
-
-// CPU offsets.
-#define CPU_REGISTERS 0x28
-#define CPU_ERROR_CODE 0x10
-#define CPU_ERROR_TYPE 0x18
-#define CPU_ENTRY 0x20
-
-// CPU entry offsets.
-#define ENTRY_SCRATCH0 0x100
-#define ENTRY_STACK_TOP 0x108
-#define ENTRY_CPU_SELF 0x110
-#define ENTRY_KERNEL_CR3 0x118
-
-// Bits.
-#define _RFLAGS_IF 0x200
-#define _RFLAGS_IOPL0 0x1000
-#define _KERNEL_FLAGS 0x02
-
-// Vectors.
-#define DivideByZero 0x00
-#define Debug 0x01
-#define NMI 0x02
-#define Breakpoint 0x03
-#define Overflow 0x04
-#define BoundRangeExceeded 0x05
-#define InvalidOpcode 0x06
-#define DeviceNotAvailable 0x07
-#define DoubleFault 0x08
-#define CoprocessorSegmentOverrun 0x09
-#define InvalidTSS 0x0a
-#define SegmentNotPresent 0x0b
-#define StackSegmentFault 0x0c
-#define GeneralProtectionFault 0x0d
-#define PageFault 0x0e
-#define X87FloatingPointException 0x10
-#define AlignmentCheck 0x11
-#define MachineCheck 0x12
-#define SIMDFloatingPointException 0x13
-#define VirtualizationException 0x14
-#define SecurityException 0x1e
-#define SyscallInt80 0x80
-#define Syscall 0x100
-
-// Ptrace registers.
-#define PTRACE_R15 0x00
-#define PTRACE_R14 0x08
-#define PTRACE_R13 0x10
-#define PTRACE_R12 0x18
-#define PTRACE_RBP 0x20
-#define PTRACE_RBX 0x28
-#define PTRACE_R11 0x30
-#define PTRACE_R10 0x38
-#define PTRACE_R9 0x40
-#define PTRACE_R8 0x48
-#define PTRACE_RAX 0x50
-#define PTRACE_RCX 0x58
-#define PTRACE_RDX 0x60
-#define PTRACE_RSI 0x68
-#define PTRACE_RDI 0x70
-#define PTRACE_ORIGRAX 0x78
-#define PTRACE_RIP 0x80
-#define PTRACE_CS 0x88
-#define PTRACE_FLAGS 0x90
-#define PTRACE_RSP 0x98
-#define PTRACE_SS 0xa0
-#define PTRACE_FS 0xa8
-#define PTRACE_GS 0xb0
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// NB: Offsets are programmatically generated (see BUILD).
-//
-// This file is concatenated with the definitions.
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack
-// is not available for calls.
-//
-// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
-#define REGISTERS_SAVE(reg, offset) \
- MOVQ R15, offset+PTRACE_R15(reg); \
- MOVQ R14, offset+PTRACE_R14(reg); \
- MOVQ R13, offset+PTRACE_R13(reg); \
- MOVQ R12, offset+PTRACE_R12(reg); \
- MOVQ BP, offset+PTRACE_RBP(reg); \
- MOVQ BX, offset+PTRACE_RBX(reg); \
- MOVQ CX, offset+PTRACE_RCX(reg); \
- MOVQ DX, offset+PTRACE_RDX(reg); \
- MOVQ R11, offset+PTRACE_R11(reg); \
- MOVQ R10, offset+PTRACE_R10(reg); \
- MOVQ R9, offset+PTRACE_R9(reg); \
- MOVQ R8, offset+PTRACE_R8(reg); \
- MOVQ SI, offset+PTRACE_RSI(reg); \
- MOVQ DI, offset+PTRACE_RDI(reg);
-
-// Loads a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack
-// is not available for calls.
-//
-// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
-#define REGISTERS_LOAD(reg, offset) \
- MOVQ offset+PTRACE_R15(reg), R15; \
- MOVQ offset+PTRACE_R14(reg), R14; \
- MOVQ offset+PTRACE_R13(reg), R13; \
- MOVQ offset+PTRACE_R12(reg), R12; \
- MOVQ offset+PTRACE_RBP(reg), BP; \
- MOVQ offset+PTRACE_RBX(reg), BX; \
- MOVQ offset+PTRACE_RCX(reg), CX; \
- MOVQ offset+PTRACE_RDX(reg), DX; \
- MOVQ offset+PTRACE_R11(reg), R11; \
- MOVQ offset+PTRACE_R10(reg), R10; \
- MOVQ offset+PTRACE_R9(reg), R9; \
- MOVQ offset+PTRACE_R8(reg), R8; \
- MOVQ offset+PTRACE_RSI(reg), SI; \
- MOVQ offset+PTRACE_RDI(reg), DI;
-
-// WRITE_CR3() writes the given CR3 value.
-//
-// The code corresponds to:
-//
-// mov %rax, %cr3
-//
-#define WRITE_CR3() \
- BYTE $0x0f; BYTE $0x22; BYTE $0xd8;
-
-// SWAP_GS swaps the kernel GS (CPU).
-#define SWAP_GS() \
- BYTE $0x0F; BYTE $0x01; BYTE $0xf8;
-
-// IRET returns from an interrupt frame.
-#define IRET() \
- BYTE $0x48; BYTE $0xcf;
-
-// SYSRET64 executes the sysret instruction.
-#define SYSRET64() \
- BYTE $0x48; BYTE $0x0f; BYTE $0x07;
-
-// LOAD_KERNEL_STACK loads the kernel stack.
-#define LOAD_KERNEL_STACK(entry) \
- MOVQ ENTRY_STACK_TOP(entry), SP;
-
-// See kernel.go.
-TEXT ·Halt(SB),NOSPLIT,$0
- HLT
- RET
-
-// See entry_amd64.go.
-TEXT ·swapgs(SB),NOSPLIT,$0
- SWAP_GS()
- RET
-
-// jumpToKernel changes execution to the kernel address space.
-//
-// This works by changing the return address to the kernel version.
-TEXT ·jumpToKernel(SB),NOSPLIT,$0
- MOVQ 0(SP), AX
- ORQ ·KernelStartAddress(SB), AX // Future return address.
- MOVQ AX, 0(SP)
- RET
-
-// See entry_amd64.go.
-TEXT ·sysret(SB),NOSPLIT,$0-24
- CALL ·jumpToKernel(SB)
- // Save original state and stack. sysenter() or exception()
- // from APP(gr3) will switch to this stack, set the return
- // value (vector: 32(SP)) and then do RET, which will also
- // automatically return to the lower half.
- MOVQ cpu+0(FP), BX
- MOVQ regs+8(FP), AX
- MOVQ userCR3+16(FP), CX
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
- MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
- MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
-
- // save SP AX userCR3 on the kernel stack.
- MOVQ CPU_ENTRY(BX), BX
- LOAD_KERNEL_STACK(BX)
- PUSHQ PTRACE_RSP(AX)
- PUSHQ PTRACE_RAX(AX)
- PUSHQ CX
-
- // Restore user register state.
- REGISTERS_LOAD(AX, 0)
- MOVQ PTRACE_RIP(AX), CX // Needed for SYSRET.
- MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.
-
- // restore userCR3, AX, SP.
- POPQ AX // Get userCR3.
- WRITE_CR3() // Switch to userCR3.
- POPQ AX // Restore AX.
- POPQ SP // Restore SP.
- SYSRET64()
-
-// See entry_amd64.go.
-TEXT ·iret(SB),NOSPLIT,$0-24
- CALL ·jumpToKernel(SB)
- // Save original state and stack. sysenter() or exception()
- // from APP(gr3) will switch to this stack, set the return
- // value (vector: 32(SP)) and then do RET, which will also
- // automatically return to the lower half.
- MOVQ cpu+0(FP), BX
- MOVQ regs+8(FP), AX
- MOVQ userCR3+16(FP), CX
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
- MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
- MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
-
- // Build an IRET frame & restore state.
- MOVQ CPU_ENTRY(BX), BX
- LOAD_KERNEL_STACK(BX)
- PUSHQ PTRACE_SS(AX)
- PUSHQ PTRACE_RSP(AX)
- PUSHQ PTRACE_FLAGS(AX)
- PUSHQ PTRACE_CS(AX)
- PUSHQ PTRACE_RIP(AX)
- PUSHQ PTRACE_RAX(AX) // Save AX on kernel stack.
- PUSHQ CX // Save userCR3 on kernel stack.
- REGISTERS_LOAD(AX, 0) // Restore most registers.
- POPQ AX // Get userCR3.
- WRITE_CR3() // Switch to userCR3.
- POPQ AX // Restore AX.
- IRET()
-
-// See entry_amd64.go.
-TEXT ·resume(SB),NOSPLIT,$0
- // See iret, above.
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- PUSHQ CPU_REGISTERS+PTRACE_SS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_RSP(AX)
- PUSHQ CPU_REGISTERS+PTRACE_FLAGS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_CS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_RIP(AX)
- REGISTERS_LOAD(AX, CPU_REGISTERS)
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX
- IRET()
-
-// See entry_amd64.go.
-TEXT ·Start(SB),NOSPLIT,$0
- PUSHQ $0x0 // Previous frame pointer.
- MOVQ SP, BP // Set frame pointer.
- PUSHQ AX // First argument (CPU).
- CALL ·start(SB) // Call Go hook.
- JMP ·resume(SB) // Restore to registers.
-
-// See entry_amd64.go.
-TEXT ·sysenter(SB),NOSPLIT,$0
- // _RFLAGS_IOPL0 is always set in the user mode and it is never set in
- // the kernel mode. See the comment of UserFlagsSet for more details.
- TESTL $_RFLAGS_IOPL0, R11
- JZ kernel
-user:
- SWAP_GS()
- MOVQ AX, ENTRY_SCRATCH0(GS) // Save user AX on scratch.
- MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
- WRITE_CR3() // Switch to kernel cr3.
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
- REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
- MOVQ CX, PTRACE_RIP(AX)
- MOVQ R11, PTRACE_FLAGS(AX)
- MOVQ SP, PTRACE_RSP(AX)
- MOVQ ENTRY_SCRATCH0(GS), CX // Load saved user AX value.
- MOVQ CX, PTRACE_RAX(AX) // Save everything else.
- MOVQ CX, PTRACE_ORIGRAX(AX)
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Get stacks.
- MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
- MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
-
- // Return to the kernel, where the frame is:
- //
- // vector (sp+32)
- // userCR3 (sp+24)
- // regs (sp+16)
- // cpu (sp+8)
- // vcpu.Switch (sp+0)
- //
- MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
- MOVQ $Syscall, 32(SP) // Output vector.
- RET
-
-kernel:
- // We can't restore the original stack, but we can access the registers
- // in the CPU state directly. No need for temporary juggling.
- MOVQ AX, ENTRY_SCRATCH0(GS)
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- REGISTERS_SAVE(AX, CPU_REGISTERS)
- MOVQ CX, CPU_REGISTERS+PTRACE_RIP(AX)
- MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(AX)
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(AX)
- MOVQ ENTRY_SCRATCH0(GS), BX
- MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
- MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
- MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
- MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
-
- // Call the syscall trampoline.
- LOAD_KERNEL_STACK(GS)
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelSyscall(SB) // Call the trampoline.
- POPQ AX // Pop vCPU.
- JMP ·resume(SB)
-
-// exception is a generic exception handler.
-//
-// There are two cases handled:
-//
-// 1) An exception in kernel mode: this results in saving the state at the time
-// of the exception and calling the defined hook.
-//
-// 2) An exception in guest mode: the original kernel frame is restored, and
-// the vector & error codes are pushed as return values.
-//
-// See below for the stubs that call exception.
-TEXT ·exception(SB),NOSPLIT,$0
- // Determine whether the exception occurred in kernel mode or user
- // mode, based on the flags. We expect the following stack:
- //
- // SS (sp+48)
- // SP (sp+40)
- // FLAGS (sp+32)
- // CS (sp+24)
- // IP (sp+16)
- // ERROR_CODE (sp+8)
- // VECTOR (sp+0)
- //
- TESTL $_RFLAGS_IOPL0, 32(SP)
- JZ kernel
-
-user:
- SWAP_GS()
- ADDQ $-8, SP // Adjust for flags.
- MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ).
- PUSHQ AX // Save user AX on stack.
- MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
- WRITE_CR3() // Switch to kernel cr3.
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
- REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
- POPQ BX // Restore original AX.
- MOVQ BX, PTRACE_RAX(AX) // Save it.
- MOVQ BX, PTRACE_ORIGRAX(AX)
- MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
- MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
- MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
- MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
- MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)
-
- // Copy out and return.
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ 0(SP), BX // Load vector.
- MOVQ 8(SP), CX // Load error code.
- MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Original stack (kernel version).
- MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
- MOVQ CX, CPU_ERROR_CODE(AX) // Set error code.
- MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
- MOVQ BX, 32(SP) // Output vector.
- RET
-
-kernel:
- // As per above, we can save directly.
- PUSHQ AX
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- REGISTERS_SAVE(AX, CPU_REGISTERS)
- POPQ BX
- MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
- MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
- MOVQ 16(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RIP(AX)
- MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(AX)
- MOVQ 40(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RSP(AX)
-
- // Set the error code and adjust the stack.
- MOVQ 8(SP), BX // Load the error code.
- MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
- MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
- MOVQ 0(SP), BX // BX contains the vector.
-
- // Call the exception trampoline.
- LOAD_KERNEL_STACK(GS)
- PUSHQ BX // Second argument (vector).
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelException(SB) // Call the trampoline.
- POPQ BX // Pop vector.
- POPQ AX // Pop vCPU.
- JMP ·resume(SB)
-
-#define EXCEPTION_WITH_ERROR(value, symbol) \
-TEXT symbol,NOSPLIT,$0; \
- PUSHQ $value; \
- JMP ·exception(SB);
-
-#define EXCEPTION_WITHOUT_ERROR(value, symbol) \
-TEXT symbol,NOSPLIT,$0; \
- PUSHQ $0x0; \
- PUSHQ $value; \
- JMP ·exception(SB);
-
-EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB))
-EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB))
-EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB))
-EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB))
-EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB))
-EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB))
-EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB))
-EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB))
-EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB))
-EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB))
-EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB))
-EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB))
-EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB))
-EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB))
-EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB))
-EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB))
-EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB))
-EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB))
-EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB))
-EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB))
-EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB))
-EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB))
diff --git a/pkg/sentry/platform/ring0/entry_impl_arm64.s b/pkg/sentry/platform/ring0/entry_impl_arm64.s
deleted file mode 100644
index b3d300fb7..000000000
--- a/pkg/sentry/platform/ring0/entry_impl_arm64.s
+++ /dev/null
@@ -1,858 +0,0 @@
-// build +arm64
-
-// Automatically generated, do not edit.
-
-// CPU offsets.
-#define CPU_SELF 0x00
-#define CPU_REGISTERS 0x258
-#define CPU_STACK_TOP 0x210
-#define CPU_ERROR_CODE 0x210
-#define CPU_ERROR_TYPE 0x218
-#define CPU_FAULT_ADDR 0x220
-#define CPU_TTBR0_KVM 0x228
-#define CPU_TTBR0_APP 0x230
-#define CPU_VECTOR_CODE 0x238
-#define CPU_APP_ADDR 0x240
-#define CPU_LAZY_VFP 0x248
-#define CPU_APP_ASID 0x250
-
-// Bits.
-#define _KERNEL_FLAGS 0x3c5
-
-// Vectors.
-#define El1Sync 0x04
-#define El1Irq 0x05
-#define El1Fiq 0x06
-#define El1Err 0x07
-#define El0Sync 0x08
-#define El0Irq 0x09
-#define El0Fiq 0x0a
-#define El0Err 0x0b
-#define El1SyncDa 0x10
-#define El1SyncIa 0x11
-#define El1SyncSpPc 0x12
-#define El1SyncUndef 0x13
-#define El1SyncDbg 0x14
-#define El1SyncInv 0x15
-#define El0SyncSVC 0x16
-#define El0SyncDa 0x17
-#define El0SyncIa 0x18
-#define El0SyncFpsimdAcc 0x19
-#define El0SyncSveAcc 0x1a
-#define El0SyncFpsimdExc 0x1b
-#define El0SyncSys 0x1c
-#define El0SyncSpPc 0x1d
-#define El0SyncUndef 0x1e
-#define El0SyncDbg 0x1f
-#define El0SyncWfx 0x20
-#define El0SyncInv 0x21
-#define El0ErrNMI 0x22
-#define PageFault 0x17
-#define Syscall 0x16
-#define VirtualizationException 0x23
-
-// Ptrace registers.
-#define PTRACE_R0 0x00
-#define PTRACE_R1 0x08
-#define PTRACE_R2 0x10
-#define PTRACE_R3 0x18
-#define PTRACE_R4 0x20
-#define PTRACE_R5 0x28
-#define PTRACE_R6 0x30
-#define PTRACE_R7 0x38
-#define PTRACE_R8 0x40
-#define PTRACE_R9 0x48
-#define PTRACE_R10 0x50
-#define PTRACE_R11 0x58
-#define PTRACE_R12 0x60
-#define PTRACE_R13 0x68
-#define PTRACE_R14 0x70
-#define PTRACE_R15 0x78
-#define PTRACE_R16 0x80
-#define PTRACE_R17 0x88
-#define PTRACE_R18 0x90
-#define PTRACE_R19 0x98
-#define PTRACE_R20 0xa0
-#define PTRACE_R21 0xa8
-#define PTRACE_R22 0xb0
-#define PTRACE_R23 0xb8
-#define PTRACE_R24 0xc0
-#define PTRACE_R25 0xc8
-#define PTRACE_R26 0xd0
-#define PTRACE_R27 0xd8
-#define PTRACE_R28 0xe0
-#define PTRACE_R29 0xe8
-#define PTRACE_R30 0xf0
-#define PTRACE_SP 0xf8
-#define PTRACE_PC 0x100
-#define PTRACE_PSTATE 0x108
-#define PTRACE_TLS 0x110
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// NB: Offsets are programmatically generated (see BUILD).
-//
-// This file is concatenated with the definitions.
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-
-// ERET returns using the ELR and SPSR for the current exception level.
-#define ERET() \
- WORD $0xd69f03e0; \
- DSB $7; \
- ISB $15;
-
-// RSV_REG is a register that holds el1 information temporarily.
-#define RSV_REG R18_PLATFORM
-
-// RSV_REG_APP is a register that holds el0 information temporarily.
-#define RSV_REG_APP R9
-
-#define FPEN_NOTRAP 0x3
-#define FPEN_SHIFT 20
-
-#define FPEN_ENABLE (FPEN_NOTRAP << FPEN_SHIFT)
-
-// sctlr_el1: system control register el1.
-#define SCTLR_M 1 << 0
-#define SCTLR_C 1 << 2
-#define SCTLR_I 1 << 12
-#define SCTLR_DZE 1 << 14
-#define SCTLR_UCT 1 << 15
-#define SCTLR_UCI 1 << 26
-
-#define SCTLR_EL1_DEFAULT (SCTLR_M | SCTLR_C | SCTLR_I | SCTLR_UCT | SCTLR_UCI | SCTLR_DZE)
-
-// cntkctl_el1: counter-timer kernel control register el1.
-#define CNTKCTL_EL0PCTEN 1 << 0
-#define CNTKCTL_EL0VCTEN 1 << 1
-
-#define CNTKCTL_EL1_DEFAULT (CNTKCTL_EL0PCTEN | CNTKCTL_EL0VCTEN)
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not saved: R9, R18.
-#define REGISTERS_SAVE(reg, offset) \
- MOVD R0, offset+PTRACE_R0(reg); \
- MOVD R1, offset+PTRACE_R1(reg); \
- MOVD R2, offset+PTRACE_R2(reg); \
- MOVD R3, offset+PTRACE_R3(reg); \
- MOVD R4, offset+PTRACE_R4(reg); \
- MOVD R5, offset+PTRACE_R5(reg); \
- MOVD R6, offset+PTRACE_R6(reg); \
- MOVD R7, offset+PTRACE_R7(reg); \
- MOVD R8, offset+PTRACE_R8(reg); \
- MOVD R10, offset+PTRACE_R10(reg); \
- MOVD R11, offset+PTRACE_R11(reg); \
- MOVD R12, offset+PTRACE_R12(reg); \
- MOVD R13, offset+PTRACE_R13(reg); \
- MOVD R14, offset+PTRACE_R14(reg); \
- MOVD R15, offset+PTRACE_R15(reg); \
- MOVD R16, offset+PTRACE_R16(reg); \
- MOVD R17, offset+PTRACE_R17(reg); \
- MOVD R19, offset+PTRACE_R19(reg); \
- MOVD R20, offset+PTRACE_R20(reg); \
- MOVD R21, offset+PTRACE_R21(reg); \
- MOVD R22, offset+PTRACE_R22(reg); \
- MOVD R23, offset+PTRACE_R23(reg); \
- MOVD R24, offset+PTRACE_R24(reg); \
- MOVD R25, offset+PTRACE_R25(reg); \
- MOVD R26, offset+PTRACE_R26(reg); \
- MOVD R27, offset+PTRACE_R27(reg); \
- MOVD g, offset+PTRACE_R28(reg); \
- MOVD R29, offset+PTRACE_R29(reg); \
- MOVD R30, offset+PTRACE_R30(reg);
-
-// Loads a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not loaded: R9, R18.
-#define REGISTERS_LOAD(reg, offset) \
- MOVD offset+PTRACE_R0(reg), R0; \
- MOVD offset+PTRACE_R1(reg), R1; \
- MOVD offset+PTRACE_R2(reg), R2; \
- MOVD offset+PTRACE_R3(reg), R3; \
- MOVD offset+PTRACE_R4(reg), R4; \
- MOVD offset+PTRACE_R5(reg), R5; \
- MOVD offset+PTRACE_R6(reg), R6; \
- MOVD offset+PTRACE_R7(reg), R7; \
- MOVD offset+PTRACE_R8(reg), R8; \
- MOVD offset+PTRACE_R10(reg), R10; \
- MOVD offset+PTRACE_R11(reg), R11; \
- MOVD offset+PTRACE_R12(reg), R12; \
- MOVD offset+PTRACE_R13(reg), R13; \
- MOVD offset+PTRACE_R14(reg), R14; \
- MOVD offset+PTRACE_R15(reg), R15; \
- MOVD offset+PTRACE_R16(reg), R16; \
- MOVD offset+PTRACE_R17(reg), R17; \
- MOVD offset+PTRACE_R19(reg), R19; \
- MOVD offset+PTRACE_R20(reg), R20; \
- MOVD offset+PTRACE_R21(reg), R21; \
- MOVD offset+PTRACE_R22(reg), R22; \
- MOVD offset+PTRACE_R23(reg), R23; \
- MOVD offset+PTRACE_R24(reg), R24; \
- MOVD offset+PTRACE_R25(reg), R25; \
- MOVD offset+PTRACE_R26(reg), R26; \
- MOVD offset+PTRACE_R27(reg), R27; \
- MOVD offset+PTRACE_R28(reg), g; \
- MOVD offset+PTRACE_R29(reg), R29; \
- MOVD offset+PTRACE_R30(reg), R30;
-
-#define ESR_ELx_EC_UNKNOWN (0x00)
-#define ESR_ELx_EC_WFx (0x01)
-/* Unallocated EC: 0x02 */
-#define ESR_ELx_EC_CP15_32 (0x03)
-#define ESR_ELx_EC_CP15_64 (0x04)
-#define ESR_ELx_EC_CP14_MR (0x05)
-#define ESR_ELx_EC_CP14_LS (0x06)
-#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
-#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
-#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
-#define ESR_ELx_EC_ILL (0x0E)
-/* Unallocated EC: 0x0F - 0x10 */
-#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
-#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
-/* Unallocated EC: 0x14 */
-#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
-#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
-#define ESR_ELx_EC_SYS64 (0x18)
-#define ESR_ELx_EC_SVE (0x19)
-/* Unallocated EC: 0x1A - 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
-#define ESR_ELx_EC_IABT_LOW (0x20)
-#define ESR_ELx_EC_IABT_CUR (0x21)
-#define ESR_ELx_EC_PC_ALIGN (0x22)
-/* Unallocated EC: 0x23 */
-#define ESR_ELx_EC_DABT_LOW (0x24)
-#define ESR_ELx_EC_DABT_CUR (0x25)
-#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
-#define ESR_ELx_EC_FP_EXC32 (0x28)
-/* Unallocated EC: 0x29 - 0x2B */
-#define ESR_ELx_EC_FP_EXC64 (0x2C)
-/* Unallocated EC: 0x2D - 0x2E */
-#define ESR_ELx_EC_SERROR (0x2F)
-#define ESR_ELx_EC_BREAKPT_LOW (0x30)
-#define ESR_ELx_EC_BREAKPT_CUR (0x31)
-#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
-#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
-#define ESR_ELx_EC_WATCHPT_LOW (0x34)
-#define ESR_ELx_EC_WATCHPT_CUR (0x35)
-/* Unallocated EC: 0x36 - 0x37 */
-#define ESR_ELx_EC_BKPT32 (0x38)
-/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
-/* Unallocated EC: 0x3B */
-#define ESR_ELx_EC_BRK64 (0x3C)
-/* Unallocated EC: 0x3D - 0x3F */
-#define ESR_ELx_EC_MAX (0x3F)
-
-#define ESR_ELx_EC_SHIFT (26)
-#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
-#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
-
-#define ESR_ELx_IL_SHIFT (25)
-#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
-
-/* ISS field definitions shared by different classes */
-#define ESR_ELx_WNR_SHIFT (6)
-#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
-
-/* Asynchronous Error Type */
-#define ESR_ELx_IDS_SHIFT (24)
-#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
-#define ESR_ELx_AET_SHIFT (10)
-#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
-
-#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
-
-/* Shared ISS field definitions for Data/Instruction aborts */
-#define ESR_ELx_SET_SHIFT (11)
-#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
-#define ESR_ELx_FnV_SHIFT (10)
-#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
-#define ESR_ELx_EA_SHIFT (9)
-#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
-#define ESR_ELx_S1PTW_SHIFT (7)
-#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
-
-/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
-#define ESR_ELx_FSC (0x3F)
-#define ESR_ELx_FSC_TYPE (0x3C)
-#define ESR_ELx_FSC_EXTABT (0x10)
-#define ESR_ELx_FSC_SERROR (0x11)
-#define ESR_ELx_FSC_ACCESS (0x08)
-#define ESR_ELx_FSC_FAULT (0x04)
-#define ESR_ELx_FSC_PERM (0x0C)
-
-/* ISS field definitions for Data Aborts */
-#define ESR_ELx_ISV_SHIFT (24)
-#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
-#define ESR_ELx_SAS_SHIFT (22)
-#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
-#define ESR_ELx_SSE_SHIFT (21)
-#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
-#define ESR_ELx_SRT_SHIFT (16)
-#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
-#define ESR_ELx_SF_SHIFT (15)
-#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
-#define ESR_ELx_AR_SHIFT (14)
-#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
-#define ESR_ELx_CM_SHIFT (8)
-#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
-
-/* ISS field definitions for exceptions taken in to Hyp */
-#define ESR_ELx_CV (UL(1) << 24)
-#define ESR_ELx_COND_SHIFT (20)
-#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
-#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
-#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
-#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
-
-/* ISS field definitions for system error */
-#define ESR_ELx_SERR_MASK (0x1)
-#define ESR_ELx_SERR_NMI (0x1)
-
-// LOAD_KERNEL_ADDRESS loads a kernel address.
-#define LOAD_KERNEL_ADDRESS(from, to) \
- MOVD from, to; \
- ORR $0xffff000000000000, to, to;
-
-// LOAD_KERNEL_STACK loads the kernel temporary stack.
-#define LOAD_KERNEL_STACK(from) \
- LOAD_KERNEL_ADDRESS(CPU_SELF(from), RSV_REG); \
- MOVD $CPU_STACK_TOP(RSV_REG), RSV_REG; \
- MOVD RSV_REG, RSP; \
- WORD $0xd538d092; //MRS TPIDR_EL1, R18
-
-// SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application.
-#define SWITCH_TO_APP_PAGETABLE() \
- MOVD CPU_APP_ASID(RSV_REG), RSV_REG_APP; \
- MOVD CPU_TTBR0_APP(RSV_REG), RSV_REG; \
- BFI $48, RSV_REG_APP, $16, RSV_REG; \
- MSR RSV_REG, TTBR0_EL1; \
- ISB $15;
-
-// SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable.
-#define SWITCH_TO_KVM_PAGETABLE() \
- MOVD CPU_TTBR0_KVM(RSV_REG), RSV_REG; \
- MOVD $1, RSV_REG_APP; \
- BFI $48, RSV_REG_APP, $16, RSV_REG; \
- MSR RSV_REG, TTBR0_EL1; \
- ISB $15;
-
-TEXT ·EnableVFP(SB),NOSPLIT,$0
- MOVD $FPEN_ENABLE, R0
- WORD $0xd5181040 //MSR R0, CPACR_EL1
- ISB $15
- RET
-
-TEXT ·DisableVFP(SB),NOSPLIT,$0
- MOVD $0, R0
- WORD $0xd5181040 //MSR R0, CPACR_EL1
- ISB $15
- RET
-
-#define VFP_ENABLE \
- MOVD $FPEN_ENABLE, R0; \
- WORD $0xd5181040; \ //MSR R0, CPACR_EL1
- ISB $15;
-
-#define VFP_DISABLE \
- MOVD $0x0, R0; \
- WORD $0xd5181040; \ //MSR R0, CPACR_EL1
- ISB $15;
-
-// KERNEL_ENTRY_FROM_EL0 is the entry code of the vcpu from el0 to el1.
-#define KERNEL_ENTRY_FROM_EL0 \
- SUB $16, RSP, RSP; \ // step1, save r18, r9 into kernel temporary stack.
- STP (RSV_REG, RSV_REG_APP), 16*0(RSP); \
- WORD $0xd538d092; \ // MRS TPIDR_EL1, R18
- MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP; \ // step2, load app context pointer.
- REGISTERS_SAVE(RSV_REG_APP, 0); \ // step3, save app context.
- MOVD RSV_REG_APP, R20; \
- LDP 16*0(RSP), (RSV_REG, RSV_REG_APP); \
- ADD $16, RSP, RSP; \
- MOVD RSV_REG, PTRACE_R18(R20); \
- MOVD RSV_REG_APP, PTRACE_R9(R20); \
- MRS TPIDR_EL0, R3; \
- MOVD R3, PTRACE_TLS(R20); \
- WORD $0xd5384003; \ // MRS SPSR_EL1, R3
- MOVD R3, PTRACE_PSTATE(R20); \
- MRS ELR_EL1, R3; \
- MOVD R3, PTRACE_PC(R20); \
- WORD $0xd5384103; \ // MRS SP_EL0, R3
- MOVD R3, PTRACE_SP(R20);
-
-// KERNEL_ENTRY_FROM_EL1 is the entry code of the vcpu from el1 to el1.
-#define KERNEL_ENTRY_FROM_EL1 \
- WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
- REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // Save sentry context.
- MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG); \
- MRS TPIDR_EL0, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_TLS(RSV_REG); \
- WORD $0xd5384004; \ // MRS SPSR_EL1, R4
- MOVD R4, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG); \
- MRS ELR_EL1, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_PC(RSV_REG); \
- MOVD RSP, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \
- LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack.
-
-// EXCEPTION_EL0 is a common el0 exception handler function.
-#define EXCEPTION_EL0(vector) \
- WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
- WORD $0xd538601a; \ //MRS FAR_EL1, R26
- MOVD R26, CPU_FAULT_ADDR(RSV_REG); \
- MOVD $1, R3; \
- MOVD R3, CPU_ERROR_TYPE(RSV_REG); \ // Set error type to user.
- MOVD $vector, R3; \
- MOVD R3, CPU_VECTOR_CODE(RSV_REG); \
- MRS ESR_EL1, R3; \
- MOVD R3, CPU_ERROR_CODE(RSV_REG); \
- B ·kernelExitToEl1(SB);
-
-// EXCEPTION_EL1 is a common el1 exception handler function.
-#define EXCEPTION_EL1(vector) \
- MOVD $vector, R3; \
- MOVD R3, 8(RSP); \
- B ·HaltEl1ExceptionAndResume(SB);
-
-// storeAppASID writes the application's asid value.
-TEXT ·storeAppASID(SB),NOSPLIT,$0-8
- MOVD asid+0(FP), R1
- MRS TPIDR_EL1, RSV_REG
- MOVD R1, CPU_APP_ASID(RSV_REG)
- RET
-
-// Halt halts execution.
-TEXT ·Halt(SB),NOSPLIT,$0
- // Clear bluepill.
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- CMP RSV_REG, R9
- BNE mmio_exit
- MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG)
-
-mmio_exit:
- // Disable fpsimd.
- WORD $0xd5381041 // MRS CPACR_EL1, R1
- MOVD R1, CPU_LAZY_VFP(RSV_REG)
- VFP_DISABLE
-
- // Trigger MMIO_EXIT/_KVM_HYPERCALL_VMEXIT.
- //
- // To keep it simple, I used the address of exception table as the
- // MMIO base address, so that I can trigger a MMIO-EXIT by forcibly writing
- // a read-only space.
- // Also, the length is enough to match a sufficient number of hypercall IDs.
- // Then, in host user space, I can calculate this address to find out
- // which hypercall.
- MRS VBAR_EL1, R9
- MOVD R0, 0x0(R9)
-
- RET
-
-// HaltAndResume halts execution and then branches to kernelExitToEl1 to resume.
-TEXT ·HaltAndResume(SB),NOSPLIT,$0
- BL ·Halt(SB)
- B ·kernelExitToEl1(SB) // Resume.
-
-// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resumes.
-TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0
- WORD $0xd538d092 // MRS TPIDR_EL1, R18
- MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
- MOVD R3, 8(RSP) // First argument (vCPU).
- CALL ·kernelSyscall(SB) // Call the trampoline.
- B ·kernelExitToEl1(SB) // Resume.
-
-// HaltEl1ExceptionAndResume calls Hooks.KernelException and resumes.
-TEXT ·HaltEl1ExceptionAndResume(SB),NOSPLIT,$0-8
- WORD $0xd538d092 // MRS TPIDR_EL1, R18
- MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
- MOVD R3, 8(RSP) // First argument (vCPU).
- MOVD vector+0(FP), R3
- MOVD R3, 16(RSP) // Second argument (vector).
- CALL ·kernelException(SB) // Call the trampoline.
- B ·kernelExitToEl1(SB) // Resume.
-
-// Shutdown stops the guest.
-TEXT ·Shutdown(SB),NOSPLIT,$0
- // PSCI EVENT.
- MOVD $0x84000009, R0
- HVC $0
-
-// See kernel.go.
-TEXT ·Current(SB),NOSPLIT,$0-8
- MOVD CPU_SELF(RSV_REG), R8
- MOVD R8, ret+0(FP)
- RET
-
-#define STACK_FRAME_SIZE 32
-
-// kernelExitToEl0 is the entrypoint for application in guest_el0.
-// Prepare the vcpu environment for container application.
-TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
- // Step1, save sentry context into memory.
- MRS TPIDR_EL1, RSV_REG
- REGISTERS_SAVE(RSV_REG, CPU_REGISTERS)
- MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG)
- MRS TPIDR_EL0, R3
- MOVD R3, CPU_REGISTERS+PTRACE_TLS(RSV_REG)
-
- WORD $0xd5384003 // MRS SPSR_EL1, R3
- MOVD R3, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG)
- MOVD R30, CPU_REGISTERS+PTRACE_PC(RSV_REG)
- MOVD RSP, R3
- MOVD R3, CPU_REGISTERS+PTRACE_SP(RSV_REG)
-
- MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3
-
- // Step2, switch to temporary stack.
- LOAD_KERNEL_STACK(RSV_REG)
-
- // Step3, load app context pointer.
- MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP
-
- // Step4, prepare the environment for container application.
- // set sp_el0.
- MOVD PTRACE_SP(RSV_REG_APP), R1
- WORD $0xd5184101 //MSR R1, SP_EL0
- // set pc.
- MOVD PTRACE_PC(RSV_REG_APP), R1
- MSR R1, ELR_EL1
- // set pstate.
- MOVD PTRACE_PSTATE(RSV_REG_APP), R1
- WORD $0xd5184001 //MSR R1, SPSR_EL1
-
- // We need to use a kernel-space address to execute the code below, since
- // after SWITCH_TO_APP_PAGETABLE the ASID is changed to the app's
- // ASID.
- WORD $0x10000061 // ADR R1, do_exit_to_el0
- ORR $0xffff000000000000, R1, R1
- JMP (R1)
-
-do_exit_to_el0:
- // RSV_REG & RSV_REG_APP will be loaded at the end.
- REGISTERS_LOAD(RSV_REG_APP, 0)
- MOVD PTRACE_TLS(RSV_REG_APP), RSV_REG
- MSR RSV_REG, TPIDR_EL0
-
- // switch to user pagetable.
- MOVD PTRACE_R18(RSV_REG_APP), RSV_REG
- MOVD PTRACE_R9(RSV_REG_APP), RSV_REG_APP
-
- SUB $STACK_FRAME_SIZE, RSP, RSP
- STP (RSV_REG, RSV_REG_APP), 16*0(RSP)
- STP (R0, R1), 16*1(RSP)
-
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- SWITCH_TO_APP_PAGETABLE()
-
- LDP 16*1(RSP), (R0, R1)
- LDP 16*0(RSP), (RSV_REG, RSV_REG_APP)
- ADD $STACK_FRAME_SIZE, RSP, RSP
-
- ERET()
-
-// kernelExitToEl1 is the entrypoint for sentry in guest_el1.
-// Prepare the vcpu environment for sentry.
-TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1
- WORD $0xd5184001 //MSR R1, SPSR_EL1
-
- MOVD CPU_REGISTERS+PTRACE_PC(RSV_REG), R1
- MSR R1, ELR_EL1
-
- // restore sentry's tls.
- MOVD CPU_REGISTERS+PTRACE_TLS(RSV_REG), R1
- MSR R1, TPIDR_EL0
-
- MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R1
- MOVD R1, RSP
-
- REGISTERS_LOAD(RSV_REG, CPU_REGISTERS)
- SWITCH_TO_KVM_PAGETABLE()
- MRS TPIDR_EL1, RSV_REG
-
- MOVD CPU_REGISTERS+PTRACE_R9(RSV_REG), RSV_REG_APP
-
- ERET()
-
-// Start is the CPU entrypoint.
-TEXT ·Start(SB),NOSPLIT,$0
- // Init.
- WORD $0xd508871f // __tlbi(vmalle1)
- DSB $7 // dsb(nsh)
-
- MOVD $1<<12, R1 // Reset mdscr_el1 and disable
- MSR R1, MDSCR_EL1 // access to the DCC from EL0
- ISB $15
-
- MRS TTBR1_EL1, R1
- MSR R1, TTBR0_EL1
- ISB $15
-
- MOVD $CNTKCTL_EL1_DEFAULT, R1
- MSR R1, CNTKCTL_EL1
-
- MOVD R8, RSV_REG
- ORR $0xffff000000000000, RSV_REG, RSV_REG
- WORD $0xd518d092 //MSR R18, TPIDR_EL1
-
- // Init.
- MOVD $SCTLR_EL1_DEFAULT, R1 // re-enable the mmu.
- MSR R1, SCTLR_EL1
- ISB $15
- WORD $0xd508751f // ic iallu
-
- DSB $7 // dsb(nsh)
- ISB $15
-
- B ·kernelExitToEl1(SB)
-
-// El1_sync_invalid is the handler for an invalid EL1_sync.
-TEXT ·El1_sync_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_irq_invalid is the handler for an invalid El1_irq.
-TEXT ·El1_irq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_fiq_invalid is the handler for an invalid El1_fiq.
-TEXT ·El1_fiq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_error_invalid is the handler for an invalid El1_error.
-TEXT ·El1_error_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_sync is the handler for El1_sync.
-TEXT ·El1_sync(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL1
- MRS ESR_EL1, R25 // read the syndrome register
- LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class
- CMP $ESR_ELx_EC_DABT_CUR, R24
- BEQ el1_da // data abort in EL1
- CMP $ESR_ELx_EC_IABT_CUR, R24
- BEQ el1_ia // instruction abort in EL1
- CMP $ESR_ELx_EC_SP_ALIGN, R24
- BEQ el1_sp_pc // stack alignment exception
- CMP $ESR_ELx_EC_PC_ALIGN, R24
- BEQ el1_sp_pc // pc alignment exception
- CMP $ESR_ELx_EC_UNKNOWN, R24
- BEQ el1_undef // unknown exception in EL1
- CMP $ESR_ELx_EC_SVC64, R24
- BEQ el1_svc // SVC in 64-bit state
- CMP $ESR_ELx_EC_BREAKPT_CUR, R24
- BEQ el1_dbg // debug exception in EL1
- CMP $ESR_ELx_EC_FP_ASIMD, R24
- BEQ el1_fpsimd_acc // FP/ASIMD access
- CMP $ESR_ELx_EC_SVE, R24
- BEQ el1_sve_acc // SVE access
- B el1_invalid
-
-el1_da:
- EXCEPTION_EL1(El1SyncDa)
-el1_ia:
- EXCEPTION_EL1(El1SyncIa)
-el1_sp_pc:
- EXCEPTION_EL1(El1SyncSpPc)
-el1_undef:
- EXCEPTION_EL1(El1SyncUndef)
-el1_svc:
- B ·HaltEl1SvcAndResume(SB)
-el1_dbg:
- EXCEPTION_EL1(El1SyncDbg)
-el1_fpsimd_acc:
-el1_sve_acc:
- VFP_ENABLE
- B ·kernelExitToEl1(SB) // Resume.
-el1_invalid:
- EXCEPTION_EL1(El1SyncInv)
-
-// El1_irq is the handler for El1_irq.
-TEXT ·El1_irq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_fiq is the handler for El1_fiq.
-TEXT ·El1_fiq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_error is the handler for El1_error.
-TEXT ·El1_error(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El0_sync is the handler for El0_sync.
-TEXT ·El0_sync(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL0
- MRS ESR_EL1, R25 // read the syndrome register
- LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class
- CMP $ESR_ELx_EC_SVC64, R24
- BEQ el0_svc // SVC in 64-bit state
- CMP $ESR_ELx_EC_DABT_LOW, R24
- BEQ el0_da // data abort in EL0
- CMP $ESR_ELx_EC_IABT_LOW, R24
- BEQ el0_ia // instruction abort in EL0
- CMP $ESR_ELx_EC_FP_ASIMD, R24
- BEQ el0_fpsimd_acc // FP/ASIMD access
- CMP $ESR_ELx_EC_SVE, R24
- BEQ el0_sve_acc // SVE access
- CMP $ESR_ELx_EC_FP_EXC64, R24
- BEQ el0_fpsimd_exc // FP/ASIMD exception
- CMP $ESR_ELx_EC_SP_ALIGN, R24
- BEQ el0_sp_pc // stack alignment exception
- CMP $ESR_ELx_EC_PC_ALIGN, R24
- BEQ el0_sp_pc // pc alignment exception
- CMP $ESR_ELx_EC_UNKNOWN, R24
- BEQ el0_undef // unknown exception in EL0
- CMP $ESR_ELx_EC_BREAKPT_LOW, R24
- BEQ el0_dbg // debug exception in EL0
- CMP $ESR_ELx_EC_SYS64, R24
- BEQ el0_sys // configurable trap
- CMP $ESR_ELx_EC_WFx, R24
- BEQ el0_wfx // WFX trap
- B el0_invalid
-
-el0_svc:
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- MOVD $0, CPU_ERROR_CODE(RSV_REG) // Clear error code.
-
- MOVD $1, R3
- MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
-
- MOVD $Syscall, R3
- MOVD R3, CPU_VECTOR_CODE(RSV_REG)
-
- B ·kernelExitToEl1(SB)
-
-el0_da:
-el0_ia:
- EXCEPTION_EL0(PageFault)
-el0_fpsimd_acc:
- EXCEPTION_EL0(El0SyncFpsimdAcc)
-el0_sve_acc:
- EXCEPTION_EL0(El0SyncSveAcc)
-el0_fpsimd_exc:
- EXCEPTION_EL0(El0SyncFpsimdExc)
-el0_sp_pc:
- EXCEPTION_EL0(El0SyncSpPc)
-el0_undef:
- EXCEPTION_EL0(El0SyncUndef)
-el0_dbg:
- EXCEPTION_EL0(El0SyncDbg)
-el0_sys:
- EXCEPTION_EL0(El0SyncSys)
-el0_wfx:
- EXCEPTION_EL0(El0SyncWfx)
-el0_invalid:
- EXCEPTION_EL0(El0SyncInv)
-
-TEXT ·El0_irq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_fiq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_error(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL0
- WORD $0xd5385219 // MRS ESR_EL1, R25
- AND $ESR_ELx_SERR_MASK, R25, R24
- CMP $ESR_ELx_SERR_NMI, R24
- BEQ el0_nmi
- B el0_bounce
-
-el0_nmi:
- EXCEPTION_EL0(El0ErrNMI)
-el0_bounce:
- EXCEPTION_EL0(VirtualizationException)
-
-TEXT ·El0_sync_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_irq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_fiq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_error_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// Vectors implements exception vector table.
-// The start address of the exception vector table should be 2 KiB aligned (low 11 bits zero).
-// For detail, please refer to arm developer document:
-// https://developer.arm.com/documentation/100933/0100/AArch64-exception-vector-table
-// See also the corresponding code in the Linux kernel: arch/arm64/kernel/entry.S
-TEXT ·Vectors(SB),NOSPLIT,$0
- PCALIGN $2048
- B ·El1_sync_invalid(SB)
- PCALIGN $128
- B ·El1_irq_invalid(SB)
- PCALIGN $128
- B ·El1_fiq_invalid(SB)
- PCALIGN $128
- B ·El1_error_invalid(SB)
-
- PCALIGN $128
- B ·El1_sync(SB)
- PCALIGN $128
- B ·El1_irq(SB)
- PCALIGN $128
- B ·El1_fiq(SB)
- PCALIGN $128
- B ·El1_error(SB)
-
- PCALIGN $128
- B ·El0_sync(SB)
- PCALIGN $128
- B ·El0_irq(SB)
- PCALIGN $128
- B ·El0_fiq(SB)
- PCALIGN $128
- B ·El0_error(SB)
-
- PCALIGN $128
- B ·El0_sync_invalid(SB)
- PCALIGN $128
- B ·El0_irq_invalid(SB)
- PCALIGN $128
- B ·El0_fiq_invalid(SB)
- PCALIGN $128
- B ·El0_error_invalid(SB)
diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go
deleted file mode 100644
index 292f9d0cc..000000000
--- a/pkg/sentry/platform/ring0/kernel.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ring0
-
-// Init initializes a new kernel.
-//
-//go:nosplit
-func (k *Kernel) Init(maxCPUs int) {
- k.init(maxCPUs)
-}
-
-// Halt halts execution.
-func Halt()
-
-// defaultHooks implements hooks.
-type defaultHooks struct{}
-
-// KernelSyscall implements Hooks.KernelSyscall.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (defaultHooks) KernelSyscall() {
- Halt()
-}
-
-// KernelException implements Hooks.KernelException.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (defaultHooks) KernelException(Vector) {
- Halt()
-}
-
-// kernelSyscall is a trampoline.
-//
-// On amd64, it is called with %rip in the upper half, so it can NOT
-// access any global data that is not mapped in the upper half and must
-// call through function pointers or interfaces to switch to the lower
-// half so that the callee can access global data.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func kernelSyscall(c *CPU) {
- c.hooks.KernelSyscall()
-}
-
-// kernelException is a trampoline.
-//
-// On amd64, it is called with %rip in the upper half, so it can NOT
-// access any global data that is not mapped in the upper half and must
-// call through function pointers or interfaces to switch to the lower
-// half so that the callee can access global data.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func kernelException(c *CPU, vector Vector) {
- c.hooks.KernelException(vector)
-}
-
-// Init initializes a new CPU.
-//
-// Init allows embedding in other objects.
-func (c *CPU) Init(k *Kernel, cpuID int, hooks Hooks) {
- c.self = c // Set self reference.
- c.kernel = k // Set kernel reference.
- c.init(cpuID) // Perform architectural init.
-
- // Require hooks.
- if hooks != nil {
- c.hooks = hooks
- } else {
- c.hooks = defaultHooks{}
- }
-}
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
deleted file mode 100644
index 36a60700e..000000000
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ /dev/null
@@ -1,323 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "encoding/binary"
- "reflect"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// init initializes architecture-specific state.
-func (k *Kernel) init(maxCPUs int) {
- entrySize := reflect.TypeOf(kernelEntry{}).Size()
- var (
- entries []kernelEntry
- padding = 1
- )
- for {
- entries = make([]kernelEntry, maxCPUs+padding-1)
- totalSize := entrySize * uintptr(maxCPUs+padding-1)
- addr := reflect.ValueOf(&entries[0]).Pointer()
- if addr&(usermem.PageSize-1) == 0 && totalSize >= usermem.PageSize {
- // The runtime forces power-of-2 alignment for allocations, and we are therefore
- // safe once the first address is aligned and the chunk is at least a full page.
- break
- }
- padding = padding << 1
- }
- k.cpuEntries = entries
-
- k.globalIDT = &idt64{}
- if reflect.TypeOf(idt64{}).Size() != usermem.PageSize {
- panic("Size of globalIDT should be PageSize")
- }
- if reflect.ValueOf(k.globalIDT).Pointer()&(usermem.PageSize-1) != 0 {
- panic("Allocated globalIDT should be page aligned")
- }
-
- // Setup the IDT, which is uniform.
- for v, handler := range handlers {
- // Allow Breakpoint and Overflow to be called from all
- // privilege levels.
- dpl := 0
- if v == Breakpoint || v == Overflow {
- dpl = 3
- }
- // Note that we set all traps to use the interrupt stack, this
- // is defined below when setting up the TSS.
- k.globalIDT[v].setInterrupt(Kcode, uint64(kernelFunc(handler)), dpl, 1 /* ist */)
- }
-}
-
-// EntryRegions returns the set of kernel entry regions (must be mapped).
-func (k *Kernel) EntryRegions() map[uintptr]uintptr {
- regions := make(map[uintptr]uintptr)
-
- addr := reflect.ValueOf(&k.cpuEntries[0]).Pointer()
- size := reflect.TypeOf(kernelEntry{}).Size() * uintptr(len(k.cpuEntries))
- end, _ := usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
-
- addr = reflect.ValueOf(k.globalIDT).Pointer()
- size = reflect.TypeOf(idt64{}).Size()
- end, _ = usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
-
- return regions
-}
-
-// init initializes architecture-specific state.
-func (c *CPU) init(cpuID int) {
- c.kernelEntry = &c.kernel.cpuEntries[cpuID]
- c.cpuSelf = c
- // Null segment.
- c.gdt[0].setNull()
-
- // Kernel & user segments.
- c.gdt[segKcode] = KernelCodeSegment
- c.gdt[segKdata] = KernelDataSegment
- c.gdt[segUcode32] = UserCodeSegment32
- c.gdt[segUdata] = UserDataSegment
- c.gdt[segUcode64] = UserCodeSegment64
-
- // The task segment, this spans two entries.
- tssBase, tssLimit, _ := c.TSS()
- c.gdt[segTss].set(
- uint32(tssBase),
- uint32(tssLimit),
- 0, // Privilege level zero.
- SegmentDescriptorPresent|
- SegmentDescriptorAccess|
- SegmentDescriptorWrite|
- SegmentDescriptorExecute)
- c.gdt[segTssHi].setHi(uint32((tssBase) >> 32))
-
- // Set the kernel stack pointer in the TSS (virtual address).
- stackAddr := c.StackTop()
- c.stackTop = stackAddr
- c.tss.rsp0Lo = uint32(stackAddr)
- c.tss.rsp0Hi = uint32(stackAddr >> 32)
- c.tss.ist1Lo = uint32(stackAddr)
- c.tss.ist1Hi = uint32(stackAddr >> 32)
-
- // Set the I/O bitmap base address beyond the last byte in the TSS
- // to block access to the entire I/O address range.
- //
- // From section 18.5.2 "I/O Permission Bit Map" from Intel SDM vol1:
- // I/O addresses not spanned by the map are treated as if they had set
- // bits in the map.
- c.tss.ioPerm = tssLimit + 1
-
- // Permanently set the kernel segments.
- c.registers.Cs = uint64(Kcode)
- c.registers.Ds = uint64(Kdata)
- c.registers.Es = uint64(Kdata)
- c.registers.Ss = uint64(Kdata)
- c.registers.Fs = uint64(Kdata)
- c.registers.Gs = uint64(Kdata)
-
- // Set mandatory flags.
- c.registers.Eflags = KernelFlagsSet
-}
-
-// StackTop returns the kernel's stack address.
-//
-//go:nosplit
-func (c *CPU) StackTop() uint64 {
- return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
-}
-
-// IDT returns the CPU's IDT base and limit.
-//
-//go:nosplit
-func (c *CPU) IDT() (uint64, uint16) {
- return uint64(kernelAddr(&c.kernel.globalIDT[0])), uint16(binary.Size(&c.kernel.globalIDT) - 1)
-}
-
-// GDT returns the CPU's GDT base and limit.
-//
-//go:nosplit
-func (c *CPU) GDT() (uint64, uint16) {
- return uint64(kernelAddr(&c.gdt[0])), uint16(8*segLast - 1)
-}
-
-// TSS returns the CPU's TSS base, limit and value.
-//
-//go:nosplit
-func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) {
- return uint64(kernelAddr(&c.tss)), uint16(binary.Size(&c.tss) - 1), &c.gdt[segTss]
-}
-
-// CR0 returns the CPU's CR0 value.
-//
-//go:nosplit
-func (c *CPU) CR0() uint64 {
- return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET
-}
-
-// CR4 returns the CPU's CR4 value.
-//
-//go:nosplit
-func (c *CPU) CR4() uint64 {
- cr4 := uint64(_CR4_PAE | _CR4_PSE | _CR4_OSFXSR | _CR4_OSXMMEXCPT)
- if hasPCID {
- cr4 |= _CR4_PCIDE
- }
- if hasXSAVE {
- cr4 |= _CR4_OSXSAVE
- }
- if hasSMEP {
- cr4 |= _CR4_SMEP
- }
- if hasFSGSBASE {
- cr4 |= _CR4_FSGSBASE
- }
- return cr4
-}
-
-// EFER returns the CPU's EFER value.
-//
-//go:nosplit
-func (c *CPU) EFER() uint64 {
- return _EFER_LME | _EFER_LMA | _EFER_SCE | _EFER_NX
-}
-
-// IsCanonical indicates whether addr is canonical per the amd64 spec.
-//
-//go:nosplit
-func IsCanonical(addr uint64) bool {
- return addr <= 0x00007fffffffffff || addr > 0xffff800000000000
-}
-
-// SwitchToUser performs either a sysret or an iret.
-//
-// The return value is the vector that interrupted execution.
-//
-// This function will not split the stack. Callers will probably want to call
-// runtime.entersyscall (and pair with a call to runtime.exitsyscall) prior to
-// calling this function.
-//
-// When this is done, this region is quite sensitive to things like system
-// calls. After calling entersyscall, any memory used must have been allocated
-// and no function calls without go:nosplit are permitted. Any calls made here
-// are protected appropriately (e.g. IsCanonical and CR3).
-//
-// Also note that this function transitively depends on the compiler generating
-// code that uses IP-relative addressing inside of absolute addresses. That's
-// the case for amd64, but may not be the case for other architectures.
-//
-// Precondition: the Rip, Rsp, Fs and Gs registers must be canonical.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
- userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID)
- c.kernelCR3 = uintptr(c.kernel.PageTables.CR3(true, switchOpts.KernelPCID))
-
- // Sanitize registers.
- regs := switchOpts.Registers
- regs.Eflags &= ^uint64(UserFlagsClear)
- regs.Eflags |= UserFlagsSet
- regs.Cs = uint64(Ucode64) // Required for iret.
- regs.Ss = uint64(Udata) // Ditto.
-
- // Perform the switch.
- swapgs() // GS will be swapped on return.
- WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
- WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
- LoadFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy in floating point.
- if switchOpts.FullRestore {
- vector = iret(c, regs, uintptr(userCR3))
- } else {
- vector = sysret(c, regs, uintptr(userCR3))
- }
- SaveFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy out floating point.
- WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
- return
-}
-
-// start is the CPU entrypoint.
-//
-// This is called from the Start asm stub (see entry_amd64.go); on return the
-// registers in c.registers will be restored (not segments).
-//
-//go:nosplit
-func start(c *CPU) {
- // Save per-cpu & FS segment.
- WriteGS(kernelAddr(c.kernelEntry))
- WriteFS(uintptr(c.registers.Fs_base))
-
- // Initialize floating point.
- //
- // Note that on skylake, the valid XCR0 mask reported seems to be 0xff.
- // This breaks down as:
- //
- // bit0 - x87
- // bit1 - SSE
- // bit2 - AVX
- // bit3-4 - MPX
- // bit5-7 - AVX512
- //
- // For some reason, enabled MPX & AVX512 on platforms that report them
- // seems to be cause a general protection fault. (Maybe there are some
- // virtualization issues and these aren't exported to the guest cpuid.)
- // This needs further investigation, but we can limit the floating
- // point operations to x87, SSE & AVX for now.
- fninit()
- xsetbv(0, validXCR0Mask&0x7)
-
- // Set the syscall target.
- wrmsr(_MSR_LSTAR, kernelFunc(sysenter))
- wrmsr(_MSR_SYSCALL_MASK, KernelFlagsClear|_RFLAGS_DF)
-
- // NOTE: This depends on having the 64-bit segments immediately
- // following the 32-bit user segments. This is simply the way the
- // sysret instruction is designed to work (it assumes they follow).
- wrmsr(_MSR_STAR, uintptr(uint64(Kcode)<<32|uint64(Ucode32)<<48))
- wrmsr(_MSR_CSTAR, kernelFunc(sysenter))
-}
-
-// SetCPUIDFaulting sets CPUID faulting per the boolean value.
-//
-// True is returned if faulting could be set.
-//
-//go:nosplit
-func SetCPUIDFaulting(on bool) bool {
- // Per the SDM (Vol 3, Table 2-43), PLATFORM_INFO bit 31 denotes support
- // for CPUID faulting, and we enable and disable via the MISC_FEATURES MSR.
- if rdmsr(_MSR_PLATFORM_INFO)&_PLATFORM_INFO_CPUID_FAULT != 0 {
- features := rdmsr(_MSR_MISC_FEATURES)
- if on {
- features |= _MISC_FEATURE_CPUID_TRAP
- } else {
- features &^= _MISC_FEATURE_CPUID_TRAP
- }
- wrmsr(_MSR_MISC_FEATURES, features)
- return true // Setting successful.
- }
- return false
-}
-
-// ReadCR2 reads the current CR2 value.
-//
-//go:nosplit
-func ReadCR2() uintptr {
- return readCR2()
-}
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
deleted file mode 100644
index c05284641..000000000
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// HaltAndResume halts execution and point the pointer to the resume function.
-//go:nosplit
-func HaltAndResume()
-
-// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resume.
-//go:nosplit
-func HaltEl1SvcAndResume()
-
-// HaltEl1ExceptionAndResume calls Hooks.KernelException and resume.
-//go:nosplit
-func HaltEl1ExceptionAndResume()
-
-// init initializes architecture-specific state.
-func (k *Kernel) init(maxCPUs int) {
-}
-
-// init initializes architecture-specific state.
-func (c *CPU) init(cpuID int) {
- // Set the kernel stack pointer(virtual address).
- c.registers.Sp = uint64(c.StackTop())
-
-}
-
-// StackTop returns the kernel's stack address.
-//
-//go:nosplit
-func (c *CPU) StackTop() uint64 {
- return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
-}
-
-// IsCanonical indicates whether addr is canonical per the arm64 spec.
-//
-//go:nosplit
-func IsCanonical(addr uint64) bool {
- return addr <= 0x0000ffffffffffff || addr > 0xffff000000000000
-}
-
-// SwitchToUser performs an eret.
-//
-// The return value is the exception vector.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
- storeAppASID(uintptr(switchOpts.UserASID))
- if switchOpts.Flush {
- FlushTlbByASID(uintptr(switchOpts.UserASID))
- }
-
- regs := switchOpts.Registers
-
- regs.Pstate &= ^uint64(PsrFlagsClear)
- regs.Pstate |= UserFlagsSet
-
- EnableVFP()
- LoadFloatingPoint(switchOpts.FloatingPointState)
-
- kernelExitToEl0()
-
- SaveFloatingPoint(switchOpts.FloatingPointState)
- DisableVFP()
-
- vector = c.vecCode
-
- return
-}
diff --git a/pkg/sentry/platform/ring0/kernel_unsafe.go b/pkg/sentry/platform/ring0/kernel_unsafe.go
deleted file mode 100644
index 16955ad91..000000000
--- a/pkg/sentry/platform/ring0/kernel_unsafe.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ring0
-
-import (
- "unsafe"
-)
-
-// eface mirrors runtime.eface.
-type eface struct {
- typ uintptr
- data unsafe.Pointer
-}
-
-// kernelAddr returns the kernel virtual address for the given object.
-//
-//go:nosplit
-func kernelAddr(obj interface{}) uintptr {
- e := (*eface)(unsafe.Pointer(&obj))
- return KernelStartAddress | uintptr(e.data)
-}
-
-// kernelFunc returns the address of the given function.
-//
-//go:nosplit
-func kernelFunc(fn func()) uintptr {
- fnptr := (**uintptr)(unsafe.Pointer(&fn))
- return KernelStartAddress | **fnptr
-}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/sentry/platform/ring0/lib_amd64.go
deleted file mode 100644
index 0ec5c3bc5..000000000
--- a/pkg/sentry/platform/ring0/lib_amd64.go
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/cpuid"
-)
-
-// LoadFloatingPoint loads floating point state by the most efficient mechanism
-// available (set by Init).
-var LoadFloatingPoint func(*byte)
-
-// SaveFloatingPoint saves floating point state by the most efficient mechanism
-// available (set by Init).
-var SaveFloatingPoint func(*byte)
-
-// fxrstor uses fxrstor64 to load floating point state.
-func fxrstor(*byte)
-
-// xrstor uses xrstor to load floating point state.
-func xrstor(*byte)
-
-// fxsave uses fxsave64 to save floating point state.
-func fxsave(*byte)
-
-// xsave uses xsave to save floating point state.
-func xsave(*byte)
-
-// xsaveopt uses xsaveopt to save floating point state.
-func xsaveopt(*byte)
-
-// WriteFS sets the GS address (set by init).
-var WriteFS func(addr uintptr)
-
-// wrfsbase writes to the GS base address.
-func wrfsbase(addr uintptr)
-
-// wrfsmsr writes to the GS_BASE MSR.
-func wrfsmsr(addr uintptr)
-
-// WriteGS sets the GS address (set by init).
-var WriteGS func(addr uintptr)
-
-// wrgsbase writes to the GS base address.
-func wrgsbase(addr uintptr)
-
-// wrgsmsr writes to the GS_BASE MSR.
-func wrgsmsr(addr uintptr)
-
-// readCR2 reads the current CR2 value.
-func readCR2() uintptr
-
-// fninit initializes the floating point unit.
-func fninit()
-
-// xsetbv writes to an extended control register.
-func xsetbv(reg, value uintptr)
-
-// xgetbv reads an extended control register.
-func xgetbv(reg uintptr) uintptr
-
-// wrmsr reads to the given MSR.
-func wrmsr(reg, value uintptr)
-
-// rdmsr reads the given MSR.
-func rdmsr(reg uintptr) uintptr
-
-// Mostly-constants set by Init.
-var (
- hasSMEP bool
- hasPCID bool
- hasXSAVEOPT bool
- hasXSAVE bool
- hasFSGSBASE bool
- validXCR0Mask uintptr
-)
-
-// Init sets function pointers based on architectural features.
-//
-// This must be called prior to using ring0.
-func Init(featureSet *cpuid.FeatureSet) {
- hasSMEP = featureSet.HasFeature(cpuid.X86FeatureSMEP)
- hasPCID = featureSet.HasFeature(cpuid.X86FeaturePCID)
- hasXSAVEOPT = featureSet.UseXsaveopt()
- hasXSAVE = featureSet.UseXsave()
- hasFSGSBASE = featureSet.HasFeature(cpuid.X86FeatureFSGSBase)
- validXCR0Mask = uintptr(featureSet.ValidXCR0Mask())
- if hasXSAVEOPT {
- SaveFloatingPoint = xsaveopt
- LoadFloatingPoint = xrstor
- } else if hasXSAVE {
- SaveFloatingPoint = xsave
- LoadFloatingPoint = xrstor
- } else {
- SaveFloatingPoint = fxsave
- LoadFloatingPoint = fxrstor
- }
- if hasFSGSBASE {
- WriteFS = wrfsbase
- WriteGS = wrgsbase
- } else {
- WriteFS = wrfsmsr
- WriteGS = wrgsmsr
- }
-}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.s b/pkg/sentry/platform/ring0/lib_amd64.s
deleted file mode 100644
index 2fe83568a..000000000
--- a/pkg/sentry/platform/ring0/lib_amd64.s
+++ /dev/null
@@ -1,200 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// fxrstor loads floating point state.
-//
-// The code corresponds to:
-//
-// fxrstor64 (%rbx)
-//
-TEXT ·fxrstor(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), BX
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x0b;
- RET
-
-// xrstor loads floating point state.
-//
-// The code corresponds to:
-//
-// xrstor (%rdi)
-//
-TEXT ·xrstor(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f;
- RET
-
-// fxsave saves floating point state.
-//
-// The code corresponds to:
-//
-// fxsave64 (%rbx)
-//
-TEXT ·fxsave(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), BX
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x03;
- RET
-
-// xsave saves floating point state.
-//
-// The code corresponds to:
-//
-// xsave (%rdi)
-//
-TEXT ·xsave(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27;
- RET
-
-// xsaveopt saves floating point state.
-//
-// The code corresponds to:
-//
-// xsaveopt (%rdi)
-//
-TEXT ·xsaveopt(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37;
- RET
-
-// wrfsbase writes to the FS base.
-//
-// The code corresponds to:
-//
-// wrfsbase %rax
-//
-TEXT ·wrfsbase(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd0;
- RET
-
-// wrfsmsr writes to the FSBASE MSR.
-//
-// The code corresponds to:
-//
-// wrmsr (writes EDX:EAX to the MSR in ECX)
-//
-TEXT ·wrfsmsr(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- MOVQ AX, DX
- SHRQ $32, DX
- MOVQ $0xc0000100, CX // MSR_FS_BASE
- BYTE $0x0f; BYTE $0x30;
- RET
-
-// wrgsbase writes to the GS base.
-//
-// The code corresponds to:
-//
-// wrgsbase %rax
-//
-TEXT ·wrgsbase(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd8;
- RET
-
-// wrgsmsr writes to the GSBASE MSR.
-//
-// See wrfsmsr.
-TEXT ·wrgsmsr(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- MOVQ AX, DX
- SHRQ $32, DX
- MOVQ $0xc0000101, CX // MSR_GS_BASE
- BYTE $0x0f; BYTE $0x30; // WRMSR
- RET
-
-// readCR2 reads the current CR2 value.
-//
-// The code corresponds to:
-//
-// mov %cr2, %rax
-//
-TEXT ·readCR2(SB),NOSPLIT,$0-8
- BYTE $0x0f; BYTE $0x20; BYTE $0xd0;
- MOVQ AX, ret+0(FP)
- RET
-
-// fninit initializes the floating point unit.
-//
-// The code corresponds to:
-//
-// fninit
-TEXT ·fninit(SB),NOSPLIT,$0
- BYTE $0xdb; BYTE $0xe3;
- RET
-
-// xsetbv writes to an extended control register.
-//
-// The code corresponds to:
-//
-// xsetbv
-//
-TEXT ·xsetbv(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- MOVL value+8(FP), AX
- MOVL value+12(FP), DX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd1;
- RET
-
-// xgetbv reads an extended control register.
-//
-// The code corresponds to:
-//
-// xgetbv
-//
-TEXT ·xgetbv(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0;
- MOVL AX, ret+8(FP)
- MOVL DX, ret+12(FP)
- RET
-
-// wrmsr writes to a control register.
-//
-// The code corresponds to:
-//
-// wrmsr
-//
-TEXT ·wrmsr(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- MOVL value+8(FP), AX
- MOVL value+12(FP), DX
- BYTE $0x0f; BYTE $0x30;
- RET
-
-// rdmsr reads a control register.
-//
-// The code corresponds to:
-//
-// rdmsr
-//
-TEXT ·rdmsr(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- BYTE $0x0f; BYTE $0x32;
- MOVL AX, ret+8(FP)
- MOVL DX, ret+12(FP)
- RET
diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go
deleted file mode 100644
index a490bf3af..000000000
--- a/pkg/sentry/platform/ring0/lib_arm64.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// storeAppASID writes the application's asid value.
-func storeAppASID(asid uintptr)
-
-// LocalFlushTlbAll same as FlushTlbAll, but only applies to the calling CPU.
-func LocalFlushTlbAll()
-
-// FlushTlbByVA invalidates tlb by VA/Last-level/Inner-Shareable.
-func FlushTlbByVA(addr uintptr)
-
-// FlushTlbByASID invalidates tlb by ASID/Inner-Shareable.
-func FlushTlbByASID(asid uintptr)
-
-// FlushTlbAll invalidates all tlb.
-func FlushTlbAll()
-
-// CPACREL1 returns the value of the CPACR_EL1 register.
-func CPACREL1() (value uintptr)
-
-// GetFPCR returns the value of FPCR register.
-func GetFPCR() (value uintptr)
-
-// SetFPCR writes the FPCR value.
-func SetFPCR(value uintptr)
-
-// GetFPSR returns the value of FPSR register.
-func GetFPSR() (value uintptr)
-
-// SetFPSR writes the FPSR value.
-func SetFPSR(value uintptr)
-
-// SaveVRegs saves V0-V31 registers.
-// V0-V31: 32 128-bit registers for floating point and simd.
-func SaveVRegs(*byte)
-
-// LoadVRegs loads V0-V31 registers.
-func LoadVRegs(*byte)
-
-// LoadFloatingPoint loads floating point state.
-func LoadFloatingPoint(*byte)
-
-// SaveFloatingPoint saves floating point state.
-func SaveFloatingPoint(*byte)
-
-// EnableVFP enables fpsimd.
-func EnableVFP()
-
-// DisableVFP disables fpsimd.
-func DisableVFP()
-
-// Init sets function pointers based on architectural features.
-//
-// This must be called prior to using ring0.
-func Init() {}
diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/sentry/platform/ring0/lib_arm64.s
deleted file mode 100644
index e39b32841..000000000
--- a/pkg/sentry/platform/ring0/lib_arm64.s
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-#define TLBI_ASID_SHIFT 48
-
-TEXT ·FlushTlbByVA(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- DSB $10 // dsb(ishst)
- WORD $0xd50883a1 // tlbi vale1is, x1
- DSB $11 // dsb(ish)
- RET
-
-TEXT ·FlushTlbByASID(SB),NOSPLIT,$0-8
- MOVD asid+0(FP), R1
- LSL $TLBI_ASID_SHIFT, R1, R1
- DSB $10 // dsb(ishst)
- WORD $0xd5088341 // tlbi aside1is, x1
- DSB $11 // dsb(ish)
- RET
-
-TEXT ·LocalFlushTlbAll(SB),NOSPLIT,$0
- DSB $6 // dsb(nshst)
- WORD $0xd508871f // __tlbi(vmalle1)
- DSB $7 // dsb(nsh)
- ISB $15
- RET
-
-TEXT ·FlushTlbAll(SB),NOSPLIT,$0
- DSB $10 // dsb(ishst)
- WORD $0xd508831f // __tlbi(vmalle1is)
- DSB $11 // dsb(ish)
- ISB $15
- RET
-
-TEXT ·CPACREL1(SB),NOSPLIT,$0-8
- WORD $0xd5381041 // MRS CPACR_EL1, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·GetFPCR(SB),NOSPLIT,$0-8
- MOVD FPCR, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·GetFPSR(SB),NOSPLIT,$0-8
- MOVD FPSR, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·SetFPCR(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- MOVD R1, FPCR
- RET
-
-TEXT ·SetFPSR(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- MOVD R1, FPSR
- RET
-
-TEXT ·SaveVRegs(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- // Skip aarch64_ctx, fpsr, fpcr.
- ADD $16, R0, R0
-
- WORD $0xad000400 // stp q0, q1, [x0]
- WORD $0xad010c02 // stp q2, q3, [x0, #32]
- WORD $0xad021404 // stp q4, q5, [x0, #64]
- WORD $0xad031c06 // stp q6, q7, [x0, #96]
- WORD $0xad042408 // stp q8, q9, [x0, #128]
- WORD $0xad052c0a // stp q10, q11, [x0, #160]
- WORD $0xad06340c // stp q12, q13, [x0, #192]
- WORD $0xad073c0e // stp q14, q15, [x0, #224]
- WORD $0xad084410 // stp q16, q17, [x0, #256]
- WORD $0xad094c12 // stp q18, q19, [x0, #288]
- WORD $0xad0a5414 // stp q20, q21, [x0, #320]
- WORD $0xad0b5c16 // stp q22, q23, [x0, #352]
- WORD $0xad0c6418 // stp q24, q25, [x0, #384]
- WORD $0xad0d6c1a // stp q26, q27, [x0, #416]
- WORD $0xad0e741c // stp q28, q29, [x0, #448]
- WORD $0xad0f7c1e // stp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·LoadVRegs(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- // Skip aarch64_ctx, fpsr, fpcr.
- ADD $16, R0, R0
-
- WORD $0xad400400 // ldp q0, q1, [x0]
- WORD $0xad410c02 // ldp q2, q3, [x0, #32]
- WORD $0xad421404 // ldp q4, q5, [x0, #64]
- WORD $0xad431c06 // ldp q6, q7, [x0, #96]
- WORD $0xad442408 // ldp q8, q9, [x0, #128]
- WORD $0xad452c0a // ldp q10, q11, [x0, #160]
- WORD $0xad46340c // ldp q12, q13, [x0, #192]
- WORD $0xad473c0e // ldp q14, q15, [x0, #224]
- WORD $0xad484410 // ldp q16, q17, [x0, #256]
- WORD $0xad494c12 // ldp q18, q19, [x0, #288]
- WORD $0xad4a5414 // ldp q20, q21, [x0, #320]
- WORD $0xad4b5c16 // ldp q22, q23, [x0, #352]
- WORD $0xad4c6418 // ldp q24, q25, [x0, #384]
- WORD $0xad4d6c1a // ldp q26, q27, [x0, #416]
- WORD $0xad4e741c // ldp q28, q29, [x0, #448]
- WORD $0xad4f7c1e // ldp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·LoadFloatingPoint(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- MOVD 0(R0), R1
- MOVD R1, FPSR
- MOVD 8(R0), R1
- MOVD R1, FPCR
-
- ADD $16, R0, R0
-
- WORD $0xad400400 // ldp q0, q1, [x0]
- WORD $0xad410c02 // ldp q2, q3, [x0, #32]
- WORD $0xad421404 // ldp q4, q5, [x0, #64]
- WORD $0xad431c06 // ldp q6, q7, [x0, #96]
- WORD $0xad442408 // ldp q8, q9, [x0, #128]
- WORD $0xad452c0a // ldp q10, q11, [x0, #160]
- WORD $0xad46340c // ldp q12, q13, [x0, #192]
- WORD $0xad473c0e // ldp q14, q15, [x0, #224]
- WORD $0xad484410 // ldp q16, q17, [x0, #256]
- WORD $0xad494c12 // ldp q18, q19, [x0, #288]
- WORD $0xad4a5414 // ldp q20, q21, [x0, #320]
- WORD $0xad4b5c16 // ldp q22, q23, [x0, #352]
- WORD $0xad4c6418 // ldp q24, q25, [x0, #384]
- WORD $0xad4d6c1a // ldp q26, q27, [x0, #416]
- WORD $0xad4e741c // ldp q28, q29, [x0, #448]
- WORD $0xad4f7c1e // ldp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·SaveFloatingPoint(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- MOVD FPSR, R1
- MOVD R1, 0(R0)
- MOVD FPCR, R1
- MOVD R1, 8(R0)
-
- ADD $16, R0, R0
-
- WORD $0xad000400 // stp q0, q1, [x0]
- WORD $0xad010c02 // stp q2, q3, [x0, #32]
- WORD $0xad021404 // stp q4, q5, [x0, #64]
- WORD $0xad031c06 // stp q6, q7, [x0, #96]
- WORD $0xad042408 // stp q8, q9, [x0, #128]
- WORD $0xad052c0a // stp q10, q11, [x0, #160]
- WORD $0xad06340c // stp q12, q13, [x0, #192]
- WORD $0xad073c0e // stp q14, q15, [x0, #224]
- WORD $0xad084410 // stp q16, q17, [x0, #256]
- WORD $0xad094c12 // stp q18, q19, [x0, #288]
- WORD $0xad0a5414 // stp q20, q21, [x0, #320]
- WORD $0xad0b5c16 // stp q22, q23, [x0, #352]
- WORD $0xad0c6418 // stp q24, q25, [x0, #384]
- WORD $0xad0d6c1a // stp q26, q27, [x0, #416]
- WORD $0xad0e741c // stp q28, q29, [x0, #448]
- WORD $0xad0f7c1e // stp q30, q31, [x0, #480]
-
- RET
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator.go b/pkg/sentry/platform/ring0/pagetables/allocator.go
deleted file mode 100644
index 8d75b7599..000000000
--- a/pkg/sentry/platform/ring0/pagetables/allocator.go
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Allocator is used to allocate and map PTEs.
-//
-// Note that allocators may be called concurrently.
-type Allocator interface {
- // NewPTEs returns a new set of PTEs and their physical address.
- NewPTEs() *PTEs
-
- // PhysicalFor gives the physical address for a set of PTEs.
- PhysicalFor(ptes *PTEs) uintptr
-
- // LookupPTEs looks up PTEs by physical address.
- LookupPTEs(physical uintptr) *PTEs
-
- // FreePTEs marks a set of PTEs a freed, although they may not be available
- // for use again until Recycle is called, below.
- FreePTEs(ptes *PTEs)
-
- // Recycle makes freed PTEs available for use again.
- Recycle()
-}
-
-// RuntimeAllocator is a trivial allocator.
-type RuntimeAllocator struct {
- // used is the set of PTEs that have been allocated. This includes any
- // PTEs that may be in the pool below. PTEs are only freed from this
- // map by the Drain call.
- //
- // This exists to prevent accidental garbage collection.
- used map[*PTEs]struct{}
-
- // pool is the set of free-to-use PTEs.
- pool []*PTEs
-
- // freed is the set of recently-freed PTEs.
- freed []*PTEs
-}
-
-// NewRuntimeAllocator returns an allocator that uses runtime allocation.
-func NewRuntimeAllocator() *RuntimeAllocator {
- r := new(RuntimeAllocator)
- r.Init()
- return r
-}
-
-// Init initializes a RuntimeAllocator.
-func (r *RuntimeAllocator) Init() {
- r.used = make(map[*PTEs]struct{})
-}
-
-// Recycle returns freed pages to the pool.
-func (r *RuntimeAllocator) Recycle() {
- r.pool = append(r.pool, r.freed...)
- r.freed = r.freed[:0]
-}
-
-// Drain empties the pool.
-func (r *RuntimeAllocator) Drain() {
- r.Recycle()
- for i, ptes := range r.pool {
- // Zap the entry in the underlying array to ensure that it can
- // be properly garbage collected.
- r.pool[i] = nil
- // Similarly, free the reference held by the used map (these
- // also apply for the pool entries).
- delete(r.used, ptes)
- }
- r.pool = r.pool[:0]
-}
-
-// NewPTEs implements Allocator.NewPTEs.
-//
-// Note that the "physical" address here is actually the virtual address of the
-// PTEs structure. The entries are tracked only to avoid garbage collection.
-//
-// This is guaranteed not to split as long as the pool is sufficiently full.
-//
-//go:nosplit
-func (r *RuntimeAllocator) NewPTEs() *PTEs {
- // Pull from the pool if we can.
- if len(r.pool) > 0 {
- ptes := r.pool[len(r.pool)-1]
- r.pool = r.pool[:len(r.pool)-1]
- return ptes
- }
-
- // Allocate a new entry.
- ptes := newAlignedPTEs()
- r.used[ptes] = struct{}{}
- return ptes
-}
-
-// PhysicalFor returns the physical address for the given PTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) PhysicalFor(ptes *PTEs) uintptr {
- return physicalFor(ptes)
-}
-
-// LookupPTEs implements Allocator.LookupPTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) LookupPTEs(physical uintptr) *PTEs {
- return fromPhysical(physical)
-}
-
-// FreePTEs implements Allocator.FreePTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) FreePTEs(ptes *PTEs) {
- r.freed = append(r.freed, ptes)
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
deleted file mode 100644
index d08bfdeb3..000000000
--- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
- "unsafe"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// newAlignedPTEs returns a set of aligned PTEs.
-func newAlignedPTEs() *PTEs {
- ptes := new(PTEs)
- offset := physicalFor(ptes) & (usermem.PageSize - 1)
- if offset == 0 {
- // Already aligned.
- return ptes
- }
-
- // Need to force an aligned allocation.
- unaligned := make([]byte, (2*usermem.PageSize)-1)
- offset = uintptr(unsafe.Pointer(&unaligned[0])) & (usermem.PageSize - 1)
- if offset != 0 {
- offset = usermem.PageSize - offset
- }
- return (*PTEs)(unsafe.Pointer(&unaligned[offset]))
-}
-
-// physicalFor returns the "physical" address for PTEs.
-//
-//go:nosplit
-func physicalFor(ptes *PTEs) uintptr {
- return uintptr(unsafe.Pointer(ptes))
-}
-
-// fromPhysical returns the PTEs from the "physical" address.
-//
-//go:nosplit
-func fromPhysical(physical uintptr) *PTEs {
- return (*PTEs)(unsafe.Pointer(physical))
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
deleted file mode 100644
index 8c0a6aa82..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ /dev/null
@@ -1,324 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package pagetables provides a generic implementation of pagetables.
-//
-// The core functions must be safe to call from a nosplit context. Furthermore,
-// this pagetables implementation goes to lengths to ensure that all functions
-// are free from runtime allocation. Calls to NewPTEs/FreePTEs may be made
-// during walks, but these can be cached elsewhere if required.
-package pagetables
-
-import (
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// PageTables is a set of page tables.
-type PageTables struct {
- // Allocator is used to allocate nodes.
- Allocator Allocator
-
- // root is the pagetable root.
- //
- // For same archs such as amd64, the upper of the PTEs is cloned
- // from and owned by upperSharedPageTables which are shared among
- // many PageTables if upperSharedPageTables is not nil.
- root *PTEs
-
- // rootPhysical is the cached physical address of the root.
- //
- // This is saved only to prevent constant translation.
- rootPhysical uintptr
-
- // archPageTables includes architecture-specific features.
- archPageTables
-
- // upperSharedPageTables represents a read-only shared upper
- // of the Pagetable. When it is not nil, the upper is not
- // allowed to be modified.
- upperSharedPageTables *PageTables
-
- // upperStart is the start address of the upper portion that
- // are shared from upperSharedPageTables
- upperStart uintptr
-
- // readOnlyShared indicates the Pagetables are read-only and
- // own the ranges that are shared with other Pagetables.
- readOnlyShared bool
-}
-
-// Init initializes a set of PageTables.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) Init(allocator Allocator) {
- p.Allocator = allocator
- p.root = p.Allocator.NewPTEs()
- p.rootPhysical = p.Allocator.PhysicalFor(p.root)
-}
-
-// NewWithUpper returns new PageTables.
-//
-// upperSharedPageTables are used for mapping the upper of addresses,
-// starting at upperStart. These pageTables should not be touched (as
-// invalidations may be incorrect) after they are passed as an
-// upperSharedPageTables. Only when all dependent PageTables are gone
-// may they be used. The intenteded use case is for kernel page tables,
-// which are static and fixed.
-//
-// Precondition: upperStart must be between canonical ranges.
-// Precondition: upperStart must be pgdSize aligned.
-// precondition: upperSharedPageTables must be marked read-only shared.
-func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uintptr) *PageTables {
- p := new(PageTables)
- p.Init(a)
-
- if upperSharedPageTables != nil {
- if !upperSharedPageTables.readOnlyShared {
- panic("Only read-only shared pagetables can be used as upper")
- }
- p.upperSharedPageTables = upperSharedPageTables
- p.upperStart = upperStart
- }
-
- p.InitArch(a)
- return p
-}
-
-// New returns new PageTables.
-func New(a Allocator) *PageTables {
- return NewWithUpper(a, nil, 0)
-}
-
-// mapVisitor is used for map.
-type mapVisitor struct {
- target uintptr // Input.
- physical uintptr // Input.
- opts MapOpts // Input.
- prev bool // Output.
-}
-
-// visit is used for map.
-//
-//go:nosplit
-func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
- p := v.physical + (start - uintptr(v.target))
- if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) {
- v.prev = true
- }
- if p&align != 0 {
- // We will install entries at a smaller granulaity if we don't
- // install a valid entry here, however we must zap any existing
- // entry to ensure this happens.
- pte.Clear()
- return true
- }
- pte.Set(p, v.opts)
- return true
-}
-
-//go:nosplit
-func (*mapVisitor) requiresAlloc() bool { return true }
-
-//go:nosplit
-func (*mapVisitor) requiresSplit() bool { return true }
-
-// Map installs a mapping with the given physical address.
-//
-// True is returned iff there was a previous mapping in the range.
-//
-// Precondition: addr & length must be page-aligned, their sum must not overflow.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
- if p.readOnlyShared {
- panic("Should not modify read-only shared pagetables.")
- }
- if uintptr(addr)+length < uintptr(addr) {
- panic("addr & length overflow")
- }
- if p.upperSharedPageTables != nil {
- // ignore change to the read-only upper shared portion.
- if uintptr(addr) >= p.upperStart {
- return false
- }
- if uintptr(addr)+length > p.upperStart {
- length = p.upperStart - uintptr(addr)
- }
- }
- w := mapWalker{
- pageTables: p,
- visitor: mapVisitor{
- target: uintptr(addr),
- physical: physical,
- opts: opts,
- },
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.prev
-}
-
-// unmapVisitor is used for unmap.
-type unmapVisitor struct {
- count int
-}
-
-//go:nosplit
-func (*unmapVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*unmapVisitor) requiresSplit() bool { return true }
-
-// visit unmaps the given entry.
-//
-//go:nosplit
-func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
- pte.Clear()
- v.count++
- return true
-}
-
-// Unmap unmaps the given range.
-//
-// True is returned iff there was a previous mapping in the range.
-//
-// Precondition: addr & length must be page-aligned, their sum must not overflow.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
- if p.readOnlyShared {
- panic("Should not modify read-only shared pagetables.")
- }
- if uintptr(addr)+length < uintptr(addr) {
- panic("addr & length overflow")
- }
- if p.upperSharedPageTables != nil {
- // ignore change to the read-only upper shared portion.
- if uintptr(addr) >= p.upperStart {
- return false
- }
- if uintptr(addr)+length > p.upperStart {
- length = p.upperStart - uintptr(addr)
- }
- }
- w := unmapWalker{
- pageTables: p,
- visitor: unmapVisitor{
- count: 0,
- },
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.count > 0
-}
-
-// emptyVisitor is used for emptiness checks.
-type emptyVisitor struct {
- count int
-}
-
-//go:nosplit
-func (*emptyVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*emptyVisitor) requiresSplit() bool { return false }
-
-// visit unmaps the given entry.
-//
-//go:nosplit
-func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
- v.count++
- return true
-}
-
-// IsEmpty checks if the given range is empty.
-//
-// Precondition: addr & length must be page-aligned.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
- w := emptyWalker{
- pageTables: p,
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.count == 0
-}
-
-// lookupVisitor is used for lookup.
-type lookupVisitor struct {
- target uintptr // Input & Output.
- findFirst bool // Input.
- physical uintptr // Output.
- size uintptr // Output.
- opts MapOpts // Output.
-}
-
-// visit matches the given address.
-//
-//go:nosplit
-func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
- if !pte.Valid() {
- // If looking for the first, then we just keep iterating until
- // we find a valid entry.
- return v.findFirst
- }
- // Is this within the current range?
- v.target = start
- v.physical = pte.Address()
- v.size = (align + 1)
- v.opts = pte.Opts()
- return false
-}
-
-//go:nosplit
-func (*lookupVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*lookupVisitor) requiresSplit() bool { return false }
-
-// Lookup returns the physical address for the given virtual address.
-//
-// If findFirst is true, then the next valid address after addr is returned.
-// If findFirst is false, then only a mapping for addr will be returned.
-//
-// Note that if size is zero, then no matching entry was found.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) {
- mask := uintptr(usermem.PageSize - 1)
- addr &^= usermem.Addr(mask)
- w := lookupWalker{
- pageTables: p,
- visitor: lookupVisitor{
- target: uintptr(addr),
- findFirst: findFirst,
- },
- }
- end := ^usermem.Addr(0) &^ usermem.Addr(mask)
- if !findFirst {
- end = addr + 1
- }
- w.iterateRange(uintptr(addr), uintptr(end))
- return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts
-}
-
-// MarkReadOnlyShared marks the pagetables read-only and can be shared.
-//
-// It is usually used on the pagetables that are used as the upper
-func (p *PageTables) MarkReadOnlyShared() {
- p.readOnlyShared = true
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
deleted file mode 100644
index 163a3aea3..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-import (
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// archPageTables is architecture-specific data.
-type archPageTables struct {
- // root is the pagetable root for kernel space.
- root *PTEs
-
- // rootPhysical is the cached physical address of the root.
- //
- // This is saved only to prevent constant translation.
- rootPhysical uintptr
-
- asid uint16
-}
-
-// TTBR0_EL1 returns the translation table base register 0.
-//
-//go:nosplit
-func (p *PageTables) TTBR0_EL1(noFlush bool, asid uint16) uint64 {
- return uint64(p.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
-}
-
-// TTBR1_EL1 returns the translation table base register 1.
-//
-//go:nosplit
-func (p *PageTables) TTBR1_EL1(noFlush bool, asid uint16) uint64 {
- return uint64(p.archPageTables.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
-}
-
-// Bits in page table entries.
-const (
- typeTable = 0x3 << 0
- typeSect = 0x1 << 0
- typePage = 0x3 << 0
- pteValid = 0x1 << 0
- pteTableBit = 0x1 << 1
- pteTypeMask = 0x3 << 0
- present = pteValid | pteTableBit
- user = 0x1 << 6 /* AP[1] */
- readOnly = 0x1 << 7 /* AP[2] */
- accessed = 0x1 << 10
- dbm = 0x1 << 51
- writable = dbm
- cont = 0x1 << 52
- pxn = 0x1 << 53
- xn = 0x1 << 54
- dirty = 0x1 << 55
- nG = 0x1 << 11
- shared = 0x3 << 8
-)
-
-const (
- mtDevicenGnRE = 0x1 << 2
- mtNormal = 0x4 << 2
-)
-
-const (
- executeDisable = xn
- optionMask = 0xfff | 0xffff<<48
- protDefault = accessed | shared
-)
-
-// MapOpts are x86 options.
-type MapOpts struct {
- // AccessType defines permissions.
- AccessType usermem.AccessType
-
- // Global indicates the page is globally accessible.
- Global bool
-
- // User indicates the page is a user page.
- User bool
-}
-
-// PTE is a page table entry.
-type PTE uintptr
-
-// Clear clears this PTE, including sect page information.
-//
-//go:nosplit
-func (p *PTE) Clear() {
- atomic.StoreUintptr((*uintptr)(p), 0)
-}
-
-// Valid returns true iff this entry is valid.
-//
-//go:nosplit
-func (p *PTE) Valid() bool {
- return atomic.LoadUintptr((*uintptr)(p))&present != 0
-}
-
-// Opts returns the PTE options.
-//
-// These are all options except Valid and Sect.
-//
-//go:nosplit
-func (p *PTE) Opts() MapOpts {
- v := atomic.LoadUintptr((*uintptr)(p))
-
- return MapOpts{
- AccessType: usermem.AccessType{
- Read: true,
- Write: v&readOnly == 0,
- Execute: v&xn == 0,
- },
- Global: v&nG == 0,
- User: v&user != 0,
- }
-}
-
-// SetSect sets this page as a sect page.
-//
-// The page must not be valid or a panic will result.
-//
-//go:nosplit
-func (p *PTE) SetSect() {
- if p.Valid() {
- // This is not allowed.
- panic("SetSect called on valid page!")
- }
- atomic.StoreUintptr((*uintptr)(p), typeSect)
-}
-
-// IsSect returns true iff this page is a sect page.
-//
-//go:nosplit
-func (p *PTE) IsSect() bool {
- return atomic.LoadUintptr((*uintptr)(p))&pteTypeMask == typeSect
-}
-
-// Set sets this PTE value.
-//
-// This does not change the sect page property.
-//
-//go:nosplit
-func (p *PTE) Set(addr uintptr, opts MapOpts) {
- v := (addr &^ optionMask) | nG | readOnly | protDefault
- if p.IsSect() {
- // Note that this is inherited from the previous instance. Set
- // does not change the value of Sect. See above.
- v |= typeSect
- } else {
- v |= typePage
- }
- if !opts.AccessType.Any() {
- // Leave as non-valid if no access is available.
- v &^= pteValid
- }
-
- if opts.Global {
- v = v &^ nG
- }
-
- if opts.AccessType.Execute {
- v = v &^ executeDisable
- } else {
- v |= executeDisable
- }
- if opts.AccessType.Write {
- v = v &^ readOnly
- }
-
- if opts.User {
- v |= user
- v |= mtNormal
- } else {
- v = v &^ user
- v |= mtNormal
- }
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// setPageTable sets this PTE value and forces the write bit and sect bit to
-// be cleared. This is used explicitly for breaking sect pages.
-//
-//go:nosplit
-func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
- addr := pt.Allocator.PhysicalFor(ptes)
- if addr&^optionMask != addr {
- // This should never happen.
- panic("unaligned physical address!")
- }
- v := addr | typeTable | protDefault | mtNormal
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// Address extracts the address. This should only be used if Valid returns true.
-//
-//go:nosplit
-func (p *PTE) Address() uintptr {
- return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64_state_autogen.go
deleted file mode 100644
index a24523f87..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64_state_autogen.go
+++ /dev/null
@@ -1,6 +0,0 @@
-// automatically generated by stateify.
-
-// +build arm64
-// +build arm64
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
deleted file mode 100644
index a217f404c..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Address constraints.
-//
-// The lowerTop and upperBottom currently apply to four-level pagetables;
-// additional refactoring would be necessary to support five-level pagetables.
-const (
- lowerTop = 0x00007fffffffffff
- upperBottom = 0xffff800000000000
-
- pteShift = 12
- pmdShift = 21
- pudShift = 30
- pgdShift = 39
-
- pteMask = 0x1ff << pteShift
- pmdMask = 0x1ff << pmdShift
- pudMask = 0x1ff << pudShift
- pgdMask = 0x1ff << pgdShift
-
- pteSize = 1 << pteShift
- pmdSize = 1 << pmdShift
- pudSize = 1 << pudShift
- pgdSize = 1 << pgdShift
-
- executeDisable = 1 << 63
- entriesPerPage = 512
-)
-
-// InitArch does some additional initialization related to the architecture.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) InitArch(allocator Allocator) {
- if p.upperSharedPageTables != nil {
- p.cloneUpperShared()
- }
-}
-
-//go:nosplit
-func pgdIndex(upperStart uintptr) uintptr {
- if upperStart&(pgdSize-1) != 0 {
- panic("upperStart should be pgd size aligned")
- }
- if upperStart >= upperBottom {
- return entriesPerPage/2 + (upperStart-upperBottom)/pgdSize
- }
- if upperStart < lowerTop {
- return upperStart / pgdSize
- }
- panic("upperStart should be in canonical range")
-}
-
-// cloneUpperShared clone the upper from the upper shared page tables.
-//
-//go:nosplit
-func (p *PageTables) cloneUpperShared() {
- start := pgdIndex(p.upperStart)
- copy(p.root[start:entriesPerPage], p.upperSharedPageTables.root[start:entriesPerPage])
-}
-
-// PTEs is a collection of entries.
-type PTEs [entriesPerPage]PTE
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_state_autogen.go
deleted file mode 100644
index f48a8acd1..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_state_autogen.go
+++ /dev/null
@@ -1,5 +0,0 @@
-// automatically generated by stateify.
-
-// +build amd64
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
deleted file mode 100644
index fef7a0fd1..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Address constraints.
-//
-// The lowerTop and upperBottom currently apply to four-level pagetables;
-// additional refactoring would be necessary to support five-level pagetables.
-const (
- lowerTop = 0x0000ffffffffffff
- upperBottom = 0xffff000000000000
- pteShift = 12
- pmdShift = 21
- pudShift = 30
- pgdShift = 39
-
- pteMask = 0x1ff << pteShift
- pmdMask = 0x1ff << pmdShift
- pudMask = 0x1ff << pudShift
- pgdMask = 0x1ff << pgdShift
-
- pteSize = 1 << pteShift
- pmdSize = 1 << pmdShift
- pudSize = 1 << pudShift
- pgdSize = 1 << pgdShift
-
- ttbrASIDOffset = 48
- ttbrASIDMask = 0xff
-
- entriesPerPage = 512
-)
-
-// InitArch does some additional initialization related to the architecture.
-//
-// +checkescape:hard,stack
-//go:nosplit
-func (p *PageTables) InitArch(allocator Allocator) {
- if p.upperSharedPageTables != nil {
- p.cloneUpperShared()
- } else {
- p.archPageTables.root = p.Allocator.NewPTEs()
- p.archPageTables.rootPhysical = p.Allocator.PhysicalFor(p.archPageTables.root)
- }
-}
-
-// cloneUpperShared clone the upper from the upper shared page tables.
-//
-//go:nosplit
-func (p *PageTables) cloneUpperShared() {
- if p.upperStart != upperBottom {
- panic("upperStart should be the same as upperBottom")
- }
-
- p.archPageTables.root = p.upperSharedPageTables.archPageTables.root
- p.archPageTables.rootPhysical = p.upperSharedPageTables.archPageTables.rootPhysical
-}
-
-// PTEs is a collection of entries.
-type PTEs [entriesPerPage]PTE
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_state_autogen.go
deleted file mode 100644
index ae9d2b272..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_state_autogen.go
+++ /dev/null
@@ -1,5 +0,0 @@
-// automatically generated by stateify.
-
-// +build arm64
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go
deleted file mode 100644
index 4c4540603..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_state_autogen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-// automatically generated by stateify.
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe_state_autogen.go
deleted file mode 100644
index 4c4540603..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe_state_autogen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-// automatically generated by stateify.
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
deleted file mode 100644
index 32edd2f0a..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ /dev/null
@@ -1,183 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build 386 amd64
-
-package pagetables
-
-import (
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// archPageTables is architecture-specific data.
-type archPageTables struct {
- // pcid is the value assigned by PCIDs.Assign.
- //
- // Note that zero is a valid PCID.
- pcid uint16
-}
-
-// CR3 returns the CR3 value for these tables.
-//
-// This may be called in interrupt contexts. A PCID of zero always implies a
-// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for
-// more information.
-//
-//go:nosplit
-func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 {
- // Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
- const noFlushBit uint64 = 0x8000000000000000
- if noFlush && pcid != 0 {
- return noFlushBit | uint64(p.rootPhysical) | uint64(pcid)
- }
- return uint64(p.rootPhysical) | uint64(pcid)
-}
-
-// Bits in page table entries.
-const (
- present = 0x001
- writable = 0x002
- user = 0x004
- writeThrough = 0x008
- cacheDisable = 0x010
- accessed = 0x020
- dirty = 0x040
- super = 0x080
- global = 0x100
- optionMask = executeDisable | 0xfff
-)
-
-// MapOpts are x86 options.
-type MapOpts struct {
- // AccessType defines permissions.
- AccessType usermem.AccessType
-
- // Global indicates the page is globally accessible.
- Global bool
-
- // User indicates the page is a user page.
- User bool
-}
-
-// PTE is a page table entry.
-type PTE uintptr
-
-// Clear clears this PTE, including super page information.
-//
-//go:nosplit
-func (p *PTE) Clear() {
- atomic.StoreUintptr((*uintptr)(p), 0)
-}
-
-// Valid returns true iff this entry is valid.
-//
-//go:nosplit
-func (p *PTE) Valid() bool {
- return atomic.LoadUintptr((*uintptr)(p))&present != 0
-}
-
-// Opts returns the PTE options.
-//
-// These are all options except Valid and Super.
-//
-//go:nosplit
-func (p *PTE) Opts() MapOpts {
- v := atomic.LoadUintptr((*uintptr)(p))
- return MapOpts{
- AccessType: usermem.AccessType{
- Read: v&present != 0,
- Write: v&writable != 0,
- Execute: v&executeDisable == 0,
- },
- Global: v&global != 0,
- User: v&user != 0,
- }
-}
-
-// SetSuper sets this page as a super page.
-//
-// The page must not be valid or a panic will result.
-//
-//go:nosplit
-func (p *PTE) SetSuper() {
- if p.Valid() {
- // This is not allowed.
- panic("SetSuper called on valid page!")
- }
- atomic.StoreUintptr((*uintptr)(p), super)
-}
-
-// IsSuper returns true iff this page is a super page.
-//
-//go:nosplit
-func (p *PTE) IsSuper() bool {
- return atomic.LoadUintptr((*uintptr)(p))&super != 0
-}
-
-// Set sets this PTE value.
-//
-// This does not change the super page property.
-//
-//go:nosplit
-func (p *PTE) Set(addr uintptr, opts MapOpts) {
- if !opts.AccessType.Any() {
- p.Clear()
- return
- }
- v := (addr &^ optionMask)
- if opts.AccessType.Any() {
- v |= present | accessed
- }
- if opts.User {
- v |= user
- }
- if opts.Global {
- v |= global
- }
- if !opts.AccessType.Execute {
- v |= executeDisable
- }
- if opts.AccessType.Write {
- v |= writable | dirty
- }
- if p.IsSuper() {
- // Note that this is inherited from the previous instance. Set
- // does not change the value of Super. See above.
- v |= super
- }
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// setPageTable sets this PTE value and forces the write bit and super bit to
-// be cleared. This is used explicitly for breaking super pages.
-//
-//go:nosplit
-func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
- addr := pt.Allocator.PhysicalFor(ptes)
- if addr&^optionMask != addr {
- // This should never happen.
- panic("unaligned physical address!")
- }
- v := addr | present | user | writable | accessed | dirty
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// Address extracts the address. This should only be used if Valid returns true.
-//
-//go:nosplit
-func (p *PTE) Address() uintptr {
- return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86_state_autogen.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86_state_autogen.go
deleted file mode 100644
index 6fe78c51c..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86_state_autogen.go
+++ /dev/null
@@ -1,6 +0,0 @@
-// automatically generated by stateify.
-
-// +build 386 amd64
-// +build i386 amd64
-
-package pagetables
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/sentry/platform/ring0/pagetables/pcids.go
deleted file mode 100644
index 964496aac..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids.go
+++ /dev/null
@@ -1,104 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
- "gvisor.dev/gvisor/pkg/sync"
-)
-
-// PCIDs is a simple PCID database.
-//
-// This is not protected by locks and is thus suitable for use only with a
-// single CPU at a time.
-type PCIDs struct {
- // mu protects below.
- mu sync.Mutex
-
- // cache are the assigned page tables.
- cache map[*PageTables]uint16
-
- // avail are available PCIDs.
- avail []uint16
-}
-
-// NewPCIDs returns a new PCID database.
-//
-// start is the first index to assign. Typically this will be one, as the zero
-// pcid will always be flushed on transition (see pagetables_x86.go). This may
-// be more than one if specific PCIDs are reserved.
-//
-// Nil is returned iff the start and size are out of range.
-func NewPCIDs(start, size uint16) *PCIDs {
- if start+uint16(size) > limitPCID {
- return nil // See comment.
- }
- p := &PCIDs{
- cache: make(map[*PageTables]uint16),
- }
- for pcid := start; pcid < start+size; pcid++ {
- p.avail = append(p.avail, pcid)
- }
- return p
-}
-
-// Assign assigns a PCID to the given PageTables.
-//
-// This may overwrite any previous assignment provided. If this in the case,
-// true is returned to indicate that the PCID should be flushed.
-func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- p.mu.Unlock()
- return pcid, false // No flush.
- }
-
- // Is there something available?
- if len(p.avail) > 0 {
- pcid := p.avail[len(p.avail)-1]
- p.avail = p.avail[:len(p.avail)-1]
- p.cache[pt] = pcid
-
- // We need to flush because while this is in the available
- // pool, it may have been used previously.
- p.mu.Unlock()
- return pcid, true
- }
-
- // Evict an existing table.
- for old, pcid := range p.cache {
- delete(p.cache, old)
- p.cache[pt] = pcid
-
- // A flush is definitely required in this case, these page
- // tables may still be active. (They will just be assigned some
- // other PCID if and when they hit the given CPU again.)
- p.mu.Unlock()
- return pcid, true
- }
-
- // No PCID.
- p.mu.Unlock()
- return 0, false
-}
-
-// Drop drops references to a set of page tables.
-func (p *PCIDs) Drop(pt *PageTables) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- delete(p.cache, pt)
- p.avail = append(p.avail, pcid)
- }
- p.mu.Unlock()
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
deleted file mode 100644
index fbfd41d83..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-// limitPCID is the maximum value of PCIDs.
-//
-// In VMSAv8-64, the PCID(ASID) size is an IMPLEMENTATION DEFINED choice
-// of 8 bits or 16 bits, and ID_AA64MMFR0_EL1.ASIDBits identifies the
-// supported size. When an implementation supports a 16-bit ASID, TCR_ELx.AS
-// selects whether the top 8 bits of the ASID are used.
-var limitPCID uint16
-
-// GetASIDBits return the system ASID bits, 8 or 16 bits.
-func GetASIDBits() uint8
-
-func init() {
- limitPCID = uint16(1)<<GetASIDBits() - 1
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
deleted file mode 100644
index e9d62d768..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-#include "funcdata.h"
-#include "textflag.h"
-
-#define ID_AA64MMFR0_ASIDBITS_SHIFT 4
-#define ID_AA64MMFR0_ASIDBITS_16 2
-#define TCR_EL1_AS_BIT 36
-
-// GetASIDBits return the system ASID bits, 8 or 16 bits.
-//
-// func GetASIDBits() uint8
-TEXT ·GetASIDBits(SB),NOSPLIT,$0-1
- // First, check whether 16bits ASID is supported.
- // ID_AA64MMFR0_EL1.ASIDBITS[7:4] == 0010.
- WORD $0xd5380700 // MRS ID_AA64MMFR0_EL1, R0
- UBFX $ID_AA64MMFR0_ASIDBITS_SHIFT, R0, $4, R0
- CMPW $ID_AA64MMFR0_ASIDBITS_16, R0
- BNE bits_8
-
- // Second, check whether 16bits ASID is enabled.
- // TCR_EL1.AS[36] == 1.
- WORD $0xd5382040 // MRS TCR_EL1, R0
- TBZ $TCR_EL1_AS_BIT, R0, bits_8
- MOVD $16, R0
- B done
-bits_8:
- MOVD $8, R0
-done:
- MOVB R0, ret+0(FP)
- RET
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
deleted file mode 100644
index 91fc5e8dd..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build i386 amd64
-
-package pagetables
-
-// limitPCID is the maximum value of valid PCIDs.
-const limitPCID = 4095
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_amd64.go
deleted file mode 100644
index eb4fbcc31..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go
+++ /dev/null
@@ -1,221 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *Walker) iterateRangeCanonical(start, end uintptr) bool {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- start = next(start, pgdSize)
- continue
- }
-
- // Allocate a new pgd.
- pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator.
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above.
- }
-
- // Map the next level.
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPUDEntries++
- start = next(start, pudSize)
- continue
- }
-
- // This level has 1-GB super pages. Is this
- // entire region at least as large as a single
- // PUD entry? If so, we can skip allocating a
- // new page for the pmd.
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = next(start, pudSize)
- continue
- }
- }
-
- // Allocate a new pud.
- pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
- // Install the relevant entries.
- pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
- // A super page to be checked directly.
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
-
- // Might have been cleared.
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- // Note that the super page was changed.
- start = next(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) // escapes: see above.
- }
-
- // Map the next level, since this is valid.
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPMDEntries++
- start = next(start, pmdSize)
- continue
- }
-
- // This level has 2-MB huge pages. If this
- // region is contined in a single PMD entry?
- // As above, we can skip allocating a new page.
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = next(start, pmdSize)
- continue
- }
- }
-
- // Allocate a new pmd.
- pteEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above.
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
- // Install the relevant entries.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
- // A huge page to be checked directly.
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
-
- // Might have been cleared.
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- // Note that the huge page was changed.
- start = next(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) // escapes: see above.
- }
-
- // Map the next level, since this is valid.
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- // At this point, we are guaranteed that start%pteSize == 0.
- if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- }
-
- // Note that the pte was changed.
- start += pteSize
- continue
- }
-
- // Check if we no longer need this page.
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries) // escapes: see above.
- clearPMDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries) // escapes: see above.
- clearPUDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above.
- }
- }
- return true
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
deleted file mode 100644
index 5ed881c7a..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *Walker) iterateRangeCanonical(start, end uintptr) bool {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- start = next(start, pgdSize)
- continue
- }
-
- // Allocate a new pgd.
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- // Map the next level.
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPUDEntries++
- start = next(start, pudSize)
- continue
- }
-
- // This level has 1-GB sect pages. Is this
- // entire region at least as large as a single
- // PUD entry? If so, we can skip allocating a
- // new page for the pmd.
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = next(start, pudSize)
- continue
- }
- }
-
- // Allocate a new pud.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
- // Install the relevant entries.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
- // A sect page to be checked directly.
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
-
- // Might have been cleared.
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- // Note that the sect page was changed.
- start = next(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPMDEntries++
- start = next(start, pmdSize)
- continue
- }
-
- // This level has 2-MB huge pages. If this
- // region is contined in a single PMD entry?
- // As above, we can skip allocating a new page.
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = next(start, pmdSize)
- continue
- }
- }
-
- // Allocate a new pmd.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
- // Install the relevant entries.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
- // A huge page to be checked directly.
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
-
- // Might have been cleared.
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- // Note that the huge page was changed.
- start = next(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- // At this point, we are guaranteed that start%pteSize == 0.
- if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- // Note that the pte was changed.
- start += pteSize
- continue
- }
-
- // Check if we no longer need this page.
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_empty_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_empty_amd64.go
deleted file mode 100644
index a3cd7a1a2..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_empty_amd64.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// +build amd64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *emptyWalker) iterateRangeCanonical(start, end uintptr) bool {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = emptynext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = emptynext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = emptynext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < emptynext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = emptynext(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = emptynext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = emptynext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < emptynext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = emptynext(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type emptyWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor emptyVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *emptyWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func emptynext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_empty_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_empty_arm64.go
deleted file mode 100644
index d61b44b65..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_empty_arm64.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// +build arm64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *emptyWalker) iterateRangeCanonical(start, end uintptr) bool {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = emptynext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = emptynext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = emptynext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < emptynext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = emptynext(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = emptynext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = emptynext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < emptynext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = emptynext(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type emptyWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor emptyVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *emptyWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func emptynext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_generic.go b/pkg/sentry/platform/ring0/pagetables/walker_generic.go
deleted file mode 100644
index 34fba7b84..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_generic.go
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Visitor is a generic type.
-type Visitor interface {
- // visit is called on each PTE. The returned boolean indicates whether
- // the walk should continue.
- visit(start uintptr, pte *PTE, align uintptr) bool
-
- // requiresAlloc indicates that new entries should be allocated within
- // the walked range.
- requiresAlloc() bool
-
- // requiresSplit indicates that entries in the given range should be
- // split if they are huge or jumbo pages.
- requiresSplit() bool
-}
-
-// Walker walks page tables.
-type Walker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor Visitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *Walker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func next(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_lookup_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_lookup_amd64.go
deleted file mode 100644
index c92c1cb44..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_lookup_amd64.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// +build amd64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *lookupWalker) iterateRangeCanonical(start, end uintptr) bool {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = lookupnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = lookupnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = lookupnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < lookupnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = lookupnext(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = lookupnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = lookupnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < lookupnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = lookupnext(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type lookupWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor lookupVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *lookupWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func lookupnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_lookup_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_lookup_arm64.go
deleted file mode 100644
index 74062a00a..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_lookup_arm64.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// +build arm64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *lookupWalker) iterateRangeCanonical(start, end uintptr) bool {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = lookupnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = lookupnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = lookupnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < lookupnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = lookupnext(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = lookupnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = lookupnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < lookupnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = lookupnext(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type lookupWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor lookupVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *lookupWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func lookupnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_map_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_map_amd64.go
deleted file mode 100644
index 1c6c1a032..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_map_amd64.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// +build amd64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *mapWalker) iterateRangeCanonical(start, end uintptr) bool {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = mapnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = mapnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = mapnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < mapnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = mapnext(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = mapnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = mapnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < mapnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = mapnext(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type mapWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor mapVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *mapWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func mapnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_map_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_map_arm64.go
deleted file mode 100644
index 8223de306..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_map_arm64.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// +build arm64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *mapWalker) iterateRangeCanonical(start, end uintptr) bool {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = mapnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = mapnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = mapnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < mapnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = mapnext(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = mapnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = mapnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < mapnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = mapnext(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type mapWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor mapVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *mapWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func mapnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_unmap_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_unmap_amd64.go
deleted file mode 100644
index 82b27ab64..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_unmap_amd64.go
+++ /dev/null
@@ -1,265 +0,0 @@
-// +build amd64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *unmapWalker) iterateRangeCanonical(start, end uintptr) bool {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = unmapnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = unmapnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = unmapnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < unmapnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = unmapnext(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = unmapnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = unmapnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < unmapnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = unmapnext(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type unmapWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor unmapVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *unmapWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func unmapnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_unmap_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_unmap_arm64.go
deleted file mode 100644
index 1ecccbf27..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_unmap_arm64.go
+++ /dev/null
@@ -1,275 +0,0 @@
-// +build arm64
-
-package pagetables
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *unmapWalker) iterateRangeCanonical(start, end uintptr) bool {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- start = unmapnext(start, pgdSize)
- continue
- }
-
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPUDEntries++
- start = unmapnext(start, pudSize)
- continue
- }
-
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
- if pudEntry.Valid() {
- start = unmapnext(start, pudSize)
- continue
- }
- }
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < unmapnext(start, pudSize)) {
-
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) {
- return false
- }
-
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- start = unmapnext(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
-
- clearPMDEntries++
- start = unmapnext(start, pmdSize)
- continue
- }
-
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
- if pmdEntry.Valid() {
- start = unmapnext(start, pmdSize)
- continue
- }
- }
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
-
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < unmapnext(start, pmdSize)) {
-
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
-
- if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) {
- return false
- }
-
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- start = unmapnext(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) {
- return false
- }
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- start += pteSize
- continue
- }
-
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
- return true
-}
-
-// Walker walks page tables.
-type unmapWalker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor unmapVisitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super/sect pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-// Precondition: start must be less than end.
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *unmapWalker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- if !w.iterateRangeCanonical(start, lowerTop) {
- return
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func unmapnext(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
diff --git a/pkg/sentry/platform/ring0/ring0.go b/pkg/sentry/platform/ring0/ring0.go
deleted file mode 100644
index cdeb1b43a..000000000
--- a/pkg/sentry/platform/ring0/ring0.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ring0 provides basic operating system-level stubs.
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_amd64_state_autogen.go b/pkg/sentry/platform/ring0/ring0_amd64_state_autogen.go
deleted file mode 100644
index 96cf5d331..000000000
--- a/pkg/sentry/platform/ring0/ring0_amd64_state_autogen.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// automatically generated by stateify.
-
-// +build amd64
-// +build amd64
-// +build amd64
-
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_arm64_state_autogen.go b/pkg/sentry/platform/ring0/ring0_arm64_state_autogen.go
deleted file mode 100644
index 7f2ab3537..000000000
--- a/pkg/sentry/platform/ring0/ring0_arm64_state_autogen.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// automatically generated by stateify.
-
-// +build arm64
-// +build arm64
-// +build arm64
-
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_impl_amd64_state_autogen.go b/pkg/sentry/platform/ring0/ring0_impl_amd64_state_autogen.go
deleted file mode 100644
index 770a13dfa..000000000
--- a/pkg/sentry/platform/ring0/ring0_impl_amd64_state_autogen.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// automatically generated by stateify.
-
-// +build amd64
-// +build amd64
-// +build 386 amd64
-
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_impl_arm64_state_autogen.go b/pkg/sentry/platform/ring0/ring0_impl_arm64_state_autogen.go
deleted file mode 100644
index 7f2ab3537..000000000
--- a/pkg/sentry/platform/ring0/ring0_impl_arm64_state_autogen.go
+++ /dev/null
@@ -1,7 +0,0 @@
-// automatically generated by stateify.
-
-// +build arm64
-// +build arm64
-// +build arm64
-
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_state_autogen.go b/pkg/sentry/platform/ring0/ring0_state_autogen.go
deleted file mode 100644
index 327aba163..000000000
--- a/pkg/sentry/platform/ring0/ring0_state_autogen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-// automatically generated by stateify.
-
-package ring0
diff --git a/pkg/sentry/platform/ring0/ring0_unsafe_state_autogen.go b/pkg/sentry/platform/ring0/ring0_unsafe_state_autogen.go
deleted file mode 100644
index 327aba163..000000000
--- a/pkg/sentry/platform/ring0/ring0_unsafe_state_autogen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-// automatically generated by stateify.
-
-package ring0