summaryrefslogtreecommitdiffhomepage
path: root/pkg/ring0
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/ring0')
-rw-r--r--pkg/ring0/aarch64.go1
-rw-r--r--pkg/ring0/defs_amd64.go1
-rw-r--r--pkg/ring0/defs_arm64.go1
-rw-r--r--pkg/ring0/entry_amd64.go98
-rw-r--r--pkg/ring0/entry_amd64.s147
-rw-r--r--pkg/ring0/entry_arm64.go1
-rw-r--r--pkg/ring0/kernel_amd64.go50
-rw-r--r--pkg/ring0/kernel_arm64.go1
-rw-r--r--pkg/ring0/kernel_unsafe.go5
-rw-r--r--pkg/ring0/lib_amd64.go7
-rw-r--r--pkg/ring0/lib_amd64.s23
-rw-r--r--pkg/ring0/lib_arm64.go1
-rw-r--r--pkg/ring0/offsets_amd64.go5
-rw-r--r--pkg/ring0/offsets_arm64.go1
-rw-r--r--pkg/ring0/pagetables/pagetables_aarch64.go1
-rw-r--r--pkg/ring0/pagetables/pagetables_amd64_test.go1
-rw-r--r--pkg/ring0/pagetables/pagetables_arm64_test.go1
-rw-r--r--pkg/ring0/pagetables/pagetables_x86.go1
-rw-r--r--pkg/ring0/pagetables/pcids_aarch64.go1
-rw-r--r--pkg/ring0/pagetables/pcids_aarch64.s1
-rw-r--r--pkg/ring0/pagetables/pcids_x86.go1
-rw-r--r--pkg/ring0/pagetables/walker_amd64.go1
-rw-r--r--pkg/ring0/pagetables/walker_arm64.go1
-rw-r--r--pkg/ring0/x86.go2
24 files changed, 281 insertions, 72 deletions
diff --git a/pkg/ring0/aarch64.go b/pkg/ring0/aarch64.go
index 3bda594f9..96c884844 100644
--- a/pkg/ring0/aarch64.go
+++ b/pkg/ring0/aarch64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/defs_amd64.go b/pkg/ring0/defs_amd64.go
index 76776c65c..24f6e4cde 100644
--- a/pkg/ring0/defs_amd64.go
+++ b/pkg/ring0/defs_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package ring0
diff --git a/pkg/ring0/defs_arm64.go b/pkg/ring0/defs_arm64.go
index 0125690d2..3e212516f 100644
--- a/pkg/ring0/defs_arm64.go
+++ b/pkg/ring0/defs_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/entry_amd64.go b/pkg/ring0/entry_amd64.go
index d87b1fd00..afd646b0b 100644
--- a/pkg/ring0/entry_amd64.go
+++ b/pkg/ring0/entry_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package ring0
@@ -31,6 +32,13 @@ import (
// executed from kernel mode or not and the appropriate stub is called.
func sysenter()
+// addrOfSysenter returns the start address of sysenter.
+//
+// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
+// wrapper function rather than the function itself. We must reference from
+// assembly to get the ABI0 (i.e., primary) address.
+func addrOfSysenter() uintptr
+
// swapgs swaps the current GS value.
//
// This must be called prior to sysret/iret.
@@ -39,6 +47,9 @@ func swapgs()
// jumpToKernel jumps to the kernel version of the current RIP.
func jumpToKernel()
+// jumpToUser jumps to the user version of the current RIP.
+func jumpToUser()
+
// sysret returns to userspace from a system call.
//
// The return code is the vector that interrupted execution.
@@ -65,7 +76,12 @@ func exception()
// This is used when processing kernel exceptions and syscalls.
func resume()
-// Start is the CPU entrypoint.
+// start is the CPU entrypoint.
+//
+// See requirements below.
+func start()
+
+// AddrOfStart return the address of the CPU entrypoint.
//
// The following start conditions must be satisfied:
//
@@ -78,7 +94,11 @@ func resume()
// * c.EFER() should be the current EFER value.
//
// The CPU state will be set to c.Registers().
-func Start()
+//
+// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
+// wrapper function rather than the function itself. We must reference from
+// assembly to get the ABI0 (i.e., primary) address.
+func AddrOfStart() uintptr
// Exception stubs.
func divideByZero()
@@ -104,28 +124,56 @@ func virtualizationException()
func securityException()
func syscallInt80()
+// These returns the start address of the functions above.
+//
+// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
+// wrapper function rather than the function itself. We must reference from
+// assembly to get the ABI0 (i.e., primary) address.
+func addrOfDivideByZero() uintptr
+func addrOfDebug() uintptr
+func addrOfNMI() uintptr
+func addrOfBreakpoint() uintptr
+func addrOfOverflow() uintptr
+func addrOfBoundRangeExceeded() uintptr
+func addrOfInvalidOpcode() uintptr
+func addrOfDeviceNotAvailable() uintptr
+func addrOfDoubleFault() uintptr
+func addrOfCoprocessorSegmentOverrun() uintptr
+func addrOfInvalidTSS() uintptr
+func addrOfSegmentNotPresent() uintptr
+func addrOfStackSegmentFault() uintptr
+func addrOfGeneralProtectionFault() uintptr
+func addrOfPageFault() uintptr
+func addrOfX87FloatingPointException() uintptr
+func addrOfAlignmentCheck() uintptr
+func addrOfMachineCheck() uintptr
+func addrOfSimdFloatingPointException() uintptr
+func addrOfVirtualizationException() uintptr
+func addrOfSecurityException() uintptr
+func addrOfSyscallInt80() uintptr
+
// Exception handler index.
-var handlers = map[Vector]func(){
- DivideByZero: divideByZero,
- Debug: debug,
- NMI: nmi,
- Breakpoint: breakpoint,
- Overflow: overflow,
- BoundRangeExceeded: boundRangeExceeded,
- InvalidOpcode: invalidOpcode,
- DeviceNotAvailable: deviceNotAvailable,
- DoubleFault: doubleFault,
- CoprocessorSegmentOverrun: coprocessorSegmentOverrun,
- InvalidTSS: invalidTSS,
- SegmentNotPresent: segmentNotPresent,
- StackSegmentFault: stackSegmentFault,
- GeneralProtectionFault: generalProtectionFault,
- PageFault: pageFault,
- X87FloatingPointException: x87FloatingPointException,
- AlignmentCheck: alignmentCheck,
- MachineCheck: machineCheck,
- SIMDFloatingPointException: simdFloatingPointException,
- VirtualizationException: virtualizationException,
- SecurityException: securityException,
- SyscallInt80: syscallInt80,
+var handlers = map[Vector]uintptr{
+ DivideByZero: addrOfDivideByZero(),
+ Debug: addrOfDebug(),
+ NMI: addrOfNMI(),
+ Breakpoint: addrOfBreakpoint(),
+ Overflow: addrOfOverflow(),
+ BoundRangeExceeded: addrOfBoundRangeExceeded(),
+ InvalidOpcode: addrOfInvalidOpcode(),
+ DeviceNotAvailable: addrOfDeviceNotAvailable(),
+ DoubleFault: addrOfDoubleFault(),
+ CoprocessorSegmentOverrun: addrOfCoprocessorSegmentOverrun(),
+ InvalidTSS: addrOfInvalidTSS(),
+ SegmentNotPresent: addrOfSegmentNotPresent(),
+ StackSegmentFault: addrOfStackSegmentFault(),
+ GeneralProtectionFault: addrOfGeneralProtectionFault(),
+ PageFault: addrOfPageFault(),
+ X87FloatingPointException: addrOfX87FloatingPointException(),
+ AlignmentCheck: addrOfAlignmentCheck(),
+ MachineCheck: addrOfMachineCheck(),
+ SIMDFloatingPointException: addrOfSimdFloatingPointException(),
+ VirtualizationException: addrOfVirtualizationException(),
+ SecurityException: addrOfSecurityException(),
+ SyscallInt80: addrOfSyscallInt80(),
}
diff --git a/pkg/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s
index f59747df3..520bd9f57 100644
--- a/pkg/ring0/entry_amd64.s
+++ b/pkg/ring0/entry_amd64.s
@@ -88,11 +88,33 @@
#define LOAD_KERNEL_STACK(entry) \
MOVQ ENTRY_STACK_TOP(entry), SP;
+// ADDR_OF_FUNC defines a function named 'name' that returns the address of
+// 'symbol'.
+#define ADDR_OF_FUNC(name, symbol) \
+TEXT name,$0-8; \
+ MOVQ $symbol, AX; \
+ MOVQ AX, ret+0(FP); \
+ RET
+
// See kernel.go.
TEXT ·Halt(SB),NOSPLIT,$0
HLT
RET
+// See kernel_amd64.go.
+TEXT ·HaltAndWriteFSBase(SB),NOSPLIT,$8-8
+ HLT
+
+ // Restore FS_BASE.
+ MOVQ regs+0(FP), AX
+ MOVQ PTRACE_FS_BASE(AX), AX
+
+ PUSHQ AX // First argument (FS_BASE)
+ CALL ·writeFS(SB)
+ POPQ AX
+
+ RET
+
// See entry_amd64.go.
TEXT ·swapgs(SB),NOSPLIT,$0
SWAP_GS()
@@ -107,8 +129,29 @@ TEXT ·jumpToKernel(SB),NOSPLIT,$0
MOVQ AX, 0(SP)
RET
+// jumpToUser changes execution to the user address space.
+//
+// This works by changing the return value to the user version.
+TEXT ·jumpToUser(SB),NOSPLIT,$0
+ // N.B. we can't access KernelStartAddress from the upper half (data
+ // pages not available), so just naively clear all the upper bits.
+ // We are assuming a 47-bit virtual address space.
+ MOVQ $0x00007fffffffffff, AX
+ MOVQ 0(SP), BX
+ ANDQ BX, AX // Future return value.
+ MOVQ AX, 0(SP)
+ RET
+
// See entry_amd64.go.
TEXT ·sysret(SB),NOSPLIT,$0-24
+ // Set application FS. We can't do this in Go because Go code needs FS.
+ MOVQ regs+8(FP), AX
+ MOVQ PTRACE_FS_BASE(AX), AX
+
+ PUSHQ AX
+ CALL ·writeFS(SB)
+ POPQ AX
+
CALL ·jumpToKernel(SB)
// Save original state and stack. sysenter() or exception()
// from APP(gr3) will switch to this stack, set the return
@@ -142,6 +185,14 @@ TEXT ·sysret(SB),NOSPLIT,$0-24
// See entry_amd64.go.
TEXT ·iret(SB),NOSPLIT,$0-24
+ // Set application FS. We can't do this in Go because Go code needs FS.
+ MOVQ regs+8(FP), AX
+ MOVQ PTRACE_FS_BASE(AX), AX
+
+ PUSHQ AX // First argument (FS_BASE)
+ CALL ·writeFS(SB)
+ POPQ AX
+
CALL ·jumpToKernel(SB)
// Save original state and stack. sysenter() or exception()
// from APP(gr3) will switch to this stack, set the return
@@ -184,13 +235,29 @@ TEXT ·resume(SB),NOSPLIT,$0
IRET()
// See entry_amd64.go.
-TEXT ·Start(SB),NOSPLIT,$0
+TEXT ·start(SB),NOSPLIT,$0
+ // N.B. This is the vCPU entrypoint. It is not called from Go code and
+ // thus pushes and pops values on the stack until calling into Go
+ // (startGo) because we aren't usually a typical Go assembly frame.
+
PUSHQ $0x0 // Previous frame pointer.
MOVQ SP, BP // Set frame pointer.
- PUSHQ AX // First argument (CPU).
- CALL ·start(SB) // Call Go hook.
+
+ PUSHQ AX // Save CPU.
+
+ // Set up environment required by Go before calling startGo: Go needs
+ // FS_BASE and floating point initialized.
+ MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
+ PUSHQ BX // First argument (FS_BASE)
+ CALL ·writeFS(SB)
+ POPQ BX
+
+ // First argument (CPU) already at bottom of stack.
+ CALL ·startGo(SB) // Call Go hook.
JMP ·resume(SB) // Restore to registers.
+ADDR_OF_FUNC(·AddrOfStart(SB), ·start(SB));
+
// See entry_amd64.go.
TEXT ·sysenter(SB),NOSPLIT,$0
// _RFLAGS_IOPL0 is always set in the user mode and it is never set in
@@ -218,6 +285,18 @@ user:
MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
+ CALL ·jumpToUser(SB)
+
+ // Restore kernel FS_BASE.
+ MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
+ MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
+
+ PUSHQ BX // First argument (FS_BASE)
+ CALL ·writeFS(SB)
+ POPQ BX
+
+ MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
+
// Return to the kernel, where the frame is:
//
// vector (sp+32)
@@ -252,6 +331,8 @@ kernel:
POPQ AX // Pop vCPU.
JMP ·resume(SB)
+ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB));
+
// exception is a generic exception handler.
//
// There are two cases handled:
@@ -298,6 +379,16 @@ user:
MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)
+ CALL ·jumpToUser(SB)
+
+ // Restore kernel FS_BASE.
+ MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
+ MOVQ CPU_REGISTERS+PTRACE_FS_BASE(AX), BX
+
+ PUSHQ BX // First argument (FS_BASE)
+ CALL ·writeFS(SB)
+ POPQ BX
+
// Copy out and return.
MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
MOVQ 0(SP), BX // Load vector.
@@ -336,36 +427,38 @@ kernel:
POPQ AX // Pop vCPU.
JMP ·resume(SB)
-#define EXCEPTION_WITH_ERROR(value, symbol) \
+#define EXCEPTION_WITH_ERROR(value, symbol, addr) \
+ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT,$0; \
PUSHQ $value; \
JMP ·exception(SB);
-#define EXCEPTION_WITHOUT_ERROR(value, symbol) \
+#define EXCEPTION_WITHOUT_ERROR(value, symbol, addr) \
+ADDR_OF_FUNC(addr, symbol); \
TEXT symbol,NOSPLIT,$0; \
PUSHQ $0x0; \
PUSHQ $value; \
JMP ·exception(SB);
-EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB))
-EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB))
-EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB))
-EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB))
-EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB))
-EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB))
-EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB))
-EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB))
-EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB))
-EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB))
-EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB))
-EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB))
-EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB))
-EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB))
-EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB))
-EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB))
-EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB))
-EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB))
-EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB))
-EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB))
-EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB))
-EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB))
+EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB), ·addrOfDivideByZero(SB))
+EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB), ·addrOfDebug(SB))
+EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB), ·addrOfNMI(SB))
+EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB), ·addrOfBreakpoint(SB))
+EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB), ·addrOfOverflow(SB))
+EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB), ·addrOfBoundRangeExceeded(SB))
+EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB), ·addrOfInvalidOpcode(SB))
+EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB), ·addrOfDeviceNotAvailable(SB))
+EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB), ·addrOfDoubleFault(SB))
+EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB), ·addrOfCoprocessorSegmentOverrun(SB))
+EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB), ·addrOfInvalidTSS(SB))
+EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB), ·addrOfSegmentNotPresent(SB))
+EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB), ·addrOfStackSegmentFault(SB))
+EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB), ·addrOfGeneralProtectionFault(SB))
+EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB), ·addrOfPageFault(SB))
+EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB), ·addrOfX87FloatingPointException(SB))
+EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB), ·addrOfAlignmentCheck(SB))
+EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB), ·addrOfMachineCheck(SB))
+EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB), ·addrOfSimdFloatingPointException(SB))
+EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB), ·addrOfVirtualizationException(SB))
+EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB), ·addrOfSecurityException(SB))
+EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB), ·addrOfSyscallInt80(SB))
diff --git a/pkg/ring0/entry_arm64.go b/pkg/ring0/entry_arm64.go
index 62a93f3d6..299036478 100644
--- a/pkg/ring0/entry_arm64.go
+++ b/pkg/ring0/entry_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go
index f63af8b76..4a4c0ae26 100644
--- a/pkg/ring0/kernel_amd64.go
+++ b/pkg/ring0/kernel_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package ring0
@@ -19,12 +20,20 @@ package ring0
import (
"encoding/binary"
"reflect"
+ "sync"
"gvisor.dev/gvisor/pkg/hostarch"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
+// HaltAndWriteFSBase halts execution. On resume, it sets FS_BASE from the
+// value in regs.
+func HaltAndWriteFSBase(regs *arch.Registers)
+
// init initializes architecture-specific state.
func (k *Kernel) init(maxCPUs int) {
+ initSentryXCR0()
+
entrySize := reflect.TypeOf(kernelEntry{}).Size()
var (
entries []kernelEntry
@@ -168,7 +177,7 @@ func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) {
//
//go:nosplit
func (c *CPU) CR0() uint64 {
- return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET
+ return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET | _CR0_NE
}
// CR4 returns the CPU's CR4 value.
@@ -240,7 +249,6 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
// Perform the switch.
swapgs() // GS will be swapped on return.
- WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
LoadFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy in floating point.
if switchOpts.FullRestore {
@@ -249,38 +257,58 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
vector = sysret(c, regs, uintptr(userCR3))
}
SaveFloatingPoint(switchOpts.FloatingPointState.BytePointer()) // escapes: no. Copy out floating point.
- WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
RestoreKernelFPState() // escapes: no. Restore kernel MXCSR.
return
}
-var sentryXCR0 = xgetbv(0)
+var (
+ sentryXCR0 uintptr
+ sentryXCR0Once sync.Once
+)
-// start is the CPU entrypoint.
+// initSentryXCR0 saves a value of XCR0 in the host mode. It is used to
+// initialize XCR0 of guest vCPU-s.
+func initSentryXCR0() {
+ sentryXCR0Once.Do(func() { sentryXCR0 = xgetbv(0) })
+}
+
+// startGo is the CPU entrypoint.
//
-// This is called from the Start asm stub (see entry_amd64.go); on return the
+// This is called from the start asm stub (see entry_amd64.go); on return the
// registers in c.registers will be restored (not segments).
//
+// Note that any code written in Go should adhere to Go expected environment:
+// * Initialized floating point state (required for optimizations using
+// floating point instructions).
+// * Go TLS in FS_BASE (this is required by splittable functions, calls into
+// the runtime, calls to assembly functions (Go 1.17+ ABI wrappers access
+// TLS)).
+//
//go:nosplit
-func start(c *CPU) {
- // Save per-cpu & FS segment.
+func startGo(c *CPU) {
+ // Save per-cpu.
WriteGS(kernelAddr(c.kernelEntry))
- WriteFS(uintptr(c.registers.Fs_base))
+ //
+ // TODO(mpratt): Note that per the note above, this should be done
+ // before entering Go code. However for simplicity we leave it here for
+ // now, since the small critical sections with undefined FPU state
+ // should only contain very limited use of floating point instructions
+ // (notably, use of XMM15 as a zero register).
fninit()
// Need to sync XCR0 with the host, because xsave and xrstor can be
// called from different contexts.
xsetbv(0, sentryXCR0)
// Set the syscall target.
- wrmsr(_MSR_LSTAR, kernelFunc(sysenter))
+ wrmsr(_MSR_LSTAR, kernelFunc(addrOfSysenter()))
wrmsr(_MSR_SYSCALL_MASK, KernelFlagsClear|_RFLAGS_DF)
// NOTE: This depends on having the 64-bit segments immediately
// following the 32-bit user segments. This is simply the way the
// sysret instruction is designed to work (it assumes they follow).
wrmsr(_MSR_STAR, uintptr(uint64(Kcode)<<32|uint64(Ucode32)<<48))
- wrmsr(_MSR_CSTAR, kernelFunc(sysenter))
+ wrmsr(_MSR_CSTAR, kernelFunc(addrOfSysenter()))
}
// SetCPUIDFaulting sets CPUID faulting per the boolean value.
diff --git a/pkg/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go
index 21db910a2..79f85ff50 100644
--- a/pkg/ring0/kernel_arm64.go
+++ b/pkg/ring0/kernel_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/kernel_unsafe.go b/pkg/ring0/kernel_unsafe.go
index 16955ad91..04c60d0a7 100644
--- a/pkg/ring0/kernel_unsafe.go
+++ b/pkg/ring0/kernel_unsafe.go
@@ -35,7 +35,6 @@ func kernelAddr(obj interface{}) uintptr {
// kernelFunc returns the address of the given function.
//
//go:nosplit
-func kernelFunc(fn func()) uintptr {
- fnptr := (**uintptr)(unsafe.Pointer(&fn))
- return KernelStartAddress | **fnptr
+func kernelFunc(fn uintptr) uintptr {
+ return KernelStartAddress | fn
}
diff --git a/pkg/ring0/lib_amd64.go b/pkg/ring0/lib_amd64.go
index 3e6bb9663..05c394ff5 100644
--- a/pkg/ring0/lib_amd64.go
+++ b/pkg/ring0/lib_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package ring0
@@ -43,8 +44,8 @@ func xsave(*byte)
// xsaveopt uses xsaveopt to save floating point state.
func xsaveopt(*byte)
-// WriteFS sets the GS address (set by init).
-var WriteFS func(addr uintptr)
+// writeFS sets the FS base address (selects one of wrfsbase or wrfsmsr).
+func writeFS(addr uintptr)
// wrfsbase writes to the GS base address.
func wrfsbase(addr uintptr)
@@ -116,10 +117,8 @@ func Init(featureSet *cpuid.FeatureSet) {
LoadFloatingPoint = fxrstor
}
if hasFSGSBASE {
- WriteFS = wrfsbase
WriteGS = wrgsbase
} else {
- WriteFS = wrfsmsr
WriteGS = wrgsmsr
}
}
diff --git a/pkg/ring0/lib_amd64.s b/pkg/ring0/lib_amd64.s
index 70a43e79e..8ed98fc84 100644
--- a/pkg/ring0/lib_amd64.s
+++ b/pkg/ring0/lib_amd64.s
@@ -80,6 +80,29 @@ TEXT ·xsaveopt(SB),NOSPLIT,$0-8
BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37;
RET
+// writeFS writes to the FS base.
+//
+// This is written in assembly because it must be safe to call before the Go
+// environment is set up. See comment on start().
+//
+// Preconditions: must be running in the lower address space, as it accesses
+// global data.
+TEXT ·writeFS(SB),NOSPLIT,$8-8
+ MOVQ addr+0(FP), AX
+
+ CMPB ·hasFSGSBASE(SB), $1
+ JNE msr
+
+ PUSHQ AX
+ CALL ·wrfsbase(SB)
+ POPQ AX
+ RET
+msr:
+ PUSHQ AX
+ CALL ·wrfsmsr(SB)
+ POPQ AX
+ RET
+
// wrfsbase writes to the FS base.
//
// The code corresponds to:
diff --git a/pkg/ring0/lib_arm64.go b/pkg/ring0/lib_arm64.go
index 5eabd4296..a72a6926d 100644
--- a/pkg/ring0/lib_arm64.go
+++ b/pkg/ring0/lib_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/offsets_amd64.go b/pkg/ring0/offsets_amd64.go
index ca4075b09..75f6218b3 100644
--- a/pkg/ring0/offsets_amd64.go
+++ b/pkg/ring0/offsets_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package ring0
@@ -95,6 +96,6 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer())
fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer())
fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_FS_BASE 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_GS_BASE 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer())
}
diff --git a/pkg/ring0/offsets_arm64.go b/pkg/ring0/offsets_arm64.go
index 03adaa6b0..60b2c4074 100644
--- a/pkg/ring0/offsets_arm64.go
+++ b/pkg/ring0/offsets_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package ring0
diff --git a/pkg/ring0/pagetables/pagetables_aarch64.go b/pkg/ring0/pagetables/pagetables_aarch64.go
index 86eb00a4f..aa2a5c984 100644
--- a/pkg/ring0/pagetables/pagetables_aarch64.go
+++ b/pkg/ring0/pagetables/pagetables_aarch64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package pagetables
diff --git a/pkg/ring0/pagetables/pagetables_amd64_test.go b/pkg/ring0/pagetables/pagetables_amd64_test.go
index a13c616ae..c27b3b10a 100644
--- a/pkg/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/ring0/pagetables/pagetables_amd64_test.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package pagetables
diff --git a/pkg/ring0/pagetables/pagetables_arm64_test.go b/pkg/ring0/pagetables/pagetables_arm64_test.go
index 2514b9ac5..1c919ec7d 100644
--- a/pkg/ring0/pagetables/pagetables_arm64_test.go
+++ b/pkg/ring0/pagetables/pagetables_arm64_test.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package pagetables
diff --git a/pkg/ring0/pagetables/pagetables_x86.go b/pkg/ring0/pagetables/pagetables_x86.go
index e43698173..dc98d8452 100644
--- a/pkg/ring0/pagetables/pagetables_x86.go
+++ b/pkg/ring0/pagetables/pagetables_x86.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build 386 || amd64
// +build 386 amd64
package pagetables
diff --git a/pkg/ring0/pagetables/pcids_aarch64.go b/pkg/ring0/pagetables/pcids_aarch64.go
index fbfd41d83..ad492d039 100644
--- a/pkg/ring0/pagetables/pcids_aarch64.go
+++ b/pkg/ring0/pagetables/pcids_aarch64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package pagetables
diff --git a/pkg/ring0/pagetables/pcids_aarch64.s b/pkg/ring0/pagetables/pcids_aarch64.s
index e9d62d768..cfcedba71 100644
--- a/pkg/ring0/pagetables/pcids_aarch64.s
+++ b/pkg/ring0/pagetables/pcids_aarch64.s
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
#include "funcdata.h"
diff --git a/pkg/ring0/pagetables/pcids_x86.go b/pkg/ring0/pagetables/pcids_x86.go
index 91fc5e8dd..2a107ea70 100644
--- a/pkg/ring0/pagetables/pcids_x86.go
+++ b/pkg/ring0/pagetables/pcids_x86.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build i386 || amd64
// +build i386 amd64
package pagetables
diff --git a/pkg/ring0/pagetables/walker_amd64.go b/pkg/ring0/pagetables/walker_amd64.go
index eb4fbcc31..ca5e2f85f 100644
--- a/pkg/ring0/pagetables/walker_amd64.go
+++ b/pkg/ring0/pagetables/walker_amd64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build amd64
// +build amd64
package pagetables
diff --git a/pkg/ring0/pagetables/walker_arm64.go b/pkg/ring0/pagetables/walker_arm64.go
index 5ed881c7a..e32dbda2d 100644
--- a/pkg/ring0/pagetables/walker_arm64.go
+++ b/pkg/ring0/pagetables/walker_arm64.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build arm64
// +build arm64
package pagetables
diff --git a/pkg/ring0/x86.go b/pkg/ring0/x86.go
index 34fbc1c35..9a98703da 100644
--- a/pkg/ring0/x86.go
+++ b/pkg/ring0/x86.go
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build 386 || amd64
// +build 386 amd64
package ring0
@@ -24,6 +25,7 @@ import (
const (
_CR0_PE = 1 << 0
_CR0_ET = 1 << 4
+ _CR0_NE = 1 << 5
_CR0_AM = 1 << 18
_CR0_PG = 1 << 31