summaryrefslogtreecommitdiffhomepage
path: root/pkg/ring0/entry_amd64.s
diff options
context:
space:
mode:
authorJamie Liu <jamieliu@google.com>2021-10-08 18:00:36 -0700
committergVisor bot <gvisor-bot@google.com>2021-10-08 18:03:23 -0700
commit3f1642e4bc86c6e7febc6b2dd2b83ad48c5ee201 (patch)
treed7b6428e36cda75eb7a8702d22923cc13c22f0cc /pkg/ring0/entry_amd64.s
parent34e68b6b4ff04de5c7b5e2dc46e5bd44c6845e63 (diff)
Remove ring0 floating point save/load functions on amd64.
ring0.Save/LoadFloatingPoint() are only usable if the caller can ensure that Go will not clobber floating point registers before/after calling them respectively. Due to regabig in Go 1.17, this is no longer the case; regabig (among other things) maintains a zeroed XMM15 during ABIInternal execution, including by zeroing it after ABI0-to-ABIInternal transitions. In ring0.sysenter/exception, this happens in ring0.kernelSyscall/kernelException.abi0 respectively; in ring0.CPU.SwitchToUser, this happens after returning from ring0.sysret/iret.abi0. Delete these functions and do floating point save/load in assembly. While arm64 doesn't appear to be immediately affected (so this CL permits us to resume usage of Go 1.17), its use of Save/LoadFloatingPoint() still seems to be incorrect for the same fundamental reason (Go code can't sanely assume what registers the Go compiler will or won't use) and should be fixed eventually. PiperOrigin-RevId: 401895658
Diffstat (limited to 'pkg/ring0/entry_amd64.s')
-rw-r--r--pkg/ring0/entry_amd64.s177
1 files changed, 166 insertions, 11 deletions
diff --git a/pkg/ring0/entry_amd64.s b/pkg/ring0/entry_amd64.s
index 520bd9f57..d2913f190 100644
--- a/pkg/ring0/entry_amd64.s
+++ b/pkg/ring0/entry_amd64.s
@@ -142,8 +142,103 @@ TEXT ·jumpToUser(SB),NOSPLIT,$0
MOVQ AX, 0(SP)
RET
+// See kernel_amd64.go.
+//
+// The 16-byte frame size is for the saved values of MXCSR and the x87 control
+// word.
+TEXT ·doSwitchToUser(SB),NOSPLIT,$16-48
+ // We are passed pointers to heap objects, but do not store them in our
+ // local frame.
+ NO_LOCAL_POINTERS
+
+ // MXCSR and the x87 control word are the only floating point state
+ // that is callee-save and thus we must save.
+ STMXCSR mxcsr-0(SP)
+ FSTCW cw-8(SP)
+
+ // Restore application floating point state.
+ MOVQ cpu+0(FP), SI
+ MOVQ fpState+16(FP), DI
+ MOVB ·hasXSAVE(SB), BX
+ TESTB BX, BX
+ JZ no_xrstor
+ // Use xrstor to restore all available fp state. For now, we restore
+ // everything unconditionally by setting the implicit operand edx:eax
+ // (the "requested feature bitmap") to all 1's.
+ MOVL $0xffffffff, AX
+ MOVL $0xffffffff, DX
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f // XRSTOR64 0(DI)
+ JMP fprestore_done
+no_xrstor:
+ // Fall back to fxrstor if xsave is not available.
+ FXRSTOR64 0(DI)
+fprestore_done:
+
+ // Set application GS.
+ MOVQ regs+8(FP), R8
+ SWAP_GS()
+ MOVQ PTRACE_GS_BASE(R8), AX
+ PUSHQ AX
+ CALL ·writeGS(SB)
+ POPQ AX
+
+ // Call sysret() or iret().
+ MOVQ userCR3+24(FP), CX
+ MOVQ needIRET+32(FP), R9
+ ADDQ $-32, SP
+ MOVQ SI, 0(SP) // cpu
+ MOVQ R8, 8(SP) // regs
+ MOVQ CX, 16(SP) // userCR3
+ TESTQ R9, R9
+ JNZ do_iret
+ CALL ·sysret(SB)
+ JMP done_sysret_or_iret
+do_iret:
+ CALL ·iret(SB)
+done_sysret_or_iret:
+ MOVQ 24(SP), AX // vector
+ ADDQ $32, SP
+ MOVQ AX, vector+40(FP)
+
+ // Save application floating point state.
+ MOVQ fpState+16(FP), DI
+ MOVB ·hasXSAVE(SB), BX
+ MOVB ·hasXSAVEOPT(SB), CX
+ TESTB BX, BX
+ JZ no_xsave
+ // Use xsave/xsaveopt to save all extended state.
+ // We save everything unconditionally by setting RFBM to all 1's.
+ MOVL $0xffffffff, AX
+ MOVL $0xffffffff, DX
+ TESTB CX, CX
+ JZ no_xsaveopt
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+ JMP fpsave_done
+no_xsaveopt:
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+ JMP fpsave_done
+no_xsave:
+ FXSAVE64 0(DI)
+fpsave_done:
+
+ // Restore MXCSR and the x87 control word after one of the two floating
+ // point save cases above, to ensure the application versions are saved
+ // before being clobbered here.
+ LDMXCSR mxcsr-0(SP)
+
+ // FLDCW is a "waiting" x87 instruction, meaning it checks for pending
+ // unmasked exceptions before executing. Thus if userspace has unmasked
+ // an exception and has one pending, it can be raised by FLDCW even
+ // though the new control word will mask exceptions. To prevent this,
+ // we must first clear pending exceptions (which will be restored by
+ // XRSTOR, et al).
+ BYTE $0xDB; BYTE $0xE2; // FNCLEX
+ FLDCW cw-8(SP)
+
+ RET
+
// See entry_amd64.go.
-TEXT ·sysret(SB),NOSPLIT,$0-24
+TEXT ·sysret(SB),NOSPLIT,$0-32
// Set application FS. We can't do this in Go because Go code needs FS.
MOVQ regs+8(FP), AX
MOVQ PTRACE_FS_BASE(AX), AX
@@ -182,9 +277,11 @@ TEXT ·sysret(SB),NOSPLIT,$0-24
POPQ AX // Restore AX.
POPQ SP // Restore SP.
SYSRET64()
+ // sysenter or exception will write our return value and return to our
+ // caller.
// See entry_amd64.go.
-TEXT ·iret(SB),NOSPLIT,$0-24
+TEXT ·iret(SB),NOSPLIT,$0-32
// Set application FS. We can't do this in Go because Go code needs FS.
MOVQ regs+8(FP), AX
MOVQ PTRACE_FS_BASE(AX), AX
@@ -220,6 +317,8 @@ TEXT ·iret(SB),NOSPLIT,$0-24
WRITE_CR3() // Switch to userCR3.
POPQ AX // Restore AX.
IRET()
+ // sysenter or exception will write our return value and return to our
+ // caller.
// See entry_amd64.go.
TEXT ·resume(SB),NOSPLIT,$0
@@ -324,11 +423,39 @@ kernel:
MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
+ // Save floating point state. CPU.floatingPointState is a slice, so the
+ // first word of CPU.floatingPointState is a pointer to the destination
+ // array.
+ MOVQ CPU_FPU_STATE(AX), DI
+ MOVB CPU_HAS_XSAVE(AX), BX
+ MOVB CPU_HAS_XSAVEOPT(AX), CX
+ TESTB BX, BX
+ JZ no_xsave
+ // Use xsave/xsaveopt to save all extended state.
+ // We save everything unconditionally by setting RFBM to all 1's.
+ MOVL $0xffffffff, AX
+ MOVL $0xffffffff, DX
+ TESTB CX, CX
+ JZ no_xsaveopt
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+ JMP fpsave_done
+no_xsaveopt:
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+ JMP fpsave_done
+no_xsave:
+ FXSAVE64 0(DI)
+fpsave_done:
+
// Call the syscall trampoline.
LOAD_KERNEL_STACK(GS)
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelSyscall(SB) // Call the trampoline.
- POPQ AX // Pop vCPU.
+ MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
+ PUSHQ AX // First argument (vCPU).
+ CALL ·kernelSyscall(SB) // Call the trampoline.
+ POPQ AX // Pop vCPU.
+
+ // We only trigger a bluepill entry in the bluepill function, and can
+ // therefore be guaranteed that there is no floating point state to be
+ // loaded on resuming from halt.
JMP ·resume(SB)
ADDR_OF_FUNC(·addrOfSysenter(SB), ·sysenter(SB));
@@ -416,15 +543,43 @@ kernel:
MOVQ 8(SP), BX // Load the error code.
MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
- MOVQ 0(SP), BX // BX contains the vector.
+
+ // Save floating point state. CPU.floatingPointState is a slice, so the
+ // first word of CPU.floatingPointState is a pointer to the destination
+ // array.
+ MOVQ CPU_FPU_STATE(AX), DI
+ MOVB CPU_HAS_XSAVE(AX), BX
+ MOVB CPU_HAS_XSAVEOPT(AX), CX
+ TESTB BX, BX
+ JZ no_xsave
+ // Use xsave/xsaveopt to save all extended state.
+ // We save everything unconditionally by setting RFBM to all 1's.
+ MOVL $0xffffffff, AX
+ MOVL $0xffffffff, DX
+ TESTB CX, CX
+ JZ no_xsaveopt
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37; // XSAVEOPT64 0(DI)
+ JMP fpsave_done
+no_xsaveopt:
+ BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27; // XSAVE64 0(DI)
+ JMP fpsave_done
+no_xsave:
+ FXSAVE64 0(DI)
+fpsave_done:
// Call the exception trampoline.
+ MOVQ 0(SP), BX // BX contains the vector.
LOAD_KERNEL_STACK(GS)
- PUSHQ BX // Second argument (vector).
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelException(SB) // Call the trampoline.
- POPQ BX // Pop vector.
- POPQ AX // Pop vCPU.
+ MOVQ ENTRY_CPU_SELF(GS), AX // AX contains the vCPU.
+ PUSHQ BX // Second argument (vector).
+ PUSHQ AX // First argument (vCPU).
+ CALL ·kernelException(SB) // Call the trampoline.
+ POPQ BX // Pop vector.
+ POPQ AX // Pop vCPU.
+
+ // We only trigger a bluepill entry in the bluepill function, and can
+ // therefore be guaranteed that there is no floating point state to be
+ // loaded on resuming from halt.
JMP ·resume(SB)
#define EXCEPTION_WITH_ERROR(value, symbol, addr) \