From 9cae407b270b20ff78370d33602191d37ddf5530 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 26 Mar 2020 09:23:23 +0000 Subject: amd64: implement KPTI for gvisor gvisor already has KPTI (Kernel Page-Table Isolation) between gr0 and gr3, but the upper half of the userCR3 still contains the whole sentry kernel, which leaves the kernel vulnerable to gr3 applications through CPU bugs. This patch implements full KPTI functionality for gvisor. It doesn't map the whole kernel in the upper half. It maps only the text section of the binary and the entry area required by the ISA. The entry area contains the global IDT, the per-CPU GDT/TSS, etc. The entry area packs all of these together and takes less than 350k for 512 vCPUs. The text section is normally non-sensitive. It is possible to map only the entry functions (interrupt handlers etc.), but that requires some hacks. Signed-off-by: Lai Jiangshan Signed-off-by: Lai Jiangshan --- pkg/sentry/platform/ring0/kernel.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'pkg/sentry/platform/ring0/kernel.go') diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go index 021693791..264be23d3 100644 --- a/pkg/sentry/platform/ring0/kernel.go +++ b/pkg/sentry/platform/ring0/kernel.go @@ -19,8 +19,8 @@ package ring0 // N.B. that constraints on KernelOpts must be satisfied. // //go:nosplit -func (k *Kernel) Init(opts KernelOpts) { - k.init(opts) +func (k *Kernel) Init(opts KernelOpts, maxCPUs int) { + k.init(opts, maxCPUs) } // Halt halts execution. @@ -49,6 +49,11 @@ func (defaultHooks) KernelException(Vector) { // kernelSyscall is a trampoline. // +// When in amd64, it is called with %rip on the upper half, so it can +// NOT access to any global data which is not mapped on upper and must +// call to function pointers or interfaces to switch to the lower half +// so that callee can access to global data. 
+// // +checkescape:hard,stack // //go:nosplit @@ -58,6 +63,11 @@ func kernelSyscall(c *CPU) { // kernelException is a trampoline. // +// When in amd64, it is called with %rip on the upper half, so it can +// NOT access to any global data which is not mapped on upper and must +// call to function pointers or interfaces to switch to the lower half +// so that callee can access to global data. +// // +checkescape:hard,stack // //go:nosplit @@ -68,10 +78,10 @@ func kernelException(c *CPU, vector Vector) { // Init initializes a new CPU. // // Init allows embedding in other objects. -func (c *CPU) Init(k *Kernel, hooks Hooks) { - c.self = c // Set self reference. - c.kernel = k // Set kernel reference. - c.init() // Perform architectural init. +func (c *CPU) Init(k *Kernel, cpuID int, hooks Hooks) { + c.self = c // Set self reference. + c.kernel = k // Set kernel reference. + c.init(cpuID) // Perform architectural init. // Require hooks. if hooks != nil { -- cgit v1.2.3