From d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 Mon Sep 17 00:00:00 2001
From: Googler <noreply@google.com>
Date: Fri, 27 Apr 2018 10:37:02 -0700
Subject: Check in gVisor.

PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
---
 pkg/sentry/platform/ring0/BUILD                    |  52 +++
 pkg/sentry/platform/ring0/defs.go                  |  93 +++++
 pkg/sentry/platform/ring0/defs_amd64.go            | 113 ++++++
 pkg/sentry/platform/ring0/entry_amd64.go           | 128 +++++++
 pkg/sentry/platform/ring0/entry_amd64.s            | 334 +++++++++++++++++
 pkg/sentry/platform/ring0/gen_offsets/BUILD        |  25 ++
 pkg/sentry/platform/ring0/gen_offsets/main.go      |  24 ++
 pkg/sentry/platform/ring0/kernel.go                |  71 ++++
 pkg/sentry/platform/ring0/kernel_amd64.go          | 280 +++++++++++++++
 pkg/sentry/platform/ring0/kernel_unsafe.go         |  41 +++
 pkg/sentry/platform/ring0/lib_amd64.go             | 128 +++++++
 pkg/sentry/platform/ring0/lib_amd64.s              | 247 +++++++++++++
 pkg/sentry/platform/ring0/offsets_amd64.go         |  93 +++++
 pkg/sentry/platform/ring0/pagetables/BUILD         |  32 ++
 pkg/sentry/platform/ring0/pagetables/pagetables.go | 193 ++++++++++
 .../platform/ring0/pagetables/pagetables_amd64.go  | 397 +++++++++++++++++++++
 .../platform/ring0/pagetables/pagetables_test.go   | 161 +++++++++
 .../platform/ring0/pagetables/pagetables_unsafe.go |  31 ++
 .../platform/ring0/pagetables/pagetables_x86.go    |  79 ++++
 .../ring0/pagetables/pagetables_x86_test.go        |  79 ++++
 pkg/sentry/platform/ring0/pagetables/pcids_x86.go  |  74 ++++
 .../platform/ring0/pagetables/pcids_x86_test.go    |  65 ++++
 pkg/sentry/platform/ring0/ring0.go                 |  16 +
 pkg/sentry/platform/ring0/x86.go                   | 242 +++++++++++++
 24 files changed, 2998 insertions(+)
 create mode 100644 pkg/sentry/platform/ring0/BUILD
 create mode 100644 pkg/sentry/platform/ring0/defs.go
 create mode 100644 pkg/sentry/platform/ring0/defs_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/entry_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/entry_amd64.s
 create mode 100644 pkg/sentry/platform/ring0/gen_offsets/BUILD
 create mode 100644 pkg/sentry/platform/ring0/gen_offsets/main.go
 create mode 100644 pkg/sentry/platform/ring0/kernel.go
 create mode 100644 pkg/sentry/platform/ring0/kernel_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/kernel_unsafe.go
 create mode 100644 pkg/sentry/platform/ring0/lib_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/lib_amd64.s
 create mode 100644 pkg/sentry/platform/ring0/offsets_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/BUILD
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables_test.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pcids_x86.go
 create mode 100644 pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
 create mode 100644 pkg/sentry/platform/ring0/ring0.go
 create mode 100644 pkg/sentry/platform/ring0/x86.go

(limited to 'pkg/sentry/platform/ring0')

diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD
new file mode 100644
index 000000000..2df232a64
--- /dev/null
+++ b/pkg/sentry/platform/ring0/BUILD
@@ -0,0 +1,52 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
+
+go_template(
+    name = "defs",
+    srcs = [
+        "defs.go",
+        "defs_amd64.go",
+        "offsets_amd64.go",
+        "x86.go",
+    ],
+    visibility = [":__subpackages__"],
+)
+
+go_template_instance(
+    name = "defs_impl",
+    out = "defs_impl.go",
+    package = "ring0",
+    template = ":defs",
+)
+
+genrule(
+    name = "entry_impl_amd64",
+    srcs = ["entry_amd64.s"],
+    outs = ["entry_impl_amd64.s"],
+    cmd = "(echo -e '// +build amd64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@",
+    tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
+)
+
+go_library(
+    name = "ring0",
+    srcs = [
+        "defs_impl.go",
+        "entry_amd64.go",
+        "entry_impl_amd64.s",
+        "kernel.go",
+        "kernel_amd64.go",
+        "kernel_unsafe.go",
+        "lib_amd64.go",
+        "lib_amd64.s",
+        "ring0.go",
+    ],
+    importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0",
+    visibility = ["//pkg/sentry:internal"],
+    deps = [
+        "//pkg/cpuid",
+        "//pkg/sentry/platform/ring0/pagetables",
+        "//pkg/sentry/usermem",
+    ],
+)
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
new file mode 100644
index 000000000..9d947b73d
--- /dev/null
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -0,0 +1,93 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ring0
+
+import (
+	"syscall"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+var (
+	// UserspaceSize is the total size of userspace.
+	UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
+
+	// MaximumUserAddress is the largest possible user address.
+	MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
+
+	// KernelStartAddress is the starting kernel address.
+	KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
+)
+
+// Kernel is a global kernel object.
+//
+// This contains global state, shared by multiple CPUs.
+type Kernel struct {
+	KernelArchState
+}
+
+// CPU is the per-CPU struct.
+type CPU struct {
+	// self is a self reference.
+	//
+	// This is always guaranteed to be at offset zero.
+	self *CPU
+
+	// kernel is reference to the kernel that this CPU was initialized
+	// with. This reference is kept for garbage collection purposes: CPU
+	// registers may refer to objects within the Kernel object that cannot
+	// be safely freed.
+	kernel *Kernel
+
+	// CPUArchState is architecture-specific state.
+	CPUArchState
+
+	// registers is a set of registers; these may be used on kernel system
+	// calls and exceptions via the Registers function.
+	registers syscall.PtraceRegs
+
+	// KernelException handles an exception during kernel execution.
+	//
+	// Return from this call will restore registers and return to the kernel: the
+	// registers must be modified directly.
+	//
+	// If this function is not provided, a kernel exception results in halt.
+	//
+	// This must be go:nosplit, as this will be on the interrupt stack.
+	// Closures are permitted, as the pointer to the closure frame is not
+	// passed on the stack.
+	KernelException func(Vector)
+
+	// KernelSyscall is called for kernel system calls.
+	//
+	// Return from this call will restore registers and return to the kernel: the
+	// registers must be modified directly.
+	//
+	// If this function is not provided, a kernel system call results in halt.
+	//
+	// This must be go:nosplit, as this will be on the interrupt stack.
+	// Closures are permitted, as the pointer to the closure frame is not
+	// passed on the stack.
+	KernelSyscall func()
+}
+
+// Registers returns a modifiable-copy of the kernel registers.
+//
+// This is explicitly safe to call during KernelException and KernelSyscall.
+//
+//go:nosplit
+func (c *CPU) Registers() *syscall.PtraceRegs {
+	return &c.registers
+}
diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go
new file mode 100644
index 000000000..bb3420125
--- /dev/null
+++ b/pkg/sentry/platform/ring0/defs_amd64.go
@@ -0,0 +1,113 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package ring0
+
+import (
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
+)
+
+// Segment indices and Selectors.
+const (
+	// Index into GDT array.
+	_          = iota // Null descriptor first.
+	_                 // Reserved (Linux is kernel 32).
+	segKcode          // Kernel code (64-bit).
+	segKdata          // Kernel data.
+	segUcode32        // User code (32-bit).
+	segUdata          // User data.
+	segUcode64        // User code (64-bit).
+	segTss            // Task segment descriptor.
+	segTssHi          // Upper bits for TSS.
+	segLast           // Last segment (terminal, not included).
+)
+
+// Selectors.
+const (
+	Kcode   Selector = segKcode << 3
+	Kdata   Selector = segKdata << 3
+	Ucode32 Selector = (segUcode32 << 3) | 3
+	Udata   Selector = (segUdata << 3) | 3
+	Ucode64 Selector = (segUcode64 << 3) | 3
+	Tss     Selector = segTss << 3
+)
+
+// Standard segments.
+var (
+	UserCodeSegment32 SegmentDescriptor
+	UserDataSegment   SegmentDescriptor
+	UserCodeSegment64 SegmentDescriptor
+	KernelCodeSegment SegmentDescriptor
+	KernelDataSegment SegmentDescriptor
+)
+
+// KernelOpts has initialization options for the kernel.
+type KernelOpts struct {
+	// PageTables are the kernel pagetables; this must be provided.
+	PageTables *pagetables.PageTables
+}
+
+// KernelArchState contains architecture-specific state.
+type KernelArchState struct {
+	KernelOpts
+
+	// globalIDT is our set of interrupt gates.
+	globalIDT idt64
+}
+
+// CPUArchState contains CPU-specific arch state.
+type CPUArchState struct {
+	// stack is the stack used for interrupts on this CPU.
+	stack [256]byte
+
+	// errorCode is the error code from the last exception.
+	errorCode uintptr
+
+	// errorType indicates the type of error code here, it is always set
+	// along with the errorCode value above.
+	//
+	// It will either be 1, which indicates a user error, or 0 indicating a
+	// kernel error. If the error code below returns false (kernel error),
+	// then it cannot provide relevant information about the last
+	// exception.
+	errorType uintptr
+
+	// gdt is the CPU's descriptor table.
+	gdt descriptorTable
+
+	// tss is the CPU's task state.
+	tss TaskState64
+}
+
+// ErrorCode returns the last error code.
+//
+// The returned boolean indicates whether the error code corresponds to the
+// last user error or not. If it does not, then fault information must be
+// ignored. This is generally the result of a kernel fault while servicing a
+// user fault.
+//
+//go:nosplit
+func (c *CPU) ErrorCode() (value uintptr, user bool) {
+	return c.errorCode, c.errorType != 0
+}
+
+func init() {
+	KernelCodeSegment.setCode64(0, 0, 0)
+	KernelDataSegment.setData(0, 0xffffffff, 0)
+	UserCodeSegment32.setCode64(0, 0, 3)
+	UserDataSegment.setData(0, 0xffffffff, 3)
+	UserCodeSegment64.setCode64(0, 0, 3)
+}
diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/sentry/platform/ring0/entry_amd64.go
new file mode 100644
index 000000000..a3e992e0d
--- /dev/null
+++ b/pkg/sentry/platform/ring0/entry_amd64.go
@@ -0,0 +1,128 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package ring0
+
+import (
+	"syscall"
+)
+
+// This is an assembly function.
+//
+// The sysenter function is invoked in two situations:
+//
+//  (1) The guest kernel has executed a system call.
+//  (2) The guest application has executed a system call.
+//
+// The interrupt flag is examined to determine whether the system call was
+// executed from kernel mode or not and the appropriate stub is called.
+func sysenter()
+
+// swapgs swaps the current GS value.
+//
+// This must be called prior to sysret/iret.
+func swapgs()
+
+// sysret returns to userspace from a system call.
+//
+// The return code is the vector that interrupted execution.
+//
+// See stubs.go for a note regarding the frame size of this function.
+func sysret(*CPU, *syscall.PtraceRegs) Vector
+
+// "iret is the cadillac of CPL switching."
+//
+//				-- Neel Natu
+//
+// iret is nearly identical to sysret, except an iret is used to fully restore
+// all user state. This must be called in cases where all registers need to be
+// restored.
+func iret(*CPU, *syscall.PtraceRegs) Vector
+
+// exception is the generic exception entry.
+//
+// This is called by the individual stub definitions.
+func exception()
+
+// resume is a stub that restores the CPU kernel registers.
+//
+// This is used when processing kernel exceptions and syscalls.
+func resume()
+
+// Start is the CPU entrypoint.
+//
+// The following start conditions must be satisfied:
+//
+//  * AX should contain the CPU pointer.
+//  * c.GDT() should be loaded as the GDT.
+//  * c.IDT() should be loaded as the IDT.
+//  * c.CR0() should be the current CR0 value.
+//  * c.CR3() should be set to the kernel PageTables.
+//  * c.CR4() should be the current CR4 value.
+//  * c.EFER() should be the current EFER value.
+//
+// The CPU state will be set to c.Registers().
+func Start()
+
+// Exception stubs.
+func divideByZero()
+func debug()
+func nmi()
+func breakpoint()
+func overflow()
+func boundRangeExceeded()
+func invalidOpcode()
+func deviceNotAvailable()
+func doubleFault()
+func coprocessorSegmentOverrun()
+func invalidTSS()
+func segmentNotPresent()
+func stackSegmentFault()
+func generalProtectionFault()
+func pageFault()
+func x87FloatingPointException()
+func alignmentCheck()
+func machineCheck()
+func simdFloatingPointException()
+func virtualizationException()
+func securityException()
+func syscallInt80()
+
+// Exception handler index.
+var handlers = map[Vector]func(){
+	DivideByZero:               divideByZero,
+	Debug:                      debug,
+	NMI:                        nmi,
+	Breakpoint:                 breakpoint,
+	Overflow:                   overflow,
+	BoundRangeExceeded:         boundRangeExceeded,
+	InvalidOpcode:              invalidOpcode,
+	DeviceNotAvailable:         deviceNotAvailable,
+	DoubleFault:                doubleFault,
+	CoprocessorSegmentOverrun:  coprocessorSegmentOverrun,
+	InvalidTSS:                 invalidTSS,
+	SegmentNotPresent:          segmentNotPresent,
+	StackSegmentFault:          stackSegmentFault,
+	GeneralProtectionFault:     generalProtectionFault,
+	PageFault:                  pageFault,
+	X87FloatingPointException:  x87FloatingPointException,
+	AlignmentCheck:             alignmentCheck,
+	MachineCheck:               machineCheck,
+	SIMDFloatingPointException: simdFloatingPointException,
+	VirtualizationException:    virtualizationException,
+	SecurityException:          securityException,
+	SyscallInt80:               syscallInt80,
+}
diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s
new file mode 100644
index 000000000..e8638133b
--- /dev/null
+++ b/pkg/sentry/platform/ring0/entry_amd64.s
@@ -0,0 +1,334 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "funcdata.h"
+#include "textflag.h"
+
+// NB: Offsets are programmatically generated (see BUILD).
+//
+// This file is concatenated with the definitions.
+
+// Saves a register set.
+//
+// This is a macro because it may need to be executed in contexts where a stack is
+// not available for calls.
+//
+// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
+#define REGISTERS_SAVE(reg, offset) \
+  MOVQ R15, offset+PTRACE_R15(reg); \
+  MOVQ R14, offset+PTRACE_R14(reg); \
+  MOVQ R13, offset+PTRACE_R13(reg); \
+  MOVQ R12, offset+PTRACE_R12(reg); \
+  MOVQ BP,  offset+PTRACE_RBP(reg); \
+  MOVQ BX,  offset+PTRACE_RBX(reg); \
+  MOVQ CX,  offset+PTRACE_RCX(reg); \
+  MOVQ DX,  offset+PTRACE_RDX(reg); \
+  MOVQ R11, offset+PTRACE_R11(reg); \
+  MOVQ R10, offset+PTRACE_R10(reg); \
+  MOVQ R9,  offset+PTRACE_R9(reg); \
+  MOVQ R8,  offset+PTRACE_R8(reg); \
+  MOVQ SI,  offset+PTRACE_RSI(reg); \
+  MOVQ DI,  offset+PTRACE_RDI(reg);
+
+// Loads a register set.
+//
+// This is a macro because it may need to be executed in contexts where a stack is
+// not available for calls.
+//
+// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
+#define REGISTERS_LOAD(reg, offset) \
+  MOVQ offset+PTRACE_R15(reg), R15; \
+  MOVQ offset+PTRACE_R14(reg), R14; \
+  MOVQ offset+PTRACE_R13(reg), R13; \
+  MOVQ offset+PTRACE_R12(reg), R12; \
+  MOVQ offset+PTRACE_RBP(reg), BP; \
+  MOVQ offset+PTRACE_RBX(reg), BX; \
+  MOVQ offset+PTRACE_RCX(reg), CX; \
+  MOVQ offset+PTRACE_RDX(reg), DX; \
+  MOVQ offset+PTRACE_R11(reg), R11; \
+  MOVQ offset+PTRACE_R10(reg), R10; \
+  MOVQ offset+PTRACE_R9(reg),  R9; \
+  MOVQ offset+PTRACE_R8(reg),  R8; \
+  MOVQ offset+PTRACE_RSI(reg), SI; \
+  MOVQ offset+PTRACE_RDI(reg), DI;
+
+// SWAP_GS swaps the kernel GS (CPU).
+#define SWAP_GS() \
+	BYTE $0x0F; BYTE $0x01; BYTE $0xf8;
+
+// IRET returns from an interrupt frame.
+#define IRET() \
+	BYTE $0x48; BYTE $0xcf;
+
+// SYSRET64 executes the sysret instruction.
+#define SYSRET64() \
+	BYTE $0x48; BYTE $0x0f; BYTE $0x07;
+
+// LOAD_KERNEL_ADDRESS loads a kernel address.
+#define LOAD_KERNEL_ADDRESS(from, to) \
+	MOVQ from, to; \
+	ORQ ·KernelStartAddress(SB), to;
+
+// LOAD_KERNEL_STACK loads the kernel stack.
+#define LOAD_KERNEL_STACK(from) \
+	LOAD_KERNEL_ADDRESS(CPU_SELF(from), SP); \
+	LEAQ CPU_STACK_TOP(SP), SP;
+
+// See kernel.go.
+TEXT ·Halt(SB),NOSPLIT,$0
+	HLT
+	RET
+
+// See kernel.go.
+TEXT ·Current(SB),NOSPLIT,$0-8
+	MOVQ CPU_SELF(GS), AX
+	MOVQ AX, ret+0(FP)
+	RET
+
+// See entry_amd64.go.
+TEXT ·swapgs(SB),NOSPLIT,$0
+	SWAP_GS()
+	RET
+
+// See entry_amd64.go.
+TEXT ·sysret(SB),NOSPLIT,$0-24
+	// Save original state.
+	LOAD_KERNEL_ADDRESS(cpu+0(FP), BX)
+	LOAD_KERNEL_ADDRESS(regs+8(FP), AX)
+	MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
+	MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
+	MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
+
+	// Restore user register state.
+	REGISTERS_LOAD(AX, 0)
+	MOVQ PTRACE_RIP(AX), CX    // Needed for SYSRET.
+	MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.
+	MOVQ PTRACE_RSP(AX), SP    // Restore the stack directly.
+	MOVQ PTRACE_RAX(AX), AX    // Restore AX (scratch).
+	SYSRET64()
+
+// See entry_amd64.go.
+TEXT ·iret(SB),NOSPLIT,$0-24
+	// Save original state.
+	LOAD_KERNEL_ADDRESS(cpu+0(FP), BX)
+	LOAD_KERNEL_ADDRESS(regs+8(FP), AX)
+	MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
+	MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
+	MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
+
+	// Build an IRET frame & restore state.
+	LOAD_KERNEL_STACK(BX)
+	MOVQ PTRACE_SS(AX), BX;    PUSHQ BX
+	MOVQ PTRACE_RSP(AX), CX;   PUSHQ CX
+	MOVQ PTRACE_FLAGS(AX), DX; PUSHQ DX
+	MOVQ PTRACE_CS(AX), DI;    PUSHQ DI
+	MOVQ PTRACE_RIP(AX), SI;   PUSHQ SI
+	REGISTERS_LOAD(AX, 0)   // Restore most registers.
+	MOVQ PTRACE_RAX(AX), AX // Restore AX (scratch).
+	IRET()
+
+// See entry_amd64.go.
+TEXT ·resume(SB),NOSPLIT,$0
+	// See iret, above.
+	MOVQ CPU_REGISTERS+PTRACE_SS(GS), BX;    PUSHQ BX
+	MOVQ CPU_REGISTERS+PTRACE_RSP(GS), CX;   PUSHQ CX
+	MOVQ CPU_REGISTERS+PTRACE_FLAGS(GS), DX; PUSHQ DX
+	MOVQ CPU_REGISTERS+PTRACE_CS(GS), DI;    PUSHQ DI
+	MOVQ CPU_REGISTERS+PTRACE_RIP(GS), SI;   PUSHQ SI
+	REGISTERS_LOAD(GS, CPU_REGISTERS)
+	MOVQ CPU_REGISTERS+PTRACE_RAX(GS), AX
+	IRET()
+
+// See entry_amd64.go.
+TEXT ·Start(SB),NOSPLIT,$0
+	LOAD_KERNEL_STACK(AX) // Set the stack.
+	PUSHQ $0x0            // Previous frame pointer.
+	MOVQ SP, BP           // Set frame pointer.
+	PUSHQ AX              // First argument (CPU).
+	CALL ·start(SB)       // Call Go hook.
+	JMP ·resume(SB)       // Restore to registers.
+
+// See entry_amd64.go.
+TEXT ·sysenter(SB),NOSPLIT,$0
+	// Interrupts are always disabled while we're executing in kernel mode
+	// and always enabled while executing in user mode. Therefore, we can
+	// reliably look at the flags in R11 to determine where this syscall
+	// was from.
+	TESTL $_RFLAGS_IF, R11
+	JZ kernel
+
+user:
+	SWAP_GS()
+	XCHGQ CPU_REGISTERS+PTRACE_RSP(GS), SP // Swap stacks.
+	XCHGQ CPU_REGISTERS+PTRACE_RAX(GS), AX // Swap for AX (regs).
+	REGISTERS_SAVE(AX, 0)                  // Save all except IP, FLAGS, SP, AX.
+	MOVQ CPU_REGISTERS+PTRACE_RAX(GS), BX  // Load saved AX value.
+	MOVQ BX,  PTRACE_RAX(AX)               // Save everything else.
+	MOVQ BX,  PTRACE_ORIGRAX(AX)
+	MOVQ CX,  PTRACE_RIP(AX)
+	MOVQ R11, PTRACE_FLAGS(AX)
+	MOVQ CPU_REGISTERS+PTRACE_RSP(GS), BX; MOVQ BX, PTRACE_RSP(AX)
+	MOVQ $0, CPU_ERROR_CODE(GS) // Clear error code.
+	MOVQ $1, CPU_ERROR_TYPE(GS) // Set error type to user.
+
+	// Return to the kernel, where the frame is:
+	//
+	//	vector      (sp+24)
+	// 	regs        (sp+16)
+	// 	cpu         (sp+8)
+	// 	vcpu.Switch (sp+0)
+	//
+	MOVQ CPU_REGISTERS+PTRACE_RBP(GS), BP // Original base pointer.
+	MOVQ $Syscall, 24(SP)                 // Output vector.
+	RET
+
+kernel:
+	// We can't restore the original stack, but we can access the registers
+	// in the CPU state directly. No need for temporary juggling.
+	MOVQ AX,  CPU_REGISTERS+PTRACE_ORIGRAX(GS)
+	MOVQ AX,  CPU_REGISTERS+PTRACE_RAX(GS)
+	REGISTERS_SAVE(GS, CPU_REGISTERS)
+	MOVQ CX,  CPU_REGISTERS+PTRACE_RIP(GS)
+	MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(GS)
+	MOVQ SP,  CPU_REGISTERS+PTRACE_RSP(GS)
+	MOVQ $0, CPU_ERROR_CODE(GS) // Clear error code.
+	MOVQ $0, CPU_ERROR_TYPE(GS) // Set error type to kernel.
+
+	// Load the function stored in KernelSyscall.
+	//
+	// Note that this function needs to be executed on the stack in case
+	// the runtime decides to make use of the redzone (grumble). This also
+	// protects against any functions that might not be go:nosplit, since
+	// this will cause a failure immediately.
+	LOAD_KERNEL_STACK(GS)
+	MOVQ CPU_KERNEL_SYSCALL(GS), DX // Function data.
+	MOVQ 0(DX), AX                  // Function pointer.
+	PUSHQ BP                        // Push the frame pointer.
+	MOVQ SP, BP                     // Set frame pointer value.
+	CALL *AX                        // Call the function.
+	POPQ BP                         // Restore the frame pointer.
+	JMP ·resume(SB)
+
+// exception is a generic exception handler.
+//
+// There are two cases handled:
+//
+// 1) An exception in kernel mode: this results in saving the state at the time
+// of the exception and calling the defined hook.
+//
+// 2) An exception in guest mode: the original kernel frame is restored, and
+// the vector & error codes are pushed as return values.
+//
+// See below for the stubs that call exception.
+TEXT ·exception(SB),NOSPLIT,$0
+	// Determine whether the exception occurred in kernel mode or user
+	// mode, based on the flags. We expect the following stack:
+	//
+	//	SS          (sp+48)
+	//	SP          (sp+40)
+	//	FLAGS       (sp+32)
+	//	CS          (sp+24)
+	//	IP          (sp+16)
+	//	ERROR_CODE  (sp+8)
+	//	VECTOR      (sp+0)
+	//
+	TESTL $_RFLAGS_IF, 32(SP)
+	JZ kernel
+
+user:
+	SWAP_GS()
+	XCHGQ CPU_REGISTERS+PTRACE_RAX(GS), AX // Swap for AX (regs).
+	REGISTERS_SAVE(AX, 0)                  // Save all except IP, FLAGS, SP, AX.
+	MOVQ CPU_REGISTERS+PTRACE_RAX(GS), BX  // Load saved AX value.
+	MOVQ BX, PTRACE_RAX(AX)                // Save everything else.
+	MOVQ BX, PTRACE_ORIGRAX(AX)
+	MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
+	MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
+	MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
+	MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
+	MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)
+
+	// Copy out and return.
+	MOVQ 0(SP), BX                        // Load vector.
+	MOVQ 8(SP), CX                        // Load error code.
+	MOVQ CPU_REGISTERS+PTRACE_RSP(GS), SP // Original stack (kernel version).
+	MOVQ CPU_REGISTERS+PTRACE_RBP(GS), BP // Original base pointer.
+	MOVQ CX, CPU_ERROR_CODE(GS)           // Set error code.
+	MOVQ $1, CPU_ERROR_TYPE(GS)           // Set error type to user.
+	MOVQ BX, 24(SP)                       // Output vector.
+	RET
+
+kernel:
+	// As per above, we can save directly.
+	MOVQ AX, CPU_REGISTERS+PTRACE_RAX(GS)
+	MOVQ AX, CPU_REGISTERS+PTRACE_ORIGRAX(GS)
+	REGISTERS_SAVE(GS, CPU_REGISTERS)
+	MOVQ 16(SP), AX; MOVQ AX, CPU_REGISTERS+PTRACE_RIP(GS)
+	MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(GS)
+	MOVQ 40(SP), CX; MOVQ CX, CPU_REGISTERS+PTRACE_RSP(GS)
+
+	// Set the error code and adjust the stack.
+	MOVQ 8(SP), AX              // Load the error code.
+	MOVQ AX, CPU_ERROR_CODE(GS) // Copy out to the CPU.
+	MOVQ $0, CPU_ERROR_TYPE(GS) // Set error type to kernel.
+	MOVQ 0(SP), BX              // BX contains the vector.
+	ADDQ $48, SP                // Drop the exception frame.
+
+	// Load the function stored in KernelException.
+	//
+	// See note above re: the kernel stack.
+	LOAD_KERNEL_STACK(GS)
+	MOVQ CPU_KERNEL_EXCEPTION(GS), DX // Function data.
+	MOVQ 0(DX), AX                    // Function pointer.
+	PUSHQ BP                          // Push the frame pointer.
+	MOVQ SP, BP                       // Set frame pointer value.
+	PUSHQ BX                          // First argument (vector).
+	CALL *AX                          // Call the function.
+	POPQ BX                           // Discard the argument.
+	POPQ BP                           // Restore the frame pointer.
+	JMP ·resume(SB)
+
+#define EXCEPTION_WITH_ERROR(value, symbol) \
+TEXT symbol,NOSPLIT,$0; \
+	PUSHQ $value; \
+	JMP ·exception(SB);
+
+#define EXCEPTION_WITHOUT_ERROR(value, symbol) \
+TEXT symbol,NOSPLIT,$0; \
+	PUSHQ $0x0; \
+	PUSHQ $value; \
+	JMP ·exception(SB);
+
+EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB))
+EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB))
+EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB))
+EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB))
+EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB))
+EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB))
+EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB))
+EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB))
+EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB))
+EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB))
+EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB))
+EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB))
+EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB))
+EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB))
+EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB))
+EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB))
+EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB))
+EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB))
+EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB))
+EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB))
+EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB))
+EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB))
diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD
new file mode 100644
index 000000000..3bce56985
--- /dev/null
+++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD
@@ -0,0 +1,25 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_binary")
+load("//tools/go_generics:defs.bzl", "go_template_instance")
+
+go_template_instance(
+    name = "defs_impl",
+    out = "defs_impl.go",
+    package = "main",
+    template = "//pkg/sentry/platform/ring0:defs",
+)
+
+go_binary(
+    name = "gen_offsets",
+    srcs = [
+        "defs_impl.go",
+        "main.go",
+    ],
+    visibility = ["//pkg/sentry/platform/ring0:__pkg__"],
+    deps = [
+        "//pkg/cpuid",
+        "//pkg/sentry/platform/ring0/pagetables",
+        "//pkg/sentry/usermem",
+    ],
+)
diff --git a/pkg/sentry/platform/ring0/gen_offsets/main.go b/pkg/sentry/platform/ring0/gen_offsets/main.go
new file mode 100644
index 000000000..ffa7eaf77
--- /dev/null
+++ b/pkg/sentry/platform/ring0/gen_offsets/main.go
@@ -0,0 +1,24 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Binary gen_offsets is a helper for generating offset headers.
+package main
+
+import (
+	"os"
+)
+
+func main() {
+	Emit(os.Stdout)
+}
diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go
new file mode 100644
index 000000000..b0471ab9a
--- /dev/null
+++ b/pkg/sentry/platform/ring0/kernel.go
@@ -0,0 +1,71 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ring0
+
+// New creates a new kernel.
+//
+// N.B. that constraints on KernelOpts must be satisfied.
+//
+// Init must have been called.
+func New(opts KernelOpts) *Kernel {
+	k := new(Kernel)
+	k.init(opts)
+	return k
+}
+
+// NewCPU creates a new CPU associated with this Kernel.
+//
+// Note that execution of the new CPU must begin at Start, with constraints as
+// documented. Initialization is not completed by this method alone.
+//
+// See also Init.
+func (k *Kernel) NewCPU() *CPU {
+	c := new(CPU)
+	c.Init(k)
+	return c
+}
+
+// Halt halts execution.
+func Halt()
+
+// Current returns the current CPU.
+//
+// Its use is only legal in the KernelSyscall and KernelException contexts,
+// which must all be guarded go:nosplit.
+func Current() *CPU
+
+// defaultSyscall is the default syscall hook.
+//
+//go:nosplit
+func defaultSyscall() { Halt() }
+
+// defaultException is the default exception hook.
+//
+//go:nosplit
+func defaultException(Vector) { Halt() }
+
+// Init allows the initialization of a CPU from a kernel without allocation.
+// The same constraints as NewCPU apply.
+//
+// Init allows embedding in other objects.
+func (c *CPU) Init(k *Kernel) {
+	c.self = c   // Set self reference.
+	c.kernel = k // Set kernel reference.
+	c.init()     // Perform architectural init.
+
+	// Defaults.
+	c.KernelSyscall = defaultSyscall
+	c.KernelException = defaultException
+}
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
new file mode 100644
index 000000000..c82613a9c
--- /dev/null
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -0,0 +1,280 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package ring0
+
+import (
+	"encoding/binary"
+	"syscall"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
+)
+
+const (
+	// KernelFlagsSet should always be set in the kernel.
+	KernelFlagsSet = _RFLAGS_RESERVED
+
+	// UserFlagsSet are always set in userspace.
+	UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF
+
+	// KernelFlagsClear should always be clear in the kernel.
+	KernelFlagsClear = _RFLAGS_IF | _RFLAGS_NT | _RFLAGS_IOPL
+
+	// UserFlagsClear are always cleared in userspace.
+	UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL
+)
+
+// init initializes architecture-specific state.
+func (k *Kernel) init(opts KernelOpts) {
+	// Save the root page tables.
+	k.PageTables = opts.PageTables
+
+	// Setup the IDT, which is uniform.
+	for v, handler := range handlers {
+		// Note that we set all traps to use the interrupt stack, this
+		// is defined below when setting up the TSS.
+		k.globalIDT[v].setInterrupt(Kcode, uint64(kernelFunc(handler)), 0 /* dpl */, 1 /* ist */)
+	}
+}
+
+// init initializes architecture-specific state.
+func (c *CPU) init() {
+	// Null segment.
+	c.gdt[0].setNull()
+
+	// Kernel & user segments.
+	c.gdt[segKcode] = KernelCodeSegment
+	c.gdt[segKdata] = KernelDataSegment
+	c.gdt[segUcode32] = UserCodeSegment32
+	c.gdt[segUdata] = UserDataSegment
+	c.gdt[segUcode64] = UserCodeSegment64
+
+	// The task segment, this spans two entries.
+	tssBase, tssLimit, _ := c.TSS()
+	c.gdt[segTss].set(
+		uint32(tssBase),
+		uint32(tssLimit),
+		0, // Privilege level zero.
+		SegmentDescriptorPresent|
+			SegmentDescriptorAccess|
+			SegmentDescriptorWrite|
+			SegmentDescriptorExecute)
+	c.gdt[segTssHi].setHi(uint32((tssBase) >> 32))
+
+	// Set the kernel stack pointer in the TSS (virtual address).
+	stackAddr := c.StackTop()
+	c.tss.rsp0Lo = uint32(stackAddr)
+	c.tss.rsp0Hi = uint32(stackAddr >> 32)
+	c.tss.ist1Lo = uint32(stackAddr)
+	c.tss.ist1Hi = uint32(stackAddr >> 32)
+
+	// Permanently set the kernel segments.
+	c.registers.Cs = uint64(Kcode)
+	c.registers.Ds = uint64(Kdata)
+	c.registers.Es = uint64(Kdata)
+	c.registers.Ss = uint64(Kdata)
+	c.registers.Fs = uint64(Kdata)
+	c.registers.Gs = uint64(Kdata)
+}
+
+// StackTop returns the kernel's stack address.
+//
+//go:nosplit
+func (c *CPU) StackTop() uint64 {
+	return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
+}
+
+// IDT returns the CPU's IDT base and limit.
+//
+//go:nosplit
+func (c *CPU) IDT() (uint64, uint16) {
+	return uint64(kernelAddr(&c.kernel.globalIDT[0])), uint16(binary.Size(&c.kernel.globalIDT) - 1)
+}
+
+// GDT returns the CPU's GDT base and limit.
+//
+//go:nosplit
+func (c *CPU) GDT() (uint64, uint16) {
+	return uint64(kernelAddr(&c.gdt[0])), uint16(8*segLast - 1)
+}
+
+// TSS returns the CPU's TSS base, limit and value.
+//
+//go:nosplit
+func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) {
+	return uint64(kernelAddr(&c.tss)), uint16(binary.Size(&c.tss) - 1), &c.gdt[segTss]
+}
+
+// CR0 returns the CPU's CR0 value.
+//
+//go:nosplit
+func (c *CPU) CR0() uint64 {
+	return _CR0_PE | _CR0_PG | _CR0_ET
+}
+
+// CR4 returns the CPU's CR4 value.
+//
+//go:nosplit
+func (c *CPU) CR4() uint64 {
+	cr4 := uint64(_CR4_PAE | _CR4_PSE | _CR4_OSFXSR | _CR4_OSXMMEXCPT)
+	if hasPCID {
+		cr4 |= _CR4_PCIDE
+	}
+	if hasXSAVE {
+		cr4 |= _CR4_OSXSAVE
+	}
+	if hasSMEP {
+		cr4 |= _CR4_SMEP
+	}
+	if hasFSGSBASE {
+		cr4 |= _CR4_FSGSBASE
+	}
+	return cr4
+}
+
+// EFER returns the CPU's EFER value.
+//
+//go:nosplit
+func (c *CPU) EFER() uint64 {
+	return _EFER_LME | _EFER_SCE | _EFER_NX
+}
+
+// IsCanonical indicates whether addr is canonical per the amd64 spec.
+//
+//go:nosplit
+func IsCanonical(addr uint64) bool {
+	return addr <= 0x00007fffffffffff || addr > 0xffff800000000000
+}
+
+// Flags contains flags related to switch.
+type Flags uintptr
+
+const (
+	// FlagFull indicates that a full restore should be done, not a fast
+	// restore (on the syscall return path).
+	FlagFull = 1 << iota
+
+	// FlagFlush indicates that a full TLB flush is required.
+	FlagFlush
+)
+
+// SwitchToUser performs either a sysret or an iret.
+//
+// The return value is the vector that interrupted execution.
+//
+// This function will not split the stack. Callers will probably want to call
+// runtime.entersyscall (and pair with a call to runtime.exitsyscall) prior to
+// calling this function.
+//
+// When this is done, this region is quite sensitive to things like system
+// calls. After calling entersyscall, any memory used must have been allocated
+// and no function calls without go:nosplit are permitted. Any calls made here
+// are protected appropriately (e.g. IsCanonical and CR3).
+//
+// Also note that this function transitively depends on the compiler generating
+// code that uses IP-relative addressing inside of absolute addresses. That's
+// the case for amd64, but may not be the case for other architectures.
+//
+//go:nosplit
+func (c *CPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetables.PageTables, flags Flags) (vector Vector) {
+	// Check for canonical addresses.
+	if !IsCanonical(regs.Rip) || !IsCanonical(regs.Rsp) || !IsCanonical(regs.Fs_base) || !IsCanonical(regs.Gs_base) {
+		return GeneralProtectionFault
+	}
+
+	var (
+		userCR3   uint64
+		kernelCR3 uint64
+	)
+
+	// Sanitize registers.
+	if flags&FlagFlush != 0 {
+		userCR3 = pt.FlushCR3()
+	} else {
+		userCR3 = pt.CR3()
+	}
+	regs.Eflags &= ^uint64(UserFlagsClear)
+	regs.Eflags |= UserFlagsSet
+	regs.Cs = uint64(Ucode64) // Required for iret.
+	regs.Ss = uint64(Udata)   // Ditto.
+	kernelCR3 = c.kernel.PageTables.CR3()
+
+	// Perform the switch.
+	swapgs()                    // GS will be swapped on return.
+	wrfs(uintptr(regs.Fs_base)) // Set application FS.
+	wrgs(uintptr(regs.Gs_base)) // Set application GS.
+	LoadFloatingPoint(fpState)  // Copy in floating point.
+	jumpToKernel()              // Switch to upper half.
+	writeCR3(uintptr(userCR3))  // Change to user address space.
+	if flags&FlagFull != 0 {
+		vector = iret(c, regs)
+	} else {
+		vector = sysret(c, regs)
+	}
+	writeCR3(uintptr(kernelCR3))       // Return to kernel address space.
+	jumpToUser()                       // Return to lower half.
+	SaveFloatingPoint(fpState)         // Copy out floating point.
+	wrfs(uintptr(c.registers.Fs_base)) // Restore kernel FS.
+	return
+}
+
+// start is the CPU entrypoint.
+//
+// This is called from the Start asm stub (see entry_amd64.go); on return the
+// registers in c.registers will be restored (not segments).
+//
+//go:nosplit
+func start(c *CPU) {
+	// Save per-cpu & FS segment.
+	wrgs(kernelAddr(c))
+	wrfs(uintptr(c.Registers().Fs_base))
+
+	// Initialize floating point.
+	//
+	// Note that on skylake, the valid XCR0 mask reported seems to be 0xff.
+	// This breaks down as:
+	//
+	//	bit0   - x87
+	//	bit1   - SSE
+	//	bit2   - AVX
+	//	bit3-4 - MPX
+	//	bit5-7 - AVX512
+	//
+	// For some reason, enabling MPX & AVX512 on platforms that report them
+	// seems to cause a general protection fault. (Maybe there are some
+	// virtualization issues and these aren't exported to the guest cpuid.)
+	// This needs further investigation, but we can limit the floating
+	// point operations to x87, SSE & AVX for now.
+	fninit()
+	xsetbv(0, validXCR0Mask&0x7)
+
+	// Set the syscall target.
+	wrmsr(_MSR_LSTAR, kernelFunc(sysenter))
+	wrmsr(_MSR_SYSCALL_MASK, _RFLAGS_STEP|_RFLAGS_IF|_RFLAGS_DF|_RFLAGS_IOPL|_RFLAGS_AC|_RFLAGS_NT)
+
+	// NOTE: This depends on having the 64-bit segments immediately
+	// following the 32-bit user segments. This is simply the way the
+	// sysret instruction is designed to work (it assumes they follow).
+	wrmsr(_MSR_STAR, uintptr(uint64(Kcode)<<32|uint64(Ucode32)<<48))
+	wrmsr(_MSR_CSTAR, kernelFunc(sysenter))
+}
+
+// ReadCR2 reads the current CR2 value.
+//
+//go:nosplit
+func ReadCR2() uintptr {
+	return readCR2()
+}
diff --git a/pkg/sentry/platform/ring0/kernel_unsafe.go b/pkg/sentry/platform/ring0/kernel_unsafe.go
new file mode 100644
index 000000000..cfb3ad853
--- /dev/null
+++ b/pkg/sentry/platform/ring0/kernel_unsafe.go
@@ -0,0 +1,41 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ring0
+
+import (
+	"unsafe"
+)
+
+// eface mirrors runtime.eface.
+type eface struct {
+	typ  uintptr
+	data unsafe.Pointer
+}
+
+// kernelAddr returns the kernel virtual address for the given object.
+//
+//go:nosplit
+func kernelAddr(obj interface{}) uintptr {
+	e := (*eface)(unsafe.Pointer(&obj))
+	return KernelStartAddress | uintptr(e.data)
+}
+
+// kernelFunc returns the address of the given function.
+//
+//go:nosplit
+func kernelFunc(fn func()) uintptr {
+	fnptr := (**uintptr)(unsafe.Pointer(&fn))
+	return KernelStartAddress | **fnptr
+}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/sentry/platform/ring0/lib_amd64.go
new file mode 100644
index 000000000..f1ed5bfb4
--- /dev/null
+++ b/pkg/sentry/platform/ring0/lib_amd64.go
@@ -0,0 +1,128 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package ring0
+
+import (
+	"gvisor.googlesource.com/gvisor/pkg/cpuid"
+)
+
+// LoadFloatingPoint loads floating point state by the most efficient mechanism
+// available (set by Init).
+var LoadFloatingPoint func(*byte)
+
+// SaveFloatingPoint saves floating point state by the most efficient mechanism
+// available (set by Init).
+var SaveFloatingPoint func(*byte)
+
+// fxrstor uses fxrstor64 to load floating point state.
+func fxrstor(*byte)
+
+// xrstor uses xrstor to load floating point state.
+func xrstor(*byte)
+
+// fxsave uses fxsave64 to save floating point state.
+func fxsave(*byte)
+
+// xsave uses xsave to save floating point state.
+func xsave(*byte)
+
+// xsaveopt uses xsaveopt to save floating point state.
+func xsaveopt(*byte)
+
+// wrfs sets the FS address (set by init).
+var wrfs func(addr uintptr)
+
+// wrfsbase writes to the FS base address.
+func wrfsbase(addr uintptr)
+
+// wrfsmsr writes to the FS_BASE MSR.
+func wrfsmsr(addr uintptr)
+
+// wrgs sets the GS address (set by init).
+var wrgs func(addr uintptr)
+
+// wrgsbase writes to the GS base address.
+func wrgsbase(addr uintptr)
+
+// wrgsmsr writes to the GS_BASE MSR.
+func wrgsmsr(addr uintptr)
+
+// writeCR3 writes the CR3 value.
+func writeCR3(phys uintptr)
+
+// readCR2 reads the current CR2 value.
+func readCR2() uintptr
+
+// jumpToKernel jumps to the kernel version of the current RIP.
+func jumpToKernel()
+
+// jumpToUser jumps to the user version of the current RIP.
+func jumpToUser()
+
+// fninit initializes the floating point unit.
+func fninit()
+
+// xsetbv writes to an extended control register.
+func xsetbv(reg, value uintptr)
+
+// xgetbv reads an extended control register.
+func xgetbv(reg uintptr) uintptr
+
+// wrmsr writes to the given MSR.
+func wrmsr(reg, value uintptr)
+
+// rdmsr reads the given MSR.
+func rdmsr(reg uintptr) uintptr
+
+// Mostly-constants set by Init.
+var (
+	hasSMEP       bool
+	hasPCID       bool
+	hasXSAVEOPT   bool
+	hasXSAVE      bool
+	hasFSGSBASE   bool
+	validXCR0Mask uintptr
+)
+
+// Init sets function pointers based on architectural features.
+//
+// This must be called prior to using ring0.
+func Init(featureSet *cpuid.FeatureSet) {
+	hasSMEP = featureSet.HasFeature(cpuid.X86FeatureSMEP)
+	hasPCID = featureSet.HasFeature(cpuid.X86FeaturePCID)
+	hasXSAVEOPT = featureSet.UseXsaveopt()
+	hasXSAVE = featureSet.UseXsave()
+	hasFSGSBASE = featureSet.HasFeature(cpuid.X86FeatureFSGSBase)
+	validXCR0Mask = uintptr(featureSet.ValidXCR0Mask())
+	if hasXSAVEOPT {
+		SaveFloatingPoint = xsaveopt
+		LoadFloatingPoint = xrstor
+	} else if hasXSAVE {
+		SaveFloatingPoint = xsave
+		LoadFloatingPoint = xrstor
+	} else {
+		SaveFloatingPoint = fxsave
+		LoadFloatingPoint = fxrstor
+	}
+	if hasFSGSBASE {
+		wrfs = wrfsbase
+		wrgs = wrgsbase
+	} else {
+		wrfs = wrfsmsr
+		wrgs = wrgsmsr
+	}
+}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.s b/pkg/sentry/platform/ring0/lib_amd64.s
new file mode 100644
index 000000000..6f143ea5a
--- /dev/null
+++ b/pkg/sentry/platform/ring0/lib_amd64.s
@@ -0,0 +1,247 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "funcdata.h"
+#include "textflag.h"
+
+// fxrstor loads floating point state.
+//
+// The code corresponds to:
+//
+//     fxrstor64 (%rbx)
+//
+TEXT ·fxrstor(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), BX
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x0b;
+	RET
+
+// xrstor loads floating point state.
+//
+// The code corresponds to:
+//
+//     xrstor (%rdi)
+//
+TEXT ·xrstor(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), DI
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f;
+	RET
+
+// fxsave saves floating point state.
+//
+// The code corresponds to:
+//
+//     fxsave64 (%rbx)
+//
+TEXT ·fxsave(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), BX
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x03;
+	RET
+
+// xsave saves floating point state.
+//
+// The code corresponds to:
+//
+//     xsave (%rdi)
+//
+TEXT ·xsave(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), DI
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27;
+	RET
+
+// xsaveopt saves floating point state.
+//
+// The code corresponds to:
+//
+//     xsaveopt (%rdi)
+//
+TEXT ·xsaveopt(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), DI
+	MOVL $0xffffffff, AX
+	MOVL $0xffffffff, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37;
+	RET
+
+// wrfsbase writes to the FS base.
+//
+// The code corresponds to:
+//
+// 	wrfsbase %rax
+//
+TEXT ·wrfsbase(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), AX
+	BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd0;
+	RET
+
+// wrfsmsr writes to the FSBASE MSR.
+//
+// The code corresponds to:
+//
+// 	wrmsr (writes EDX:EAX to the MSR in ECX)
+//
+TEXT ·wrfsmsr(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), AX
+	MOVQ AX, DX
+	SHRQ $32, DX
+	MOVQ $0xc0000100, CX // MSR_FS_BASE
+	BYTE $0x0f; BYTE $0x30;
+	RET
+
+// wrgsbase writes to the GS base.
+//
+// The code corresponds to:
+//
+// 	wrgsbase %rax
+//
+TEXT ·wrgsbase(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), AX
+	BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd8;
+	RET
+
+// wrgsmsr writes to the GSBASE MSR.
+//
+// See wrfsmsr.
+TEXT ·wrgsmsr(SB),NOSPLIT,$0-8
+	MOVQ addr+0(FP), AX
+	MOVQ AX, DX
+	SHRQ $32, DX
+	MOVQ $0xc0000101, CX     // MSR_GS_BASE
+	BYTE $0x0f; BYTE $0x30;  // WRMSR
+	RET
+
+// jumpToUser changes execution to the user address.
+//
+// This works by changing the return value to the user version.
+TEXT ·jumpToUser(SB),NOSPLIT,$0
+	MOVQ 0(SP), AX
+	MOVQ ·KernelStartAddress(SB), BX
+	NOTQ BX
+	ANDQ BX, SP // Switch the stack.
+	ANDQ BX, BP // Switch the frame pointer.
+	ANDQ BX, AX // Future return value.
+	MOVQ AX, 0(SP)
+	RET
+
+// jumpToKernel changes execution to the kernel address space.
+//
+// This works by changing the return value to the kernel version.
+TEXT ·jumpToKernel(SB),NOSPLIT,$0
+	MOVQ 0(SP), AX
+	MOVQ ·KernelStartAddress(SB), BX
+	ORQ BX, SP // Switch the stack.
+	ORQ BX, BP // Switch the frame pointer.
+	ORQ BX, AX // Future return value.
+	MOVQ AX, 0(SP)
+	RET
+
+// writeCR3 writes the given CR3 value.
+//
+// The code corresponds to:
+//
+// 	mov %rax, %cr3
+//
+TEXT ·writeCR3(SB),NOSPLIT,$0-8
+	MOVQ cr3+0(FP), AX
+	BYTE $0x0f; BYTE $0x22; BYTE $0xd8;
+	RET
+
+// readCR3 reads the current CR3 value.
+//
+// The code corresponds to:
+//
+// 	mov %cr3, %rax
+//
+TEXT ·readCR3(SB),NOSPLIT,$0-8
+	BYTE $0x0f; BYTE $0x20; BYTE $0xd8;
+	MOVQ AX, ret+0(FP)
+	RET
+
+// readCR2 reads the current CR2 value.
+//
+// The code corresponds to:
+//
+// 	mov %cr2, %rax
+//
+TEXT ·readCR2(SB),NOSPLIT,$0-8
+	BYTE $0x0f; BYTE $0x20; BYTE $0xd0;
+	MOVQ AX, ret+0(FP)
+	RET
+
+// fninit initializes the floating point unit.
+//
+// The code corresponds to:
+//
+// 	fninit
+TEXT ·fninit(SB),NOSPLIT,$0
+	BYTE $0xdb; BYTE $0xe3;
+	RET
+
+// xsetbv writes to an extended control register.
+//
+// The code corresponds to:
+//
+// 	xsetbv
+//
+TEXT ·xsetbv(SB),NOSPLIT,$0-16
+	MOVL reg+0(FP), CX
+	MOVL value+8(FP), AX
+	MOVL value+12(FP), DX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd1;
+	RET
+
+// xgetbv reads an extended control register.
+//
+// The code corresponds to:
+//
+// 	xgetbv
+//
+TEXT ·xgetbv(SB),NOSPLIT,$0-16
+	MOVL reg+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0;
+	MOVL AX, ret+8(FP)
+	MOVL DX, ret+12(FP)
+	RET
+
+// wrmsr writes to a control register.
+//
+// The code corresponds to:
+//
+// 	wrmsr
+//
+TEXT ·wrmsr(SB),NOSPLIT,$0-16
+	MOVL reg+0(FP), CX
+	MOVL value+8(FP), AX
+	MOVL value+12(FP), DX
+	BYTE $0x0f; BYTE $0x30;
+	RET
+
+// rdmsr reads a control register.
+//
+// The code corresponds to:
+//
+// 	rdmsr
+//
+TEXT ·rdmsr(SB),NOSPLIT,$0-16
+	MOVL reg+0(FP), CX
+	BYTE $0x0f; BYTE $0x32;
+	MOVL AX, ret+8(FP)
+	MOVL DX, ret+12(FP)
+	RET
diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go
new file mode 100644
index 000000000..9acd442ba
--- /dev/null
+++ b/pkg/sentry/platform/ring0/offsets_amd64.go
@@ -0,0 +1,93 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package ring0
+
+import (
+	"fmt"
+	"io"
+	"reflect"
+	"syscall"
+)
+
+// Emit prints architecture-specific offsets.
+func Emit(w io.Writer) {
+	fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
+
+	c := &CPU{}
+	fmt.Fprintf(w, "\n// CPU offsets.\n")
+	fmt.Fprintf(w, "#define CPU_SELF             0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_REGISTERS        0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_STACK_TOP        0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack)))
+	fmt.Fprintf(w, "#define CPU_ERROR_CODE       0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_ERROR_TYPE       0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_KERNEL_EXCEPTION 0x%02x\n", reflect.ValueOf(&c.KernelException).Pointer()-reflect.ValueOf(c).Pointer())
+	fmt.Fprintf(w, "#define CPU_KERNEL_SYSCALL   0x%02x\n", reflect.ValueOf(&c.KernelSyscall).Pointer()-reflect.ValueOf(c).Pointer())
+
+	fmt.Fprintf(w, "\n// Bits.\n")
+	fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF)
+
+	fmt.Fprintf(w, "\n// Vectors.\n")
+	fmt.Fprintf(w, "#define DivideByZero               0x%02x\n", DivideByZero)
+	fmt.Fprintf(w, "#define Debug                      0x%02x\n", Debug)
+	fmt.Fprintf(w, "#define NMI                        0x%02x\n", NMI)
+	fmt.Fprintf(w, "#define Breakpoint                 0x%02x\n", Breakpoint)
+	fmt.Fprintf(w, "#define Overflow                   0x%02x\n", Overflow)
+	fmt.Fprintf(w, "#define BoundRangeExceeded         0x%02x\n", BoundRangeExceeded)
+	fmt.Fprintf(w, "#define InvalidOpcode              0x%02x\n", InvalidOpcode)
+	fmt.Fprintf(w, "#define DeviceNotAvailable         0x%02x\n", DeviceNotAvailable)
+	fmt.Fprintf(w, "#define DoubleFault                0x%02x\n", DoubleFault)
+	fmt.Fprintf(w, "#define CoprocessorSegmentOverrun  0x%02x\n", CoprocessorSegmentOverrun)
+	fmt.Fprintf(w, "#define InvalidTSS                 0x%02x\n", InvalidTSS)
+	fmt.Fprintf(w, "#define SegmentNotPresent          0x%02x\n", SegmentNotPresent)
+	fmt.Fprintf(w, "#define StackSegmentFault          0x%02x\n", StackSegmentFault)
+	fmt.Fprintf(w, "#define GeneralProtectionFault     0x%02x\n", GeneralProtectionFault)
+	fmt.Fprintf(w, "#define PageFault                  0x%02x\n", PageFault)
+	fmt.Fprintf(w, "#define X87FloatingPointException  0x%02x\n", X87FloatingPointException)
+	fmt.Fprintf(w, "#define AlignmentCheck             0x%02x\n", AlignmentCheck)
+	fmt.Fprintf(w, "#define MachineCheck               0x%02x\n", MachineCheck)
+	fmt.Fprintf(w, "#define SIMDFloatingPointException 0x%02x\n", SIMDFloatingPointException)
+	fmt.Fprintf(w, "#define VirtualizationException    0x%02x\n", VirtualizationException)
+	fmt.Fprintf(w, "#define SecurityException          0x%02x\n", SecurityException)
+	fmt.Fprintf(w, "#define SyscallInt80               0x%02x\n", SyscallInt80)
+	fmt.Fprintf(w, "#define Syscall                    0x%02x\n", Syscall)
+
+	p := &syscall.PtraceRegs{}
+	fmt.Fprintf(w, "\n// Ptrace registers.\n")
+	fmt.Fprintf(w, "#define PTRACE_R15      0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R14      0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R13      0x%02x\n", reflect.ValueOf(&p.R13).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R12      0x%02x\n", reflect.ValueOf(&p.R12).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RBP      0x%02x\n", reflect.ValueOf(&p.Rbp).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RBX      0x%02x\n", reflect.ValueOf(&p.Rbx).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R11      0x%02x\n", reflect.ValueOf(&p.R11).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R10      0x%02x\n", reflect.ValueOf(&p.R10).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R9       0x%02x\n", reflect.ValueOf(&p.R9).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_R8       0x%02x\n", reflect.ValueOf(&p.R8).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RAX      0x%02x\n", reflect.ValueOf(&p.Rax).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RCX      0x%02x\n", reflect.ValueOf(&p.Rcx).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RDX      0x%02x\n", reflect.ValueOf(&p.Rdx).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RSI      0x%02x\n", reflect.ValueOf(&p.Rsi).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RDI      0x%02x\n", reflect.ValueOf(&p.Rdi).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_ORIGRAX  0x%02x\n", reflect.ValueOf(&p.Orig_rax).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RIP      0x%02x\n", reflect.ValueOf(&p.Rip).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_CS       0x%02x\n", reflect.ValueOf(&p.Cs).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_FLAGS    0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_RSP      0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_SS       0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_FS       0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer())
+	fmt.Fprintf(w, "#define PTRACE_GS       0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer())
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
new file mode 100644
index 000000000..c0c481ab3
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -0,0 +1,32 @@
+package(licenses = ["notice"])  # Apache 2.0
+
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "pagetables",
+    srcs = [
+        "pagetables.go",
+        "pagetables_amd64.go",
+        "pagetables_unsafe.go",
+        "pagetables_x86.go",
+        "pcids_x86.go",
+    ],
+    importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables",
+    visibility = [
+        "//pkg/sentry/platform/kvm:__subpackages__",
+        "//pkg/sentry/platform/ring0:__subpackages__",
+    ],
+    deps = ["//pkg/sentry/usermem"],
+)
+
+go_test(
+    name = "pagetables_test",
+    size = "small",
+    srcs = [
+        "pagetables_test.go",
+        "pagetables_x86_test.go",
+        "pcids_x86_test.go",
+    ],
+    embed = [":pagetables"],
+    deps = ["//pkg/sentry/usermem"],
+)
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
new file mode 100644
index 000000000..3cbf0bfa5
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -0,0 +1,193 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package pagetables provides a generic implementation of pagetables.
+package pagetables
+
+import (
+	"sync"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
// Node is a single node within a set of page tables.
type Node struct {
	// unalignedData has unaligned data. Unfortunately, we can't really
	// rely on the allocator to give us what we want here. So we just throw
	// it at the wall and use the portion that matches. Gross. This may be
	// changed in the future to use a different allocation mechanism.
	//
	// Access must happen via functions found in pagetables_unsafe.go.
	//
	// The array is sized so that a page-aligned run of usermem.PageSize
	// bytes is guaranteed to exist somewhere within it.
	unalignedData [(2 * usermem.PageSize) - 1]byte

	// physical is the translated address of these entries.
	//
	// This is filled in at creation time (see allocNode).
	physical uintptr
}
+
// PageTables is a set of page tables.
type PageTables struct {
	// mu serializes mutations; it is held across Map and Unmap.
	mu sync.Mutex

	// root is the pagetable root.
	root *Node

	// translater is the translater passed at creation.
	translater Translater

	// archPageTables includes architecture-specific features.
	archPageTables

	// allNodes is a set of nodes indexed by translater address, i.e. the
	// reverse of Translater.TranslateToPhysical for installed tables.
	allNodes map[uintptr]*Node
}
+
// Translater translates to guest physical addresses.
//
// NOTE(review): the conventional spelling is "Translator"; the name is kept
// as-is because it is part of the exported API.
type Translater interface {
	// TranslateToPhysical translates the given pointer object into a
	// "physical" address. We do not require that it translates back, the
	// reverse mapping is maintained internally.
	TranslateToPhysical(*PTEs) uintptr
}
+
+// New returns new PageTables.
+func New(t Translater, opts Opts) *PageTables {
+	p := &PageTables{
+		translater: t,
+		allNodes:   make(map[uintptr]*Node),
+	}
+	p.root = p.allocNode()
+	p.init(opts)
+	return p
+}
+
+// New returns a new set of PageTables derived from the given one.
+//
+// This function should always be preferred to New if there are existing
+// pagetables, as this function preserves architectural constraints relevant to
+// managing multiple sets of pagetables.
+func (p *PageTables) New() *PageTables {
+	np := &PageTables{
+		translater: p.translater,
+		allNodes:   make(map[uintptr]*Node),
+	}
+	np.root = np.allocNode()
+	np.initFrom(&p.archPageTables)
+	return np
+}
+
+// setPageTable sets the given index as a page table.
+func (p *PageTables) setPageTable(n *Node, index int, child *Node) {
+	phys := p.translater.TranslateToPhysical(child.PTEs())
+	p.allNodes[phys] = child
+	pte := &n.PTEs()[index]
+	pte.setPageTable(phys)
+}
+
+// clearPageTable clears the given entry.
+func (p *PageTables) clearPageTable(n *Node, index int) {
+	pte := &n.PTEs()[index]
+	physical := pte.Address()
+	pte.Clear()
+	delete(p.allNodes, physical)
+}
+
+// getPageTable returns the page table entry.
+func (p *PageTables) getPageTable(n *Node, index int) *Node {
+	pte := &n.PTEs()[index]
+	physical := pte.Address()
+	child := p.allNodes[physical]
+	return child
+}
+
+// Map installs a mapping with the given physical address.
+//
+// True is returned iff there was a previous mapping in the range.
+//
+// Precondition: addr & length must be aligned, their sum must not overflow.
+func (p *PageTables) Map(addr usermem.Addr, length uintptr, user bool, at usermem.AccessType, physical uintptr) bool {
+	if at == usermem.NoAccess {
+		return p.Unmap(addr, length)
+	}
+	prev := false
+	p.mu.Lock()
+	end, ok := addr.AddLength(uint64(length))
+	if !ok {
+		panic("pagetables.Map: overflow")
+	}
+	p.iterateRange(uintptr(addr), uintptr(end), true, func(s, e uintptr, pte *PTE, align uintptr) {
+		p := physical + (s - uintptr(addr))
+		prev = prev || (pte.Valid() && (p != pte.Address() || at.Write != pte.Writeable() || at.Execute != pte.Executable()))
+		if p&align != 0 {
+			// We will install entries at a smaller granulaity if
+			// we don't install a valid entry here, however we must
+			// zap any existing entry to ensure this happens.
+			pte.Clear()
+			return
+		}
+		pte.Set(p, at.Write, at.Execute, user)
+	})
+	p.mu.Unlock()
+	return prev
+}
+
+// Unmap unmaps the given range.
+//
+// True is returned iff there was a previous mapping in the range.
+func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
+	p.mu.Lock()
+	count := 0
+	p.iterateRange(uintptr(addr), uintptr(addr)+length, false, func(s, e uintptr, pte *PTE, align uintptr) {
+		pte.Clear()
+		count++
+	})
+	p.mu.Unlock()
+	return count > 0
+}
+
// Release releases this address space.
//
// This must be called to release the PCID.
func (p *PageTables) Release() {
	// Clear all pages.
	p.Unmap(0, ^uintptr(0))
	// Release arch-specific resources (the PCID, on x86).
	p.release()
}
+
// Lookup returns the physical address for the given virtual address.
//
// The walk is performed on the page-aligned address; the in-page offset is
// added back into the returned physical address. If no valid mapping covers
// addr, the zero physical address and zero AccessType are returned.
//
// NOTE(review): unlike Map/Unmap, this does not acquire p.mu — confirm that
// callers serialize Lookup against concurrent mutations.
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, accessType usermem.AccessType) {
	mask := uintptr(usermem.PageSize - 1)
	off := uintptr(addr) & mask
	addr = addr &^ usermem.Addr(mask)
	p.iterateRange(uintptr(addr), uintptr(addr+usermem.PageSize), false, func(s, e uintptr, pte *PTE, align uintptr) {
		if !pte.Valid() {
			return
		}
		physical = pte.Address() + (s - uintptr(addr)) + off
		accessType = usermem.AccessType{
			Read:    true,
			Write:   pte.Writeable(),
			Execute: pte.Executable(),
		}
	})
	return physical, accessType
}
+
// allocNode allocates a new page.
//
// The node's physical address is resolved once at allocation so later users
// (e.g. CR3) can read it without translating again.
func (p *PageTables) allocNode() *Node {
	n := new(Node)
	n.physical = p.translater.TranslateToPhysical(n.PTEs())
	return n
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
new file mode 100644
index 000000000..b89665c96
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
@@ -0,0 +1,397 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64
+
+package pagetables
+
+import (
+	"fmt"
+	"sync/atomic"
+)
+
// Address constraints.
//
// The lowerTop and upperBottom currently apply to four-level pagetables;
// additional refactoring would be necessary to support five-level pagetables.
const (
	// lowerTop is the top of the lower canonical half; upperBottom is the
	// bottom of the upper canonical half. Addresses in between are
	// non-canonical on x86-64.
	lowerTop    = 0x00007fffffffffff
	upperBottom = 0xffff800000000000

	// Shifts of the virtual-address bit field indexing each level.
	pteShift = 12
	pmdShift = 21
	pudShift = 30
	pgdShift = 39

	// Masks selecting the 9-bit index for each level.
	pteMask = 0x1ff << pteShift
	pmdMask = 0x1ff << pmdShift
	pudMask = 0x1ff << pudShift
	pgdMask = 0x1ff << pgdShift

	// Sizes of the region mapped by one entry at each level.
	pteSize = 1 << pteShift
	pmdSize = 1 << pmdShift
	pudSize = 1 << pudShift
	pgdSize = 1 << pgdShift
)
+
// Bits in page table entries.
const (
	present        = 0x001   // Entry is valid.
	writable       = 0x002   // Writes permitted.
	user           = 0x004   // User-mode accessible.
	writeThrough   = 0x008   // Write-through caching.
	cacheDisable   = 0x010   // Caching disabled.
	accessed       = 0x020   // Entry has been accessed.
	dirty          = 0x040   // Page has been written.
	super          = 0x080   // Super/huge page.
	executeDisable = 1 << 63 // Instruction fetch disallowed (NX).
)
+
// PTE is a page table entry.
//
// All loads and stores of the raw value are atomic, so entries may be
// inspected concurrently with updates.
type PTE uint64

// Clear clears this PTE, including super page information.
func (p *PTE) Clear() {
	atomic.StoreUint64((*uint64)(p), 0)
}

// Valid returns true iff this entry is valid.
func (p *PTE) Valid() bool {
	return atomic.LoadUint64((*uint64)(p))&present != 0
}

// Writeable returns true iff the page is writable.
func (p *PTE) Writeable() bool {
	return atomic.LoadUint64((*uint64)(p))&writable != 0
}

// User returns true iff the page is user-accessible.
func (p *PTE) User() bool {
	return atomic.LoadUint64((*uint64)(p))&user != 0
}

// Executable returns true iff the page is executable.
func (p *PTE) Executable() bool {
	// Note the inverted sense: the hardware bit is execute-*disable*.
	return atomic.LoadUint64((*uint64)(p))&executeDisable == 0
}
+
// SetSuper sets this page as a super page.
//
// The page must not be valid or a panic will result.
func (p *PTE) SetSuper() {
	if p.Valid() {
		// This is not allowed.
		panic("SetSuper called on valid page!")
	}
	// Only the super bit is stored; the entry remains invalid until a
	// subsequent Set (which preserves the bit).
	atomic.StoreUint64((*uint64)(p), super)
}

// IsSuper returns true iff this page is a super page.
func (p *PTE) IsSuper() bool {
	return atomic.LoadUint64((*uint64)(p))&super != 0
}
+
+// Set sets this PTE value.
+func (p *PTE) Set(addr uintptr, write, execute bool, userAccessible bool) {
+	v := uint64(addr)&^uint64(0xfff) | present | accessed
+	if userAccessible {
+		v |= user
+	}
+	if !execute {
+		v |= executeDisable
+	}
+	if write {
+		v |= writable | dirty
+	}
+	if p.IsSuper() {
+		v |= super
+	}
+	atomic.StoreUint64((*uint64)(p), v)
+}
+
// setPageTable sets this PTE to point at a next-level page table.
//
// The installed entry is present, user, writable, accessed and dirty; the
// super bit is always cleared. This is used explicitly for breaking super
// pages.
//
// NOTE(review): the original comment claimed the write bit is forced clear,
// but the value below sets writable — effective permissions come from the
// leaf entries. Confirm intent.
func (p *PTE) setPageTable(addr uintptr) {
	v := uint64(addr)&^uint64(0xfff) | present | user | writable | accessed | dirty
	atomic.StoreUint64((*uint64)(p), v)
}

// Address extracts the address. This should only be used if Valid returns true.
func (p *PTE) Address() uintptr {
	// Mask off the NX bit and the low flag bits.
	return uintptr(atomic.LoadUint64((*uint64)(p)) & ^uint64(executeDisable|0xfff))
}
+
// entriesPerPage is the number of PTEs per page.
const entriesPerPage = 512

// PTEs is a collection of entries (one hardware page-table page).
type PTEs [entriesPerPage]PTE
+
// next returns the next address quantized by the given size: start rounded
// down to a multiple of size, plus size. size must be a power of two.
func next(start uint64, size uint64) uint64 {
	return (start &^ (size - 1)) + size
}
+
// iterateRange iterates over all appropriate levels of page tables for the given range.
//
// If alloc is set, then Set _must_ be called on all given PTEs. The exception
// is super pages. If a valid super page cannot be installed, then the walk
// will continue to individual entries.
//
// This algorithm will attempt to maximize the use of super pages whenever
// possible. Whether a super page is provided will be clear through the range
// provided in the callback.
//
// Note that if alloc set, then no gaps will be present. However, if alloc is
// not set, then the iteration will likely be full of gaps.
//
// Note that this function should generally be avoided in favor of Map, Unmap,
// etc. when not necessary.
//
// Precondition: startAddr and endAddr must be page-aligned.
//
// Precondition: startAddr must be less than endAddr.
//
// Precondition: If alloc is set, then startAddr and endAddr should not span
// non-canonical ranges. If they do, a panic will result.
func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn func(s, e uintptr, pte *PTE, align uintptr)) {
	start := uint64(startAddr)
	end := uint64(endAddr)
	if start%pteSize != 0 {
		panic(fmt.Sprintf("unaligned start: %v", start))
	}
	if start > end {
		panic(fmt.Sprintf("start > end (%v > %v))", start, end))
	}

	// Deal with cases where we traverse the "gap".
	//
	// These are all explicitly disallowed if alloc is set, and we must
	// traverse an entry for each address explicitly. The walk is split
	// into the canonical sub-ranges; addresses wholly inside the gap are
	// simply skipped.
	switch {
	case start < lowerTop && end > lowerTop && end < upperBottom:
		// Ends inside the gap: only the lower half is canonical.
		if alloc {
			panic(fmt.Sprintf("alloc [%x, %x) spans non-canonical range", start, end))
		}
		p.iterateRange(startAddr, lowerTop, false, fn)
		return
	case start < lowerTop && end > lowerTop:
		// Straddles the whole gap: walk both canonical halves.
		if alloc {
			panic(fmt.Sprintf("alloc [%x, %x) spans non-canonical range", start, end))
		}
		p.iterateRange(startAddr, lowerTop, false, fn)
		p.iterateRange(upperBottom, endAddr, false, fn)
		return
	case start > lowerTop && end < upperBottom:
		// Entirely inside the gap: nothing to do.
		if alloc {
			panic(fmt.Sprintf("alloc [%x, %x) spans non-canonical range", start, end))
		}
		return
	case start > lowerTop && start < upperBottom && end > upperBottom:
		// Starts inside the gap: only the upper half is canonical.
		if alloc {
			panic(fmt.Sprintf("alloc [%x, %x) spans non-canonical range", start, end))
		}
		p.iterateRange(upperBottom, endAddr, false, fn)
		return
	}

	// Four nested loops, one per level (PGD -> PUD -> PMD -> PTE). Each
	// level counts entries it observed as clear so empty intermediate
	// tables can be reclaimed on the way out.
	for pgdIndex := int((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
		pgdEntry := &p.root.PTEs()[pgdIndex]
		if !pgdEntry.Valid() {
			if !alloc {
				// Skip over this entry.
				start = next(start, pgdSize)
				continue
			}

			// Allocate a new pgd.
			p.setPageTable(p.root, pgdIndex, p.allocNode())
		}

		// Map the next level.
		pudNode := p.getPageTable(p.root, pgdIndex)
		clearPUDEntries := 0

		for pudIndex := int((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
			pudEntry := &(pudNode.PTEs()[pudIndex])
			if !pudEntry.Valid() {
				if !alloc {
					// Skip over this entry.
					clearPUDEntries++
					start = next(start, pudSize)
					continue
				}

				// This level has 1-GB super pages. Is this
				// entire region contained in a single PUD
				// entry? If so, we can skip allocating a new
				// page for the pmd.
				if start&(pudSize-1) == 0 && end-start >= pudSize {
					pudEntry.SetSuper()
					fn(uintptr(start), uintptr(start+pudSize), pudEntry, pudSize-1)
					if pudEntry.Valid() {
						start = next(start, pudSize)
						continue
					}
				}

				// Allocate a new pud.
				p.setPageTable(pudNode, pudIndex, p.allocNode())

			} else if pudEntry.IsSuper() {
				// Does this page need to be split?
				if start&(pudSize-1) != 0 || end < next(start, pudSize) {
					currentAddr := uint64(pudEntry.Address())
					writeable := pudEntry.Writeable()
					executable := pudEntry.Executable()
					user := pudEntry.User()

					// Install the relevant entries: one 2-MB
					// super entry per PMD slot, covering the
					// same 1-GB region with the same bits.
					pmdNode := p.allocNode()
					pmdEntries := pmdNode.PTEs()
					for index := 0; index < entriesPerPage; index++ {
						pmdEntry := &pmdEntries[index]
						pmdEntry.SetSuper()
						pmdEntry.Set(uintptr(currentAddr), writeable, executable, user)
						currentAddr += pmdSize
					}

					// Reset to point to the new page.
					p.setPageTable(pudNode, pudIndex, pmdNode)
				} else {
					// A super page to be checked directly.
					fn(uintptr(start), uintptr(start+pudSize), pudEntry, pudSize-1)

					// Might have been cleared.
					if !pudEntry.Valid() {
						clearPUDEntries++
					}

					// Note that the super page was changed.
					start = next(start, pudSize)
					continue
				}
			}

			// Map the next level, since this is valid.
			pmdNode := p.getPageTable(pudNode, pudIndex)
			clearPMDEntries := 0

			for pmdIndex := int((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
				pmdEntry := &pmdNode.PTEs()[pmdIndex]
				if !pmdEntry.Valid() {
					if !alloc {
						// Skip over this entry.
						clearPMDEntries++
						start = next(start, pmdSize)
						continue
					}

					// This level has 2-MB huge pages. Is this
					// region contained in a single PMD entry?
					// As above, we can skip allocating a new page.
					if start&(pmdSize-1) == 0 && end-start >= pmdSize {
						pmdEntry.SetSuper()
						fn(uintptr(start), uintptr(start+pmdSize), pmdEntry, pmdSize-1)
						if pmdEntry.Valid() {
							start = next(start, pmdSize)
							continue
						}
					}

					// Allocate a new pmd.
					p.setPageTable(pmdNode, pmdIndex, p.allocNode())

				} else if pmdEntry.IsSuper() {
					// Does this page need to be split?
					if start&(pmdSize-1) != 0 || end < next(start, pmdSize) {
						currentAddr := uint64(pmdEntry.Address())
						writeable := pmdEntry.Writeable()
						executable := pmdEntry.Executable()
						user := pmdEntry.User()

						// Install the relevant entries: one 4-KB
						// entry per PTE slot with the same bits.
						pteNode := p.allocNode()
						pteEntries := pteNode.PTEs()
						for index := 0; index < entriesPerPage; index++ {
							pteEntry := &pteEntries[index]
							pteEntry.Set(uintptr(currentAddr), writeable, executable, user)
							currentAddr += pteSize
						}

						// Reset to point to the new page.
						p.setPageTable(pmdNode, pmdIndex, pteNode)
					} else {
						// A huge page to be checked directly.
						fn(uintptr(start), uintptr(start+pmdSize), pmdEntry, pmdSize-1)

						// Might have been cleared.
						if !pmdEntry.Valid() {
							clearPMDEntries++
						}

						// Note that the huge page was changed.
						start = next(start, pmdSize)
						continue
					}
				}

				// Map the next level, since this is valid.
				pteNode := p.getPageTable(pmdNode, pmdIndex)
				clearPTEEntries := 0

				for pteIndex := int((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
					pteEntry := &pteNode.PTEs()[pteIndex]
					if !pteEntry.Valid() && !alloc {
						clearPTEEntries++
						start += pteSize
						continue
					}

					// At this point, we are guaranteed that start%pteSize == 0.
					fn(uintptr(start), uintptr(start+pteSize), pteEntry, pteSize-1)
					if !pteEntry.Valid() {
						if alloc {
							panic("PTE not set after iteration with alloc=true!")
						}
						clearPTEEntries++
					}

					// Note that the pte was changed.
					start += pteSize
					continue
				}

				// Check if we no longer need this page.
				if clearPTEEntries == entriesPerPage {
					p.clearPageTable(pmdNode, pmdIndex)
					clearPMDEntries++
				}
			}

			// Check if we no longer need this page.
			if clearPMDEntries == entriesPerPage {
				p.clearPageTable(pudNode, pudIndex)
				clearPUDEntries++
			}
		}

		// Check if we no longer need this page.
		if clearPUDEntries == entriesPerPage {
			p.clearPageTable(p.root, pgdIndex)
		}
	}
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
new file mode 100644
index 000000000..9cbc0e3b0
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -0,0 +1,161 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+import (
+	"reflect"
+	"testing"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
// reflectTranslater translates PTEs to "physical" addresses by using the
// host virtual address directly; sufficient for in-process tests.
type reflectTranslater struct{}

func (r reflectTranslater) TranslateToPhysical(ptes *PTEs) uintptr {
	return reflect.ValueOf(ptes).Pointer()
}

// mapping is one expected (or observed) mapping during iteration.
type mapping struct {
	start     uintptr
	length    uintptr
	addr      uintptr
	writeable bool
}

// checkMappings walks all of pt and verifies the observed mappings match m
// exactly: same order, start, length, physical address and writability.
func checkMappings(t *testing.T, pt *PageTables, m []mapping) {
	var (
		current int
		found   []mapping
		failed  string
	)

	// Iterate over all the mappings.
	pt.iterateRange(0, ^uintptr(0), false, func(s, e uintptr, pte *PTE, align uintptr) {
		found = append(found, mapping{
			start:     s,
			length:    e - s,
			addr:      pte.Address(),
			writeable: pte.Writeable(),
		})
		if failed != "" {
			// Don't keep looking for errors.
			return
		}

		if current >= len(m) {
			failed = "more mappings than expected"
		} else if m[current].start != s {
			failed = "start didn't match expected"
		} else if m[current].length != (e - s) {
			failed = "end didn't match expected"
		} else if m[current].addr != pte.Address() {
			failed = "address didn't match expected"
		} else if m[current].writeable != pte.Writeable() {
			failed = "writeable didn't match"
		}
		current++
	})

	// Were we expecting additional mappings?
	if failed == "" && current != len(m) {
		failed = "insufficient mappings found"
	}

	// Emit a meaningful error message on failure.
	if failed != "" {
		t.Errorf("%s; got %#v, wanted %#v", failed, found, m)
	}
}
+
// TestAllocFree checks that empty tables can be created and released.
func TestAllocFree(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})
	pt.Release()
}

// TestUnmap checks that an unmapped entry no longer appears.
func TestUnmap(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Map and unmap one entry.
	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)
	pt.Unmap(0x400000, pteSize)

	checkMappings(t, pt, nil)
	pt.Release()
}

// TestReadOnly checks that a read-only mapping lacks the writable bit.
func TestReadOnly(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Map one entry.
	pt.Map(0x400000, pteSize, true, usermem.Read, pteSize*42)

	checkMappings(t, pt, []mapping{
		{0x400000, pteSize, pteSize * 42, false},
	})
	pt.Release()
}

// TestReadWrite checks that a read-write mapping carries the writable bit.
func TestReadWrite(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Map one entry.
	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)

	checkMappings(t, pt, []mapping{
		{0x400000, pteSize, pteSize * 42, true},
	})
	pt.Release()
}
+
// TestSerialEntries checks two adjacent 4-KB mappings in the same table.
func TestSerialEntries(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Map two sequential entries.
	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)
	pt.Map(0x401000, pteSize, true, usermem.ReadWrite, pteSize*47)

	checkMappings(t, pt, []mapping{
		{0x400000, pteSize, pteSize * 42, true},
		{0x401000, pteSize, pteSize * 47, true},
	})
	pt.Release()
}

// TestSpanningEntries checks a mapping that crosses a PGD boundary; the
// second page's physical address is offset by one page (42 -> 43).
func TestSpanningEntries(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Span a pgd with two pages.
	pt.Map(0x00007efffffff000, 2*pteSize, true, usermem.Read, pteSize*42)

	checkMappings(t, pt, []mapping{
		{0x00007efffffff000, pteSize, pteSize * 42, false},
		{0x00007f0000000000, pteSize, pteSize * 43, false},
	})
	pt.Release()
}

// TestSparseEntries checks two mappings in widely separated PGDs with
// different permissions.
func TestSparseEntries(t *testing.T) {
	pt := New(reflectTranslater{}, Opts{})

	// Map two entries in different pgds.
	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)
	pt.Map(0x00007f0000000000, pteSize, true, usermem.Read, pteSize*47)

	checkMappings(t, pt, []mapping{
		{0x400000, pteSize, pteSize * 42, true},
		{0x00007f0000000000, pteSize, pteSize * 47, false},
	})
	pt.Release()
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go b/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go
new file mode 100644
index 000000000..a2b44fb79
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go
@@ -0,0 +1,31 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+import (
+	"unsafe"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// PTEs returns aligned PTE entries.
+func (n *Node) PTEs() *PTEs {
+	addr := uintptr(unsafe.Pointer(&n.unalignedData[0]))
+	offset := addr & (usermem.PageSize - 1)
+	if offset != 0 {
+		offset = usermem.PageSize - offset
+	}
+	return (*PTEs)(unsafe.Pointer(addr + offset))
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
new file mode 100644
index 000000000..dac66373f
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -0,0 +1,79 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package pagetables
+
// Opts are pagetable options.
type Opts struct {
	// EnablePCID indicates that PCIDs should be allocated and encoded
	// into CR3 values.
	EnablePCID bool
}
+
// archPageTables has x86-specific features.
type archPageTables struct {
	// pcids is the PCID database, shared between derived PageTables.
	pcids *PCIDs

	// pcid is the globally unique identifier, or zero if none were
	// available or pcids is nil.
	pcid uint16
}
+
// init initializes arch-specific features.
func (a *archPageTables) init(opts Opts) {
	if opts.EnablePCID {
		a.pcids = NewPCIDs()
		// allocate may return zero if the PCID space is exhausted; a
		// zero pcid simply disables PCID use in CR3.
		a.pcid = a.pcids.allocate()
	}
}

// initFrom initializes arch-specific features from an existing entry.
func (a *archPageTables) initFrom(other *archPageTables) {
	a.pcids = other.pcids // Refer to the same PCID database.
	if a.pcids != nil {
		a.pcid = a.pcids.allocate()
	}
}
+
// release is called from Release.
func (a *archPageTables) release() {
	// Return the PCID to the shared pool (free ignores a zero pcid).
	if a.pcids != nil {
		a.pcids.free(a.pcid)
	}
}
+
// CR3 returns the CR3 value for these tables.
//
// This may be called in interrupt contexts.
//
//go:nosplit
func (p *PageTables) CR3() uint64 {
	// Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
	const noFlushBit uint64 = 0x8000000000000000
	if p.pcid != 0 {
		return noFlushBit | uint64(p.root.physical) | uint64(p.pcid)
	}
	// No PCID: the plain root address (flushes non-global entries on load).
	return uint64(p.root.physical)
}

// FlushCR3 returns the CR3 value that flushes the TLB.
//
// This may be called in interrupt contexts.
//
//go:nosplit
func (p *PageTables) FlushCR3() uint64 {
	// Without the no-flush bit, loading this value invalidates cached
	// entries for this PCID (zero if PCIDs are disabled).
	return uint64(p.root.physical) | uint64(p.pcid)
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go
new file mode 100644
index 000000000..1fc403c48
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go
@@ -0,0 +1,79 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package pagetables
+
+import (
+	"testing"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+func Test2MAnd4K(t *testing.T) {
+	pt := New(reflectTranslater{}, Opts{})
+
+	// Map a small page and a huge page.
+	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)
+	pt.Map(0x00007f0000000000, 1<<21, true, usermem.Read, pmdSize*47)
+
+	checkMappings(t, pt, []mapping{
+		{0x400000, pteSize, pteSize * 42, true},
+		{0x00007f0000000000, pmdSize, pmdSize * 47, false},
+	})
+	pt.Release()
+}
+
+func Test1GAnd4K(t *testing.T) {
+	pt := New(reflectTranslater{}, Opts{})
+
+	// Map a small page and a super page.
+	pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42)
+	pt.Map(0x00007f0000000000, pudSize, true, usermem.Read, pudSize*47)
+
+	checkMappings(t, pt, []mapping{
+		{0x400000, pteSize, pteSize * 42, true},
+		{0x00007f0000000000, pudSize, pudSize * 47, false},
+	})
+	pt.Release()
+}
+
+func TestSplit1GPage(t *testing.T) {
+	pt := New(reflectTranslater{}, Opts{})
+
+	// Map a super page and knock out the middle.
+	pt.Map(0x00007f0000000000, pudSize, true, usermem.Read, pudSize*42)
+	pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize))
+
+	checkMappings(t, pt, []mapping{
+		{0x00007f0000000000, pteSize, pudSize * 42, false},
+		{0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, false},
+	})
+	pt.Release()
+}
+
+func TestSplit2MPage(t *testing.T) {
+	pt := New(reflectTranslater{}, Opts{})
+
+	// Map a huge page and knock out the middle.
+	pt.Map(0x00007f0000000000, pmdSize, true, usermem.Read, pmdSize*42)
+	pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize))
+
+	checkMappings(t, pt, []mapping{
+		{0x00007f0000000000, pteSize, pmdSize * 42, false},
+		{0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, false},
+	})
+	pt.Release()
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
new file mode 100644
index 000000000..509e8c0d9
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
@@ -0,0 +1,74 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package pagetables
+
+import (
+	"sync"
+)
+
// maxPCID is the maximum allowed PCID (the field is 12 bits wide on x86).
const maxPCID = 4095

// PCIDs is a simple PCID database.
type PCIDs struct {
	// mu protects last and available.
	mu sync.Mutex

	// last is the last fresh PCID given out (not including the available
	// pool). If last >= maxPCID, then the only PCIDs available are those
	// in the available pool below.
	last uint16

	// available are PCIDs that have been freed and may be reused.
	available map[uint16]struct{}
}

// NewPCIDs returns a new PCID set.
func NewPCIDs() *PCIDs {
	return &PCIDs{
		available: make(map[uint16]struct{}),
	}
}

// allocate returns an unused PCID, or zero if all are taken.
func (p *PCIDs) allocate() uint16 {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Prefer a recycled PCID. Ranging over an empty map simply does not
	// execute, so no length guard is needed; the iteration picks an
	// arbitrary freed id.
	for id := range p.available {
		delete(p.available, id)
		return id
	}

	// Hand out a fresh PCID while any remain.
	if id := p.last + 1; id <= maxPCID {
		p.last = id
		return id
	}

	// Nothing available.
	return 0
}

// free returns a PCID to the pool.
//
// It is safe to call free with a zero pcid. That is, you may always call free
// with anything returned by allocate.
func (p *PCIDs) free(id uint16) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if id != 0 {
		p.available[id] = struct{}{}
	}
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
new file mode 100644
index 000000000..0b555cd76
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
@@ -0,0 +1,65 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package pagetables
+
+import (
+	"testing"
+)
+
+// TestMaxPCID verifies that exactly maxPCID distinct PCIDs (1..maxPCID)
+// are handed out, and that exhaustion is reported as zero.
+func TestMaxPCID(t *testing.T) {
+	p := NewPCIDs()
+	for i := 0; i < maxPCID; i++ {
+		if id := p.allocate(); id != uint16(i+1) {
+			t.Errorf("got %d, expected %d", id, i+1)
+		}
+	}
+	// The original had a redundant nested `if id != 0` here; one check
+	// suffices.
+	if id := p.allocate(); id != 0 {
+		t.Errorf("got %d, expected 0", id)
+	}
+}
+
+// TestFirstPCID verifies that a fresh set hands out 1 first.
+func TestFirstPCID(t *testing.T) {
+	p := NewPCIDs()
+	id := p.allocate()
+	if id != 1 {
+		t.Errorf("got %d, expected 1", id)
+	}
+}
+
+// TestFreePCID verifies that freeing zero is a no-op: zero must never
+// enter the available pool, since it marks allocation failure.
+func TestFreePCID(t *testing.T) {
+	p := NewPCIDs()
+	p.free(0)
+	id := p.allocate()
+	if id != 1 {
+		t.Errorf("got %d, expected 1 (not zero)", id)
+	}
+}
+
+// TestReusePCID verifies that a freed PCID is recycled before a fresh
+// one is minted.
+func TestReusePCID(t *testing.T) {
+	p := NewPCIDs()
+	id := p.allocate()
+	if id != 1 {
+		t.Errorf("got %d, expected 1", id)
+	}
+	p.free(id)
+	if next := p.allocate(); next != 1 {
+		t.Errorf("got %d, expected 1", next)
+	}
+	if next := p.allocate(); next != 2 {
+		t.Errorf("got %d, expected 2", next)
+	}
+}
diff --git a/pkg/sentry/platform/ring0/ring0.go b/pkg/sentry/platform/ring0/ring0.go
new file mode 100644
index 000000000..4991031c5
--- /dev/null
+++ b/pkg/sentry/platform/ring0/ring0.go
@@ -0,0 +1,16 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ring0 provides basic operating system-level stubs.
+package ring0
diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go
new file mode 100644
index 000000000..e16f6c599
--- /dev/null
+++ b/pkg/sentry/platform/ring0/x86.go
@@ -0,0 +1,242 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package ring0
+
+import (
+	"gvisor.googlesource.com/gvisor/pkg/cpuid"
+)
+
+// Useful bits.
+const (
+	// CR0 control bits.
+	_CR0_PE = 1 << 0  // Protection enable.
+	_CR0_ET = 1 << 4  // Extension type.
+	_CR0_PG = 1 << 31 // Paging enable.
+
+	// CR4 feature-enable bits.
+	_CR4_PSE        = 1 << 4
+	_CR4_PAE        = 1 << 5
+	_CR4_PGE        = 1 << 7
+	_CR4_OSFXSR     = 1 << 9
+	_CR4_OSXMMEXCPT = 1 << 10
+	_CR4_FSGSBASE   = 1 << 16
+	_CR4_PCIDE      = 1 << 17
+	_CR4_OSXSAVE    = 1 << 18
+	_CR4_SMEP       = 1 << 20
+
+	// RFLAGS bits.
+	_RFLAGS_AC       = 1 << 18
+	_RFLAGS_NT       = 1 << 14
+	_RFLAGS_IOPL     = 3 << 12
+	_RFLAGS_DF       = 1 << 10
+	_RFLAGS_IF       = 1 << 9
+	_RFLAGS_STEP     = 1 << 8
+	_RFLAGS_RESERVED = 1 << 1 // Architecturally always set.
+
+	// EFER (extended feature enable register) bits.
+	_EFER_SCE = 0x001 // Syscall enable.
+	_EFER_LME = 0x100 // Long mode enable.
+	_EFER_NX  = 0x800 // No-execute enable.
+
+	// MSR numbers for the syscall entry points and flag mask.
+	_MSR_STAR         = 0xc0000081
+	_MSR_LSTAR        = 0xc0000082
+	_MSR_CSTAR        = 0xc0000083
+	_MSR_SYSCALL_MASK = 0xc0000084
+)
+
+// Vector is an exception vector.
+type Vector uintptr
+
+// Exception vectors.
+//
+// These follow the x86 architectural vector numbering (0 is
+// divide-by-zero, 14 is page fault, and so on). Vector 15 is reserved,
+// hence the blank identifier in the sequence.
+const (
+	DivideByZero Vector = iota
+	Debug
+	NMI
+	Breakpoint
+	Overflow
+	BoundRangeExceeded
+	InvalidOpcode
+	DeviceNotAvailable
+	DoubleFault
+	CoprocessorSegmentOverrun
+	InvalidTSS
+	SegmentNotPresent
+	StackSegmentFault
+	GeneralProtectionFault
+	PageFault
+	_ // Vector 15 is reserved.
+	X87FloatingPointException
+	AlignmentCheck
+	MachineCheck
+	SIMDFloatingPointException
+	VirtualizationException
+	SecurityException = 0x1e
+	SyscallInt80      = 0x80               // Legacy int $0x80 system call vector.
+	_NR_INTERRUPTS    = SyscallInt80 + 1   // Number of IDT entries (see idt64).
+)
+
+// System call vectors.
+const (
+	// Syscall is a synthetic vector, one past the last IDT entry
+	// (_NR_INTERRUPTS), so it can never collide with a hardware vector.
+	// NOTE(review): presumably raised by the SYSCALL entry path rather
+	// than the IDT — confirm against entry_amd64.s.
+	Syscall Vector = _NR_INTERRUPTS
+)
+
+// VirtualAddressBits returns the number bits available for virtual addresses.
+//
+// Note that sign-extension semantics apply to the highest order bit.
+//
+// FIXME: This should use the cpuid passed to Init.
+func VirtualAddressBits() uint32 {
+	// CPUID leaf 0x80000008: EAX bits 15:8 report the virtual width.
+	eax, _, _, _ := cpuid.HostID(0x80000008, 0)
+	bits := (eax >> 8) & 0xff
+	return bits
+}
+
+// PhysicalAddressBits returns the number of bits available for physical addresses.
+//
+// FIXME: This should use the cpuid passed to Init.
+func PhysicalAddressBits() uint32 {
+	// CPUID leaf 0x80000008: EAX bits 7:0 report the physical width.
+	eax, _, _, _ := cpuid.HostID(0x80000008, 0)
+	bits := eax & 0xff
+	return bits
+}
+
+// Selector is a segment Selector.
+type Selector uint16
+
+// SegmentDescriptor is a segment descriptor.
+//
+// The two 32-bit words hold the standard x86 descriptor layout; the
+// accessors below show how base, limit, flags and DPL are packed.
+type SegmentDescriptor struct {
+	bits [2]uint32
+}
+
+// descriptorTable is a collection of descriptors.
+type descriptorTable [32]SegmentDescriptor
+
+// SegmentDescriptorFlags are typed flags within a descriptor.
+type SegmentDescriptorFlags uint32
+
+// SegmentDescriptorFlag declarations.
+//
+// NOTE(review): only the first constant carries the
+// SegmentDescriptorFlags type; the remainder are untyped constants,
+// and some use sites (e.g. Gate64.setInterrupt mixing them into uint32
+// expressions) rely on that implicit convertibility. Typing them all
+// would break those call sites.
+const (
+	SegmentDescriptorAccess     SegmentDescriptorFlags = 1 << 8  // Access bit (always set).
+	SegmentDescriptorWrite                             = 1 << 9  // Write permission.
+	SegmentDescriptorExpandDown                        = 1 << 10 // Grows down, not used.
+	SegmentDescriptorExecute                           = 1 << 11 // Execute permission.
+	SegmentDescriptorSystem                            = 1 << 12 // Zero => system, 1 => user code/data.
+	SegmentDescriptorPresent                           = 1 << 15 // Present.
+	SegmentDescriptorAVL                               = 1 << 20 // Available.
+	SegmentDescriptorLong                              = 1 << 21 // Long mode.
+	SegmentDescriptorDB                                = 1 << 22 // 16 or 32-bit.
+	SegmentDescriptorG                                 = 1 << 23 // Granularity: page or byte.
+)
+
+// Base returns the descriptor's base linear address.
+func (d *SegmentDescriptor) Base() uint32 {
+	hi := d.bits[1] & 0xFF000000          // Base bits 31:24.
+	mid := (d.bits[1] & 0x000000FF) << 16 // Base bits 23:16.
+	lo := d.bits[0] >> 16                 // Base bits 15:0.
+	return hi | mid | lo
+}
+
+// Limit returns the descriptor size.
+func (d *SegmentDescriptor) Limit() uint32 {
+	limit := d.bits[0]&0xFFFF | d.bits[1]&0xF0000
+	if d.bits[1]&uint32(SegmentDescriptorG) == 0 {
+		// Byte granularity: the 20-bit limit is used directly.
+		return limit
+	}
+	// Page granularity: the stored limit counts 4K pages.
+	return limit<<12 | 0xFFF
+}
+
+// Flags returns descriptor flags.
+func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags {
+	return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00)
+}
+
+// DPL returns the descriptor privilege level.
+func (d *SegmentDescriptor) DPL() int {
+	dpl := (d.bits[1] >> 13) & 3
+	return int(dpl)
+}
+
+// setNull clears the descriptor entirely, producing a null descriptor.
+func (d *SegmentDescriptor) setNull() {
+	d.bits = [2]uint32{}
+}
+
+// set writes a complete descriptor for the given base, limit, privilege
+// level and flags.
+//
+// The present flag is always added. If the limit does not fit in the
+// 20-bit field, it is reduced to page granularity and the G flag is set.
+func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) {
+	flags |= SegmentDescriptorPresent
+	if limit>>12 != 0 {
+		limit >>= 12
+		flags |= SegmentDescriptorG
+	}
+	// Word 0: base 15:0 | limit 15:0. Word 1: base 31:24 and 23:16,
+	// limit 19:16, flags and DPL.
+	d.bits[0] = base<<16 | limit&0xFFFF
+	d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13
+}
+
+// setCode32 sets a 32-bit (DB) executable user code segment.
+func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) {
+	d.set(base, limit, dpl, SegmentDescriptorSystem|SegmentDescriptorExecute|SegmentDescriptorDB)
+}
+
+// setCode64 sets a 64-bit (long mode) executable user code segment.
+func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) {
+	d.set(base, limit, dpl, SegmentDescriptorSystem|SegmentDescriptorExecute|SegmentDescriptorLong|SegmentDescriptorG)
+}
+
+// setData sets a writable user data segment.
+func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) {
+	d.set(base, limit, dpl, SegmentDescriptorSystem|SegmentDescriptorWrite)
+}
+
+// setHi is only used for the TSS segment, which is magically 64-bits.
+//
+// It stores the high 32 bits of the base in bits[0] and zeroes bits[1],
+// forming the upper half of an expanded (16-byte) system descriptor.
+func (d *SegmentDescriptor) setHi(base uint32) {
+	d.bits[0] = base
+	d.bits[1] = 0
+}
+
+// Gate64 is a 64-bit task, trap, or interrupt gate.
+type Gate64 struct {
+	// bits holds the four 32-bit words of the 16-byte gate descriptor;
+	// the last word is reserved and is never written by the setters.
+	bits [4]uint32
+}
+
+// idt64 is a 64-bit interrupt descriptor table.
+//
+// It covers every vector up to and including SyscallInt80.
+type idt64 [_NR_INTERRUPTS]Gate64
+
+// setInterrupt configures this gate as a 64-bit interrupt gate (type 14)
+// targeting the given code segment selector and entry point, with the
+// given privilege level and interrupt stack table index.
+func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) {
+	// Word 0: selector and offset 15:0. Word 1: offset 31:16, present,
+	// DPL, gate type (14) and IST. Word 2: offset 63:32.
+	g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF
+	g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7
+	g.bits[2] = uint32(rip >> 32)
+}
+
+// setTrap configures this gate as a 64-bit trap gate (type 15).
+//
+// A trap gate differs from an interrupt gate only in the low type bit:
+// OR-ing it in turns the type written by setInterrupt (14) into 15.
+func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) {
+	g.setInterrupt(cs, rip, dpl, ist)
+	g.bits[1] |= 1 << 8
+}
+
+// TaskState64 is a 64-bit task state structure.
+//
+// The layout mirrors the hardware 64-bit TSS: per-ring stack pointers,
+// seven interrupt stack table entries and the I/O permission bitmap
+// offset, with each 64-bit value split into low/high 32-bit halves.
+type TaskState64 struct {
+	_              uint32    // Reserved.
+	rsp0Lo, rsp0Hi uint32    // Stack pointer for ring 0.
+	rsp1Lo, rsp1Hi uint32    // Stack pointer for ring 1.
+	rsp2Lo, rsp2Hi uint32    // Stack pointer for ring 2.
+	_              [2]uint32 // Reserved.
+	ist1Lo, ist1Hi uint32    // Interrupt stack table entries 1-7.
+	ist2Lo, ist2Hi uint32
+	ist3Lo, ist3Hi uint32
+	ist4Lo, ist4Hi uint32
+	ist5Lo, ist5Hi uint32
+	ist6Lo, ist6Hi uint32
+	ist7Lo, ist7Hi uint32
+	_              [2]uint32 // Reserved.
+	_              uint16    // Reserved.
+	ioPerm         uint16    // I/O permission bitmap base offset.
+}
-- 
cgit v1.2.3