summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/control/proc.go13
-rw-r--r--pkg/sentry/fs/host/socket.go2
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go9
-rw-r--r--pkg/sentry/fsimpl/host/socket.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go5
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go10
-rw-r--r--pkg/sentry/platform/kvm/BUILD12
-rw-r--r--pkg/sentry/platform/kvm/address_space.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_allocator.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go2
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go2
-rw-r--r--pkg/sentry/platform/kvm/context.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64_test.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm_arm64.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go4
-rw-r--r--pkg/sentry/platform/kvm/machine.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go4
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go4
-rw-r--r--pkg/sentry/platform/kvm/physical_map.go2
-rw-r--r--pkg/sentry/platform/ring0/BUILD85
-rw-r--r--pkg/sentry/platform/ring0/aarch64.go122
-rw-r--r--pkg/sentry/platform/ring0/defs.go112
-rw-r--r--pkg/sentry/platform/ring0/defs_amd64.go162
-rw-r--r--pkg/sentry/platform/ring0/defs_arm64.go139
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.go131
-rw-r--r--pkg/sentry/platform/ring0/entry_amd64.s371
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.go60
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.s769
-rw-r--r--pkg/sentry/platform/ring0/gen_offsets/BUILD40
-rw-r--r--pkg/sentry/platform/ring0/gen_offsets/main.go24
-rw-r--r--pkg/sentry/platform/ring0/kernel.go90
-rw-r--r--pkg/sentry/platform/ring0/kernel_amd64.go323
-rw-r--r--pkg/sentry/platform/ring0/kernel_arm64.go85
-rw-r--r--pkg/sentry/platform/ring0/kernel_unsafe.go41
-rw-r--r--pkg/sentry/platform/ring0/lib_amd64.go119
-rw-r--r--pkg/sentry/platform/ring0/lib_amd64.s200
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.go71
-rw-r--r--pkg/sentry/platform/ring0/lib_arm64.s180
-rw-r--r--pkg/sentry/platform/ring0/offsets_amd64.go100
-rw-r--r--pkg/sentry/platform/ring0/offsets_arm64.go124
-rw-r--r--pkg/sentry/platform/ring0/pagetables/BUILD84
-rw-r--r--pkg/sentry/platform/ring0/pagetables/allocator.go127
-rw-r--r--pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go53
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables.go310
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go215
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go75
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go75
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go70
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go80
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_test.go156
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pagetables_x86.go180
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids.go104
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go32
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s45
-rw-r--r--pkg/sentry/platform/ring0/pagetables/pcids_x86.go20
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_amd64.go307
-rw-r--r--pkg/sentry/platform/ring0/pagetables/walker_arm64.go314
-rw-r--r--pkg/sentry/platform/ring0/ring0.go16
-rw-r--r--pkg/sentry/platform/ring0/x86.go296
-rw-r--r--pkg/sentry/socket/netstack/netstack.go102
-rw-r--r--pkg/sentry/socket/netstack/netstack_vfs2.go4
-rw-r--r--pkg/sentry/socket/netstack/provider.go2
-rw-r--r--pkg/sentry/socket/netstack/provider_vfs2.go2
-rw-r--r--pkg/sentry/socket/socket.go8
-rw-r--r--pkg/sentry/socket/unix/transport/connectioned.go2
-rw-r--r--pkg/sentry/socket/unix/transport/unix.go58
-rw-r--r--pkg/sentry/syscalls/linux/error.go4
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go13
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/ioctl.go4
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/sync.go4
-rw-r--r--pkg/sentry/vfs/BUILD1
-rw-r--r--pkg/sentry/vfs/opath.go139
-rw-r--r--pkg/sentry/vfs/options.go2
-rw-r--r--pkg/sentry/vfs/vfs.go12
80 files changed, 342 insertions, 6025 deletions
diff --git a/pkg/sentry/control/proc.go b/pkg/sentry/control/proc.go
index 1d88db12f..de7a0f3ab 100644
--- a/pkg/sentry/control/proc.go
+++ b/pkg/sentry/control/proc.go
@@ -404,3 +404,16 @@ func ttyName(tty *kernel.TTY) string {
}
return fmt.Sprintf("pts/%d", tty.Index)
}
+
+// ContainerUsage retrieves per-container CPU usage.
+func ContainerUsage(kr *kernel.Kernel) map[string]uint64 {
+ cusage := make(map[string]uint64)
+ for _, tg := range kr.TaskSet().Root.ThreadGroups() {
+ // We want each tg's usage including reaped children.
+ cid := tg.Leader().ContainerID()
+ stats := tg.CPUStats()
+ stats.Accumulate(tg.JoinedChildCPUStats())
+ cusage[cid] += uint64(stats.UserTime.Nanoseconds()) + uint64(stats.SysTime.Nanoseconds())
+ }
+ return cusage
+}
diff --git a/pkg/sentry/fs/host/socket.go b/pkg/sentry/fs/host/socket.go
index a2f3d5918..07b4fb70f 100644
--- a/pkg/sentry/fs/host/socket.go
+++ b/pkg/sentry/fs/host/socket.go
@@ -257,7 +257,7 @@ func (c *ConnectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
-func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return tcpip.FullAddress{Addr: tcpip.Address(c.path)}, nil
}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 089955a96..ae972fcb5 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -299,10 +299,15 @@ func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, off
src = src.TakeFirst64(limit)
}
- // Do a buffered write. See rationale in PRead.
if d.cachedMetadataAuthoritative() {
- d.touchCMtime()
+ if fd.isRegularFile {
+ d.touchCMtimeLocked()
+ } else {
+ d.touchCMtime()
+ }
}
+
+ // Do a buffered write. See rationale in PRead.
buf := make([]byte, src.NumBytes())
copied, copyErr := src.CopyIn(ctx, buf)
if copied == 0 && copyErr != nil {
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 60acc367f..72aa535f8 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -201,7 +201,7 @@ func (c *ConnectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements transport.ConnectedEndpoint.GetLocalAddress.
-func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (c *ConnectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return tcpip.FullAddress{Addr: tcpip.Address(c.addr)}, nil
}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index eac578f25..8139bff76 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -371,6 +371,8 @@ type OrderedChildrenOptions struct {
// OrderedChildren may modify the tracked children. This applies to
// operations related to rename, unlink and rmdir. If an OrderedChildren is
// not writable, these operations all fail with EPERM.
+ //
+ // Note that writable users must implement the sticky bit (I_SVTX).
Writable bool
}
@@ -556,7 +558,6 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode)
return err
}
- // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
o.removeLocked(name)
return nil
}
@@ -603,8 +604,8 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c
if err := o.checkExistingLocked(oldname, child); err != nil {
return err
}
+ o.removeLocked(oldname)
- // TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
dst.replaceChildLocked(ctx, newname, child)
return nil
}
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index 3b6336e94..09c0ccaf2 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -368,17 +368,15 @@ func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars usermem.AddrRangeSeq, dst
})
}
-// CopyOutFrom implements usermem.IO.CopyOutFrom.
+// CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's
+// responsibility to call fd.pipe.Notify(waiter.EventIn) after the write is
+// completed.
//
// Preconditions: fd.pipe.mu must be locked.
func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars usermem.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
- n, err := fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) {
+ return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) {
return src.ReadToBlocks(dsts)
})
- if n > 0 {
- fd.pipe.Notify(waiter.EventIn)
- }
- return n, err
}
// SwapUint32 implements usermem.IO.SwapUint32.
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 8ce411102..b3290917e 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -45,14 +45,14 @@ go_library(
"//pkg/cpuid",
"//pkg/log",
"//pkg/procid",
+ "//pkg/ring0",
+ "//pkg/ring0/pagetables",
"//pkg/safecopy",
"//pkg/seccomp",
"//pkg/sentry/arch",
"//pkg/sentry/memmap",
"//pkg/sentry/platform",
"//pkg/sentry/platform/interrupt",
- "//pkg/sentry/platform/ring0",
- "//pkg/sentry/platform/ring0/pagetables",
"//pkg/sentry/time",
"//pkg/sync",
"//pkg/usermem",
@@ -75,11 +75,11 @@ go_test(
"requires-kvm",
],
deps = [
+ "//pkg/ring0",
+ "//pkg/ring0/pagetables",
"//pkg/sentry/arch",
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm/testutil",
- "//pkg/sentry/platform/ring0",
- "//pkg/sentry/platform/ring0/pagetables",
"//pkg/sentry/time",
"//pkg/usermem",
],
@@ -89,6 +89,6 @@ genrule(
name = "bluepill_impl_amd64",
srcs = ["bluepill_amd64.s"],
outs = ["bluepill_impl_amd64.s"],
- cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@",
- tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
+ cmd = "(echo -e '// build +amd64\\n' && $(location //pkg/ring0/gen_offsets) && cat $(SRCS)) > $@",
+ tools = ["//pkg/ring0/gen_offsets"],
)
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index af5c5e191..25c21e843 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -18,9 +18,9 @@ import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/atomicbitops"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 4b23f7803..2c970162e 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -19,9 +19,9 @@ import (
"reflect"
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/safecopy"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// bluepill enters guest mode.
diff --git a/pkg/sentry/platform/kvm/bluepill_allocator.go b/pkg/sentry/platform/kvm/bluepill_allocator.go
index 9485e1301..1825edc3a 100644
--- a/pkg/sentry/platform/kvm/bluepill_allocator.go
+++ b/pkg/sentry/platform/kvm/bluepill_allocator.go
@@ -17,7 +17,7 @@ package kvm
import (
"fmt"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
)
type allocator struct {
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index ddc1554d5..83a4766fb 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -19,8 +19,8 @@ package kvm
import (
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
var (
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
index f8ccb7430..0063e947b 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
@@ -20,8 +20,8 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// dieArchSetup initializes the state for dieTrampoline.
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index 1f09813ba..35298135a 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -19,8 +19,8 @@ package kvm
import (
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
var (
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
index 4d912769a..dbbf2a897 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go
@@ -20,8 +20,8 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
// fpsimdPtr returns a fpsimd64 for the given address.
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index 17268d127..aeae01dbd 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -18,10 +18,10 @@ import (
"sync/atomic"
pkgcontext "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 5979aef97..7bdf57436 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -20,9 +20,9 @@ import (
"os"
"syscall"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go
index 093497bc4..b9ed4a706 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64.go
@@ -18,7 +18,7 @@ package kvm
import (
"gvisor.dev/gvisor/pkg/cpuid"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/ring0"
)
// userRegs represents KVM user registers.
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go
index c0b4fd374..76fc594a0 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_test.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go
@@ -19,11 +19,11 @@ package kvm
import (
"testing"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
)
func TestSegments(t *testing.T) {
diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go
index 9db1db4e9..b73340f0e 100644
--- a/pkg/sentry/platform/kvm/kvm_arm64.go
+++ b/pkg/sentry/platform/kvm/kvm_arm64.go
@@ -17,8 +17,8 @@
package kvm
import (
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
)
type kvmOneReg struct {
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index a650877d6..11ca1f0ea 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -22,11 +22,11 @@ import (
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index e2fffc99b..1ece1b8d8 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -23,8 +23,8 @@ import (
"gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/procid"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/usermem"
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 8e03c310d..59c752d73 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -24,10 +24,10 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
ktime "gvisor.dev/gvisor/pkg/sentry/time"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index aa2d21748..7d7857067 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -17,10 +17,10 @@
package kvm
import (
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index a466acf4d..dca0cdb60 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -23,10 +23,10 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/ring0"
+ "gvisor.dev/gvisor/pkg/ring0/pagetables"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go
index f7fa2f98d..8bdec93ae 100644
--- a/pkg/sentry/platform/kvm/physical_map.go
+++ b/pkg/sentry/platform/kvm/physical_map.go
@@ -20,7 +20,7 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.dev/gvisor/pkg/ring0"
"gvisor.dev/gvisor/pkg/usermem"
)
diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD
deleted file mode 100644
index 2852b7387..000000000
--- a/pkg/sentry/platform/ring0/BUILD
+++ /dev/null
@@ -1,85 +0,0 @@
-load("//tools:defs.bzl", "arch_genrule", "go_library")
-load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
-
-package(licenses = ["notice"])
-
-go_template(
- name = "defs_amd64",
- srcs = [
- "defs.go",
- "defs_amd64.go",
- "offsets_amd64.go",
- "x86.go",
- ],
- visibility = [":__subpackages__"],
-)
-
-go_template(
- name = "defs_arm64",
- srcs = [
- "aarch64.go",
- "defs.go",
- "defs_arm64.go",
- "offsets_arm64.go",
- ],
- visibility = [":__subpackages__"],
-)
-
-go_template_instance(
- name = "defs_impl_amd64",
- out = "defs_impl_amd64.go",
- package = "ring0",
- template = ":defs_amd64",
-)
-
-go_template_instance(
- name = "defs_impl_arm64",
- out = "defs_impl_arm64.go",
- package = "ring0",
- template = ":defs_arm64",
-)
-
-arch_genrule(
- name = "entry_impl_amd64",
- srcs = ["entry_amd64.s"],
- outs = ["entry_impl_amd64.s"],
- cmd = "(echo -e '// build +amd64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_amd64.s)) > $@",
- tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
-)
-
-arch_genrule(
- name = "entry_impl_arm64",
- srcs = ["entry_arm64.s"],
- outs = ["entry_impl_arm64.s"],
- cmd = "(echo -e '// build +arm64\\n' && QEMU $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(location entry_arm64.s)) > $@",
- tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
-)
-
-go_library(
- name = "ring0",
- srcs = [
- "defs_impl_amd64.go",
- "defs_impl_arm64.go",
- "entry_amd64.go",
- "entry_arm64.go",
- "entry_impl_amd64.s",
- "entry_impl_arm64.s",
- "kernel.go",
- "kernel_amd64.go",
- "kernel_arm64.go",
- "kernel_unsafe.go",
- "lib_amd64.go",
- "lib_amd64.s",
- "lib_arm64.go",
- "lib_arm64.s",
- "ring0.go",
- ],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/cpuid",
- "//pkg/safecopy",
- "//pkg/sentry/arch",
- "//pkg/sentry/platform/ring0/pagetables",
- "//pkg/usermem",
- ],
-)
diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go
deleted file mode 100644
index 3bda594f9..000000000
--- a/pkg/sentry/platform/ring0/aarch64.go
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// Useful bits.
-const (
- _PGD_PGT_BASE = 0x1000
- _PGD_PGT_SIZE = 0x1000
- _PUD_PGT_BASE = 0x2000
- _PUD_PGT_SIZE = 0x1000
- _PMD_PGT_BASE = 0x3000
- _PMD_PGT_SIZE = 0x4000
- _PTE_PGT_BASE = 0x7000
- _PTE_PGT_SIZE = 0x1000
-)
-
-const (
- // DAIF bits:debug, sError, IRQ, FIQ.
- _PSR_D_BIT = 0x00000200
- _PSR_A_BIT = 0x00000100
- _PSR_I_BIT = 0x00000080
- _PSR_F_BIT = 0x00000040
- _PSR_DAIF_SHIFT = 6
- _PSR_DAIF_MASK = 0xf << _PSR_DAIF_SHIFT
-
- // PSR bits.
- _PSR_MODE_EL0t = 0x00000000
- _PSR_MODE_EL1t = 0x00000004
- _PSR_MODE_EL1h = 0x00000005
- _PSR_MODE_MASK = 0x0000000f
-
- PsrFlagsClear = _PSR_MODE_MASK | _PSR_DAIF_MASK
- PsrModeMask = _PSR_MODE_MASK
-
- // KernelFlagsSet should always be set in the kernel.
- KernelFlagsSet = _PSR_MODE_EL1h | _PSR_D_BIT | _PSR_A_BIT | _PSR_I_BIT | _PSR_F_BIT
-
- // UserFlagsSet are always set in userspace.
- UserFlagsSet = _PSR_MODE_EL0t
-)
-
-// Vector is an exception vector.
-type Vector uintptr
-
-// Exception vectors.
-const (
- El1InvSync = iota
- El1InvIrq
- El1InvFiq
- El1InvError
-
- El1Sync
- El1Irq
- El1Fiq
- El1Err
-
- El0Sync
- El0Irq
- El0Fiq
- El0Err
-
- El0InvSync
- El0InvIrq
- El0InvFiq
- El0InvErr
-
- El1SyncDa
- El1SyncIa
- El1SyncSpPc
- El1SyncUndef
- El1SyncDbg
- El1SyncInv
-
- El0SyncSVC
- El0SyncDa
- El0SyncIa
- El0SyncFpsimdAcc
- El0SyncSveAcc
- El0SyncFpsimdExc
- El0SyncSys
- El0SyncSpPc
- El0SyncUndef
- El0SyncDbg
- El0SyncWfx
- El0SyncInv
-
- El0ErrNMI
- El0ErrBounce
-
- _NR_INTERRUPTS
-)
-
-// System call vectors.
-const (
- Syscall Vector = El0SyncSVC
- PageFault Vector = El0SyncDa
- VirtualizationException Vector = El0ErrBounce
-)
-
-// VirtualAddressBits returns the number bits available for virtual addresses.
-func VirtualAddressBits() uint32 {
- return 48
-}
-
-// PhysicalAddressBits returns the number of bits available for physical addresses.
-func PhysicalAddressBits() uint32 {
- return 40
-}
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
deleted file mode 100644
index f9765771e..000000000
--- a/pkg/sentry/platform/ring0/defs.go
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
-)
-
-// Kernel is a global kernel object.
-//
-// This contains global state, shared by multiple CPUs.
-type Kernel struct {
- // PageTables are the kernel pagetables; this must be provided.
- PageTables *pagetables.PageTables
-
- KernelArchState
-}
-
-// Hooks are hooks for kernel functions.
-type Hooks interface {
- // KernelSyscall is called for kernel system calls.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelSyscall()
-
- // KernelException handles an exception during kernel execution.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelException(Vector)
-}
-
-// CPU is the per-CPU struct.
-type CPU struct {
- // self is a self reference.
- //
- // This is always guaranteed to be at offset zero.
- self *CPU
-
- // kernel is reference to the kernel that this CPU was initialized
- // with. This reference is kept for garbage collection purposes: CPU
- // registers may refer to objects within the Kernel object that cannot
- // be safely freed.
- kernel *Kernel
-
- // CPUArchState is architecture-specific state.
- CPUArchState
-
- // registers is a set of registers; these may be used on kernel system
- // calls and exceptions via the Registers function.
- registers arch.Registers
-
- // hooks are kernel hooks.
- hooks Hooks
-}
-
-// Registers returns a modifiable-copy of the kernel registers.
-//
-// This is explicitly safe to call during KernelException and KernelSyscall.
-//
-//go:nosplit
-func (c *CPU) Registers() *arch.Registers {
- return &c.registers
-}
-
-// SwitchOpts are passed to the Switch function.
-type SwitchOpts struct {
- // Registers are the user register state.
- Registers *arch.Registers
-
- // FloatingPointState is a byte pointer where floating point state is
- // saved and restored.
- FloatingPointState *byte
-
- // PageTables are the application page tables.
- PageTables *pagetables.PageTables
-
- // Flush indicates that a TLB flush should be forced on switch.
- Flush bool
-
- // FullRestore indicates that an iret-based restore should be used.
- FullRestore bool
-
- // SwitchArchOpts are architecture-specific options.
- SwitchArchOpts
-}
diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go
deleted file mode 100644
index 7a2275558..000000000
--- a/pkg/sentry/platform/ring0/defs_amd64.go
+++ /dev/null
@@ -1,162 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-var (
- // UserspaceSize is the total size of userspace.
- UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
-
- // MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
-
- // KernelStartAddress is the starting kernel address.
- KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
-)
-
-// Segment indices and Selectors.
-const (
- // Index into GDT array.
- _ = iota // Null descriptor first.
- _ // Reserved (Linux is kernel 32).
- segKcode // Kernel code (64-bit).
- segKdata // Kernel data.
- segUcode32 // User code (32-bit).
- segUdata // User data.
- segUcode64 // User code (64-bit).
- segTss // Task segment descriptor.
- segTssHi // Upper bits for TSS.
- segLast // Last segment (terminal, not included).
-)
-
-// Selectors.
-const (
- Kcode Selector = segKcode << 3
- Kdata Selector = segKdata << 3
- Ucode32 Selector = (segUcode32 << 3) | 3
- Udata Selector = (segUdata << 3) | 3
- Ucode64 Selector = (segUcode64 << 3) | 3
- Tss Selector = segTss << 3
-)
-
-// Standard segments.
-var (
- UserCodeSegment32 SegmentDescriptor
- UserDataSegment SegmentDescriptor
- UserCodeSegment64 SegmentDescriptor
- KernelCodeSegment SegmentDescriptor
- KernelDataSegment SegmentDescriptor
-)
-
-// KernelArchState contains architecture-specific state.
-type KernelArchState struct {
- // cpuEntries is array of kernelEntry for all cpus.
- cpuEntries []kernelEntry
-
- // globalIDT is our set of interrupt gates.
- globalIDT *idt64
-}
-
-// kernelEntry contains minimal CPU-specific arch state
-// that can be mapped at the upper of the address space.
-// Malicious APP might steal info from it via CPU bugs.
-type kernelEntry struct {
- // stack is the stack used for interrupts on this CPU.
- stack [256]byte
-
- // scratch space for temporary usage.
- scratch0 uint64
-
- // stackTop is the top of the stack.
- stackTop uint64
-
- // cpuSelf is back reference to CPU.
- cpuSelf *CPU
-
- // kernelCR3 is the cr3 used for sentry kernel.
- kernelCR3 uintptr
-
- // gdt is the CPU's descriptor table.
- gdt descriptorTable
-
- // tss is the CPU's task state.
- tss TaskState64
-}
-
-// CPUArchState contains CPU-specific arch state.
-type CPUArchState struct {
- // errorCode is the error code from the last exception.
- errorCode uintptr
-
- // errorType indicates the type of error code here, it is always set
- // along with the errorCode value above.
- //
- // It will either by 1, which indicates a user error, or 0 indicating a
- // kernel error. If the error code below returns false (kernel error),
- // then it cannot provide relevant information about the last
- // exception.
- errorType uintptr
-
- *kernelEntry
-}
-
-// ErrorCode returns the last error code.
-//
-// The returned boolean indicates whether the error code corresponds to the
-// last user error or not. If it does not, then fault information must be
-// ignored. This is generally the result of a kernel fault while servicing a
-// user fault.
-//
-//go:nosplit
-func (c *CPU) ErrorCode() (value uintptr, user bool) {
- return c.errorCode, c.errorType != 0
-}
-
-// ClearErrorCode resets the error code.
-//
-//go:nosplit
-func (c *CPU) ClearErrorCode() {
- c.errorCode = 0 // No code.
- c.errorType = 1 // User mode.
-}
-
-// SwitchArchOpts are embedded in SwitchOpts.
-type SwitchArchOpts struct {
- // UserPCID indicates that the application PCID to be used on switch,
- // assuming that PCIDs are supported.
- //
- // Per pagetables_x86.go, a zero PCID implies a flush.
- UserPCID uint16
-
- // KernelPCID indicates that the kernel PCID to be used on return,
- // assuming that PCIDs are supported.
- //
- // Per pagetables_x86.go, a zero PCID implies a flush.
- KernelPCID uint16
-}
-
-func init() {
- KernelCodeSegment.setCode64(0, 0, 0)
- KernelDataSegment.setData(0, 0xffffffff, 0)
- UserCodeSegment32.setCode64(0, 0, 3)
- UserDataSegment.setData(0, 0xffffffff, 3)
- UserCodeSegment64.setCode64(0, 0, 3)
-}
diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/sentry/platform/ring0/defs_arm64.go
deleted file mode 100644
index a014dcbc0..000000000
--- a/pkg/sentry/platform/ring0/defs_arm64.go
+++ /dev/null
@@ -1,139 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-var (
- // UserspaceSize is the total size of userspace.
- UserspaceSize = uintptr(1) << (VirtualAddressBits())
-
- // MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
-
- // KernelStartAddress is the starting kernel address.
- KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
-)
-
-// KernelArchState contains architecture-specific state.
-type KernelArchState struct {
-}
-
-// CPUArchState contains CPU-specific arch state.
-type CPUArchState struct {
- // stack is the stack used for interrupts on this CPU.
- stack [512]byte
-
- // errorCode is the error code from the last exception.
- errorCode uintptr
-
- // errorType indicates the type of error code here, it is always set
- // along with the errorCode value above.
- //
- // It will either be 1, which indicates a user error, or 0 indicating a
- // kernel error. If the error code below returns false (kernel error),
- // then it cannot provide relevant information about the last
- // exception.
- errorType uintptr
-
- // faultAddr is the value of far_el1.
- faultAddr uintptr
-
- // ttbr0Kvm is the value of ttbr0_el1 for sentry.
- ttbr0Kvm uintptr
-
- // ttbr0App is the value of ttbr0_el1 for application.
- ttbr0App uintptr
-
- // exception vector.
- vecCode Vector
-
- // application context pointer.
- appAddr uintptr
-
- // lazyVFP is the value of cpacr_el1.
- lazyVFP uintptr
-
- // appASID is the asid value of guest application.
- appASID uintptr
-}
-
-// ErrorCode returns the last error code.
-//
-// The returned boolean indicates whether the error code corresponds to the
-// last user error or not. If it does not, then fault information must be
-// ignored. This is generally the result of a kernel fault while servicing a
-// user fault.
-//
-//go:nosplit
-func (c *CPU) ErrorCode() (value uintptr, user bool) {
- return c.errorCode, c.errorType != 0
-}
-
-// ClearErrorCode resets the error code.
-//
-//go:nosplit
-func (c *CPU) ClearErrorCode() {
- c.errorCode = 0 // No code.
- c.errorType = 1 // User mode.
-}
-
-//go:nosplit
-func (c *CPU) GetFaultAddr() (value uintptr) {
- return c.faultAddr
-}
-
-//go:nosplit
-func (c *CPU) SetTtbr0Kvm(value uintptr) {
- c.ttbr0Kvm = value
-}
-
-//go:nosplit
-func (c *CPU) SetTtbr0App(value uintptr) {
- c.ttbr0App = value
-}
-
-//go:nosplit
-func (c *CPU) GetVector() (value Vector) {
- return c.vecCode
-}
-
-//go:nosplit
-func (c *CPU) SetAppAddr(value uintptr) {
- c.appAddr = value
-}
-
-// GetLazyVFP returns the value of cpacr_el1.
-//go:nosplit
-func (c *CPU) GetLazyVFP() (value uintptr) {
- return c.lazyVFP
-}
-
-// SwitchArchOpts are embedded in SwitchOpts.
-type SwitchArchOpts struct {
- // UserASID indicates the application ASID to be used on switch.
- UserASID uint16
-
- // KernelASID indicates the kernel ASID to be used on return.
- KernelASID uint16
-}
-
-func init() {
-}
diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/sentry/platform/ring0/entry_amd64.go
deleted file mode 100644
index d87b1fd00..000000000
--- a/pkg/sentry/platform/ring0/entry_amd64.go
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/sentry/arch"
-)
-
-// This is an assembly function.
-//
-// The sysenter function is invoked in two situations:
-//
-// (1) The guest kernel has executed a system call.
-// (2) The guest application has executed a system call.
-//
-// The _RFLAGS_IOPL0 bit of the saved flags is examined to determine whether the
-// system call was executed from kernel mode or not and the appropriate stub is called.
-func sysenter()
-
-// swapgs swaps the current GS value.
-//
-// This must be called prior to sysret/iret.
-func swapgs()
-
-// jumpToKernel jumps to the kernel version of the current RIP.
-func jumpToKernel()
-
-// sysret returns to userspace from a system call.
-//
-// The return code is the vector that interrupted execution.
-//
-// See stubs.go for a note regarding the frame size of this function.
-func sysret(cpu *CPU, regs *arch.Registers, userCR3 uintptr) Vector
-
-// "iret is the cadillac of CPL switching."
-//
-// -- Neel Natu
-//
-// iret is nearly identical to sysret, except an iret is used to fully restore
-// all user state. This must be called in cases where all registers need to be
-// restored.
-func iret(cpu *CPU, regs *arch.Registers, userCR3 uintptr) Vector
-
-// exception is the generic exception entry.
-//
-// This is called by the individual stub definitions.
-func exception()
-
-// resume is a stub that restores the CPU kernel registers.
-//
-// This is used when processing kernel exceptions and syscalls.
-func resume()
-
-// Start is the CPU entrypoint.
-//
-// The following start conditions must be satisfied:
-//
-// * AX should contain the CPU pointer.
-// * c.GDT() should be loaded as the GDT.
-// * c.IDT() should be loaded as the IDT.
-// * c.CR0() should be the current CR0 value.
-// * c.CR3() should be set to the kernel PageTables.
-// * c.CR4() should be the current CR4 value.
-// * c.EFER() should be the current EFER value.
-//
-// The CPU state will be set to c.Registers().
-func Start()
-
-// Exception stubs.
-func divideByZero()
-func debug()
-func nmi()
-func breakpoint()
-func overflow()
-func boundRangeExceeded()
-func invalidOpcode()
-func deviceNotAvailable()
-func doubleFault()
-func coprocessorSegmentOverrun()
-func invalidTSS()
-func segmentNotPresent()
-func stackSegmentFault()
-func generalProtectionFault()
-func pageFault()
-func x87FloatingPointException()
-func alignmentCheck()
-func machineCheck()
-func simdFloatingPointException()
-func virtualizationException()
-func securityException()
-func syscallInt80()
-
-// Exception handler index.
-var handlers = map[Vector]func(){
- DivideByZero: divideByZero,
- Debug: debug,
- NMI: nmi,
- Breakpoint: breakpoint,
- Overflow: overflow,
- BoundRangeExceeded: boundRangeExceeded,
- InvalidOpcode: invalidOpcode,
- DeviceNotAvailable: deviceNotAvailable,
- DoubleFault: doubleFault,
- CoprocessorSegmentOverrun: coprocessorSegmentOverrun,
- InvalidTSS: invalidTSS,
- SegmentNotPresent: segmentNotPresent,
- StackSegmentFault: stackSegmentFault,
- GeneralProtectionFault: generalProtectionFault,
- PageFault: pageFault,
- X87FloatingPointException: x87FloatingPointException,
- AlignmentCheck: alignmentCheck,
- MachineCheck: machineCheck,
- SIMDFloatingPointException: simdFloatingPointException,
- VirtualizationException: virtualizationException,
- SecurityException: securityException,
- SyscallInt80: syscallInt80,
-}
diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s
deleted file mode 100644
index f59747df3..000000000
--- a/pkg/sentry/platform/ring0/entry_amd64.s
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// NB: Offsets are programmatically generated (see BUILD).
-//
-// This file is concatenated with the definitions.
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not saved: AX, SP, IP, FLAGS, all segments.
-#define REGISTERS_SAVE(reg, offset) \
- MOVQ R15, offset+PTRACE_R15(reg); \
- MOVQ R14, offset+PTRACE_R14(reg); \
- MOVQ R13, offset+PTRACE_R13(reg); \
- MOVQ R12, offset+PTRACE_R12(reg); \
- MOVQ BP, offset+PTRACE_RBP(reg); \
- MOVQ BX, offset+PTRACE_RBX(reg); \
- MOVQ CX, offset+PTRACE_RCX(reg); \
- MOVQ DX, offset+PTRACE_RDX(reg); \
- MOVQ R11, offset+PTRACE_R11(reg); \
- MOVQ R10, offset+PTRACE_R10(reg); \
- MOVQ R9, offset+PTRACE_R9(reg); \
- MOVQ R8, offset+PTRACE_R8(reg); \
- MOVQ SI, offset+PTRACE_RSI(reg); \
- MOVQ DI, offset+PTRACE_RDI(reg);
-
-// Loads a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not loaded: AX, SP, IP, FLAGS, all segments.
-#define REGISTERS_LOAD(reg, offset) \
- MOVQ offset+PTRACE_R15(reg), R15; \
- MOVQ offset+PTRACE_R14(reg), R14; \
- MOVQ offset+PTRACE_R13(reg), R13; \
- MOVQ offset+PTRACE_R12(reg), R12; \
- MOVQ offset+PTRACE_RBP(reg), BP; \
- MOVQ offset+PTRACE_RBX(reg), BX; \
- MOVQ offset+PTRACE_RCX(reg), CX; \
- MOVQ offset+PTRACE_RDX(reg), DX; \
- MOVQ offset+PTRACE_R11(reg), R11; \
- MOVQ offset+PTRACE_R10(reg), R10; \
- MOVQ offset+PTRACE_R9(reg), R9; \
- MOVQ offset+PTRACE_R8(reg), R8; \
- MOVQ offset+PTRACE_RSI(reg), SI; \
- MOVQ offset+PTRACE_RDI(reg), DI;
-
-// WRITE_CR3() writes the given CR3 value.
-//
-// The code corresponds to:
-//
-// mov %rax, %cr3
-//
-#define WRITE_CR3() \
- BYTE $0x0f; BYTE $0x22; BYTE $0xd8;
-
-// SWAP_GS swaps the kernel GS (CPU).
-#define SWAP_GS() \
- BYTE $0x0F; BYTE $0x01; BYTE $0xf8;
-
-// IRET returns from an interrupt frame.
-#define IRET() \
- BYTE $0x48; BYTE $0xcf;
-
-// SYSRET64 executes the sysret instruction.
-#define SYSRET64() \
- BYTE $0x48; BYTE $0x0f; BYTE $0x07;
-
-// LOAD_KERNEL_STACK loads the kernel stack.
-#define LOAD_KERNEL_STACK(entry) \
- MOVQ ENTRY_STACK_TOP(entry), SP;
-
-// See kernel.go.
-TEXT ·Halt(SB),NOSPLIT,$0
- HLT
- RET
-
-// See entry_amd64.go.
-TEXT ·swapgs(SB),NOSPLIT,$0
- SWAP_GS()
- RET
-
-// jumpToKernel changes execution to the kernel address space.
-//
-// This works by changing the return value to the kernel version.
-TEXT ·jumpToKernel(SB),NOSPLIT,$0
- MOVQ 0(SP), AX
- ORQ ·KernelStartAddress(SB), AX // Future return value.
- MOVQ AX, 0(SP)
- RET
-
-// See entry_amd64.go.
-TEXT ·sysret(SB),NOSPLIT,$0-24
- CALL ·jumpToKernel(SB)
- // Save original state and stack. sysenter() or exception()
- // from APP(gr3) will switch to this stack, set the return
- // value (vector: 32(SP)) and then do RET, which will also
- // automatically return to the lower half.
- MOVQ cpu+0(FP), BX
- MOVQ regs+8(FP), AX
- MOVQ userCR3+16(FP), CX
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
- MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
- MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
-
- // save SP AX userCR3 on the kernel stack.
- MOVQ CPU_ENTRY(BX), BX
- LOAD_KERNEL_STACK(BX)
- PUSHQ PTRACE_RSP(AX)
- PUSHQ PTRACE_RAX(AX)
- PUSHQ CX
-
- // Restore user register state.
- REGISTERS_LOAD(AX, 0)
- MOVQ PTRACE_RIP(AX), CX // Needed for SYSRET.
- MOVQ PTRACE_FLAGS(AX), R11 // Needed for SYSRET.
-
- // restore userCR3, AX, SP.
- POPQ AX // Get userCR3.
- WRITE_CR3() // Switch to userCR3.
- POPQ AX // Restore AX.
- POPQ SP // Restore SP.
- SYSRET64()
-
-// See entry_amd64.go.
-TEXT ·iret(SB),NOSPLIT,$0-24
- CALL ·jumpToKernel(SB)
- // Save original state and stack. sysenter() or exception()
- // from APP(gr3) will switch to this stack, set the return
- // value (vector: 32(SP)) and then do RET, which will also
- // automatically return to the lower half.
- MOVQ cpu+0(FP), BX
- MOVQ regs+8(FP), AX
- MOVQ userCR3+16(FP), CX
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(BX)
- MOVQ BP, CPU_REGISTERS+PTRACE_RBP(BX)
- MOVQ AX, CPU_REGISTERS+PTRACE_RAX(BX)
-
- // Build an IRET frame & restore state.
- MOVQ CPU_ENTRY(BX), BX
- LOAD_KERNEL_STACK(BX)
- PUSHQ PTRACE_SS(AX)
- PUSHQ PTRACE_RSP(AX)
- PUSHQ PTRACE_FLAGS(AX)
- PUSHQ PTRACE_CS(AX)
- PUSHQ PTRACE_RIP(AX)
- PUSHQ PTRACE_RAX(AX) // Save AX on kernel stack.
- PUSHQ CX // Save userCR3 on kernel stack.
- REGISTERS_LOAD(AX, 0) // Restore most registers.
- POPQ AX // Get userCR3.
- WRITE_CR3() // Switch to userCR3.
- POPQ AX // Restore AX.
- IRET()
-
-// See entry_amd64.go.
-TEXT ·resume(SB),NOSPLIT,$0
- // See iret, above.
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- PUSHQ CPU_REGISTERS+PTRACE_SS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_RSP(AX)
- PUSHQ CPU_REGISTERS+PTRACE_FLAGS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_CS(AX)
- PUSHQ CPU_REGISTERS+PTRACE_RIP(AX)
- REGISTERS_LOAD(AX, CPU_REGISTERS)
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX
- IRET()
-
-// See entry_amd64.go.
-TEXT ·Start(SB),NOSPLIT,$0
- PUSHQ $0x0 // Previous frame pointer.
- MOVQ SP, BP // Set frame pointer.
- PUSHQ AX // First argument (CPU).
- CALL ·start(SB) // Call Go hook.
- JMP ·resume(SB) // Restore to registers.
-
-// See entry_amd64.go.
-TEXT ·sysenter(SB),NOSPLIT,$0
- // _RFLAGS_IOPL0 is always set in the user mode and it is never set in
- // the kernel mode. See the comment of UserFlagsSet for more details.
- TESTL $_RFLAGS_IOPL0, R11
- JZ kernel
-user:
- SWAP_GS()
- MOVQ AX, ENTRY_SCRATCH0(GS) // Save user AX on scratch.
- MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
- WRITE_CR3() // Switch to kernel cr3.
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
- REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
- MOVQ CX, PTRACE_RIP(AX)
- MOVQ R11, PTRACE_FLAGS(AX)
- MOVQ SP, PTRACE_RSP(AX)
- MOVQ ENTRY_SCRATCH0(GS), CX // Load saved user AX value.
- MOVQ CX, PTRACE_RAX(AX) // Save everything else.
- MOVQ CX, PTRACE_ORIGRAX(AX)
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Get stacks.
- MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
- MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
-
- // Return to the kernel, where the frame is:
- //
- // vector (sp+32)
- // userCR3 (sp+24)
- // regs (sp+16)
- // cpu (sp+8)
- // vcpu.Switch (sp+0)
- //
- MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
- MOVQ $Syscall, 32(SP) // Output vector.
- RET
-
-kernel:
- // We can't restore the original stack, but we can access the registers
- // in the CPU state directly. No need for temporary juggling.
- MOVQ AX, ENTRY_SCRATCH0(GS)
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- REGISTERS_SAVE(AX, CPU_REGISTERS)
- MOVQ CX, CPU_REGISTERS+PTRACE_RIP(AX)
- MOVQ R11, CPU_REGISTERS+PTRACE_FLAGS(AX)
- MOVQ SP, CPU_REGISTERS+PTRACE_RSP(AX)
- MOVQ ENTRY_SCRATCH0(GS), BX
- MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
- MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
- MOVQ $0, CPU_ERROR_CODE(AX) // Clear error code.
- MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
-
- // Call the syscall trampoline.
- LOAD_KERNEL_STACK(GS)
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelSyscall(SB) // Call the trampoline.
- POPQ AX // Pop vCPU.
- JMP ·resume(SB)
-
-// exception is a generic exception handler.
-//
-// There are two cases handled:
-//
-// 1) An exception in kernel mode: this results in saving the state at the time
-// of the exception and calling the defined hook.
-//
-// 2) An exception in guest mode: the original kernel frame is restored, and
-// the vector & error codes are pushed as return values.
-//
-// See below for the stubs that call exception.
-TEXT ·exception(SB),NOSPLIT,$0
- // Determine whether the exception occurred in kernel mode or user
- // mode, based on the flags. We expect the following stack:
- //
- // SS (sp+48)
- // SP (sp+40)
- // FLAGS (sp+32)
- // CS (sp+24)
- // IP (sp+16)
- // ERROR_CODE (sp+8)
- // VECTOR (sp+0)
- //
- TESTL $_RFLAGS_IOPL0, 32(SP)
- JZ kernel
-
-user:
- SWAP_GS()
- ADDQ $-8, SP // Adjust for flags.
- MOVQ $_KERNEL_FLAGS, 0(SP); BYTE $0x9d; // Reset flags (POPFQ).
- PUSHQ AX // Save user AX on stack.
- MOVQ ENTRY_KERNEL_CR3(GS), AX // Get kernel cr3 on AX.
- WRITE_CR3() // Switch to kernel cr3.
-
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ CPU_REGISTERS+PTRACE_RAX(AX), AX // Get user regs.
- REGISTERS_SAVE(AX, 0) // Save all except IP, FLAGS, SP, AX.
- POPQ BX // Restore original AX.
- MOVQ BX, PTRACE_RAX(AX) // Save it.
- MOVQ BX, PTRACE_ORIGRAX(AX)
- MOVQ 16(SP), BX; MOVQ BX, PTRACE_RIP(AX)
- MOVQ 24(SP), CX; MOVQ CX, PTRACE_CS(AX)
- MOVQ 32(SP), DX; MOVQ DX, PTRACE_FLAGS(AX)
- MOVQ 40(SP), DI; MOVQ DI, PTRACE_RSP(AX)
- MOVQ 48(SP), SI; MOVQ SI, PTRACE_SS(AX)
-
- // Copy out and return.
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- MOVQ 0(SP), BX // Load vector.
- MOVQ 8(SP), CX // Load error code.
- MOVQ CPU_REGISTERS+PTRACE_RSP(AX), SP // Original stack (kernel version).
- MOVQ CPU_REGISTERS+PTRACE_RBP(AX), BP // Original base pointer.
- MOVQ CX, CPU_ERROR_CODE(AX) // Set error code.
- MOVQ $1, CPU_ERROR_TYPE(AX) // Set error type to user.
- MOVQ BX, 32(SP) // Output vector.
- RET
-
-kernel:
- // As per above, we can save directly.
- PUSHQ AX
- MOVQ ENTRY_CPU_SELF(GS), AX // Load vCPU.
- REGISTERS_SAVE(AX, CPU_REGISTERS)
- POPQ BX
- MOVQ BX, CPU_REGISTERS+PTRACE_RAX(AX)
- MOVQ BX, CPU_REGISTERS+PTRACE_ORIGRAX(AX)
- MOVQ 16(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RIP(AX)
- MOVQ 32(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_FLAGS(AX)
- MOVQ 40(SP), BX; MOVQ BX, CPU_REGISTERS+PTRACE_RSP(AX)
-
- // Set the error code and adjust the stack.
- MOVQ 8(SP), BX // Load the error code.
- MOVQ BX, CPU_ERROR_CODE(AX) // Copy out to the CPU.
- MOVQ $0, CPU_ERROR_TYPE(AX) // Set error type to kernel.
- MOVQ 0(SP), BX // BX contains the vector.
-
- // Call the exception trampoline.
- LOAD_KERNEL_STACK(GS)
- PUSHQ BX // Second argument (vector).
- PUSHQ AX // First argument (vCPU).
- CALL ·kernelException(SB) // Call the trampoline.
- POPQ BX // Pop vector.
- POPQ AX // Pop vCPU.
- JMP ·resume(SB)
-
-#define EXCEPTION_WITH_ERROR(value, symbol) \
-TEXT symbol,NOSPLIT,$0; \
- PUSHQ $value; \
- JMP ·exception(SB);
-
-#define EXCEPTION_WITHOUT_ERROR(value, symbol) \
-TEXT symbol,NOSPLIT,$0; \
- PUSHQ $0x0; \
- PUSHQ $value; \
- JMP ·exception(SB);
-
-EXCEPTION_WITHOUT_ERROR(DivideByZero, ·divideByZero(SB))
-EXCEPTION_WITHOUT_ERROR(Debug, ·debug(SB))
-EXCEPTION_WITHOUT_ERROR(NMI, ·nmi(SB))
-EXCEPTION_WITHOUT_ERROR(Breakpoint, ·breakpoint(SB))
-EXCEPTION_WITHOUT_ERROR(Overflow, ·overflow(SB))
-EXCEPTION_WITHOUT_ERROR(BoundRangeExceeded, ·boundRangeExceeded(SB))
-EXCEPTION_WITHOUT_ERROR(InvalidOpcode, ·invalidOpcode(SB))
-EXCEPTION_WITHOUT_ERROR(DeviceNotAvailable, ·deviceNotAvailable(SB))
-EXCEPTION_WITH_ERROR(DoubleFault, ·doubleFault(SB))
-EXCEPTION_WITHOUT_ERROR(CoprocessorSegmentOverrun, ·coprocessorSegmentOverrun(SB))
-EXCEPTION_WITH_ERROR(InvalidTSS, ·invalidTSS(SB))
-EXCEPTION_WITH_ERROR(SegmentNotPresent, ·segmentNotPresent(SB))
-EXCEPTION_WITH_ERROR(StackSegmentFault, ·stackSegmentFault(SB))
-EXCEPTION_WITH_ERROR(GeneralProtectionFault, ·generalProtectionFault(SB))
-EXCEPTION_WITH_ERROR(PageFault, ·pageFault(SB))
-EXCEPTION_WITHOUT_ERROR(X87FloatingPointException, ·x87FloatingPointException(SB))
-EXCEPTION_WITH_ERROR(AlignmentCheck, ·alignmentCheck(SB))
-EXCEPTION_WITHOUT_ERROR(MachineCheck, ·machineCheck(SB))
-EXCEPTION_WITHOUT_ERROR(SIMDFloatingPointException, ·simdFloatingPointException(SB))
-EXCEPTION_WITHOUT_ERROR(VirtualizationException, ·virtualizationException(SB))
-EXCEPTION_WITH_ERROR(SecurityException, ·securityException(SB))
-EXCEPTION_WITHOUT_ERROR(SyscallInt80, ·syscallInt80(SB))
diff --git a/pkg/sentry/platform/ring0/entry_arm64.go b/pkg/sentry/platform/ring0/entry_arm64.go
deleted file mode 100644
index 62a93f3d6..000000000
--- a/pkg/sentry/platform/ring0/entry_arm64.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// This is an assembly function.
-//
-// The sysenter function is invoked in two situations:
-//
-// (1) The guest kernel has executed a system call.
-// (2) The guest application has executed a system call.
-//
-// The interrupt flag is examined to determine whether the system call was
-// executed from kernel mode or not and the appropriate stub is called.
-
-func El1_sync_invalid()
-func El1_irq_invalid()
-func El1_fiq_invalid()
-func El1_error_invalid()
-
-func El1_sync()
-func El1_irq()
-func El1_fiq()
-func El1_error()
-
-func El0_sync()
-func El0_irq()
-func El0_fiq()
-func El0_error()
-
-func El0_sync_invalid()
-func El0_irq_invalid()
-func El0_fiq_invalid()
-func El0_error_invalid()
-
-func Vectors()
-
-// Start is the CPU entrypoint.
-//
-// The CPU state will be set to c.Registers().
-func Start()
-func kernelExitToEl1()
-
-func kernelExitToEl0()
-
-// Shutdown execution
-func Shutdown()
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
deleted file mode 100644
index b2bb18257..000000000
--- a/pkg/sentry/platform/ring0/entry_arm64.s
+++ /dev/null
@@ -1,769 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// NB: Offsets are programmatically generated (see BUILD).
-//
-// This file is concatenated with the definitions.
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-
-// ERET returns using the ELR and SPSR for the current exception level.
-#define ERET() \
- WORD $0xd69f03e0; \
- DSB $7; \
- ISB $15;
-
-// RSV_REG is a register that holds el1 information temporarily.
-#define RSV_REG R18_PLATFORM
-
-// RSV_REG_APP is a register that holds el0 information temporarily.
-#define RSV_REG_APP R9
-
-#define FPEN_NOTRAP 0x3
-#define FPEN_SHIFT 20
-
-#define FPEN_ENABLE (FPEN_NOTRAP << FPEN_SHIFT)
-
-// sctlr_el1: system control register el1.
-#define SCTLR_M 1 << 0
-#define SCTLR_C 1 << 2
-#define SCTLR_I 1 << 12
-#define SCTLR_DZE 1 << 14
-#define SCTLR_UCT 1 << 15
-#define SCTLR_UCI 1 << 26
-
-#define SCTLR_EL1_DEFAULT (SCTLR_M | SCTLR_C | SCTLR_I | SCTLR_UCT | SCTLR_UCI | SCTLR_DZE)
-
-// cntkctl_el1: counter-timer kernel control register el1.
-#define CNTKCTL_EL0PCTEN 1 << 0
-#define CNTKCTL_EL0VCTEN 1 << 1
-
-#define CNTKCTL_EL1_DEFAULT (CNTKCTL_EL0PCTEN | CNTKCTL_EL0VCTEN)
-
-// Saves a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not saved: R9, R18.
-#define REGISTERS_SAVE(reg, offset) \
- MOVD R0, offset+PTRACE_R0(reg); \
- MOVD R1, offset+PTRACE_R1(reg); \
- MOVD R2, offset+PTRACE_R2(reg); \
- MOVD R3, offset+PTRACE_R3(reg); \
- MOVD R4, offset+PTRACE_R4(reg); \
- MOVD R5, offset+PTRACE_R5(reg); \
- MOVD R6, offset+PTRACE_R6(reg); \
- MOVD R7, offset+PTRACE_R7(reg); \
- MOVD R8, offset+PTRACE_R8(reg); \
- MOVD R10, offset+PTRACE_R10(reg); \
- MOVD R11, offset+PTRACE_R11(reg); \
- MOVD R12, offset+PTRACE_R12(reg); \
- MOVD R13, offset+PTRACE_R13(reg); \
- MOVD R14, offset+PTRACE_R14(reg); \
- MOVD R15, offset+PTRACE_R15(reg); \
- MOVD R16, offset+PTRACE_R16(reg); \
- MOVD R17, offset+PTRACE_R17(reg); \
- MOVD R19, offset+PTRACE_R19(reg); \
- MOVD R20, offset+PTRACE_R20(reg); \
- MOVD R21, offset+PTRACE_R21(reg); \
- MOVD R22, offset+PTRACE_R22(reg); \
- MOVD R23, offset+PTRACE_R23(reg); \
- MOVD R24, offset+PTRACE_R24(reg); \
- MOVD R25, offset+PTRACE_R25(reg); \
- MOVD R26, offset+PTRACE_R26(reg); \
- MOVD R27, offset+PTRACE_R27(reg); \
- MOVD g, offset+PTRACE_R28(reg); \
- MOVD R29, offset+PTRACE_R29(reg); \
- MOVD R30, offset+PTRACE_R30(reg);
-
-// Loads a register set.
-//
-// This is a macro because it may need to be executed in contexts where a stack is
-// not available for calls.
-//
-// The following registers are not loaded: R9, R18.
-#define REGISTERS_LOAD(reg, offset) \
- MOVD offset+PTRACE_R0(reg), R0; \
- MOVD offset+PTRACE_R1(reg), R1; \
- MOVD offset+PTRACE_R2(reg), R2; \
- MOVD offset+PTRACE_R3(reg), R3; \
- MOVD offset+PTRACE_R4(reg), R4; \
- MOVD offset+PTRACE_R5(reg), R5; \
- MOVD offset+PTRACE_R6(reg), R6; \
- MOVD offset+PTRACE_R7(reg), R7; \
- MOVD offset+PTRACE_R8(reg), R8; \
- MOVD offset+PTRACE_R10(reg), R10; \
- MOVD offset+PTRACE_R11(reg), R11; \
- MOVD offset+PTRACE_R12(reg), R12; \
- MOVD offset+PTRACE_R13(reg), R13; \
- MOVD offset+PTRACE_R14(reg), R14; \
- MOVD offset+PTRACE_R15(reg), R15; \
- MOVD offset+PTRACE_R16(reg), R16; \
- MOVD offset+PTRACE_R17(reg), R17; \
- MOVD offset+PTRACE_R19(reg), R19; \
- MOVD offset+PTRACE_R20(reg), R20; \
- MOVD offset+PTRACE_R21(reg), R21; \
- MOVD offset+PTRACE_R22(reg), R22; \
- MOVD offset+PTRACE_R23(reg), R23; \
- MOVD offset+PTRACE_R24(reg), R24; \
- MOVD offset+PTRACE_R25(reg), R25; \
- MOVD offset+PTRACE_R26(reg), R26; \
- MOVD offset+PTRACE_R27(reg), R27; \
- MOVD offset+PTRACE_R28(reg), g; \
- MOVD offset+PTRACE_R29(reg), R29; \
- MOVD offset+PTRACE_R30(reg), R30;
-
-#define ESR_ELx_EC_UNKNOWN (0x00)
-#define ESR_ELx_EC_WFx (0x01)
-/* Unallocated EC: 0x02 */
-#define ESR_ELx_EC_CP15_32 (0x03)
-#define ESR_ELx_EC_CP15_64 (0x04)
-#define ESR_ELx_EC_CP14_MR (0x05)
-#define ESR_ELx_EC_CP14_LS (0x06)
-#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
-#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
-#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
-#define ESR_ELx_EC_ILL (0x0E)
-/* Unallocated EC: 0x0F - 0x10 */
-#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
-#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
-/* Unallocated EC: 0x14 */
-#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
-#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
-#define ESR_ELx_EC_SYS64 (0x18)
-#define ESR_ELx_EC_SVE (0x19)
-/* Unallocated EC: 0x1A - 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
-#define ESR_ELx_EC_IABT_LOW (0x20)
-#define ESR_ELx_EC_IABT_CUR (0x21)
-#define ESR_ELx_EC_PC_ALIGN (0x22)
-/* Unallocated EC: 0x23 */
-#define ESR_ELx_EC_DABT_LOW (0x24)
-#define ESR_ELx_EC_DABT_CUR (0x25)
-#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
-#define ESR_ELx_EC_FP_EXC32 (0x28)
-/* Unallocated EC: 0x29 - 0x2B */
-#define ESR_ELx_EC_FP_EXC64 (0x2C)
-/* Unallocated EC: 0x2D - 0x2E */
-#define ESR_ELx_EC_SERROR (0x2F)
-#define ESR_ELx_EC_BREAKPT_LOW (0x30)
-#define ESR_ELx_EC_BREAKPT_CUR (0x31)
-#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
-#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
-#define ESR_ELx_EC_WATCHPT_LOW (0x34)
-#define ESR_ELx_EC_WATCHPT_CUR (0x35)
-/* Unallocated EC: 0x36 - 0x37 */
-#define ESR_ELx_EC_BKPT32 (0x38)
-/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
-/* Unallocated EC: 0x3B */
-#define ESR_ELx_EC_BRK64 (0x3C)
-/* Unallocated EC: 0x3D - 0x3F */
-#define ESR_ELx_EC_MAX (0x3F)
-
-#define ESR_ELx_EC_SHIFT (26)
-#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
-#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
-
-#define ESR_ELx_IL_SHIFT (25)
-#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
-
-/* ISS field definitions shared by different classes */
-#define ESR_ELx_WNR_SHIFT (6)
-#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
-
-/* Asynchronous Error Type */
-#define ESR_ELx_IDS_SHIFT (24)
-#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
-#define ESR_ELx_AET_SHIFT (10)
-#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
-
-#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
-#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
-
-/* Shared ISS field definitions for Data/Instruction aborts */
-#define ESR_ELx_SET_SHIFT (11)
-#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
-#define ESR_ELx_FnV_SHIFT (10)
-#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
-#define ESR_ELx_EA_SHIFT (9)
-#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
-#define ESR_ELx_S1PTW_SHIFT (7)
-#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
-
-/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
-#define ESR_ELx_FSC (0x3F)
-#define ESR_ELx_FSC_TYPE (0x3C)
-#define ESR_ELx_FSC_EXTABT (0x10)
-#define ESR_ELx_FSC_SERROR (0x11)
-#define ESR_ELx_FSC_ACCESS (0x08)
-#define ESR_ELx_FSC_FAULT (0x04)
-#define ESR_ELx_FSC_PERM (0x0C)
-
-/* ISS field definitions for Data Aborts */
-#define ESR_ELx_ISV_SHIFT (24)
-#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
-#define ESR_ELx_SAS_SHIFT (22)
-#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
-#define ESR_ELx_SSE_SHIFT (21)
-#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
-#define ESR_ELx_SRT_SHIFT (16)
-#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
-#define ESR_ELx_SF_SHIFT (15)
-#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
-#define ESR_ELx_AR_SHIFT (14)
-#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
-#define ESR_ELx_CM_SHIFT (8)
-#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
-
-/* ISS field definitions for exceptions taken in to Hyp */
-#define ESR_ELx_CV (UL(1) << 24)
-#define ESR_ELx_COND_SHIFT (20)
-#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
-#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
-#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
-#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
-
-/* ISS field definitions for system error */
-#define ESR_ELx_SERR_MASK (0x1)
-#define ESR_ELx_SERR_NMI (0x1)
-
-// LOAD_KERNEL_ADDRESS loads a kernel address.
-#define LOAD_KERNEL_ADDRESS(from, to) \
- MOVD from, to; \
- ORR $0xffff000000000000, to, to;
-
-// LOAD_KERNEL_STACK loads the kernel temporary stack.
-#define LOAD_KERNEL_STACK(from) \
- LOAD_KERNEL_ADDRESS(CPU_SELF(from), RSV_REG); \
- MOVD $CPU_STACK_TOP(RSV_REG), RSV_REG; \
- MOVD RSV_REG, RSP; \
- WORD $0xd538d092; //MRS TPIDR_EL1, R18
-
-// SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application.
-#define SWITCH_TO_APP_PAGETABLE() \
- MOVD CPU_APP_ASID(RSV_REG), RSV_REG_APP; \
- MOVD CPU_TTBR0_APP(RSV_REG), RSV_REG; \
- BFI $48, RSV_REG_APP, $16, RSV_REG; \
- MSR RSV_REG, TTBR0_EL1; \
- ISB $15;
-
-// SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable.
-#define SWITCH_TO_KVM_PAGETABLE() \
- MOVD CPU_TTBR0_KVM(RSV_REG), RSV_REG; \
- MOVD $1, RSV_REG_APP; \
- BFI $48, RSV_REG_APP, $16, RSV_REG; \
- MSR RSV_REG, TTBR0_EL1; \
- ISB $15;
-
-TEXT ·EnableVFP(SB),NOSPLIT,$0
- MOVD $FPEN_ENABLE, R0
- WORD $0xd5181040 //MSR R0, CPACR_EL1
- ISB $15
- RET
-
-TEXT ·DisableVFP(SB),NOSPLIT,$0
- MOVD $0, R0
- WORD $0xd5181040 //MSR R0, CPACR_EL1
- ISB $15
- RET
-
-#define VFP_ENABLE \
- MOVD $FPEN_ENABLE, R0; \
- WORD $0xd5181040; \ //MSR R0, CPACR_EL1
- ISB $15;
-
-#define VFP_DISABLE \
- MOVD $0x0, R0; \
- WORD $0xd5181040; \ //MSR R0, CPACR_EL1
- ISB $15;
-
-// KERNEL_ENTRY_FROM_EL0 is the entry code of the vcpu from el0 to el1.
-#define KERNEL_ENTRY_FROM_EL0 \
- SUB $16, RSP, RSP; \ // step1, save r18, r9 into kernel temporary stack.
- STP (RSV_REG, RSV_REG_APP), 16*0(RSP); \
- WORD $0xd538d092; \ // MRS TPIDR_EL1, R18
- MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP; \ // step2, load app context pointer.
- REGISTERS_SAVE(RSV_REG_APP, 0); \ // step3, save app context.
- MOVD RSV_REG_APP, R20; \
- LDP 16*0(RSP), (RSV_REG, RSV_REG_APP); \
- ADD $16, RSP, RSP; \
- MOVD RSV_REG, PTRACE_R18(R20); \
- MOVD RSV_REG_APP, PTRACE_R9(R20); \
- MRS TPIDR_EL0, R3; \
- MOVD R3, PTRACE_TLS(R20); \
- WORD $0xd5384003; \ // MRS SPSR_EL1, R3
- MOVD R3, PTRACE_PSTATE(R20); \
- MRS ELR_EL1, R3; \
- MOVD R3, PTRACE_PC(R20); \
- WORD $0xd5384103; \ // MRS SP_EL0, R3
- MOVD R3, PTRACE_SP(R20);
-
-// KERNEL_ENTRY_FROM_EL1 is the entry code of the vcpu from el1 to el1.
-#define KERNEL_ENTRY_FROM_EL1 \
- WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
- REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // Save sentry context.
- MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG); \
- MRS TPIDR_EL0, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_TLS(RSV_REG); \
- WORD $0xd5384004; \ // MRS SPSR_EL1, R4
- MOVD R4, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG); \
- MRS ELR_EL1, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_PC(RSV_REG); \
- MOVD RSP, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \
- LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack.
-
-// EXCEPTION_EL0 is a common el0 exception handler function.
-#define EXCEPTION_EL0(vector) \
- WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
- WORD $0xd538601a; \ //MRS FAR_EL1, R26
- MOVD R26, CPU_FAULT_ADDR(RSV_REG); \
- MOVD $1, R3; \
- MOVD R3, CPU_ERROR_TYPE(RSV_REG); \ // Set error type to user.
- MOVD $vector, R3; \
- MOVD R3, CPU_VECTOR_CODE(RSV_REG); \
- MRS ESR_EL1, R3; \
- MOVD R3, CPU_ERROR_CODE(RSV_REG); \
- B ·kernelExitToEl1(SB);
-
-// EXCEPTION_EL1 is a common el1 exception handler function.
-#define EXCEPTION_EL1(vector) \
- MOVD $vector, R3; \
- MOVD R3, 8(RSP); \
- B ·HaltEl1ExceptionAndResume(SB);
-
-// storeAppASID writes the application's asid value.
-TEXT ·storeAppASID(SB),NOSPLIT,$0-8
- MOVD asid+0(FP), R1
- MRS TPIDR_EL1, RSV_REG
- MOVD R1, CPU_APP_ASID(RSV_REG)
- RET
-
-// Halt halts execution.
-TEXT ·Halt(SB),NOSPLIT,$0
- // Clear bluepill.
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- CMP RSV_REG, R9
- BNE mmio_exit
- MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG)
-
-mmio_exit:
- // Disable fpsimd.
- WORD $0xd5381041 // MRS CPACR_EL1, R1
- MOVD R1, CPU_LAZY_VFP(RSV_REG)
- VFP_DISABLE
-
- // Trigger MMIO_EXIT/_KVM_HYPERCALL_VMEXIT.
- //
- // To keep it simple, the address of the exception vector table
- // (VBAR_EL1) is used as the MMIO base address, so that an MMIO exit can be
- // triggered by forcibly writing to a read-only region.
- // The region is also long enough to cover a sufficient number of
- // hypercall IDs. Host user space can then inspect the faulting address to
- // find out which hypercall was requested.
- MRS VBAR_EL1, R9
- MOVD R0, 0x0(R9)
-
- RET
-
-// HaltAndResume halts execution and then branches to the resume function.
-TEXT ·HaltAndResume(SB),NOSPLIT,$0
- BL ·Halt(SB)
- B ·kernelExitToEl1(SB) // Resume.
-
-// HaltEl1SvcAndResume calls Hooks.KernelSyscall and then resumes.
-TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0
- WORD $0xd538d092 // MRS TPIDR_EL1, R18
- MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
- MOVD R3, 8(RSP) // First argument (vCPU).
- CALL ·kernelSyscall(SB) // Call the trampoline.
- B ·kernelExitToEl1(SB) // Resume.
-
-// HaltEl1ExceptionAndResume calls Hooks.KernelException and then resumes.
-TEXT ·HaltEl1ExceptionAndResume(SB),NOSPLIT,$0-8
- WORD $0xd538d092 // MRS TPIDR_EL1, R18
- MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
- MOVD R3, 8(RSP) // First argument (vCPU).
- MOVD vector+0(FP), R3
- MOVD R3, 16(RSP) // Second argument (vector).
- CALL ·kernelException(SB) // Call the trampoline.
- B ·kernelExitToEl1(SB) // Resume.
-
-// Shutdown stops the guest.
-TEXT ·Shutdown(SB),NOSPLIT,$0
- // PSCI EVENT.
- MOVD $0x84000009, R0
- HVC $0
-
-// See kernel.go.
-TEXT ·Current(SB),NOSPLIT,$0-8
- MOVD CPU_SELF(RSV_REG), R8
- MOVD R8, ret+0(FP)
- RET
-
-#define STACK_FRAME_SIZE 32
-
-// kernelExitToEl0 is the entrypoint for application in guest_el0.
-// Prepare the vcpu environment for container application.
-TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
- // Step1, save sentry context into memory.
- MRS TPIDR_EL1, RSV_REG
- REGISTERS_SAVE(RSV_REG, CPU_REGISTERS)
- MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG)
- MRS TPIDR_EL0, R3
- MOVD R3, CPU_REGISTERS+PTRACE_TLS(RSV_REG)
-
- WORD $0xd5384003 // MRS SPSR_EL1, R3
- MOVD R3, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG)
- MOVD R30, CPU_REGISTERS+PTRACE_PC(RSV_REG)
- MOVD RSP, R3
- MOVD R3, CPU_REGISTERS+PTRACE_SP(RSV_REG)
-
- MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3
-
- // Step2, switch to temporary stack.
- LOAD_KERNEL_STACK(RSV_REG)
-
- // Step3, load app context pointer.
- MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP
-
- // Step4, prepare the environment for container application.
- // set sp_el0.
- MOVD PTRACE_SP(RSV_REG_APP), R1
- WORD $0xd5184101 //MSR R1, SP_EL0
- // set pc.
- MOVD PTRACE_PC(RSV_REG_APP), R1
- MSR R1, ELR_EL1
- // set pstate.
- MOVD PTRACE_PSTATE(RSV_REG_APP), R1
- WORD $0xd5184001 //MSR R1, SPSR_EL1
-
- // A kernel-space address must be used to execute the code below, since
- // after SWITCH_TO_APP_PAGETABLE the ASID is changed to the app's
- // ASID.
- WORD $0x10000061 // ADR R1, do_exit_to_el0
- ORR $0xffff000000000000, R1, R1
- JMP (R1)
-
-do_exit_to_el0:
- // RSV_REG & RSV_REG_APP will be loaded at the end.
- REGISTERS_LOAD(RSV_REG_APP, 0)
- MOVD PTRACE_TLS(RSV_REG_APP), RSV_REG
- MSR RSV_REG, TPIDR_EL0
-
- // switch to user pagetable.
- MOVD PTRACE_R18(RSV_REG_APP), RSV_REG
- MOVD PTRACE_R9(RSV_REG_APP), RSV_REG_APP
-
- SUB $STACK_FRAME_SIZE, RSP, RSP
- STP (RSV_REG, RSV_REG_APP), 16*0(RSP)
- STP (R0, R1), 16*1(RSP)
-
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- SWITCH_TO_APP_PAGETABLE()
-
- LDP 16*1(RSP), (R0, R1)
- LDP 16*0(RSP), (RSV_REG, RSV_REG_APP)
- ADD $STACK_FRAME_SIZE, RSP, RSP
-
- ERET()
-
-// kernelExitToEl1 is the entrypoint for sentry in guest_el1.
-// Prepare the vcpu environment for sentry.
-TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
- MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1
- WORD $0xd5184001 //MSR R1, SPSR_EL1
-
- MOVD CPU_REGISTERS+PTRACE_PC(RSV_REG), R1
- MSR R1, ELR_EL1
-
- // restore sentry's tls.
- MOVD CPU_REGISTERS+PTRACE_TLS(RSV_REG), R1
- MSR R1, TPIDR_EL0
-
- MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R1
- MOVD R1, RSP
-
- REGISTERS_LOAD(RSV_REG, CPU_REGISTERS)
- SWITCH_TO_KVM_PAGETABLE()
- MRS TPIDR_EL1, RSV_REG
-
- MOVD CPU_REGISTERS+PTRACE_R9(RSV_REG), RSV_REG_APP
-
- ERET()
-
-// Start is the CPU entrypoint.
-TEXT ·Start(SB),NOSPLIT,$0
- // Init.
- WORD $0xd508871f // __tlbi(vmalle1)
- DSB $7 // dsb(nsh)
-
- MOVD $1<<12, R1 // Reset mdscr_el1 and disable
- MSR R1, MDSCR_EL1 // access to the DCC from EL0
- ISB $15
-
- MRS TTBR1_EL1, R1
- MSR R1, TTBR0_EL1
- ISB $15
-
- MOVD $CNTKCTL_EL1_DEFAULT, R1
- MSR R1, CNTKCTL_EL1
-
- MOVD R8, RSV_REG
- ORR $0xffff000000000000, RSV_REG, RSV_REG
- WORD $0xd518d092 //MSR R18, TPIDR_EL1
-
- // Init.
- MOVD $SCTLR_EL1_DEFAULT, R1 // re-enable the mmu.
- MSR R1, SCTLR_EL1
- ISB $15
- WORD $0xd508751f // ic iallu
-
- DSB $7 // dsb(nsh)
- ISB $15
-
- B ·kernelExitToEl1(SB)
-
-// El1_sync_invalid is the handler for an invalid EL1_sync.
-TEXT ·El1_sync_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_irq_invalid is the handler for an invalid El1_irq.
-TEXT ·El1_irq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_fiq_invalid is the handler for an invalid El1_fiq.
-TEXT ·El1_fiq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_error_invalid is the handler for an invalid El1_error.
-TEXT ·El1_error_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_sync is the handler for El1_sync.
-TEXT ·El1_sync(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL1
- MRS ESR_EL1, R25 // read the syndrome register
- LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class
- CMP $ESR_ELx_EC_DABT_CUR, R24
- BEQ el1_da // data abort in EL1
- CMP $ESR_ELx_EC_IABT_CUR, R24
- BEQ el1_ia // instruction abort in EL1
- CMP $ESR_ELx_EC_SP_ALIGN, R24
- BEQ el1_sp_pc // stack alignment exception
- CMP $ESR_ELx_EC_PC_ALIGN, R24
- BEQ el1_sp_pc // pc alignment exception
- CMP $ESR_ELx_EC_UNKNOWN, R24
- BEQ el1_undef // unknown exception in EL1
- CMP $ESR_ELx_EC_SVC64, R24
- BEQ el1_svc // SVC in 64-bit state
- CMP $ESR_ELx_EC_BREAKPT_CUR, R24
- BEQ el1_dbg // debug exception in EL1
- CMP $ESR_ELx_EC_FP_ASIMD, R24
- BEQ el1_fpsimd_acc // FP/ASIMD access
- CMP $ESR_ELx_EC_SVE, R24
- BEQ el1_sve_acc // SVE access
- B el1_invalid
-
-el1_da:
- EXCEPTION_EL1(El1SyncDa)
-el1_ia:
- EXCEPTION_EL1(El1SyncIa)
-el1_sp_pc:
- EXCEPTION_EL1(El1SyncSpPc)
-el1_undef:
- EXCEPTION_EL1(El1SyncUndef)
-el1_svc:
- B ·HaltEl1SvcAndResume(SB)
-el1_dbg:
- EXCEPTION_EL1(El1SyncDbg)
-el1_fpsimd_acc:
-el1_sve_acc:
- VFP_ENABLE
- B ·kernelExitToEl1(SB) // Resume.
-el1_invalid:
- EXCEPTION_EL1(El1SyncInv)
-
-// El1_irq is the handler for El1_irq.
-TEXT ·El1_irq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_fiq is the handler for El1_fiq.
-TEXT ·El1_fiq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El1_error is the handler for El1_error.
-TEXT ·El1_error(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// El0_sync is the handler for El0_sync.
-TEXT ·El0_sync(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL0
- MRS ESR_EL1, R25 // read the syndrome register
- LSR $ESR_ELx_EC_SHIFT, R25, R24 // exception class
- CMP $ESR_ELx_EC_SVC64, R24
- BEQ el0_svc // SVC in 64-bit state
- CMP $ESR_ELx_EC_DABT_LOW, R24
- BEQ el0_da // data abort in EL0
- CMP $ESR_ELx_EC_IABT_LOW, R24
- BEQ el0_ia // instruction abort in EL0
- CMP $ESR_ELx_EC_FP_ASIMD, R24
- BEQ el0_fpsimd_acc // FP/ASIMD access
- CMP $ESR_ELx_EC_SVE, R24
- BEQ el0_sve_acc // SVE access
- CMP $ESR_ELx_EC_FP_EXC64, R24
- BEQ el0_fpsimd_exc // FP/ASIMD exception
- CMP $ESR_ELx_EC_SP_ALIGN, R24
- BEQ el0_sp_pc // stack alignment exception
- CMP $ESR_ELx_EC_PC_ALIGN, R24
- BEQ el0_sp_pc // pc alignment exception
- CMP $ESR_ELx_EC_UNKNOWN, R24
- BEQ el0_undef // unknown exception in EL0
- CMP $ESR_ELx_EC_BREAKPT_LOW, R24
- BEQ el0_dbg // debug exception in EL0
- CMP $ESR_ELx_EC_SYS64, R24
- BEQ el0_sys // configurable trap
- CMP $ESR_ELx_EC_WFx, R24
- BEQ el0_wfx // WFX trap
- B el0_invalid
-
-el0_svc:
- WORD $0xd538d092 //MRS TPIDR_EL1, R18
-
- MOVD $0, CPU_ERROR_CODE(RSV_REG) // Clear error code.
-
- MOVD $1, R3
- MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
-
- MOVD $Syscall, R3
- MOVD R3, CPU_VECTOR_CODE(RSV_REG)
-
- B ·kernelExitToEl1(SB)
-
-el0_da:
-el0_ia:
- EXCEPTION_EL0(PageFault)
-el0_fpsimd_acc:
- EXCEPTION_EL0(El0SyncFpsimdAcc)
-el0_sve_acc:
- EXCEPTION_EL0(El0SyncSveAcc)
-el0_fpsimd_exc:
- EXCEPTION_EL0(El0SyncFpsimdExc)
-el0_sp_pc:
- EXCEPTION_EL0(El0SyncSpPc)
-el0_undef:
- EXCEPTION_EL0(El0SyncUndef)
-el0_dbg:
- EXCEPTION_EL0(El0SyncDbg)
-el0_sys:
- EXCEPTION_EL0(El0SyncSys)
-el0_wfx:
- EXCEPTION_EL0(El0SyncWfx)
-el0_invalid:
- EXCEPTION_EL0(El0SyncInv)
-
-TEXT ·El0_irq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_fiq(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_error(SB),NOSPLIT,$0
- KERNEL_ENTRY_FROM_EL0
- WORD $0xd5385219 // MRS ESR_EL1, R25
- AND $ESR_ELx_SERR_MASK, R25, R24
- CMP $ESR_ELx_SERR_NMI, R24
- BEQ el0_nmi
- B el0_bounce
-
-el0_nmi:
- EXCEPTION_EL0(El0ErrNMI)
-el0_bounce:
- EXCEPTION_EL0(VirtualizationException)
-
-TEXT ·El0_sync_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_irq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_fiq_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-TEXT ·El0_error_invalid(SB),NOSPLIT,$0
- B ·Shutdown(SB)
-
-// Vectors implements exception vector table.
-// The start address of the exception vector table must be 2048-byte aligned (low 11 bits zero).
-// For details, please refer to the Arm developer documentation:
-// https://developer.arm.com/documentation/100933/0100/AArch64-exception-vector-table
-// See also the code in the Linux kernel: arch/arm64/kernel/entry.S
-TEXT ·Vectors(SB),NOSPLIT,$0
- PCALIGN $2048
- B ·El1_sync_invalid(SB)
- PCALIGN $128
- B ·El1_irq_invalid(SB)
- PCALIGN $128
- B ·El1_fiq_invalid(SB)
- PCALIGN $128
- B ·El1_error_invalid(SB)
-
- PCALIGN $128
- B ·El1_sync(SB)
- PCALIGN $128
- B ·El1_irq(SB)
- PCALIGN $128
- B ·El1_fiq(SB)
- PCALIGN $128
- B ·El1_error(SB)
-
- PCALIGN $128
- B ·El0_sync(SB)
- PCALIGN $128
- B ·El0_irq(SB)
- PCALIGN $128
- B ·El0_fiq(SB)
- PCALIGN $128
- B ·El0_error(SB)
-
- PCALIGN $128
- B ·El0_sync_invalid(SB)
- PCALIGN $128
- B ·El0_irq_invalid(SB)
- PCALIGN $128
- B ·El0_fiq_invalid(SB)
- PCALIGN $128
- B ·El0_error_invalid(SB)
diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD
deleted file mode 100644
index a9703baf6..000000000
--- a/pkg/sentry/platform/ring0/gen_offsets/BUILD
+++ /dev/null
@@ -1,40 +0,0 @@
-load("//tools:defs.bzl", "go_binary")
-load("//tools/go_generics:defs.bzl", "go_template_instance")
-
-package(licenses = ["notice"])
-
-go_template_instance(
- name = "defs_impl_arm64",
- out = "defs_impl_arm64.go",
- package = "main",
- template = "//pkg/sentry/platform/ring0:defs_arm64",
-)
-
-go_template_instance(
- name = "defs_impl_amd64",
- out = "defs_impl_amd64.go",
- package = "main",
- template = "//pkg/sentry/platform/ring0:defs_amd64",
-)
-
-go_binary(
- name = "gen_offsets",
- srcs = [
- "defs_impl_amd64.go",
- "defs_impl_arm64.go",
- "main.go",
- ],
- # Use the libc malloc to avoid any extra dependencies. This is required to
- # pass the sentry deps test.
- system_malloc = True,
- visibility = [
- "//pkg/sentry/platform/kvm:__pkg__",
- "//pkg/sentry/platform/ring0:__pkg__",
- ],
- deps = [
- "//pkg/cpuid",
- "//pkg/sentry/arch",
- "//pkg/sentry/platform/ring0/pagetables",
- "//pkg/usermem",
- ],
-)
diff --git a/pkg/sentry/platform/ring0/gen_offsets/main.go b/pkg/sentry/platform/ring0/gen_offsets/main.go
deleted file mode 100644
index a4927da2f..000000000
--- a/pkg/sentry/platform/ring0/gen_offsets/main.go
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Binary gen_offsets is a helper for generating offset headers.
-package main
-
-import (
- "os"
-)
-
-func main() {
- Emit(os.Stdout)
-}
diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go
deleted file mode 100644
index 292f9d0cc..000000000
--- a/pkg/sentry/platform/ring0/kernel.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ring0
-
-// Init initializes a new kernel.
-//
-//go:nosplit
-func (k *Kernel) Init(maxCPUs int) {
- k.init(maxCPUs)
-}
-
-// Halt halts execution.
-func Halt()
-
-// defaultHooks implements hooks.
-type defaultHooks struct{}
-
-// KernelSyscall implements Hooks.KernelSyscall.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (defaultHooks) KernelSyscall() {
- Halt()
-}
-
-// KernelException implements Hooks.KernelException.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (defaultHooks) KernelException(Vector) {
- Halt()
-}
-
-// kernelSyscall is a trampoline.
-//
-// On amd64, it is called with %rip in the upper half, so it can NOT
-// access any global data that is not mapped in the upper half; it must
-// call through function pointers or interfaces to switch to the lower
-// half so that the callee can access global data.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func kernelSyscall(c *CPU) {
- c.hooks.KernelSyscall()
-}
-
-// kernelException is a trampoline.
-//
-// On amd64, it is called with %rip in the upper half, so it can NOT
-// access any global data that is not mapped in the upper half; it must
-// call through function pointers or interfaces to switch to the lower
-// half so that the callee can access global data.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func kernelException(c *CPU, vector Vector) {
- c.hooks.KernelException(vector)
-}
-
-// Init initializes a new CPU.
-//
-// Init allows embedding in other objects.
-func (c *CPU) Init(k *Kernel, cpuID int, hooks Hooks) {
- c.self = c // Set self reference.
- c.kernel = k // Set kernel reference.
- c.init(cpuID) // Perform architectural init.
-
- // Use the provided hooks, falling back to defaultHooks when nil.
- if hooks != nil {
- c.hooks = hooks
- } else {
- c.hooks = defaultHooks{}
- }
-}
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
deleted file mode 100644
index 36a60700e..000000000
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ /dev/null
@@ -1,323 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "encoding/binary"
- "reflect"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// init initializes architecture-specific state.
-func (k *Kernel) init(maxCPUs int) {
- entrySize := reflect.TypeOf(kernelEntry{}).Size()
- var (
- entries []kernelEntry
- padding = 1
- )
- for {
- entries = make([]kernelEntry, maxCPUs+padding-1)
- totalSize := entrySize * uintptr(maxCPUs+padding-1)
- addr := reflect.ValueOf(&entries[0]).Pointer()
- if addr&(usermem.PageSize-1) == 0 && totalSize >= usermem.PageSize {
- // The runtime forces power-of-2 alignment for allocations, and we are therefore
- // safe once the first address is aligned and the chunk is at least a full page.
- break
- }
- padding = padding << 1
- }
- k.cpuEntries = entries
-
- k.globalIDT = &idt64{}
- if reflect.TypeOf(idt64{}).Size() != usermem.PageSize {
- panic("Size of globalIDT should be PageSize")
- }
- if reflect.ValueOf(k.globalIDT).Pointer()&(usermem.PageSize-1) != 0 {
- panic("Allocated globalIDT should be page aligned")
- }
-
- // Setup the IDT, which is uniform.
- for v, handler := range handlers {
- // Allow Breakpoint and Overflow to be called from all
- // privilege levels.
- dpl := 0
- if v == Breakpoint || v == Overflow {
- dpl = 3
- }
- // Note that we set all traps to use the interrupt stack, this
- // is defined below when setting up the TSS.
- k.globalIDT[v].setInterrupt(Kcode, uint64(kernelFunc(handler)), dpl, 1 /* ist */)
- }
-}
-
-// EntryRegions returns the set of kernel entry regions (must be mapped).
-func (k *Kernel) EntryRegions() map[uintptr]uintptr {
- regions := make(map[uintptr]uintptr)
-
- addr := reflect.ValueOf(&k.cpuEntries[0]).Pointer()
- size := reflect.TypeOf(kernelEntry{}).Size() * uintptr(len(k.cpuEntries))
- end, _ := usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
-
- addr = reflect.ValueOf(k.globalIDT).Pointer()
- size = reflect.TypeOf(idt64{}).Size()
- end, _ = usermem.Addr(addr + size).RoundUp()
- regions[uintptr(usermem.Addr(addr).RoundDown())] = uintptr(end)
-
- return regions
-}
-
-// init initializes architecture-specific state.
-func (c *CPU) init(cpuID int) {
- c.kernelEntry = &c.kernel.cpuEntries[cpuID]
- c.cpuSelf = c
- // Null segment.
- c.gdt[0].setNull()
-
- // Kernel & user segments.
- c.gdt[segKcode] = KernelCodeSegment
- c.gdt[segKdata] = KernelDataSegment
- c.gdt[segUcode32] = UserCodeSegment32
- c.gdt[segUdata] = UserDataSegment
- c.gdt[segUcode64] = UserCodeSegment64
-
- // The task segment, this spans two entries.
- tssBase, tssLimit, _ := c.TSS()
- c.gdt[segTss].set(
- uint32(tssBase),
- uint32(tssLimit),
- 0, // Privilege level zero.
- SegmentDescriptorPresent|
- SegmentDescriptorAccess|
- SegmentDescriptorWrite|
- SegmentDescriptorExecute)
- c.gdt[segTssHi].setHi(uint32((tssBase) >> 32))
-
- // Set the kernel stack pointer in the TSS (virtual address).
- stackAddr := c.StackTop()
- c.stackTop = stackAddr
- c.tss.rsp0Lo = uint32(stackAddr)
- c.tss.rsp0Hi = uint32(stackAddr >> 32)
- c.tss.ist1Lo = uint32(stackAddr)
- c.tss.ist1Hi = uint32(stackAddr >> 32)
-
- // Set the I/O bitmap base address beyond the last byte in the TSS
- // to block access to the entire I/O address range.
- //
- // From section 18.5.2 "I/O Permission Bit Map" from Intel SDM vol1:
- // I/O addresses not spanned by the map are treated as if they had set
- // bits in the map.
- c.tss.ioPerm = tssLimit + 1
-
- // Permanently set the kernel segments.
- c.registers.Cs = uint64(Kcode)
- c.registers.Ds = uint64(Kdata)
- c.registers.Es = uint64(Kdata)
- c.registers.Ss = uint64(Kdata)
- c.registers.Fs = uint64(Kdata)
- c.registers.Gs = uint64(Kdata)
-
- // Set mandatory flags.
- c.registers.Eflags = KernelFlagsSet
-}
-
-// StackTop returns the kernel's stack address.
-//
-//go:nosplit
-func (c *CPU) StackTop() uint64 {
- return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
-}
-
-// IDT returns the CPU's IDT base and limit.
-//
-//go:nosplit
-func (c *CPU) IDT() (uint64, uint16) {
- return uint64(kernelAddr(&c.kernel.globalIDT[0])), uint16(binary.Size(&c.kernel.globalIDT) - 1)
-}
-
-// GDT returns the CPU's GDT base and limit.
-//
-//go:nosplit
-func (c *CPU) GDT() (uint64, uint16) {
- return uint64(kernelAddr(&c.gdt[0])), uint16(8*segLast - 1)
-}
-
-// TSS returns the CPU's TSS base, limit and value.
-//
-//go:nosplit
-func (c *CPU) TSS() (uint64, uint16, *SegmentDescriptor) {
- return uint64(kernelAddr(&c.tss)), uint16(binary.Size(&c.tss) - 1), &c.gdt[segTss]
-}
-
-// CR0 returns the CPU's CR0 value.
-//
-//go:nosplit
-func (c *CPU) CR0() uint64 {
- return _CR0_PE | _CR0_PG | _CR0_AM | _CR0_ET
-}
-
-// CR4 returns the CPU's CR4 value.
-//
-//go:nosplit
-func (c *CPU) CR4() uint64 {
- cr4 := uint64(_CR4_PAE | _CR4_PSE | _CR4_OSFXSR | _CR4_OSXMMEXCPT)
- if hasPCID {
- cr4 |= _CR4_PCIDE
- }
- if hasXSAVE {
- cr4 |= _CR4_OSXSAVE
- }
- if hasSMEP {
- cr4 |= _CR4_SMEP
- }
- if hasFSGSBASE {
- cr4 |= _CR4_FSGSBASE
- }
- return cr4
-}
-
-// EFER returns the CPU's EFER value.
-//
-//go:nosplit
-func (c *CPU) EFER() uint64 {
- return _EFER_LME | _EFER_LMA | _EFER_SCE | _EFER_NX
-}
-
-// IsCanonical indicates whether addr is canonical per the amd64 spec. NOTE(review): the strict '>' rejects 0xffff800000000000 itself; confirm this is intended.
-//
-//go:nosplit
-func IsCanonical(addr uint64) bool {
- return addr <= 0x00007fffffffffff || addr > 0xffff800000000000
-}
-
-// SwitchToUser performs either a sysret or an iret.
-//
-// The return value is the vector that interrupted execution.
-//
-// This function will not split the stack. Callers will probably want to call
-// runtime.entersyscall (and pair with a call to runtime.exitsyscall) prior to
-// calling this function.
-//
-// When this is done, this region is quite sensitive to things like system
-// calls. After calling entersyscall, any memory used must have been allocated
-// and no function calls without go:nosplit are permitted. Any calls made here
-// are protected appropriately (e.g. IsCanonical and CR3).
-//
-// Also note that this function transitively depends on the compiler generating
-// code that uses IP-relative addressing instead of absolute addresses. That's
-// the case for amd64, but may not be the case for other architectures.
-//
-// Precondition: the Rip, Rsp, Fs and Gs registers must be canonical.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
- userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID)
- c.kernelCR3 = uintptr(c.kernel.PageTables.CR3(true, switchOpts.KernelPCID))
-
- // Sanitize registers.
- regs := switchOpts.Registers
- regs.Eflags &= ^uint64(UserFlagsClear)
- regs.Eflags |= UserFlagsSet
- regs.Cs = uint64(Ucode64) // Required for iret.
- regs.Ss = uint64(Udata) // Ditto.
-
- // Perform the switch.
- swapgs() // GS will be swapped on return.
- WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
- WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
- LoadFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy in floating point.
- if switchOpts.FullRestore {
- vector = iret(c, regs, uintptr(userCR3))
- } else {
- vector = sysret(c, regs, uintptr(userCR3))
- }
- SaveFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy out floating point.
- WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
- return
-}
-
-// start is the CPU entrypoint.
-//
-// This is called from the Start asm stub (see entry_amd64.go); on return the
-// registers in c.registers will be restored (not segments).
-//
-//go:nosplit
-func start(c *CPU) {
- // Save per-cpu & FS segment.
- WriteGS(kernelAddr(c.kernelEntry))
- WriteFS(uintptr(c.registers.Fs_base))
-
- // Initialize floating point.
- //
- // Note that on skylake, the valid XCR0 mask reported seems to be 0xff.
- // This breaks down as:
- //
- // bit0 - x87
- // bit1 - SSE
- // bit2 - AVX
- // bit3-4 - MPX
- // bit5-7 - AVX512
- //
- // For some reason, enabling MPX & AVX512 on platforms that report them
- // seems to cause a general protection fault. (Maybe there are some
- // virtualization issues and these aren't exported to the guest cpuid.)
- // This needs further investigation, but we can limit the floating
- // point operations to x87, SSE & AVX for now.
- fninit()
- xsetbv(0, validXCR0Mask&0x7)
-
- // Set the syscall target.
- wrmsr(_MSR_LSTAR, kernelFunc(sysenter))
- wrmsr(_MSR_SYSCALL_MASK, KernelFlagsClear|_RFLAGS_DF)
-
- // NOTE: This depends on having the 64-bit segments immediately
- // following the 32-bit user segments. This is simply the way the
- // sysret instruction is designed to work (it assumes they follow).
- wrmsr(_MSR_STAR, uintptr(uint64(Kcode)<<32|uint64(Ucode32)<<48))
- wrmsr(_MSR_CSTAR, kernelFunc(sysenter))
-}
-
-// SetCPUIDFaulting sets CPUID faulting per the boolean value.
-//
-// True is returned if faulting could be set.
-//
-//go:nosplit
-func SetCPUIDFaulting(on bool) bool {
- // Per the SDM (Vol 3, Table 2-43), PLATFORM_INFO bit 31 denotes support
- // for CPUID faulting, and we enable and disable via the MISC_FEATURES MSR.
- if rdmsr(_MSR_PLATFORM_INFO)&_PLATFORM_INFO_CPUID_FAULT != 0 {
- features := rdmsr(_MSR_MISC_FEATURES)
- if on {
- features |= _MISC_FEATURE_CPUID_TRAP
- } else {
- features &^= _MISC_FEATURE_CPUID_TRAP
- }
- wrmsr(_MSR_MISC_FEATURES, features)
- return true // Setting successful.
- }
- return false
-}
-
-// ReadCR2 reads the current CR2 value.
-//
-//go:nosplit
-func ReadCR2() uintptr {
- return readCR2()
-}
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
deleted file mode 100644
index c05284641..000000000
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// HaltAndResume halts execution and then branches to the resume function.
-//go:nosplit
-func HaltAndResume()
-
-// HaltEl1SvcAndResume calls Hooks.KernelSyscall and then resumes.
-//go:nosplit
-func HaltEl1SvcAndResume()
-
-// HaltEl1ExceptionAndResume calls Hooks.KernelException and then resumes.
-//go:nosplit
-func HaltEl1ExceptionAndResume()
-
-// init initializes architecture-specific state.
-func (k *Kernel) init(maxCPUs int) {
-}
-
-// init initializes architecture-specific state.
-func (c *CPU) init(cpuID int) {
- // Set the kernel stack pointer (virtual address).
- c.registers.Sp = uint64(c.StackTop())
-
-}
-
-// StackTop returns the kernel's stack address.
-//
-//go:nosplit
-func (c *CPU) StackTop() uint64 {
- return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
-}
-
-// IsCanonical indicates whether addr is canonical per the arm64 spec. NOTE(review): the strict '>' rejects 0xffff000000000000 itself; confirm this is intended.
-//
-//go:nosplit
-func IsCanonical(addr uint64) bool {
- return addr <= 0x0000ffffffffffff || addr > 0xffff000000000000
-}
-
-// SwitchToUser performs an eret.
-//
-// The return value is the exception vector.
-//
-// +checkescape:all
-//
-//go:nosplit
-func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
- storeAppASID(uintptr(switchOpts.UserASID))
- if switchOpts.Flush {
- FlushTlbByASID(uintptr(switchOpts.UserASID))
- }
-
- regs := switchOpts.Registers
-
- regs.Pstate &= ^uint64(PsrFlagsClear)
- regs.Pstate |= UserFlagsSet
-
- EnableVFP()
- LoadFloatingPoint(switchOpts.FloatingPointState)
-
- kernelExitToEl0()
-
- SaveFloatingPoint(switchOpts.FloatingPointState)
- DisableVFP()
-
- vector = c.vecCode
-
- return
-}
diff --git a/pkg/sentry/platform/ring0/kernel_unsafe.go b/pkg/sentry/platform/ring0/kernel_unsafe.go
deleted file mode 100644
index 16955ad91..000000000
--- a/pkg/sentry/platform/ring0/kernel_unsafe.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package ring0
-
-import (
- "unsafe"
-)
-
-// eface mirrors runtime.eface.
-type eface struct {
- typ uintptr
- data unsafe.Pointer
-}
-
-// kernelAddr returns the kernel virtual address for the given object.
-//
-//go:nosplit
-func kernelAddr(obj interface{}) uintptr {
- e := (*eface)(unsafe.Pointer(&obj))
- return KernelStartAddress | uintptr(e.data)
-}
-
-// kernelFunc returns the address of the given function.
-//
-//go:nosplit
-func kernelFunc(fn func()) uintptr {
- fnptr := (**uintptr)(unsafe.Pointer(&fn))
- return KernelStartAddress | **fnptr
-}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.go b/pkg/sentry/platform/ring0/lib_amd64.go
deleted file mode 100644
index 0ec5c3bc5..000000000
--- a/pkg/sentry/platform/ring0/lib_amd64.go
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/cpuid"
-)
-
-// LoadFloatingPoint loads floating point state by the most efficient mechanism
-// available (set by Init).
-var LoadFloatingPoint func(*byte)
-
-// SaveFloatingPoint saves floating point state by the most efficient mechanism
-// available (set by Init).
-var SaveFloatingPoint func(*byte)
-
-// fxrstor uses fxrstor64 to load floating point state.
-func fxrstor(*byte)
-
-// xrstor uses xrstor to load floating point state.
-func xrstor(*byte)
-
-// fxsave uses fxsave64 to save floating point state.
-func fxsave(*byte)
-
-// xsave uses xsave to save floating point state.
-func xsave(*byte)
-
-// xsaveopt uses xsaveopt to save floating point state.
-func xsaveopt(*byte)
-
-// WriteFS sets the FS address (set by Init).
-var WriteFS func(addr uintptr)
-
-// wrfsbase writes to the FS base address.
-func wrfsbase(addr uintptr)
-
-// wrfsmsr writes to the FS_BASE MSR.
-func wrfsmsr(addr uintptr)
-
-// WriteGS sets the GS address (set by Init).
-var WriteGS func(addr uintptr)
-
-// wrgsbase writes to the GS base address.
-func wrgsbase(addr uintptr)
-
-// wrgsmsr writes to the GS_BASE MSR.
-func wrgsmsr(addr uintptr)
-
-// readCR2 reads the current CR2 value.
-func readCR2() uintptr
-
-// fninit initializes the floating point unit.
-func fninit()
-
-// xsetbv writes to an extended control register.
-func xsetbv(reg, value uintptr)
-
-// xgetbv reads an extended control register.
-func xgetbv(reg uintptr) uintptr
-
-// wrmsr writes to the given MSR.
-func wrmsr(reg, value uintptr)
-
-// rdmsr reads the given MSR.
-func rdmsr(reg uintptr) uintptr
-
-// Mostly-constants set by Init.
-var (
- hasSMEP bool
- hasPCID bool
- hasXSAVEOPT bool
- hasXSAVE bool
- hasFSGSBASE bool
- validXCR0Mask uintptr
-)
-
-// Init sets function pointers based on architectural features.
-//
-// This must be called prior to using ring0.
-func Init(featureSet *cpuid.FeatureSet) {
- hasSMEP = featureSet.HasFeature(cpuid.X86FeatureSMEP)
- hasPCID = featureSet.HasFeature(cpuid.X86FeaturePCID)
- hasXSAVEOPT = featureSet.UseXsaveopt()
- hasXSAVE = featureSet.UseXsave()
- hasFSGSBASE = featureSet.HasFeature(cpuid.X86FeatureFSGSBase)
- validXCR0Mask = uintptr(featureSet.ValidXCR0Mask())
- if hasXSAVEOPT {
- SaveFloatingPoint = xsaveopt
- LoadFloatingPoint = xrstor
- } else if hasXSAVE {
- SaveFloatingPoint = xsave
- LoadFloatingPoint = xrstor
- } else {
- SaveFloatingPoint = fxsave
- LoadFloatingPoint = fxrstor
- }
- if hasFSGSBASE {
- WriteFS = wrfsbase
- WriteGS = wrgsbase
- } else {
- WriteFS = wrfsmsr
- WriteGS = wrgsmsr
- }
-}
diff --git a/pkg/sentry/platform/ring0/lib_amd64.s b/pkg/sentry/platform/ring0/lib_amd64.s
deleted file mode 100644
index 2fe83568a..000000000
--- a/pkg/sentry/platform/ring0/lib_amd64.s
+++ /dev/null
@@ -1,200 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-// fxrstor loads floating point state.
-//
-// The code corresponds to:
-//
-// fxrstor64 (%rbx)
-//
-TEXT ·fxrstor(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), BX
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x0b;
- RET
-
-// xrstor loads floating point state.
-//
-// The code corresponds to:
-//
-// xrstor (%rdi)
-//
-TEXT ·xrstor(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x2f;
- RET
-
-// fxsave saves floating point state.
-//
-// The code corresponds to:
-//
-// fxsave64 (%rbx)
-//
-TEXT ·fxsave(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), BX
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x03;
- RET
-
-// xsave saves floating point state.
-//
-// The code corresponds to:
-//
-// xsave (%rdi)
-//
-TEXT ·xsave(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x27;
- RET
-
-// xsaveopt saves floating point state.
-//
-// The code corresponds to:
-//
-// xsaveopt (%rdi)
-//
-TEXT ·xsaveopt(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), DI
- MOVL $0xffffffff, AX
- MOVL $0xffffffff, DX
- BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x37;
- RET
-
-// wrfsbase writes to the FS base.
-//
-// The code corresponds to:
-//
-// wrfsbase %rax
-//
-TEXT ·wrfsbase(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd0;
- RET
-
-// wrfsmsr writes to the FSBASE MSR.
-//
-// The code corresponds to:
-//
-// wrmsr (writes EDX:EAX to the MSR in ECX)
-//
-TEXT ·wrfsmsr(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- MOVQ AX, DX
- SHRQ $32, DX
- MOVQ $0xc0000100, CX // MSR_FS_BASE
- BYTE $0x0f; BYTE $0x30;
- RET
-
-// wrgsbase writes to the GS base.
-//
-// The code corresponds to:
-//
-// wrgsbase %rax
-//
-TEXT ·wrgsbase(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0xd8;
- RET
-
-// wrgsmsr writes to the GSBASE MSR.
-//
-// See wrfsmsr.
-TEXT ·wrgsmsr(SB),NOSPLIT,$0-8
- MOVQ addr+0(FP), AX
- MOVQ AX, DX
- SHRQ $32, DX
- MOVQ $0xc0000101, CX // MSR_GS_BASE
- BYTE $0x0f; BYTE $0x30; // WRMSR
- RET
-
-// readCR2 reads the current CR2 value.
-//
-// The code corresponds to:
-//
-// mov %cr2, %rax
-//
-TEXT ·readCR2(SB),NOSPLIT,$0-8
- BYTE $0x0f; BYTE $0x20; BYTE $0xd0;
- MOVQ AX, ret+0(FP)
- RET
-
-// fninit initializes the floating point unit.
-//
-// The code corresponds to:
-//
-// fninit
-TEXT ·fninit(SB),NOSPLIT,$0
- BYTE $0xdb; BYTE $0xe3;
- RET
-
-// xsetbv writes to an extended control register.
-//
-// The code corresponds to:
-//
-// xsetbv
-//
-TEXT ·xsetbv(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- MOVL value+8(FP), AX
- MOVL value+12(FP), DX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd1;
- RET
-
-// xgetbv reads an extended control register.
-//
-// The code corresponds to:
-//
-// xgetbv
-//
-TEXT ·xgetbv(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- BYTE $0x0f; BYTE $0x01; BYTE $0xd0;
- MOVL AX, ret+8(FP)
- MOVL DX, ret+12(FP)
- RET
-
-// wrmsr writes to a model-specific register (MSR).
-//
-// The code corresponds to:
-//
-// wrmsr
-//
-TEXT ·wrmsr(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- MOVL value+8(FP), AX
- MOVL value+12(FP), DX
- BYTE $0x0f; BYTE $0x30;
- RET
-
-// rdmsr reads a model-specific register (MSR).
-//
-// The code corresponds to:
-//
-// rdmsr
-//
-TEXT ·rdmsr(SB),NOSPLIT,$0-16
- MOVL reg+0(FP), CX
- BYTE $0x0f; BYTE $0x32;
- MOVL AX, ret+8(FP)
- MOVL DX, ret+12(FP)
- RET
diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go
deleted file mode 100644
index a490bf3af..000000000
--- a/pkg/sentry/platform/ring0/lib_arm64.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-// storeAppASID writes the application's asid value.
-func storeAppASID(asid uintptr)
-
-// LocalFlushTlbAll is the same as FlushTlbAll, but only applies to the calling CPU.
-func LocalFlushTlbAll()
-
-// FlushTlbByVA invalidates tlb by VA/Last-level/Inner-Shareable.
-func FlushTlbByVA(addr uintptr)
-
-// FlushTlbByASID invalidates tlb by ASID/Inner-Shareable.
-func FlushTlbByASID(asid uintptr)
-
-// FlushTlbAll invalidates all tlb.
-func FlushTlbAll()
-
-// CPACREL1 returns the value of the CPACR_EL1 register.
-func CPACREL1() (value uintptr)
-
-// GetFPCR returns the value of FPCR register.
-func GetFPCR() (value uintptr)
-
-// SetFPCR writes the FPCR value.
-func SetFPCR(value uintptr)
-
-// GetFPSR returns the value of FPSR register.
-func GetFPSR() (value uintptr)
-
-// SetFPSR writes the FPSR value.
-func SetFPSR(value uintptr)
-
-// SaveVRegs saves V0-V31 registers.
-// V0-V31: 32 128-bit registers for floating point and simd.
-func SaveVRegs(*byte)
-
-// LoadVRegs loads V0-V31 registers.
-func LoadVRegs(*byte)
-
-// LoadFloatingPoint loads floating point state.
-func LoadFloatingPoint(*byte)
-
-// SaveFloatingPoint saves floating point state.
-func SaveFloatingPoint(*byte)
-
-// EnableVFP enables fpsimd.
-func EnableVFP()
-
-// DisableVFP disables fpsimd.
-func DisableVFP()
-
-// Init is the architecture-specific initialization hook; it is a no-op on arm64.
-//
-// This must be called prior to using ring0.
-func Init() {}
diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/sentry/platform/ring0/lib_arm64.s
deleted file mode 100644
index e39b32841..000000000
--- a/pkg/sentry/platform/ring0/lib_arm64.s
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "funcdata.h"
-#include "textflag.h"
-
-#define TLBI_ASID_SHIFT 48
-
-TEXT ·FlushTlbByVA(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- DSB $10 // dsb(ishst)
- WORD $0xd50883a1 // tlbi vale1is, x1
- DSB $11 // dsb(ish)
- RET
-
-TEXT ·FlushTlbByASID(SB),NOSPLIT,$0-8
- MOVD asid+0(FP), R1
- LSL $TLBI_ASID_SHIFT, R1, R1
- DSB $10 // dsb(ishst)
- WORD $0xd5088341 // tlbi aside1is, x1
- DSB $11 // dsb(ish)
- RET
-
-TEXT ·LocalFlushTlbAll(SB),NOSPLIT,$0
- DSB $6 // dsb(nshst)
- WORD $0xd508871f // __tlbi(vmalle1)
- DSB $7 // dsb(nsh)
- ISB $15
- RET
-
-TEXT ·FlushTlbAll(SB),NOSPLIT,$0
- DSB $10 // dsb(ishst)
- WORD $0xd508831f // __tlbi(vmalle1is)
- DSB $11 // dsb(ish)
- ISB $15
- RET
-
-TEXT ·CPACREL1(SB),NOSPLIT,$0-8
- WORD $0xd5381041 // MRS CPACR_EL1, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·GetFPCR(SB),NOSPLIT,$0-8
- MOVD FPCR, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·GetFPSR(SB),NOSPLIT,$0-8
- MOVD FPSR, R1
- MOVD R1, ret+0(FP)
- RET
-
-TEXT ·SetFPCR(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- MOVD R1, FPCR
- RET
-
-TEXT ·SetFPSR(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R1
- MOVD R1, FPSR
- RET
-
-TEXT ·SaveVRegs(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- // Skip aarch64_ctx, fpsr, fpcr.
- ADD $16, R0, R0
-
- WORD $0xad000400 // stp q0, q1, [x0]
- WORD $0xad010c02 // stp q2, q3, [x0, #32]
- WORD $0xad021404 // stp q4, q5, [x0, #64]
- WORD $0xad031c06 // stp q6, q7, [x0, #96]
- WORD $0xad042408 // stp q8, q9, [x0, #128]
- WORD $0xad052c0a // stp q10, q11, [x0, #160]
- WORD $0xad06340c // stp q12, q13, [x0, #192]
- WORD $0xad073c0e // stp q14, q15, [x0, #224]
- WORD $0xad084410 // stp q16, q17, [x0, #256]
- WORD $0xad094c12 // stp q18, q19, [x0, #288]
- WORD $0xad0a5414 // stp q20, q21, [x0, #320]
- WORD $0xad0b5c16 // stp q22, q23, [x0, #352]
- WORD $0xad0c6418 // stp q24, q25, [x0, #384]
- WORD $0xad0d6c1a // stp q26, q27, [x0, #416]
- WORD $0xad0e741c // stp q28, q29, [x0, #448]
- WORD $0xad0f7c1e // stp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·LoadVRegs(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- // Skip aarch64_ctx, fpsr, fpcr.
- ADD $16, R0, R0
-
- WORD $0xad400400 // ldp q0, q1, [x0]
- WORD $0xad410c02 // ldp q2, q3, [x0, #32]
- WORD $0xad421404 // ldp q4, q5, [x0, #64]
- WORD $0xad431c06 // ldp q6, q7, [x0, #96]
- WORD $0xad442408 // ldp q8, q9, [x0, #128]
- WORD $0xad452c0a // ldp q10, q11, [x0, #160]
- WORD $0xad46340c // ldp q12, q13, [x0, #192]
- WORD $0xad473c0e // ldp q14, q15, [x0, #224]
- WORD $0xad484410 // ldp q16, q17, [x0, #256]
- WORD $0xad494c12 // ldp q18, q19, [x0, #288]
- WORD $0xad4a5414 // ldp q20, q21, [x0, #320]
- WORD $0xad4b5c16 // ldp q22, q23, [x0, #352]
- WORD $0xad4c6418 // ldp q24, q25, [x0, #384]
- WORD $0xad4d6c1a // ldp q26, q27, [x0, #416]
- WORD $0xad4e741c // ldp q28, q29, [x0, #448]
- WORD $0xad4f7c1e // ldp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·LoadFloatingPoint(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- MOVD 0(R0), R1
- MOVD R1, FPSR
- MOVD 8(R0), R1
- MOVD R1, FPCR
-
- ADD $16, R0, R0
-
- WORD $0xad400400 // ldp q0, q1, [x0]
- WORD $0xad410c02 // ldp q2, q3, [x0, #32]
- WORD $0xad421404 // ldp q4, q5, [x0, #64]
- WORD $0xad431c06 // ldp q6, q7, [x0, #96]
- WORD $0xad442408 // ldp q8, q9, [x0, #128]
- WORD $0xad452c0a // ldp q10, q11, [x0, #160]
- WORD $0xad46340c // ldp q12, q13, [x0, #192]
- WORD $0xad473c0e // ldp q14, q15, [x0, #224]
- WORD $0xad484410 // ldp q16, q17, [x0, #256]
- WORD $0xad494c12 // ldp q18, q19, [x0, #288]
- WORD $0xad4a5414 // ldp q20, q21, [x0, #320]
- WORD $0xad4b5c16 // ldp q22, q23, [x0, #352]
- WORD $0xad4c6418 // ldp q24, q25, [x0, #384]
- WORD $0xad4d6c1a // ldp q26, q27, [x0, #416]
- WORD $0xad4e741c // ldp q28, q29, [x0, #448]
- WORD $0xad4f7c1e // ldp q30, q31, [x0, #480]
-
- RET
-
-TEXT ·SaveFloatingPoint(SB),NOSPLIT,$0-8
- MOVD addr+0(FP), R0
-
- MOVD FPSR, R1
- MOVD R1, 0(R0)
- MOVD FPCR, R1
- MOVD R1, 8(R0)
-
- ADD $16, R0, R0
-
- WORD $0xad000400 // stp q0, q1, [x0]
- WORD $0xad010c02 // stp q2, q3, [x0, #32]
- WORD $0xad021404 // stp q4, q5, [x0, #64]
- WORD $0xad031c06 // stp q6, q7, [x0, #96]
- WORD $0xad042408 // stp q8, q9, [x0, #128]
- WORD $0xad052c0a // stp q10, q11, [x0, #160]
- WORD $0xad06340c // stp q12, q13, [x0, #192]
- WORD $0xad073c0e // stp q14, q15, [x0, #224]
- WORD $0xad084410 // stp q16, q17, [x0, #256]
- WORD $0xad094c12 // stp q18, q19, [x0, #288]
- WORD $0xad0a5414 // stp q20, q21, [x0, #320]
- WORD $0xad0b5c16 // stp q22, q23, [x0, #352]
- WORD $0xad0c6418 // stp q24, q25, [x0, #384]
- WORD $0xad0d6c1a // stp q26, q27, [x0, #416]
- WORD $0xad0e741c // stp q28, q29, [x0, #448]
- WORD $0xad0f7c1e // stp q30, q31, [x0, #480]
-
- RET
diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go
deleted file mode 100644
index ca4075b09..000000000
--- a/pkg/sentry/platform/ring0/offsets_amd64.go
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package ring0
-
-import (
- "fmt"
- "io"
- "reflect"
-
- "gvisor.dev/gvisor/pkg/sentry/arch"
-)
-
-// Emit prints architecture-specific offsets.
-func Emit(w io.Writer) {
- fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
-
- c := &CPU{}
- fmt.Fprintf(w, "\n// CPU offsets.\n")
- fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ENTRY 0x%02x\n", reflect.ValueOf(&c.kernelEntry).Pointer()-reflect.ValueOf(c).Pointer())
-
- e := &kernelEntry{}
- fmt.Fprintf(w, "\n// CPU entry offsets.\n")
- fmt.Fprintf(w, "#define ENTRY_SCRATCH0 0x%02x\n", reflect.ValueOf(&e.scratch0).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_STACK_TOP 0x%02x\n", reflect.ValueOf(&e.stackTop).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_CPU_SELF 0x%02x\n", reflect.ValueOf(&e.cpuSelf).Pointer()-reflect.ValueOf(e).Pointer())
- fmt.Fprintf(w, "#define ENTRY_KERNEL_CR3 0x%02x\n", reflect.ValueOf(&e.kernelCR3).Pointer()-reflect.ValueOf(e).Pointer())
-
- fmt.Fprintf(w, "\n// Bits.\n")
- fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF)
- fmt.Fprintf(w, "#define _RFLAGS_IOPL0 0x%02x\n", _RFLAGS_IOPL0)
- fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
-
- fmt.Fprintf(w, "\n// Vectors.\n")
- fmt.Fprintf(w, "#define DivideByZero 0x%02x\n", DivideByZero)
- fmt.Fprintf(w, "#define Debug 0x%02x\n", Debug)
- fmt.Fprintf(w, "#define NMI 0x%02x\n", NMI)
- fmt.Fprintf(w, "#define Breakpoint 0x%02x\n", Breakpoint)
- fmt.Fprintf(w, "#define Overflow 0x%02x\n", Overflow)
- fmt.Fprintf(w, "#define BoundRangeExceeded 0x%02x\n", BoundRangeExceeded)
- fmt.Fprintf(w, "#define InvalidOpcode 0x%02x\n", InvalidOpcode)
- fmt.Fprintf(w, "#define DeviceNotAvailable 0x%02x\n", DeviceNotAvailable)
- fmt.Fprintf(w, "#define DoubleFault 0x%02x\n", DoubleFault)
- fmt.Fprintf(w, "#define CoprocessorSegmentOverrun 0x%02x\n", CoprocessorSegmentOverrun)
- fmt.Fprintf(w, "#define InvalidTSS 0x%02x\n", InvalidTSS)
- fmt.Fprintf(w, "#define SegmentNotPresent 0x%02x\n", SegmentNotPresent)
- fmt.Fprintf(w, "#define StackSegmentFault 0x%02x\n", StackSegmentFault)
- fmt.Fprintf(w, "#define GeneralProtectionFault 0x%02x\n", GeneralProtectionFault)
- fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
- fmt.Fprintf(w, "#define X87FloatingPointException 0x%02x\n", X87FloatingPointException)
- fmt.Fprintf(w, "#define AlignmentCheck 0x%02x\n", AlignmentCheck)
- fmt.Fprintf(w, "#define MachineCheck 0x%02x\n", MachineCheck)
- fmt.Fprintf(w, "#define SIMDFloatingPointException 0x%02x\n", SIMDFloatingPointException)
- fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
- fmt.Fprintf(w, "#define SecurityException 0x%02x\n", SecurityException)
- fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80)
- fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
-
- p := &arch.Registers{}
- fmt.Fprintf(w, "\n// Ptrace registers.\n")
- fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.R13).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.R12).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RBP 0x%02x\n", reflect.ValueOf(&p.Rbp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RBX 0x%02x\n", reflect.ValueOf(&p.Rbx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.R11).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.R10).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.R9).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.R8).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RAX 0x%02x\n", reflect.ValueOf(&p.Rax).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RCX 0x%02x\n", reflect.ValueOf(&p.Rcx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RDX 0x%02x\n", reflect.ValueOf(&p.Rdx).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RSI 0x%02x\n", reflect.ValueOf(&p.Rsi).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RDI 0x%02x\n", reflect.ValueOf(&p.Rdi).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_ORIGRAX 0x%02x\n", reflect.ValueOf(&p.Orig_rax).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RIP 0x%02x\n", reflect.ValueOf(&p.Rip).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_CS 0x%02x\n", reflect.ValueOf(&p.Cs).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_FLAGS 0x%02x\n", reflect.ValueOf(&p.Eflags).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_RSP 0x%02x\n", reflect.ValueOf(&p.Rsp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_SS 0x%02x\n", reflect.ValueOf(&p.Ss).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_FS 0x%02x\n", reflect.ValueOf(&p.Fs_base).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_GS 0x%02x\n", reflect.ValueOf(&p.Gs_base).Pointer()-reflect.ValueOf(p).Pointer())
-}
diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go
deleted file mode 100644
index 164db6d5a..000000000
--- a/pkg/sentry/platform/ring0/offsets_arm64.go
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package ring0
-
-import (
- "fmt"
- "io"
- "reflect"
-
- "gvisor.dev/gvisor/pkg/sentry/arch"
-)
-
-// Emit prints architecture-specific offsets.
-func Emit(w io.Writer) {
- fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
-
- c := &CPU{}
- fmt.Fprintf(w, "\n// CPU offsets.\n")
- fmt.Fprintf(w, "#define CPU_SELF 0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack)))
- fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_FAULT_ADDR 0x%02x\n", reflect.ValueOf(&c.faultAddr).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_TTBR0_KVM 0x%02x\n", reflect.ValueOf(&c.ttbr0Kvm).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_TTBR0_APP 0x%02x\n", reflect.ValueOf(&c.ttbr0App).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_VECTOR_CODE 0x%02x\n", reflect.ValueOf(&c.vecCode).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_APP_ADDR 0x%02x\n", reflect.ValueOf(&c.appAddr).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_LAZY_VFP 0x%02x\n", reflect.ValueOf(&c.lazyVFP).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_APP_ASID 0x%02x\n", reflect.ValueOf(&c.appASID).Pointer()-reflect.ValueOf(c).Pointer())
-
- fmt.Fprintf(w, "\n// Bits.\n")
- fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
-
- fmt.Fprintf(w, "\n// Vectors.\n")
-
- fmt.Fprintf(w, "#define El1Sync 0x%02x\n", El1Sync)
- fmt.Fprintf(w, "#define El1Irq 0x%02x\n", El1Irq)
- fmt.Fprintf(w, "#define El1Fiq 0x%02x\n", El1Fiq)
- fmt.Fprintf(w, "#define El1Err 0x%02x\n", El1Err)
-
- fmt.Fprintf(w, "#define El0Sync 0x%02x\n", El0Sync)
- fmt.Fprintf(w, "#define El0Irq 0x%02x\n", El0Irq)
- fmt.Fprintf(w, "#define El0Fiq 0x%02x\n", El0Fiq)
- fmt.Fprintf(w, "#define El0Err 0x%02x\n", El0Err)
-
- fmt.Fprintf(w, "#define El1SyncDa 0x%02x\n", El1SyncDa)
- fmt.Fprintf(w, "#define El1SyncIa 0x%02x\n", El1SyncIa)
- fmt.Fprintf(w, "#define El1SyncSpPc 0x%02x\n", El1SyncSpPc)
- fmt.Fprintf(w, "#define El1SyncUndef 0x%02x\n", El1SyncUndef)
- fmt.Fprintf(w, "#define El1SyncDbg 0x%02x\n", El1SyncDbg)
- fmt.Fprintf(w, "#define El1SyncInv 0x%02x\n", El1SyncInv)
-
- fmt.Fprintf(w, "#define El0SyncSVC 0x%02x\n", El0SyncSVC)
- fmt.Fprintf(w, "#define El0SyncDa 0x%02x\n", El0SyncDa)
- fmt.Fprintf(w, "#define El0SyncIa 0x%02x\n", El0SyncIa)
- fmt.Fprintf(w, "#define El0SyncFpsimdAcc 0x%02x\n", El0SyncFpsimdAcc)
- fmt.Fprintf(w, "#define El0SyncSveAcc 0x%02x\n", El0SyncSveAcc)
- fmt.Fprintf(w, "#define El0SyncFpsimdExc 0x%02x\n", El0SyncFpsimdExc)
- fmt.Fprintf(w, "#define El0SyncSys 0x%02x\n", El0SyncSys)
- fmt.Fprintf(w, "#define El0SyncSpPc 0x%02x\n", El0SyncSpPc)
- fmt.Fprintf(w, "#define El0SyncUndef 0x%02x\n", El0SyncUndef)
- fmt.Fprintf(w, "#define El0SyncDbg 0x%02x\n", El0SyncDbg)
- fmt.Fprintf(w, "#define El0SyncWfx 0x%02x\n", El0SyncWfx)
- fmt.Fprintf(w, "#define El0SyncInv 0x%02x\n", El0SyncInv)
-
- fmt.Fprintf(w, "#define El0ErrNMI 0x%02x\n", El0ErrNMI)
-
- fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
- fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
- fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
-
- p := &arch.Registers{}
- fmt.Fprintf(w, "\n// Ptrace registers.\n")
- fmt.Fprintf(w, "#define PTRACE_R0 0x%02x\n", reflect.ValueOf(&p.Regs[0]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R1 0x%02x\n", reflect.ValueOf(&p.Regs[1]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R2 0x%02x\n", reflect.ValueOf(&p.Regs[2]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R3 0x%02x\n", reflect.ValueOf(&p.Regs[3]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R4 0x%02x\n", reflect.ValueOf(&p.Regs[4]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R5 0x%02x\n", reflect.ValueOf(&p.Regs[5]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R6 0x%02x\n", reflect.ValueOf(&p.Regs[6]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R7 0x%02x\n", reflect.ValueOf(&p.Regs[7]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.Regs[8]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.Regs[9]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.Regs[10]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.Regs[11]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.Regs[12]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.Regs[13]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.Regs[14]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.Regs[15]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R16 0x%02x\n", reflect.ValueOf(&p.Regs[16]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R17 0x%02x\n", reflect.ValueOf(&p.Regs[17]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R18 0x%02x\n", reflect.ValueOf(&p.Regs[18]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R19 0x%02x\n", reflect.ValueOf(&p.Regs[19]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R20 0x%02x\n", reflect.ValueOf(&p.Regs[20]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R21 0x%02x\n", reflect.ValueOf(&p.Regs[21]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R22 0x%02x\n", reflect.ValueOf(&p.Regs[22]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R23 0x%02x\n", reflect.ValueOf(&p.Regs[23]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R24 0x%02x\n", reflect.ValueOf(&p.Regs[24]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R25 0x%02x\n", reflect.ValueOf(&p.Regs[25]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R26 0x%02x\n", reflect.ValueOf(&p.Regs[26]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R27 0x%02x\n", reflect.ValueOf(&p.Regs[27]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R28 0x%02x\n", reflect.ValueOf(&p.Regs[28]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R29 0x%02x\n", reflect.ValueOf(&p.Regs[29]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_R30 0x%02x\n", reflect.ValueOf(&p.Regs[30]).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_SP 0x%02x\n", reflect.ValueOf(&p.Sp).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_PC 0x%02x\n", reflect.ValueOf(&p.Pc).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_PSTATE 0x%02x\n", reflect.ValueOf(&p.Pstate).Pointer()-reflect.ValueOf(p).Pointer())
- fmt.Fprintf(w, "#define PTRACE_TLS 0x%02x\n", reflect.ValueOf(&p.TPIDR_EL0).Pointer()-reflect.ValueOf(p).Pointer())
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
deleted file mode 100644
index 9e3539e4c..000000000
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ /dev/null
@@ -1,84 +0,0 @@
-load("//tools:defs.bzl", "go_library", "go_test")
-load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
-
-package(licenses = ["notice"])
-
-[
- # These files are tagged with relevant build architectures. We can always
- # build all the input files, which will be included only in the relevant
- # architecture builds.
- go_template(
- name = "generic_walker_%s" % arch,
- srcs = ["walker_%s.go" % arch],
- opt_types = [
- "Visitor",
- ],
- visibility = [":__pkg__"],
- )
- for arch in ("amd64", "arm64")
-]
-
-[
- # See above.
- go_template_instance(
- name = "walker_%s_%s" % (op, arch),
- out = "walker_%s_%s.go" % (op, arch),
- package = "pagetables",
- prefix = op,
- template = ":generic_walker_%s" % arch,
- types = {
- "Visitor": "%sVisitor" % op,
- },
- )
- for op in ("map", "unmap", "lookup", "empty", "check")
- for arch in ("amd64", "arm64")
-]
-
-go_library(
- name = "pagetables",
- srcs = [
- "allocator.go",
- "allocator_unsafe.go",
- "pagetables.go",
- "pagetables_aarch64.go",
- "pagetables_amd64.go",
- "pagetables_arm64.go",
- "pagetables_x86.go",
- "pcids.go",
- "pcids_aarch64.go",
- "pcids_aarch64.s",
- "pcids_x86.go",
- "walker_amd64.go",
- "walker_arm64.go",
- ":walker_empty_amd64",
- ":walker_empty_arm64",
- ":walker_lookup_amd64",
- ":walker_lookup_arm64",
- ":walker_map_amd64",
- ":walker_map_arm64",
- ":walker_unmap_amd64",
- ":walker_unmap_arm64",
- ],
- visibility = [
- "//pkg/sentry/platform/kvm:__subpackages__",
- "//pkg/sentry/platform/ring0:__subpackages__",
- ],
- deps = [
- "//pkg/sync",
- "//pkg/usermem",
- ],
-)
-
-go_test(
- name = "pagetables_test",
- size = "small",
- srcs = [
- "pagetables_amd64_test.go",
- "pagetables_arm64_test.go",
- "pagetables_test.go",
- ":walker_check_amd64",
- ":walker_check_arm64",
- ],
- library = ":pagetables",
- deps = ["//pkg/usermem"],
-)
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator.go b/pkg/sentry/platform/ring0/pagetables/allocator.go
deleted file mode 100644
index 8d75b7599..000000000
--- a/pkg/sentry/platform/ring0/pagetables/allocator.go
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Allocator is used to allocate and map PTEs.
-//
-// Note that allocators may be called concurrently.
-type Allocator interface {
- // NewPTEs returns a new set of PTEs and their physical address.
- NewPTEs() *PTEs
-
- // PhysicalFor gives the physical address for a set of PTEs.
- PhysicalFor(ptes *PTEs) uintptr
-
- // LookupPTEs looks up PTEs by physical address.
- LookupPTEs(physical uintptr) *PTEs
-
- // FreePTEs marks a set of PTEs a freed, although they may not be available
- // for use again until Recycle is called, below.
- FreePTEs(ptes *PTEs)
-
- // Recycle makes freed PTEs available for use again.
- Recycle()
-}
-
-// RuntimeAllocator is a trivial allocator.
-type RuntimeAllocator struct {
- // used is the set of PTEs that have been allocated. This includes any
- // PTEs that may be in the pool below. PTEs are only freed from this
- // map by the Drain call.
- //
- // This exists to prevent accidental garbage collection.
- used map[*PTEs]struct{}
-
- // pool is the set of free-to-use PTEs.
- pool []*PTEs
-
- // freed is the set of recently-freed PTEs.
- freed []*PTEs
-}
-
-// NewRuntimeAllocator returns an allocator that uses runtime allocation.
-func NewRuntimeAllocator() *RuntimeAllocator {
- r := new(RuntimeAllocator)
- r.Init()
- return r
-}
-
-// Init initializes a RuntimeAllocator.
-func (r *RuntimeAllocator) Init() {
- r.used = make(map[*PTEs]struct{})
-}
-
-// Recycle returns freed pages to the pool.
-func (r *RuntimeAllocator) Recycle() {
- r.pool = append(r.pool, r.freed...)
- r.freed = r.freed[:0]
-}
-
-// Drain empties the pool.
-func (r *RuntimeAllocator) Drain() {
- r.Recycle()
- for i, ptes := range r.pool {
- // Zap the entry in the underlying array to ensure that it can
- // be properly garbage collected.
- r.pool[i] = nil
- // Similarly, free the reference held by the used map (these
- // also apply for the pool entries).
- delete(r.used, ptes)
- }
- r.pool = r.pool[:0]
-}
-
-// NewPTEs implements Allocator.NewPTEs.
-//
-// Note that the "physical" address here is actually the virtual address of the
-// PTEs structure. The entries are tracked only to avoid garbage collection.
-//
-// This is guaranteed not to split as long as the pool is sufficiently full.
-//
-//go:nosplit
-func (r *RuntimeAllocator) NewPTEs() *PTEs {
- // Pull from the pool if we can.
- if len(r.pool) > 0 {
- ptes := r.pool[len(r.pool)-1]
- r.pool = r.pool[:len(r.pool)-1]
- return ptes
- }
-
- // Allocate a new entry.
- ptes := newAlignedPTEs()
- r.used[ptes] = struct{}{}
- return ptes
-}
-
-// PhysicalFor returns the physical address for the given PTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) PhysicalFor(ptes *PTEs) uintptr {
- return physicalFor(ptes)
-}
-
-// LookupPTEs implements Allocator.LookupPTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) LookupPTEs(physical uintptr) *PTEs {
- return fromPhysical(physical)
-}
-
-// FreePTEs implements Allocator.FreePTEs.
-//
-//go:nosplit
-func (r *RuntimeAllocator) FreePTEs(ptes *PTEs) {
- r.freed = append(r.freed, ptes)
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
deleted file mode 100644
index d08bfdeb3..000000000
--- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
- "unsafe"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// newAlignedPTEs returns a set of aligned PTEs.
-func newAlignedPTEs() *PTEs {
- ptes := new(PTEs)
- offset := physicalFor(ptes) & (usermem.PageSize - 1)
- if offset == 0 {
- // Already aligned.
- return ptes
- }
-
- // Need to force an aligned allocation.
- unaligned := make([]byte, (2*usermem.PageSize)-1)
- offset = uintptr(unsafe.Pointer(&unaligned[0])) & (usermem.PageSize - 1)
- if offset != 0 {
- offset = usermem.PageSize - offset
- }
- return (*PTEs)(unsafe.Pointer(&unaligned[offset]))
-}
-
-// physicalFor returns the "physical" address for PTEs.
-//
-//go:nosplit
-func physicalFor(ptes *PTEs) uintptr {
- return uintptr(unsafe.Pointer(ptes))
-}
-
-// fromPhysical returns the PTEs from the "physical" address.
-//
-//go:nosplit
-func fromPhysical(physical uintptr) *PTEs {
- return (*PTEs)(unsafe.Pointer(physical))
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
deleted file mode 100644
index 7605d0cb2..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ /dev/null
@@ -1,310 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package pagetables provides a generic implementation of pagetables.
-//
-// The core functions must be safe to call from a nosplit context. Furthermore,
-// this pagetables implementation goes to lengths to ensure that all functions
-// are free from runtime allocation. Calls to NewPTEs/FreePTEs may be made
-// during walks, but these can be cached elsewhere if required.
-package pagetables
-
-import (
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// PageTables is a set of page tables.
-type PageTables struct {
- // Allocator is used to allocate nodes.
- Allocator Allocator
-
- // root is the pagetable root.
- //
- // For same archs such as amd64, the upper of the PTEs is cloned
- // from and owned by upperSharedPageTables which are shared among
- // many PageTables if upperSharedPageTables is not nil.
- root *PTEs
-
- // rootPhysical is the cached physical address of the root.
- //
- // This is saved only to prevent constant translation.
- rootPhysical uintptr
-
- // archPageTables includes architecture-specific features.
- archPageTables
-
- // upperSharedPageTables represents a read-only shared upper
- // of the Pagetable. When it is not nil, the upper is not
- // allowed to be modified.
- upperSharedPageTables *PageTables
-
- // upperStart is the start address of the upper portion that
- // are shared from upperSharedPageTables
- upperStart uintptr
-
- // readOnlyShared indicates the Pagetables are read-only and
- // own the ranges that are shared with other Pagetables.
- readOnlyShared bool
-}
-
-// Init initializes a set of PageTables.
-//
-//go:nosplit
-func (p *PageTables) Init(allocator Allocator) {
- p.Allocator = allocator
- p.root = p.Allocator.NewPTEs()
- p.rootPhysical = p.Allocator.PhysicalFor(p.root)
-}
-
-// NewWithUpper returns new PageTables.
-//
-// upperSharedPageTables are used for mapping the upper of addresses,
-// starting at upperStart. These pageTables should not be touched (as
-// invalidations may be incorrect) after they are passed as an
-// upperSharedPageTables. Only when all dependent PageTables are gone
-// may they be used. The intenteded use case is for kernel page tables,
-// which are static and fixed.
-//
-// Precondition: upperStart must be between canonical ranges.
-// Precondition: upperStart must be pgdSize aligned.
-// precondition: upperSharedPageTables must be marked read-only shared.
-func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uintptr) *PageTables {
- p := new(PageTables)
- p.Init(a)
-
- if upperSharedPageTables != nil {
- if !upperSharedPageTables.readOnlyShared {
- panic("Only read-only shared pagetables can be used as upper")
- }
- p.upperSharedPageTables = upperSharedPageTables
- p.upperStart = upperStart
- }
-
- p.InitArch(a)
-
- return p
-}
-
-// New returns new PageTables.
-func New(a Allocator) *PageTables {
- return NewWithUpper(a, nil, 0)
-}
-
-// mapVisitor is used for map.
-type mapVisitor struct {
- target uintptr // Input.
- physical uintptr // Input.
- opts MapOpts // Input.
- prev bool // Output.
-}
-
-// visit is used for map.
-//
-//go:nosplit
-func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
- p := v.physical + (start - uintptr(v.target))
- if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) {
- v.prev = true
- }
- if p&align != 0 {
- // We will install entries at a smaller granulaity if we don't
- // install a valid entry here, however we must zap any existing
- // entry to ensure this happens.
- pte.Clear()
- return
- }
- pte.Set(p, v.opts)
-}
-
-//go:nosplit
-func (*mapVisitor) requiresAlloc() bool { return true }
-
-//go:nosplit
-func (*mapVisitor) requiresSplit() bool { return true }
-
-// Map installs a mapping with the given physical address.
-//
-// True is returned iff there was a previous mapping in the range.
-//
-// Precondition: addr & length must be page-aligned, their sum must not overflow.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
- if p.readOnlyShared {
- panic("Should not modify read-only shared pagetables.")
- }
- if uintptr(addr)+length < uintptr(addr) {
- panic("addr & length overflow")
- }
- if p.upperSharedPageTables != nil {
- // ignore change to the read-only upper shared portion.
- if uintptr(addr) >= p.upperStart {
- return false
- }
- if uintptr(addr)+length > p.upperStart {
- length = p.upperStart - uintptr(addr)
- }
- }
- if !opts.AccessType.Any() {
- return p.Unmap(addr, length)
- }
- w := mapWalker{
- pageTables: p,
- visitor: mapVisitor{
- target: uintptr(addr),
- physical: physical,
- opts: opts,
- },
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.prev
-}
-
-// unmapVisitor is used for unmap.
-type unmapVisitor struct {
- count int
-}
-
-//go:nosplit
-func (*unmapVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*unmapVisitor) requiresSplit() bool { return true }
-
-// visit unmaps the given entry.
-//
-//go:nosplit
-func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
- pte.Clear()
- v.count++
-}
-
-// Unmap unmaps the given range.
-//
-// True is returned iff there was a previous mapping in the range.
-//
-// Precondition: addr & length must be page-aligned, their sum must not overflow.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
- if p.readOnlyShared {
- panic("Should not modify read-only shared pagetables.")
- }
- if uintptr(addr)+length < uintptr(addr) {
- panic("addr & length overflow")
- }
- if p.upperSharedPageTables != nil {
- // ignore change to the read-only upper shared portion.
- if uintptr(addr) >= p.upperStart {
- return false
- }
- if uintptr(addr)+length > p.upperStart {
- length = p.upperStart - uintptr(addr)
- }
- }
- w := unmapWalker{
- pageTables: p,
- visitor: unmapVisitor{
- count: 0,
- },
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.count > 0
-}
-
-// emptyVisitor is used for emptiness checks.
-type emptyVisitor struct {
- count int
-}
-
-//go:nosplit
-func (*emptyVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*emptyVisitor) requiresSplit() bool { return false }
-
-// visit unmaps the given entry.
-//
-//go:nosplit
-func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) {
- v.count++
-}
-
-// IsEmpty checks if the given range is empty.
-//
-// Precondition: addr & length must be page-aligned.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
- w := emptyWalker{
- pageTables: p,
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+length)
- return w.visitor.count == 0
-}
-
-// lookupVisitor is used for lookup.
-type lookupVisitor struct {
- target uintptr // Input.
- physical uintptr // Output.
- opts MapOpts // Output.
-}
-
-// visit matches the given address.
-//
-//go:nosplit
-func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) {
- if !pte.Valid() {
- return
- }
- v.physical = pte.Address() + (start - uintptr(v.target))
- v.opts = pte.Opts()
-}
-
-//go:nosplit
-func (*lookupVisitor) requiresAlloc() bool { return false }
-
-//go:nosplit
-func (*lookupVisitor) requiresSplit() bool { return false }
-
-// Lookup returns the physical address for the given virtual address.
-//
-// +checkescape:hard,stack
-//
-//go:nosplit
-func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) {
- mask := uintptr(usermem.PageSize - 1)
- offset := uintptr(addr) & mask
- w := lookupWalker{
- pageTables: p,
- visitor: lookupVisitor{
- target: uintptr(addr &^ usermem.Addr(mask)),
- },
- }
- w.iterateRange(uintptr(addr), uintptr(addr)+1)
- return w.visitor.physical + offset, w.visitor.opts
-}
-
-// MarkReadOnlyShared marks the pagetables read-only and can be shared.
-//
-// It is usually used on the pagetables that are used as the upper
-func (p *PageTables) MarkReadOnlyShared() {
- p.readOnlyShared = true
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
deleted file mode 100644
index 520161755..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-import (
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// archPageTables is architecture-specific data.
-type archPageTables struct {
- // root is the pagetable root for kernel space.
- root *PTEs
-
- // rootPhysical is the cached physical address of the root.
- //
- // This is saved only to prevent constant translation.
- rootPhysical uintptr
-
- asid uint16
-}
-
-// TTBR0_EL1 returns the translation table base register 0.
-//
-//go:nosplit
-func (p *PageTables) TTBR0_EL1(noFlush bool, asid uint16) uint64 {
- return uint64(p.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
-}
-
-// TTBR1_EL1 returns the translation table base register 1.
-//
-//go:nosplit
-func (p *PageTables) TTBR1_EL1(noFlush bool, asid uint16) uint64 {
- return uint64(p.archPageTables.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
-}
-
-// Bits in page table entries.
-const (
- typeTable = 0x3 << 0
- typeSect = 0x1 << 0
- typePage = 0x3 << 0
- pteValid = 0x1 << 0
- pteTableBit = 0x1 << 1
- pteTypeMask = 0x3 << 0
- present = pteValid | pteTableBit
- user = 0x1 << 6 /* AP[1] */
- readOnly = 0x1 << 7 /* AP[2] */
- accessed = 0x1 << 10
- dbm = 0x1 << 51
- writable = dbm
- cont = 0x1 << 52
- pxn = 0x1 << 53
- xn = 0x1 << 54
- dirty = 0x1 << 55
- nG = 0x1 << 11
- shared = 0x3 << 8
-)
-
-const (
- mtDevicenGnRE = 0x1 << 2
- mtNormal = 0x4 << 2
-)
-
-const (
- executeDisable = xn
- optionMask = 0xfff | 0xffff<<48
- protDefault = accessed | shared
-)
-
-// MapOpts are x86 options.
-type MapOpts struct {
- // AccessType defines permissions.
- AccessType usermem.AccessType
-
- // Global indicates the page is globally accessible.
- Global bool
-
- // User indicates the page is a user page.
- User bool
-}
-
-// PTE is a page table entry.
-type PTE uintptr
-
-// Clear clears this PTE, including sect page information.
-//
-//go:nosplit
-func (p *PTE) Clear() {
- atomic.StoreUintptr((*uintptr)(p), 0)
-}
-
-// Valid returns true iff this entry is valid.
-//
-//go:nosplit
-func (p *PTE) Valid() bool {
- return atomic.LoadUintptr((*uintptr)(p))&present != 0
-}
-
-// Opts returns the PTE options.
-//
-// These are all options except Valid and Sect.
-//
-//go:nosplit
-func (p *PTE) Opts() MapOpts {
- v := atomic.LoadUintptr((*uintptr)(p))
-
- return MapOpts{
- AccessType: usermem.AccessType{
- Read: true,
- Write: v&readOnly == 0,
- Execute: v&xn == 0,
- },
- Global: v&nG == 0,
- User: v&user != 0,
- }
-}
-
-// SetSect sets this page as a sect page.
-//
-// The page must not be valid or a panic will result.
-//
-//go:nosplit
-func (p *PTE) SetSect() {
- if p.Valid() {
- // This is not allowed.
- panic("SetSect called on valid page!")
- }
- atomic.StoreUintptr((*uintptr)(p), typeSect)
-}
-
-// IsSect returns true iff this page is a sect page.
-//
-//go:nosplit
-func (p *PTE) IsSect() bool {
- return atomic.LoadUintptr((*uintptr)(p))&pteTypeMask == typeSect
-}
-
-// Set sets this PTE value.
-//
-// This does not change the sect page property.
-//
-//go:nosplit
-func (p *PTE) Set(addr uintptr, opts MapOpts) {
- if !opts.AccessType.Any() {
- p.Clear()
- return
- }
- v := (addr &^ optionMask) | protDefault | nG | readOnly
-
- if p.IsSect() {
- // Note that this is inherited from the previous instance. Set
- // does not change the value of Sect. See above.
- v |= typeSect
- } else {
- v |= typePage
- }
-
- if opts.Global {
- v = v &^ nG
- }
-
- if opts.AccessType.Execute {
- v = v &^ executeDisable
- } else {
- v |= executeDisable
- }
- if opts.AccessType.Write {
- v = v &^ readOnly
- }
-
- if opts.User {
- v |= user
- v |= mtNormal
- } else {
- v = v &^ user
- v |= mtNormal
- }
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// setPageTable sets this PTE value and forces the write bit and sect bit to
-// be cleared. This is used explicitly for breaking sect pages.
-//
-//go:nosplit
-func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
- addr := pt.Allocator.PhysicalFor(ptes)
- if addr&^optionMask != addr {
- // This should never happen.
- panic("unaligned physical address!")
- }
- v := addr | typeTable | protDefault | mtNormal
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// Address extracts the address. This should only be used if Valid returns true.
-//
-//go:nosplit
-func (p *PTE) Address() uintptr {
- return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
deleted file mode 100644
index 4bdde8448..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Address constraints.
-//
-// The lowerTop and upperBottom currently apply to four-level pagetables;
-// additional refactoring would be necessary to support five-level pagetables.
-const (
- lowerTop = 0x00007fffffffffff
- upperBottom = 0xffff800000000000
-
- pteShift = 12
- pmdShift = 21
- pudShift = 30
- pgdShift = 39
-
- pteMask = 0x1ff << pteShift
- pmdMask = 0x1ff << pmdShift
- pudMask = 0x1ff << pudShift
- pgdMask = 0x1ff << pgdShift
-
- pteSize = 1 << pteShift
- pmdSize = 1 << pmdShift
- pudSize = 1 << pudShift
- pgdSize = 1 << pgdShift
-
- executeDisable = 1 << 63
- entriesPerPage = 512
-)
-
-// InitArch does some additional initialization related to the architecture.
-//
-//go:nosplit
-func (p *PageTables) InitArch(allocator Allocator) {
- if p.upperSharedPageTables != nil {
- p.cloneUpperShared()
- }
-}
-
-func pgdIndex(upperStart uintptr) uintptr {
- if upperStart&(pgdSize-1) != 0 {
- panic("upperStart should be pgd size aligned")
- }
- if upperStart >= upperBottom {
- return entriesPerPage/2 + (upperStart-upperBottom)/pgdSize
- }
- if upperStart < lowerTop {
- return upperStart / pgdSize
- }
- panic("upperStart should be in canonical range")
-}
-
-// cloneUpperShared clone the upper from the upper shared page tables.
-//
-//go:nosplit
-func (p *PageTables) cloneUpperShared() {
- start := pgdIndex(p.upperStart)
- copy(p.root[start:entriesPerPage], p.upperSharedPageTables.root[start:entriesPerPage])
-}
-
-// PTEs is a collection of entries.
-type PTEs [entriesPerPage]PTE
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
deleted file mode 100644
index 54e8e554f..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package pagetables
-
-import (
- "testing"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-func Test2MAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a small page and a huge page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}},
- })
-}
-
-func Test1GAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a small page and a super page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}},
- })
-}
-
-func TestSplit1GPage(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a super page and knock out the middle.
- pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42)
- pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize))
-
- checkMappings(t, pt, []mapping{
- {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}},
- })
-}
-
-func TestSplit2MPage(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a huge page and knock out the middle.
- pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42)
- pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize))
-
- checkMappings(t, pt, []mapping{
- {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}},
- })
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
deleted file mode 100644
index ad0e30c88..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-// Address constraints.
-//
-// The lowerTop and upperBottom currently apply to four-level pagetables;
-// additional refactoring would be necessary to support five-level pagetables.
-const (
- lowerTop = 0x0000ffffffffffff
- upperBottom = 0xffff000000000000
- pteShift = 12
- pmdShift = 21
- pudShift = 30
- pgdShift = 39
-
- pteMask = 0x1ff << pteShift
- pmdMask = 0x1ff << pmdShift
- pudMask = 0x1ff << pudShift
- pgdMask = 0x1ff << pgdShift
-
- pteSize = 1 << pteShift
- pmdSize = 1 << pmdShift
- pudSize = 1 << pudShift
- pgdSize = 1 << pgdShift
-
- ttbrASIDOffset = 48
- ttbrASIDMask = 0xff
-
- entriesPerPage = 512
-)
-
-// InitArch does some additional initialization related to the architecture.
-//
-//go:nosplit
-func (p *PageTables) InitArch(allocator Allocator) {
- if p.upperSharedPageTables != nil {
- p.cloneUpperShared()
- } else {
- p.archPageTables.root = p.Allocator.NewPTEs()
- p.archPageTables.rootPhysical = p.Allocator.PhysicalFor(p.archPageTables.root)
- }
-}
-
-// cloneUpperShared clone the upper from the upper shared page tables.
-//
-//go:nosplit
-func (p *PageTables) cloneUpperShared() {
- if p.upperStart != upperBottom {
- panic("upperStart should be the same as upperBottom")
- }
-
- p.archPageTables.root = p.upperSharedPageTables.archPageTables.root
- p.archPageTables.rootPhysical = p.upperSharedPageTables.archPageTables.rootPhysical
-}
-
-// PTEs is a collection of entries.
-type PTEs [entriesPerPage]PTE
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go
deleted file mode 100644
index 2f73d424f..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-import (
- "testing"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-func Test2MAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a small page and a huge page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: true}, pteSize*42)
- pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*47)
-
- pt.Map(0xffff000000400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: false}, pteSize*42)
- pt.Map(0xffffff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: false}, pmdSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: true}},
- {0x0000ff0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read, User: true}},
- {0xffff000000400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: false}},
- {0xffffff0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read, User: false}},
- })
-}
-
-func Test1GAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a small page and a super page.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: true}, pteSize*42)
- pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: true}},
- {0x0000ff0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read, User: true}},
- })
-}
-
-func TestSplit1GPage(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a super page and knock out the middle.
- pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*42)
- pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize))
-
- checkMappings(t, pt, []mapping{
- {0x0000ff0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
- {0x0000ff0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read, User: true}},
- })
-}
-
-func TestSplit2MPage(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map a huge page and knock out the middle.
- pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*42)
- pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize))
-
- checkMappings(t, pt, []mapping{
- {0x0000ff0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
- {0x0000ff0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read, User: true}},
- })
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
deleted file mode 100644
index 5c88d087d..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
- "testing"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-type mapping struct {
- start uintptr
- length uintptr
- addr uintptr
- opts MapOpts
-}
-
-type checkVisitor struct {
- expected []mapping // Input.
- current int // Temporary.
- found []mapping // Output.
- failed string // Output.
-}
-
-func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) {
- v.found = append(v.found, mapping{
- start: start,
- length: align + 1,
- addr: pte.Address(),
- opts: pte.Opts(),
- })
- if v.failed != "" {
- // Don't keep looking for errors.
- return
- }
-
- if v.current >= len(v.expected) {
- v.failed = "more mappings than expected"
- } else if v.expected[v.current].start != start {
- v.failed = "start didn't match expected"
- } else if v.expected[v.current].length != (align + 1) {
- v.failed = "end didn't match expected"
- } else if v.expected[v.current].addr != pte.Address() {
- v.failed = "address didn't match expected"
- } else if v.expected[v.current].opts != pte.Opts() {
- v.failed = "opts didn't match"
- }
- v.current++
-}
-
-func (*checkVisitor) requiresAlloc() bool { return false }
-
-func (*checkVisitor) requiresSplit() bool { return false }
-
-func checkMappings(t *testing.T, pt *PageTables, m []mapping) {
- // Iterate over all the mappings.
- w := checkWalker{
- pageTables: pt,
- visitor: checkVisitor{
- expected: m,
- },
- }
- w.iterateRange(0, ^uintptr(0))
-
- // Were we expected additional mappings?
- if w.visitor.failed == "" && w.visitor.current != len(w.visitor.expected) {
- w.visitor.failed = "insufficient mappings found"
- }
-
- // Emit a meaningful error message on failure.
- if w.visitor.failed != "" {
- t.Errorf("%s; got %#v, wanted %#v", w.visitor.failed, w.visitor.found, w.visitor.expected)
- }
-}
-
-func TestUnmap(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map and unmap one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Unmap(0x400000, pteSize)
-
- checkMappings(t, pt, nil)
-}
-
-func TestReadOnly(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
- })
-}
-
-func TestReadWrite(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map one entry.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- })
-}
-
-func TestSerialEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map two sequential entries.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x401000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}},
- })
-}
-
-func TestSpanningEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Span a pgd with two pages.
- pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
-
- checkMappings(t, pt, []mapping{
- {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
- {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}},
- })
-}
-
-func TestSparseEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator())
-
- // Map two entries in different pgds.
- pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
- pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*47)
-
- checkMappings(t, pt, []mapping{
- {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
- {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}},
- })
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
deleted file mode 100644
index 157438d9b..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ /dev/null
@@ -1,180 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build 386 amd64
-
-package pagetables
-
-import (
- "sync/atomic"
-
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// archPageTables is architecture-specific data.
-type archPageTables struct {
- // pcid is the value assigned by PCIDs.Assign.
- //
- // Note that zero is a valid PCID.
- pcid uint16
-}
-
-// CR3 returns the CR3 value for these tables.
-//
-// This may be called in interrupt contexts. A PCID of zero always implies a
-// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for
-// more information.
-//
-//go:nosplit
-func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 {
- // Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
- const noFlushBit uint64 = 0x8000000000000000
- if noFlush && pcid != 0 {
- return noFlushBit | uint64(p.rootPhysical) | uint64(pcid)
- }
- return uint64(p.rootPhysical) | uint64(pcid)
-}
-
-// Bits in page table entries.
-const (
- present = 0x001
- writable = 0x002
- user = 0x004
- writeThrough = 0x008
- cacheDisable = 0x010
- accessed = 0x020
- dirty = 0x040
- super = 0x080
- global = 0x100
- optionMask = executeDisable | 0xfff
-)
-
-// MapOpts are x86 options.
-type MapOpts struct {
- // AccessType defines permissions.
- AccessType usermem.AccessType
-
- // Global indicates the page is globally accessible.
- Global bool
-
- // User indicates the page is a user page.
- User bool
-}
-
-// PTE is a page table entry.
-type PTE uintptr
-
-// Clear clears this PTE, including super page information.
-//
-//go:nosplit
-func (p *PTE) Clear() {
- atomic.StoreUintptr((*uintptr)(p), 0)
-}
-
-// Valid returns true iff this entry is valid.
-//
-//go:nosplit
-func (p *PTE) Valid() bool {
- return atomic.LoadUintptr((*uintptr)(p))&present != 0
-}
-
-// Opts returns the PTE options.
-//
-// These are all options except Valid and Super.
-//
-//go:nosplit
-func (p *PTE) Opts() MapOpts {
- v := atomic.LoadUintptr((*uintptr)(p))
- return MapOpts{
- AccessType: usermem.AccessType{
- Read: v&present != 0,
- Write: v&writable != 0,
- Execute: v&executeDisable == 0,
- },
- Global: v&global != 0,
- User: v&user != 0,
- }
-}
-
-// SetSuper sets this page as a super page.
-//
-// The page must not be valid or a panic will result.
-//
-//go:nosplit
-func (p *PTE) SetSuper() {
- if p.Valid() {
- // This is not allowed.
- panic("SetSuper called on valid page!")
- }
- atomic.StoreUintptr((*uintptr)(p), super)
-}
-
-// IsSuper returns true iff this page is a super page.
-//
-//go:nosplit
-func (p *PTE) IsSuper() bool {
- return atomic.LoadUintptr((*uintptr)(p))&super != 0
-}
-
-// Set sets this PTE value.
-//
-// This does not change the super page property.
-//
-//go:nosplit
-func (p *PTE) Set(addr uintptr, opts MapOpts) {
- if !opts.AccessType.Any() {
- p.Clear()
- return
- }
- v := (addr &^ optionMask) | present | accessed
- if opts.User {
- v |= user
- }
- if opts.Global {
- v |= global
- }
- if !opts.AccessType.Execute {
- v |= executeDisable
- }
- if opts.AccessType.Write {
- v |= writable | dirty
- }
- if p.IsSuper() {
- // Note that this is inherited from the previous instance. Set
- // does not change the value of Super. See above.
- v |= super
- }
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// setPageTable sets this PTE value and forces the write bit and super bit to
-// be cleared. This is used explicitly for breaking super pages.
-//
-//go:nosplit
-func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
- addr := pt.Allocator.PhysicalFor(ptes)
- if addr&^optionMask != addr {
- // This should never happen.
- panic("unaligned physical address!")
- }
- v := addr | present | user | writable | accessed | dirty
- atomic.StoreUintptr((*uintptr)(p), v)
-}
-
-// Address extracts the address. This should only be used if Valid returns true.
-//
-//go:nosplit
-func (p *PTE) Address() uintptr {
- return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/sentry/platform/ring0/pagetables/pcids.go
deleted file mode 100644
index 964496aac..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids.go
+++ /dev/null
@@ -1,104 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
- "gvisor.dev/gvisor/pkg/sync"
-)
-
-// PCIDs is a simple PCID database.
-//
-// This is not protected by locks and is thus suitable for use only with a
-// single CPU at a time.
-type PCIDs struct {
- // mu protects below.
- mu sync.Mutex
-
- // cache are the assigned page tables.
- cache map[*PageTables]uint16
-
- // avail are available PCIDs.
- avail []uint16
-}
-
-// NewPCIDs returns a new PCID database.
-//
-// start is the first index to assign. Typically this will be one, as the zero
-// pcid will always be flushed on transition (see pagetables_x86.go). This may
-// be more than one if specific PCIDs are reserved.
-//
-// Nil is returned iff the start and size are out of range.
-func NewPCIDs(start, size uint16) *PCIDs {
- if start+uint16(size) > limitPCID {
- return nil // See comment.
- }
- p := &PCIDs{
- cache: make(map[*PageTables]uint16),
- }
- for pcid := start; pcid < start+size; pcid++ {
- p.avail = append(p.avail, pcid)
- }
- return p
-}
-
-// Assign assigns a PCID to the given PageTables.
-//
-// This may overwrite any previous assignment provided. If this in the case,
-// true is returned to indicate that the PCID should be flushed.
-func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- p.mu.Unlock()
- return pcid, false // No flush.
- }
-
- // Is there something available?
- if len(p.avail) > 0 {
- pcid := p.avail[len(p.avail)-1]
- p.avail = p.avail[:len(p.avail)-1]
- p.cache[pt] = pcid
-
- // We need to flush because while this is in the available
- // pool, it may have been used previously.
- p.mu.Unlock()
- return pcid, true
- }
-
- // Evict an existing table.
- for old, pcid := range p.cache {
- delete(p.cache, old)
- p.cache[pt] = pcid
-
- // A flush is definitely required in this case, these page
- // tables may still be active. (They will just be assigned some
- // other PCID if and when they hit the given CPU again.)
- p.mu.Unlock()
- return pcid, true
- }
-
- // No PCID.
- p.mu.Unlock()
- return 0, false
-}
-
-// Drop drops references to a set of page tables.
-func (p *PCIDs) Drop(pt *PageTables) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- delete(p.cache, pt)
- p.avail = append(p.avail, pcid)
- }
- p.mu.Unlock()
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
deleted file mode 100644
index fbfd41d83..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-// limitPCID is the maximum value of PCIDs.
-//
-// In VMSAv8-64, the PCID(ASID) size is an IMPLEMENTATION DEFINED choice
-// of 8 bits or 16 bits, and ID_AA64MMFR0_EL1.ASIDBits identifies the
-// supported size. When an implementation supports a 16-bit ASID, TCR_ELx.AS
-// selects whether the top 8 bits of the ASID are used.
-var limitPCID uint16
-
-// GetASIDBits return the system ASID bits, 8 or 16 bits.
-func GetASIDBits() uint8
-
-func init() {
- limitPCID = uint16(1)<<GetASIDBits() - 1
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
deleted file mode 100644
index e9d62d768..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-#include "funcdata.h"
-#include "textflag.h"
-
-#define ID_AA64MMFR0_ASIDBITS_SHIFT 4
-#define ID_AA64MMFR0_ASIDBITS_16 2
-#define TCR_EL1_AS_BIT 36
-
-// GetASIDBits return the system ASID bits, 8 or 16 bits.
-//
-// func GetASIDBits() uint8
-TEXT ·GetASIDBits(SB),NOSPLIT,$0-1
- // First, check whether 16bits ASID is supported.
- // ID_AA64MMFR0_EL1.ASIDBITS[7:4] == 0010.
- WORD $0xd5380700 // MRS ID_AA64MMFR0_EL1, R0
- UBFX $ID_AA64MMFR0_ASIDBITS_SHIFT, R0, $4, R0
- CMPW $ID_AA64MMFR0_ASIDBITS_16, R0
- BNE bits_8
-
- // Second, check whether 16bits ASID is enabled.
- // TCR_EL1.AS[36] == 1.
- WORD $0xd5382040 // MRS TCR_EL1, R0
- TBZ $TCR_EL1_AS_BIT, R0, bits_8
- MOVD $16, R0
- B done
-bits_8:
- MOVD $8, R0
-done:
- MOVB R0, ret+0(FP)
- RET
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
deleted file mode 100644
index 91fc5e8dd..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build i386 amd64
-
-package pagetables
-
-// limitPCID is the maximum value of valid PCIDs.
-const limitPCID = 4095
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_amd64.go
deleted file mode 100644
index 8f9dacd93..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build amd64
-
-package pagetables
-
-// Visitor is a generic type.
-type Visitor interface {
- // visit is called on each PTE.
- visit(start uintptr, pte *PTE, align uintptr)
-
- // requiresAlloc indicates that new entries should be allocated within
- // the walked range.
- requiresAlloc() bool
-
- // requiresSplit indicates that entries in the given range should be
- // split if they are huge or jumbo pages.
- requiresSplit() bool
-}
-
-// Walker walks page tables.
-type Walker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor Visitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is super pages. If a valid super page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of super pages whenever
-// possible. Whether a super page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-//
-// Precondition: start must be less than end.
-//
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *Walker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func next(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *Walker) iterateRangeCanonical(start, end uintptr) {
- for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &w.pageTables.root[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- start = next(start, pgdSize)
- continue
- }
-
- // Allocate a new pgd.
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- // Map the next level.
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPUDEntries++
- start = next(start, pudSize)
- continue
- }
-
- // This level has 1-GB super pages. Is this
- // entire region at least as large as a single
- // PUD entry? If so, we can skip allocating a
- // new page for the pmd.
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSuper()
- w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
- if pudEntry.Valid() {
- start = next(start, pudSize)
- continue
- }
- }
-
- // Allocate a new pud.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSuper() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
- // Install the relevant entries.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSuper()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
- // A super page to be checked directly.
- w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
-
- // Might have been cleared.
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- // Note that the super page was changed.
- start = next(start, pudSize)
- continue
- }
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPMDEntries++
- start = next(start, pmdSize)
- continue
- }
-
- // This level has 2-MB huge pages. If this
- // region is contined in a single PMD entry?
- // As above, we can skip allocating a new page.
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSuper()
- w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
- if pmdEntry.Valid() {
- start = next(start, pmdSize)
- continue
- }
- }
-
- // Allocate a new pmd.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSuper() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
- // Install the relevant entries.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
- // A huge page to be checked directly.
- w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
-
- // Might have been cleared.
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- // Note that the huge page was changed.
- start = next(start, pmdSize)
- continue
- }
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- // At this point, we are guaranteed that start%pteSize == 0.
- w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- // Note that the pte was changed.
- start += pteSize
- continue
- }
-
- // Check if we no longer need this page.
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
deleted file mode 100644
index c261d393a..000000000
--- a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
+++ /dev/null
@@ -1,314 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build arm64
-
-package pagetables
-
-// Visitor is a generic type.
-type Visitor interface {
- // visit is called on each PTE.
- visit(start uintptr, pte *PTE, align uintptr)
-
- // requiresAlloc indicates that new entries should be allocated within
- // the walked range.
- requiresAlloc() bool
-
- // requiresSplit indicates that entries in the given range should be
- // split if they are huge or jumbo pages.
- requiresSplit() bool
-}
-
-// Walker walks page tables.
-type Walker struct {
- // pageTables are the tables to walk.
- pageTables *PageTables
-
- // Visitor is the set of arguments.
- visitor Visitor
-}
-
-// iterateRange iterates over all appropriate levels of page tables for the given range.
-//
-// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
-// exception is sect pages. If a valid sect page (huge or jumbo) cannot be
-// installed, then the walk will continue to individual entries.
-//
-// This algorithm will attempt to maximize the use of sect pages whenever
-// possible. Whether a sect page is provided will be clear through the range
-// provided in the callback.
-//
-// Note that if requiresAlloc is true, then no gaps will be present. However,
-// if alloc is not set, then the iteration will likely be full of gaps.
-//
-// Note that this function should generally be avoided in favor of Map, Unmap,
-// etc. when not necessary.
-//
-// Precondition: start must be page-aligned.
-//
-// Precondition: start must be less than end.
-//
-// Precondition: If requiresAlloc is true, then start and end should not span
-// non-canonical ranges. If they do, a panic will result.
-//
-//go:nosplit
-func (w *Walker) iterateRange(start, end uintptr) {
- if start%pteSize != 0 {
- panic("unaligned start")
- }
- if end < start {
- panic("start > end")
- }
- if start < lowerTop {
- if end <= lowerTop {
- w.iterateRangeCanonical(start, end)
- } else if end > lowerTop && end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(start, lowerTop)
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else if start < upperBottom {
- if end <= upperBottom {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- } else {
- if w.visitor.requiresAlloc() {
- panic("alloc spans non-canonical range")
- }
- w.iterateRangeCanonical(upperBottom, end)
- }
- } else {
- w.iterateRangeCanonical(start, end)
- }
-}
-
-// next returns the next address quantized by the given size.
-//
-//go:nosplit
-func next(start uintptr, size uintptr) uintptr {
- start &= ^(size - 1)
- start += size
- return start
-}
-
-// iterateRangeCanonical walks a canonical range.
-//
-//go:nosplit
-func (w *Walker) iterateRangeCanonical(start, end uintptr) {
- pgdEntryIndex := w.pageTables.root
- if start >= upperBottom {
- pgdEntryIndex = w.pageTables.archPageTables.root
- }
-
- for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
- var (
- pgdEntry = &pgdEntryIndex[pgdIndex]
- pudEntries *PTEs
- )
- if !pgdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- start = next(start, pgdSize)
- continue
- }
-
- // Allocate a new pgd.
- pudEntries = w.pageTables.Allocator.NewPTEs()
- pgdEntry.setPageTable(w.pageTables, pudEntries)
- } else {
- pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
- }
-
- // Map the next level.
- clearPUDEntries := uint16(0)
-
- for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
- var (
- pudEntry = &pudEntries[pudIndex]
- pmdEntries *PTEs
- )
- if !pudEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPUDEntries++
- start = next(start, pudSize)
- continue
- }
-
- // This level has 1-GB sect pages. Is this
- // entire region at least as large as a single
- // PUD entry? If so, we can skip allocating a
- // new page for the pmd.
- if start&(pudSize-1) == 0 && end-start >= pudSize {
- pudEntry.SetSect()
- w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
- if pudEntry.Valid() {
- start = next(start, pudSize)
- continue
- }
- }
-
- // Allocate a new pud.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- pudEntry.setPageTable(w.pageTables, pmdEntries)
-
- } else if pudEntry.IsSect() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
- // Install the relevant entries.
- pmdEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pmdEntries[index].SetSect()
- pmdEntries[index].Set(
- pudEntry.Address()+(pmdSize*uintptr(index)),
- pudEntry.Opts())
- }
- pudEntry.setPageTable(w.pageTables, pmdEntries)
- } else {
- // A sect page to be checked directly.
- w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
-
- // Might have been cleared.
- if !pudEntry.Valid() {
- clearPUDEntries++
- }
-
- // Note that the sect page was changed.
- start = next(start, pudSize)
- continue
- }
-
- } else {
- pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPMDEntries := uint16(0)
-
- for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
- var (
- pmdEntry = &pmdEntries[pmdIndex]
- pteEntries *PTEs
- )
- if !pmdEntry.Valid() {
- if !w.visitor.requiresAlloc() {
- // Skip over this entry.
- clearPMDEntries++
- start = next(start, pmdSize)
- continue
- }
-
- // This level has 2-MB huge pages. If this
- // region is contined in a single PMD entry?
- // As above, we can skip allocating a new page.
- if start&(pmdSize-1) == 0 && end-start >= pmdSize {
- pmdEntry.SetSect()
- w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
- if pmdEntry.Valid() {
- start = next(start, pmdSize)
- continue
- }
- }
-
- // Allocate a new pmd.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- pmdEntry.setPageTable(w.pageTables, pteEntries)
-
- } else if pmdEntry.IsSect() {
- // Does this page need to be split?
- if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
- // Install the relevant entries.
- pteEntries = w.pageTables.Allocator.NewPTEs()
- for index := uint16(0); index < entriesPerPage; index++ {
- pteEntries[index].Set(
- pmdEntry.Address()+(pteSize*uintptr(index)),
- pmdEntry.Opts())
- }
- pmdEntry.setPageTable(w.pageTables, pteEntries)
- } else {
- // A huge page to be checked directly.
- w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
-
- // Might have been cleared.
- if !pmdEntry.Valid() {
- clearPMDEntries++
- }
-
- // Note that the huge page was changed.
- start = next(start, pmdSize)
- continue
- }
-
- } else {
- pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
- }
-
- // Map the next level, since this is valid.
- clearPTEEntries := uint16(0)
-
- for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
- var (
- pteEntry = &pteEntries[pteIndex]
- )
- if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
- clearPTEEntries++
- start += pteSize
- continue
- }
-
- // At this point, we are guaranteed that start%pteSize == 0.
- w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
- if !pteEntry.Valid() {
- if w.visitor.requiresAlloc() {
- panic("PTE not set after iteration with requiresAlloc!")
- }
- clearPTEEntries++
- }
-
- // Note that the pte was changed.
- start += pteSize
- continue
- }
-
- // Check if we no longer need this page.
- if clearPTEEntries == entriesPerPage {
- pmdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pteEntries)
- clearPMDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPMDEntries == entriesPerPage {
- pudEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pmdEntries)
- clearPUDEntries++
- }
- }
-
- // Check if we no longer need this page.
- if clearPUDEntries == entriesPerPage {
- pgdEntry.Clear()
- w.pageTables.Allocator.FreePTEs(pudEntries)
- }
- }
-}
diff --git a/pkg/sentry/platform/ring0/ring0.go b/pkg/sentry/platform/ring0/ring0.go
deleted file mode 100644
index cdeb1b43a..000000000
--- a/pkg/sentry/platform/ring0/ring0.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package ring0 provides basic operating system-level stubs.
-package ring0
diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go
deleted file mode 100644
index 34fbc1c35..000000000
--- a/pkg/sentry/platform/ring0/x86.go
+++ /dev/null
@@ -1,296 +0,0 @@
-// Copyright 2018 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build 386 amd64
-
-package ring0
-
-import (
- "gvisor.dev/gvisor/pkg/cpuid"
-)
-
-// Useful bits.
-const (
- _CR0_PE = 1 << 0
- _CR0_ET = 1 << 4
- _CR0_AM = 1 << 18
- _CR0_PG = 1 << 31
-
- _CR4_PSE = 1 << 4
- _CR4_PAE = 1 << 5
- _CR4_PGE = 1 << 7
- _CR4_OSFXSR = 1 << 9
- _CR4_OSXMMEXCPT = 1 << 10
- _CR4_FSGSBASE = 1 << 16
- _CR4_PCIDE = 1 << 17
- _CR4_OSXSAVE = 1 << 18
- _CR4_SMEP = 1 << 20
-
- _RFLAGS_AC = 1 << 18
- _RFLAGS_NT = 1 << 14
- _RFLAGS_IOPL0 = 1 << 12
- _RFLAGS_IOPL1 = 1 << 13
- _RFLAGS_IOPL = _RFLAGS_IOPL0 | _RFLAGS_IOPL1
- _RFLAGS_DF = 1 << 10
- _RFLAGS_IF = 1 << 9
- _RFLAGS_STEP = 1 << 8
- _RFLAGS_RESERVED = 1 << 1
-
- _EFER_SCE = 0x001
- _EFER_LME = 0x100
- _EFER_LMA = 0x400
- _EFER_NX = 0x800
-
- _MSR_STAR = 0xc0000081
- _MSR_LSTAR = 0xc0000082
- _MSR_CSTAR = 0xc0000083
- _MSR_SYSCALL_MASK = 0xc0000084
- _MSR_PLATFORM_INFO = 0xce
- _MSR_MISC_FEATURES = 0x140
-
- _PLATFORM_INFO_CPUID_FAULT = 1 << 31
-
- _MISC_FEATURE_CPUID_TRAP = 0x1
-)
-
-const (
- // KernelFlagsSet should always be set in the kernel.
- KernelFlagsSet = _RFLAGS_RESERVED
-
- // UserFlagsSet are always set in userspace.
- //
- // _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege
- // level. The Current Privilege Level (CPL) of the task must be less
- // than or equal to the IOPL in order for the task or program to access
- // I/O ports.
- //
- // Here, _RFLAGS_IOPL0 is used only to determine whether the task is
- // running in the kernel or userspace mode. In the user mode, the CPL is
- // always 3 and it doesn't matter what IOPL is set if it is below CPL.
- //
- // We need to have one bit which will be always different in user and
- // kernel modes. And we have to remember that even though we have
- // KernelFlagsClear, we still can see some of these flags in the kernel
- // mode. This can happen when the Go runtime switches to a goroutine
- // which has been saved in the host mode. On restore, the popf
- // instruction is used to restore flags and this means that all flags
- // that the goroutine had in the host mode will be restored in the
- // kernel mode.
- //
- // _RFLAGS_IOPL0 is never set in host and kernel modes and we always set
- // it in the user mode. So if this flag is set, the task is running in
- // the user mode and if it isn't set, the task is running in the kernel
- // mode.
- UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0
-
- // KernelFlagsClear should always be clear in the kernel.
- KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT
-
- // UserFlagsClear are always cleared in userspace.
- UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1
-)
-
- // IsKernelFlags returns true if rflags corresponds to the kernel mode.
-//
-// go:nosplit
-func IsKernelFlags(rflags uint64) bool {
- return rflags&_RFLAGS_IOPL0 == 0
-}
-
-// Vector is an exception vector.
-type Vector uintptr
-
-// Exception vectors.
-const (
- DivideByZero Vector = iota
- Debug
- NMI
- Breakpoint
- Overflow
- BoundRangeExceeded
- InvalidOpcode
- DeviceNotAvailable
- DoubleFault
- CoprocessorSegmentOverrun
- InvalidTSS
- SegmentNotPresent
- StackSegmentFault
- GeneralProtectionFault
- PageFault
- _
- X87FloatingPointException
- AlignmentCheck
- MachineCheck
- SIMDFloatingPointException
- VirtualizationException
- SecurityException = 0x1e
- SyscallInt80 = 0x80
- _NR_INTERRUPTS = 0x100
-)
-
-// System call vectors.
-const (
- Syscall Vector = _NR_INTERRUPTS
-)
-
- // VirtualAddressBits returns the number of bits available for virtual addresses.
-//
-// Note that sign-extension semantics apply to the highest order bit.
-//
-// FIXME(b/69382326): This should use the cpuid passed to Init.
-func VirtualAddressBits() uint32 {
- ax, _, _, _ := cpuid.HostID(0x80000008, 0)
- return (ax >> 8) & 0xff
-}
-
-// PhysicalAddressBits returns the number of bits available for physical addresses.
-//
-// FIXME(b/69382326): This should use the cpuid passed to Init.
-func PhysicalAddressBits() uint32 {
- ax, _, _, _ := cpuid.HostID(0x80000008, 0)
- return ax & 0xff
-}
-
-// Selector is a segment Selector.
-type Selector uint16
-
-// SegmentDescriptor is a segment descriptor.
-type SegmentDescriptor struct {
- bits [2]uint32
-}
-
-// descriptorTable is a collection of descriptors.
-type descriptorTable [32]SegmentDescriptor
-
-// SegmentDescriptorFlags are typed flags within a descriptor.
-type SegmentDescriptorFlags uint32
-
-// SegmentDescriptorFlag declarations.
-const (
- SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set).
- SegmentDescriptorWrite = 1 << 9 // Write permission.
- SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used.
- SegmentDescriptorExecute = 1 << 11 // Execute permission.
- SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data.
- SegmentDescriptorPresent = 1 << 15 // Present.
- SegmentDescriptorAVL = 1 << 20 // Available.
- SegmentDescriptorLong = 1 << 21 // Long mode.
- SegmentDescriptorDB = 1 << 22 // 16 or 32-bit.
- SegmentDescriptorG = 1 << 23 // Granularity: page or byte.
-)
-
-// Base returns the descriptor's base linear address.
-func (d *SegmentDescriptor) Base() uint32 {
- return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16
-}
-
-// Limit returns the descriptor size.
-func (d *SegmentDescriptor) Limit() uint32 {
- l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000
- if d.bits[1]&uint32(SegmentDescriptorG) != 0 {
- l <<= 12
- l |= 0xFFF
- }
- return l
-}
-
-// Flags returns descriptor flags.
-func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags {
- return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00)
-}
-
-// DPL returns the descriptor privilege level.
-func (d *SegmentDescriptor) DPL() int {
- return int((d.bits[1] >> 13) & 3)
-}
-
-func (d *SegmentDescriptor) setNull() {
- d.bits[0] = 0
- d.bits[1] = 0
-}
-
-func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) {
- flags |= SegmentDescriptorPresent
- if limit>>12 != 0 {
- limit >>= 12
- flags |= SegmentDescriptorG
- }
- d.bits[0] = base<<16 | limit&0xFFFF
- d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13
-}
-
-func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorDB|
- SegmentDescriptorExecute|
- SegmentDescriptorSystem)
-}
-
-func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorG|
- SegmentDescriptorLong|
- SegmentDescriptorExecute|
- SegmentDescriptorSystem)
-}
-
-func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) {
- d.set(base, limit, dpl,
- SegmentDescriptorWrite|
- SegmentDescriptorSystem)
-}
-
-// setHi is only used for the TSS segment, which is magically 64-bits.
-func (d *SegmentDescriptor) setHi(base uint32) {
- d.bits[0] = base
- d.bits[1] = 0
-}
-
-// Gate64 is a 64-bit task, trap, or interrupt gate.
-type Gate64 struct {
- bits [4]uint32
-}
-
-// idt64 is a 64-bit interrupt descriptor table.
-type idt64 [_NR_INTERRUPTS]Gate64
-
-func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) {
- g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF
- g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7
- g.bits[2] = uint32(rip >> 32)
-}
-
-func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) {
- g.setInterrupt(cs, rip, dpl, ist)
- g.bits[1] |= 1 << 8
-}
-
-// TaskState64 is a 64-bit task state structure.
-type TaskState64 struct {
- _ uint32
- rsp0Lo, rsp0Hi uint32
- rsp1Lo, rsp1Hi uint32
- rsp2Lo, rsp2Hi uint32
- _ [2]uint32
- ist1Lo, ist1Hi uint32
- ist2Lo, ist2Hi uint32
- ist3Lo, ist3Hi uint32
- ist4Lo, ist4Hi uint32
- ist5Lo, ist5Hi uint32
- ist6Lo, ist6Hi uint32
- ist7Lo, ist7Hi uint32
- _ [2]uint32
- _ uint16
- ioPerm uint16
-}
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 7065a0e46..69693f263 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -251,11 +251,11 @@ var errStackType = syserr.New("expected but did not receive a netstack.Stack", l
type commonEndpoint interface {
// GetLocalAddress implements tcpip.Endpoint.GetLocalAddress and
// transport.Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// GetRemoteAddress implements tcpip.Endpoint.GetRemoteAddress and
// transport.Endpoint.GetRemoteAddress.
- GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
// Readiness implements tcpip.Endpoint.Readiness and
// transport.Endpoint.Readiness.
@@ -263,19 +263,19 @@ type commonEndpoint interface {
// SetSockOpt implements tcpip.Endpoint.SetSockOpt and
// transport.Endpoint.SetSockOpt.
- SetSockOpt(tcpip.SettableSocketOption) *tcpip.Error
+ SetSockOpt(tcpip.SettableSocketOption) tcpip.Error
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt and
// transport.Endpoint.SetSockOptInt.
- SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error
+ SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error
// GetSockOpt implements tcpip.Endpoint.GetSockOpt and
// transport.Endpoint.GetSockOpt.
- GetSockOpt(tcpip.GettableSocketOption) *tcpip.Error
+ GetSockOpt(tcpip.GettableSocketOption) tcpip.Error
// GetSockOptInt implements tcpip.Endpoint.GetSockOptInt and
// transport.Endpoint.GetSockOptInt.
- GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error)
+ GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)
// State returns a socket's lifecycle state. The returned value is
// protocol-specific and is primarily used for diagnostics.
@@ -283,7 +283,7 @@ type commonEndpoint interface {
// LastError implements tcpip.Endpoint.LastError and
// transport.Endpoint.LastError.
- LastError() *tcpip.Error
+ LastError() tcpip.Error
// SocketOptions implements tcpip.Endpoint.SocketOptions and
// transport.Endpoint.SocketOptions.
@@ -442,7 +442,7 @@ func (s *SocketOperations) WriteTo(ctx context.Context, _ *fs.File, dst io.Write
func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
r := src.Reader(ctx)
n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
- if err == tcpip.ErrWouldBlock {
+ if _, ok := err.(*tcpip.ErrWouldBlock); ok {
return 0, syserror.ErrWouldBlock
}
if err != nil {
@@ -459,17 +459,24 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
var _ tcpip.Payloader = (*limitedPayloader)(nil)
type limitedPayloader struct {
- io.LimitedReader
+ inner io.LimitedReader
+ err error
}
-func (l limitedPayloader) Len() int {
- return int(l.N)
+func (l *limitedPayloader) Read(p []byte) (int, error) {
+ n, err := l.inner.Read(p)
+ l.err = err
+ return n, err
+}
+
+func (l *limitedPayloader) Len() int {
+ return int(l.inner.N)
}
// ReadFrom implements fs.FileOperations.ReadFrom.
func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader, count int64) (int64, error) {
f := limitedPayloader{
- LimitedReader: io.LimitedReader{
+ inner: io.LimitedReader{
R: r,
N: count,
},
@@ -479,8 +486,8 @@ func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader
// so we can't release the lock while copying data.
Atomic: true,
})
- if err == tcpip.ErrBadBuffer {
- err = nil
+ if _, ok := err.(*tcpip.ErrBadBuffer); ok {
+ return n, f.err
}
return n, syserr.TranslateNetstackError(err).ToError()
}
@@ -526,7 +533,7 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
if family == linux.AF_UNSPEC {
err := s.Endpoint.Disconnect()
- if err == tcpip.ErrNotSupported {
+ if _, ok := err.(*tcpip.ErrNotSupported); ok {
return syserr.ErrAddressFamilyNotSupported
}
return syserr.TranslateNetstackError(err)
@@ -548,15 +555,16 @@ func (s *socketOpsCommon) Connect(t *kernel.Task, sockaddr []byte, blocking bool
s.EventRegister(&e, waiter.EventOut)
defer s.EventUnregister(&e)
- if err := s.Endpoint.Connect(addr); err != tcpip.ErrConnectStarted && err != tcpip.ErrAlreadyConnecting {
+ switch err := s.Endpoint.Connect(addr); err.(type) {
+ case *tcpip.ErrConnectStarted, *tcpip.ErrAlreadyConnecting:
+ case *tcpip.ErrNoPortAvailable:
if (s.family == unix.AF_INET || s.family == unix.AF_INET6) && s.skType == linux.SOCK_STREAM {
// TCP unlike UDP returns EADDRNOTAVAIL when it can't
// find an available local ephemeral port.
- if err == tcpip.ErrNoPortAvailable {
- return syserr.ErrAddressNotAvailable
- }
+ return syserr.ErrAddressNotAvailable
}
-
+ return syserr.TranslateNetstackError(err)
+ default:
return syserr.TranslateNetstackError(err)
}
@@ -614,16 +622,16 @@ func (s *socketOpsCommon) Bind(t *kernel.Task, sockaddr []byte) *syserr.Error {
// Issue the bind request to the endpoint.
err := s.Endpoint.Bind(addr)
- if err == tcpip.ErrNoPortAvailable {
+ if _, ok := err.(*tcpip.ErrNoPortAvailable); ok {
// Bind always returns EADDRINUSE irrespective of if the specified port was
// already bound or if an ephemeral port was requested but none were
// available.
//
- // tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because
+ // *tcpip.ErrNoPortAvailable is mapped to EAGAIN in syserr package because
// UDP connect returns EAGAIN on ephemeral port exhaustion.
//
// TCP connect returns EADDRNOTAVAIL on ephemeral port exhaustion.
- err = tcpip.ErrPortInUse
+ err = &tcpip.ErrPortInUse{}
}
return syserr.TranslateNetstackError(err)
@@ -646,7 +654,8 @@ func (s *socketOpsCommon) blockingAccept(t *kernel.Task, peerAddr *tcpip.FullAdd
// Try to accept the connection again; if it fails, then wait until we
// get a notification.
for {
- if ep, wq, err := s.Endpoint.Accept(peerAddr); err != tcpip.ErrWouldBlock {
+ ep, wq, err := s.Endpoint.Accept(peerAddr)
+ if _, ok := err.(*tcpip.ErrWouldBlock); !ok {
return ep, wq, syserr.TranslateNetstackError(err)
}
@@ -665,7 +674,7 @@ func (s *SocketOperations) Accept(t *kernel.Task, peerRequested bool, flags int,
}
ep, wq, terr := s.Endpoint.Accept(peerAddr)
if terr != nil {
- if terr != tcpip.ErrWouldBlock || !blocking {
+ if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking {
return 0, nil, 0, syserr.TranslateNetstackError(terr)
}
@@ -1098,6 +1107,29 @@ func getSockOptTCP(t *kernel.Task, s socket.SocketOps, ep commonEndpoint, name,
// TODO(b/64800844): Translate fields once they are added to
// tcpip.TCPInfoOption.
info := linux.TCPInfo{}
+ switch v.CcState {
+ case tcpip.RTORecovery:
+ info.CaState = linux.TCP_CA_Loss
+ case tcpip.FastRecovery, tcpip.SACKRecovery:
+ info.CaState = linux.TCP_CA_Recovery
+ case tcpip.Disorder:
+ info.CaState = linux.TCP_CA_Disorder
+ case tcpip.Open:
+ info.CaState = linux.TCP_CA_Open
+ }
+ info.RTO = uint32(v.RTO / time.Microsecond)
+ info.RTT = uint32(v.RTT / time.Microsecond)
+ info.RTTVar = uint32(v.RTTVar / time.Microsecond)
+ info.SndSsthresh = v.SndSsthresh
+ info.SndCwnd = v.SndCwnd
+
+ // In netstack reorderSeen is updated only when RACK is enabled.
+ // We only track whether the reordering is seen, which is
+ // different than Linux where reorderSeen is not specific to
+ // RACK and is incremented when a reordering event is seen.
+ if v.ReorderSeen {
+ info.ReordSeen = 1
+ }
// Linux truncates the output binary to outLen.
buf := t.CopyScratchBuffer(info.SizeBytes())
@@ -2534,7 +2566,7 @@ func (s *socketOpsCommon) nonBlockingRead(ctx context.Context, dst usermem.IOSeq
defer s.readMu.Unlock()
res, err := s.Endpoint.Read(w, readOptions)
- if err == tcpip.ErrBadBuffer && dst.NumBytes() == 0 {
+ if _, ok := err.(*tcpip.ErrBadBuffer); ok && dst.NumBytes() == 0 {
err = nil
}
if err != nil {
@@ -2634,9 +2666,9 @@ func (s *socketOpsCommon) dequeueErr() *tcpip.SockError {
}
// Update socket error to reflect ICMP errors in queue.
- if nextErr := so.PeekErr(); nextErr != nil && nextErr.ErrOrigin.IsICMPErr() {
+ if nextErr := so.PeekErr(); nextErr != nil && nextErr.Cause.Origin().IsICMPErr() {
so.SetLastError(nextErr.Err)
- } else if err.ErrOrigin.IsICMPErr() {
+ } else if err.Cause.Origin().IsICMPErr() {
so.SetLastError(nil)
}
return err
@@ -2790,13 +2822,15 @@ func (s *socketOpsCommon) SendMsg(t *kernel.Task, src usermem.IOSequence, to []b
if flags&linux.MSG_DONTWAIT != 0 {
return int(total), syserr.TranslateNetstackError(err)
}
- switch err {
+ block := true
+ switch err.(type) {
case nil:
- if total == src.NumBytes() {
- break
- }
- fallthrough
- case tcpip.ErrWouldBlock:
+ block = total != src.NumBytes()
+ case *tcpip.ErrWouldBlock:
+ default:
+ block = false
+ }
+ if block {
if ch == nil {
// We'll have to block. Register for notification and keep trying to
// send all the data.
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 3bbdf552e..24922c400 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -130,7 +130,7 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
r := src.Reader(ctx)
n, err := s.Endpoint.Write(r, tcpip.WriteOptions{})
- if err == tcpip.ErrWouldBlock {
+ if _, ok := err.(*tcpip.ErrWouldBlock); ok {
return 0, syserror.ErrWouldBlock
}
if err != nil {
@@ -154,7 +154,7 @@ func (s *SocketVFS2) Accept(t *kernel.Task, peerRequested bool, flags int, block
}
ep, wq, terr := s.Endpoint.Accept(peerAddr)
if terr != nil {
- if terr != tcpip.ErrWouldBlock || !blocking {
+ if _, ok := terr.(*tcpip.ErrWouldBlock); !ok || !blocking {
return 0, nil, 0, syserr.TranslateNetstackError(terr)
}
diff --git a/pkg/sentry/socket/netstack/provider.go b/pkg/sentry/socket/netstack/provider.go
index c847ff1c7..2515dda80 100644
--- a/pkg/sentry/socket/netstack/provider.go
+++ b/pkg/sentry/socket/netstack/provider.go
@@ -118,7 +118,7 @@ func (p *provider) Socket(t *kernel.Task, stype linux.SockType, protocol int) (*
// Create the endpoint.
var ep tcpip.Endpoint
- var e *tcpip.Error
+ var e tcpip.Error
wq := &waiter.Queue{}
if stype == linux.SOCK_RAW {
ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated)
diff --git a/pkg/sentry/socket/netstack/provider_vfs2.go b/pkg/sentry/socket/netstack/provider_vfs2.go
index 0af805246..ba1cc79e9 100644
--- a/pkg/sentry/socket/netstack/provider_vfs2.go
+++ b/pkg/sentry/socket/netstack/provider_vfs2.go
@@ -62,7 +62,7 @@ func (p *providerVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol int
// Create the endpoint.
var ep tcpip.Endpoint
- var e *tcpip.Error
+ var e tcpip.Error
wq := &waiter.Queue{}
if stype == linux.SOCK_RAW {
ep, e = eps.Stack.NewRawEndpoint(transProto, p.netProto, wq, associated)
diff --git a/pkg/sentry/socket/socket.go b/pkg/sentry/socket/socket.go
index 97729dacc..cc535d794 100644
--- a/pkg/sentry/socket/socket.go
+++ b/pkg/sentry/socket/socket.go
@@ -81,10 +81,10 @@ func sockErrCmsgToLinux(sockErr *tcpip.SockError) linux.SockErrCMsg {
ee := linux.SockExtendedErr{
Errno: uint32(syserr.TranslateNetstackError(sockErr.Err).ToLinux().Number()),
- Origin: errOriginToLinux(sockErr.ErrOrigin),
- Type: sockErr.ErrType,
- Code: sockErr.ErrCode,
- Info: sockErr.ErrInfo,
+ Origin: errOriginToLinux(sockErr.Cause.Origin()),
+ Type: sockErr.Cause.Type(),
+ Code: sockErr.Cause.Code(),
+ Info: sockErr.Cause.Info(),
}
switch sockErr.NetProto {
diff --git a/pkg/sentry/socket/unix/transport/connectioned.go b/pkg/sentry/socket/unix/transport/connectioned.go
index b011082dc..fc5b823b0 100644
--- a/pkg/sentry/socket/unix/transport/connectioned.go
+++ b/pkg/sentry/socket/unix/transport/connectioned.go
@@ -48,7 +48,7 @@ type ConnectingEndpoint interface {
Type() linux.SockType
// GetLocalAddress returns the bound path.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Locker protects the following methods. While locked, only the holder of
// the lock can change the return value of the protected methods.
diff --git a/pkg/sentry/socket/unix/transport/unix.go b/pkg/sentry/socket/unix/transport/unix.go
index 0e3889c6d..70227bbd2 100644
--- a/pkg/sentry/socket/unix/transport/unix.go
+++ b/pkg/sentry/socket/unix/transport/unix.go
@@ -169,32 +169,32 @@ type Endpoint interface {
Type() linux.SockType
// GetLocalAddress returns the address to which the endpoint is bound.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// GetRemoteAddress returns the address to which the endpoint is
// connected.
- GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
// SetSockOpt sets a socket option.
- SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error
+ SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error
// SetSockOptInt sets a socket option for simple cases when a value has
// the int type.
- SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error
+ SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error
// GetSockOpt gets a socket option.
- GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error
+ GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error
// GetSockOptInt gets a socket option for simple cases when a return
// value has the int type.
- GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error)
+ GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)
// State returns the current state of the socket, as represented by Linux in
// procfs.
State() uint32
// LastError clears and returns the last error reported by the endpoint.
- LastError() *tcpip.Error
+ LastError() tcpip.Error
// SocketOptions returns the structure which contains all the socket
// level options.
@@ -580,7 +580,7 @@ type ConnectedEndpoint interface {
Passcred() bool
// GetLocalAddress implements Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Send sends a single message. This method does not block.
//
@@ -640,7 +640,7 @@ type connectedEndpoint struct {
Passcred() bool
// GetLocalAddress implements Endpoint.GetLocalAddress.
- GetLocalAddress() (tcpip.FullAddress, *tcpip.Error)
+ GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
// Type implements Endpoint.Type.
Type() linux.SockType
@@ -655,7 +655,7 @@ func (e *connectedEndpoint) Passcred() bool {
}
// GetLocalAddress implements ConnectedEndpoint.GetLocalAddress.
-func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *connectedEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
return e.endpoint.GetLocalAddress()
}
@@ -836,11 +836,11 @@ func (e *baseEndpoint) SendMsg(ctx context.Context, data [][]byte, c ControlMess
}
// SetSockOpt sets a socket option.
-func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) *tcpip.Error {
+func (e *baseEndpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
return nil
}
-func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
+func (e *baseEndpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
switch opt {
case tcpip.ReceiveBufferSizeOption:
default:
@@ -855,34 +855,34 @@ func (e *baseEndpoint) IsUnixSocket() bool {
}
// GetSendBufferSize implements tcpip.SocketOptionsHandler.GetSendBufferSize.
-func (e *baseEndpoint) GetSendBufferSize() (int64, *tcpip.Error) {
+func (e *baseEndpoint) GetSendBufferSize() (int64, tcpip.Error) {
e.Lock()
defer e.Unlock()
if !e.Connected() {
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v := e.connected.SendMaxQueueSize()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return v, nil
}
-func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
+func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error) {
switch opt {
case tcpip.ReceiveQueueSizeOption:
v := 0
e.Lock()
if !e.Connected() {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v = int(e.receiver.RecvQueuedSize())
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return v, nil
@@ -890,12 +890,12 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.Lock()
if !e.Connected() {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v := e.connected.SendQueuedSize()
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return int(v), nil
@@ -903,29 +903,29 @@ func (e *baseEndpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.Lock()
if e.receiver == nil {
e.Unlock()
- return -1, tcpip.ErrNotConnected
+ return -1, &tcpip.ErrNotConnected{}
}
v := e.receiver.RecvMaxQueueSize()
e.Unlock()
if v < 0 {
- return -1, tcpip.ErrQueueSizeNotSupported
+ return -1, &tcpip.ErrQueueSizeNotSupported{}
}
return int(v), nil
default:
log.Warningf("Unsupported socket option: %d", opt)
- return -1, tcpip.ErrUnknownProtocolOption
+ return -1, &tcpip.ErrUnknownProtocolOption{}
}
}
// GetSockOpt implements tcpip.Endpoint.GetSockOpt.
-func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) *tcpip.Error {
+func (e *baseEndpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error {
log.Warningf("Unsupported socket option: %T", opt)
- return tcpip.ErrUnknownProtocolOption
+ return &tcpip.ErrUnknownProtocolOption{}
}
// LastError implements Endpoint.LastError.
-func (*baseEndpoint) LastError() *tcpip.Error {
+func (*baseEndpoint) LastError() tcpip.Error {
return nil
}
@@ -965,7 +965,7 @@ func (e *baseEndpoint) Shutdown(flags tcpip.ShutdownFlags) *syserr.Error {
}
// GetLocalAddress returns the bound path.
-func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error) {
e.Lock()
defer e.Unlock()
return tcpip.FullAddress{Addr: tcpip.Address(e.path)}, nil
@@ -973,14 +973,14 @@ func (e *baseEndpoint) GetLocalAddress() (tcpip.FullAddress, *tcpip.Error) {
// GetRemoteAddress returns the local address of the connected endpoint (if
// available).
-func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, *tcpip.Error) {
+func (e *baseEndpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error) {
e.Lock()
c := e.connected
e.Unlock()
if c != nil {
return c.GetLocalAddress()
}
- return tcpip.FullAddress{}, tcpip.ErrNotConnected
+ return tcpip.FullAddress{}, &tcpip.ErrNotConnected{}
}
// Release implements BoundEndpoint.Release.
diff --git a/pkg/sentry/syscalls/linux/error.go b/pkg/sentry/syscalls/linux/error.go
index dab6207c0..d1778d029 100644
--- a/pkg/sentry/syscalls/linux/error.go
+++ b/pkg/sentry/syscalls/linux/error.go
@@ -134,8 +134,8 @@ func handleIOErrorImpl(t *kernel.Task, partialResult bool, err, intr error, op s
// Similar to EPIPE. Return what we wrote this time, and let
// ENOSPC be returned on the next call.
return true, nil
- case syserror.ECONNRESET:
- // For TCP sendfile connections, we may have a reset. But we
+ case syserror.ECONNRESET, syserror.ETIMEDOUT:
+ // For TCP sendfile connections, we may have a reset or timeout. But we
// should just return n as the result.
return true, nil
case syserror.ErrWouldBlock:
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index e39f074f2..1a31898e8 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -123,6 +123,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ switch cmd {
+ case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC, linux.F_GETFD, linux.F_SETFD, linux.F_GETFL:
+ // allowed
+ default:
+ return 0, nil, syserror.EBADF
+ }
+ }
+
switch cmd {
case linux.F_DUPFD, linux.F_DUPFD_CLOEXEC:
minfd := args[2].Int()
@@ -395,6 +404,10 @@ func Fadvise64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
// If the FD refers to a pipe or FIFO, return error.
if _, isPipe := file.Impl().(*pipe.VFSPipeFD); isPipe {
return 0, nil, syserror.ESPIPE
diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
index 20c264fef..c7c3fed57 100644
--- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go
+++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
@@ -32,6 +32,10 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
// Handle ioctls that apply to all FDs.
switch args[1].Int() {
case linux.FIONCLEX:
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index 6e9b599e2..1f8a5878c 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -36,6 +36,10 @@ func Syncfs(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal
}
defer file.DecRef(t)
+ if file.StatusFlags()&linux.O_PATH != 0 {
+ return 0, nil, syserror.EBADF
+ }
+
return 0, nil, file.SyncFS(t)
}
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index a3868bf16..df4990854 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -83,6 +83,7 @@ go_library(
"mount.go",
"mount_namespace_refs.go",
"mount_unsafe.go",
+ "opath.go",
"options.go",
"pathname.go",
"permissions.go",
diff --git a/pkg/sentry/vfs/opath.go b/pkg/sentry/vfs/opath.go
new file mode 100644
index 000000000..39fbac987
--- /dev/null
+++ b/pkg/sentry/vfs/opath.go
@@ -0,0 +1,139 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// opathFD implements vfs.FileDescriptionImpl for a file description opened with O_PATH.
+//
+// Of the methods defined on opathFD, Release is a no-op, Stat and StatFS
+// query the filesystem through the description's own VirtualDentry, and all
+// remaining ones fail with EBADF.
+//
+// +stateify savable
+type opathFD struct {
+ // vfsfd is the FileDescription that this implementation backs.
+ vfsfd FileDescription
+ // NOTE(review): the two embedded types presumably supply the
+ // FileDescriptionImpl methods that opathFD does not define itself --
+ // confirm against their declarations.
+ FileDescriptionDefaultImpl
+ NoLockFD
+}
+
+// Release implements vfs.FileDescriptionImpl.Release.
+//
+// It intentionally does nothing; the context argument is unused.
+func (fd *opathFD) Release(context.Context) {
+ // noop
+}
+
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+//
+// It always fails with EBADF; mode, offset and length are ignored.
+func (fd *opathFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.EBADF
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
+//
+// It always returns (0, EBADF) without touching dst.
+func (fd *opathFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Read implements vfs.FileDescriptionImpl.Read.
+//
+// It always returns (0, EBADF) without touching dst.
+func (fd *opathFD) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// PWrite implements vfs.FileDescriptionImpl.PWrite.
+//
+// It always returns (0, EBADF) without consuming src.
+func (fd *opathFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Write implements vfs.FileDescriptionImpl.Write.
+//
+// It always returns (0, EBADF) without consuming src.
+func (fd *opathFD) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+//
+// It always returns (0, EBADF), regardless of the request in args.
+func (fd *opathFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ return 0, syserror.EBADF
+}
+
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents.
+//
+// It always fails with EBADF; cb is never invoked.
+func (fd *opathFD) IterDirents(ctx context.Context, cb IterDirentsCallback) error {
+ return syserror.EBADF
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek.
+//
+// It always returns (0, EBADF); offset and whence are ignored.
+func (fd *opathFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ return 0, syserror.EBADF
+}
+
+// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
+//
+// It always fails with EBADF and leaves opts unmodified.
+func (fd *opathFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+ return syserror.EBADF
+}
+
+// ListXattr implements vfs.FileDescriptionImpl.ListXattr.
+//
+// It always returns (nil, EBADF); size is ignored.
+func (fd *opathFD) ListXattr(ctx context.Context, size uint64) ([]string, error) {
+ return nil, syserror.EBADF
+}
+
+// GetXattr implements vfs.FileDescriptionImpl.GetXattr.
+//
+// It always returns ("", EBADF); opts is ignored.
+func (fd *opathFD) GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) {
+ return "", syserror.EBADF
+}
+
+// SetXattr implements vfs.FileDescriptionImpl.SetXattr.
+//
+// It always fails with EBADF; opts is ignored.
+func (fd *opathFD) SetXattr(ctx context.Context, opts SetXattrOptions) error {
+ return syserror.EBADF
+}
+
+// RemoveXattr implements vfs.FileDescriptionImpl.RemoveXattr.
+//
+// It always fails with EBADF; name is ignored.
+func (fd *opathFD) RemoveXattr(ctx context.Context, name string) error {
+ return syserror.EBADF
+}
+
+// Sync implements vfs.FileDescriptionImpl.Sync.
+//
+// It always fails with EBADF.
+func (fd *opathFD) Sync(ctx context.Context) error {
+ return syserror.EBADF
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat.
+//
+// It always fails with EBADF; opts is ignored.
+func (fd *opathFD) SetStat(ctx context.Context, opts SetStatOptions) error {
+ return syserror.EBADF
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat.
+//
+// The lookup is delegated to the StatAt method of the filesystem owning the
+// description's mount, using a resolving path whose root and start are both
+// the description's own VirtualDentry. The resolving path is handed back to
+// the VirtualFilesystem before returning.
+func (fd *opathFD) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) {
+	target := fd.vfsfd.vd
+	owner := target.mount.vfs
+	creds := auth.CredentialsFromContext(ctx)
+	pop := PathOperation{
+		Root:  target,
+		Start: target,
+	}
+	rp := owner.getResolvingPath(creds, &pop)
+	result, statErr := target.mount.fs.impl.StatAt(ctx, rp, opts)
+	owner.putResolvingPath(ctx, rp)
+	return result, statErr
+}
+
+// StatFS implements vfs.FileDescriptionImpl.StatFS.
+//
+// It reports metadata for the filesystem containing the file represented by
+// fd. The query is delegated to that filesystem's StatFSAt method, using a
+// resolving path whose root and start are both the description's own
+// VirtualDentry; the resolving path is handed back before returning.
+func (fd *opathFD) StatFS(ctx context.Context) (linux.Statfs, error) {
+	target := fd.vfsfd.vd
+	owner := target.mount.vfs
+	creds := auth.CredentialsFromContext(ctx)
+	pop := PathOperation{
+		Root:  target,
+		Start: target,
+	}
+	rp := owner.getResolvingPath(creds, &pop)
+	result, statErr := target.mount.fs.impl.StatFSAt(ctx, rp)
+	owner.putResolvingPath(ctx, rp)
+	return result, statErr
+}
diff --git a/pkg/sentry/vfs/options.go b/pkg/sentry/vfs/options.go
index bc79e5ecc..c9907843c 100644
--- a/pkg/sentry/vfs/options.go
+++ b/pkg/sentry/vfs/options.go
@@ -129,7 +129,7 @@ type OpenOptions struct {
//
// FilesystemImpls are responsible for implementing the following flags:
// O_RDONLY, O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_DIRECT, O_DSYNC,
- // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_PATH, O_SYNC, O_TMPFILE, and
+ // O_EXCL, O_NOATIME, O_NOCTTY, O_NONBLOCK, O_SYNC, O_TMPFILE, and
// O_TRUNC. VFS is responsible for handling O_DIRECTORY, O_LARGEFILE, and
// O_NOFOLLOW. VFS users are responsible for handling O_CLOEXEC, since file
// descriptors are mostly outside the scope of VFS.
diff --git a/pkg/sentry/vfs/vfs.go b/pkg/sentry/vfs/vfs.go
index 6fd1bb0b2..0aff2dd92 100644
--- a/pkg/sentry/vfs/vfs.go
+++ b/pkg/sentry/vfs/vfs.go
@@ -425,6 +425,18 @@ func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credential
rp.mustBeDir = true
rp.mustBeDirOrig = true
}
+	// O_PATH opens are served here rather than by the filesystem's OpenAt:
+	// the target is resolved to a VirtualDentry and wrapped in an opathFD.
+	//
+	// NOTE(review): rp, obtained earlier in this function, does not appear
+	// to be released on this early-return path -- confirm.
+	if opts.Flags&linux.O_PATH != 0 {
+		vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{})
+		if err != nil {
+			return nil, err
+		}
+		// Drop the reference returned by GetDentryAt on every exit path.
+		// Previously it was only dropped after a successful Init, so a
+		// failing Init leaked it.
+		defer vd.DecRef(ctx)
+		fd := &opathFD{}
+		if err := fd.vfsfd.Init(fd, opts.Flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}); err != nil {
+			return nil, err
+		}
+		return &fd.vfsfd, nil
+	}
for {
fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts)
if err == nil {