author    gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
committer gVisor bot <gvisor-bot@google.com>  2019-06-02 06:44:55 +0000
commit    ceb0d792f328d1fc0692197d8856a43c3936a571 (patch)
tree      83155f302eff44a78bcc30a3a08f4efe59a79379 /pkg/sentry/usermem
parent    deb7ecf1e46862d54f4b102f2d163cfbcfc37f3b (diff)
parent    216da0b733dbed9aad9b2ab92ac75bcb906fd7ee (diff)

Merge 216da0b7 (automated)
Diffstat (limited to 'pkg/sentry/usermem')
-rw-r--r--  pkg/sentry/usermem/access_type.go             128
-rw-r--r--  pkg/sentry/usermem/addr.go                    108
-rwxr-xr-x  pkg/sentry/usermem/addr_range.go               62
-rw-r--r--  pkg/sentry/usermem/addr_range_seq_unsafe.go   277
-rw-r--r--  pkg/sentry/usermem/bytes_io.go                126
-rw-r--r--  pkg/sentry/usermem/bytes_io_unsafe.go          47
-rw-r--r--  pkg/sentry/usermem/usermem.go                 587
-rw-r--r--  pkg/sentry/usermem/usermem_arm64.go            53
-rwxr-xr-x  pkg/sentry/usermem/usermem_state_autogen.go    49
-rw-r--r--  pkg/sentry/usermem/usermem_unsafe.go           27
-rw-r--r--  pkg/sentry/usermem/usermem_x86.go              38
11 files changed, 1502 insertions, 0 deletions
diff --git a/pkg/sentry/usermem/access_type.go b/pkg/sentry/usermem/access_type.go
new file mode 100644
index 000000000..9c1742a59
--- /dev/null
+++ b/pkg/sentry/usermem/access_type.go
@@ -0,0 +1,128 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "syscall"
+)
+
+// AccessType specifies memory access types. This is used for
+// setting mapping permissions, as well as communicating faults.
+//
+// +stateify savable
+type AccessType struct {
+ // Read is read access.
+ Read bool
+
+ // Write is write access.
+ Write bool
+
+ // Execute is executable access.
+ Execute bool
+}
+
+// String returns a pretty representation of access. This looks like the
+// familiar r-x, rw-, etc. and can be relied on as such.
+func (a AccessType) String() string {
+ bits := [3]byte{'-', '-', '-'}
+ if a.Read {
+ bits[0] = 'r'
+ }
+ if a.Write {
+ bits[1] = 'w'
+ }
+ if a.Execute {
+ bits[2] = 'x'
+ }
+ return string(bits[:])
+}
+
+// Any returns true iff at least one of Read, Write or Execute is true.
+func (a AccessType) Any() bool {
+ return a.Read || a.Write || a.Execute
+}
+
+// Prot returns the system prot (syscall.PROT_READ, etc.) for this access.
+func (a AccessType) Prot() int {
+ var prot int
+ if a.Read {
+ prot |= syscall.PROT_READ
+ }
+ if a.Write {
+ prot |= syscall.PROT_WRITE
+ }
+ if a.Execute {
+ prot |= syscall.PROT_EXEC
+ }
+ return prot
+}
+
+// SupersetOf returns true iff the access types in a are a superset of the
+// access types in other.
+func (a AccessType) SupersetOf(other AccessType) bool {
+ if !a.Read && other.Read {
+ return false
+ }
+ if !a.Write && other.Write {
+ return false
+ }
+ if !a.Execute && other.Execute {
+ return false
+ }
+ return true
+}
+
+// Intersect returns the access types set in both a and other.
+func (a AccessType) Intersect(other AccessType) AccessType {
+ return AccessType{
+ Read: a.Read && other.Read,
+ Write: a.Write && other.Write,
+ Execute: a.Execute && other.Execute,
+ }
+}
+
+// Union returns the access types set in either a or other.
+func (a AccessType) Union(other AccessType) AccessType {
+ return AccessType{
+ Read: a.Read || other.Read,
+ Write: a.Write || other.Write,
+ Execute: a.Execute || other.Execute,
+ }
+}
+
+// Effective returns the set of effective access types allowed by a, even if
+// some types are not explicitly allowed.
+func (a AccessType) Effective() AccessType {
+ // In Linux, Write and Execute access generally imply Read access. See
+ // mm/mmap.c:protection_map.
+ //
+ // The notable exception is get_user_pages, which only checks against
+ // the original vma flags. That said, most user memory accesses do not
+ // use GUP.
+ if a.Write || a.Execute {
+ a.Read = true
+ }
+ return a
+}
+
+// Convenient access types.
+var (
+ NoAccess = AccessType{}
+ Read = AccessType{Read: true}
+ Write = AccessType{Write: true}
+ Execute = AccessType{Execute: true}
+ ReadWrite = AccessType{Read: true, Write: true}
+ AnyAccess = AccessType{Read: true, Write: true, Execute: true}
+)
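
The helpers above compose naturally. A minimal standalone sketch (illustrative,
not part of this change; the import path is the one vendored at this commit):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        vma := usermem.ReadWrite             // mapping allows "rw-"
        fault := usermem.Execute.Effective() // Execute implies Read: "r-x"
        fmt.Println(vma.SupersetOf(fault))   // false: the mapping lacks execute
        fmt.Println(vma.Intersect(fault))    // "r--"
        fmt.Println(vma.Union(fault).Prot()) // 7 (PROT_READ|PROT_WRITE|PROT_EXEC)
    }
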
diff --git a/pkg/sentry/usermem/addr.go b/pkg/sentry/usermem/addr.go
new file mode 100644
index 000000000..e79210804
--- /dev/null
+++ b/pkg/sentry/usermem/addr.go
@@ -0,0 +1,108 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "fmt"
+)
+
+// Addr represents a generic virtual address.
+//
+// +stateify savable
+type Addr uintptr
+
+// AddLength adds the given length to start and returns the result. ok is true
+// iff adding the length did not overflow the range of Addr.
+//
+// Note: This function is usually used to get the end of an address range
+// defined by its start address and length. Since the resulting end is
+// exclusive, end == 0 is technically valid, and corresponds to a range that
+// extends to the end of the address space, but ok will be false. This isn't
+// expected to ever come up in practice.
+func (v Addr) AddLength(length uint64) (end Addr, ok bool) {
+ end = v + Addr(length)
+ // The second half of the following check is needed in case uintptr is
+ // smaller than 64 bits.
+ ok = end >= v && length <= uint64(^Addr(0))
+ return
+}
+
+// RoundDown returns the address rounded down to the nearest page boundary.
+func (v Addr) RoundDown() Addr {
+ return v & ^Addr(PageSize-1)
+}
+
+// RoundUp returns the address rounded up to the nearest page boundary. ok is
+// true iff rounding up did not wrap around.
+func (v Addr) RoundUp() (addr Addr, ok bool) {
+ addr = Addr(v + PageSize - 1).RoundDown()
+ ok = addr >= v
+ return
+}
+
+// MustRoundUp is equivalent to RoundUp, but panics if rounding up wraps
+// around.
+func (v Addr) MustRoundUp() Addr {
+ addr, ok := v.RoundUp()
+ if !ok {
+ panic(fmt.Sprintf("usermem.Addr(%d).RoundUp() wraps", v))
+ }
+ return addr
+}
+
+// HugeRoundDown returns the address rounded down to the nearest huge page
+// boundary.
+func (v Addr) HugeRoundDown() Addr {
+ return v & ^Addr(HugePageSize-1)
+}
+
+// HugeRoundUp returns the address rounded up to the nearest huge page boundary.
+// ok is true iff rounding up did not wrap around.
+func (v Addr) HugeRoundUp() (addr Addr, ok bool) {
+ addr = Addr(v + HugePageSize - 1).HugeRoundDown()
+ ok = addr >= v
+ return
+}
+
+// PageOffset returns the offset of v into the current page.
+func (v Addr) PageOffset() uint64 {
+ return uint64(v & Addr(PageSize-1))
+}
+
+// IsPageAligned returns true if v.PageOffset() == 0.
+func (v Addr) IsPageAligned() bool {
+ return v.PageOffset() == 0
+}
+
+// AddrRange is a range of Addrs.
+//
+// type AddrRange <generated by go_generics>
+
+// ToRange returns [v, v+length).
+func (v Addr) ToRange(length uint64) (AddrRange, bool) {
+ end, ok := v.AddLength(length)
+ return AddrRange{v, end}, ok
+}
+
+// IsPageAligned returns true if ar.Start.IsPageAligned() and
+// ar.End.IsPageAligned().
+func (ar AddrRange) IsPageAligned() bool {
+ return ar.Start.IsPageAligned() && ar.End.IsPageAligned()
+}
+
+// String implements fmt.Stringer.String.
+func (ar AddrRange) String() string {
+ return fmt.Sprintf("[%#x, %#x)", ar.Start, ar.End)
+}
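
A quick sketch of the rounding and overflow helpers (illustrative, not part of
this change; assumes the 4K PageSize defined later in this diff):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        a := usermem.Addr(0x1234)
        fmt.Printf("%#x\n", a.RoundDown())   // 0x1000
        up, ok := a.RoundUp()
        fmt.Printf("%#x %v\n", up, ok)       // 0x2000 true
        fmt.Printf("%#x\n", a.PageOffset())  // 0x234

        ar, ok := a.ToRange(0x100)
        fmt.Println(ar, ok)                  // [0x1234, 0x1334) true

        // AddLength reports overflow instead of silently wrapping.
        _, ok = usermem.Addr(^uintptr(0)).AddLength(2)
        fmt.Println(ok)                      // false
    }
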
diff --git a/pkg/sentry/usermem/addr_range.go b/pkg/sentry/usermem/addr_range.go
new file mode 100755
index 000000000..152ed1434
--- /dev/null
+++ b/pkg/sentry/usermem/addr_range.go
@@ -0,0 +1,62 @@
+package usermem
+
+// An AddrRange represents a contiguous range of Addrs.
+//
+// +stateify savable
+type AddrRange struct {
+ // Start is the inclusive start of the range.
+ Start Addr
+
+ // End is the exclusive end of the range.
+ End Addr
+}
+
+// WellFormed returns true if r.Start <= r.End. All other methods on a Range
+// require that the Range is well-formed.
+func (r AddrRange) WellFormed() bool {
+ return r.Start <= r.End
+}
+
+// Length returns the length of the range.
+func (r AddrRange) Length() Addr {
+ return r.End - r.Start
+}
+
+// Contains returns true if r contains x.
+func (r AddrRange) Contains(x Addr) bool {
+ return r.Start <= x && x < r.End
+}
+
+// Overlaps returns true if r and r2 overlap.
+func (r AddrRange) Overlaps(r2 AddrRange) bool {
+ return r.Start < r2.End && r2.Start < r.End
+}
+
+// IsSupersetOf returns true if r is a superset of r2; that is, the range r2 is
+// contained within r.
+func (r AddrRange) IsSupersetOf(r2 AddrRange) bool {
+ return r.Start <= r2.Start && r.End >= r2.End
+}
+
+// Intersect returns a range consisting of the intersection between r and r2.
+// If r and r2 do not overlap, Intersect returns a range with unspecified
+// bounds, but for which Length() == 0.
+func (r AddrRange) Intersect(r2 AddrRange) AddrRange {
+ if r.Start < r2.Start {
+ r.Start = r2.Start
+ }
+ if r.End > r2.End {
+ r.End = r2.End
+ }
+ if r.End < r.Start {
+ r.End = r.Start
+ }
+ return r
+}
+
+// CanSplitAt returns true if it is legal to split a segment spanning the range
+// r at x; that is, splitting at x would produce two ranges, both of which have
+// non-zero length.
+func (r AddrRange) CanSplitAt(x Addr) bool {
+ return r.Contains(x) && r.Start < x
+}
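
The generated range operations in use, as a standalone sketch (illustrative
only):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        r1 := usermem.AddrRange{Start: 0x1000, End: 0x3000}
        r2 := usermem.AddrRange{Start: 0x2000, End: 0x4000}
        fmt.Println(r1.Overlaps(r2))         // true
        fmt.Println(r1.Intersect(r2))        // [0x2000, 0x3000)
        fmt.Println(r1.IsSupersetOf(r2))     // false: r2 extends past r1.End
        fmt.Println(r1.CanSplitAt(r1.Start)) // false: the left half would be empty
        fmt.Println(r1.CanSplitAt(0x2000))   // true
    }
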
diff --git a/pkg/sentry/usermem/addr_range_seq_unsafe.go b/pkg/sentry/usermem/addr_range_seq_unsafe.go
new file mode 100644
index 000000000..c09337c15
--- /dev/null
+++ b/pkg/sentry/usermem/addr_range_seq_unsafe.go
@@ -0,0 +1,277 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "unsafe"
+)
+
+// An AddrRangeSeq represents a sequence of AddrRanges.
+//
+// AddrRangeSeqs are immutable and may be copied by value. The zero value of
+// AddrRangeSeq represents an empty sequence.
+//
+// An AddrRangeSeq may contain AddrRanges with a length of 0. This is necessary
+// since zero-length AddrRanges are significant to MM bounds checks.
+type AddrRangeSeq struct {
+ // If length is 0, then the AddrRangeSeq represents no AddrRanges.
+ // Invariants: data == 0; offset == 0; limit == 0.
+ //
+ // If length is 1, then the AddrRangeSeq represents the single
+ // AddrRange{offset, offset+limit}. Invariants: data == 0.
+ //
+ // Otherwise, length >= 2, and the AddrRangeSeq represents the `length`
+ // AddrRanges in the array of AddrRanges starting at address `data`,
+ // starting at `offset` bytes into the first AddrRange and limited to the
+ // following `limit` bytes. (AddrRanges after `limit` are still iterated,
+ // but are truncated to a length of 0.) Invariants: data != 0; offset <=
+ // data[0].Length(); limit > 0; offset+limit <= the combined length of all
+ // AddrRanges in the array.
+ data unsafe.Pointer
+ length int
+ offset Addr
+ limit Addr
+}
+
+// AddrRangeSeqOf returns an AddrRangeSeq representing the single AddrRange ar.
+func AddrRangeSeqOf(ar AddrRange) AddrRangeSeq {
+ return AddrRangeSeq{
+ length: 1,
+ offset: ar.Start,
+ limit: ar.Length(),
+ }
+}
+
+// AddrRangeSeqFromSlice returns an AddrRangeSeq representing all AddrRanges in
+// slice.
+//
+// Whether the returned AddrRangeSeq shares memory with slice is unspecified;
+// clients should avoid mutating slices passed to AddrRangeSeqFromSlice.
+//
+// Preconditions: The combined length of all AddrRanges in slice <=
+// math.MaxInt64.
+func AddrRangeSeqFromSlice(slice []AddrRange) AddrRangeSeq {
+ var limit int64
+ for _, ar := range slice {
+ len64 := int64(ar.Length())
+ if len64 < 0 {
+ panic(fmt.Sprintf("Length of AddrRange %v overflows int64", ar))
+ }
+ sum := limit + len64
+ if sum < limit {
+ panic(fmt.Sprintf("Total length of AddrRanges %v overflows int64", slice))
+ }
+ limit = sum
+ }
+ return addrRangeSeqFromSliceLimited(slice, limit)
+}
+
+// Preconditions: The combined length of all AddrRanges in slice <= limit.
+// limit >= 0. If len(slice) != 0, then limit > 0.
+func addrRangeSeqFromSliceLimited(slice []AddrRange, limit int64) AddrRangeSeq {
+ switch len(slice) {
+ case 0:
+ return AddrRangeSeq{}
+ case 1:
+ return AddrRangeSeq{
+ length: 1,
+ offset: slice[0].Start,
+ limit: Addr(limit),
+ }
+ default:
+ return AddrRangeSeq{
+ data: unsafe.Pointer(&slice[0]),
+ length: len(slice),
+ limit: Addr(limit),
+ }
+ }
+}
+
+// IsEmpty returns true if ars.NumRanges() == 0.
+//
+// Note that since AddrRangeSeq may contain AddrRanges with a length of zero,
+// an AddrRangeSeq representing 0 bytes (AddrRangeSeq.NumBytes() == 0) is not
+// necessarily empty.
+func (ars AddrRangeSeq) IsEmpty() bool {
+ return ars.length == 0
+}
+
+// NumRanges returns the number of AddrRanges in ars.
+func (ars AddrRangeSeq) NumRanges() int {
+ return ars.length
+}
+
+// NumBytes returns the number of bytes represented by ars.
+func (ars AddrRangeSeq) NumBytes() int64 {
+ return int64(ars.limit)
+}
+
+// Head returns the first AddrRange in ars.
+//
+// Preconditions: !ars.IsEmpty().
+func (ars AddrRangeSeq) Head() AddrRange {
+ if ars.length == 0 {
+ panic("empty AddrRangeSeq")
+ }
+ if ars.length == 1 {
+ return AddrRange{ars.offset, ars.offset + ars.limit}
+ }
+ ar := *(*AddrRange)(ars.data)
+ ar.Start += ars.offset
+ if ar.Length() > ars.limit {
+ ar.End = ar.Start + ars.limit
+ }
+ return ar
+}
+
+// Tail returns an AddrRangeSeq consisting of all AddrRanges in ars after the
+// first.
+//
+// Preconditions: !ars.IsEmpty().
+func (ars AddrRangeSeq) Tail() AddrRangeSeq {
+ if ars.length == 0 {
+ panic("empty AddrRangeSeq")
+ }
+ if ars.length == 1 {
+ return AddrRangeSeq{}
+ }
+ return ars.externalTail()
+}
+
+// Preconditions: ars.length >= 2.
+func (ars AddrRangeSeq) externalTail() AddrRangeSeq {
+ headLen := (*AddrRange)(ars.data).Length() - ars.offset
+ var tailLimit int64
+ if ars.limit > headLen {
+ tailLimit = int64(ars.limit - headLen)
+ }
+ var extSlice []AddrRange
+ extSliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&extSlice))
+ extSliceHdr.Data = uintptr(ars.data)
+ extSliceHdr.Len = ars.length
+ extSliceHdr.Cap = ars.length
+ return addrRangeSeqFromSliceLimited(extSlice[1:], tailLimit)
+}
+
+// DropFirst returns an AddrRangeSeq equivalent to ars, but with the first n
+// bytes omitted. If n > ars.NumBytes(), DropFirst returns an empty
+// AddrRangeSeq.
+//
+// If !ars.IsEmpty() and ars.Head().Length() == 0, DropFirst will always omit
+// at least ars.Head(), even if n == 0. This guarantees that the basic pattern
+// of:
+//
+// for !ars.IsEmpty() {
+// n, err = doIOWith(ars.Head())
+// if err != nil {
+// return err
+// }
+// ars = ars.DropFirst(n)
+// }
+//
+// works even in the presence of zero-length AddrRanges.
+//
+// Preconditions: n >= 0.
+func (ars AddrRangeSeq) DropFirst(n int) AddrRangeSeq {
+ if n < 0 {
+ panic(fmt.Sprintf("invalid n: %d", n))
+ }
+ return ars.DropFirst64(int64(n))
+}
+
+// DropFirst64 is equivalent to DropFirst but takes an int64.
+func (ars AddrRangeSeq) DropFirst64(n int64) AddrRangeSeq {
+ if n < 0 {
+ panic(fmt.Sprintf("invalid n: %d", n))
+ }
+ if Addr(n) > ars.limit {
+ return AddrRangeSeq{}
+ }
+ // Handle initial empty AddrRange.
+ switch ars.length {
+ case 0:
+ return AddrRangeSeq{}
+ case 1:
+ if ars.limit == 0 {
+ return AddrRangeSeq{}
+ }
+ default:
+ if rawHeadLen := (*AddrRange)(ars.data).Length(); ars.offset == rawHeadLen {
+ ars = ars.externalTail()
+ }
+ }
+ for n != 0 {
+ // Calling ars.Head() here is surprisingly expensive, so inline getting
+ // the head's length.
+ var headLen Addr
+ if ars.length == 1 {
+ headLen = ars.limit
+ } else {
+ headLen = (*AddrRange)(ars.data).Length() - ars.offset
+ }
+ if Addr(n) < headLen {
+ // Dropping ends partway through the head AddrRange.
+ ars.offset += Addr(n)
+ ars.limit -= Addr(n)
+ return ars
+ }
+ n -= int64(headLen)
+ ars = ars.Tail()
+ }
+ return ars
+}
+
+// TakeFirst returns an AddrRangeSeq equivalent to ars, but iterating at most n
+// bytes. TakeFirst never removes AddrRanges from ars; AddrRanges beyond the
+// first n bytes are reduced to a length of zero, but will still be iterated.
+//
+// Preconditions: n >= 0.
+func (ars AddrRangeSeq) TakeFirst(n int) AddrRangeSeq {
+ if n < 0 {
+ panic(fmt.Sprintf("invalid n: %d", n))
+ }
+ return ars.TakeFirst64(int64(n))
+}
+
+// TakeFirst64 is equivalent to TakeFirst but takes an int64.
+func (ars AddrRangeSeq) TakeFirst64(n int64) AddrRangeSeq {
+ if n < 0 {
+ panic(fmt.Sprintf("invalid n: %d", n))
+ }
+ if ars.limit > Addr(n) {
+ ars.limit = Addr(n)
+ }
+ return ars
+}
+
+// String implements fmt.Stringer.String.
+func (ars AddrRangeSeq) String() string {
+ // This is deliberately chosen to be the same as fmt's automatic stringer
+ // for []AddrRange.
+ var buf bytes.Buffer
+ buf.WriteByte('[')
+ var sep string
+ for !ars.IsEmpty() {
+ buf.WriteString(sep)
+ sep = " "
+ buf.WriteString(ars.Head().String())
+ ars = ars.Tail()
+ }
+ buf.WriteByte(']')
+ return buf.String()
+}
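
The DropFirst doc comment above gives the canonical consumption loop; here it
is run concretely against a two-range sequence (illustrative, not part of this
change):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        ars := usermem.AddrRangeSeqFromSlice([]usermem.AddrRange{
            {Start: 0x1000, End: 0x1008},
            {Start: 0x3000, End: 0x3004},
        })
        fmt.Println(ars.NumRanges(), ars.NumBytes()) // 2 12

        // Consume at most 5 bytes per iteration, as an IO loop would.
        // Prints: [0x1000, 0x1008) 5, then [0x1005, 0x1008) 3,
        // then [0x3000, 0x3004) 4.
        for !ars.IsEmpty() {
            ar := ars.Head()
            n := int(ar.Length())
            if n > 5 {
                n = 5
            }
            fmt.Println(ar, n)
            ars = ars.DropFirst(n)
        }
    }
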
diff --git a/pkg/sentry/usermem/bytes_io.go b/pkg/sentry/usermem/bytes_io.go
new file mode 100644
index 000000000..f98d82168
--- /dev/null
+++ b/pkg/sentry/usermem/bytes_io.go
@@ -0,0 +1,126 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+const maxInt = int(^uint(0) >> 1)
+
+// BytesIO implements IO using a byte slice. Addresses are interpreted as
+// offsets into the slice. Reads and writes beyond the end of the slice return
+// EFAULT.
+type BytesIO struct {
+ Bytes []byte
+}
+
+// CopyOut implements IO.CopyOut.
+func (b *BytesIO) CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error) {
+ rngN, rngErr := b.rangeCheck(addr, len(src))
+ if rngN == 0 {
+ return 0, rngErr
+ }
+ return copy(b.Bytes[int(addr):], src[:rngN]), rngErr
+}
+
+// CopyIn implements IO.CopyIn.
+func (b *BytesIO) CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error) {
+ rngN, rngErr := b.rangeCheck(addr, len(dst))
+ if rngN == 0 {
+ return 0, rngErr
+ }
+ return copy(dst[:rngN], b.Bytes[int(addr):]), rngErr
+}
+
+// ZeroOut implements IO.ZeroOut.
+func (b *BytesIO) ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error) {
+ if toZero > int64(maxInt) {
+ return 0, syserror.EINVAL
+ }
+ rngN, rngErr := b.rangeCheck(addr, int(toZero))
+ if rngN == 0 {
+ return 0, rngErr
+ }
+ zeroSlice := b.Bytes[int(addr) : int(addr)+rngN]
+ for i := range zeroSlice {
+ zeroSlice[i] = 0
+ }
+ return int64(rngN), rngErr
+}
+
+// CopyOutFrom implements IO.CopyOutFrom.
+func (b *BytesIO) CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error) {
+ dsts, rngErr := b.blocksFromAddrRanges(ars)
+ n, err := src.ReadToBlocks(dsts)
+ if err != nil {
+ return int64(n), err
+ }
+ return int64(n), rngErr
+}
+
+// CopyInTo implements IO.CopyInTo.
+func (b *BytesIO) CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error) {
+ srcs, rngErr := b.blocksFromAddrRanges(ars)
+ n, err := dst.WriteFromBlocks(srcs)
+ if err != nil {
+ return int64(n), err
+ }
+ return int64(n), rngErr
+}
+
+func (b *BytesIO) rangeCheck(addr Addr, length int) (int, error) {
+ if length == 0 {
+ return 0, nil
+ }
+ if length < 0 {
+ return 0, syserror.EINVAL
+ }
+ max := Addr(len(b.Bytes))
+ if addr >= max {
+ return 0, syserror.EFAULT
+ }
+ end, ok := addr.AddLength(uint64(length))
+ if !ok || end > max {
+ return int(max - addr), syserror.EFAULT
+ }
+ return length, nil
+}
+
+func (b *BytesIO) blocksFromAddrRanges(ars AddrRangeSeq) (safemem.BlockSeq, error) {
+ blocks := make([]safemem.Block, 0, ars.NumRanges())
+ for !ars.IsEmpty() {
+ ar := ars.Head()
+ n, err := b.rangeCheck(ar.Start, int(ar.Length()))
+ if n != 0 {
+ blocks = append(blocks, safemem.BlockFromSafeSlice(b.Bytes[int(ar.Start):int(ar.Start)+n]))
+ }
+ if err != nil {
+ return safemem.BlockSeqFromSlice(blocks), err
+ }
+ ars = ars.Tail()
+ }
+ return safemem.BlockSeqFromSlice(blocks), nil
+}
+
+// BytesIOSequence returns an IOSequence representing the given byte slice.
+func BytesIOSequence(buf []byte) IOSequence {
+ return IOSequence{
+ IO: &BytesIO{buf},
+ Addrs: AddrRangeSeqOf(AddrRange{0, Addr(len(buf))}),
+ }
+}
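
A short sketch of BytesIO's truncate-then-EFAULT contract (illustrative; nil
is passed for ctx only because BytesIO never touches it):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        b := &usermem.BytesIO{Bytes: make([]byte, 16)}

        n, err := b.CopyOut(nil, 4, []byte("hello"), usermem.IOOpts{})
        fmt.Println(n, err) // 5 <nil>

        dst := make([]byte, 5)
        n, err = b.CopyIn(nil, 4, dst, usermem.IOOpts{})
        fmt.Println(n, string(dst), err) // 5 hello <nil>

        // A read crossing the end of the slice is truncated and faults.
        n, err = b.CopyIn(nil, 14, dst, usermem.IOOpts{})
        fmt.Println(n, err) // 2 bad address (EFAULT)
    }
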
diff --git a/pkg/sentry/usermem/bytes_io_unsafe.go b/pkg/sentry/usermem/bytes_io_unsafe.go
new file mode 100644
index 000000000..bb49d2ff3
--- /dev/null
+++ b/pkg/sentry/usermem/bytes_io_unsafe.go
@@ -0,0 +1,47 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "sync/atomic"
+ "unsafe"
+
+ "gvisor.googlesource.com/gvisor/pkg/atomicbitops"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+)
+
+// SwapUint32 implements IO.SwapUint32.
+func (b *BytesIO) SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error) {
+ if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil {
+ return 0, rngErr
+ }
+ return atomic.SwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), new), nil
+}
+
+// CompareAndSwapUint32 implements IO.CompareAndSwapUint32.
+func (b *BytesIO) CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error) {
+ if _, rngErr := b.rangeCheck(addr, 4); rngErr != nil {
+ return 0, rngErr
+ }
+ return atomicbitops.CompareAndSwapUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)])), old, new), nil
+}
+
+// LoadUint32 implements IO.LoadUint32.
+func (b *BytesIO) LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error) {
+ if _, err := b.rangeCheck(addr, 4); err != nil {
+ return 0, err
+ }
+ return atomic.LoadUint32((*uint32)(unsafe.Pointer(&b.Bytes[int(addr)]))), nil
+}
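
And the atomic operations, sketched on the same in-memory backing
(illustrative; offset 0 of a fresh slice is assumed to satisfy the 4-byte
alignment precondition, which holds for Go heap allocations):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        b := &usermem.BytesIO{Bytes: make([]byte, 8)}

        old, _ := b.SwapUint32(nil, 0, 0xdeadbeef, usermem.IOOpts{})
        fmt.Printf("%#x\n", old) // 0x0

        prev, _ := b.CompareAndSwapUint32(nil, 0, 0xdeadbeef, 42, usermem.IOOpts{})
        fmt.Printf("%#x\n", prev) // 0xdeadbeef: the CAS succeeded

        v, _ := b.LoadUint32(nil, 0, usermem.IOOpts{})
        fmt.Println(v) // 42
    }
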
diff --git a/pkg/sentry/usermem/usermem.go b/pkg/sentry/usermem/usermem.go
new file mode 100644
index 000000000..31e4d6ada
--- /dev/null
+++ b/pkg/sentry/usermem/usermem.go
@@ -0,0 +1,587 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package usermem governs access to user memory.
+package usermem
+
+import (
+ "errors"
+ "io"
+ "strconv"
+
+ "gvisor.googlesource.com/gvisor/pkg/binary"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/context"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/safemem"
+ "gvisor.googlesource.com/gvisor/pkg/syserror"
+)
+
+// IO provides access to the contents of a virtual memory space.
+//
+// FIXME(b/38173783): Implementations of IO cannot expect ctx to contain any
+// meaningful data.
+type IO interface {
+ // CopyOut copies len(src) bytes from src to the memory mapped at addr. It
+ // returns the number of bytes copied. If the number of bytes copied is <
+ // len(src), it returns a non-nil error explaining why.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order.
+ //
+ // Postconditions: CopyOut does not retain src.
+ CopyOut(ctx context.Context, addr Addr, src []byte, opts IOOpts) (int, error)
+
+ // CopyIn copies len(dst) bytes from the memory mapped at addr to dst.
+ // It returns the number of bytes copied. If the number of bytes copied is
+ // < len(dst), it returns a non-nil error explaining why.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order.
+ //
+ // Postconditions: CopyIn does not retain dst.
+ CopyIn(ctx context.Context, addr Addr, dst []byte, opts IOOpts) (int, error)
+
+ // ZeroOut sets toZero bytes to 0, starting at addr. It returns the number
+ // of bytes zeroed. If the number of bytes zeroed is < toZero, it returns a
+ // non-nil error explaining why.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. toZero >= 0.
+ ZeroOut(ctx context.Context, addr Addr, toZero int64, opts IOOpts) (int64, error)
+
+ // CopyOutFrom copies ars.NumBytes() bytes from src to the memory mapped at
+ // ars. It returns the number of bytes copied, which may be less than the
+ // number of bytes read from src if copying fails. CopyOutFrom may return a
+ // partial copy without an error iff src.ReadToBlocks returns a partial
+ // read without an error.
+ //
+ // CopyOutFrom calls src.ReadToBlocks at most once.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. src.ReadToBlocks must not block
+ // on mm.MemoryManager.activeMu or any preceding locks in the lock order.
+ CopyOutFrom(ctx context.Context, ars AddrRangeSeq, src safemem.Reader, opts IOOpts) (int64, error)
+
+ // CopyInTo copies ars.NumBytes() bytes from the memory mapped at ars to
+ // dst. It returns the number of bytes copied. CopyInTo may return a
+ // partial copy without an error iff dst.WriteFromBlocks returns a partial
+ // write without an error.
+ //
+ // CopyInTo calls dst.WriteFromBlocks at most once.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. dst.WriteFromBlocks must not
+ // block on mm.MemoryManager.activeMu or any preceding locks in the lock
+ // order.
+ CopyInTo(ctx context.Context, ars AddrRangeSeq, dst safemem.Writer, opts IOOpts) (int64, error)
+
+ // TODO(jamieliu): The requirement that CopyOutFrom/CopyInTo call src/dst
+ // at most once, which is unnecessary in most cases, forces implementations
+ // to gather safemem.Blocks into a single slice to pass to src/dst. Add
+ // CopyOutFromIter/CopyInToIter, which relaxes this restriction, to avoid
+ // this allocation.
+
+ // SwapUint32 atomically sets the uint32 value at addr to new and
+ // returns the previous value.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. addr must be aligned to a 4-byte
+ // boundary.
+ SwapUint32(ctx context.Context, addr Addr, new uint32, opts IOOpts) (uint32, error)
+
+ // CompareAndSwapUint32 atomically compares the uint32 value at addr to
+ // old; if they are equal, the value in memory is replaced by new. In
+ // either case, the previous value stored in memory is returned.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. addr must be aligned to a 4-byte
+ // boundary.
+ CompareAndSwapUint32(ctx context.Context, addr Addr, old, new uint32, opts IOOpts) (uint32, error)
+
+ // LoadUint32 atomically loads the uint32 value at addr and returns it.
+ //
+ // Preconditions: The caller must not hold mm.MemoryManager.mappingMu or
+ // any following locks in the lock order. addr must be aligned to a 4-byte
+ // boundary.
+ LoadUint32(ctx context.Context, addr Addr, opts IOOpts) (uint32, error)
+}
+
+// IOOpts contains options applicable to all IO methods.
+type IOOpts struct {
+ // If IgnorePermissions is true, application-defined memory protections set
+ // by mmap(2) or mprotect(2) will be ignored. (Memory protections required
+ // by the target of the mapping are never ignored.)
+ IgnorePermissions bool
+
+ // If AddressSpaceActive is true, the IO implementation may assume that it
+ // has an active AddressSpace and can therefore use AddressSpace copying
+ // without performing activation. See mm/io.go for details.
+ AddressSpaceActive bool
+}
+
+// IOReadWriter is an io.ReadWriter that reads from / writes to addresses
+// starting at addr in IO. The preconditions that apply to IO.CopyIn and
+// IO.CopyOut also apply to IOReadWriter.Read and IOReadWriter.Write
+// respectively.
+type IOReadWriter struct {
+ Ctx context.Context
+ IO IO
+ Addr Addr
+ Opts IOOpts
+}
+
+// Read implements io.Reader.Read.
+//
+// Note that an address space does not have an "end of file", so Read can only
+// return io.EOF if IO.CopyIn returns io.EOF. Attempts to read unmapped or
+// unreadable memory, or beyond the end of the address space, should return
+// EFAULT.
+func (rw *IOReadWriter) Read(dst []byte) (int, error) {
+ n, err := rw.IO.CopyIn(rw.Ctx, rw.Addr, dst, rw.Opts)
+ end, ok := rw.Addr.AddLength(uint64(n))
+ if ok {
+ rw.Addr = end
+ } else {
+ // Disallow wraparound.
+ rw.Addr = ^Addr(0)
+ if err != nil {
+ err = syserror.EFAULT
+ }
+ }
+ return n, err
+}
+
+// Write implements io.Writer.Write.
+func (rw *IOReadWriter) Write(src []byte) (int, error) {
+ n, err := rw.IO.CopyOut(rw.Ctx, rw.Addr, src, rw.Opts)
+ end, ok := rw.Addr.AddLength(uint64(n))
+ if ok {
+ rw.Addr = end
+ } else {
+ // Disallow wraparound.
+ rw.Addr = ^Addr(0)
+ if err != nil {
+ err = syserror.EFAULT
+ }
+ }
+ return n, err
+}
+
+// CopyObjectOut copies a fixed-size value or slice of fixed-size values from
+// src to the memory mapped at addr in uio. It returns the number of bytes
+// copied.
+//
+// CopyObjectOut must use reflection to encode src; performance-sensitive
+// clients should do encoding manually and use uio.CopyOut directly.
+//
+// Preconditions: As for IO.CopyOut.
+func CopyObjectOut(ctx context.Context, uio IO, addr Addr, src interface{}, opts IOOpts) (int, error) {
+ w := &IOReadWriter{
+ Ctx: ctx,
+ IO: uio,
+ Addr: addr,
+ Opts: opts,
+ }
+ // Allocate a byte slice the size of the object being marshaled. This
+ // adds an extra reflection call, but avoids needing to grow the slice
+ // during encoding, which can result in many heap-allocated slices.
+ b := make([]byte, 0, binary.Size(src))
+ return w.Write(binary.Marshal(b, ByteOrder, src))
+}
+
+// CopyObjectIn copies a fixed-size value or slice of fixed-size values from
+// the memory mapped at addr in uio to dst. It returns the number of bytes
+// copied.
+//
+// CopyObjectIn must use reflection to decode dst; performance-sensitive
+// clients should use uio.CopyIn directly and do decoding manually.
+//
+// Preconditions: As for IO.CopyIn.
+func CopyObjectIn(ctx context.Context, uio IO, addr Addr, dst interface{}, opts IOOpts) (int, error) {
+ r := &IOReadWriter{
+ Ctx: ctx,
+ IO: uio,
+ Addr: addr,
+ Opts: opts,
+ }
+ buf := make([]byte, binary.Size(dst))
+ if _, err := io.ReadFull(r, buf); err != nil {
+ return 0, err
+ }
+ binary.Unmarshal(buf, ByteOrder, dst)
+ return int(r.Addr - addr), nil
+}
+
+// copyStringIncrement is the maximum number of bytes that are copied from
+// virtual memory at a time by CopyStringIn.
+const copyStringIncrement = 64
+
+// CopyStringIn copies a NUL-terminated string of unknown length from the
+// memory mapped at addr in uio and returns it as a string (not including the
+// trailing NUL). If the length of the string, including the terminating NUL,
+// would exceed maxlen, CopyStringIn returns the string truncated to maxlen and
+// ENAMETOOLONG.
+//
+// Preconditions: As for IO.CopyIn. maxlen >= 0.
+func CopyStringIn(ctx context.Context, uio IO, addr Addr, maxlen int, opts IOOpts) (string, error) {
+ buf := make([]byte, maxlen)
+ var done int
+ for done < maxlen {
+ start, ok := addr.AddLength(uint64(done))
+ if !ok {
+ // Last page of kernel memory. The application can't use this
+ // anyway.
+ return stringFromImmutableBytes(buf[:done]), syserror.EFAULT
+ }
+ // Read up to copyStringIncrement bytes at a time.
+ readlen := copyStringIncrement
+ if readlen > maxlen-done {
+ readlen = maxlen - done
+ }
+ end, ok := start.AddLength(uint64(readlen))
+ if !ok {
+ return stringFromImmutableBytes(buf[:done]), syserror.EFAULT
+ }
+ // Shorten the read to avoid crossing page boundaries, since faulting
+ // in a page unnecessarily is expensive. This also ensures that partial
+ // copies up to the end of application-mappable memory succeed.
+ if start.RoundDown() != end.RoundDown() {
+ end = end.RoundDown()
+ }
+ n, err := uio.CopyIn(ctx, start, buf[done:done+int(end-start)], opts)
+ // Look for the terminating zero byte, which may have occurred before
+ // hitting err.
+ for i, c := range buf[done : done+n] {
+ if c == 0 {
+ return stringFromImmutableBytes(buf[:done+i]), nil
+ }
+ }
+ done += n
+ if err != nil {
+ return stringFromImmutableBytes(buf[:done]), err
+ }
+ }
+ return stringFromImmutableBytes(buf), syserror.ENAMETOOLONG
+}
+
+// CopyOutVec copies bytes from src to the memory mapped at ars in uio. The
+// maximum number of bytes copied is ars.NumBytes() or len(src), whichever is
+// less. CopyOutVec returns the number of bytes copied; if this is less than
+// the maximum, it returns a non-nil error explaining why.
+//
+// Preconditions: As for IO.CopyOut.
+func CopyOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, src []byte, opts IOOpts) (int, error) {
+ var done int
+ for !ars.IsEmpty() && done < len(src) {
+ ar := ars.Head()
+ cplen := len(src) - done
+ if Addr(cplen) >= ar.Length() {
+ cplen = int(ar.Length())
+ }
+ n, err := uio.CopyOut(ctx, ar.Start, src[done:done+cplen], opts)
+ done += n
+ if err != nil {
+ return done, err
+ }
+ ars = ars.DropFirst(n)
+ }
+ return done, nil
+}
+
+// CopyInVec copies bytes from the memory mapped at ars in uio to dst. The
+// maximum number of bytes copied is ars.NumBytes() or len(dst), whichever is
+// less. CopyInVec returns the number of bytes copied; if this is less than the
+// maximum, it returns a non-nil error explaining why.
+//
+// Preconditions: As for IO.CopyIn.
+func CopyInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst []byte, opts IOOpts) (int, error) {
+ var done int
+ for !ars.IsEmpty() && done < len(dst) {
+ ar := ars.Head()
+ cplen := len(dst) - done
+ if Addr(cplen) >= ar.Length() {
+ cplen = int(ar.Length())
+ }
+ n, err := uio.CopyIn(ctx, ar.Start, dst[done:done+cplen], opts)
+ done += n
+ if err != nil {
+ return done, err
+ }
+ ars = ars.DropFirst(n)
+ }
+ return done, nil
+}
+
+// ZeroOutVec writes zeroes to the memory mapped at ars in uio. The maximum
+// number of bytes written is ars.NumBytes() or toZero, whichever is less.
+// ZeroOutVec returns the number of bytes written; if this is less than the
+// maximum, it returns a non-nil error explaining why.
+//
+// Preconditions: As for IO.ZeroOut.
+func ZeroOutVec(ctx context.Context, uio IO, ars AddrRangeSeq, toZero int64, opts IOOpts) (int64, error) {
+ var done int64
+ for !ars.IsEmpty() && done < toZero {
+ ar := ars.Head()
+ cplen := toZero - done
+ if Addr(cplen) >= ar.Length() {
+ cplen = int64(ar.Length())
+ }
+ n, err := uio.ZeroOut(ctx, ar.Start, cplen, opts)
+ done += n
+ if err != nil {
+ return done, err
+ }
+ ars = ars.DropFirst64(n)
+ }
+ return done, nil
+}
+
+func isASCIIWhitespace(b byte) bool {
+ // Compare Linux include/linux/ctype.h, lib/ctype.c.
+ // 9 => horizontal tab '\t'
+ // 10 => line feed '\n'
+ // 11 => vertical tab '\v'
+ // 12 => form feed '\f'
+ // 13 => carriage return '\r'
+ return b == ' ' || (b >= 9 && b <= 13)
+}
+
+// CopyInt32StringsInVec copies up to len(dsts) whitespace-separated decimal
+// strings from the memory mapped at ars in uio and converts them to int32
+// values in dsts. It returns the number of bytes read.
+//
+// CopyInt32StringsInVec shares the following properties with Linux's
+// kernel/sysctl.c:proc_dointvec(write=1):
+//
+// - If any read value overflows the range of int32, or any invalid characters
+// are encountered during the read, CopyInt32StringsInVec returns EINVAL.
+//
+// - If, upon reaching the end of ars, fewer than len(dsts) values have been
+// read, CopyInt32StringsInVec returns no error if at least 1 value was read
+// and EINVAL otherwise.
+//
+// - Trailing whitespace after the last successfully read value is counted in
+// the number of bytes read.
+//
+// Unlike proc_dointvec():
+//
+// - CopyInt32StringsInVec does not implicitly limit ars.NumBytes() to
+// PageSize-1; callers that require this must do so explicitly.
+//
+// - CopyInt32StringsInVec returns EINVAL if ars.NumBytes() == 0.
+//
+// Preconditions: As for CopyInVec.
+func CopyInt32StringsInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dsts []int32, opts IOOpts) (int64, error) {
+ if len(dsts) == 0 {
+ return 0, nil
+ }
+
+ buf := make([]byte, ars.NumBytes())
+ n, cperr := CopyInVec(ctx, uio, ars, buf, opts)
+ buf = buf[:n]
+
+ var i, j int
+ for ; j < len(dsts); j++ {
+ // Skip leading whitespace.
+ for i < len(buf) && isASCIIWhitespace(buf[i]) {
+ i++
+ }
+ if i == len(buf) {
+ break
+ }
+
+ // Find the end of the value to be parsed (next whitespace or end of string).
+ nextI := i + 1
+ for nextI < len(buf) && !isASCIIWhitespace(buf[nextI]) {
+ nextI++
+ }
+
+ // Parse a single value.
+ val, err := strconv.ParseInt(string(buf[i:nextI]), 10, 32)
+ if err != nil {
+ return int64(i), syserror.EINVAL
+ }
+ dsts[j] = int32(val)
+
+ i = nextI
+ }
+
+ // Skip trailing whitespace.
+ for i < len(buf) && isASCIIWhitespace(buf[i]) {
+ i++
+ }
+
+ if cperr != nil {
+ return int64(i), cperr
+ }
+ if j == 0 {
+ return int64(i), syserror.EINVAL
+ }
+ return int64(i), nil
+}
+
+// CopyInt32StringInVec is equivalent to CopyInt32StringsInVec, but copies at
+// most one int32.
+func CopyInt32StringInVec(ctx context.Context, uio IO, ars AddrRangeSeq, dst *int32, opts IOOpts) (int64, error) {
+ dsts := [1]int32{*dst}
+ n, err := CopyInt32StringsInVec(ctx, uio, ars, dsts[:], opts)
+ *dst = dsts[0]
+ return n, err
+}
+
+// IOSequence holds arguments to IO methods.
+type IOSequence struct {
+ IO IO
+ Addrs AddrRangeSeq
+ Opts IOOpts
+}
+
+// NumBytes returns s.Addrs.NumBytes().
+//
+// Note that NumBytes() may return 0 even if !s.Addrs.IsEmpty(), since
+// s.Addrs may contain a non-zero number of zero-length AddrRanges.
+//
+// Many clients of IOSequence currently do something like:
+//
+// if ioseq.NumBytes() == 0 {
+// return 0, nil
+// }
+// if f.availableBytes == 0 {
+// return 0, syserror.ErrWouldBlock
+// }
+// return ioseq.CopyOutFrom(..., reader)
+//
+// In such cases, using s.Addrs.IsEmpty() will cause them to have the wrong
+// behavior for zero-length I/O. However, using s.NumBytes() == 0 instead means
+// that we will return success for zero-length I/O in cases where Linux would
+// return EFAULT due to a failed access_ok() check, so in the long term we
+// should move checks for ErrWouldBlock etc. into the body of
+// reader.ReadToBlocks and use s.Addrs.IsEmpty() instead.
+func (s IOSequence) NumBytes() int64 {
+ return s.Addrs.NumBytes()
+}
+
+// DropFirst returns a copy of s with s.Addrs.DropFirst(n).
+//
+// Preconditions: As for AddrRangeSeq.DropFirst.
+func (s IOSequence) DropFirst(n int) IOSequence {
+ return IOSequence{s.IO, s.Addrs.DropFirst(n), s.Opts}
+}
+
+// DropFirst64 returns a copy of s with s.Addrs.DropFirst64(n).
+//
+// Preconditions: As for AddrRangeSeq.DropFirst64.
+func (s IOSequence) DropFirst64(n int64) IOSequence {
+ return IOSequence{s.IO, s.Addrs.DropFirst64(n), s.Opts}
+}
+
+// TakeFirst returns a copy of s with s.Addrs.TakeFirst(n).
+//
+// Preconditions: As for AddrRangeSeq.TakeFirst.
+func (s IOSequence) TakeFirst(n int) IOSequence {
+ return IOSequence{s.IO, s.Addrs.TakeFirst(n), s.Opts}
+}
+
+// TakeFirst64 returns a copy of s with s.Addrs.TakeFirst64(n).
+//
+// Preconditions: As for AddrRangeSeq.TakeFirst64.
+func (s IOSequence) TakeFirst64(n int64) IOSequence {
+ return IOSequence{s.IO, s.Addrs.TakeFirst64(n), s.Opts}
+}
+
+// CopyOut invokes CopyOutVec over s.Addrs.
+//
+// As with CopyOutVec, if s.NumBytes() < len(src), the copy will be truncated
+// to s.NumBytes(), and a nil error will be returned.
+//
+// Preconditions: As for CopyOutVec.
+func (s IOSequence) CopyOut(ctx context.Context, src []byte) (int, error) {
+ return CopyOutVec(ctx, s.IO, s.Addrs, src, s.Opts)
+}
+
+// CopyIn invokes CopyInVec over s.Addrs.
+//
+// As with CopyInVec, if s.NumBytes() < len(dst), the copy will be truncated to
+// s.NumBytes(), and a nil error will be returned.
+//
+// Preconditions: As for CopyInVec.
+func (s IOSequence) CopyIn(ctx context.Context, dst []byte) (int, error) {
+ return CopyInVec(ctx, s.IO, s.Addrs, dst, s.Opts)
+}
+
+// ZeroOut invokes ZeroOutVec over s.Addrs.
+//
+// As with ZeroOutVec, if s.NumBytes() < toZero, the write will be truncated
+// to s.NumBytes(), and a nil error will be returned.
+//
+// Preconditions: As for ZeroOutVec.
+func (s IOSequence) ZeroOut(ctx context.Context, toZero int64) (int64, error) {
+ return ZeroOutVec(ctx, s.IO, s.Addrs, toZero, s.Opts)
+}
+
+// CopyOutFrom invokes s.IO.CopyOutFrom over s.Addrs.
+//
+// Preconditions: As for IO.CopyOutFrom.
+func (s IOSequence) CopyOutFrom(ctx context.Context, src safemem.Reader) (int64, error) {
+ return s.IO.CopyOutFrom(ctx, s.Addrs, src, s.Opts)
+}
+
+// CopyInTo invokes s.IO.CopyInTo over s.Addrs.
+//
+// Preconditions: As for IO.CopyInTo.
+func (s IOSequence) CopyInTo(ctx context.Context, dst safemem.Writer) (int64, error) {
+ return s.IO.CopyInTo(ctx, s.Addrs, dst, s.Opts)
+}
+
+// Reader returns an io.Reader that reads from s. Reads beyond the end of s
+// return io.EOF. The preconditions that apply to s.CopyIn also apply to the
+// returned io.Reader.Read.
+func (s IOSequence) Reader(ctx context.Context) io.Reader {
+ return &ioSequenceReadWriter{ctx, s}
+}
+
+// Writer returns an io.Writer that writes to s. Writes beyond the end of s
+// return ErrEndOfIOSequence. The preconditions that apply to s.CopyOut also
+// apply to the returned io.Writer.Write.
+func (s IOSequence) Writer(ctx context.Context) io.Writer {
+ return &ioSequenceReadWriter{ctx, s}
+}
+
+// ErrEndOfIOSequence is returned by IOSequence.Writer().Write() when
+// attempting to write beyond the end of the IOSequence.
+var ErrEndOfIOSequence = errors.New("write beyond end of IOSequence")
+
+type ioSequenceReadWriter struct {
+ ctx context.Context
+ s IOSequence
+}
+
+// Read implements io.Reader.Read.
+func (rw *ioSequenceReadWriter) Read(dst []byte) (int, error) {
+ n, err := rw.s.CopyIn(rw.ctx, dst)
+ rw.s = rw.s.DropFirst(n)
+ if err == nil && rw.s.NumBytes() == 0 {
+ err = io.EOF
+ }
+ return n, err
+}
+
+// Write implements io.Writer.Write.
+func (rw *ioSequenceReadWriter) Write(src []byte) (int, error) {
+ n, err := rw.s.CopyOut(rw.ctx, src)
+ rw.s = rw.s.DropFirst(n)
+ if err == nil && n < len(src) {
+ err = ErrEndOfIOSequence
+ }
+ return n, err
+}
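
Tying the pieces together: CopyObjectOut/CopyObjectIn round-trip a fixed-size
value through an IOSequence backed by BytesIO, and the io.Writer view reports
ErrEndOfIOSequence on overflow (illustrative sketch, not part of this change;
nil ctx is safe only because BytesIO ignores it):

    package main

    import (
        "fmt"

        "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
    )

    func main() {
        buf := make([]byte, 8)
        ioseq := usermem.BytesIOSequence(buf)

        // Marshal a fixed-size value into "user" memory and read it back.
        n, err := usermem.CopyObjectOut(nil, ioseq.IO, 0, uint32(0x11223344), usermem.IOOpts{})
        fmt.Println(n, err) // 4 <nil>; buf now holds the value in native ByteOrder

        var v uint32
        _, err = usermem.CopyObjectIn(nil, ioseq.IO, 0, &v, usermem.IOOpts{})
        fmt.Printf("%#x %v\n", v, err) // 0x11223344 <nil>

        // The io.Writer view fails once the 8-byte sequence is exhausted.
        _, err = ioseq.Writer(nil).Write(make([]byte, 16))
        fmt.Println(err == usermem.ErrEndOfIOSequence) // true
    }
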
diff --git a/pkg/sentry/usermem/usermem_arm64.go b/pkg/sentry/usermem/usermem_arm64.go
new file mode 100644
index 000000000..fdfc30a66
--- /dev/null
+++ b/pkg/sentry/usermem/usermem_arm64.go
@@ -0,0 +1,53 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package usermem
+
+import (
+ "encoding/binary"
+ "syscall"
+)
+
+const (
+ // PageSize is the system page size.
+ // arm64 supports 4K/16K/64K page sizes, which can be queried via
+ // syscall.Getpagesize(). Currently, only the 4K page size is supported.
+ PageSize = 1 << PageShift
+
+ // HugePageSize is the system huge page size.
+ HugePageSize = 1 << HugePageShift
+
+ // PageShift is the binary log of the system page size.
+ PageShift = 12
+
+ // HugePageShift is the binary log of the system huge page size.
+ // It should be calculated as PageShift + (PageShift - 3) once support
+ // for multiple page sizes is ready.
+ HugePageShift = 21
+)
+
+var (
+ // ByteOrder is the native byte order (little endian).
+ ByteOrder = binary.LittleEndian
+)
+
+func init() {
+ // Make sure the page size is 4K on arm64 platform.
+ if size := syscall.Getpagesize(); size != PageSize {
+ panic("Only 4K page size is supported on arm64!")
+ }
+}
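
The HugePageShift = PageShift + (PageShift - 3) relation mentioned above falls
out of each page-table page holding 2^(PageShift-3) eight-byte entries; a tiny
standalone check (illustrative arithmetic only):

    package main

    import "fmt"

    func main() {
        // A page of 2^PageShift bytes holds 2^(PageShift-3) 8-byte PTEs, so a
        // huge page spans one full last-level table worth of base pages.
        for _, pageShift := range []uint{12, 14, 16} {
            hugeShift := pageShift + (pageShift - 3)
            fmt.Printf("PageShift %d => HugePageShift %d (%d MiB huge pages)\n",
                pageShift, hugeShift, (1<<hugeShift)>>20)
        }
        // Prints 12 => 21 (2 MiB), 14 => 25 (32 MiB), 16 => 29 (512 MiB).
    }
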
diff --git a/pkg/sentry/usermem/usermem_state_autogen.go b/pkg/sentry/usermem/usermem_state_autogen.go
new file mode 100755
index 000000000..bc728eab3
--- /dev/null
+++ b/pkg/sentry/usermem/usermem_state_autogen.go
@@ -0,0 +1,49 @@
+// automatically generated by stateify.
+
+package usermem
+
+import (
+ "gvisor.googlesource.com/gvisor/pkg/state"
+)
+
+func (x *AccessType) beforeSave() {}
+func (x *AccessType) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Read", &x.Read)
+ m.Save("Write", &x.Write)
+ m.Save("Execute", &x.Execute)
+}
+
+func (x *AccessType) afterLoad() {}
+func (x *AccessType) load(m state.Map) {
+ m.Load("Read", &x.Read)
+ m.Load("Write", &x.Write)
+ m.Load("Execute", &x.Execute)
+}
+
+func (x *Addr) save(m state.Map) {
+ m.SaveValue("", (uintptr)(*x))
+}
+
+func (x *Addr) load(m state.Map) {
+ m.LoadValue("", new(uintptr), func(y interface{}) { *x = (Addr)(y.(uintptr)) })
+}
+
+func (x *AddrRange) beforeSave() {}
+func (x *AddrRange) save(m state.Map) {
+ x.beforeSave()
+ m.Save("Start", &x.Start)
+ m.Save("End", &x.End)
+}
+
+func (x *AddrRange) afterLoad() {}
+func (x *AddrRange) load(m state.Map) {
+ m.Load("Start", &x.Start)
+ m.Load("End", &x.End)
+}
+
+func init() {
+ state.Register("usermem.AccessType", (*AccessType)(nil), state.Fns{Save: (*AccessType).save, Load: (*AccessType).load})
+ state.Register("usermem.Addr", (*Addr)(nil), state.Fns{Save: (*Addr).save, Load: (*Addr).load})
+ state.Register("usermem.AddrRange", (*AddrRange)(nil), state.Fns{Save: (*AddrRange).save, Load: (*AddrRange).load})
+}
diff --git a/pkg/sentry/usermem/usermem_unsafe.go b/pkg/sentry/usermem/usermem_unsafe.go
new file mode 100644
index 000000000..876783e78
--- /dev/null
+++ b/pkg/sentry/usermem/usermem_unsafe.go
@@ -0,0 +1,27 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package usermem
+
+import (
+ "unsafe"
+)
+
+// stringFromImmutableBytes is equivalent to string(bs), except that it never
+// copies even if escape analysis can't prove that bs does not escape. This is
+// only valid if bs is never mutated after stringFromImmutableBytes returns.
+func stringFromImmutableBytes(bs []byte) string {
+ // Compare strings.Builder.String().
+ return *(*string)(unsafe.Pointer(&bs))
+}
diff --git a/pkg/sentry/usermem/usermem_x86.go b/pkg/sentry/usermem/usermem_x86.go
new file mode 100644
index 000000000..8059b72d2
--- /dev/null
+++ b/pkg/sentry/usermem/usermem_x86.go
@@ -0,0 +1,38 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package usermem
+
+import "encoding/binary"
+
+const (
+ // PageSize is the system page size.
+ PageSize = 1 << PageShift
+
+ // HugePageSize is the system huge page size.
+ HugePageSize = 1 << HugePageShift
+
+ // PageShift is the binary log of the system page size.
+ PageShift = 12
+
+ // HugePageShift is the binary log of the system huge page size.
+ HugePageShift = 21
+)
+
+var (
+ // ByteOrder is the native byte order (little endian).
+ ByteOrder = binary.LittleEndian
+)