summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/loader
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/loader')
-rw-r--r--pkg/sentry/loader/BUILD46
-rw-r--r--pkg/sentry/loader/elf.go700
-rw-r--r--pkg/sentry/loader/interpreter.go108
-rw-r--r--pkg/sentry/loader/loader.go315
-rw-r--r--pkg/sentry/loader/vdso.go382
-rw-r--r--pkg/sentry/loader/vdso_state.go48
6 files changed, 1599 insertions, 0 deletions
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD
new file mode 100644
index 000000000..34bdb0b69
--- /dev/null
+++ b/pkg/sentry/loader/BUILD
@@ -0,0 +1,46 @@
+load("//tools:defs.bzl", "go_embed_data", "go_library")
+
+package(licenses = ["notice"])
+
+go_embed_data(
+ name = "vdso_bin",
+ src = "//vdso:vdso.so",
+ package = "loader",
+ var = "vdsoBin",
+)
+
+go_library(
+ name = "loader",
+ srcs = [
+ "elf.go",
+ "interpreter.go",
+ "loader.go",
+ "vdso.go",
+ "vdso_state.go",
+ ":vdso_bin",
+ ],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi",
+ "//pkg/abi/linux",
+ "//pkg/binary",
+ "//pkg/context",
+ "//pkg/cpuid",
+ "//pkg/log",
+ "//pkg/rand",
+ "//pkg/safemem",
+ "//pkg/sentry/arch",
+ "//pkg/sentry/fsbridge",
+ "//pkg/sentry/kernel/auth",
+ "//pkg/sentry/limits",
+ "//pkg/sentry/memmap",
+ "//pkg/sentry/mm",
+ "//pkg/sentry/pgalloc",
+ "//pkg/sentry/uniqueid",
+ "//pkg/sentry/usage",
+ "//pkg/sentry/vfs",
+ "//pkg/syserr",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ ],
+)
diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go
new file mode 100644
index 000000000..ddeaff3db
--- /dev/null
+++ b/pkg/sentry/loader/elf.go
@@ -0,0 +1,700 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package loader
+
+import (
+ "bytes"
+ "debug/elf"
+ "fmt"
+ "io"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsbridge"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const (
+ // elfMagic identifies an ELF file.
+ elfMagic = "\x7fELF"
+
+ // maxTotalPhdrSize is the maximum combined size of all program
+ // headers. Linux limits this to one page.
+ maxTotalPhdrSize = usermem.PageSize
+)
+
+var (
+ // header64Size is the size of elf.Header64.
+ header64Size = int(binary.Size(elf.Header64{}))
+
+ // Prog64Size is the size of elf.Prog64.
+ prog64Size = int(binary.Size(elf.Prog64{}))
+)
+
+func progFlagsAsPerms(f elf.ProgFlag) usermem.AccessType {
+ var p usermem.AccessType
+ if f&elf.PF_R == elf.PF_R {
+ p.Read = true
+ }
+ if f&elf.PF_W == elf.PF_W {
+ p.Write = true
+ }
+ if f&elf.PF_X == elf.PF_X {
+ p.Execute = true
+ }
+ return p
+}
+
+// elfInfo contains the metadata needed to load an ELF binary.
+type elfInfo struct {
+ // os is the target OS of the ELF.
+ os abi.OS
+
+ // arch is the target architecture of the ELF.
+ arch arch.Arch
+
+ // entry is the program entry point.
+ entry usermem.Addr
+
+ // phdrs are the program headers.
+ phdrs []elf.ProgHeader
+
+ // phdrSize is the size of a single program header in the ELF.
+ phdrSize int
+
+ // phdrOff is the offset of the program headers in the file.
+ phdrOff uint64
+
+ // sharedObject is true if the ELF represents a shared object.
+ sharedObject bool
+}
+
+// fullReader interface extracts the ReadFull method from fsbridge.File so that
+// client code does not need to define an entire fsbridge.File when only read
+// functionality is needed.
+//
+// TODO(gvisor.dev/issue/1035): Once VFS2 ships, rewrite this to wrap
+// vfs.FileDescription's PRead/Read instead.
+type fullReader interface {
+ // ReadFull is the same as fsbridge.File.ReadFull.
+ ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error)
+}
+
+// parseHeader parse the ELF header, verifying that this is a supported ELF
+// file and returning the ELF program headers.
+//
+// This is similar to elf.NewFile, except that it is more strict about what it
+// accepts from the ELF, and it doesn't parse unnecessary parts of the file.
+func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) {
+ // Check ident first; it will tell us the endianness of the rest of the
+ // structs.
+ var ident [elf.EI_NIDENT]byte
+ _, err := f.ReadFull(ctx, usermem.BytesIOSequence(ident[:]), 0)
+ if err != nil {
+ log.Infof("Error reading ELF ident: %v", err)
+ // The entire ident array always exists.
+ if err == io.EOF || err == io.ErrUnexpectedEOF {
+ err = syserror.ENOEXEC
+ }
+ return elfInfo{}, err
+ }
+
+ // Only some callers pre-check the ELF magic.
+ if !bytes.Equal(ident[:len(elfMagic)], []byte(elfMagic)) {
+ log.Infof("File is not an ELF")
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ // We only support 64-bit, little endian binaries
+ if class := elf.Class(ident[elf.EI_CLASS]); class != elf.ELFCLASS64 {
+ log.Infof("Unsupported ELF class: %v", class)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ if endian := elf.Data(ident[elf.EI_DATA]); endian != elf.ELFDATA2LSB {
+ log.Infof("Unsupported ELF endianness: %v", endian)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ byteOrder := binary.LittleEndian
+
+ if version := elf.Version(ident[elf.EI_VERSION]); version != elf.EV_CURRENT {
+ log.Infof("Unsupported ELF version: %v", version)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ // EI_OSABI is ignored by Linux, which is the only OS supported.
+ os := abi.Linux
+
+ var hdr elf.Header64
+ hdrBuf := make([]byte, header64Size)
+ _, err = f.ReadFull(ctx, usermem.BytesIOSequence(hdrBuf), 0)
+ if err != nil {
+ log.Infof("Error reading ELF header: %v", err)
+ // The entire header always exists.
+ if err == io.EOF || err == io.ErrUnexpectedEOF {
+ err = syserror.ENOEXEC
+ }
+ return elfInfo{}, err
+ }
+ binary.Unmarshal(hdrBuf, byteOrder, &hdr)
+
+ // We support amd64 and arm64.
+ var a arch.Arch
+ switch machine := elf.Machine(hdr.Machine); machine {
+ case elf.EM_X86_64:
+ a = arch.AMD64
+ case elf.EM_AARCH64:
+ a = arch.ARM64
+ default:
+ log.Infof("Unsupported ELF machine %d", machine)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ var sharedObject bool
+ elfType := elf.Type(hdr.Type)
+ switch elfType {
+ case elf.ET_EXEC:
+ sharedObject = false
+ case elf.ET_DYN:
+ sharedObject = true
+ default:
+ log.Infof("Unsupported ELF type %v", elfType)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ if int(hdr.Phentsize) != prog64Size {
+ log.Infof("Unsupported phdr size %d", hdr.Phentsize)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ totalPhdrSize := prog64Size * int(hdr.Phnum)
+ if totalPhdrSize < prog64Size {
+ log.Warningf("No phdrs or total phdr size overflows: prog64Size: %d phnum: %d", prog64Size, int(hdr.Phnum))
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ if totalPhdrSize > maxTotalPhdrSize {
+ log.Infof("Too many phdrs (%d): total size %d > %d", hdr.Phnum, totalPhdrSize, maxTotalPhdrSize)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ phdrBuf := make([]byte, totalPhdrSize)
+ _, err = f.ReadFull(ctx, usermem.BytesIOSequence(phdrBuf), int64(hdr.Phoff))
+ if err != nil {
+ log.Infof("Error reading ELF phdrs: %v", err)
+ // If phdrs were specified, they should all exist.
+ if err == io.EOF || err == io.ErrUnexpectedEOF {
+ err = syserror.ENOEXEC
+ }
+ return elfInfo{}, err
+ }
+
+ phdrs := make([]elf.ProgHeader, hdr.Phnum)
+ for i := range phdrs {
+ var prog64 elf.Prog64
+ binary.Unmarshal(phdrBuf[:prog64Size], byteOrder, &prog64)
+ phdrBuf = phdrBuf[prog64Size:]
+ phdrs[i] = elf.ProgHeader{
+ Type: elf.ProgType(prog64.Type),
+ Flags: elf.ProgFlag(prog64.Flags),
+ Off: prog64.Off,
+ Vaddr: prog64.Vaddr,
+ Paddr: prog64.Paddr,
+ Filesz: prog64.Filesz,
+ Memsz: prog64.Memsz,
+ Align: prog64.Align,
+ }
+ }
+
+ return elfInfo{
+ os: os,
+ arch: a,
+ entry: usermem.Addr(hdr.Entry),
+ phdrs: phdrs,
+ phdrOff: hdr.Phoff,
+ phdrSize: prog64Size,
+ sharedObject: sharedObject,
+ }, nil
+}
+
+// mapSegment maps a phdr into the Task. offset is the offset to apply to
+// phdr.Vaddr.
+func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset usermem.Addr) error {
+ // We must make a page-aligned mapping.
+ adjust := usermem.Addr(phdr.Vaddr).PageOffset()
+
+ addr, ok := offset.AddLength(phdr.Vaddr)
+ if !ok {
+ // If offset != 0 we should have ensured this would fit.
+ ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset)
+ return syserror.ENOEXEC
+ }
+ addr -= usermem.Addr(adjust)
+
+ fileSize := phdr.Filesz + adjust
+ if fileSize < phdr.Filesz {
+ ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust)
+ return syserror.ENOEXEC
+ }
+ ms, ok := usermem.Addr(fileSize).RoundUp()
+ if !ok {
+ ctx.Infof("fileSize %#x too large", fileSize)
+ return syserror.ENOEXEC
+ }
+ mapSize := uint64(ms)
+
+ if mapSize > 0 {
+ // This must result in a page-aligned offset. i.e., the original
+ // phdr.Off must have the same alignment as phdr.Vaddr. If that is not
+ // true, MMap will reject the mapping.
+ fileOffset := phdr.Off - adjust
+
+ prot := progFlagsAsPerms(phdr.Flags)
+ mopts := memmap.MMapOpts{
+ Length: mapSize,
+ Offset: fileOffset,
+ Addr: addr,
+ Fixed: true,
+ // Linux will happily allow conflicting segments to map over
+ // one another.
+ Unmap: true,
+ Private: true,
+ Perms: prot,
+ MaxPerms: usermem.AnyAccess,
+ }
+ defer func() {
+ if mopts.MappingIdentity != nil {
+ mopts.MappingIdentity.DecRef()
+ }
+ }()
+ if err := f.ConfigureMMap(ctx, &mopts); err != nil {
+ ctx.Infof("File is not memory-mappable: %v", err)
+ return err
+ }
+ if _, err := m.MMap(ctx, mopts); err != nil {
+ ctx.Infof("Error mapping PT_LOAD segment %+v at %#x: %v", phdr, addr, err)
+ return err
+ }
+
+ // We need to clear the end of the last page that exceeds fileSize so
+ // we don't map part of the file beyond fileSize.
+ //
+ // Note that Linux *does not* clear the portion of the first page
+ // before phdr.Off.
+ if mapSize > fileSize {
+ zeroAddr, ok := addr.AddLength(fileSize)
+ if !ok {
+ panic(fmt.Sprintf("successfully mmaped address overflows? %#x + %#x", addr, fileSize))
+ }
+ zeroSize := int64(mapSize - fileSize)
+ if zeroSize < 0 {
+ panic(fmt.Sprintf("zeroSize too big? %#x", uint64(zeroSize)))
+ }
+ if _, err := m.ZeroOut(ctx, zeroAddr, zeroSize, usermem.IOOpts{IgnorePermissions: true}); err != nil {
+ ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+usermem.Addr(zeroSize), err)
+ return err
+ }
+ }
+ }
+
+ memSize := phdr.Memsz + adjust
+ if memSize < phdr.Memsz {
+ ctx.Infof("Computed segment mem size overflows: %#x + %#x", phdr.Memsz, adjust)
+ return syserror.ENOEXEC
+ }
+
+ // Allocate more anonymous pages if necessary.
+ if mapSize < memSize {
+ anonAddr, ok := addr.AddLength(mapSize)
+ if !ok {
+ panic(fmt.Sprintf("anonymous memory doesn't fit in pre-sized range? %#x + %#x", addr, mapSize))
+ }
+ anonSize, ok := usermem.Addr(memSize - mapSize).RoundUp()
+ if !ok {
+ ctx.Infof("extra anon pages too large: %#x", memSize-mapSize)
+ return syserror.ENOEXEC
+ }
+
+ // N.B. Linux uses vm_brk_flags to map these pages, which only
+ // honors the X bit, always mapping at least RW. ignoring These
+ // pages are not included in the final brk region.
+ prot := usermem.ReadWrite
+ if phdr.Flags&elf.PF_X == elf.PF_X {
+ prot.Execute = true
+ }
+
+ if _, err := m.MMap(ctx, memmap.MMapOpts{
+ Length: uint64(anonSize),
+ Addr: anonAddr,
+ // Fixed without Unmap will fail the mmap if something is
+ // already at addr.
+ Fixed: true,
+ Private: true,
+ Perms: prot,
+ MaxPerms: usermem.AnyAccess,
+ }); err != nil {
+ ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// loadedELF describes an ELF that has been successfully loaded.
+type loadedELF struct {
+ // os is the target OS of the ELF.
+ os abi.OS
+
+ // arch is the target architecture of the ELF.
+ arch arch.Arch
+
+ // entry is the entry point of the ELF.
+ entry usermem.Addr
+
+ // start is the end of the ELF.
+ start usermem.Addr
+
+ // end is the end of the ELF.
+ end usermem.Addr
+
+ // interpter is the path to the ELF interpreter.
+ interpreter string
+
+ // phdrAddr is the address of the ELF program headers.
+ phdrAddr usermem.Addr
+
+ // phdrSize is the size of a single program header in the ELF.
+ phdrSize int
+
+ // phdrNum is the number of program headers.
+ phdrNum int
+
+ // auxv contains a subset of ELF-specific auxiliary vector entries:
+ // * AT_PHDR
+ // * AT_PHENT
+ // * AT_PHNUM
+ // * AT_BASE
+ // * AT_ENTRY
+ auxv arch.Auxv
+}
+
+// loadParsedELF loads f into mm.
+//
+// info is the parsed elfInfo from the header.
+//
+// It does not load the ELF interpreter, or return any auxv entries.
+//
+// Preconditions:
+// * f is an ELF file
+func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) {
+ first := true
+ var start, end usermem.Addr
+ var interpreter string
+ for _, phdr := range info.phdrs {
+ switch phdr.Type {
+ case elf.PT_LOAD:
+ vaddr := usermem.Addr(phdr.Vaddr)
+ if first {
+ first = false
+ start = vaddr
+ }
+ if vaddr < end {
+ // NOTE(b/37474556): Linux allows out-of-order
+ // segments, in violation of the spec.
+ ctx.Infof("PT_LOAD headers out-of-order. %#x < %#x", vaddr, end)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+ var ok bool
+ end, ok = vaddr.AddLength(phdr.Memsz)
+ if !ok {
+ ctx.Infof("PT_LOAD header size overflows. %#x + %#x", vaddr, phdr.Memsz)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ case elf.PT_INTERP:
+ if phdr.Filesz < 2 {
+ ctx.Infof("PT_INTERP path too small: %v", phdr.Filesz)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+ if phdr.Filesz > linux.PATH_MAX {
+ ctx.Infof("PT_INTERP path too big: %v", phdr.Filesz)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ path := make([]byte, phdr.Filesz)
+ _, err := f.ReadFull(ctx, usermem.BytesIOSequence(path), int64(phdr.Off))
+ if err != nil {
+ // If an interpreter was specified, it should exist.
+ ctx.Infof("Error reading PT_INTERP path: %v", err)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ if path[len(path)-1] != 0 {
+ ctx.Infof("PT_INTERP path not NUL-terminated: %v", path)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ // Strip NUL-terminator and everything beyond from
+ // string. Note that there may be a NUL-terminator
+ // before len(path)-1.
+ interpreter = string(path[:bytes.IndexByte(path, '\x00')])
+ if interpreter == "" {
+ // Linux actually attempts to open_exec("\0").
+ // open_exec -> do_open_execat fails to check
+ // that name != '\0' before calling
+ // do_filp_open, which thus opens the working
+ // directory. do_open_execat returns EACCES
+ // because the directory is not a regular file.
+ //
+ // We bypass that nonsense and simply
+ // short-circuit with EACCES. Those this does
+ // mean that there may be some edge cases where
+ // the open path would return a different
+ // error.
+ ctx.Infof("PT_INTERP path is empty: %v", path)
+ return loadedELF{}, syserror.EACCES
+ }
+ }
+ }
+
+ // Shared objects don't have fixed load addresses. We need to pick a
+ // base address big enough to fit all segments, so we first create a
+ // mapping for the total size just to find a region that is big enough.
+ //
+ // It is safe to unmap it immediately without racing with another mapping
+ // because we are the only one in control of the MemoryManager.
+ //
+ // Note that the vaddr of the first PT_LOAD segment is ignored when
+ // choosing the load address (even if it is non-zero). The vaddr does
+ // become an offset from that load address.
+ var offset usermem.Addr
+ if info.sharedObject {
+ totalSize := end - start
+ totalSize, ok := totalSize.RoundUp()
+ if !ok {
+ ctx.Infof("ELF PT_LOAD segments too big")
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ var err error
+ offset, err = m.MMap(ctx, memmap.MMapOpts{
+ Length: uint64(totalSize),
+ Addr: sharedLoadOffset,
+ Private: true,
+ })
+ if err != nil {
+ ctx.Infof("Error allocating address space for shared object: %v", err)
+ return loadedELF{}, err
+ }
+ if err := m.MUnmap(ctx, offset, uint64(totalSize)); err != nil {
+ panic(fmt.Sprintf("Failed to unmap base address: %v", err))
+ }
+
+ start, ok = start.AddLength(uint64(offset))
+ if !ok {
+ panic(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset))
+ }
+
+ end, ok = end.AddLength(uint64(offset))
+ if !ok {
+ panic(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset))
+ }
+
+ info.entry, ok = info.entry.AddLength(uint64(offset))
+ if !ok {
+ ctx.Infof("Entrypoint %#x + offset %#x overflows? Is the entrypoint within a segment?", info.entry, offset)
+ return loadedELF{}, err
+ }
+ }
+
+ // Map PT_LOAD segments.
+ for _, phdr := range info.phdrs {
+ switch phdr.Type {
+ case elf.PT_LOAD:
+ if phdr.Memsz == 0 {
+ // No need to load segments with size 0, but
+ // they exist in some binaries.
+ continue
+ }
+
+ if err := mapSegment(ctx, m, f, &phdr, offset); err != nil {
+ ctx.Infof("Failed to map PT_LOAD segment: %+v", phdr)
+ return loadedELF{}, err
+ }
+ }
+ }
+
+ // This assumes that the first segment contains the ELF headers. This
+ // may not be true in a malformed ELF, but Linux makes the same
+ // assumption.
+ phdrAddr, ok := start.AddLength(info.phdrOff)
+ if !ok {
+ ctx.Warningf("ELF start address %#x + phdr offset %#x overflows", start, info.phdrOff)
+ phdrAddr = 0
+ }
+
+ return loadedELF{
+ os: info.os,
+ arch: info.arch,
+ entry: info.entry,
+ start: start,
+ end: end,
+ interpreter: interpreter,
+ phdrAddr: phdrAddr,
+ phdrSize: info.phdrSize,
+ phdrNum: len(info.phdrs),
+ }, nil
+}
+
+// loadInitialELF loads f into mm.
+//
+// It creates an arch.Context for the ELF and prepares the mm for this arch.
+//
+// It does not load the ELF interpreter, or return any auxv entries.
+//
+// Preconditions:
+// * f is an ELF file
+// * f is the first ELF loaded into m
+func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f fsbridge.File) (loadedELF, arch.Context, error) {
+ info, err := parseHeader(ctx, f)
+ if err != nil {
+ ctx.Infof("Failed to parse initial ELF: %v", err)
+ return loadedELF{}, nil, err
+ }
+
+ // Check Image Compatibility.
+ if arch.Host != info.arch {
+ ctx.Warningf("Found mismatch for platform %s with ELF type %s", arch.Host.String(), info.arch.String())
+ return loadedELF{}, nil, syserror.ENOEXEC
+ }
+
+ // Create the arch.Context now so we can prepare the mmap layout before
+ // mapping anything.
+ ac := arch.New(info.arch, fs)
+
+ l, err := m.SetMmapLayout(ac, limits.FromContext(ctx))
+ if err != nil {
+ ctx.Warningf("Failed to set mmap layout: %v", err)
+ return loadedELF{}, nil, err
+ }
+
+ // PIELoadAddress tries to move the ELF out of the way of the default
+ // mmap base to ensure that the initial brk has sufficient space to
+ // grow.
+ le, err := loadParsedELF(ctx, m, f, info, ac.PIELoadAddress(l))
+ return le, ac, err
+}
+
+// loadInterpreterELF loads f into mm.
+//
+// The interpreter must be for the same OS/Arch as the initial ELF.
+//
+// It does not return any auxv entries.
+//
+// Preconditions:
+// * f is an ELF file
+func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, initial loadedELF) (loadedELF, error) {
+ info, err := parseHeader(ctx, f)
+ if err != nil {
+ if err == syserror.ENOEXEC {
+ // Bad interpreter.
+ err = syserror.ELIBBAD
+ }
+ return loadedELF{}, err
+ }
+
+ if info.os != initial.os {
+ ctx.Infof("Initial ELF OS %v and interpreter ELF OS %v differ", initial.os, info.os)
+ return loadedELF{}, syserror.ELIBBAD
+ }
+ if info.arch != initial.arch {
+ ctx.Infof("Initial ELF arch %v and interpreter ELF arch %v differ", initial.arch, info.arch)
+ return loadedELF{}, syserror.ELIBBAD
+ }
+
+ // The interpreter is not given a load offset, as its location does not
+ // affect brk.
+ return loadParsedELF(ctx, m, f, info, 0)
+}
+
+// loadELF loads args.File into the Task address space.
+//
+// If loadELF returns ErrSwitchFile it should be called again with the returned
+// path and argv.
+//
+// Preconditions:
+// * args.File is an ELF file
+func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error) {
+ bin, ac, err := loadInitialELF(ctx, args.MemoryManager, args.Features, args.File)
+ if err != nil {
+ ctx.Infof("Error loading binary: %v", err)
+ return loadedELF{}, nil, err
+ }
+
+ var interp loadedELF
+ if bin.interpreter != "" {
+ // Even if we do not allow the final link of the script to be
+ // resolved, the interpreter should still be resolved if it is
+ // a symlink.
+ args.ResolveFinal = true
+ // Refresh the traversal limit.
+ *args.RemainingTraversals = linux.MaxSymlinkTraversals
+ args.Filename = bin.interpreter
+ intFile, err := openPath(ctx, args)
+ if err != nil {
+ ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err)
+ return loadedELF{}, nil, err
+ }
+ defer intFile.DecRef()
+
+ interp, err = loadInterpreterELF(ctx, args.MemoryManager, intFile, bin)
+ if err != nil {
+ ctx.Infof("Error loading interpreter: %v", err)
+ return loadedELF{}, nil, err
+ }
+
+ if interp.interpreter != "" {
+ // No recursive interpreters!
+ ctx.Infof("Interpreter requires an interpreter")
+ return loadedELF{}, nil, syserror.ENOEXEC
+ }
+ }
+
+ // ELF-specific auxv entries.
+ bin.auxv = arch.Auxv{
+ arch.AuxEntry{linux.AT_PHDR, bin.phdrAddr},
+ arch.AuxEntry{linux.AT_PHENT, usermem.Addr(bin.phdrSize)},
+ arch.AuxEntry{linux.AT_PHNUM, usermem.Addr(bin.phdrNum)},
+ arch.AuxEntry{linux.AT_ENTRY, bin.entry},
+ }
+ if bin.interpreter != "" {
+ bin.auxv = append(bin.auxv, arch.AuxEntry{linux.AT_BASE, interp.start})
+
+ // Start in the interpreter.
+ // N.B. AT_ENTRY above contains the *original* entry point.
+ bin.entry = interp.entry
+ } else {
+ // Always add AT_BASE even if there is no interpreter.
+ bin.auxv = append(bin.auxv, arch.AuxEntry{linux.AT_BASE, 0})
+ }
+
+ return bin, ac, nil
+}
diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go
new file mode 100644
index 000000000..3886b4d33
--- /dev/null
+++ b/pkg/sentry/loader/interpreter.go
@@ -0,0 +1,108 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package loader
+
+import (
+ "bytes"
+ "io"
+
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsbridge"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const (
+ // interpreterScriptMagic identifies an interpreter script.
+ interpreterScriptMagic = "#!"
+
+ // interpMaxLineLength is the maximum length for the first line of an
+ // interpreter script.
+ //
+ // From execve(2): "A maximum line length of 127 characters is allowed
+ // for the first line in a #! executable shell script."
+ interpMaxLineLength = 127
+)
+
+// parseInterpreterScript returns the interpreter path and argv.
+func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.File, argv []string) (newpath string, newargv []string, err error) {
+ line := make([]byte, interpMaxLineLength)
+ n, err := f.ReadFull(ctx, usermem.BytesIOSequence(line), 0)
+ // Short read is OK.
+ if err != nil && err != io.ErrUnexpectedEOF {
+ if err == io.EOF {
+ err = syserror.ENOEXEC
+ }
+ return "", []string{}, err
+ }
+ line = line[:n]
+
+ if !bytes.Equal(line[:2], []byte(interpreterScriptMagic)) {
+ return "", []string{}, syserror.ENOEXEC
+ }
+ // Ignore #!.
+ line = line[2:]
+
+ // Ignore everything after newline.
+ // Linux silently truncates the remainder of the line if it exceeds
+ // interpMaxLineLength.
+ i := bytes.IndexByte(line, '\n')
+ if i > 0 {
+ line = line[:i]
+ }
+
+ // Skip any whitespace before the interpeter.
+ line = bytes.TrimLeft(line, " \t")
+
+ // Linux only looks for spaces or tabs delimiting the interpreter and
+ // arg.
+ //
+ // execve(2): "On Linux, the entire string following the interpreter
+ // name is passed as a single argument to the interpreter, and this
+ // string can include white space."
+ interp := line
+ var arg []byte
+ i = bytes.IndexAny(line, " \t")
+ if i >= 0 {
+ interp = line[:i]
+ arg = bytes.TrimLeft(line[i:], " \t")
+ }
+
+ if string(interp) == "" {
+ ctx.Infof("Interpreter script contains no interpreter: %v", line)
+ return "", []string{}, syserror.ENOEXEC
+ }
+
+ // Build the new argument list:
+ //
+ // 1. The interpreter.
+ newargv = append(newargv, string(interp))
+
+ // 2. The optional interpreter argument.
+ if len(arg) > 0 {
+ newargv = append(newargv, string(arg))
+ }
+
+ // 3. The original arguments. The original argv[0] is replaced with the
+ // full script filename.
+ if len(argv) > 0 {
+ argv[0] = filename
+ } else {
+ argv = []string{filename}
+ }
+ newargv = append(newargv, argv...)
+
+ return string(interp), newargv, nil
+}
diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go
new file mode 100644
index 000000000..986c7fb4d
--- /dev/null
+++ b/pkg/sentry/loader/loader.go
@@ -0,0 +1,315 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package loader loads an executable file into a MemoryManager.
+package loader
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "path"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/cpuid"
+ "gvisor.dev/gvisor/pkg/rand"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsbridge"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserr"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// LoadArgs holds specifications for an executable file to be loaded.
+type LoadArgs struct {
+ // MemoryManager is the memory manager to load the executable into.
+ MemoryManager *mm.MemoryManager
+
+ // RemainingTraversals is the maximum number of symlinks to follow to
+ // resolve Filename. This counter is passed by reference to keep it
+ // updated throughout the call stack.
+ RemainingTraversals *uint
+
+ // ResolveFinal indicates whether the final link of Filename should be
+ // resolved, if it is a symlink.
+ ResolveFinal bool
+
+ // Filename is the path for the executable.
+ Filename string
+
+ // File is an open fs.File object of the executable. If File is not
+ // nil, then File will be loaded and Filename will be ignored.
+ //
+ // The caller is responsible for checking that the user can execute this file.
+ File fsbridge.File
+
+ // Opener is used to open the executable file when 'File' is nil.
+ Opener fsbridge.Lookup
+
+ // CloseOnExec indicates that the executable (or one of its parent
+ // directories) was opened with O_CLOEXEC. If the executable is an
+ // interpreter script, then cause an ENOENT error to occur, since the
+ // script would otherwise be inaccessible to the interpreter.
+ CloseOnExec bool
+
+ // Argv is the vector of arguments to pass to the executable.
+ Argv []string
+
+ // Envv is the vector of environment variables to pass to the
+ // executable.
+ Envv []string
+
+ // Features specifies the CPU feature set for the executable.
+ Features *cpuid.FeatureSet
+}
+
+// openPath opens args.Filename and checks that it is valid for loading.
+//
+// openPath returns an *fs.Dirent and *fs.File for args.Filename, which is not
+// installed in the Task FDTable. The caller takes ownership of both.
+//
+// args.Filename must be a readable, executable, regular file.
+func openPath(ctx context.Context, args LoadArgs) (fsbridge.File, error) {
+ if args.Filename == "" {
+ ctx.Infof("cannot open empty name")
+ return nil, syserror.ENOENT
+ }
+
+ // TODO(gvisor.dev/issue/160): Linux requires only execute permission,
+ // not read. However, our backing filesystems may prevent us from reading
+ // the file without read permission. Additionally, a task with a
+ // non-readable executable has additional constraints on access via
+ // ptrace and procfs.
+ opts := vfs.OpenOptions{
+ Flags: linux.O_RDONLY,
+ FileExec: true,
+ }
+ return args.Opener.OpenPath(ctx, args.Filename, opts, args.RemainingTraversals, args.ResolveFinal)
+}
+
+// checkIsRegularFile prevents us from trying to execute a directory, pipe, etc.
+func checkIsRegularFile(ctx context.Context, file fsbridge.File, filename string) error {
+ t, err := file.Type(ctx)
+ if err != nil {
+ return err
+ }
+ if t != linux.ModeRegular {
+ ctx.Infof("%q is not a regular file: %v", filename, t)
+ return syserror.EACCES
+ }
+ return nil
+}
+
+// allocStack allocates and maps a stack in to any available part of the address space.
+func allocStack(ctx context.Context, m *mm.MemoryManager, a arch.Context) (*arch.Stack, error) {
+ ar, err := m.MapStack(ctx)
+ if err != nil {
+ return nil, err
+ }
+ return &arch.Stack{a, m, ar.End}, nil
+}
+
+const (
+ // maxLoaderAttempts is the maximum number of attempts to try to load
+ // an interpreter scripts, to prevent loops. 6 (initial + 5 changes) is
+ // what the Linux kernel allows (fs/exec.c:search_binary_handler).
+ maxLoaderAttempts = 6
+)
+
+// loadExecutable loads an executable that is pointed to by args.File. The
+// caller is responsible for checking that the user can execute this file.
+// If nil, the path args.Filename is resolved and loaded (check that the user
+// can execute this file is done here in this case). If the executable is an
+// interpreter script rather than an ELF, the binary of the corresponding
+// interpreter will be loaded.
+//
+// It returns:
+// * loadedELF, description of the loaded binary
+// * arch.Context matching the binary arch
+// * fs.Dirent of the binary file
+// * Possibly updated args.Argv
+func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, fsbridge.File, []string, error) {
+ for i := 0; i < maxLoaderAttempts; i++ {
+ if args.File == nil {
+ var err error
+ args.File, err = openPath(ctx, args)
+ if err != nil {
+ ctx.Infof("Error opening %s: %v", args.Filename, err)
+ return loadedELF{}, nil, nil, nil, err
+ }
+ // Ensure file is release in case the code loops or errors out.
+ defer args.File.DecRef()
+ } else {
+ if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil {
+ return loadedELF{}, nil, nil, nil, err
+ }
+ }
+
+ // Check the header. Is this an ELF or interpreter script?
+ var hdr [4]uint8
+ // N.B. We assume that reading from a regular file cannot block.
+ _, err := args.File.ReadFull(ctx, usermem.BytesIOSequence(hdr[:]), 0)
+ // Allow unexpected EOF, as a valid executable could be only three bytes
+ // (e.g., #!a).
+ if err != nil && err != io.ErrUnexpectedEOF {
+ if err == io.EOF {
+ err = syserror.ENOEXEC
+ }
+ return loadedELF{}, nil, nil, nil, err
+ }
+
+ switch {
+ case bytes.Equal(hdr[:], []byte(elfMagic)):
+ loaded, ac, err := loadELF(ctx, args)
+ if err != nil {
+ ctx.Infof("Error loading ELF: %v", err)
+ return loadedELF{}, nil, nil, nil, err
+ }
+ // An ELF is always terminal. Hold on to file.
+ args.File.IncRef()
+ return loaded, ac, args.File, args.Argv, err
+
+ case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)):
+ if args.CloseOnExec {
+ return loadedELF{}, nil, nil, nil, syserror.ENOENT
+ }
+ args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv)
+ if err != nil {
+ ctx.Infof("Error loading interpreter script: %v", err)
+ return loadedELF{}, nil, nil, nil, err
+ }
+ // Refresh the traversal limit for the interpreter.
+ *args.RemainingTraversals = linux.MaxSymlinkTraversals
+
+ default:
+ ctx.Infof("Unknown magic: %v", hdr)
+ return loadedELF{}, nil, nil, nil, syserror.ENOEXEC
+ }
+ // Set to nil in case we loop on a Interpreter Script.
+ args.File = nil
+ }
+
+ return loadedELF{}, nil, nil, nil, syserror.ELOOP
+}
+
+// Load loads args.File into a MemoryManager. If args.File is nil, the path
+// args.Filename is resolved and loaded instead.
+//
+// If Load returns ErrSwitchFile it should be called again with the returned
+// path and argv.
+//
+// Preconditions:
+// * The Task MemoryManager is empty.
+// * Load is called on the Task goroutine.
+func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) {
+ // Load the executable itself.
+ loaded, ac, file, newArgv, err := loadExecutable(ctx, args)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
+ }
+ defer file.DecRef()
+
+ // Load the VDSO.
+ vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("error loading VDSO: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ // Setup the heap. brk starts at the next page after the end of the
+ // executable. Userspace can assume that the remainer of the page after
+ // loaded.end is available for its use.
+ e, ok := loaded.end.RoundUp()
+ if !ok {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("brk overflows: %#x", loaded.end), linux.ENOEXEC)
+ }
+ args.MemoryManager.BrkSetup(ctx, e)
+
+ // Allocate our stack.
+ stack, err := allocStack(ctx, args.MemoryManager, ac)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to allocate stack: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ // Push the original filename to the stack, for AT_EXECFN.
+ execfn, err := stack.Push(args.Filename)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to push exec filename: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ // Push 16 random bytes on the stack which AT_RANDOM will point to.
+ var b [16]byte
+ if _, err := rand.Read(b[:]); err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to read random bytes: %v", err), syserr.FromError(err).ToLinux())
+ }
+ random, err := stack.Push(b)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to push random bytes: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ c := auth.CredentialsFromContext(ctx)
+
+ // Add generic auxv entries.
+ auxv := append(loaded.auxv, arch.Auxv{
+ arch.AuxEntry{linux.AT_UID, usermem.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_EUID, usermem.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_GID, usermem.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())},
+ arch.AuxEntry{linux.AT_EGID, usermem.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())},
+ // The conditions that require AT_SECURE = 1 never arise. See
+ // kernel.Task.updateCredsForExecLocked.
+ arch.AuxEntry{linux.AT_SECURE, 0},
+ arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC},
+ arch.AuxEntry{linux.AT_EXECFN, execfn},
+ arch.AuxEntry{linux.AT_RANDOM, random},
+ arch.AuxEntry{linux.AT_PAGESZ, usermem.PageSize},
+ arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr},
+ }...)
+ auxv = append(auxv, extraAuxv...)
+
+ sl, err := stack.Load(newArgv, args.Envv, auxv)
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load stack: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ m := args.MemoryManager
+ m.SetArgvStart(sl.ArgvStart)
+ m.SetArgvEnd(sl.ArgvEnd)
+ m.SetEnvvStart(sl.EnvvStart)
+ m.SetEnvvEnd(sl.EnvvEnd)
+ m.SetAuxv(auxv)
+ m.SetExecutable(file)
+
+ symbolValue, err := getSymbolValueFromVDSO("rt_sigreturn")
+ if err != nil {
+ return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to find rt_sigreturn in vdso: %v", err), syserr.FromError(err).ToLinux())
+ }
+
+ // Found rt_sigretrun.
+ addr := uint64(vdsoAddr) + symbolValue - vdsoPrelink
+ m.SetVDSOSigReturn(addr)
+
+ ac.SetIP(uintptr(loaded.entry))
+ ac.SetStack(uintptr(stack.Bottom))
+
+ name := path.Base(args.Filename)
+ if len(name) > linux.TASK_COMM_LEN-1 {
+ name = name[:linux.TASK_COMM_LEN-1]
+ }
+
+ return loaded.os, ac, name, nil
+}
diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go
new file mode 100644
index 000000000..05a294fe6
--- /dev/null
+++ b/pkg/sentry/loader/vdso.go
@@ -0,0 +1,382 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package loader
+
+import (
+ "bytes"
+ "debug/elf"
+ "fmt"
+ "io"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/abi"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/memmap"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/pgalloc"
+ "gvisor.dev/gvisor/pkg/sentry/uniqueid"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const vdsoPrelink = 0xffffffffff700000
+
+type fileContext struct {
+ context.Context
+}
+
+func (f *fileContext) Value(key interface{}) interface{} {
+ switch key {
+ case uniqueid.CtxGlobalUniqueID:
+ return uint64(0)
+ default:
+ return f.Context.Value(key)
+ }
+}
+
+type byteFullReader struct {
+ data []byte
+}
+
+func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ if offset >= int64(len(b.data)) {
+ return 0, io.EOF
+ }
+ n, err := dst.CopyOut(ctx, b.data[offset:])
+ return int64(n), err
+}
+
+// validateVDSO checks that the VDSO can be loaded by loadVDSO.
+//
+// VDSOs are special (see below). Since we are going to map the VDSO directly
+// rather than using a normal loading process, we require that the PT_LOAD
+// segments have the same layout in the ELF as they expect to have in memory.
+//
+// Namely, this means that we must verify:
+// * PT_LOAD file offsets are equivalent to the memory offset from the first
+// segment.
+// * No extra zeroed space (memsz) is required.
+// * PT_LOAD segments are in order.
+// * No two PT_LOAD segments occupy parts of the same page.
+// * PT_LOAD segments don't extend beyond the end of the file.
+//
+// ctx may be nil if f does not need it.
+func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) {
+ info, err := parseHeader(ctx, f)
+ if err != nil {
+ log.Infof("Unable to parse VDSO header: %v", err)
+ return elfInfo{}, err
+ }
+
+ var first *elf.ProgHeader
+ var prev *elf.ProgHeader
+ var prevEnd usermem.Addr
+ for i, phdr := range info.phdrs {
+ if phdr.Type != elf.PT_LOAD {
+ continue
+ }
+
+ if first == nil {
+ first = &info.phdrs[i]
+ if phdr.Off != 0 {
+ log.Warningf("First PT_LOAD segment has non-zero file offset")
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ }
+
+ memoryOffset := phdr.Vaddr - first.Vaddr
+ if memoryOffset != phdr.Off {
+ log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ // memsz larger than filesz means that extra zeroed space should be
+ // provided at the end of the segment. Since we are mapping the ELF
+ // directly, we don't want to just overwrite part of the ELF with
+ // zeroes.
+ if phdr.Memsz != phdr.Filesz {
+ log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ start := usermem.Addr(memoryOffset)
+ end, ok := start.AddLength(phdr.Memsz)
+ if !ok {
+ log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ if uint64(end) > size {
+ log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ if prev != nil {
+ if start < prevEnd {
+ log.Warningf("PT_LOAD segments out of order")
+ return elfInfo{}, syserror.ENOEXEC
+ }
+
+ // We mprotect entire pages, so each segment must be in
+ // its own page.
+ prevEndPage := prevEnd.RoundDown()
+ startPage := start.RoundDown()
+ if prevEndPage >= startPage {
+ log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage)
+ return elfInfo{}, syserror.ENOEXEC
+ }
+ }
+ prev = &info.phdrs[i]
+ prevEnd = end
+ }
+
+ return info, nil
+}
+
+// VDSO describes a VDSO.
+//
+// NOTE(mpratt): to support multiple architectures or operating systems, this
+// would need to contain a VDSO for each.
+//
+// +stateify savable
+type VDSO struct {
+ // ParamPage is the VDSO parameter page. This page should be updated to
+ // inform the VDSO for timekeeping data.
+ ParamPage *mm.SpecialMappable
+
+ // vdso is the VDSO ELF itself.
+ vdso *mm.SpecialMappable
+
+ // os is the operating system targeted by the VDSO.
+ os abi.OS
+
+ // arch is the architecture targeted by the VDSO.
+ arch arch.Arch
+
+ // phdrs are the VDSO ELF phdrs.
+ phdrs []elf.ProgHeader `state:".([]elfProgHeader)"`
+}
+
+// getSymbolValueFromVDSO returns the specific symbol value in vdso.so.
+func getSymbolValueFromVDSO(symbol string) (uint64, error) {
+ f, err := elf.NewFile(bytes.NewReader(vdsoBin))
+ if err != nil {
+ return 0, err
+ }
+ syms, err := f.Symbols()
+ if err != nil {
+ return 0, err
+ }
+
+ for _, sym := range syms {
+ if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF {
+ if strings.Contains(sym.Name, symbol) {
+ return sym.Value, nil
+ }
+ }
+ }
+ return 0, fmt.Errorf("no %v in vdso.so", symbol)
+}
+
+// PrepareVDSO validates the system VDSO and returns a VDSO, containing the
+// param page for updating by the kernel.
+func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) {
+ vdsoFile := &byteFullReader{data: vdsoBin}
+
+ // First make sure the VDSO is valid. vdsoFile does not use ctx, so a
+ // nil context can be passed.
+ info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsoBin)))
+ if err != nil {
+ return nil, err
+ }
+
+ // Then copy it into a VDSO mapping.
+ size, ok := usermem.Addr(len(vdsoBin)).RoundUp()
+ if !ok {
+ return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsoBin))
+ }
+
+ mf := mfp.MemoryFile()
+ vdso, err := mf.Allocate(uint64(size), usage.System)
+ if err != nil {
+ return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err)
+ }
+
+ ims, err := mf.MapInternal(vdso, usermem.ReadWrite)
+ if err != nil {
+ mf.DecRef(vdso)
+ return nil, fmt.Errorf("unable to map VDSO memory: %v", err)
+ }
+
+ _, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsoBin)))
+ if err != nil {
+ mf.DecRef(vdso)
+ return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err)
+ }
+
+ // Finally, allocate a param page for this VDSO.
+ paramPage, err := mf.Allocate(usermem.PageSize, usage.System)
+ if err != nil {
+ mf.DecRef(vdso)
+ return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err)
+ }
+
+ return &VDSO{
+ ParamPage: mm.NewSpecialMappable("[vvar]", mfp, paramPage),
+ // TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as
+ // some applications may not be able to handle multiple [vdso]
+ // hints.
+ vdso: mm.NewSpecialMappable("", mfp, vdso),
+ os: info.os,
+ arch: info.arch,
+ phdrs: info.phdrs,
+ }, nil
+}
+
+// loadVDSO loads the VDSO into m.
+//
+// VDSOs are special.
+//
+// VDSOs are fully position independent. However, instead of loading a VDSO
+// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux
+// kernel simply directly maps the entire file into process memory, with very
+// little real ELF parsing.
+//
+// NOTE(b/25323870): This means that userspace can, and unfortunately does,
+// depend on parts of the ELF that would normally not be mapped. To maintain
+// compatibility with such binaries, we load the VDSO much like Linux.
+//
+// loadVDSO takes a reference on the VDSO and parameter page FrameRegions.
+func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (usermem.Addr, error) {
+ if v.os != bin.os {
+ ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os)
+ return 0, syserror.ENOEXEC
+ }
+ if v.arch != bin.arch {
+ ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch)
+ return 0, syserror.ENOEXEC
+ }
+
+ // Reserve address space for the VDSO and its parameter page, which is
+ // mapped just before the VDSO.
+ mapSize := v.vdso.Length() + v.ParamPage.Length()
+ addr, err := m.MMap(ctx, memmap.MMapOpts{
+ Length: mapSize,
+ Private: true,
+ })
+ if err != nil {
+ ctx.Infof("Unable to reserve VDSO address space: %v", err)
+ return 0, err
+ }
+
+ // Now map the param page.
+ _, err = m.MMap(ctx, memmap.MMapOpts{
+ Length: v.ParamPage.Length(),
+ MappingIdentity: v.ParamPage,
+ Mappable: v.ParamPage,
+ Addr: addr,
+ Fixed: true,
+ Unmap: true,
+ Private: true,
+ Perms: usermem.Read,
+ MaxPerms: usermem.Read,
+ })
+ if err != nil {
+ ctx.Infof("Unable to map VDSO param page: %v", err)
+ return 0, err
+ }
+
+ // Now map the VDSO itself.
+ vdsoAddr, ok := addr.AddLength(v.ParamPage.Length())
+ if !ok {
+ panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length()))
+ }
+ _, err = m.MMap(ctx, memmap.MMapOpts{
+ Length: v.vdso.Length(),
+ MappingIdentity: v.vdso,
+ Mappable: v.vdso,
+ Addr: vdsoAddr,
+ Fixed: true,
+ Unmap: true,
+ Private: true,
+ Perms: usermem.Read,
+ MaxPerms: usermem.AnyAccess,
+ })
+ if err != nil {
+ ctx.Infof("Unable to map VDSO: %v", err)
+ return 0, err
+ }
+
+ vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length())
+ if !ok {
+ panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length()))
+ }
+
+ // Set additional protections for the individual segments.
+ var first *elf.ProgHeader
+ for i, phdr := range v.phdrs {
+ if phdr.Type != elf.PT_LOAD {
+ continue
+ }
+
+ if first == nil {
+ first = &v.phdrs[i]
+ }
+
+ memoryOffset := phdr.Vaddr - first.Vaddr
+ segAddr, ok := vdsoAddr.AddLength(memoryOffset)
+ if !ok {
+ ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset)
+ return 0, syserror.ENOEXEC
+ }
+ segPage := segAddr.RoundDown()
+ segSize := usermem.Addr(phdr.Memsz)
+ segSize, ok = segSize.AddLength(segAddr.PageOffset())
+ if !ok {
+ ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset())
+ return 0, syserror.ENOEXEC
+ }
+ segSize, ok = segSize.RoundUp()
+ if !ok {
+ ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset())
+ return 0, syserror.ENOEXEC
+ }
+ segEnd, ok := segPage.AddLength(uint64(segSize))
+ if !ok {
+ ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize)
+ return 0, syserror.ENOEXEC
+ }
+ if segEnd > vdsoEnd {
+ ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd)
+ return 0, syserror.ENOEXEC
+ }
+
+ perms := progFlagsAsPerms(phdr.Flags)
+ if perms != usermem.Read {
+ if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil {
+ ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err)
+ return 0, syserror.ENOEXEC
+ }
+ }
+ }
+
+ return vdsoAddr, nil
+}
diff --git a/pkg/sentry/loader/vdso_state.go b/pkg/sentry/loader/vdso_state.go
new file mode 100644
index 000000000..db378e90a
--- /dev/null
+++ b/pkg/sentry/loader/vdso_state.go
@@ -0,0 +1,48 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package loader
+
+import (
+ "debug/elf"
+)
+
+// +stateify savable
+type elfProgHeader struct {
+ Type elf.ProgType
+ Flags elf.ProgFlag
+ Off uint64
+ Vaddr uint64
+ Paddr uint64
+ Filesz uint64
+ Memsz uint64
+ Align uint64
+}
+
+// savePhdrs is invoked by stateify.
+func (v *VDSO) savePhdrs() []elfProgHeader {
+ s := make([]elfProgHeader, 0, len(v.phdrs))
+ for _, h := range v.phdrs {
+ s = append(s, elfProgHeader(h))
+ }
+ return s
+}
+
+// loadPhdrs is invoked by stateify.
+func (v *VDSO) loadPhdrs(s []elfProgHeader) {
+ v.phdrs = make([]elf.ProgHeader, 0, len(s))
+ for _, h := range s {
+ v.phdrs = append(v.phdrs, elf.ProgHeader(h))
+ }
+}