diff options
Diffstat (limited to 'pkg/sentry/loader')
-rw-r--r-- | pkg/sentry/loader/BUILD | 46 | ||||
-rw-r--r-- | pkg/sentry/loader/elf.go | 700 | ||||
-rw-r--r-- | pkg/sentry/loader/interpreter.go | 108 | ||||
-rw-r--r-- | pkg/sentry/loader/loader.go | 315 | ||||
-rw-r--r-- | pkg/sentry/loader/vdso.go | 382 | ||||
-rw-r--r-- | pkg/sentry/loader/vdso_state.go | 48 |
6 files changed, 1599 insertions, 0 deletions
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD new file mode 100644 index 000000000..34bdb0b69 --- /dev/null +++ b/pkg/sentry/loader/BUILD @@ -0,0 +1,46 @@ +load("//tools:defs.bzl", "go_embed_data", "go_library") + +package(licenses = ["notice"]) + +go_embed_data( + name = "vdso_bin", + src = "//vdso:vdso.so", + package = "loader", + var = "vdsoBin", +) + +go_library( + name = "loader", + srcs = [ + "elf.go", + "interpreter.go", + "loader.go", + "vdso.go", + "vdso_state.go", + ":vdso_bin", + ], + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi", + "//pkg/abi/linux", + "//pkg/binary", + "//pkg/context", + "//pkg/cpuid", + "//pkg/log", + "//pkg/rand", + "//pkg/safemem", + "//pkg/sentry/arch", + "//pkg/sentry/fsbridge", + "//pkg/sentry/kernel/auth", + "//pkg/sentry/limits", + "//pkg/sentry/memmap", + "//pkg/sentry/mm", + "//pkg/sentry/pgalloc", + "//pkg/sentry/uniqueid", + "//pkg/sentry/usage", + "//pkg/sentry/vfs", + "//pkg/syserr", + "//pkg/syserror", + "//pkg/usermem", + ], +) diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go new file mode 100644 index 000000000..ddeaff3db --- /dev/null +++ b/pkg/sentry/loader/elf.go @@ -0,0 +1,700 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package loader + +import ( + "bytes" + "debug/elf" + "fmt" + "io" + + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" + "gvisor.dev/gvisor/pkg/sentry/limits" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +const ( + // elfMagic identifies an ELF file. + elfMagic = "\x7fELF" + + // maxTotalPhdrSize is the maximum combined size of all program + // headers. Linux limits this to one page. + maxTotalPhdrSize = usermem.PageSize +) + +var ( + // header64Size is the size of elf.Header64. + header64Size = int(binary.Size(elf.Header64{})) + + // Prog64Size is the size of elf.Prog64. + prog64Size = int(binary.Size(elf.Prog64{})) +) + +func progFlagsAsPerms(f elf.ProgFlag) usermem.AccessType { + var p usermem.AccessType + if f&elf.PF_R == elf.PF_R { + p.Read = true + } + if f&elf.PF_W == elf.PF_W { + p.Write = true + } + if f&elf.PF_X == elf.PF_X { + p.Execute = true + } + return p +} + +// elfInfo contains the metadata needed to load an ELF binary. +type elfInfo struct { + // os is the target OS of the ELF. + os abi.OS + + // arch is the target architecture of the ELF. + arch arch.Arch + + // entry is the program entry point. + entry usermem.Addr + + // phdrs are the program headers. + phdrs []elf.ProgHeader + + // phdrSize is the size of a single program header in the ELF. + phdrSize int + + // phdrOff is the offset of the program headers in the file. + phdrOff uint64 + + // sharedObject is true if the ELF represents a shared object. + sharedObject bool +} + +// fullReader interface extracts the ReadFull method from fsbridge.File so that +// client code does not need to define an entire fsbridge.File when only read +// functionality is needed. 
+// +// TODO(gvisor.dev/issue/1035): Once VFS2 ships, rewrite this to wrap +// vfs.FileDescription's PRead/Read instead. +type fullReader interface { + // ReadFull is the same as fsbridge.File.ReadFull. + ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) +} + +// parseHeader parse the ELF header, verifying that this is a supported ELF +// file and returning the ELF program headers. +// +// This is similar to elf.NewFile, except that it is more strict about what it +// accepts from the ELF, and it doesn't parse unnecessary parts of the file. +func parseHeader(ctx context.Context, f fullReader) (elfInfo, error) { + // Check ident first; it will tell us the endianness of the rest of the + // structs. + var ident [elf.EI_NIDENT]byte + _, err := f.ReadFull(ctx, usermem.BytesIOSequence(ident[:]), 0) + if err != nil { + log.Infof("Error reading ELF ident: %v", err) + // The entire ident array always exists. + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + + // Only some callers pre-check the ELF magic. + if !bytes.Equal(ident[:len(elfMagic)], []byte(elfMagic)) { + log.Infof("File is not an ELF") + return elfInfo{}, syserror.ENOEXEC + } + + // We only support 64-bit, little endian binaries + if class := elf.Class(ident[elf.EI_CLASS]); class != elf.ELFCLASS64 { + log.Infof("Unsupported ELF class: %v", class) + return elfInfo{}, syserror.ENOEXEC + } + if endian := elf.Data(ident[elf.EI_DATA]); endian != elf.ELFDATA2LSB { + log.Infof("Unsupported ELF endianness: %v", endian) + return elfInfo{}, syserror.ENOEXEC + } + byteOrder := binary.LittleEndian + + if version := elf.Version(ident[elf.EI_VERSION]); version != elf.EV_CURRENT { + log.Infof("Unsupported ELF version: %v", version) + return elfInfo{}, syserror.ENOEXEC + } + // EI_OSABI is ignored by Linux, which is the only OS supported. 
+ os := abi.Linux + + var hdr elf.Header64 + hdrBuf := make([]byte, header64Size) + _, err = f.ReadFull(ctx, usermem.BytesIOSequence(hdrBuf), 0) + if err != nil { + log.Infof("Error reading ELF header: %v", err) + // The entire header always exists. + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + binary.Unmarshal(hdrBuf, byteOrder, &hdr) + + // We support amd64 and arm64. + var a arch.Arch + switch machine := elf.Machine(hdr.Machine); machine { + case elf.EM_X86_64: + a = arch.AMD64 + case elf.EM_AARCH64: + a = arch.ARM64 + default: + log.Infof("Unsupported ELF machine %d", machine) + return elfInfo{}, syserror.ENOEXEC + } + + var sharedObject bool + elfType := elf.Type(hdr.Type) + switch elfType { + case elf.ET_EXEC: + sharedObject = false + case elf.ET_DYN: + sharedObject = true + default: + log.Infof("Unsupported ELF type %v", elfType) + return elfInfo{}, syserror.ENOEXEC + } + + if int(hdr.Phentsize) != prog64Size { + log.Infof("Unsupported phdr size %d", hdr.Phentsize) + return elfInfo{}, syserror.ENOEXEC + } + totalPhdrSize := prog64Size * int(hdr.Phnum) + if totalPhdrSize < prog64Size { + log.Warningf("No phdrs or total phdr size overflows: prog64Size: %d phnum: %d", prog64Size, int(hdr.Phnum)) + return elfInfo{}, syserror.ENOEXEC + } + if totalPhdrSize > maxTotalPhdrSize { + log.Infof("Too many phdrs (%d): total size %d > %d", hdr.Phnum, totalPhdrSize, maxTotalPhdrSize) + return elfInfo{}, syserror.ENOEXEC + } + + phdrBuf := make([]byte, totalPhdrSize) + _, err = f.ReadFull(ctx, usermem.BytesIOSequence(phdrBuf), int64(hdr.Phoff)) + if err != nil { + log.Infof("Error reading ELF phdrs: %v", err) + // If phdrs were specified, they should all exist. 
+ if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + + phdrs := make([]elf.ProgHeader, hdr.Phnum) + for i := range phdrs { + var prog64 elf.Prog64 + binary.Unmarshal(phdrBuf[:prog64Size], byteOrder, &prog64) + phdrBuf = phdrBuf[prog64Size:] + phdrs[i] = elf.ProgHeader{ + Type: elf.ProgType(prog64.Type), + Flags: elf.ProgFlag(prog64.Flags), + Off: prog64.Off, + Vaddr: prog64.Vaddr, + Paddr: prog64.Paddr, + Filesz: prog64.Filesz, + Memsz: prog64.Memsz, + Align: prog64.Align, + } + } + + return elfInfo{ + os: os, + arch: a, + entry: usermem.Addr(hdr.Entry), + phdrs: phdrs, + phdrOff: hdr.Phoff, + phdrSize: prog64Size, + sharedObject: sharedObject, + }, nil +} + +// mapSegment maps a phdr into the Task. offset is the offset to apply to +// phdr.Vaddr. +func mapSegment(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, phdr *elf.ProgHeader, offset usermem.Addr) error { + // We must make a page-aligned mapping. + adjust := usermem.Addr(phdr.Vaddr).PageOffset() + + addr, ok := offset.AddLength(phdr.Vaddr) + if !ok { + // If offset != 0 we should have ensured this would fit. + ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset) + return syserror.ENOEXEC + } + addr -= usermem.Addr(adjust) + + fileSize := phdr.Filesz + adjust + if fileSize < phdr.Filesz { + ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust) + return syserror.ENOEXEC + } + ms, ok := usermem.Addr(fileSize).RoundUp() + if !ok { + ctx.Infof("fileSize %#x too large", fileSize) + return syserror.ENOEXEC + } + mapSize := uint64(ms) + + if mapSize > 0 { + // This must result in a page-aligned offset. i.e., the original + // phdr.Off must have the same alignment as phdr.Vaddr. If that is not + // true, MMap will reject the mapping. 
+ fileOffset := phdr.Off - adjust + + prot := progFlagsAsPerms(phdr.Flags) + mopts := memmap.MMapOpts{ + Length: mapSize, + Offset: fileOffset, + Addr: addr, + Fixed: true, + // Linux will happily allow conflicting segments to map over + // one another. + Unmap: true, + Private: true, + Perms: prot, + MaxPerms: usermem.AnyAccess, + } + defer func() { + if mopts.MappingIdentity != nil { + mopts.MappingIdentity.DecRef() + } + }() + if err := f.ConfigureMMap(ctx, &mopts); err != nil { + ctx.Infof("File is not memory-mappable: %v", err) + return err + } + if _, err := m.MMap(ctx, mopts); err != nil { + ctx.Infof("Error mapping PT_LOAD segment %+v at %#x: %v", phdr, addr, err) + return err + } + + // We need to clear the end of the last page that exceeds fileSize so + // we don't map part of the file beyond fileSize. + // + // Note that Linux *does not* clear the portion of the first page + // before phdr.Off. + if mapSize > fileSize { + zeroAddr, ok := addr.AddLength(fileSize) + if !ok { + panic(fmt.Sprintf("successfully mmaped address overflows? %#x + %#x", addr, fileSize)) + } + zeroSize := int64(mapSize - fileSize) + if zeroSize < 0 { + panic(fmt.Sprintf("zeroSize too big? %#x", uint64(zeroSize))) + } + if _, err := m.ZeroOut(ctx, zeroAddr, zeroSize, usermem.IOOpts{IgnorePermissions: true}); err != nil { + ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+usermem.Addr(zeroSize), err) + return err + } + } + } + + memSize := phdr.Memsz + adjust + if memSize < phdr.Memsz { + ctx.Infof("Computed segment mem size overflows: %#x + %#x", phdr.Memsz, adjust) + return syserror.ENOEXEC + } + + // Allocate more anonymous pages if necessary. + if mapSize < memSize { + anonAddr, ok := addr.AddLength(mapSize) + if !ok { + panic(fmt.Sprintf("anonymous memory doesn't fit in pre-sized range? 
%#x + %#x", addr, mapSize)) + } + anonSize, ok := usermem.Addr(memSize - mapSize).RoundUp() + if !ok { + ctx.Infof("extra anon pages too large: %#x", memSize-mapSize) + return syserror.ENOEXEC + } + + // N.B. Linux uses vm_brk_flags to map these pages, which only + // honors the X bit, always mapping at least RW. ignoring These + // pages are not included in the final brk region. + prot := usermem.ReadWrite + if phdr.Flags&elf.PF_X == elf.PF_X { + prot.Execute = true + } + + if _, err := m.MMap(ctx, memmap.MMapOpts{ + Length: uint64(anonSize), + Addr: anonAddr, + // Fixed without Unmap will fail the mmap if something is + // already at addr. + Fixed: true, + Private: true, + Perms: prot, + MaxPerms: usermem.AnyAccess, + }); err != nil { + ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err) + return err + } + } + + return nil +} + +// loadedELF describes an ELF that has been successfully loaded. +type loadedELF struct { + // os is the target OS of the ELF. + os abi.OS + + // arch is the target architecture of the ELF. + arch arch.Arch + + // entry is the entry point of the ELF. + entry usermem.Addr + + // start is the end of the ELF. + start usermem.Addr + + // end is the end of the ELF. + end usermem.Addr + + // interpter is the path to the ELF interpreter. + interpreter string + + // phdrAddr is the address of the ELF program headers. + phdrAddr usermem.Addr + + // phdrSize is the size of a single program header in the ELF. + phdrSize int + + // phdrNum is the number of program headers. + phdrNum int + + // auxv contains a subset of ELF-specific auxiliary vector entries: + // * AT_PHDR + // * AT_PHENT + // * AT_PHNUM + // * AT_BASE + // * AT_ENTRY + auxv arch.Auxv +} + +// loadParsedELF loads f into mm. +// +// info is the parsed elfInfo from the header. +// +// It does not load the ELF interpreter, or return any auxv entries. 
+// +// Preconditions: +// * f is an ELF file +func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) { + first := true + var start, end usermem.Addr + var interpreter string + for _, phdr := range info.phdrs { + switch phdr.Type { + case elf.PT_LOAD: + vaddr := usermem.Addr(phdr.Vaddr) + if first { + first = false + start = vaddr + } + if vaddr < end { + // NOTE(b/37474556): Linux allows out-of-order + // segments, in violation of the spec. + ctx.Infof("PT_LOAD headers out-of-order. %#x < %#x", vaddr, end) + return loadedELF{}, syserror.ENOEXEC + } + var ok bool + end, ok = vaddr.AddLength(phdr.Memsz) + if !ok { + ctx.Infof("PT_LOAD header size overflows. %#x + %#x", vaddr, phdr.Memsz) + return loadedELF{}, syserror.ENOEXEC + } + + case elf.PT_INTERP: + if phdr.Filesz < 2 { + ctx.Infof("PT_INTERP path too small: %v", phdr.Filesz) + return loadedELF{}, syserror.ENOEXEC + } + if phdr.Filesz > linux.PATH_MAX { + ctx.Infof("PT_INTERP path too big: %v", phdr.Filesz) + return loadedELF{}, syserror.ENOEXEC + } + + path := make([]byte, phdr.Filesz) + _, err := f.ReadFull(ctx, usermem.BytesIOSequence(path), int64(phdr.Off)) + if err != nil { + // If an interpreter was specified, it should exist. + ctx.Infof("Error reading PT_INTERP path: %v", err) + return loadedELF{}, syserror.ENOEXEC + } + + if path[len(path)-1] != 0 { + ctx.Infof("PT_INTERP path not NUL-terminated: %v", path) + return loadedELF{}, syserror.ENOEXEC + } + + // Strip NUL-terminator and everything beyond from + // string. Note that there may be a NUL-terminator + // before len(path)-1. + interpreter = string(path[:bytes.IndexByte(path, '\x00')]) + if interpreter == "" { + // Linux actually attempts to open_exec("\0"). + // open_exec -> do_open_execat fails to check + // that name != '\0' before calling + // do_filp_open, which thus opens the working + // directory. 
do_open_execat returns EACCES + // because the directory is not a regular file. + // + // We bypass that nonsense and simply + // short-circuit with EACCES. Those this does + // mean that there may be some edge cases where + // the open path would return a different + // error. + ctx.Infof("PT_INTERP path is empty: %v", path) + return loadedELF{}, syserror.EACCES + } + } + } + + // Shared objects don't have fixed load addresses. We need to pick a + // base address big enough to fit all segments, so we first create a + // mapping for the total size just to find a region that is big enough. + // + // It is safe to unmap it immediately without racing with another mapping + // because we are the only one in control of the MemoryManager. + // + // Note that the vaddr of the first PT_LOAD segment is ignored when + // choosing the load address (even if it is non-zero). The vaddr does + // become an offset from that load address. + var offset usermem.Addr + if info.sharedObject { + totalSize := end - start + totalSize, ok := totalSize.RoundUp() + if !ok { + ctx.Infof("ELF PT_LOAD segments too big") + return loadedELF{}, syserror.ENOEXEC + } + + var err error + offset, err = m.MMap(ctx, memmap.MMapOpts{ + Length: uint64(totalSize), + Addr: sharedLoadOffset, + Private: true, + }) + if err != nil { + ctx.Infof("Error allocating address space for shared object: %v", err) + return loadedELF{}, err + } + if err := m.MUnmap(ctx, offset, uint64(totalSize)); err != nil { + panic(fmt.Sprintf("Failed to unmap base address: %v", err)) + } + + start, ok = start.AddLength(uint64(offset)) + if !ok { + panic(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset)) + } + + end, ok = end.AddLength(uint64(offset)) + if !ok { + panic(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset)) + } + + info.entry, ok = info.entry.AddLength(uint64(offset)) + if !ok { + ctx.Infof("Entrypoint %#x + offset %#x overflows? 
Is the entrypoint within a segment?", info.entry, offset) + return loadedELF{}, err + } + } + + // Map PT_LOAD segments. + for _, phdr := range info.phdrs { + switch phdr.Type { + case elf.PT_LOAD: + if phdr.Memsz == 0 { + // No need to load segments with size 0, but + // they exist in some binaries. + continue + } + + if err := mapSegment(ctx, m, f, &phdr, offset); err != nil { + ctx.Infof("Failed to map PT_LOAD segment: %+v", phdr) + return loadedELF{}, err + } + } + } + + // This assumes that the first segment contains the ELF headers. This + // may not be true in a malformed ELF, but Linux makes the same + // assumption. + phdrAddr, ok := start.AddLength(info.phdrOff) + if !ok { + ctx.Warningf("ELF start address %#x + phdr offset %#x overflows", start, info.phdrOff) + phdrAddr = 0 + } + + return loadedELF{ + os: info.os, + arch: info.arch, + entry: info.entry, + start: start, + end: end, + interpreter: interpreter, + phdrAddr: phdrAddr, + phdrSize: info.phdrSize, + phdrNum: len(info.phdrs), + }, nil +} + +// loadInitialELF loads f into mm. +// +// It creates an arch.Context for the ELF and prepares the mm for this arch. +// +// It does not load the ELF interpreter, or return any auxv entries. +// +// Preconditions: +// * f is an ELF file +// * f is the first ELF loaded into m +func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f fsbridge.File) (loadedELF, arch.Context, error) { + info, err := parseHeader(ctx, f) + if err != nil { + ctx.Infof("Failed to parse initial ELF: %v", err) + return loadedELF{}, nil, err + } + + // Check Image Compatibility. + if arch.Host != info.arch { + ctx.Warningf("Found mismatch for platform %s with ELF type %s", arch.Host.String(), info.arch.String()) + return loadedELF{}, nil, syserror.ENOEXEC + } + + // Create the arch.Context now so we can prepare the mmap layout before + // mapping anything. 
+ ac := arch.New(info.arch, fs) + + l, err := m.SetMmapLayout(ac, limits.FromContext(ctx)) + if err != nil { + ctx.Warningf("Failed to set mmap layout: %v", err) + return loadedELF{}, nil, err + } + + // PIELoadAddress tries to move the ELF out of the way of the default + // mmap base to ensure that the initial brk has sufficient space to + // grow. + le, err := loadParsedELF(ctx, m, f, info, ac.PIELoadAddress(l)) + return le, ac, err +} + +// loadInterpreterELF loads f into mm. +// +// The interpreter must be for the same OS/Arch as the initial ELF. +// +// It does not return any auxv entries. +// +// Preconditions: +// * f is an ELF file +func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f fsbridge.File, initial loadedELF) (loadedELF, error) { + info, err := parseHeader(ctx, f) + if err != nil { + if err == syserror.ENOEXEC { + // Bad interpreter. + err = syserror.ELIBBAD + } + return loadedELF{}, err + } + + if info.os != initial.os { + ctx.Infof("Initial ELF OS %v and interpreter ELF OS %v differ", initial.os, info.os) + return loadedELF{}, syserror.ELIBBAD + } + if info.arch != initial.arch { + ctx.Infof("Initial ELF arch %v and interpreter ELF arch %v differ", initial.arch, info.arch) + return loadedELF{}, syserror.ELIBBAD + } + + // The interpreter is not given a load offset, as its location does not + // affect brk. + return loadParsedELF(ctx, m, f, info, 0) +} + +// loadELF loads args.File into the Task address space. +// +// If loadELF returns ErrSwitchFile it should be called again with the returned +// path and argv. 
+// +// Preconditions: +// * args.File is an ELF file +func loadELF(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, error) { + bin, ac, err := loadInitialELF(ctx, args.MemoryManager, args.Features, args.File) + if err != nil { + ctx.Infof("Error loading binary: %v", err) + return loadedELF{}, nil, err + } + + var interp loadedELF + if bin.interpreter != "" { + // Even if we do not allow the final link of the script to be + // resolved, the interpreter should still be resolved if it is + // a symlink. + args.ResolveFinal = true + // Refresh the traversal limit. + *args.RemainingTraversals = linux.MaxSymlinkTraversals + args.Filename = bin.interpreter + intFile, err := openPath(ctx, args) + if err != nil { + ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err) + return loadedELF{}, nil, err + } + defer intFile.DecRef() + + interp, err = loadInterpreterELF(ctx, args.MemoryManager, intFile, bin) + if err != nil { + ctx.Infof("Error loading interpreter: %v", err) + return loadedELF{}, nil, err + } + + if interp.interpreter != "" { + // No recursive interpreters! + ctx.Infof("Interpreter requires an interpreter") + return loadedELF{}, nil, syserror.ENOEXEC + } + } + + // ELF-specific auxv entries. + bin.auxv = arch.Auxv{ + arch.AuxEntry{linux.AT_PHDR, bin.phdrAddr}, + arch.AuxEntry{linux.AT_PHENT, usermem.Addr(bin.phdrSize)}, + arch.AuxEntry{linux.AT_PHNUM, usermem.Addr(bin.phdrNum)}, + arch.AuxEntry{linux.AT_ENTRY, bin.entry}, + } + if bin.interpreter != "" { + bin.auxv = append(bin.auxv, arch.AuxEntry{linux.AT_BASE, interp.start}) + + // Start in the interpreter. + // N.B. AT_ENTRY above contains the *original* entry point. + bin.entry = interp.entry + } else { + // Always add AT_BASE even if there is no interpreter. 
+ bin.auxv = append(bin.auxv, arch.AuxEntry{linux.AT_BASE, 0}) + } + + return bin, ac, nil +} diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go new file mode 100644 index 000000000..3886b4d33 --- /dev/null +++ b/pkg/sentry/loader/interpreter.go @@ -0,0 +1,108 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package loader + +import ( + "bytes" + "io" + + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +const ( + // interpreterScriptMagic identifies an interpreter script. + interpreterScriptMagic = "#!" + + // interpMaxLineLength is the maximum length for the first line of an + // interpreter script. + // + // From execve(2): "A maximum line length of 127 characters is allowed + // for the first line in a #! executable shell script." + interpMaxLineLength = 127 +) + +// parseInterpreterScript returns the interpreter path and argv. +func parseInterpreterScript(ctx context.Context, filename string, f fsbridge.File, argv []string) (newpath string, newargv []string, err error) { + line := make([]byte, interpMaxLineLength) + n, err := f.ReadFull(ctx, usermem.BytesIOSequence(line), 0) + // Short read is OK. 
+ if err != nil && err != io.ErrUnexpectedEOF { + if err == io.EOF { + err = syserror.ENOEXEC + } + return "", []string{}, err + } + line = line[:n] + + if !bytes.Equal(line[:2], []byte(interpreterScriptMagic)) { + return "", []string{}, syserror.ENOEXEC + } + // Ignore #!. + line = line[2:] + + // Ignore everything after newline. + // Linux silently truncates the remainder of the line if it exceeds + // interpMaxLineLength. + i := bytes.IndexByte(line, '\n') + if i > 0 { + line = line[:i] + } + + // Skip any whitespace before the interpreter. + line = bytes.TrimLeft(line, " \t") + + // Linux only looks for spaces or tabs delimiting the interpreter and + // arg. + // + // execve(2): "On Linux, the entire string following the interpreter + // name is passed as a single argument to the interpreter, and this + // string can include white space." + interp := line + var arg []byte + i = bytes.IndexAny(line, " \t") + if i >= 0 { + interp = line[:i] + arg = bytes.TrimLeft(line[i:], " \t") + } + + if string(interp) == "" { + ctx.Infof("Interpreter script contains no interpreter: %v", line) + return "", []string{}, syserror.ENOEXEC + } + + // Build the new argument list: + // + // 1. The interpreter. + newargv = append(newargv, string(interp)) + + // 2. The optional interpreter argument. + if len(arg) > 0 { + newargv = append(newargv, string(arg)) + } + + // 3. The original arguments. The original argv[0] is replaced with the + // full script filename. + if len(argv) > 0 { + argv[0] = filename + } else { + argv = []string{filename} + } + newargv = append(newargv, argv...) + + return string(interp), newargv, nil +} diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go new file mode 100644 index 000000000..986c7fb4d --- /dev/null +++ b/pkg/sentry/loader/loader.go @@ -0,0 +1,315 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package loader loads an executable file into a MemoryManager. +package loader + +import ( + "bytes" + "fmt" + "io" + "path" + + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/cpuid" + "gvisor.dev/gvisor/pkg/rand" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsbridge" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/syserr" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +// LoadArgs holds specifications for an executable file to be loaded. +type LoadArgs struct { + // MemoryManager is the memory manager to load the executable into. + MemoryManager *mm.MemoryManager + + // RemainingTraversals is the maximum number of symlinks to follow to + // resolve Filename. This counter is passed by reference to keep it + // updated throughout the call stack. + RemainingTraversals *uint + + // ResolveFinal indicates whether the final link of Filename should be + // resolved, if it is a symlink. + ResolveFinal bool + + // Filename is the path for the executable. + Filename string + + // File is an open fs.File object of the executable. If File is not + // nil, then File will be loaded and Filename will be ignored. + // + // The caller is responsible for checking that the user can execute this file. + File fsbridge.File + + // Opener is used to open the executable file when 'File' is nil. 
+ Opener fsbridge.Lookup + + // CloseOnExec indicates that the executable (or one of its parent + // directories) was opened with O_CLOEXEC. If the executable is an + // interpreter script, then cause an ENOENT error to occur, since the + // script would otherwise be inaccessible to the interpreter. + CloseOnExec bool + + // Argv is the vector of arguments to pass to the executable. + Argv []string + + // Envv is the vector of environment variables to pass to the + // executable. + Envv []string + + // Features specifies the CPU feature set for the executable. + Features *cpuid.FeatureSet +} + +// openPath opens args.Filename and checks that it is valid for loading. +// +// openPath returns an *fs.Dirent and *fs.File for args.Filename, which is not +// installed in the Task FDTable. The caller takes ownership of both. +// +// args.Filename must be a readable, executable, regular file. +func openPath(ctx context.Context, args LoadArgs) (fsbridge.File, error) { + if args.Filename == "" { + ctx.Infof("cannot open empty name") + return nil, syserror.ENOENT + } + + // TODO(gvisor.dev/issue/160): Linux requires only execute permission, + // not read. However, our backing filesystems may prevent us from reading + // the file without read permission. Additionally, a task with a + // non-readable executable has additional constraints on access via + // ptrace and procfs. + opts := vfs.OpenOptions{ + Flags: linux.O_RDONLY, + FileExec: true, + } + return args.Opener.OpenPath(ctx, args.Filename, opts, args.RemainingTraversals, args.ResolveFinal) +} + +// checkIsRegularFile prevents us from trying to execute a directory, pipe, etc. 
+func checkIsRegularFile(ctx context.Context, file fsbridge.File, filename string) error { + t, err := file.Type(ctx) + if err != nil { + return err + } + if t != linux.ModeRegular { + ctx.Infof("%q is not a regular file: %v", filename, t) + return syserror.EACCES + } + return nil +} + +// allocStack allocates and maps a stack in to any available part of the address space. +func allocStack(ctx context.Context, m *mm.MemoryManager, a arch.Context) (*arch.Stack, error) { + ar, err := m.MapStack(ctx) + if err != nil { + return nil, err + } + return &arch.Stack{a, m, ar.End}, nil +} + +const ( + // maxLoaderAttempts is the maximum number of attempts to try to load + // an interpreter scripts, to prevent loops. 6 (initial + 5 changes) is + // what the Linux kernel allows (fs/exec.c:search_binary_handler). + maxLoaderAttempts = 6 +) + +// loadExecutable loads an executable that is pointed to by args.File. The +// caller is responsible for checking that the user can execute this file. +// If nil, the path args.Filename is resolved and loaded (check that the user +// can execute this file is done here in this case). If the executable is an +// interpreter script rather than an ELF, the binary of the corresponding +// interpreter will be loaded. +// +// It returns: +// * loadedELF, description of the loaded binary +// * arch.Context matching the binary arch +// * fs.Dirent of the binary file +// * Possibly updated args.Argv +func loadExecutable(ctx context.Context, args LoadArgs) (loadedELF, arch.Context, fsbridge.File, []string, error) { + for i := 0; i < maxLoaderAttempts; i++ { + if args.File == nil { + var err error + args.File, err = openPath(ctx, args) + if err != nil { + ctx.Infof("Error opening %s: %v", args.Filename, err) + return loadedELF{}, nil, nil, nil, err + } + // Ensure file is release in case the code loops or errors out. 
+ defer args.File.DecRef() + } else { + if err := checkIsRegularFile(ctx, args.File, args.Filename); err != nil { + return loadedELF{}, nil, nil, nil, err + } + } + + // Check the header. Is this an ELF or interpreter script? + var hdr [4]uint8 + // N.B. We assume that reading from a regular file cannot block. + _, err := args.File.ReadFull(ctx, usermem.BytesIOSequence(hdr[:]), 0) + // Allow unexpected EOF, as a valid executable could be only three bytes + // (e.g., #!a). + if err != nil && err != io.ErrUnexpectedEOF { + if err == io.EOF { + err = syserror.ENOEXEC + } + return loadedELF{}, nil, nil, nil, err + } + + switch { + case bytes.Equal(hdr[:], []byte(elfMagic)): + loaded, ac, err := loadELF(ctx, args) + if err != nil { + ctx.Infof("Error loading ELF: %v", err) + return loadedELF{}, nil, nil, nil, err + } + // An ELF is always terminal. Hold on to file. + args.File.IncRef() + return loaded, ac, args.File, args.Argv, err + + case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): + if args.CloseOnExec { + return loadedELF{}, nil, nil, nil, syserror.ENOENT + } + args.Filename, args.Argv, err = parseInterpreterScript(ctx, args.Filename, args.File, args.Argv) + if err != nil { + ctx.Infof("Error loading interpreter script: %v", err) + return loadedELF{}, nil, nil, nil, err + } + // Refresh the traversal limit for the interpreter. + *args.RemainingTraversals = linux.MaxSymlinkTraversals + + default: + ctx.Infof("Unknown magic: %v", hdr) + return loadedELF{}, nil, nil, nil, syserror.ENOEXEC + } + // Set to nil in case we loop on a Interpreter Script. + args.File = nil + } + + return loadedELF{}, nil, nil, nil, syserror.ELOOP +} + +// Load loads args.File into a MemoryManager. If args.File is nil, the path +// args.Filename is resolved and loaded instead. +// +// If Load returns ErrSwitchFile it should be called again with the returned +// path and argv. +// +// Preconditions: +// * The Task MemoryManager is empty. +// * Load is called on the Task goroutine. 
+func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, *syserr.Error) { + // Load the executable itself. + loaded, ac, file, newArgv, err := loadExecutable(ctx, args) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux()) + } + defer file.DecRef() + + // Load the VDSO. + vdsoAddr, err := loadVDSO(ctx, args.MemoryManager, vdso, loaded) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("error loading VDSO: %v", err), syserr.FromError(err).ToLinux()) + } + + // Setup the heap. brk starts at the next page after the end of the + // executable. Userspace can assume that the remainer of the page after + // loaded.end is available for its use. + e, ok := loaded.end.RoundUp() + if !ok { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("brk overflows: %#x", loaded.end), linux.ENOEXEC) + } + args.MemoryManager.BrkSetup(ctx, e) + + // Allocate our stack. + stack, err := allocStack(ctx, args.MemoryManager, ac) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to allocate stack: %v", err), syserr.FromError(err).ToLinux()) + } + + // Push the original filename to the stack, for AT_EXECFN. + execfn, err := stack.Push(args.Filename) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to push exec filename: %v", err), syserr.FromError(err).ToLinux()) + } + + // Push 16 random bytes on the stack which AT_RANDOM will point to. 
+ var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to read random bytes: %v", err), syserr.FromError(err).ToLinux()) + } + random, err := stack.Push(b) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to push random bytes: %v", err), syserr.FromError(err).ToLinux()) + } + + c := auth.CredentialsFromContext(ctx) + + // Add generic auxv entries. + auxv := append(loaded.auxv, arch.Auxv{ + arch.AuxEntry{linux.AT_UID, usermem.Addr(c.RealKUID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_EUID, usermem.Addr(c.EffectiveKUID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_GID, usermem.Addr(c.RealKGID.In(c.UserNamespace).OrOverflow())}, + arch.AuxEntry{linux.AT_EGID, usermem.Addr(c.EffectiveKGID.In(c.UserNamespace).OrOverflow())}, + // The conditions that require AT_SECURE = 1 never arise. See + // kernel.Task.updateCredsForExecLocked. + arch.AuxEntry{linux.AT_SECURE, 0}, + arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC}, + arch.AuxEntry{linux.AT_EXECFN, execfn}, + arch.AuxEntry{linux.AT_RANDOM, random}, + arch.AuxEntry{linux.AT_PAGESZ, usermem.PageSize}, + arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr}, + }...) + auxv = append(auxv, extraAuxv...) + + sl, err := stack.Load(newArgv, args.Envv, auxv) + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to load stack: %v", err), syserr.FromError(err).ToLinux()) + } + + m := args.MemoryManager + m.SetArgvStart(sl.ArgvStart) + m.SetArgvEnd(sl.ArgvEnd) + m.SetEnvvStart(sl.EnvvStart) + m.SetEnvvEnd(sl.EnvvEnd) + m.SetAuxv(auxv) + m.SetExecutable(file) + + symbolValue, err := getSymbolValueFromVDSO("rt_sigreturn") + if err != nil { + return 0, nil, "", syserr.NewDynamic(fmt.Sprintf("Failed to find rt_sigreturn in vdso: %v", err), syserr.FromError(err).ToLinux()) + } + + // Found rt_sigretrun. 
+ addr := uint64(vdsoAddr) + symbolValue - vdsoPrelink + m.SetVDSOSigReturn(addr) + + ac.SetIP(uintptr(loaded.entry)) + ac.SetStack(uintptr(stack.Bottom)) + + name := path.Base(args.Filename) + if len(name) > linux.TASK_COMM_LEN-1 { + name = name[:linux.TASK_COMM_LEN-1] + } + + return loaded.os, ac, name, nil +} diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go new file mode 100644 index 000000000..05a294fe6 --- /dev/null +++ b/pkg/sentry/loader/vdso.go @@ -0,0 +1,382 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package loader + +import ( + "bytes" + "debug/elf" + "fmt" + "io" + "strings" + + "gvisor.dev/gvisor/pkg/abi" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/safemem" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/mm" + "gvisor.dev/gvisor/pkg/sentry/pgalloc" + "gvisor.dev/gvisor/pkg/sentry/uniqueid" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" +) + +const vdsoPrelink = 0xffffffffff700000 + +type fileContext struct { + context.Context +} + +func (f *fileContext) Value(key interface{}) interface{} { + switch key { + case uniqueid.CtxGlobalUniqueID: + return uint64(0) + default: + return f.Context.Value(key) + } +} + +type byteFullReader struct { + data []byte +} + +func (b *byteFullReader) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if offset >= int64(len(b.data)) { + return 0, io.EOF + } + n, err := dst.CopyOut(ctx, b.data[offset:]) + return int64(n), err +} + +// validateVDSO checks that the VDSO can be loaded by loadVDSO. +// +// VDSOs are special (see below). Since we are going to map the VDSO directly +// rather than using a normal loading process, we require that the PT_LOAD +// segments have the same layout in the ELF as they expect to have in memory. +// +// Namely, this means that we must verify: +// * PT_LOAD file offsets are equivalent to the memory offset from the first +// segment. +// * No extra zeroed space (memsz) is required. +// * PT_LOAD segments are in order. +// * No two PT_LOAD segments occupy parts of the same page. +// * PT_LOAD segments don't extend beyond the end of the file. +// +// ctx may be nil if f does not need it. 
+func validateVDSO(ctx context.Context, f fullReader, size uint64) (elfInfo, error) { + info, err := parseHeader(ctx, f) + if err != nil { + log.Infof("Unable to parse VDSO header: %v", err) + return elfInfo{}, err + } + + var first *elf.ProgHeader + var prev *elf.ProgHeader + var prevEnd usermem.Addr + for i, phdr := range info.phdrs { + if phdr.Type != elf.PT_LOAD { + continue + } + + if first == nil { + first = &info.phdrs[i] + if phdr.Off != 0 { + log.Warningf("First PT_LOAD segment has non-zero file offset") + return elfInfo{}, syserror.ENOEXEC + } + } + + memoryOffset := phdr.Vaddr - first.Vaddr + if memoryOffset != phdr.Off { + log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off) + return elfInfo{}, syserror.ENOEXEC + } + + // memsz larger than filesz means that extra zeroed space should be + // provided at the end of the segment. Since we are mapping the ELF + // directly, we don't want to just overwrite part of the ELF with + // zeroes. + if phdr.Memsz != phdr.Filesz { + log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz) + return elfInfo{}, syserror.ENOEXEC + } + + start := usermem.Addr(memoryOffset) + end, ok := start.AddLength(phdr.Memsz) + if !ok { + log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end) + return elfInfo{}, syserror.ENOEXEC + } + if uint64(end) > size { + log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size) + return elfInfo{}, syserror.ENOEXEC + } + + if prev != nil { + if start < prevEnd { + log.Warningf("PT_LOAD segments out of order") + return elfInfo{}, syserror.ENOEXEC + } + + // We mprotect entire pages, so each segment must be in + // its own page. 
+ prevEndPage := prevEnd.RoundDown() + startPage := start.RoundDown() + if prevEndPage >= startPage { + log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage) + return elfInfo{}, syserror.ENOEXEC + } + } + prev = &info.phdrs[i] + prevEnd = end + } + + return info, nil +} + +// VDSO describes a VDSO. +// +// NOTE(mpratt): to support multiple architectures or operating systems, this +// would need to contain a VDSO for each. +// +// +stateify savable +type VDSO struct { + // ParamPage is the VDSO parameter page. This page should be updated to + // inform the VDSO for timekeeping data. + ParamPage *mm.SpecialMappable + + // vdso is the VDSO ELF itself. + vdso *mm.SpecialMappable + + // os is the operating system targeted by the VDSO. + os abi.OS + + // arch is the architecture targeted by the VDSO. + arch arch.Arch + + // phdrs are the VDSO ELF phdrs. + phdrs []elf.ProgHeader `state:".([]elfProgHeader)"` +} + +// getSymbolValueFromVDSO returns the specific symbol value in vdso.so. +func getSymbolValueFromVDSO(symbol string) (uint64, error) { + f, err := elf.NewFile(bytes.NewReader(vdsoBin)) + if err != nil { + return 0, err + } + syms, err := f.Symbols() + if err != nil { + return 0, err + } + + for _, sym := range syms { + if elf.ST_BIND(sym.Info) != elf.STB_LOCAL && sym.Section != elf.SHN_UNDEF { + if strings.Contains(sym.Name, symbol) { + return sym.Value, nil + } + } + } + return 0, fmt.Errorf("no %v in vdso.so", symbol) +} + +// PrepareVDSO validates the system VDSO and returns a VDSO, containing the +// param page for updating by the kernel. +func PrepareVDSO(mfp pgalloc.MemoryFileProvider) (*VDSO, error) { + vdsoFile := &byteFullReader{data: vdsoBin} + + // First make sure the VDSO is valid. vdsoFile does not use ctx, so a + // nil context can be passed. + info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsoBin))) + if err != nil { + return nil, err + } + + // Then copy it into a VDSO mapping. 
+ size, ok := usermem.Addr(len(vdsoBin)).RoundUp() + if !ok { + return nil, fmt.Errorf("VDSO size overflows? %#x", len(vdsoBin)) + } + + mf := mfp.MemoryFile() + vdso, err := mf.Allocate(uint64(size), usage.System) + if err != nil { + return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err) + } + + ims, err := mf.MapInternal(vdso, usermem.ReadWrite) + if err != nil { + mf.DecRef(vdso) + return nil, fmt.Errorf("unable to map VDSO memory: %v", err) + } + + _, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsoBin))) + if err != nil { + mf.DecRef(vdso) + return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err) + } + + // Finally, allocate a param page for this VDSO. + paramPage, err := mf.Allocate(usermem.PageSize, usage.System) + if err != nil { + mf.DecRef(vdso) + return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err) + } + + return &VDSO{ + ParamPage: mm.NewSpecialMappable("[vvar]", mfp, paramPage), + // TODO(gvisor.dev/issue/157): Don't advertise the VDSO, as + // some applications may not be able to handle multiple [vdso] + // hints. + vdso: mm.NewSpecialMappable("", mfp, vdso), + os: info.os, + arch: info.arch, + phdrs: info.phdrs, + }, nil +} + +// loadVDSO loads the VDSO into m. +// +// VDSOs are special. +// +// VDSOs are fully position independent. However, instead of loading a VDSO +// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux +// kernel simply directly maps the entire file into process memory, with very +// little real ELF parsing. +// +// NOTE(b/25323870): This means that userspace can, and unfortunately does, +// depend on parts of the ELF that would normally not be mapped. To maintain +// compatibility with such binaries, we load the VDSO much like Linux. +// +// loadVDSO takes a reference on the VDSO and parameter page FrameRegions. 
+func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (usermem.Addr, error) { + if v.os != bin.os { + ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os) + return 0, syserror.ENOEXEC + } + if v.arch != bin.arch { + ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch) + return 0, syserror.ENOEXEC + } + + // Reserve address space for the VDSO and its parameter page, which is + // mapped just before the VDSO. + mapSize := v.vdso.Length() + v.ParamPage.Length() + addr, err := m.MMap(ctx, memmap.MMapOpts{ + Length: mapSize, + Private: true, + }) + if err != nil { + ctx.Infof("Unable to reserve VDSO address space: %v", err) + return 0, err + } + + // Now map the param page. + _, err = m.MMap(ctx, memmap.MMapOpts{ + Length: v.ParamPage.Length(), + MappingIdentity: v.ParamPage, + Mappable: v.ParamPage, + Addr: addr, + Fixed: true, + Unmap: true, + Private: true, + Perms: usermem.Read, + MaxPerms: usermem.Read, + }) + if err != nil { + ctx.Infof("Unable to map VDSO param page: %v", err) + return 0, err + } + + // Now map the VDSO itself. + vdsoAddr, ok := addr.AddLength(v.ParamPage.Length()) + if !ok { + panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length())) + } + _, err = m.MMap(ctx, memmap.MMapOpts{ + Length: v.vdso.Length(), + MappingIdentity: v.vdso, + Mappable: v.vdso, + Addr: vdsoAddr, + Fixed: true, + Unmap: true, + Private: true, + Perms: usermem.Read, + MaxPerms: usermem.AnyAccess, + }) + if err != nil { + ctx.Infof("Unable to map VDSO: %v", err) + return 0, err + } + + vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length()) + if !ok { + panic(fmt.Sprintf("VDSO mapping overflows? %#x + %#x", vdsoAddr, v.vdso.Length())) + } + + // Set additional protections for the individual segments. 
+ var first *elf.ProgHeader + for i, phdr := range v.phdrs { + if phdr.Type != elf.PT_LOAD { + continue + } + + if first == nil { + first = &v.phdrs[i] + } + + memoryOffset := phdr.Vaddr - first.Vaddr + segAddr, ok := vdsoAddr.AddLength(memoryOffset) + if !ok { + ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset) + return 0, syserror.ENOEXEC + } + segPage := segAddr.RoundDown() + segSize := usermem.Addr(phdr.Memsz) + segSize, ok = segSize.AddLength(segAddr.PageOffset()) + if !ok { + ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset()) + return 0, syserror.ENOEXEC + } + segSize, ok = segSize.RoundUp() + if !ok { + ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset()) + return 0, syserror.ENOEXEC + } + segEnd, ok := segPage.AddLength(uint64(segSize)) + if !ok { + ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize) + return 0, syserror.ENOEXEC + } + if segEnd > vdsoEnd { + ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd) + return 0, syserror.ENOEXEC + } + + perms := progFlagsAsPerms(phdr.Flags) + if perms != usermem.Read { + if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil { + ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err) + return 0, syserror.ENOEXEC + } + } + } + + return vdsoAddr, nil +} diff --git a/pkg/sentry/loader/vdso_state.go b/pkg/sentry/loader/vdso_state.go new file mode 100644 index 000000000..db378e90a --- /dev/null +++ b/pkg/sentry/loader/vdso_state.go @@ -0,0 +1,48 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package loader + +import ( + "debug/elf" +) + +// +stateify savable +type elfProgHeader struct { + Type elf.ProgType + Flags elf.ProgFlag + Off uint64 + Vaddr uint64 + Paddr uint64 + Filesz uint64 + Memsz uint64 + Align uint64 +} + +// savePhdrs is invoked by stateify. +func (v *VDSO) savePhdrs() []elfProgHeader { + s := make([]elfProgHeader, 0, len(v.phdrs)) + for _, h := range v.phdrs { + s = append(s, elfProgHeader(h)) + } + return s +} + +// loadPhdrs is invoked by stateify. +func (v *VDSO) loadPhdrs(s []elfProgHeader) { + v.phdrs = make([]elf.ProgHeader, 0, len(s)) + for _, h := range s { + v.phdrs = append(v.phdrs, elf.ProgHeader(h)) + } +} |