Diffstat (limited to 'pkg/sentry/loader')
-rw-r--r-- | pkg/sentry/loader/BUILD | 59
-rw-r--r-- | pkg/sentry/loader/elf.go | 637
-rw-r--r-- | pkg/sentry/loader/interpreter.go | 105
-rw-r--r-- | pkg/sentry/loader/loader.go | 277
-rw-r--r-- | pkg/sentry/loader/vdso.go | 382
-rw-r--r-- | pkg/sentry/loader/vdso_state.go | 47
6 files changed, 1507 insertions, 0 deletions
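The control flow across these files is easiest to see from loader.go's loadPath: read the first bytes of the file, dispatch on the magic, and loop (at most six times) while interpreter scripts keep redirecting to new files. Below is a minimal, standalone sketch of that dispatch for illustration only; the helper name classifyExecutable is hypothetical, and plain os.File and error strings stand in for the real fs.File and syserror values.

package main

import (
	"bytes"
	"fmt"
	"os"
)

// classifyExecutable mirrors the dispatch at the top of loadPath: read
// the first four bytes and decide whether the file is an ELF binary,
// an interpreter script, or neither (ENOEXEC).
func classifyExecutable(f *os.File) (string, error) {
	var hdr [4]byte
	n, err := f.ReadAt(hdr[:], 0)
	// A short read is tolerated: a valid script can be as short as
	// three bytes (e.g. "#!a"), so only reads shorter than the
	// two-byte script magic are fatal.
	if err != nil && n < 2 {
		return "", fmt.Errorf("ENOEXEC: %v", err)
	}
	switch {
	case n >= 4 && bytes.Equal(hdr[:], []byte("\x7fELF")):
		// Terminal: elf.go maps the PT_LOAD segments directly.
		return "elf", nil
	case bytes.Equal(hdr[:2], []byte("#!")):
		// Retry: interpreter.go rewrites filename/argv and
		// loadPath loops, at most maxLoaderAttempts (6) times.
		return "script", nil
	default:
		return "", fmt.Errorf("ENOEXEC: unknown magic %v", hdr[:n])
	}
}

func main() {
	if len(os.Args) != 2 {
		fmt.Fprintln(os.Stderr, "usage: classify <file>")
		os.Exit(2)
	}
	f, err := os.Open(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()
	kind, err := classifyExecutable(f)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(kind)
}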
diff --git a/pkg/sentry/loader/BUILD b/pkg/sentry/loader/BUILD new file mode 100644 index 000000000..917ec8cc8 --- /dev/null +++ b/pkg/sentry/loader/BUILD @@ -0,0 +1,59 @@ +package(licenses = ["notice"]) # Apache 2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_embed_data", "go_library") +load("//tools/go_stateify:defs.bzl", "go_stateify") + +go_embed_data( + name = "vdso_bin", + src = "//vdso:vdso.so", + package = "loader", + var = "vdsoBin", +) + +go_stateify( + name = "loader_state", + srcs = [ + "vdso.go", + "vdso_state.go", + ], + out = "loader_state.go", + package = "loader", +) + +go_library( + name = "loader", + srcs = [ + "elf.go", + "interpreter.go", + "loader.go", + "loader_state.go", + "vdso.go", + "vdso_state.go", + ":vdso_bin", + ], + importpath = "gvisor.googlesource.com/gvisor/pkg/sentry/loader", + visibility = ["//pkg/sentry:internal"], + deps = [ + "//pkg/abi", + "//pkg/abi/linux", + "//pkg/binary", + "//pkg/cpuid", + "//pkg/log", + "//pkg/refs", + "//pkg/sentry/arch", + "//pkg/sentry/context", + "//pkg/sentry/fs", + "//pkg/sentry/fs/fsutil", + "//pkg/sentry/limits", + "//pkg/sentry/memmap", + "//pkg/sentry/mm", + "//pkg/sentry/platform", + "//pkg/sentry/safemem", + "//pkg/sentry/uniqueid", + "//pkg/sentry/usage", + "//pkg/sentry/usermem", + "//pkg/state", + "//pkg/syserror", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/loader/elf.go b/pkg/sentry/loader/elf.go new file mode 100644 index 000000000..d23dc1096 --- /dev/null +++ b/pkg/sentry/loader/elf.go @@ -0,0 +1,637 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package loader + +import ( + "bytes" + "debug/elf" + "fmt" + "io" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/abi" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/binary" + "gvisor.googlesource.com/gvisor/pkg/cpuid" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/limits" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +const ( + // elfMagic identifies an ELF file. + elfMagic = "\x7fELF" + + // maxTotalPhdrSize is the maximum combined size of all program + // headers. Linux limits this to one page. + maxTotalPhdrSize = usermem.PageSize +) + +var ( + // header64Size is the size of elf.Header64. + header64Size = int(binary.Size(elf.Header64{})) + + // Prog64Size is the size of elf.Prog64. 
+ prog64Size = int(binary.Size(elf.Prog64{})) +) + +func progFlagsAsPerms(f elf.ProgFlag) usermem.AccessType { + var p usermem.AccessType + if f&elf.PF_R == elf.PF_R { + p.Read = true + } + if f&elf.PF_W == elf.PF_W { + p.Write = true + } + if f&elf.PF_X == elf.PF_X { + p.Execute = true + } + return p +} + +// elfInfo contains the metadata needed to load an ELF binary. +type elfInfo struct { + // os is the target OS of the ELF. + os abi.OS + + // arch is the target architecture of the ELF. + arch arch.Arch + + // entry is the program entry point. + entry usermem.Addr + + // phdrs are the program headers. + phdrs []elf.ProgHeader + + // phdrSize is the size of a single program header in the ELF. + phdrSize int + + // phdrOff is the offset of the program headers in the file. + phdrOff uint64 + + // sharedObject is true if the ELF represents a shared object. + sharedObject bool +} + +// parseHeader parse the ELF header, verifying that this is a supported ELF +// file and returning the ELF program headers. +// +// This is similar to elf.NewFile, except that it is more strict about what it +// accepts from the ELF, and it doesn't parse unnecessary parts of the file. +// +// ctx may be nil if f does not need it. +func parseHeader(ctx context.Context, f *fs.File) (elfInfo, error) { + // Check ident first; it will tell us the endianness of the rest of the + // structs. + var ident [elf.EI_NIDENT]byte + _, err := readFull(ctx, f, usermem.BytesIOSequence(ident[:]), 0) + if err != nil { + log.Infof("Error reading ELF ident: %v", err) + // The entire ident array always exists. + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + + // Only some callers pre-check the ELF magic. + if !bytes.Equal(ident[:len(elfMagic)], []byte(elfMagic)) { + log.Infof("File is not an ELF") + return elfInfo{}, syserror.ENOEXEC + } + + // We only support 64-bit, little endian binaries + if class := elf.Class(ident[elf.EI_CLASS]); class != elf.ELFCLASS64 { + log.Infof("Unsupported ELF class: %v", class) + return elfInfo{}, syserror.ENOEXEC + } + if endian := elf.Data(ident[elf.EI_DATA]); endian != elf.ELFDATA2LSB { + log.Infof("Unsupported ELF endianness: %v", endian) + return elfInfo{}, syserror.ENOEXEC + } + byteOrder := binary.LittleEndian + + if version := elf.Version(ident[elf.EI_VERSION]); version != elf.EV_CURRENT { + log.Infof("Unsupported ELF version: %v", version) + return elfInfo{}, syserror.ENOEXEC + } + // EI_OSABI is ignored by Linux, which is the only OS supported. + os := abi.Linux + + var hdr elf.Header64 + hdrBuf := make([]byte, header64Size) + _, err = readFull(ctx, f, usermem.BytesIOSequence(hdrBuf), 0) + if err != nil { + log.Infof("Error reading ELF header: %v", err) + // The entire header always exists. + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + binary.Unmarshal(hdrBuf, byteOrder, &hdr) + + // We only support amd64. 
+ if machine := elf.Machine(hdr.Machine); machine != elf.EM_X86_64 { + log.Infof("Unsupported ELF machine %d", machine) + return elfInfo{}, syserror.ENOEXEC + } + a := arch.AMD64 + + var sharedObject bool + elfType := elf.Type(hdr.Type) + switch elfType { + case elf.ET_EXEC: + sharedObject = false + case elf.ET_DYN: + sharedObject = true + default: + log.Infof("Unsupported ELF type %v", elfType) + return elfInfo{}, syserror.ENOEXEC + } + + if int(hdr.Phentsize) != prog64Size { + log.Infof("Unsupported phdr size %d", hdr.Phentsize) + return elfInfo{}, syserror.ENOEXEC + } + totalPhdrSize := prog64Size * int(hdr.Phnum) + if totalPhdrSize < prog64Size { + log.Warningf("No phdrs or total phdr size overflows: prog64Size: %d phnum: %d", prog64Size, int(hdr.Phnum)) + return elfInfo{}, syserror.ENOEXEC + } + if totalPhdrSize > maxTotalPhdrSize { + log.Infof("Too many phdrs (%d): total size %d > %d", hdr.Phnum, totalPhdrSize, maxTotalPhdrSize) + return elfInfo{}, syserror.ENOEXEC + } + + phdrBuf := make([]byte, totalPhdrSize) + _, err = readFull(ctx, f, usermem.BytesIOSequence(phdrBuf), int64(hdr.Phoff)) + if err != nil { + log.Infof("Error reading ELF phdrs: %v", err) + // If phdrs were specified, they should all exist. + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = syserror.ENOEXEC + } + return elfInfo{}, err + } + + phdrs := make([]elf.ProgHeader, hdr.Phnum) + for i := range phdrs { + var prog64 elf.Prog64 + binary.Unmarshal(phdrBuf[:prog64Size], byteOrder, &prog64) + phdrBuf = phdrBuf[prog64Size:] + phdrs[i] = elf.ProgHeader{ + Type: elf.ProgType(prog64.Type), + Flags: elf.ProgFlag(prog64.Flags), + Off: prog64.Off, + Vaddr: prog64.Vaddr, + Paddr: prog64.Paddr, + Filesz: prog64.Filesz, + Memsz: prog64.Memsz, + Align: prog64.Align, + } + } + + return elfInfo{ + os: os, + arch: a, + entry: usermem.Addr(hdr.Entry), + phdrs: phdrs, + phdrOff: hdr.Phoff, + phdrSize: prog64Size, + sharedObject: sharedObject, + }, nil +} + +// mapSegment maps a phdr into the Task. offset is the offset to apply to +// phdr.Vaddr. +func mapSegment(ctx context.Context, m *mm.MemoryManager, f *fs.File, phdr *elf.ProgHeader, offset usermem.Addr) error { + // Alignment of vaddr and offset must match. We'll need to map on the + // page boundary. + adjust := usermem.Addr(phdr.Vaddr).PageOffset() + if adjust != usermem.Addr(phdr.Off).PageOffset() { + ctx.Infof("Alignment of vaddr %#x != off %#x", phdr.Vaddr, phdr.Off) + return syserror.ENOEXEC + } + + addr, ok := offset.AddLength(phdr.Vaddr) + if !ok { + // If offset != 0 we should have ensured this would fit. + ctx.Warningf("Computed segment load address overflows: %#x + %#x", phdr.Vaddr, offset) + return syserror.ENOEXEC + } + addr -= usermem.Addr(adjust) + + fileOffset := phdr.Off - adjust + fileSize := phdr.Filesz + adjust + if fileSize < phdr.Filesz { + ctx.Infof("Computed segment file size overflows: %#x + %#x", phdr.Filesz, adjust) + return syserror.ENOEXEC + } + memSize := phdr.Memsz + adjust + if memSize < phdr.Memsz { + ctx.Infof("Computed segment mem size overflows: %#x + %#x", phdr.Memsz, adjust) + return syserror.ENOEXEC + } + ms, ok := usermem.Addr(fileSize).RoundUp() + if !ok { + ctx.Infof("fileSize %#x too large", fileSize) + return syserror.ENOEXEC + } + mapSize := uint64(ms) + + prot := progFlagsAsPerms(phdr.Flags) + mopts := memmap.MMapOpts{ + Length: mapSize, + Offset: fileOffset, + Addr: addr, + Fixed: true, + // Linux will happily allow conflicting segments to map over + // one another. 
+ Unmap: true,
+ Private: true,
+ Perms: prot,
+ MaxPerms: usermem.AnyAccess,
+ }
+ if err := f.ConfigureMMap(ctx, &mopts); err != nil {
+ ctx.Infof("File is not memory-mappable: %v", err)
+ return err
+ }
+ if _, err := m.MMap(ctx, mopts); err != nil {
+ ctx.Infof("Error mapping PT_LOAD segment %+v at %#x: %v", phdr, addr, err)
+ return err
+ }
+
+ // We need to clear the end of the last page that exceeds fileSize so
+ // we don't map part of the file beyond fileSize.
+ //
+ // Note that Linux *does not* clear the portion of the first page
+ // before phdr.Off.
+ if mapSize > fileSize {
+ zeroAddr, ok := addr.AddLength(fileSize)
+ if !ok {
+ panic(fmt.Sprintf("successfully mmaped address overflows? %#x + %#x", addr, fileSize))
+ }
+ zeroSize := int64(mapSize - fileSize)
+ if zeroSize < 0 {
+ panic(fmt.Sprintf("zeroSize too big? %#x", uint64(zeroSize)))
+ }
+ if _, err := m.ZeroOut(ctx, zeroAddr, zeroSize, usermem.IOOpts{IgnorePermissions: true}); err != nil {
+ ctx.Warningf("Failed to zero end of page [%#x, %#x): %v", zeroAddr, zeroAddr+usermem.Addr(zeroSize), err)
+ return err
+ }
+ }
+
+ // Allocate more anonymous pages if necessary.
+ if mapSize < memSize {
+ anonAddr, ok := addr.AddLength(mapSize)
+ if !ok {
+ panic(fmt.Sprintf("anonymous memory doesn't fit in pre-sized range? %#x + %#x", addr, mapSize))
+ }
+ anonSize, ok := usermem.Addr(memSize - mapSize).RoundUp()
+ if !ok {
+ ctx.Infof("extra anon pages too large: %#x", memSize-mapSize)
+ return syserror.ENOEXEC
+ }
+
+ if _, err := m.MMap(ctx, memmap.MMapOpts{
+ Length: uint64(anonSize),
+ Addr: anonAddr,
+ // Fixed without Unmap will fail the mmap if something is
+ // already at addr.
+ Fixed: true,
+ Private: true,
+ Perms: progFlagsAsPerms(phdr.Flags),
+ MaxPerms: usermem.AnyAccess,
+ }); err != nil {
+ ctx.Infof("Error mapping PT_LOAD segment %v anonymous memory: %v", phdr, err)
+ return err
+ }
+ }
+
+ return nil
+}
+
+// loadedELF describes an ELF that has been successfully loaded.
+type loadedELF struct {
+ // os is the target OS of the ELF.
+ os abi.OS
+
+ // arch is the target architecture of the ELF.
+ arch arch.Arch
+
+ // entry is the entry point of the ELF.
+ entry usermem.Addr
+
+ // start is the start of the ELF.
+ start usermem.Addr
+
+ // end is the end of the ELF.
+ end usermem.Addr
+
+ // interpreter is the path to the ELF interpreter.
+ interpreter string
+
+ // phdrAddr is the address of the ELF program headers.
+ phdrAddr usermem.Addr
+
+ // phdrSize is the size of a single program header in the ELF.
+ phdrSize int
+
+ // phdrNum is the number of program headers.
+ phdrNum int
+
+ // auxv contains a subset of ELF-specific auxiliary vector entries:
+ // * AT_PHDR
+ // * AT_PHENT
+ // * AT_PHNUM
+ // * AT_BASE
+ // * AT_ENTRY
+ auxv arch.Auxv
+}
+
+// loadParsedELF loads f into mm.
+//
+// info is the parsed elfInfo from the header.
+//
+// It does not load the ELF interpreter, or return any auxv entries.
+//
+// Preconditions:
+// * f is an ELF file
+func loadParsedELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, info elfInfo, sharedLoadOffset usermem.Addr) (loadedELF, error) {
+ first := true
+ var start, end usermem.Addr
+ var interpreter string
+ for _, phdr := range info.phdrs {
+ switch phdr.Type {
+ case elf.PT_LOAD:
+ vaddr := usermem.Addr(phdr.Vaddr)
+ if first {
+ first = false
+ start = vaddr
+ }
+ if vaddr < end {
+ ctx.Infof("PT_LOAD headers out-of-order. %#x < %#x", vaddr, end)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+ var ok bool
+ end, ok = vaddr.AddLength(phdr.Memsz)
+ if !ok {
+ ctx.Infof("PT_LOAD header size overflows. %#x + %#x", vaddr, phdr.Memsz)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ case elf.PT_INTERP:
+ if phdr.Filesz > syscall.PathMax {
+ ctx.Infof("PT_INTERP path too big: %v", phdr.Filesz)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ path := make([]byte, phdr.Filesz)
+ _, err := readFull(ctx, f, usermem.BytesIOSequence(path), int64(phdr.Off))
+ if err != nil {
+ ctx.Infof("Error reading PT_INTERP path: %v", err)
+ // If an interpreter was specified, it should exist.
+ if err == io.EOF || err == io.ErrUnexpectedEOF {
+ err = syserror.ENOEXEC
+ }
+ return loadedELF{}, err
+ }
+
+ if path[len(path)-1] != 0 {
+ ctx.Infof("PT_INTERP path not NUL-terminated: %v", path)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ // Strip NUL-terminator from string.
+ interpreter = string(path[:len(path)-1])
+ }
+ }
+
+ // Shared objects don't have fixed load addresses. We need to pick a
+ // base address big enough to fit all segments, so we first create a
+ // mapping for the total size just to find a region that is big enough.
+ //
+ // It is safe to unmap it immediately without racing with another
+ // mapping because we are the only one in control of the MemoryManager.
+ //
+ // Note that the vaddr of the first PT_LOAD segment is ignored when
+ // choosing the load address (even if it is non-zero). The vaddr does
+ // become an offset from that load address.
+ var offset usermem.Addr
+ if info.sharedObject {
+ totalSize := end - start
+ totalSize, ok := totalSize.RoundUp()
+ if !ok {
+ ctx.Infof("ELF PT_LOAD segments too big")
+ return loadedELF{}, syserror.ENOEXEC
+ }
+
+ var err error
+ offset, err = m.MMap(ctx, memmap.MMapOpts{
+ Length: uint64(totalSize),
+ Addr: sharedLoadOffset,
+ Private: true,
+ })
+ if err != nil {
+ ctx.Infof("Error allocating address space for shared object: %v", err)
+ return loadedELF{}, err
+ }
+ if err := m.MUnmap(ctx, offset, uint64(totalSize)); err != nil {
+ panic(fmt.Sprintf("Failed to unmap base address: %v", err))
+ }
+
+ start, ok = start.AddLength(uint64(offset))
+ if !ok {
+ panic(fmt.Sprintf("Start %#x + offset %#x overflows?", start, offset))
+ }
+
+ end, ok = end.AddLength(uint64(offset))
+ if !ok {
+ panic(fmt.Sprintf("End %#x + offset %#x overflows?", end, offset))
+ }
+
+ info.entry, ok = info.entry.AddLength(uint64(offset))
+ if !ok {
+ ctx.Infof("Entrypoint %#x + offset %#x overflows? Is the entrypoint within a segment?", info.entry, offset)
+ return loadedELF{}, syserror.ENOEXEC
+ }
+ }
+
+ // Map PT_LOAD segments.
+ for _, phdr := range info.phdrs {
+ switch phdr.Type {
+ case elf.PT_LOAD:
+ if phdr.Memsz == 0 {
+ // No need to load segments with size 0, but
+ // they exist in some binaries.
+ continue
+ }
+
+ if err := mapSegment(ctx, m, f, &phdr, offset); err != nil {
+ ctx.Infof("Failed to map PT_LOAD segment: %+v", phdr)
+ return loadedELF{}, err
+ }
+ }
+ }
+
+ // This assumes that the first segment contains the ELF headers. This
+ // may not be true in a malformed ELF, but Linux makes the same
+ // assumption.
+ phdrAddr, ok := start.AddLength(info.phdrOff) + if !ok { + ctx.Warningf("ELF start address %#x + phdr offset %#x overflows", start, info.phdrOff) + phdrAddr = 0 + } + + return loadedELF{ + os: info.os, + arch: info.arch, + entry: info.entry, + start: start, + end: end, + interpreter: interpreter, + phdrAddr: phdrAddr, + phdrSize: info.phdrSize, + phdrNum: len(info.phdrs), + }, nil +} + +// loadInitialELF loads f into mm. +// +// It creates an arch.Context for the ELF and prepares the mm for this arch. +// +// It does not load the ELF interpreter, or return any auxv entries. +// +// Preconditions: +// * f is an ELF file +// * f is the first ELF loaded into m +func loadInitialELF(ctx context.Context, m *mm.MemoryManager, fs *cpuid.FeatureSet, f *fs.File) (loadedELF, arch.Context, error) { + info, err := parseHeader(ctx, f) + if err != nil { + ctx.Infof("Failed to parse initial ELF: %v", err) + return loadedELF{}, nil, err + } + + // Create the arch.Context now so we can prepare the mmap layout before + // mapping anything. + ac := arch.New(info.arch, fs) + + l, err := m.SetMmapLayout(ac, limits.FromContext(ctx)) + if err != nil { + ctx.Warningf("Failed to set mmap layout: %v", err) + return loadedELF{}, nil, err + } + + // PIELoadAddress tries to move the ELF out of the way of the default + // mmap base to ensure that the initial brk has sufficient space to + // grow. + le, err := loadParsedELF(ctx, m, f, info, ac.PIELoadAddress(l)) + return le, ac, err +} + +// loadInterpreterELF loads f into mm. +// +// The interpreter must be for the same OS/Arch as the initial ELF. +// +// It does not return any auxv entries. +// +// Preconditions: +// * f is an ELF file +func loadInterpreterELF(ctx context.Context, m *mm.MemoryManager, f *fs.File, initial loadedELF) (loadedELF, error) { + info, err := parseHeader(ctx, f) + if err != nil { + if err == syserror.ENOEXEC { + // Bad interpreter. + err = syserror.ELIBBAD + } + return loadedELF{}, err + } + + if info.os != initial.os { + ctx.Infof("Initial ELF OS %v and interpreter ELF OS %v differ", initial.os, info.os) + return loadedELF{}, syserror.ELIBBAD + } + if info.arch != initial.arch { + ctx.Infof("Initial ELF arch %v and interpreter ELF arch %v differ", initial.arch, info.arch) + return loadedELF{}, syserror.ELIBBAD + } + + // The interpreter is not given a load offset, as its location does not + // affect brk. + return loadParsedELF(ctx, m, f, info, 0) +} + +// loadELF loads f into the Task address space. +// +// If loadELF returns ErrSwitchFile it should be called again with the returned +// path and argv. +// +// Preconditions: +// * f is an ELF file +func loadELF(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals uint, fs *cpuid.FeatureSet, f *fs.File) (loadedELF, arch.Context, error) { + bin, ac, err := loadInitialELF(ctx, m, fs, f) + if err != nil { + ctx.Infof("Error loading binary: %v", err) + return loadedELF{}, nil, err + } + + var interp loadedELF + if bin.interpreter != "" { + d, i, err := openPath(ctx, mounts, root, wd, maxTraversals, bin.interpreter) + if err != nil { + ctx.Infof("Error opening interpreter %s: %v", bin.interpreter, err) + return loadedELF{}, nil, err + } + defer i.DecRef() + // We don't need the Dirent. + d.DecRef() + + interp, err = loadInterpreterELF(ctx, m, i, bin) + if err != nil { + ctx.Infof("Error loading interpreter: %v", err) + return loadedELF{}, nil, err + } + + if interp.interpreter != "" { + // No recursive interpreters! 
+ ctx.Infof("Interpreter requires an interpreter") + return loadedELF{}, nil, syserror.ENOEXEC + } + } + + // ELF-specific auxv entries. + bin.auxv = arch.Auxv{ + arch.AuxEntry{linux.AT_PHDR, bin.phdrAddr}, + arch.AuxEntry{linux.AT_PHENT, usermem.Addr(bin.phdrSize)}, + arch.AuxEntry{linux.AT_PHNUM, usermem.Addr(bin.phdrNum)}, + arch.AuxEntry{linux.AT_ENTRY, bin.entry}, + } + if bin.interpreter != "" { + bin.auxv = append(bin.auxv, arch.AuxEntry{linux.AT_BASE, interp.start}) + + // Start in the interpreter. + // N.B. AT_ENTRY above contains the *original* entry point. + bin.entry = interp.entry + } + + return bin, ac, nil +} diff --git a/pkg/sentry/loader/interpreter.go b/pkg/sentry/loader/interpreter.go new file mode 100644 index 000000000..b8ecbe92f --- /dev/null +++ b/pkg/sentry/loader/interpreter.go @@ -0,0 +1,105 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package loader + +import ( + "bytes" + "io" + + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +const ( + // interpreterScriptMagic identifies an interpreter script. + interpreterScriptMagic = "#!" + + // interpMaxLineLength is the maximum length for the first line of an + // interpreter script. + // + // From execve(2): "A maximum line length of 127 characters is allowed + // for the first line in a #! executable shell script." + interpMaxLineLength = 127 +) + +// parseInterpreterScript returns the interpreter path and argv. +func parseInterpreterScript(ctx context.Context, filename string, f *fs.File, argv, envv []string) (newpath string, newargv []string, err error) { + line := make([]byte, interpMaxLineLength) + n, err := readFull(ctx, f, usermem.BytesIOSequence(line), 0) + // Short read is OK. + if err != nil && err != io.ErrUnexpectedEOF { + if err == io.EOF { + err = syserror.ENOEXEC + } + return "", []string{}, err + } + line = line[:n] + + if !bytes.Equal(line[:2], []byte(interpreterScriptMagic)) { + return "", []string{}, syserror.ENOEXEC + } + // Ignore #!. + line = line[2:] + + // Ignore everything after newline. + // Linux silently truncates the remainder of the line if it exceeds + // interpMaxLineLength. + i := bytes.IndexByte(line, '\n') + if i > 0 { + line = line[:i] + } + + // Skip any whitespace before the interpeter. + line = bytes.TrimLeft(line, " \t") + + // Linux only looks for a space or tab delimiting the interpreter and + // arg. + // + // execve(2): "On Linux, the entire string following the interpreter + // name is passed as a single argument to the interpreter, and this + // string can include white space." + interp := line + var arg []byte + i = bytes.IndexAny(line, " \t") + if i >= 0 { + interp = line[:i] + if i+1 < len(line) { + arg = line[i+1:] + } + } + + // Build the new argument list: + // + // 1. The interpreter. + newargv = append(newargv, string(interp)) + + // 2. 
The optional interpreter argument. + if len(arg) > 0 { + newargv = append(newargv, string(arg)) + } + + // 3. The original arguments. The original argv[0] is replaced with the + // full script filename. + if len(argv) > 0 { + argv[0] = filename + } else { + argv = []string{filename} + } + newargv = append(newargv, argv...) + + return string(interp), newargv, nil +} diff --git a/pkg/sentry/loader/loader.go b/pkg/sentry/loader/loader.go new file mode 100644 index 000000000..94c281b72 --- /dev/null +++ b/pkg/sentry/loader/loader.go @@ -0,0 +1,277 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package loader loads a binary into a MemoryManager. +package loader + +import ( + "bytes" + "crypto/rand" + "io" + "path" + + "gvisor.googlesource.com/gvisor/pkg/abi" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" + "gvisor.googlesource.com/gvisor/pkg/cpuid" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" +) + +// readFull behaves like io.ReadFull for an *fs.File. +func readFull(ctx context.Context, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + var total int64 + for dst.NumBytes() > 0 { + n, err := f.Preadv(ctx, dst, offset+total) + total += n + if err == io.EOF && total != 0 { + return total, io.ErrUnexpectedEOF + } else if err != nil { + return total, err + } + dst = dst.DropFirst64(n) + } + return total, nil +} + +// openPath opens name for loading. +// +// openPath returns the fs.Dirent and an *fs.File for name, which is not +// installed in the Task FDMap. The caller takes ownership of both. +// +// name must be a readable, executable, regular file. +func openPath(ctx context.Context, mm *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals uint, name string) (*fs.Dirent, *fs.File, error) { + d, err := mm.FindInode(ctx, root, wd, name, maxTraversals) + if err != nil { + return nil, nil, err + } + defer d.DecRef() + + perms := fs.PermMask{ + // TODO: Linux requires only execute permission, + // not read. However, our backing filesystems may prevent us + // from reading the file without read permission. + // + // Additionally, a task with a non-readable executable has + // additional constraints on access via ptrace and procfs. + Read: true, + Execute: true, + } + if err := d.Inode.CheckPermission(ctx, perms); err != nil { + return nil, nil, err + } + + // If they claim it's a directory, then make sure. + // + // N.B. we reject directories below, but we must first reject + // non-directories passed as directories. + if len(name) > 0 && name[len(name)-1] == '/' && !fs.IsDir(d.Inode.StableAttr) { + return nil, nil, syserror.ENOTDIR + } + + // No exec-ing directories, pipes, etc! 
+ if !fs.IsRegular(d.Inode.StableAttr) { + ctx.Infof("Error regularing %s: %v", name, d.Inode.StableAttr) + return nil, nil, syserror.EACCES + } + + // Create a new file. + file, err := d.Inode.GetFile(ctx, d, fs.FileFlags{Read: true}) + if err != nil { + return nil, nil, err + } + + // We must be able to read at arbitrary offsets. + if !file.Flags().Pread { + file.DecRef() + ctx.Infof("%s cannot be read at an offset: %+v", name, file.Flags()) + return nil, nil, syserror.EACCES + } + + // Grab a reference for the caller. + d.IncRef() + return d, file, nil +} + +// allocStack allocates and maps a stack in to any available part of the address space. +func allocStack(ctx context.Context, m *mm.MemoryManager, a arch.Context) (*arch.Stack, error) { + ar, err := m.MapStack(ctx) + if err != nil { + return nil, err + } + return &arch.Stack{a, m, ar.End}, nil +} + +const ( + // maxLoaderAttempts is the maximum number of attempts to try to load + // an interpreter scripts, to prevent loops. 6 (inital + 5 changes) is + // what the Linux kernel allows (fs/exec.c:search_binary_handler). + maxLoaderAttempts = 6 +) + +// loadPath resolves filename to a binary and loads it. +// +// It returns: +// * loadedELF, description of the loaded binary +// * arch.Context matching the binary arch +// * fs.Dirent of the binary file +// * Possibly updated argv +func loadPath(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals uint, fs *cpuid.FeatureSet, filename string, argv, envv []string) (loadedELF, arch.Context, *fs.Dirent, []string, error) { + for i := 0; i < maxLoaderAttempts; i++ { + d, f, err := openPath(ctx, mounts, root, wd, maxTraversals, filename) + if err != nil { + ctx.Infof("Error opening %s: %v", filename, err) + return loadedELF{}, nil, nil, nil, err + } + defer f.DecRef() + // We will return d in the successful case, but defer a DecRef + // for intermediate loops and failure cases. + defer d.DecRef() + + // Check the header. Is this an ELF or interpreter script? + var hdr [4]uint8 + // N.B. We assume that reading from a regular file cannot block. + _, err = readFull(ctx, f, usermem.BytesIOSequence(hdr[:]), 0) + // Allow unexpected EOF, as a valid executable could be only three + // bytes (e.g., #!a). + if err != nil && err != io.ErrUnexpectedEOF { + if err == io.EOF { + err = syserror.ENOEXEC + } + return loadedELF{}, nil, nil, nil, err + } + + switch { + case bytes.Equal(hdr[:], []byte(elfMagic)): + loaded, ac, err := loadELF(ctx, m, mounts, root, wd, maxTraversals, fs, f) + if err != nil { + ctx.Infof("Error loading ELF: %v", err) + return loadedELF{}, nil, nil, nil, err + } + // An ELF is always terminal. Hold on to d. + d.IncRef() + return loaded, ac, d, argv, err + case bytes.Equal(hdr[:2], []byte(interpreterScriptMagic)): + newpath, newargv, err := parseInterpreterScript(ctx, filename, f, argv, envv) + if err != nil { + ctx.Infof("Error loading interpreter script: %v", err) + return loadedELF{}, nil, nil, nil, err + } + filename = newpath + argv = newargv + default: + ctx.Infof("Unknown magic: %v", hdr) + return loadedELF{}, nil, nil, nil, syserror.ENOEXEC + } + } + + return loadedELF{}, nil, nil, nil, syserror.ELOOP +} + +// Load loads filename into a MemoryManager. +// +// If Load returns ErrSwitchFile it should be called again with the returned +// path and argv. +// +// Preconditions: +// * The Task MemoryManager is empty. +// * Load is called on the Task goroutine. 
+func Load(ctx context.Context, m *mm.MemoryManager, mounts *fs.MountNamespace, root, wd *fs.Dirent, maxTraversals uint, fs *cpuid.FeatureSet, filename string, argv, envv []string, extraAuxv []arch.AuxEntry, vdso *VDSO) (abi.OS, arch.Context, string, error) { + // Load the binary itself. + loaded, ac, d, argv, err := loadPath(ctx, m, mounts, root, wd, maxTraversals, fs, filename, argv, envv) + if err != nil { + ctx.Infof("Failed to load %s: %v", filename, err) + return 0, nil, "", err + } + defer d.DecRef() + + // Load the VDSO. + vdsoAddr, err := loadVDSO(ctx, m, vdso, loaded) + if err != nil { + ctx.Infof("Error loading VDSO: %v", err) + return 0, nil, "", err + } + + // Setup the heap. brk starts at the next page after the end of the + // binary. Userspace can assume that the remainer of the page after + // loaded.end is available for its use. + e, ok := loaded.end.RoundUp() + if !ok { + ctx.Warningf("brk overflows: %#x", loaded.end) + return 0, nil, "", syserror.ENOEXEC + } + m.BrkSetup(ctx, e) + + // Allocate our stack. + stack, err := allocStack(ctx, m, ac) + if err != nil { + ctx.Infof("Failed to allocate stack: %v", err) + return 0, nil, "", err + } + + // Push the original filename to the stack, for AT_EXECFN. + execfn, err := stack.Push(filename) + if err != nil { + ctx.Infof("Failed to push exec filename: %v", err) + return 0, nil, "", err + } + + // Push 16 random bytes on the stack which AT_RANDOM will point to. + var b [16]byte + if _, err := rand.Read(b[:]); err != nil { + ctx.Infof("Failed to read random bytes: %v", err) + return 0, nil, "", err + } + random, err := stack.Push(b) + if err != nil { + ctx.Infof("Failed to push random bytes: %v", err) + return 0, nil, "", err + } + + // Add generic auxv entries + auxv := append(loaded.auxv, arch.Auxv{ + arch.AuxEntry{linux.AT_CLKTCK, linux.CLOCKS_PER_SEC}, + arch.AuxEntry{linux.AT_EXECFN, execfn}, + arch.AuxEntry{linux.AT_RANDOM, random}, + arch.AuxEntry{linux.AT_PAGESZ, usermem.PageSize}, + arch.AuxEntry{linux.AT_SYSINFO_EHDR, vdsoAddr}, + }...) + auxv = append(auxv, extraAuxv...) + + sl, err := stack.Load(argv, envv, auxv) + if err != nil { + ctx.Infof("Failed to load stack: %v", err) + return 0, nil, "", err + } + + m.SetArgvStart(sl.ArgvStart) + m.SetArgvEnd(sl.ArgvEnd) + m.SetEnvvStart(sl.EnvvStart) + m.SetEnvvEnd(sl.EnvvEnd) + m.SetAuxv(auxv) + m.SetExecutable(d) + + ac.SetIP(uintptr(loaded.entry)) + ac.SetStack(uintptr(stack.Bottom)) + + name := path.Base(filename) + if len(name) > linux.TASK_COMM_LEN-1 { + name = name[:linux.TASK_COMM_LEN-1] + } + + return loaded.os, ac, name, nil +} diff --git a/pkg/sentry/loader/vdso.go b/pkg/sentry/loader/vdso.go new file mode 100644 index 000000000..ce4f6f5d9 --- /dev/null +++ b/pkg/sentry/loader/vdso.go @@ -0,0 +1,382 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package loader + +import ( + "debug/elf" + "fmt" + "io" + + "gvisor.googlesource.com/gvisor/pkg/abi" + "gvisor.googlesource.com/gvisor/pkg/log" + "gvisor.googlesource.com/gvisor/pkg/sentry/arch" + "gvisor.googlesource.com/gvisor/pkg/sentry/context" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs" + "gvisor.googlesource.com/gvisor/pkg/sentry/fs/fsutil" + "gvisor.googlesource.com/gvisor/pkg/sentry/memmap" + "gvisor.googlesource.com/gvisor/pkg/sentry/mm" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform" + "gvisor.googlesource.com/gvisor/pkg/sentry/safemem" + "gvisor.googlesource.com/gvisor/pkg/sentry/uniqueid" + "gvisor.googlesource.com/gvisor/pkg/sentry/usage" + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" + "gvisor.googlesource.com/gvisor/pkg/syserror" + "gvisor.googlesource.com/gvisor/pkg/waiter" +) + +// byteReaderFileOperations implements fs.FileOperations for reading +// from a []byte source. +type byteReader struct { + fsutil.NoopRelease + fsutil.PipeSeek + fsutil.NotDirReaddir + fsutil.NoFsync + fsutil.NoopFlush + fsutil.NoMMap + fsutil.NoIoctl + waiter.AlwaysReady + data []byte +} + +type fileContext struct { + context.Context +} + +func (f *fileContext) Value(key interface{}) interface{} { + switch key { + case uniqueid.CtxGlobalUniqueID: + return uint64(0) + default: + return f.Context.Value(key) + } +} + +func newByteReaderFile(data []byte) *fs.File { + dirent := fs.NewTransientDirent(nil) + flags := fs.FileFlags{Read: true, Pread: true} + return fs.NewFile(&fileContext{Context: context.Background()}, dirent, flags, &byteReader{ + data: data, + }) +} + +func (b *byteReader) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { + if offset < 0 { + return 0, syserror.EINVAL + } + if offset >= int64(len(b.data)) { + return 0, io.EOF + } + n, err := dst.CopyOut(ctx, b.data[offset:]) + return int64(n), err +} + +func (b *byteReader) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) { + panic("Write not supported") +} + +// validateVDSO checks that the VDSO can be loaded by loadVDSO. +// +// VDSOs are special (see below). Since we are going to map the VDSO directly +// rather than using a normal loading process, we require that the PT_LOAD +// segments have the same layout in the ELF as they expect to have in memory. +// +// Namely, this means that we must verify: +// * PT_LOAD file offsets are equivalent to the memory offset from the first +// segment. +// * No extra zeroed space (memsz) is required. +// * PT_LOAD segments are in order. +// * No two PT_LOAD segments occupy parts of the same page. +// * PT_LOAD segments don't extend beyond the end of the file. +// +// ctx may be nil if f does not need it. 
+func validateVDSO(ctx context.Context, f *fs.File, size uint64) (elfInfo, error) { + info, err := parseHeader(ctx, f) + if err != nil { + log.Infof("Unable to parse VDSO header: %v", err) + return elfInfo{}, err + } + + var first *elf.ProgHeader + var prev *elf.ProgHeader + var prevEnd usermem.Addr + for i, phdr := range info.phdrs { + if phdr.Type != elf.PT_LOAD { + continue + } + + if first == nil { + first = &info.phdrs[i] + if phdr.Off != 0 { + log.Warningf("First PT_LOAD segment has non-zero file offset") + return elfInfo{}, syserror.ENOEXEC + } + } + + memoryOffset := phdr.Vaddr - first.Vaddr + if memoryOffset != phdr.Off { + log.Warningf("PT_LOAD segment memory offset %#x != file offset %#x", memoryOffset, phdr.Off) + return elfInfo{}, syserror.ENOEXEC + } + + // memsz larger than filesz means that extra zeroed space should be + // provided at the end of the segment. Since we are mapping the ELF + // directly, we don't want to just overwrite part of the ELF with + // zeroes. + if phdr.Memsz != phdr.Filesz { + log.Warningf("PT_LOAD segment memsz %#x != filesz %#x", phdr.Memsz, phdr.Filesz) + return elfInfo{}, syserror.ENOEXEC + } + + start := usermem.Addr(memoryOffset) + end, ok := start.AddLength(phdr.Memsz) + if !ok { + log.Warningf("PT_LOAD segment size overflows: %#x + %#x", start, end) + return elfInfo{}, syserror.ENOEXEC + } + if uint64(end) > size { + log.Warningf("PT_LOAD segment end %#x extends beyond end of file %#x", end, size) + return elfInfo{}, syserror.ENOEXEC + } + + if prev != nil { + if start < prevEnd { + log.Warningf("PT_LOAD segments out of order") + return elfInfo{}, syserror.ENOEXEC + } + + // We mprotect entire pages, so each segment must be in + // its own page. + prevEndPage := prevEnd.RoundDown() + startPage := start.RoundDown() + if prevEndPage >= startPage { + log.Warningf("PT_LOAD segments share a page: %#x", prevEndPage) + return elfInfo{}, syserror.ENOEXEC + } + } + prev = &info.phdrs[i] + prevEnd = end + } + + return info, nil +} + +// VDSO describes a VDSO. +// +// NOTE: to support multiple architectures or operating systems, this +// would need to contain a VDSO for each. +type VDSO struct { + // ParamPage is the VDSO parameter page. This page should be updated to + // inform the VDSO for timekeeping data. + ParamPage *mm.SpecialMappable + + // vdso is the VDSO ELF itself. + vdso *mm.SpecialMappable + + // os is the operating system targeted by the VDSO. + os abi.OS + + // arch is the architecture targeted by the VDSO. + arch arch.Arch + + // phdrs are the VDSO ELF phdrs. + phdrs []elf.ProgHeader `state:".([]elfProgHeader)"` +} + +// PrepareVDSO validates the system VDSO and returns a VDSO, containing the +// param page for updating by the kernel. +func PrepareVDSO(p platform.Platform) (*VDSO, error) { + vdsoFile := newByteReaderFile(vdsoBin) + + // First make sure the VDSO is valid. vdsoFile does not use ctx, so a + // nil context can be passed. + info, err := validateVDSO(nil, vdsoFile, uint64(len(vdsoBin))) + if err != nil { + return nil, err + } + + // Then copy it into a VDSO mapping. + size, ok := usermem.Addr(len(vdsoBin)).RoundUp() + if !ok { + return nil, fmt.Errorf("VDSO size overflows? 
%#x", len(vdsoBin)) + } + + vdso, err := p.Memory().Allocate(uint64(size), usage.System) + if err != nil { + return nil, fmt.Errorf("unable to allocate VDSO memory: %v", err) + } + + ims, err := p.Memory().MapInternal(vdso, usermem.ReadWrite) + if err != nil { + p.Memory().DecRef(vdso) + return nil, fmt.Errorf("unable to map VDSO memory: %v", err) + } + + _, err = safemem.CopySeq(ims, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(vdsoBin))) + if err != nil { + p.Memory().DecRef(vdso) + return nil, fmt.Errorf("unable to copy VDSO into memory: %v", err) + } + + // Finally, allocate a param page for this VDSO. + paramPage, err := p.Memory().Allocate(usermem.PageSize, usage.System) + if err != nil { + p.Memory().DecRef(vdso) + return nil, fmt.Errorf("unable to allocate VDSO param page: %v", err) + } + + return &VDSO{ + ParamPage: mm.NewSpecialMappable("[vvar]", p, paramPage), + // TODO: Don't advertise the VDSO, as some applications may + // not be able to handle multiple [vdso] hints. + vdso: mm.NewSpecialMappable("", p, vdso), + phdrs: info.phdrs, + }, nil +} + +// loadVDSO loads the VDSO into m. +// +// VDSOs are special. +// +// VDSOs are fully position independent. However, instead of loading a VDSO +// like a normal ELF binary, mapping only the PT_LOAD segments, the Linux +// kernel simply directly maps the entire file into process memory, with very +// little real ELF parsing. +// +// NOTE: This means that userspace can, and unfortunately does, +// depend on parts of the ELF that would normally not be mapped. To maintain +// compatibility with such binaries, we load the VDSO much like Linux. +// +// loadVDSO takes a reference on the VDSO and parameter page FrameRegions. +func loadVDSO(ctx context.Context, m *mm.MemoryManager, v *VDSO, bin loadedELF) (usermem.Addr, error) { + if v == nil { + // Should be used only by tests. + ctx.Warningf("No VDSO provided, skipping VDSO mapping") + return 0, nil + } + + if v.os != bin.os { + ctx.Warningf("Binary ELF OS %v and VDSO ELF OS %v differ", bin.os, v.os) + return 0, syserror.ENOEXEC + } + if v.arch != bin.arch { + ctx.Warningf("Binary ELF arch %v and VDSO ELF arch %v differ", bin.arch, v.arch) + return 0, syserror.ENOEXEC + } + + // Reserve address space for the VDSO and its parameter page, which is + // mapped just before the VDSO. + mapSize := v.vdso.Length() + v.ParamPage.Length() + addr, err := m.MMap(ctx, memmap.MMapOpts{ + Length: mapSize, + Private: true, + }) + if err != nil { + ctx.Infof("Unable to reserve VDSO address space: %v", err) + return 0, err + } + + // Now map the param page. + _, err = m.MMap(ctx, memmap.MMapOpts{ + Length: v.ParamPage.Length(), + MappingIdentity: v.ParamPage, + Mappable: v.ParamPage, + Addr: addr, + Fixed: true, + Unmap: true, + Private: true, + Perms: usermem.Read, + MaxPerms: usermem.Read, + }) + if err != nil { + ctx.Infof("Unable to map VDSO param page: %v", err) + return 0, err + } + + // Now map the VDSO itself. + vdsoAddr, ok := addr.AddLength(v.ParamPage.Length()) + if !ok { + panic(fmt.Sprintf("Part of mapped range overflows? %#x + %#x", addr, v.ParamPage.Length())) + } + _, err = m.MMap(ctx, memmap.MMapOpts{ + Length: v.vdso.Length(), + MappingIdentity: v.vdso, + Mappable: v.vdso, + Addr: vdsoAddr, + Fixed: true, + Unmap: true, + Private: true, + Perms: usermem.Read, + MaxPerms: usermem.AnyAccess, + }) + if err != nil { + ctx.Infof("Unable to map VDSO: %v", err) + return 0, err + } + + vdsoEnd, ok := vdsoAddr.AddLength(v.vdso.Length()) + if !ok { + panic(fmt.Sprintf("VDSO mapping overflows? 
%#x + %#x", vdsoAddr, v.vdso.Length())) + } + + // Set additional protections for the individual segments. + var first *elf.ProgHeader + for i, phdr := range v.phdrs { + if phdr.Type != elf.PT_LOAD { + continue + } + + if first == nil { + first = &v.phdrs[i] + } + + memoryOffset := phdr.Vaddr - first.Vaddr + segAddr, ok := vdsoAddr.AddLength(memoryOffset) + if !ok { + ctx.Warningf("PT_LOAD segment address overflows: %#x + %#x", segAddr, memoryOffset) + return 0, syserror.ENOEXEC + } + segPage := segAddr.RoundDown() + segSize := usermem.Addr(phdr.Memsz) + segSize, ok = segSize.AddLength(segAddr.PageOffset()) + if !ok { + ctx.Warningf("PT_LOAD segment memsize %#x + offset %#x overflows", phdr.Memsz, segAddr.PageOffset()) + return 0, syserror.ENOEXEC + } + segSize, ok = segSize.RoundUp() + if !ok { + ctx.Warningf("PT_LOAD segment size overflows: %#x", phdr.Memsz+segAddr.PageOffset()) + return 0, syserror.ENOEXEC + } + segEnd, ok := segPage.AddLength(uint64(segSize)) + if !ok { + ctx.Warningf("PT_LOAD segment range overflows: %#x + %#x", segAddr, segSize) + return 0, syserror.ENOEXEC + } + if segEnd > vdsoEnd { + ctx.Warningf("PT_LOAD segment ends beyond VDSO: %#x > %#x", segEnd, vdsoEnd) + return 0, syserror.ENOEXEC + } + + perms := progFlagsAsPerms(phdr.Flags) + if perms != usermem.Read { + if err := m.MProtect(segPage, uint64(segSize), perms, false); err != nil { + ctx.Warningf("Unable to set PT_LOAD segment protections %+v at [%#x, %#x): %v", perms, segAddr, segEnd, err) + return 0, syserror.ENOEXEC + } + } + } + + return vdsoAddr, nil +} diff --git a/pkg/sentry/loader/vdso_state.go b/pkg/sentry/loader/vdso_state.go new file mode 100644 index 000000000..92004ad9e --- /dev/null +++ b/pkg/sentry/loader/vdso_state.go @@ -0,0 +1,47 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package loader + +import ( + "debug/elf" +) + +type elfProgHeader struct { + Type elf.ProgType + Flags elf.ProgFlag + Off uint64 + Vaddr uint64 + Paddr uint64 + Filesz uint64 + Memsz uint64 + Align uint64 +} + +// savePhdrs is invoked by stateify. +func (v *VDSO) savePhdrs() []elfProgHeader { + s := make([]elfProgHeader, 0, len(v.phdrs)) + for _, h := range v.phdrs { + s = append(s, elfProgHeader(h)) + } + return s +} + +// loadPhdrs is invoked by stateify. +func (v *VDSO) loadPhdrs(s []elfProgHeader) { + v.phdrs = make([]elf.ProgHeader, 0, len(s)) + for _, h := range s { + v.phdrs = append(v.phdrs, elf.ProgHeader(h)) + } +} |