diff options
author | Googler <noreply@google.com> | 2018-04-27 10:37:02 -0700 |
---|---|---|
committer | Adin Scannell <ascannell@google.com> | 2018-04-28 01:44:26 -0400 |
commit | d02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch) | |
tree | 54f95eef73aee6bacbfc736fffc631be2605ed53 /vdso | |
parent | f70210e742919f40aa2f0934a22f1c9ba6dada62 (diff) |
Check in gVisor.
PiperOrigin-RevId: 194583126
Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'vdso')
-rw-r--r-- | vdso/BUILD | 57 | ||||
-rw-r--r-- | vdso/barrier.h | 35 | ||||
-rw-r--r-- | vdso/check_vdso.py | 204 | ||||
-rw-r--r-- | vdso/compiler.h | 29 | ||||
-rw-r--r-- | vdso/cycle_clock.h | 42 | ||||
-rw-r--r-- | vdso/seqlock.h | 39 | ||||
-rw-r--r-- | vdso/syscalls.h | 54 | ||||
-rw-r--r-- | vdso/vdso.cc | 95 | ||||
-rw-r--r-- | vdso/vdso.lds | 101 | ||||
-rw-r--r-- | vdso/vdso_time.cc | 145 | ||||
-rw-r--r-- | vdso/vdso_time.h | 27 |
11 files changed, 828 insertions, 0 deletions
diff --git a/vdso/BUILD b/vdso/BUILD new file mode 100644 index 000000000..9c4bc167e --- /dev/null +++ b/vdso/BUILD @@ -0,0 +1,57 @@ +# Description: +# This VDSO is a shared library that provides the same interfaces as the +# normal system VDSO (time, gettimeofday, clock_gettime) but which uses +# timekeeping parameters managed by the sandbox kernel. + +package(licenses = ["notice"]) # Apache 2.0 + +genrule( + name = "vdso", + srcs = [ + "barrier.h", + "compiler.h", + "cycle_clock.h", + "seqlock.h", + "syscalls.h", + "vdso.cc", + "vdso.lds", + "vdso_time.h", + "vdso_time.cc", + ], + outs = [ + "vdso.so", + ], + cmd = "$(CC) $(CC_FLAGS) " + + "-I. " + + "-O2 " + + "-std=c++11 " + + "-fPIC " + + "-fuse-ld=gold " + + "-m64 " + + "-shared " + + "-nostdlib " + + "-Wl,-soname=linux-vdso.so.1 " + + "-Wl,--hash-style=sysv " + + "-Wl,--no-undefined " + + "-Wl,-Bsymbolic " + + "-Wl,-z,max-page-size=4096 " + + "-Wl,-z,common-page-size=4096 " + + "-Wl,-T$(location vdso.lds) " + + "-o $(location vdso.so) " + + "$(location vdso.cc) " + + "$(location vdso_time.cc) " + + "&& $(location :check_vdso) " + + "--check-data " + + "--vdso $(location vdso.so) ", + features = ["-pie"], + tools = [ + ":check_vdso", + ], + visibility = ["//:sandbox"], +) + +py_binary( + name = "check_vdso", + srcs = ["check_vdso.py"], + visibility = ["//:sandbox"], +) diff --git a/vdso/barrier.h b/vdso/barrier.h new file mode 100644 index 000000000..db8185b2e --- /dev/null +++ b/vdso/barrier.h @@ -0,0 +1,35 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VDSO_BARRIER_H_ +#define VDSO_BARRIER_H_ + +namespace vdso { + +// Compiler Optimization barrier. +inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); } + +#if __x86_64__ +inline void memory_barrier(void) { + __asm__ __volatile__("mfence" ::: "memory"); +} +inline void read_barrier(void) { barrier(); } +inline void write_barrier(void) { barrier(); } +#else +#error "unsupported architecture" +#endif + +} // namespace vdso + +#endif // VDSO_BARRIER_H_ diff --git a/vdso/check_vdso.py b/vdso/check_vdso.py new file mode 100644 index 000000000..f1288e0c2 --- /dev/null +++ b/vdso/check_vdso.py @@ -0,0 +1,204 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Verify VDSO ELF does not contain any relocations and is directly mmappable. +""" + +import argparse +import logging +import re +import subprocess + +PAGE_SIZE = 4096 + + +def PageRoundDown(addr): + """Rounds down to the nearest page. + + Args: + addr: An address. + + Returns: + The address rounded down to the nearest page. + """ + return addr & ~(PAGE_SIZE - 1) + + +def Fatal(*args, **kwargs): + """Logs a critical message and exits with code 1. + + Args: + *args: Args to pass to logging.critical. + **kwargs: Keyword args to pass to logging.critical. 
+ """ + logging.critical(*args, **kwargs) + exit(1) + + +def CheckSegments(vdso_path): + """Verifies layout of PT_LOAD segments. + + PT_LOAD segments must be laid out such that the ELF is directly mmappable. + + Specifically, check that: + * PT_LOAD file offsets are equivalent to the memory offset from the first + segment. + * No extra zeroed space (memsz) is required. + * PT_LOAD segments are in order (required for any ELF). + * No two PT_LOAD segments share part of the same page. + + The readelf line format looks like: + Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align + LOAD 0x000000 0xffffffffff700000 0xffffffffff700000 0x000e68 0x000e68 R E 0x1000 + + Args: + vdso_path: Path to VDSO binary. + """ + output = subprocess.check_output(["readelf", "-lW", vdso_path]) + lines = output.split("\n") + + segments = [] + for line in lines: + if not line.startswith(" LOAD"): + continue + + components = line.split() + + segments.append({ + "offset": int(components[1], 16), + "addr": int(components[2], 16), + "filesz": int(components[4], 16), + "memsz": int(components[5], 16), + }) + + if not segments: + Fatal("No PT_LOAD segments in VDSO") + + first = segments[0] + if first["offset"] != 0: + Fatal("First PT_LOAD segment has non-zero file offset: %s", first) + + for i, segment in enumerate(segments): + memoff = segment["addr"] - first["addr"] + if memoff != segment["offset"]: + Fatal("PT_LOAD segment has different memory and file offsets: %s", + segments) + + if segment["memsz"] != segment["filesz"]: + Fatal("PT_LOAD segment memsz != filesz: %s", segment) + + if i > 0: + last_end = segments[i-1]["addr"] + segments[i-1]["memsz"] + if segment["addr"] < last_end: + Fatal("PT_LOAD segments out of order") + + last_page = PageRoundDown(last_end) + start_page = PageRoundDown(segment["addr"]) + if last_page >= start_page: + Fatal("PT_LOAD segments share a page: %s and %s", segment, + segments[i - 1]) + + +# Matches the section name in readelf -SW output. 
+_SECTION_NAME_RE = re.compile(r"""^\s+\[\ ?\d+\]\s+ + (?P<name>\.\S+)\s+ + (?P<type>\S+)\s+ + (?P<addr>[0-9a-f]+)\s+ + (?P<off>[0-9a-f]+)\s+ + (?P<size>[0-9a-f]+)""", re.VERBOSE) + + +def CheckData(vdso_path): + """Verifies the VDSO contains no .data or .bss sections. + + The readelf line format looks like: + + There are 15 section headers, starting at offset 0x15f0: + + Section Headers: + [Nr] Name Type Address Off Size ES Flg Lk Inf Al + [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 + [ 1] .hash HASH ffffffffff700120 000120 000040 04 A 2 0 8 + [ 2] .dynsym DYNSYM ffffffffff700160 000160 000108 18 A 3 1 8 + ... + [13] .strtab STRTAB 0000000000000000 001448 000123 00 0 0 1 + [14] .shstrtab STRTAB 0000000000000000 00156b 000083 00 0 0 1 + Key to Flags: + W (write), A (alloc), X (execute), M (merge), S (strings), I (info), + L (link order), O (extra OS processing required), G (group), T (TLS), + C (compressed), x (unknown), o (OS specific), E (exclude), + l (large), p (processor specific) + + Args: + vdso_path: Path to VDSO binary. + """ + output = subprocess.check_output(["readelf", "-SW", vdso_path]) + lines = output.split("\n") + + found_text = False + for line in lines: + m = re.search(_SECTION_NAME_RE, line) + if not m: + continue + + if not line.startswith(" ["): + continue + + name = m.group("name") + size = int(m.group("size"), 16) + + if name == ".text" and size != 0: + found_text = True + + # Clang will typically omit these sections entirely; gcc will include them + # but with size 0. + if name.startswith(".data") and size != 0: + Fatal("VDSO contains non-empty .data section:\n%s" % output) + + if name.startswith(".bss") and size != 0: + Fatal("VDSO contains non-empty .bss section:\n%s" % output) + + if not found_text: + Fatal("VDSO contains no/empty .text section? Bad parsing?:\n%s" % output) + + +def CheckRelocs(vdso_path): + """Verifies that the VDSO includes no relocations. + + Args: + vdso_path: Path to VDSO binary. 
+ """ + output = subprocess.check_output(["readelf", "-r", vdso_path]) + if output.strip() != "There are no relocations in this file.": + Fatal("VDSO contains relocations: %s", output) + + +def main(): + parser = argparse.ArgumentParser(description="Verify VDSO ELF.") + parser.add_argument("--vdso", required=True, help="Path to VDSO ELF") + parser.add_argument( + "--check-data", + action="store_true", + help="Check that the ELF contains no .data or .bss sections") + args = parser.parse_args() + + CheckSegments(args.vdso) + CheckRelocs(args.vdso) + + if args.check_data: + CheckData(args.vdso) + + +if __name__ == "__main__": + main() diff --git a/vdso/compiler.h b/vdso/compiler.h new file mode 100644 index 000000000..a661516c3 --- /dev/null +++ b/vdso/compiler.h @@ -0,0 +1,29 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VDSO_COMPILER_H_ +#define VDSO_COMPILER_H_ + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#ifndef __section +#define __section(S) __attribute__((__section__(#S))) +#endif + +#ifndef __aligned +#define __aligned(N) __attribute__((__aligned__(N))) +#endif + +#endif // VDSO_COMPILER_H_ diff --git a/vdso/cycle_clock.h b/vdso/cycle_clock.h new file mode 100644 index 000000000..93c5f2c0d --- /dev/null +++ b/vdso/cycle_clock.h @@ -0,0 +1,42 @@ +// Copyright 2018 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VDSO_CYCLE_CLOCK_H_ +#define VDSO_CYCLE_CLOCK_H_ + +#include <stdint.h> + +#include "vdso/barrier.h" + +namespace vdso { + +#if __x86_64__ + +// TODO: The appropriate barrier instruction to use with rdtsc on +// x86_64 depends on the vendor. Intel processors can use lfence but AMD may +// need mfence, depending on MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT. + +static inline uint64_t cycle_clock(void) { + uint32_t lo, hi; + asm volatile("lfence" : : : "memory"); + asm volatile("rdtsc" : "=a"(lo), "=d"(hi)); + return ((uint64_t)hi << 32) | lo; +} +#else +#error "unsupported architecture" +#endif + +} // namespace vdso + +#endif // VDSO_CYCLE_CLOCK_H_ diff --git a/vdso/seqlock.h b/vdso/seqlock.h new file mode 100644 index 000000000..b527bdbca --- /dev/null +++ b/vdso/seqlock.h @@ -0,0 +1,39 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Low level raw interfaces to the sequence counter used by the VDSO. +#ifndef VDSO_SEQLOCK_H_ +#define VDSO_SEQLOCK_H_ + +#include <stdint.h> + +#include "vdso/barrier.h" +#include "vdso/compiler.h" + +namespace vdso { + +inline int32_t read_seqcount_begin(const uint64_t* s) { + uint64_t seq = *s; + read_barrier(); + return seq & ~1; +} + +inline int read_seqcount_retry(const uint64_t* s, uint64_t seq) { + read_barrier(); + return unlikely(*s != seq); +} + +} // namespace vdso + +#endif // VDSO_SEQLOCK_H_ diff --git a/vdso/syscalls.h b/vdso/syscalls.h new file mode 100644 index 000000000..fd79c4642 --- /dev/null +++ b/vdso/syscalls.h @@ -0,0 +1,54 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// System call support for the VDSO. +// +// Provides fallback system call interfaces for getcpu() +// and clock_gettime(). 
+ +#ifndef VDSO_SYSCALLS_H_ +#define VDSO_SYSCALLS_H_ + +#include <asm/unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <sys/types.h> + +struct getcpu_cache; + +namespace vdso { + +static inline int sys_clock_gettime(clockid_t clock, struct timespec* ts) { + int num = __NR_clock_gettime; + asm volatile("syscall\n" + : "+a"(num) + : "D"(clock), "S"(ts) + : "rcx", "r11", "memory"); + return num; +} + +static inline int sys_getcpu(unsigned* cpu, unsigned* node, + struct getcpu_cache* cache) { + int num = __NR_getcpu; + asm volatile("syscall\n" + : "+a"(num) + : "D"(cpu), "S"(node), "d"(cache) + : "rcx", "r11", "memory"); + return num; +} + +} // namespace vdso + +#endif // VDSO_SYSCALLS_H_ diff --git a/vdso/vdso.cc b/vdso/vdso.cc new file mode 100644 index 000000000..db3bdef01 --- /dev/null +++ b/vdso/vdso.cc @@ -0,0 +1,95 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is the VDSO for sandboxed binaries. This file just contains the entry +// points to the VDSO. All of the real work is done in vdso_time.cc + +#define _DEFAULT_SOURCE // ensure glibc provides struct timezone. 
+#include <sys/time.h> +#include <time.h> + +#include "vdso/syscalls.h" +#include "vdso/vdso_time.h" + +namespace vdso { + +// __vdso_clock_gettime() implements clock_gettime() +extern "C" int __vdso_clock_gettime(clockid_t clock, struct timespec* ts) { + int ret; + + switch (clock) { + case CLOCK_REALTIME: + ret = ClockRealtime(ts); + break; + + case CLOCK_MONOTONIC: + ret = ClockMonotonic(ts); + break; + + default: + ret = sys_clock_gettime(clock, ts); + break; + } + + return ret; +} +extern "C" int clock_gettime(clockid_t clock, struct timespec* ts) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +// __vdso_gettimeofday() implements gettimeofday() +extern "C" int __vdso_gettimeofday(struct timeval* tv, struct timezone* tz) { + if (tv) { + struct timespec ts; + int ret = ClockRealtime(&ts); + if (ret) { + return ret; + } + tv->tv_sec = ts.tv_sec; + tv->tv_usec = ts.tv_nsec / 1000; + } + + // Nobody should be calling gettimeofday() with a non-NULL + // timezone pointer. If they do then they will get zeros. + if (tz) { + tz->tz_minuteswest = 0; + tz->tz_dsttime = 0; + } + + return 0; +} +extern "C" int gettimeofday(struct timeval* tv, struct timezone* tz) + __attribute__((weak, alias("__vdso_gettimeofday"))); + +// __vdso_time() implements time() +extern "C" time_t __vdso_time(time_t* t) { + struct timespec ts; + ClockRealtime(&ts); + if (t) { + *t = ts.tv_sec; + } + return ts.tv_sec; +} +extern "C" time_t time(time_t* t) __attribute__((weak, alias("__vdso_time"))); + +// __vdso_getcpu() implements getcpu() +extern "C" long __vdso_getcpu(unsigned* cpu, unsigned* node, + struct getcpu_cache* cache) { + // No optimizations yet, just make the real system call. 
+ return sys_getcpu(cpu, node, cache); +} +extern "C" long getcpu(unsigned* cpu, unsigned* node, + struct getcpu_cache* cache) + __attribute__((weak, alias("__vdso_getcpu"))); + +} // namespace vdso diff --git a/vdso/vdso.lds b/vdso/vdso.lds new file mode 100644 index 000000000..97bb6d0c1 --- /dev/null +++ b/vdso/vdso.lds @@ -0,0 +1,101 @@ +/* + * Linker script for the VDSO. + * + * The VDSO is essentially a normal ELF shared library that is mapped into the + * address space of the process that is going to use it. The address of the + * VDSO is passed to the runtime linker in the AT_SYSINFO_EHDR entry of the aux + * vector. + * + * There are, however, three ways in which the VDSO differs from a normal + * shared library: + * + * - The runtime linker does not attempt to process any relocations for the + * VDSO so it is the responsibility of whoever loads the VDSO into the + * address space to do this if necessary. Because of this restriction we are + * careful to ensure that the VDSO does not need to have any relocations + * applied to it. + * + * - Although the VDSO is position independent and would normally be linked at + * virtual address 0, the Linux kernel VDSO is actually linked at a non zero + * virtual address and the code in the system runtime linker that handles the + * VDSO expects this to be the case so we have to explicitly link this VDSO + * at a non zero address. The actual address is arbitrary, but we use the + * same one as the Linux kernel VDSO. + * + * - The VDSO will be directly mmapped by the sentry, rather than going through + * a normal ELF loading process. The VDSO must be carefully constructed such + * that the layout in the ELF file is identical to the layout in memory. + */ + +VDSO_PRELINK = 0xffffffffff700000; + +SECTIONS { + /* The parameter page is mapped just before the VDSO. */ + _params = VDSO_PRELINK - 0x1000; + + . 
= VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + + /* + * TODO: Remove this alignment? Then the VDSO would fit in a + * single page. + */ + . = ALIGN(0x1000); + .text : { *(.text*) } :text =0x90909090 + + /* + * N.B. There is no data/bss section. This VDSO neither needs nor uses a data + * section. We omit it entirely because some gcc/clang and gold/bfd version + * combinations struggle to handle an empty data PHDR segment (internal + * linker assertion failures result). + * + * If the VDSO does incorrectly include a data section, the linker will + * include it in the text segment. check_vdso.py looks for this degenerate + * case. + */ +} + +PHDRS { + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R | PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * Define the symbols that are to be exported. + */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + + local: *; + }; +} diff --git a/vdso/vdso_time.cc b/vdso/vdso_time.cc new file mode 100644 index 000000000..5d5c8de65 --- /dev/null +++ b/vdso/vdso_time.cc @@ -0,0 +1,145 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "vdso/vdso_time.h" + +#include <stdint.h> +#include <sys/time.h> +#include <time.h> + +#include "vdso/cycle_clock.h" +#include "vdso/seqlock.h" +#include "vdso/syscalls.h" + +// struct params defines the layout of the parameter page maintained by the +// kernel (i.e., sentry). +// +// This is similar to the VVAR page maintained by the normal Linux kernel for +// its VDSO, but it has a different layout. +// +// It must be kept in sync with VDSOParamPage in pkg/sentry/kernel/vdso.go. +struct params { + uint64_t seq_count; + + uint64_t monotonic_ready; + int64_t monotonic_base_cycles; + int64_t monotonic_base_ref; + uint64_t monotonic_frequency; + + uint64_t realtime_ready; + int64_t realtime_base_cycles; + int64_t realtime_base_ref; + uint64_t realtime_frequency; +}; + +// Returns a pointer to the global parameter page. +// +// This page lives in the page just before the VDSO binary itself. The linker +// defines _params as the page before the VDSO. +// +// Ideally, we'd simply declare _params as an extern struct params. +// Unfortunately various combinations of old/new versions of gcc/clang and +// gold/bfd struggle to generate references to such a global without generating +// relocations. +// +// So instead, we use inline assembly with a construct that seems to have wide +// compatibility across many toolchains. 
+inline struct params* get_params() { + struct params* p = nullptr; + asm volatile("leaq _params(%%rip), %0" : "=r"(p) : :); + return p; +} + +namespace vdso { + +const uint64_t kNsecsPerSec = 1000000000UL; + +inline struct timespec ns_to_timespec(uint64_t ns) { + struct timespec ts; + ts.tv_sec = ns / kNsecsPerSec; + ts.tv_nsec = ns % kNsecsPerSec; + return ts; +} + +inline uint64_t cycles_to_ns(uint64_t frequency, uint64_t cycles) { + uint64_t mult = (kNsecsPerSec << 32) / frequency; + return ((unsigned __int128)cycles * mult) >> 32; +} + +// ClockRealtime() is the VDSO implementation of clock_gettime(CLOCK_REALTIME). +int ClockRealtime(struct timespec* ts) { + struct params* params = get_params(); + uint64_t seq; + uint64_t ready; + int64_t base_ref; + int64_t base_cycles; + uint64_t frequency; + int64_t now_cycles; + + do { + seq = read_seqcount_begin(&params->seq_count); + ready = params->realtime_ready; + base_ref = params->realtime_base_ref; + base_cycles = params->realtime_base_cycles; + frequency = params->realtime_frequency; + now_cycles = cycle_clock(); + } while (read_seqcount_retry(&params->seq_count, seq)); + + if (!ready) { + // The sandbox kernel ensures that we won't compute a time later than this + // once the params are ready. + return sys_clock_gettime(CLOCK_REALTIME, ts); + } + + int64_t delta_cycles = + (now_cycles < base_cycles) ? 0 : now_cycles - base_cycles; + int64_t now_ns = base_ref + cycles_to_ns(frequency, delta_cycles); + *ts = ns_to_timespec(now_ns); + return 0; +} + +// ClockMonotonic() is the VDSO implementation of +// clock_gettime(CLOCK_MONOTONIC). 
+int ClockMonotonic(struct timespec* ts) { + struct params* params = get_params(); + uint64_t seq; + uint64_t ready; + int64_t base_ref; + int64_t base_cycles; + uint64_t frequency; + int64_t now_cycles; + + do { + seq = read_seqcount_begin(&params->seq_count); + ready = params->monotonic_ready; + base_ref = params->monotonic_base_ref; + base_cycles = params->monotonic_base_cycles; + frequency = params->monotonic_frequency; + now_cycles = cycle_clock(); + } while (read_seqcount_retry(&params->seq_count, seq)); + + if (!ready) { + // The sandbox kernel ensures that we won't compute a time later than this + // once the params are ready. + return sys_clock_gettime(CLOCK_MONOTONIC, ts); + } + + int64_t delta_cycles = + (now_cycles < base_cycles) ? 0 : now_cycles - base_cycles; + int64_t now_ns = base_ref + cycles_to_ns(frequency, delta_cycles); + *ts = ns_to_timespec(now_ns); + return 0; +} + +} // namespace vdso diff --git a/vdso/vdso_time.h b/vdso/vdso_time.h new file mode 100644 index 000000000..71d6e2f64 --- /dev/null +++ b/vdso/vdso_time.h @@ -0,0 +1,27 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VDSO_VDSO_TIME_H_ +#define VDSO_VDSO_TIME_H_ + +#include <time.h> + +namespace vdso { + +int ClockRealtime(struct timespec* ts); +int ClockMonotonic(struct timespec* ts); + +} // namespace vdso + +#endif // VDSO_VDSO_TIME_H_ |