summaryrefslogtreecommitdiffhomepage
path: root/vdso
diff options
context:
space:
mode:
authorGoogler <noreply@google.com>2018-04-27 10:37:02 -0700
committerAdin Scannell <ascannell@google.com>2018-04-28 01:44:26 -0400
commitd02b74a5dcfed4bfc8f2f8e545bca4d2afabb296 (patch)
tree54f95eef73aee6bacbfc736fffc631be2605ed53 /vdso
parentf70210e742919f40aa2f0934a22f1c9ba6dada62 (diff)
Check in gVisor.
PiperOrigin-RevId: 194583126 Change-Id: Ica1d8821a90f74e7e745962d71801c598c652463
Diffstat (limited to 'vdso')
-rw-r--r--vdso/BUILD57
-rw-r--r--vdso/barrier.h35
-rw-r--r--vdso/check_vdso.py204
-rw-r--r--vdso/compiler.h29
-rw-r--r--vdso/cycle_clock.h42
-rw-r--r--vdso/seqlock.h39
-rw-r--r--vdso/syscalls.h54
-rw-r--r--vdso/vdso.cc95
-rw-r--r--vdso/vdso.lds101
-rw-r--r--vdso/vdso_time.cc145
-rw-r--r--vdso/vdso_time.h27
11 files changed, 828 insertions, 0 deletions
diff --git a/vdso/BUILD b/vdso/BUILD
new file mode 100644
index 000000000..9c4bc167e
--- /dev/null
+++ b/vdso/BUILD
@@ -0,0 +1,57 @@
+# Description:
+# This VDSO is a shared library that provides the same interfaces as the
+# normal system VDSO (time, gettimeofday, clock_gettime) but which uses
+# timekeeping parameters managed by the sandbox kernel.
+
+package(licenses = ["notice"]) # Apache 2.0
+
+genrule(
+ name = "vdso",
+ srcs = [
+ "barrier.h",
+ "compiler.h",
+ "cycle_clock.h",
+ "seqlock.h",
+ "syscalls.h",
+ "vdso.cc",
+ "vdso.lds",
+ "vdso_time.h",
+ "vdso_time.cc",
+ ],
+ outs = [
+ "vdso.so",
+ ],
+ cmd = "$(CC) $(CC_FLAGS) " +
+ "-I. " +
+ "-O2 " +
+ "-std=c++11 " +
+ "-fPIC " +
+ "-fuse-ld=gold " +
+ "-m64 " +
+ "-shared " +
+ "-nostdlib " +
+ "-Wl,-soname=linux-vdso.so.1 " +
+ "-Wl,--hash-style=sysv " +
+ "-Wl,--no-undefined " +
+ "-Wl,-Bsymbolic " +
+ "-Wl,-z,max-page-size=4096 " +
+ "-Wl,-z,common-page-size=4096 " +
+ "-Wl,-T$(location vdso.lds) " +
+ "-o $(location vdso.so) " +
+ "$(location vdso.cc) " +
+ "$(location vdso_time.cc) " +
+ "&& $(location :check_vdso) " +
+ "--check-data " +
+ "--vdso $(location vdso.so) ",
+ features = ["-pie"],
+ tools = [
+ ":check_vdso",
+ ],
+ visibility = ["//:sandbox"],
+)
+
+py_binary(
+ name = "check_vdso",
+ srcs = ["check_vdso.py"],
+ visibility = ["//:sandbox"],
+)
diff --git a/vdso/barrier.h b/vdso/barrier.h
new file mode 100644
index 000000000..db8185b2e
--- /dev/null
+++ b/vdso/barrier.h
@@ -0,0 +1,35 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VDSO_BARRIER_H_
+#define VDSO_BARRIER_H_
+
+namespace vdso {
+
+// Compiler optimization barrier: an empty asm statement with a "memory"
+// clobber. No instruction is emitted; the clobber only tells the compiler that
+// memory may have changed, so memory accesses are not moved across this point.
+inline void barrier(void) { __asm__ __volatile__("" ::: "memory"); }
+
+#if __x86_64__
+// Full memory barrier: issues mfence, and also acts as a compiler barrier
+// through the "memory" clobber.
+inline void memory_barrier(void) {
+ __asm__ __volatile__("mfence" ::: "memory");
+}
+// On x86_64 the read and write barriers are compiler barriers only; no fence
+// instruction is emitted.
+// NOTE(review): presumably this relies on x86's load/load and store/store
+// ordering guarantees -- confirm before adding another architecture.
+inline void read_barrier(void) { barrier(); }
+inline void write_barrier(void) { barrier(); }
+#else
+#error "unsupported architecture"
+#endif
+
+} // namespace vdso
+
+#endif // VDSO_BARRIER_H_
diff --git a/vdso/check_vdso.py b/vdso/check_vdso.py
new file mode 100644
index 000000000..f1288e0c2
--- /dev/null
+++ b/vdso/check_vdso.py
@@ -0,0 +1,204 @@
+# Copyright 2018 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Verify VDSO ELF does not contain any relocations and is directly mmappable.
+"""
+
+import argparse
+import logging
+import re
+import subprocess
+
+PAGE_SIZE = 4096
+
+
+def PageRoundDown(addr):
+ """Rounds down to the nearest page.
+
+ The low bits are masked off with PAGE_SIZE - 1, so PAGE_SIZE must be a power
+ of two.
+
+ Args:
+ addr: An address.
+
+ Returns:
+ The address rounded down to the nearest page boundary.
+ """
+ return addr & ~(PAGE_SIZE - 1)
+
+
+def Fatal(*args, **kwargs):
+ """Logs a critical message and exits with code 1.
+
+ Args:
+ *args: Args to pass to logging.critical.
+ **kwargs: Keyword args to pass to logging.critical.
+ """
+ logging.critical(*args, **kwargs)
+ exit(1)
+
+
+def CheckSegments(vdso_path):
+ """Verifies layout of PT_LOAD segments.
+
+ PT_LOAD segments must be laid out such that the ELF is directly mmappable.
+
+ Specifically, check that:
+ * PT_LOAD file offsets are equivalent to the memory offset from the first
+ segment.
+ * No extra zeroed space (memsz) is required.
+ * PT_LOAD segments are in order (required for any ELF).
+ * No two PT_LOAD segments share part of the same page.
+
+ The readelf line format looks like:
+ Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+ LOAD 0x000000 0xffffffffff700000 0xffffffffff700000 0x000e68 0x000e68 R E 0x1000
+
+ Args:
+ vdso_path: Path to VDSO binary.
+ """
+ output = subprocess.check_output(["readelf", "-lW", vdso_path])
+ lines = output.split("\n")
+
+ segments = []
+ for line in lines:
+ if not line.startswith(" LOAD"):
+ continue
+
+ components = line.split()
+
+ segments.append({
+ "offset": int(components[1], 16),
+ "addr": int(components[2], 16),
+ "filesz": int(components[4], 16),
+ "memsz": int(components[5], 16),
+ })
+
+ if not segments:
+ Fatal("No PT_LOAD segments in VDSO")
+
+ first = segments[0]
+ if first["offset"] != 0:
+ Fatal("First PT_LOAD segment has non-zero file offset: %s", first)
+
+ for i, segment in enumerate(segments):
+ memoff = segment["addr"] - first["addr"]
+ if memoff != segment["offset"]:
+ Fatal("PT_LOAD segment has different memory and file offsets: %s",
+ segments)
+
+ if segment["memsz"] != segment["filesz"]:
+ Fatal("PT_LOAD segment memsz != filesz: %s", segment)
+
+ if i > 0:
+ last_end = segments[i-1]["addr"] + segments[i-1]["memsz"]
+ if segment["addr"] < last_end:
+ Fatal("PT_LOAD segments out of order")
+
+ last_page = PageRoundDown(last_end)
+ start_page = PageRoundDown(segment["addr"])
+ if last_page >= start_page:
+ Fatal("PT_LOAD segments share a page: %s and %s", segment,
+ segments[i - 1])
+
+
+# Matches one section row of readelf -SW output and captures the section's
+# name, type, address, file offset and size (the last three as lowercase hex
+# strings). The name must start with ".", so rows without such a name (for
+# example the NULL section) do not match.
+_SECTION_NAME_RE = re.compile(r"""^\s+\[\ ?\d+\]\s+
+ (?P<name>\.\S+)\s+
+ (?P<type>\S+)\s+
+ (?P<addr>[0-9a-f]+)\s+
+ (?P<off>[0-9a-f]+)\s+
+ (?P<size>[0-9a-f]+)""", re.VERBOSE)
+
+
+def CheckData(vdso_path):
+ """Verifies the VDSO contains no .data or .bss sections.
+
+ The readelf line format looks like:
+
+ There are 15 section headers, starting at offset 0x15f0:
+
+ Section Headers:
+ [Nr] Name Type Address Off Size ES Flg Lk Inf Al
+ [ 0] NULL 0000000000000000 000000 000000 00 0 0 0
+ [ 1] .hash HASH ffffffffff700120 000120 000040 04 A 2 0 8
+ [ 2] .dynsym DYNSYM ffffffffff700160 000160 000108 18 A 3 1 8
+ ...
+ [13] .strtab STRTAB 0000000000000000 001448 000123 00 0 0 1
+ [14] .shstrtab STRTAB 0000000000000000 00156b 000083 00 0 0 1
+ Key to Flags:
+ W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
+ L (link order), O (extra OS processing required), G (group), T (TLS),
+ C (compressed), x (unknown), o (OS specific), E (exclude),
+ l (large), p (processor specific)
+
+ Args:
+ vdso_path: Path to VDSO binary.
+ """
+ output = subprocess.check_output(["readelf", "-SW", vdso_path])
+ lines = output.split("\n")
+
+ found_text = False
+ for line in lines:
+ m = re.search(_SECTION_NAME_RE, line)
+ if not m:
+ continue
+
+ if not line.startswith(" ["):
+ continue
+
+ name = m.group("name")
+ size = int(m.group("size"), 16)
+
+ if name == ".text" and size != 0:
+ found_text = True
+
+ # Clang will typically omit these sections entirely; gcc will include them
+ # but with size 0.
+ if name.startswith(".data") and size != 0:
+ Fatal("VDSO contains non-empty .data section:\n%s" % output)
+
+ if name.startswith(".bss") and size != 0:
+ Fatal("VDSO contains non-empty .bss section:\n%s" % output)
+
+ if not found_text:
+ Fatal("VDSO contains no/empty .text section? Bad parsing?:\n%s" % output)
+
+
+def CheckRelocs(vdso_path):
+ """Verifies that the VDSO includes no relocations.
+
+ Args:
+ vdso_path: Path to VDSO binary.
+ """
+ output = subprocess.check_output(["readelf", "-r", vdso_path])
+ if output.strip() != "There are no relocations in this file.":
+ Fatal("VDSO contains relocations: %s", output)
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Verify VDSO ELF.")
+ parser.add_argument("--vdso", required=True, help="Path to VDSO ELF")
+ parser.add_argument(
+ "--check-data",
+ action="store_true",
+ help="Check that the ELF contains no .data or .bss sections")
+ args = parser.parse_args()
+
+ CheckSegments(args.vdso)
+ CheckRelocs(args.vdso)
+
+ if args.check_data:
+ CheckData(args.vdso)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/vdso/compiler.h b/vdso/compiler.h
new file mode 100644
index 000000000..a661516c3
--- /dev/null
+++ b/vdso/compiler.h
@@ -0,0 +1,29 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VDSO_COMPILER_H_
+#define VDSO_COMPILER_H_
+
+// Branch-prediction hints: tell the compiler that the condition is expected to
+// be true (likely) or false (unlikely). !!(x) normalizes x to 0 or 1 before it
+// is compared with the expected value.
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+// Places the annotated declaration in section S. S is stringified by the
+// macro, so the section name is passed unquoted. Only defined when no
+// __section macro already exists.
+#ifndef __section
+#define __section(S) __attribute__((__section__(#S)))
+#endif
+
+// Requests an alignment of N for the annotated declaration. Only defined when
+// no __aligned macro already exists.
+#ifndef __aligned
+#define __aligned(N) __attribute__((__aligned__(N)))
+#endif
+
+#endif // VDSO_COMPILER_H_
diff --git a/vdso/cycle_clock.h b/vdso/cycle_clock.h
new file mode 100644
index 000000000..93c5f2c0d
--- /dev/null
+++ b/vdso/cycle_clock.h
@@ -0,0 +1,42 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VDSO_CYCLE_CLOCK_H_
+#define VDSO_CYCLE_CLOCK_H_
+
+#include <stdint.h>
+
+#include "vdso/barrier.h"
+
+namespace vdso {
+
+#if __x86_64__
+
+// TODO: The appropriate barrier instruction to use with rdtsc on
+// x86_64 depends on the vendor. Intel processors can use lfence but AMD may
+// need mfence, depending on MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT.
+
+// Returns the current 64-bit value of the CPU timestamp counter.
+static inline uint64_t cycle_clock(void) {
+ uint32_t lo, hi;
+ // lfence is issued immediately before rdtsc; both statements are volatile
+ // asm and the fence carries a "memory" clobber.
+ asm volatile("lfence" : : : "memory");
+ // rdtsc delivers the low half in eax ("=a") and the high half in edx ("=d").
+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)hi << 32) | lo;
+}
+#else
+#error "unsupported architecture"
+#endif
+
+} // namespace vdso
+
+#endif // VDSO_CYCLE_CLOCK_H_
diff --git a/vdso/seqlock.h b/vdso/seqlock.h
new file mode 100644
index 000000000..b527bdbca
--- /dev/null
+++ b/vdso/seqlock.h
@@ -0,0 +1,39 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Low level raw interfaces to the sequence counter used by the VDSO.
+#ifndef VDSO_SEQLOCK_H_
+#define VDSO_SEQLOCK_H_
+
+#include <stdint.h>
+
+#include "vdso/barrier.h"
+#include "vdso/compiler.h"
+
+namespace vdso {
+
+// Begins a read of data protected by the sequence counter *s.
+//
+// Returns the current count with its low bit cleared; the caller passes this
+// value to read_seqcount_retry() after reading the protected data. If the
+// count was odd when sampled, the returned value differs from the counter, so
+// read_seqcount_retry() reports a retry unless the counter changes back.
+// NOTE(review): presumably the writer makes the count odd while an update is
+// in progress -- the writer is not part of this file; confirm.
+//
+// The full 64-bit count is returned. Narrowing it to 32 bits would make the
+// comparison in read_seqcount_retry() fail on every attempt once the counter
+// has bit 31 or any higher bit set, leaving readers spinning forever.
+inline uint64_t read_seqcount_begin(const uint64_t* s) {
+  uint64_t seq = *s;
+  // Keep the compiler from moving the caller's data reads above the load of
+  // the counter.
+  read_barrier();
+  return seq & ~static_cast<uint64_t>(1);
+}
+
+// Ends a read of data protected by the sequence counter *s.
+//
+// seq is the value previously returned by read_seqcount_begin(). Returns
+// non-zero if the counter no longer equals seq, in which case the caller is
+// expected to discard what it read and start over; returns zero otherwise.
+// The mismatch is marked unlikely as a branch-prediction hint.
+inline int read_seqcount_retry(const uint64_t* s, uint64_t seq) {
+ // Keep the compiler from moving the caller's data reads below the re-read of
+ // the counter.
+ read_barrier();
+ return unlikely(*s != seq);
+}
+
+} // namespace vdso
+
+#endif // VDSO_SEQLOCK_H_
diff --git a/vdso/syscalls.h b/vdso/syscalls.h
new file mode 100644
index 000000000..fd79c4642
--- /dev/null
+++ b/vdso/syscalls.h
@@ -0,0 +1,54 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// System call support for the VDSO.
+//
+// Provides fallback system call interfaces for getcpu()
+// and clock_gettime().
+
+#ifndef VDSO_SYSCALLS_H_
+#define VDSO_SYSCALLS_H_
+
+#include <asm/unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+struct getcpu_cache;
+
+namespace vdso {
+
+// Invokes the clock_gettime system call directly with the syscall
+// instruction.
+//
+// Returns whatever the kernel leaves in eax, as an int; errno is not set by
+// this function.
+// NOTE(review): presumably 0 on success and a negative errno value on failure,
+// following the Linux system call convention -- confirm.
+static inline int sys_clock_gettime(clockid_t clock, struct timespec* ts) {
+ int num = __NR_clock_gettime;
+ // eax ("+a") carries the system call number in and the result out; the two
+ // arguments go in rdi ("D") and rsi ("S"). rcx and r11 are declared
+ // clobbered, and "memory" because the kernel writes through ts.
+ asm volatile("syscall\n"
+ : "+a"(num)
+ : "D"(clock), "S"(ts)
+ : "rcx", "r11", "memory");
+ return num;
+}
+
+// Invokes the getcpu system call directly with the syscall instruction.
+//
+// Returns whatever the kernel leaves in eax, as an int; errno is not set by
+// this function.
+// NOTE(review): presumably 0 on success and a negative errno value on failure,
+// following the Linux system call convention -- confirm.
+static inline int sys_getcpu(unsigned* cpu, unsigned* node,
+ struct getcpu_cache* cache) {
+ int num = __NR_getcpu;
+ // eax ("+a") carries the system call number in and the result out; the three
+ // arguments go in rdi ("D"), rsi ("S") and rdx ("d"). rcx and r11 are
+ // declared clobbered, along with "memory".
+ asm volatile("syscall\n"
+ : "+a"(num)
+ : "D"(cpu), "S"(node), "d"(cache)
+ : "rcx", "r11", "memory");
+ return num;
+}
+
+} // namespace vdso
+
+#endif // VDSO_SYSCALLS_H_
diff --git a/vdso/vdso.cc b/vdso/vdso.cc
new file mode 100644
index 000000000..db3bdef01
--- /dev/null
+++ b/vdso/vdso.cc
@@ -0,0 +1,95 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is the VDSO for sandboxed binaries. This file just contains the entry
+// points to the VDSO. All of the real work is done in vdso_time.cc
+
+#define _DEFAULT_SOURCE // ensure glibc provides struct timezone.
+#include <sys/time.h>
+#include <time.h>
+
+#include "vdso/syscalls.h"
+#include "vdso/vdso_time.h"
+
+namespace vdso {
+
+// __vdso_clock_gettime() implements clock_gettime().
+//
+// CLOCK_REALTIME and CLOCK_MONOTONIC are answered by the VDSO time helpers;
+// every other clock id is forwarded to the real system call. The helper's or
+// system call's return value is passed back unchanged.
+extern "C" int __vdso_clock_gettime(clockid_t clock, struct timespec* ts) {
+  if (clock == CLOCK_REALTIME) {
+    return ClockRealtime(ts);
+  }
+  if (clock == CLOCK_MONOTONIC) {
+    return ClockMonotonic(ts);
+  }
+  return sys_clock_gettime(clock, ts);
+}
+// Weak, unprefixed alias for the same entry point.
+extern "C" int clock_gettime(clockid_t clock, struct timespec* ts)
+    __attribute__((weak, alias("__vdso_clock_gettime")));
+
+// __vdso_gettimeofday() implements gettimeofday().
+//
+// When tv is non-null it receives the realtime clock reading, with the
+// nanosecond part reduced to microseconds; a non-zero result from
+// ClockRealtime() is returned as-is and nothing is stored. Returns 0
+// otherwise.
+extern "C" int __vdso_gettimeofday(struct timeval* tv, struct timezone* tz) {
+  if (tv != nullptr) {
+    struct timespec now;
+    const int err = ClockRealtime(&now);
+    if (err != 0) {
+      return err;
+    }
+    tv->tv_sec = now.tv_sec;
+    tv->tv_usec = now.tv_nsec / 1000;
+  }
+
+  // Callers are not expected to pass a timezone pointer; one that is passed
+  // anyway is simply zero-filled.
+  if (tz != nullptr) {
+    tz->tz_dsttime = 0;
+    tz->tz_minuteswest = 0;
+  }
+
+  return 0;
+}
+// Weak, unprefixed alias for the same entry point.
+extern "C" int gettimeofday(struct timeval* tv, struct timezone* tz)
+    __attribute__((weak, alias("__vdso_gettimeofday")));
+
+// __vdso_time() implements time().
+//
+// Returns the seconds part of the realtime clock and, when t is non-null,
+// also stores it in *t. If ClockRealtime() reports failure, (time_t)-1 is
+// returned and *t is left untouched.
+extern "C" time_t __vdso_time(time_t* t) {
+  struct timespec ts;
+  if (ClockRealtime(&ts) != 0) {
+    // ts was not filled in. Report the failure the way time() documents it
+    // instead of returning an uninitialized value.
+    return static_cast<time_t>(-1);
+  }
+  if (t) {
+    *t = ts.tv_sec;
+  }
+  return ts.tv_sec;
+}
+// Weak, unprefixed alias for the same entry point.
+extern "C" time_t time(time_t* t) __attribute__((weak, alias("__vdso_time")));
+
+// __vdso_getcpu() implements getcpu()
+//
+// All three arguments are passed straight through to sys_getcpu() and its
+// return value is returned, widened to long.
+extern "C" long __vdso_getcpu(unsigned* cpu, unsigned* node,
+ struct getcpu_cache* cache) {
+ // No optimizations yet, just make the real system call.
+ return sys_getcpu(cpu, node, cache);
+}
+// Weak, unprefixed alias for the same entry point.
+extern "C" long getcpu(unsigned* cpu, unsigned* node,
+ struct getcpu_cache* cache)
+ __attribute__((weak, alias("__vdso_getcpu")));
+
+} // namespace vdso
diff --git a/vdso/vdso.lds b/vdso/vdso.lds
new file mode 100644
index 000000000..97bb6d0c1
--- /dev/null
+++ b/vdso/vdso.lds
@@ -0,0 +1,101 @@
+/*
+ * Linker script for the VDSO.
+ *
+ * The VDSO is essentially a normal ELF shared library that is mapped into the
+ * address space of the process that is going to use it. The address of the
+ * VDSO is passed to the runtime linker in the AT_SYSINFO_EHDR entry of the aux
+ * vector.
+ *
+ * There are, however, three ways in which the VDSO differs from a normal
+ * shared library:
+ *
+ * - The runtime linker does not attempt to process any relocations for the
+ * VDSO so it is the responsibility of whoever loads the VDSO into the
+ * address space to do this if necessary. Because of this restriction we are
+ * careful to ensure that the VDSO does not need to have any relocations
+ * applied to it.
+ *
+ * - Although the VDSO is position independent and would normally be linked at
+ * virtual address 0, the Linux kernel VDSO is actually linked at a non zero
+ * virtual address and the code in the system runtime linker that handles the
+ * VDSO expects this to be the case so we have to explicitly link this VDSO
+ * at a non zero address. The actual address is arbitrary, but we use the
+ * same one as the Linux kernel VDSO.
+ *
+ * - The VDSO will be directly mmapped by the sentry, rather than going through
+ * a normal ELF loading process. The VDSO must be carefully constructed such
+ * that the layout in the ELF file is identical to the layout in memory.
+ */
+
+VDSO_PRELINK = 0xffffffffff700000;
+
+SECTIONS {
+ /* The parameter page is mapped just before the VDSO. */
+ _params = VDSO_PRELINK - 0x1000;
+
+ . = VDSO_PRELINK + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
+ /*
+ * TODO: Remove this alignment? Then the VDSO would fit in a
+ * single page.
+ */
+ . = ALIGN(0x1000);
+ .text : { *(.text*) } :text =0x90909090
+
+ /*
+ * N.B. There is no data/bss section. This VDSO neither needs nor uses a data
+ * section. We omit it entirely because some gcc/clang and gold/bfd version
+ * combinations struggle to handle an empty data PHDR segment (internal
+ * linker assertion failures result).
+ *
+ * If the VDSO does incorrectly include a data section, the linker will
+ * include it in the text segment. check_vdso.py looks for this degenerate
+ * case.
+ */
+}
+
+PHDRS {
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R | PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * Define the symbols that are to be exported.
+ */
+VERSION {
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ getcpu;
+ __vdso_getcpu;
+ time;
+ __vdso_time;
+
+ local: *;
+ };
+}
diff --git a/vdso/vdso_time.cc b/vdso/vdso_time.cc
new file mode 100644
index 000000000..5d5c8de65
--- /dev/null
+++ b/vdso/vdso_time.cc
@@ -0,0 +1,145 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "vdso/vdso_time.h"
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "vdso/cycle_clock.h"
+#include "vdso/seqlock.h"
+#include "vdso/syscalls.h"
+
+// struct params defines the layout of the parameter page maintained by the
+// kernel (i.e., sentry).
+//
+// This is similar to the VVAR page maintained by the normal Linux kernel for
+// its VDSO, but it has a different layout.
+//
+// It must be kept in sync with VDSOParamPage in pkg/sentry/kernel/vdso.go.
+struct params {
+ // Sequence counter for the fields below. Readers sample it with
+ // read_seqcount_begin() before, and read_seqcount_retry() after, reading
+ // the other fields.
+ uint64_t seq_count;
+
+ // Parameters read by ClockMonotonic().
+ // ready: when zero, the reader falls back to the clock_gettime system call.
+ // base_cycles: cycle_clock() value subtracted from the current reading.
+ // base_ref: nanosecond value to which the converted cycle delta is added.
+ // frequency: cycle counter rate used to convert cycles to nanoseconds
+ // (cycles per second); must be non-zero when ready is set.
+ uint64_t monotonic_ready;
+ int64_t monotonic_base_cycles;
+ int64_t monotonic_base_ref;
+ uint64_t monotonic_frequency;
+
+ // Parameters read by ClockRealtime(); same meaning as the monotonic set.
+ uint64_t realtime_ready;
+ int64_t realtime_base_cycles;
+ int64_t realtime_base_ref;
+ uint64_t realtime_frequency;
+};
+
+// Returns a pointer to the global parameter page.
+//
+// This page lives in the page just before the VDSO binary itself. The linker
+// defines _params as the page before the VDSO.
+//
+// Ideally, we'd simply declare _params as an extern struct params.
+// Unfortunately various combinations of old/new versions of gcc/clang and
+// gold/bfd struggle to generate references to such a global without generating
+// relocations.
+//
+// So instead, we use inline assembly with a construct that seems to have wide
+// compatibility across many toolchains.
+inline struct params* get_params() {
+ struct params* p = nullptr;
+ // RIP-relative lea: loads the address of the _params symbol into p without
+ // referencing it as a C++ global.
+ asm volatile("leaq _params(%%rip), %0" : "=r"(p) : :);
+ return p;
+}
+
+namespace vdso {
+
+const uint64_t kNsecsPerSec = 1000000000UL;
+
+// Splits a nanosecond count into whole seconds (tv_sec) and the remaining
+// nanoseconds (tv_nsec).
+inline struct timespec ns_to_timespec(uint64_t ns) {
+  const uint64_t whole_secs = ns / kNsecsPerSec;
+  const uint64_t leftover_ns = ns - whole_secs * kNsecsPerSec;
+
+  struct timespec result;
+  result.tv_sec = whole_secs;
+  result.tv_nsec = leftover_ns;
+  return result;
+}
+
+// Converts a cycle count to nanoseconds for a counter that runs at `frequency`
+// cycles per second.
+//
+// The nanoseconds-per-cycle factor is kept as a fixed-point value with 32
+// fractional bits. The product is formed in 128 bits before those fractional
+// bits are shifted back out, and the result is narrowed to 64 bits.
+// `frequency` must be non-zero.
+inline uint64_t cycles_to_ns(uint64_t frequency, uint64_t cycles) {
+  const uint64_t ns_per_cycle_fp = (kNsecsPerSec << 32) / frequency;
+  const unsigned __int128 scaled =
+      static_cast<unsigned __int128>(cycles) * ns_per_cycle_fp;
+  return static_cast<uint64_t>(scaled >> 32);
+}
+
+// ClockRealtime() is the VDSO implementation of clock_gettime(CLOCK_REALTIME).
+//
+// Computes the time from the realtime parameters in the parameter page and the
+// cycle counter, stores it in *ts and returns 0. If the parameters are not
+// marked ready, the clock_gettime system call is made instead and its result
+// is returned.
+int ClockRealtime(struct timespec* ts) {
+ struct params* params = get_params();
+ uint64_t seq;
+ uint64_t ready;
+ int64_t base_ref;
+ int64_t base_cycles;
+ uint64_t frequency;
+ int64_t now_cycles;
+
+ // Take a snapshot of the parameters together with a cycle counter reading,
+ // repeating until the sequence counter is unchanged across the snapshot.
+ do {
+ seq = read_seqcount_begin(&params->seq_count);
+ ready = params->realtime_ready;
+ base_ref = params->realtime_base_ref;
+ base_cycles = params->realtime_base_cycles;
+ frequency = params->realtime_frequency;
+ now_cycles = cycle_clock();
+ } while (read_seqcount_retry(&params->seq_count, seq));
+
+ if (!ready) {
+ // The sandbox kernel ensures that we won't compute a time later than this
+ // once the params are ready.
+ return sys_clock_gettime(CLOCK_REALTIME, ts);
+ }
+
+ // A counter reading earlier than the base is clamped to a zero delta rather
+ // than producing a negative one.
+ int64_t delta_cycles =
+ (now_cycles < base_cycles) ? 0 : now_cycles - base_cycles;
+ // base_ref is in nanoseconds; add the elapsed cycles converted to
+ // nanoseconds.
+ int64_t now_ns = base_ref + cycles_to_ns(frequency, delta_cycles);
+ *ts = ns_to_timespec(now_ns);
+ return 0;
+}
+
+// ClockMonotonic() is the VDSO implementation of
+// clock_gettime(CLOCK_MONOTONIC).
+//
+// Computes the time from the monotonic parameters in the parameter page and
+// the cycle counter, stores it in *ts and returns 0. If the parameters are not
+// marked ready, the clock_gettime system call is made instead and its result
+// is returned.
+int ClockMonotonic(struct timespec* ts) {
+ struct params* params = get_params();
+ uint64_t seq;
+ uint64_t ready;
+ int64_t base_ref;
+ int64_t base_cycles;
+ uint64_t frequency;
+ int64_t now_cycles;
+
+ // Take a snapshot of the parameters together with a cycle counter reading,
+ // repeating until the sequence counter is unchanged across the snapshot.
+ do {
+ seq = read_seqcount_begin(&params->seq_count);
+ ready = params->monotonic_ready;
+ base_ref = params->monotonic_base_ref;
+ base_cycles = params->monotonic_base_cycles;
+ frequency = params->monotonic_frequency;
+ now_cycles = cycle_clock();
+ } while (read_seqcount_retry(&params->seq_count, seq));
+
+ if (!ready) {
+ // The sandbox kernel ensures that we won't compute a time later than this
+ // once the params are ready.
+ return sys_clock_gettime(CLOCK_MONOTONIC, ts);
+ }
+
+ // A counter reading earlier than the base is clamped to a zero delta rather
+ // than producing a negative one.
+ int64_t delta_cycles =
+ (now_cycles < base_cycles) ? 0 : now_cycles - base_cycles;
+ // base_ref is in nanoseconds; add the elapsed cycles converted to
+ // nanoseconds.
+ int64_t now_ns = base_ref + cycles_to_ns(frequency, delta_cycles);
+ *ts = ns_to_timespec(now_ns);
+ return 0;
+}
+
+} // namespace vdso
diff --git a/vdso/vdso_time.h b/vdso/vdso_time.h
new file mode 100644
index 000000000..71d6e2f64
--- /dev/null
+++ b/vdso/vdso_time.h
@@ -0,0 +1,27 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VDSO_VDSO_TIME_H_
+#define VDSO_VDSO_TIME_H_
+
+#include <time.h>
+
+namespace vdso {
+
+// VDSO implementations of clock_gettime() for CLOCK_REALTIME and
+// CLOCK_MONOTONIC respectively. Each stores the current time in *ts and
+// returns 0, or returns the result of the fallback clock_gettime system call
+// when the time cannot be computed in the VDSO.
+int ClockRealtime(struct timespec* ts);
+int ClockMonotonic(struct timespec* ts);
+
+} // namespace vdso
+
+#endif // VDSO_VDSO_TIME_H_