diff options
author | Jamie Liu <jamieliu@google.com> | 2018-12-17 11:37:38 -0800 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-12-17 11:38:59 -0800 |
commit | 2421006426445a1827422c2dbdd6fc6a47087147 (patch) | |
tree | 49aa2bc113c208fc117aff8a036866a7260090e5 /test/syscalls/linux | |
parent | 54694086dfb02a6f8453f043a44ffd10bb5a7070 (diff) |
Implement mlock(), kind of.
Currently mlock() and friends do nothing whatsoever. However, mlocking
is directly application-visible in a number of ways; for example,
madvise(MADV_DONTNEED) and msync(MS_INVALIDATE) both fail on mlocked
regions. Before this change we handled the two cases inconsistently:
MADV_DONTNEED is too important to not work, so it was always allowed, whereas
MS_INVALIDATE was always rejected.
Change MM to track mlocked regions in a manner consistent with Linux.
It still will not actually pin pages into host physical memory, but:
- mlock() will now cause sentry memory management to precommit mlocked
pages.
- madvise(MADV_DONTNEED) and msync(MS_INVALIDATE) will now fail on mlocked
pages (with EINVAL and EBUSY respectively), as described above, and succeed
on pages that are not mlocked.
PiperOrigin-RevId: 225861605
Change-Id: Iee187204979ac9a4d15d0e037c152c0902c8d0ee
Diffstat (limited to 'test/syscalls/linux')
-rw-r--r-- | test/syscalls/linux/BUILD | 15 | ||||
-rw-r--r-- | test/syscalls/linux/mlock.cc | 344 | ||||
-rw-r--r-- | test/syscalls/linux/msync.cc | 20 |
3 files changed, 373 insertions, 6 deletions
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 784997c18..aca55f492 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -1020,6 +1020,21 @@ cc_binary( ) cc_binary( + name = "mlock_test", + testonly = 1, + srcs = ["mlock.cc"], + linkstatic = 1, + deps = [ + "//test/util:capability_util", + "//test/util:cleanup", + "//test/util:memory_util", + "//test/util:multiprocess_util", + "//test/util:test_util", + "@com_google_googletest//:gtest", + ], +) + +cc_binary( name = "mmap_test", testonly = 1, srcs = ["mmap.cc"], diff --git a/test/syscalls/linux/mlock.cc b/test/syscalls/linux/mlock.cc new file mode 100644 index 000000000..a0d876c2e --- /dev/null +++ b/test/syscalls/linux/mlock.cc @@ -0,0 +1,344 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "test/util/capability_util.h" +#include "test/util/cleanup.h" +#include "test/util/memory_util.h" +#include "test/util/multiprocess_util.h" +#include "test/util/test_util.h" + +using ::testing::_; + +namespace gvisor { +namespace testing { + +namespace { + +PosixErrorOr<bool> CanMlock() { + struct rlimit rlim; + if (getrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { + return PosixError(errno, "getrlimit(RLIMIT_MEMLOCK)"); + } + if (rlim.rlim_cur != 0) { + return true; + } + return HaveCapability(CAP_IPC_LOCK); +} + +// Returns true if the page containing addr is mlocked. +bool IsPageMlocked(uintptr_t addr) { + // This relies on msync(MS_INVALIDATE) interacting correctly with mlocked + // pages, which is tested for by the MsyncInvalidate case below. + int const rv = msync(reinterpret_cast<void*>(addr & ~(kPageSize - 1)), + kPageSize, MS_ASYNC | MS_INVALIDATE); + if (rv == 0) { + return false; + } + // This uses TEST_PCHECK_MSG since it's used in subprocesses. 
+ TEST_PCHECK_MSG(errno == EBUSY, "msync failed with unexpected errno"); + return true; +} + +PosixErrorOr<Cleanup> ScopedSetSoftRlimit(int resource, rlim_t newval) { + struct rlimit old_rlim; + if (getrlimit(resource, &old_rlim) != 0) { + return PosixError(errno, "getrlimit failed"); + } + struct rlimit new_rlim = old_rlim; + new_rlim.rlim_cur = newval; + if (setrlimit(resource, &new_rlim) != 0) { + return PosixError(errno, "setrlimit failed"); + } + return Cleanup([resource, old_rlim] { + TEST_PCHECK(setrlimit(resource, &old_rlim) == 0); + }); +} + +TEST(MlockTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(MlockTest, ProtNone) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = + ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(kPageSize, PROT_NONE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(ENOMEM)); + // ENOMEM is returned because mlock can't populate the page, but it's still + // considered locked. 
+ EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(MlockTest, MadviseDontneed) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_THAT(madvise(mapping.ptr(), mapping.len(), MADV_DONTNEED), + SyscallFailsWithErrno(EINVAL)); +} + +TEST(MlockTest, MsyncInvalidate) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_ASYNC | MS_INVALIDATE), + SyscallFailsWithErrno(EBUSY)); + EXPECT_THAT(msync(mapping.ptr(), mapping.len(), MS_SYNC | MS_INVALIDATE), + SyscallFailsWithErrno(EBUSY)); +} + +TEST(MlockTest, Fork) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + EXPECT_THAT( + InForkedProcess([&] { TEST_CHECK(!IsPageMlocked(mapping.addr())); }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(MlockTest, RlimitMemlockZero) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(EPERM)); +} + +TEST(MlockTest, RlimitMemlockInsufficient) { + if 
(ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize)); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), + SyscallFailsWithErrno(ENOMEM)); +} + +TEST(MunlockTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +TEST(MunlockTest, NotLocked) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +// There is currently no test for mlockall(MCL_CURRENT) because the default +// RLIMIT_MEMLOCK of 64 KB is insufficient to actually invoke +// mlockall(MCL_CURRENT). + +TEST(MlockallTest, Future) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + + // Run this test in a separate (single-threaded) subprocess to ensure that a + // background thread doesn't try to mmap a large amount of memory, fail due + // to hitting RLIMIT_MEMLOCK, and explode the process violently. 
+ EXPECT_THAT(InForkedProcess([] { + auto const mapping = + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE) + .ValueOrDie(); + TEST_CHECK(!IsPageMlocked(mapping.addr())); + TEST_PCHECK(mlockall(MCL_FUTURE) == 0); + // Ensure that mlockall(MCL_FUTURE) is turned off before the end + // of the test, as otherwise mmaps may fail unexpectedly. + Cleanup do_munlockall([] { TEST_PCHECK(munlockall() == 0); }); + auto const mapping2 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + TEST_CHECK(IsPageMlocked(mapping2.addr())); + // Fire munlockall() and check that it disables + // mlockall(MCL_FUTURE). + do_munlockall.Release()(); + auto const mapping3 = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + TEST_CHECK(!IsPageMlocked(mapping2.addr())); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(MunlockallTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(munlockall(), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +#ifndef SYS_mlock2 +#ifdef __x86_64__ +#define SYS_mlock2 325 +#endif +#endif + +#ifndef MLOCK_ONFAULT +#define MLOCK_ONFAULT 0x01 // Linux: include/uapi/asm-generic/mman-common.h +#endif + +#ifdef SYS_mlock2 + +int mlock2(void const* addr, size_t len, int flags) { + return syscall(SYS_mlock2, addr, len, flags); +} + +TEST(Mlock2Test, NoFlags) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), 0), SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(Mlock2Test, MlockOnfault) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); 
+ auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); + ASSERT_THAT(mlock2(mapping.ptr(), mapping.len(), MLOCK_ONFAULT), + SyscallSucceeds()); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); +} + +TEST(Mlock2Test, UnknownFlags) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + EXPECT_THAT(mlock2(mapping.ptr(), mapping.len(), ~0), + SyscallFailsWithErrno(EINVAL)); +} + +#endif // defined(SYS_mlock2) + +TEST(MapLockedTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto const mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + EXPECT_THAT(munlock(mapping.ptr(), mapping.len()), SyscallSucceeds()); + EXPECT_FALSE(IsPageMlocked(mapping.addr())); +} + +TEST(MapLockedTest, RlimitMemlockZero) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + EXPECT_THAT( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED), + PosixErrorIs(EPERM, _)); +} + +TEST(MapLockedTest, RlimitMemlockInsufficient) { + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, kPageSize)); + EXPECT_THAT( + MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED), + PosixErrorIs(EAGAIN, _)); +} + +TEST(MremapLockedTest, Basic) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + 
EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + if (addr == MAP_FAILED) { + FAIL() << "mremap failed: " << errno << " (" << strerror(errno) << ")"; + } + mapping.release(); + mapping.reset(addr, 2 * mapping.len()); + EXPECT_TRUE(IsPageMlocked(reinterpret_cast<uintptr_t>(addr))); +} + +TEST(MremapLockedTest, RlimitMemlockZero) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = + ASSERT_NO_ERRNO_AND_VALUE(ScopedSetSoftRlimit(RLIMIT_MEMLOCK, 0)); + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN) + << "addr = " << addr << ", errno = " << errno; +} + +TEST(MremapLockedTest, RlimitMemlockInsufficient) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanMlock())); + auto mapping = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_LOCKED)); + EXPECT_TRUE(IsPageMlocked(mapping.addr())); + + if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_IPC_LOCK))) { + ASSERT_NO_ERRNO(SetCapability(CAP_IPC_LOCK, false)); + } + Cleanup reset_rlimit = ASSERT_NO_ERRNO_AND_VALUE( + ScopedSetSoftRlimit(RLIMIT_MEMLOCK, mapping.len())); + void* addr = mremap(mapping.ptr(), mapping.len(), 2 * mapping.len(), + MREMAP_MAYMOVE, nullptr); + EXPECT_TRUE(addr == MAP_FAILED && errno == EAGAIN) + << "addr = " << addr << ", errno = " << errno; +} + +} // namespace + +} // namespace testing +} // namespace gvisor diff --git a/test/syscalls/linux/msync.cc b/test/syscalls/linux/msync.cc index 0ddc621aa..72d90dc78 100644 --- a/test/syscalls/linux/msync.cc +++ 
b/test/syscalls/linux/msync.cc @@ -43,14 +43,13 @@ class MsyncParameterizedTest : public ::testing::TestWithParam<MsyncTestParam> { protected: int msync_flags() const { return std::get<0>(GetParam()); } - PosixErrorOr<Mapping> GetMapping() const { - auto rv = std::get<1>(GetParam())(); - return rv; - } + PosixErrorOr<Mapping> GetMapping() const { return std::get<1>(GetParam())(); } }; -// All valid msync(2) flag combinations (not including MS_INVALIDATE, which -// gVisor doesn't implement). +// All valid msync(2) flag combinations, not including MS_INVALIDATE. ("Linux +// permits a call to msync() that specifies neither [MS_SYNC or MS_ASYNC], with +// semantics that are (currently) equivalent to specifying MS_ASYNC." - +// msync(2)) constexpr std::initializer_list<int> kMsyncFlags = {MS_SYNC, MS_ASYNC, 0}; // Returns functions that return mappings that should be successfully @@ -134,6 +133,15 @@ TEST_P(MsyncFullParamTest, UnalignedAddressFails) { SyscallFailsWithErrno(EINVAL)); } +TEST_P(MsyncFullParamTest, InvalidateUnlockedSucceeds) { + auto m = ASSERT_NO_ERRNO_AND_VALUE(GetMapping()); + EXPECT_THAT(msync(m.ptr(), m.len(), msync_flags() | MS_INVALIDATE), + SyscallSucceeds()); +} + +// The test for MS_INVALIDATE on mlocked pages is in mlock.cc since it requires +// probing for mlock support. + INSTANTIATE_TEST_CASE_P( All, MsyncFullParamTest, ::testing::Combine(::testing::ValuesIn(kMsyncFlags), |