diff options
Diffstat (limited to 'pkg/sentry/platform/kvm/bluepill_fault.go')
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_fault.go | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go new file mode 100644 index 000000000..3c452f5ba --- /dev/null +++ b/pkg/sentry/platform/kvm/bluepill_fault.go @@ -0,0 +1,127 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kvm + +import ( + "sync/atomic" + "syscall" + + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + +const ( + // faultBlockSize is the size used for servicing memory faults. + // + // This should be large enough to avoid frequent faults and avoid using + // all available KVM slots (~512), but small enough that KVM does not + // complain about slot sizes (~4GB). See handleBluepillFault for how + // this block is used. + faultBlockSize = 2 << 30 + + // faultBlockMask is the mask for the fault blocks. + // + // This must be typed to avoid overflow complaints (ugh). + faultBlockMask = ^uintptr(faultBlockSize - 1) +) + +// yield yields the CPU. +// +//go:nosplit +func yield() { + syscall.RawSyscall(syscall.SYS_SCHED_YIELD, 0, 0, 0) +} + +// calculateBluepillFault calculates the fault address range. +// +//go:nosplit +func calculateBluepillFault(physical uintptr) (virtualStart, physicalStart, length uintptr, ok bool) { + alignedPhysical := physical &^ uintptr(usermem.PageSize-1) + for _, pr := range physicalRegions { + end := pr.physical + pr.length + if physical < pr.physical || physical >= end { + continue + } + + // Adjust the block to match our size. + physicalStart = alignedPhysical & faultBlockMask + if physicalStart < pr.physical { + // Bound the starting point to the start of the region. + physicalStart = pr.physical + } + virtualStart = pr.virtual + (physicalStart - pr.physical) + physicalEnd := physicalStart + faultBlockSize + if physicalEnd > end { + physicalEnd = end + } + length = physicalEnd - physicalStart + return virtualStart, physicalStart, length, true + } + + return 0, 0, 0, false +} + +// handleBluepillFault handles a physical fault. +// +// The corresponding virtual address is returned. This may throw on error. +// +//go:nosplit +func handleBluepillFault(m *machine, physical uintptr) (uintptr, bool) { + // Paging fault: we need to map the underlying physical pages for this + // fault. This all has to be done in this function because we're in a + // signal handler context. (We can't call any functions that might + // split the stack.) + virtualStart, physicalStart, length, ok := calculateBluepillFault(physical) + if !ok { + return 0, false + } + + // Set the KVM slot. + // + // First, we need to acquire the exclusive right to set a slot. See + // machine.nextSlot for information about the protocol. + slot := atomic.SwapUint32(&m.nextSlot, ^uint32(0)) + for slot == ^uint32(0) { + yield() // Race with another call. + slot = atomic.SwapUint32(&m.nextSlot, ^uint32(0)) + } + errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart) + if errno == 0 { + // Successfully added region; we can increment nextSlot and + // allow another set to proceed here. + atomic.StoreUint32(&m.nextSlot, slot+1) + return virtualStart + (physical - physicalStart), true + } + + // Release our slot (still available). + atomic.StoreUint32(&m.nextSlot, slot) + + switch errno { + case syscall.EEXIST: + // The region already exists. It's possible that we raced with + // another vCPU here. We just revert nextSlot and return true, + // because this must have been satisfied by some other vCPU. + return virtualStart + (physical - physicalStart), true + case syscall.EINVAL: + throw("set memory region failed; out of slots") + case syscall.ENOMEM: + throw("set memory region failed: out of memory") + case syscall.EFAULT: + throw("set memory region failed: invalid physical range") + default: + throw("set memory region failed: unknown reason") + } + + panic("unreachable") +} |