1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kvm
import (
"sync/atomic"
"syscall"
"gvisor.dev/gvisor/pkg/sentry/usermem"
)
const (
	// faultBlockSize is the granularity, in bytes, at which memory faults
	// are serviced.
	//
	// The value is a trade-off: it has to be big enough that faults are
	// infrequent and that the available KVM slots (~512) are not all used
	// up, yet small enough that KVM does not complain about slot sizes
	// (~4GB). See handleBluepillFault for how this block is used.
	faultBlockSize = 2 * (1 << 30)

	// faultBlockMask is the mask for the fault blocks: and-ing an address
	// with it clears the offset within a faultBlockSize-sized block.
	//
	// The constant is deliberately typed as uintptr; the complement of an
	// untyped constant would not fit when later used as a uintptr.
	faultBlockMask = ^(uintptr(faultBlockSize) - 1)
)
// yield gives up the CPU by issuing a raw sched_yield system call.
//
// Whatever the system call returns is discarded.
//
//go:nosplit
func yield() {
	_, _, _ = syscall.RawSyscall(syscall.SYS_SCHED_YIELD, 0, 0, 0)
}
// calculateBluepillFault calculates the fault address range.
//
// It searches physicalRegions for the region containing physical and returns
// the block of that region that should be mapped to service a fault at that
// address. Blocks are at most faultBlockSize bytes long and are laid out
// relative to the start of the containing region, so two blocks computed for
// the same region are either identical or disjoint, and a block never extends
// outside its region.
//
// On success, the virtual and physical start of the block and its length in
// bytes are returned with ok set to true. If no region contains physical, all
// results are zero and ok is false.
//
//go:nosplit
func calculateBluepillFault(physical uintptr) (virtualStart, physicalStart, length uintptr, ok bool) {
	alignedPhysical := physical &^ uintptr(usermem.PageSize-1)
	for _, pr := range physicalRegions {
		end := pr.physical + pr.length
		if physical < pr.physical || physical >= end {
			continue
		}

		// Select the block containing the fault, counting blocks from the
		// start of the region rather than from physical address zero.
		//
		// Aligning to absolute multiples of faultBlockSize and then
		// clamping to the region start would make the first block of a
		// region that does not begin on such a multiple run past the next
		// boundary, overlapping the block computed for later addresses.
		//
		// The comparison guards the subtraction against wrapping when the
		// region start is not page-aligned and the fault lies in its first
		// (partial) page.
		physicalStart = pr.physical
		if alignedPhysical > pr.physical {
			physicalStart += (alignedPhysical - pr.physical) & faultBlockMask
		}
		virtualStart = pr.virtual + (physicalStart - pr.physical)

		// Bound the block to the end of the region.
		physicalEnd := physicalStart + faultBlockSize
		if physicalEnd > end {
			physicalEnd = end
		}
		length = physicalEnd - physicalStart
		return virtualStart, physicalStart, length, true
	}
	return 0, 0, 0, false
}
// handleBluepillFault services a fault on a physical address by setting a KVM
// memory slot for the block that contains it.
//
// The virtual address corresponding to physical is returned together with
// true once the block is mapped (or was found to exist already). If
// calculateBluepillFault cannot resolve physical, (0, false) is returned. Any
// other failure to set the slot is fatal and is reported through throw.
//
//go:nosplit
func handleBluepillFault(m *machine, physical uintptr) (uintptr, bool) {
	// We are in a signal handler context here, so the whole job of mapping
	// the underlying physical pages is done inline: nothing called from
	// this function may split the stack.
	virtualStart, physicalStart, length, ok := calculateBluepillFault(physical)
	if !ok {
		return 0, false
	}
	faultVirtual := virtualStart + (physical - physicalStart)

	// Acquire the exclusive right to set a slot by swapping the sentinel
	// value into m.nextSlot; reading the sentinel back means another call
	// currently holds that right. See machine.nextSlot for information
	// about the protocol.
	const slotBusy = ^uint32(0)
	var slot uint32
	for {
		slot = atomic.SwapUint32(&m.nextSlot, slotBusy)
		if slot != slotBusy {
			break
		}
		yield() // Race with another call.
	}

	errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart)
	if errno != 0 {
		// The slot was not consumed; put it back so that it stays
		// available to the next caller.
		atomic.StoreUint32(&m.nextSlot, slot)
		switch errno {
		case syscall.EEXIST:
			// The region already exists, possibly because we raced
			// with another vCPU that satisfied the same fault. The
			// slot has been reverted above, so just report success.
			return faultVirtual, true
		case syscall.EINVAL:
			throw("set memory region failed; out of slots")
		case syscall.ENOMEM:
			throw("set memory region failed: out of memory")
		case syscall.EFAULT:
			throw("set memory region failed: invalid physical range")
		default:
			throw("set memory region failed: unknown reason")
		}
		panic("unreachable")
	}

	// The region was added: publish the next free slot, which also lets
	// another set proceed.
	atomic.StoreUint32(&m.nextSlot, slot+1)
	return faultVirtual, true
}
|