path: root/pkg/sentry/platform/kvm/bluepill_fault.go
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kvm

import (
	"sync/atomic"

	"golang.org/x/sys/unix"
	"gvisor.dev/gvisor/pkg/usermem"
)

const (
	// faultBlockSize is the size used for servicing memory faults.
	//
	// This should be large enough to avoid frequent faults and avoid using
	// all available KVM slots (~512), but small enough that KVM does not
	// complain about slot sizes (~4GB). See handleBluepillFault for how
	// this block is used.
	faultBlockSize = 2 << 30

	// faultBlockMask is the mask for the fault blocks.
	//
	// This must be typed to avoid overflow complaints (ugh).
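	//
	// For example, with faultBlockSize = 2<<30 (2 GiB) the mask clears the
	// low 31 bits, so a physical address of 0x8_1234_5000 falls in the
	// block starting at 0x8_0000_0000.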
	faultBlockMask = ^uintptr(faultBlockSize - 1)
)

// yield yields the CPU.
//
//go:nosplit
func yield() {
	unix.RawSyscall(unix.SYS_SCHED_YIELD, 0, 0, 0)
}

// calculateBluepillFault calculates the fault address range.
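//
// For example (illustrative values): given a physical region covering
// [0x8000_0000, 0x1_8000_0000) and a fault at physical address 0x8123_4567,
// the 2 GiB-aligned block base 0x8000_0000 lies inside the region, so the
// function returns that base with a length of faultBlockSize (2 GiB).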
//
//go:nosplit
func calculateBluepillFault(physical uintptr, phyRegions []physicalRegion) (virtualStart, physicalStart, length uintptr, ok bool) {
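	// Align the faulting address down to a page boundary.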
	alignedPhysical := physical &^ uintptr(usermem.PageSize-1)
	for _, pr := range phyRegions {
		end := pr.physical + pr.length
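		// Skip regions that do not contain the faulting address.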
		if physical < pr.physical || physical >= end {
			continue
		}

		// Adjust the block to match our size.
		physicalStart = alignedPhysical & faultBlockMask
		if physicalStart < pr.physical {
			// Bound the starting point to the start of the region.
			physicalStart = pr.physical
		}
		virtualStart = pr.virtual + (physicalStart - pr.physical)
		physicalEnd := physicalStart + faultBlockSize
		if physicalEnd > end {
			physicalEnd = end
		}
		length = physicalEnd - physicalStart
		return virtualStart, physicalStart, length, true
	}

	return 0, 0, 0, false
}

// handleBluepillFault handles a physical fault.
//
// The corresponding virtual address is returned. This may throw on error.
//
//go:nosplit
func handleBluepillFault(m *machine, physical uintptr, phyRegions []physicalRegion, flags uint32) (uintptr, bool) {
	// Paging fault: we need to map the underlying physical pages for this
	// fault. This all has to be done in this function because we're in a
	// signal handler context. (We can't call any functions that might
	// split the stack.)
	virtualStart, physicalStart, length, ok := calculateBluepillFault(physical, phyRegions)
	if !ok {
		return 0, false
	}

	// Set the KVM slot.
	//
	// First, we need to acquire the exclusive right to set a slot.  See
	// machine.nextSlot for information about the protocol.
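	// Briefly: nextSlot holds the index of the next slot to use, and
	// ^uint32(0) is a sentinel meaning another thread currently holds the
	// right; we spin, yielding the CPU, until the swap returns a real
	// index.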
	slot := atomic.SwapUint32(&m.nextSlot, ^uint32(0))
	for slot == ^uint32(0) {
		yield() // Race with another call.
		slot = atomic.SwapUint32(&m.nextSlot, ^uint32(0))
	}
	errno := m.setMemoryRegion(int(slot), physicalStart, length, virtualStart, flags)
	if errno == 0 {
		// Store the physical address in the slot. This is used to
		// avoid calls to handleBluepillFault in the future (see
		// machine.mapPhysical).
		atomic.StoreUintptr(&m.usedSlots[slot], physical)
		// Successfully added region; we can increment nextSlot and
		// allow another set to proceed here.
		atomic.StoreUint32(&m.nextSlot, slot+1)
		return virtualStart + (physical - physicalStart), true
	}

	// Release our slot (still available).
	atomic.StoreUint32(&m.nextSlot, slot)

	switch errno {
	case unix.EEXIST:
		// The region already exists. It's possible that we raced with
		// another vCPU here. We just revert nextSlot and return true,
		// because this must have been satisfied by some other vCPU.
		return virtualStart + (physical - physicalStart), true
	case unix.EINVAL:
		throw("set memory region failed: out of slots")
	case unix.ENOMEM:
		throw("set memory region failed: out of memory")
	case unix.EFAULT:
		throw("set memory region failed: invalid physical range")
	default:
		throw("set memory region failed: unknown reason")
	}

	panic("unreachable")
}
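
// The claim/release dance in handleBluepillFault is a tiny spinlock built
// from a single atomic word. A minimal generic sketch of the same pattern,
// with hypothetical names (busy, claim, release), would look like:
//
//	const busy = ^uint32(0)
//
//	// claim spins until it owns the word, returning the stored value.
//	func claim(word *uint32) uint32 {
//		v := atomic.SwapUint32(word, busy)
//		for v == busy {
//			yield() // Race with another claimant.
//			v = atomic.SwapUint32(word, busy)
//		}
//		return v
//	}
//
//	// release publishes the next value, letting other claimants proceed.
//	func release(word *uint32, next uint32) {
//		atomic.StoreUint32(word, next)
//	}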