Diffstat (limited to 'pkg/sentry/platform/kvm/machine.go')
-rw-r--r--  pkg/sentry/platform/kvm/machine.go | 131
1 file changed, 64 insertions(+), 67 deletions(-)
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index f045345d5..abdc51431 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -46,16 +46,14 @@ type machine struct {
mappingCache sync.Map
// mu protects vCPUs.
- mu sync.Mutex
+ mu sync.RWMutex
// available is notified when vCPUs are available.
available sync.Cond
// vCPUs are the machine vCPUs.
//
- // This is eventually keyed by system TID, but is initially indexed by
- // the negative vCPU id. This is merely an optimization, so while
- // collisions here are not possible, it wouldn't matter anyways.
+ // These are populated dynamically.
vCPUs map[uint64]*vCPU
}
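
The switch to sync.RWMutex pairs with the new fast path in Get() further down: lookups by TID take only the shared lock, and the exclusive lock is taken only when the map may change. A minimal sketch of that read-mostly pattern, using an illustrative cache type rather than the real machine struct:

package kvmsketch

import "sync"

// tidCache is an illustrative stand-in for machine.vCPUs: lookups vastly
// outnumber insertions, so hits take only the shared lock.
type tidCache struct {
	mu sync.RWMutex
	m  map[uint64]int
}

func (c *tidCache) lookup(tid uint64, create func() int) int {
	c.mu.RLock()
	if v, ok := c.m[tid]; ok {
		c.mu.RUnlock() // Fast path: shared lock only.
		return v
	}
	// Slow path: the map may change. RWMutex has no atomic upgrade, so
	// drop the read lock, take the write lock, and re-check before
	// inserting (another goroutine may have won the race in between).
	c.mu.RUnlock()
	c.mu.Lock()
	defer c.mu.Unlock()
	if v, ok := c.m[tid]; ok {
		return v
	}
	v := create()
	c.m[tid] = v
	return v
}
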
@@ -117,73 +115,65 @@ type vCPU struct {
vCPUArchState
}
+// newVCPU creates and returns a new vCPU.
+//
+// Precondition: m.mu must be held.
+func (m *machine) newVCPU() *vCPU {
+ id := len(m.vCPUs)
+
+ // Create the vCPU.
+ fd, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CREATE_VCPU, uintptr(id))
+ if errno != 0 {
+ panic(fmt.Sprintf("error creating new vCPU: %v", errno))
+ }
+
+ c := &vCPU{
+ id: id,
+ fd: int(fd),
+ machine: m,
+ }
+ c.CPU.Init(&m.kernel)
+ c.CPU.KernelSyscall = bluepillSyscall
+ c.CPU.KernelException = bluepillException
+
+ // Ensure the signal mask is correct.
+ if err := c.setSignalMask(); err != nil {
+ panic(fmt.Sprintf("error setting signal mask: %v", err))
+ }
+
+ // Initialize architecture state.
+ if err := c.initArchState(); err != nil {
+ panic(fmt.Sprintf("error initialization vCPU state: %v", err))
+ }
+
+ // Map the run data.
+ runData, err := mapRunData(int(fd))
+ if err != nil {
+ panic(fmt.Sprintf("error mapping run data: %v", err))
+ }
+ c.runData = runData
+
+ return c // Done.
+}
+
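
For context, mapRunData itself is not part of this diff; the usual KVM recipe it wraps is to query the size of the shared kvm_run region from the /dev/kvm fd and then mmap that many bytes from the vCPU fd at offset zero. A hedged sketch of that recipe (the function name and two-fd signature here are illustrative, not gVisor's actual API):

package kvmsketch

import "syscall"

// _KVM_GET_VCPU_MMAP_SIZE is _IO(KVMIO, 0x04) from the KVM UAPI headers.
const _KVM_GET_VCPU_MMAP_SIZE = 0xae04

// mapRunRegion maps the per-vCPU kvm_run structure shared with the kernel.
func mapRunRegion(kvmFd, vcpuFd int) (uintptr, error) {
	// The region size is a property of the KVM module, so it is queried
	// on the /dev/kvm fd rather than on the vCPU fd.
	size, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(kvmFd), _KVM_GET_VCPU_MMAP_SIZE, 0)
	if errno != 0 {
		return 0, errno
	}
	addr, _, errno := syscall.Syscall6(syscall.SYS_MMAP, 0, size,
		uintptr(syscall.PROT_READ|syscall.PROT_WRITE),
		uintptr(syscall.MAP_SHARED), uintptr(vcpuFd), 0)
	if errno != 0 {
		return 0, errno
	}
	return addr, nil
}
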
// newMachine returns a new VM context.
-func newMachine(vm int, vCPUs int) (*machine, error) {
+func newMachine(vm int) (*machine, error) {
// Create the machine.
m := &machine{
fd: vm,
vCPUs: make(map[uint64]*vCPU),
}
m.available.L = &m.mu
- if vCPUs > _KVM_NR_VCPUS {
- // Hard cap at KVM's limit.
- vCPUs = _KVM_NR_VCPUS
- }
- if n := 2 * runtime.NumCPU(); vCPUs > n {
- // Cap at twice the number of physical cores. Otherwise we're
- // just wasting memory and thrashing. (There may be scheduling
- // issues when you've got > n active threads.)
- vCPUs = n
- }
m.kernel.Init(ring0.KernelOpts{
PageTables: pagetables.New(newAllocator()),
})
// Initialize architecture state.
- if err := m.initArchState(vCPUs); err != nil {
+ if err := m.initArchState(); err != nil {
m.Destroy()
return nil, err
}
- // Create all the vCPUs.
- for id := 0; id < vCPUs; id++ {
- // Create the vCPU.
- fd, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(vm), _KVM_CREATE_VCPU, uintptr(id))
- if errno != 0 {
- m.Destroy()
- return nil, fmt.Errorf("error creating VCPU: %v", errno)
- }
- c := &vCPU{
- id: id,
- fd: int(fd),
- machine: m,
- }
- c.CPU.Init(&m.kernel)
- c.CPU.KernelSyscall = bluepillSyscall
- c.CPU.KernelException = bluepillException
- m.vCPUs[uint64(-id)] = c // See above.
-
- // Ensure the signal mask is correct.
- if err := c.setSignalMask(); err != nil {
- m.Destroy()
- return nil, err
- }
-
- // Initialize architecture state.
- if err := c.initArchState(); err != nil {
- m.Destroy()
- return nil, err
- }
-
- // Map the run data.
- runData, err := mapRunData(int(fd))
- if err != nil {
- m.Destroy()
- return nil, err
- }
- c.runData = runData
- }
-
// Apply the physical mappings. Note that these mappings may point to
// guest physical addresses that are not actually available. These
// physical pages are mapped on demand, see kernel_unsafe.go.
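
With this hunk, newMachine no longer takes or caps a vCPU count: creation moves into Get(), bounded only by _KVM_NR_VCPUS, and the old 2*runtime.NumCPU() heuristic disappears. Call sites shrink accordingly; a hypothetical caller (the real one lives elsewhere in this package and is not shown in the diff):

// openMachine is a hypothetical call site; vmFd is an already-created
// KVM VM fd.
func openMachine(vmFd int) (*machine, error) {
	// Before: the pool was sized eagerly, e.g. newMachine(vmFd, 2*runtime.NumCPU()).
	// After: vCPUs are created lazily, one per new thread entering Get().
	return newMachine(vmFd)
}
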
@@ -298,15 +288,20 @@ func (m *machine) Destroy() {
func (m *machine) Get() *vCPU {
runtime.LockOSThread()
tid := procid.Current()
- m.mu.Lock()
+ m.mu.RLock()
// Check for an exact match.
if c := m.vCPUs[tid]; c != nil {
c.lock()
- m.mu.Unlock()
+ m.mu.RUnlock()
return c
}
+ // The happy path failed. We now proceed to acquire an exclusive lock
+ // (because the vCPU map may change), and scan all available vCPUs.
+ m.mu.RUnlock()
+ m.mu.Lock()
+
for {
// Scan for an available vCPU.
for origTID, c := range m.vCPUs {
@@ -314,16 +309,21 @@ func (m *machine) Get() *vCPU {
delete(m.vCPUs, origTID)
m.vCPUs[tid] = c
m.mu.Unlock()
-
- // We need to reload thread-local segments as
- // we have origTID != tid and the vCPU state
- // may be stale.
- c.loadSegments()
- atomic.StoreUint64(&c.tid, tid)
+ c.loadSegments(tid)
return c
}
}
+ // Create a new vCPU, if we are still below KVM's vCPU limit.
+ if len(m.vCPUs) < _KVM_NR_VCPUS {
+ c := m.newVCPU()
+ c.lock()
+ m.vCPUs[tid] = c
+ m.mu.Unlock()
+ c.loadSegments(tid)
+ return c
+ }
+
// Scan for something not in user mode.
for origTID, c := range m.vCPUs {
if !atomic.CompareAndSwapUint32(&c.state, vCPUGuest, vCPUGuest|vCPUWaiter) {
@@ -346,10 +346,7 @@ func (m *machine) Get() *vCPU {
delete(m.vCPUs, origTID)
m.vCPUs[tid] = c
m.mu.Unlock()
-
- // See above.
- c.loadSegments()
- atomic.StoreUint64(&c.tid, tid)
+ c.loadSegments(tid)
return c
}