diff options
author | Chenggang <chenggang.qcg@alibaba-inc.com> | 2018-09-13 21:46:03 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-09-13 21:47:11 -0700 |
commit | faa34a0738456f5328cf99de13622a150042776d (patch) | |
tree | 72f2aaac72df21efac1c4ce55d6a711ae0a7d296 /pkg | |
parent | 29a7271f5da9fdb7b4a9a6c9ea61421ce6844a73 (diff) |
platform/kvm: Get max vcpu number dynamically by ioctl
Older kernel versions, such as 4.4, support only 255 vCPUs.
When gVisor is run on these kernels, it could panic because the
vCPU ID and the number of vCPUs can exceed max_vcpus.
Use ioctl(vmfd, _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS) to get the
maximum number of vCPUs dynamically.
Change-Id: I50dd859a11b1c2cea854a8e27d4bf11a411aa45c
PiperOrigin-RevId: 212929704
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/sentry/platform/kvm/address_space.go | 29 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm.go | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_const.go | 8 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 30 |
4 files changed, 47 insertions, 21 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 463617170..c4293c517 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -26,31 +26,26 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) -type vCPUBitArray [(_KVM_NR_VCPUS + 63) / 64]uint64 - // dirtySet tracks vCPUs for invalidation. type dirtySet struct { - vCPUs vCPUBitArray + vCPUs []uint64 } // forEach iterates over all CPUs in the dirty set. func (ds *dirtySet) forEach(m *machine, fn func(c *vCPU)) { - var localSet vCPUBitArray - for index := 0; index < len(ds.vCPUs); index++ { - // Clear the dirty set, copy to the local one. - localSet[index] = atomic.SwapUint64(&ds.vCPUs[index], 0) - } - m.mu.RLock() defer m.mu.RUnlock() - for _, c := range m.vCPUs { - index := uint64(c.id) / 64 - bit := uint64(1) << uint(c.id%64) - - // Call the function if it was set. - if localSet[index]&bit != 0 { - fn(c) + for index := range ds.vCPUs { + mask := atomic.SwapUint64(&ds.vCPUs[index], 0) + if mask != 0 { + for bit := 0; bit < 64; bit++ { + if mask&(1<<uint64(bit)) == 0 { + continue + } + id := 64*index + bit + fn(m.vCPUsByID[id]) + } } } } @@ -92,7 +87,7 @@ type addressSpace struct { pageTables *pagetables.PageTables // dirtySet is the set of dirty vCPUs. - dirtySet dirtySet + dirtySet *dirtySet // files contains files mapped in the host address space. 
// diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 19bc2d515..0c4dff308 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -143,6 +143,7 @@ func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan stru filemem: k.FileMem, machine: k.machine, pageTables: pageTables, + dirtySet: k.machine.newDirtySet(), }, nil, nil } diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index c819fd16f..ca44c31b3 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -25,6 +25,7 @@ const ( _KVM_SET_TSS_ADDR = 0xae47 _KVM_RUN = 0xae80 _KVM_NMI = 0xae9a + _KVM_CHECK_EXTENSION = 0xae03 _KVM_INTERRUPT = 0x4004ae86 _KVM_SET_MSRS = 0x4008ae89 _KVM_SET_USER_MEMORY_REGION = 0x4020ae46 @@ -49,9 +50,14 @@ const ( _KVM_EXIT_INTERNAL_ERROR = 0x11 ) +// KVM capability options. +const ( + _KVM_CAP_MAX_VCPUS = 0x42 +) + // KVM limits. const ( - _KVM_NR_VCPUS = 0x100 + _KVM_NR_VCPUS = 0xff _KVM_NR_INTERRUPTS = 0x100 _KVM_NR_CPUID_ENTRIES = 0x100 ) diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 68e099d1b..9f60b6b31 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -22,6 +22,7 @@ import ( "syscall" "gvisor.googlesource.com/gvisor/pkg/atomicbitops" + "gvisor.googlesource.com/gvisor/pkg/log" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/procid" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" @@ -55,6 +56,12 @@ type machine struct { // // These are populated dynamically. vCPUs map[uint64]*vCPU + + // vCPUsByID are the machine vCPUs, can be indexed by the vCPU's ID. + vCPUsByID map[int]*vCPU + + // maxVCPUs is the maximum number of vCPUs supported by the machine. 
+ maxVCPUs int } const ( @@ -135,6 +142,7 @@ func (m *machine) newVCPU() *vCPU { c.CPU.Init(&m.kernel) c.CPU.KernelSyscall = bluepillSyscall c.CPU.KernelException = bluepillException + m.vCPUsByID[c.id] = c // Ensure the signal mask is correct. if err := c.setSignalMask(); err != nil { @@ -160,14 +168,23 @@ func (m *machine) newVCPU() *vCPU { func newMachine(vm int) (*machine, error) { // Create the machine. m := &machine{ - fd: vm, - vCPUs: make(map[uint64]*vCPU), + fd: vm, + vCPUs: make(map[uint64]*vCPU), + vCPUsByID: make(map[int]*vCPU), } m.available.L = &m.mu m.kernel.Init(ring0.KernelOpts{ PageTables: pagetables.New(newAllocator()), }) + maxVCPUs, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS) + if errno != 0 { + m.maxVCPUs = _KVM_NR_VCPUS + } else { + m.maxVCPUs = int(maxVCPUs) + } + log.Debugf("The maximum number of vCPUs is %d.", m.maxVCPUs) + // Apply the physical mappings. Note that these mappings may point to // guest physical addresses that are not actually available. These // physical pages are mapped on demand, see kernel_unsafe.go. @@ -315,7 +332,7 @@ func (m *machine) Get() *vCPU { } // Create a new vCPU (maybe). - if len(m.vCPUs) < _KVM_NR_VCPUS { + if len(m.vCPUs) < m.maxVCPUs { c := m.newVCPU() c.lock() m.vCPUs[tid] = c @@ -365,6 +382,13 @@ func (m *machine) Put(c *vCPU) { m.available.Signal() } +// newDirtySet returns a new dirty set. +func (m *machine) newDirtySet() *dirtySet { + return &dirtySet{ + vCPUs: make([]uint64, (m.maxVCPUs+63)/64, (m.maxVCPUs+63)/64), + } +} + // lock marks the vCPU as in user mode. // // This should only be called directly when known to be safe, i.e. when |