summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
authorChenggang <chenggang.qcg@alibaba-inc.com>2018-09-13 21:46:03 -0700
committerShentubot <shentubot@google.com>2018-09-13 21:47:11 -0700
commitfaa34a0738456f5328cf99de13622a150042776d (patch)
tree72f2aaac72df21efac1c4ce55d6a711ae0a7d296 /pkg/sentry
parent29a7271f5da9fdb7b4a9a6c9ea61421ce6844a73 (diff)
platform/kvm: Get max vcpu number dynamically by ioctl
The old kernel version, such as 4.4, only support 255 vcpus. While gvisor is ran on these kernels, it could panic because the vcpu id and vcpu number beyond max_vcpus. Use ioctl(vmfd, _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS) to get max vcpus number dynamically. Change-Id: I50dd859a11b1c2cea854a8e27d4bf11a411aa45c PiperOrigin-RevId: 212929704
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/platform/kvm/address_space.go29
-rw-r--r--pkg/sentry/platform/kvm/kvm.go1
-rw-r--r--pkg/sentry/platform/kvm/kvm_const.go8
-rw-r--r--pkg/sentry/platform/kvm/machine.go30
4 files changed, 47 insertions, 21 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 463617170..c4293c517 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -26,31 +26,26 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
-type vCPUBitArray [(_KVM_NR_VCPUS + 63) / 64]uint64
-
// dirtySet tracks vCPUs for invalidation.
type dirtySet struct {
- vCPUs vCPUBitArray
+ vCPUs []uint64
}
// forEach iterates over all CPUs in the dirty set.
func (ds *dirtySet) forEach(m *machine, fn func(c *vCPU)) {
- var localSet vCPUBitArray
- for index := 0; index < len(ds.vCPUs); index++ {
- // Clear the dirty set, copy to the local one.
- localSet[index] = atomic.SwapUint64(&ds.vCPUs[index], 0)
- }
-
m.mu.RLock()
defer m.mu.RUnlock()
- for _, c := range m.vCPUs {
- index := uint64(c.id) / 64
- bit := uint64(1) << uint(c.id%64)
-
- // Call the function if it was set.
- if localSet[index]&bit != 0 {
- fn(c)
+ for index := range ds.vCPUs {
+ mask := atomic.SwapUint64(&ds.vCPUs[index], 0)
+ if mask != 0 {
+ for bit := 0; bit < 64; bit++ {
+ if mask&(1<<uint64(bit)) == 0 {
+ continue
+ }
+ id := 64*index + bit
+ fn(m.vCPUsByID[id])
+ }
}
}
}
@@ -92,7 +87,7 @@ type addressSpace struct {
pageTables *pagetables.PageTables
// dirtySet is the set of dirty vCPUs.
- dirtySet dirtySet
+ dirtySet *dirtySet
// files contains files mapped in the host address space.
//
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 19bc2d515..0c4dff308 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -143,6 +143,7 @@ func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan stru
filemem: k.FileMem,
machine: k.machine,
pageTables: pageTables,
+ dirtySet: k.machine.newDirtySet(),
}, nil, nil
}
diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go
index c819fd16f..ca44c31b3 100644
--- a/pkg/sentry/platform/kvm/kvm_const.go
+++ b/pkg/sentry/platform/kvm/kvm_const.go
@@ -25,6 +25,7 @@ const (
_KVM_SET_TSS_ADDR = 0xae47
_KVM_RUN = 0xae80
_KVM_NMI = 0xae9a
+ _KVM_CHECK_EXTENSION = 0xae03
_KVM_INTERRUPT = 0x4004ae86
_KVM_SET_MSRS = 0x4008ae89
_KVM_SET_USER_MEMORY_REGION = 0x4020ae46
@@ -49,9 +50,14 @@ const (
_KVM_EXIT_INTERNAL_ERROR = 0x11
)
+// KVM capability options.
+const (
+ _KVM_CAP_MAX_VCPUS = 0x42
+)
+
// KVM limits.
const (
- _KVM_NR_VCPUS = 0x100
+ _KVM_NR_VCPUS = 0xff
_KVM_NR_INTERRUPTS = 0x100
_KVM_NR_CPUID_ENTRIES = 0x100
)
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 68e099d1b..9f60b6b31 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -22,6 +22,7 @@ import (
"syscall"
"gvisor.googlesource.com/gvisor/pkg/atomicbitops"
+ "gvisor.googlesource.com/gvisor/pkg/log"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/procid"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
@@ -55,6 +56,12 @@ type machine struct {
//
// These are populated dynamically.
vCPUs map[uint64]*vCPU
+
+ // vCPUsByID are the machine vCPUs, can be indexed by the vCPU's ID.
+ vCPUsByID map[int]*vCPU
+
+ // maxVCPUs is the maximum number of vCPUs supported by the machine.
+ maxVCPUs int
}
const (
@@ -135,6 +142,7 @@ func (m *machine) newVCPU() *vCPU {
c.CPU.Init(&m.kernel)
c.CPU.KernelSyscall = bluepillSyscall
c.CPU.KernelException = bluepillException
+ m.vCPUsByID[c.id] = c
// Ensure the signal mask is correct.
if err := c.setSignalMask(); err != nil {
@@ -160,14 +168,23 @@ func (m *machine) newVCPU() *vCPU {
func newMachine(vm int) (*machine, error) {
// Create the machine.
m := &machine{
- fd: vm,
- vCPUs: make(map[uint64]*vCPU),
+ fd: vm,
+ vCPUs: make(map[uint64]*vCPU),
+ vCPUsByID: make(map[int]*vCPU),
}
m.available.L = &m.mu
m.kernel.Init(ring0.KernelOpts{
PageTables: pagetables.New(newAllocator()),
})
+ maxVCPUs, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, uintptr(m.fd), _KVM_CHECK_EXTENSION, _KVM_CAP_MAX_VCPUS)
+ if errno != 0 {
+ m.maxVCPUs = _KVM_NR_VCPUS
+ } else {
+ m.maxVCPUs = int(maxVCPUs)
+ }
+ log.Debugf("The maximum number of vCPUs is %d.", m.maxVCPUs)
+
// Apply the physical mappings. Note that these mappings may point to
// guest physical addresses that are not actually available. These
// physical pages are mapped on demand, see kernel_unsafe.go.
@@ -315,7 +332,7 @@ func (m *machine) Get() *vCPU {
}
// Create a new vCPU (maybe).
- if len(m.vCPUs) < _KVM_NR_VCPUS {
+ if len(m.vCPUs) < m.maxVCPUs {
c := m.newVCPU()
c.lock()
m.vCPUs[tid] = c
@@ -365,6 +382,13 @@ func (m *machine) Put(c *vCPU) {
m.available.Signal()
}
+// newDirtySet returns a new dirty set.
+func (m *machine) newDirtySet() *dirtySet {
+ return &dirtySet{
+ vCPUs: make([]uint64, (m.maxVCPUs+63)/64, (m.maxVCPUs+63)/64),
+ }
+}
+
// lock marks the vCPU as in user mode.
//
// This should only be called directly when known to be safe, i.e. when