kvm是一种基于硬件辅助的虚拟化解决方案,和qemu模拟器一起来完成整个系统的虚拟化。kvm以内核模块的形式存在,下面以intel虚拟化为例讲述kvm的代码框架和流程。
/*
 * File operations of the "kvm" character device (installed through kvm_dev).
 * Native and compat ioctl requests share one handler, kvm_dev_ioctl;
 * seeking is delegated to noop_llseek.
 */
static struct file_operations kvm_chardev_ops = {
	.llseek		= noop_llseek,
	.compat_ioctl	= kvm_dev_ioctl,
	.unlocked_ioctl	= kvm_dev_ioctl,
};
static struct miscdevice kvm_dev = {
KVM_MINOR,
"kvm",
&kvm_chardev_ops,
};
/*
 * ioctl handler of the "kvm" character device (system-level requests).
 *
 * @filp:  file the ioctl was issued on; only forwarded to the arch handler.
 * @ioctl: request code.
 * @arg:   request argument; must be 0 for KVM_GET_API_VERSION and
 *         KVM_GET_VCPU_MMAP_SIZE.
 *
 * Returns the request-specific result, -EINVAL when a request that takes no
 * argument is given a non-zero one, or whatever kvm_arch_dev_ioctl() returns
 * for requests that are not handled here.
 */
static long kvm_dev_ioctl(struct file *filp,
			  unsigned int ioctl, unsigned long arg)
{
	switch (ioctl) {
	case KVM_GET_API_VERSION:
		/* Report the API version. */
		if (arg)
			return -EINVAL;
		return KVM_API_VERSION;

	case KVM_CREATE_VM:
		/* Create a VM; the result is the VM file descriptor (vmfd). */
		return kvm_dev_ioctl_create_vm(arg);

	case KVM_CHECK_EXTENSION:
		/* No VM exists at this level, hence the NULL first argument. */
		return kvm_vm_ioctl_check_extension_generic(NULL, arg);

	case KVM_GET_VCPU_MMAP_SIZE: {
		long mmap_size;

		if (arg)
			return -EINVAL;

		mmap_size = PAGE_SIZE;		/* struct kvm_run */
#ifdef CONFIG_X86
		mmap_size += PAGE_SIZE;		/* pio data page */
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
		mmap_size += PAGE_SIZE;		/* coalesced mmio ring page */
#endif
		return mmap_size;
	}

	default:
		/* Anything else is handed to the architecture-specific handler. */
		return kvm_arch_dev_ioctl(filp, ioctl, arg);
	}
}
kvm_dev_ioctl_create_vm 首先创建vm,然后挂载vmfd的ioctl接口来提供vm的控制
/*
 * Create a new VM and return a file descriptor ("vmfd") that controls it.
 *
 * @type: VM type, passed through unchanged to kvm_create_vm().
 *
 * The VM is wrapped in an anonymous-inode file named "kvm-vm" whose file
 * operations are kvm_vm_fops, so ioctls issued on the returned descriptor
 * are served by the VM-level ioctl handler.
 *
 * Returns the new descriptor on success, or a negative errno on failure.
 *
 * NOTE(review): this is a condensed walkthrough of the kernel function;
 * further setup steps present in the upstream kernel may be omitted here.
 */
static int kvm_dev_ioctl_create_vm(unsigned long type)
{
	struct kvm *kvm;
	struct file *file;
	int fd;

	kvm = kvm_create_vm(type);		/* create the VM */
	if (IS_ERR(kvm))
		return PTR_ERR(kvm);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		kvm_put_kvm(kvm);		/* drop the VM created above */
		return fd;
	}

	/* Attach the VM-level ioctl interface (kvm_vm_fops) to a new file. */
	file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		kvm_put_kvm(kvm);
		return PTR_ERR(file);
	}

	/* Only now does the reserved descriptor actually refer to the file. */
	fd_install(fd, file);
	return fd;
}
/*
 * File operations of a VM file descriptor (the "kvm-vm" anonymous file).
 * kvm_vm_ioctl is the VM-level control interface; a separate compat entry
 * point is only wired up when CONFIG_KVM_COMPAT is defined.
 */
static struct file_operations kvm_vm_fops = {
	.unlocked_ioctl	= kvm_vm_ioctl,		/* VM-level control */
#ifdef CONFIG_KVM_COMPAT
	.compat_ioctl	= kvm_vm_compat_ioctl,
#endif
	.release	= kvm_vm_release,
	.llseek		= noop_llseek,
};
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
switch (ioctl) {
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);//创建vcpu
break;
case KVM_SET_USER_MEMORY_REGION: {
struct kvm_userspace_memory_region kvm_userspace_mem;
r = -EFAULT;
if (copy_from_user(&kvm_userspace_mem, argp,
sizeof(kvm_userspace_mem)))
goto out;
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);//设置虚机内存
case KVM_IRQFD: {
struct kvm_irqfd data;
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_irqfd(kvm, &data);
break;
}
case KVM_IOEVENTFD: {
struct kvm_ioeventfd data;
r = -EFAULT;
if (copy_from_user(&data, argp, sizeof(data)))
goto out;
r = kvm_ioeventfd(kvm, &data);
break;
}
case KVM_CREATE_DEVICE: {
struct kvm_create_device cd;
r = -EFAULT;
if (copy_from_user(&cd, argp, sizeof(cd)))
goto out;
r = kvm_ioctl_create_device(kvm, &cd);
if (r)
goto out;
r = -EFAULT;
if (copy_to_user(argp, &cd, sizeof(cd)))
goto out;
r = 0;
break;
}
case KVM_CHECK_EXTENSION:
r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
break;
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
out:
return r;
}
kvm_vm_ioctl_create_vcpu创建vcpu时再挂载ioctl接口来提供vcpu级别的控制:
/*
 * File operations of a vcpu file descriptor (the "kvm-vcpu" anonymous file):
 * kvm_vcpu_ioctl is the vcpu-level control interface and kvm_vcpu_mmap
 * serves mmap() requests on the descriptor.
 */
static struct file_operations kvm_vcpu_fops = {
	.release	= kvm_vcpu_release,
	.unlocked_ioctl	= kvm_vcpu_ioctl,
	.mmap		= kvm_vcpu_mmap,
	.llseek		= noop_llseek,
};
/*
 * Allocate a file descriptor for @vcpu.
 *
 * The descriptor is backed by an anonymous-inode file named "kvm-vcpu" that
 * uses kvm_vcpu_fops, so vcpu-level ioctls on it reach kvm_vcpu_ioctl.
 * It is opened read/write with close-on-exec set.
 *
 * Returns whatever anon_inode_getfd() returns.
 */
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
	const int fd_flags = O_RDWR | O_CLOEXEC;

	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, fd_flags);
}
vcpu的ioctl接口提供了run方法
/*
 * ioctl handler of a vcpu file descriptor (vcpu-level requests).
 *
 * NOTE: this is a truncated excerpt. The declarations of `r` and `vcpu`,
 * every case other than KVM_RUN, and the end of the switch and of the
 * function are not shown.
 */
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
/* Load the vcpu before dispatching; a non-zero result is returned as-is. */
r = vcpu_load(vcpu);
if (r)
return r;
switch (ioctl) {
case KVM_RUN:
/* Run the vcpu via the arch-specific entry point, passing its run area. */
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
break;
/* ... excerpt ends here; remaining cases and cleanup are omitted ... */
至此,kvm的所有框架都初始化完成,用户态接口也都实现好了。
- CPU RUN流程
kvm_arch_vcpu_ioctl_run
===》vcpu_run //死循环进入vcpu_enter_guest
===》vcpu_enter_guest //准备guestos的运行上下文
===》kvm_x86_ops->run(vcpu); //回到我们之前提到的初始化流程,这里调用vmx_vcpu_run
===》kvm_x86_ops->handle_exit //vmexit的处理
vmx_vcpu_run主要实现是一段汇编代码,利用intel的VT-x技术,通过vmlaunch/vmresume指令进入guest(non-root模式),并在发生VM exit时返回root模式,从而完成root与non-root的模式切换。
- KVM与qemu的交互流程以及KVM的分布
附件PPT中描述了整个KVM与qemu的交互流程
kvm流程分析.pptx
本文来自网易实践者社区,经作者赵建明授权发布。