vmx.c
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86_emulate.h"
#include "irq.h"
#include "vmx.h"
#include "segment_descriptor.h"

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>

#include <asm/io.h>
#include <asm/desc.h>

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

struct vmcs {
        u32 revision_id;
        u32 abort;
        char data[0];
};

struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        int                   launched;
        u8                    fail;
        struct kvm_msr_entry *guest_msrs;
        struct kvm_msr_entry *host_msrs;
        int                   nmsrs;
        int                   save_nmsrs;
        int                   msr_offset_efer;
#ifdef CONFIG_X86_64
        int                   msr_offset_kernel_gs_base;
#endif
        struct vmcs          *vmcs;
        struct {
                int           loaded;
                u16           fs_sel, gs_sel, ldt_sel;
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
        } host_state;
};

static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
        return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static int init_rmode_tss(struct kvm *kvm);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);

static struct page *vmx_io_bitmap_a;
static struct page *vmx_io_bitmap_b;

#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)

static struct vmcs_config {
        int size;
        int order;
        u32 revision_id;
        u32 pin_based_exec_ctrl;
        u32 cpu_based_exec_ctrl;
        u32 vmexit_ctrl;
        u32 vmentry_ctrl;
} vmcs_config;

#define VMX_SEGMENT_FIELD(seg)                                  \
        [VCPU_SREG_##seg] = {                                   \
                .selector = GUEST_##seg##_SELECTOR,             \
                .base = GUEST_##seg##_BASE,                     \
                .limit = GUEST_##seg##_LIMIT,                   \
                .ar_bytes = GUEST_##seg##_AR_BYTES,             \
        }

static struct kvm_vmx_segment_field {
        unsigned selector;
        unsigned base;
        unsigned limit;
        unsigned ar_bytes;
} kvm_vmx_segment_fields[] = {
        VMX_SEGMENT_FIELD(CS),
        VMX_SEGMENT_FIELD(DS),
        VMX_SEGMENT_FIELD(ES),
        VMX_SEGMENT_FIELD(FS),
        VMX_SEGMENT_FIELD(GS),
        VMX_SEGMENT_FIELD(SS),
        VMX_SEGMENT_FIELD(TR),
        VMX_SEGMENT_FIELD(LDTR),
};

/*
 * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
 * away by decrementing the array size.
 */
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
        MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
#endif
        MSR_EFER, MSR_K6_STAR,
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)

static void load_msrs(struct kvm_msr_entry *e, int n)
{
        int i;

        for (i = 0; i < n; ++i)
                wrmsrl(e[i].index, e[i].data);
}

static void save_msrs(struct kvm_msr_entry *e, int n)
{
        int i;

        for (i = 0; i < n; ++i)
                rdmsrl(e[i].index, e[i].data);
}

static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr)
{
        return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
}

static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx)
{
        int efer_offset = vmx->msr_offset_efer;

        return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) !=
                msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
}

static inline int is_page_fault(u32 intr_info)
{
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
                (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_no_device(u32 intr_info)
{
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
                             INTR_INFO_VALID_MASK)) ==
                (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}

static inline int is_external_interrupt(u32 intr_info)
{
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
                == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}

static inline int cpu_has_vmx_tpr_shadow(void)
{
        return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
}

static inline int vm_need_tpr_shadow(struct kvm *kvm)
{
        return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
}

static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
        int i;

        for (i = 0; i < vmx->nmsrs; ++i)
                if (vmx->guest_msrs[i].index == msr)
                        return i;
        return -1;
}

static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
        int i;

        i = __find_msr_index(vmx, msr);
        if (i >= 0)
                return &vmx->guest_msrs[i];
        return NULL;
}

static void vmcs_clear(struct vmcs *vmcs)
{
        u64 phys_addr = __pa(vmcs);
        u8 error;

        asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
                      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
                      : "cc", "memory");
        if (error)
                printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
                       vmcs, phys_addr);
}

static void __vcpu_clear(void *arg)
{
        struct vcpu_vmx *vmx = arg;
        int cpu = raw_smp_processor_id();

        if (vmx->vcpu.cpu == cpu)
                vmcs_clear(vmx->vmcs);
        if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
                per_cpu(current_vmcs, cpu) = NULL;
        rdtscll(vmx->vcpu.host_tsc);
}

static void vcpu_clear(struct vcpu_vmx *vmx)
{
        if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1)
                smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
                                         vmx, 0, 1);
        else
                __vcpu_clear(vmx);
        vmx->launched = 0;
}

static unsigned long vmcs_readl(unsigned long field)
{
        unsigned long value;

        asm volatile (ASM_VMX_VMREAD_RDX_RAX
                      : "=a"(value) : "d"(field) : "cc");
        return value;
}

static u16 vmcs_read16(unsigned long field)
{
        return vmcs_readl(field);
}

static u32 vmcs_read32(unsigned long field)
{
        return vmcs_readl(field);
}

static u64 vmcs_read64(unsigned long field)
{
#ifdef CONFIG_X86_64
        return vmcs_readl(field);
#else
        return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
#endif
}

static noinline void vmwrite_error(unsigned long field, unsigned long value)
{
        printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
               field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
        dump_stack();
}

static void vmcs_writel(unsigned long field, unsigned long value)
{
        u8 error;

        asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
                      : "=q"(error) : "a"(value), "d"(field) : "cc");
        if (unlikely(error))
                vmwrite_error(field, value);
}

static void vmcs_write16(unsigned long field, u16 value)
{
        vmcs_writel(field, value);
}

static void vmcs_write32(unsigned long field, u32 value)
{
        vmcs_writel(field, value);
}

static void vmcs_write64(unsigned long field, u64 value)
{
#ifdef CONFIG_X86_64
        vmcs_writel(field, value);
#else
        vmcs_writel(field, value);
        asm volatile ("");
        vmcs_writel(field+1, value >> 32);
#endif
}

static void vmcs_clear_bits(unsigned long field, u32 mask)
{
        vmcs_writel(field, vmcs_readl(field) & ~mask);
}

static void vmcs_set_bits(unsigned long field, u32 mask)
{
        vmcs_writel(field, vmcs_readl(field) | mask);
}

static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
        u32 eb;

        eb = 1u << PF_VECTOR;
        if (!vcpu->fpu_active)
                eb |= 1u << NM_VECTOR;
        if (vcpu->guest_debug.enabled)
                eb |= 1u << 1;
        if (vcpu->rmode.active)
                eb = ~0;
        vmcs_write32(EXCEPTION_BITMAP, eb);
}

static void reload_tss(void)
{
#ifndef CONFIG_X86_64
        /*
         * VT restores TR but not its size.  Useless.
         */
        struct descriptor_table gdt;
        struct segment_descriptor *descs;

        get_gdt(&gdt);
        descs = (void *)gdt.base;
        descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
        load_TR_desc();
#endif
}

static void load_transition_efer(struct vcpu_vmx *vmx)
{
        u64 trans_efer;
        int efer_offset = vmx->msr_offset_efer;

        trans_efer = vmx->host_msrs[efer_offset].data;
        trans_efer &= ~EFER_SAVE_RESTORE_BITS;
        trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
        wrmsrl(MSR_EFER, trans_efer);
        vmx->vcpu.stat.efer_reload++;
}

static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        if (vmx->host_state.loaded)
                return;

        vmx->host_state.loaded = 1;
        /*
         * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
         * allow segment selectors with cpl > 0 or ti == 1.
         */
        vmx->host_state.ldt_sel = read_ldt();
        vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
        vmx->host_state.fs_sel = read_fs();
        if (!(vmx->host_state.fs_sel & 7)) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmx->host_state.fs_reload_needed = 0;
        } else {
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
        vmx->host_state.gs_sel = read_gs();
        if (!(vmx->host_state.gs_sel & 7))
                vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
        else {
                vmcs_write16(HOST_GS_SELECTOR, 0);
                vmx->host_state.gs_ldt_reload_needed = 1;
        }

#ifdef CONFIG_X86_64
        vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
        vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
        vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
        vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif

#ifdef CONFIG_X86_64
        if (is_long_mode(&vmx->vcpu)) {
                save_msrs(vmx->host_msrs +
                          vmx->msr_offset_kernel_gs_base, 1);
        }
#endif
        load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
        if (msr_efer_need_save_restore(vmx))
                load_transition_efer(vmx);
}

static void vmx_load_host_state(struct vcpu_vmx *vmx)
{
        unsigned long flags;

        if (!vmx->host_state.loaded)
                return;

        vmx->host_state.loaded = 0;
        if (vmx->host_state.fs_reload_needed)
                load_fs(vmx->host_state.fs_sel);
        if (vmx->host_state.gs_ldt_reload_needed) {
                load_ldt(vmx->host_state.ldt_sel);
                /*
                 * If we have to reload gs, we must take care to
                 * preserve our gs base.
                 */
                local_irq_save(flags);
                load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
                wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
                local_irq_restore(flags);
        }
        reload_tss();
        save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
        load_msrs(vmx->host_msrs, vmx->save_nmsrs);
        if (msr_efer_need_save_restore(vmx))
                load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
}

/*
 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
 * vcpu mutex is already taken.
 */
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 phys_addr = __pa(vmx->vmcs);
        u64 tsc_this, delta;

        if (vcpu->cpu != cpu) {
                vcpu_clear(vmx);
                kvm_migrate_apic_timer(vcpu);
        }

        if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
                u8 error;

                per_cpu(current_vmcs, cpu) = vmx->vmcs;
                asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
                              : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
                              : "cc");
                if (error)
                        printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
                               vmx->vmcs, phys_addr);
        }

        if (vcpu->cpu != cpu) {
                struct descriptor_table dt;
                unsigned long sysenter_esp;

                vcpu->cpu = cpu;
                /*
                 * Linux uses per-cpu TSS and GDT, so set these when switching
                 * processors.
                 */
                vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
                get_gdt(&dt);
                vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */

                rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
                vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */

                /*
                 * Make sure the time stamp counter is monotonous.
                 */
                rdtscll(tsc_this);
                delta = vcpu->host_tsc - tsc_this;
                vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
        }
}

static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
        vmx_load_host_state(to_vmx(vcpu));
        kvm_put_guest_fpu(vcpu);
}

static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
{
        if (vcpu->fpu_active)
                return;
        vcpu->fpu_active = 1;
        vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
        if (vcpu->cr0 & X86_CR0_TS)
                vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
        update_exception_bitmap(vcpu);
}

static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
{
        if (!vcpu->fpu_active)
                return;
        vcpu->fpu_active = 0;
        vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
        update_exception_bitmap(vcpu);
}

static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
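
The to_vmx() helper near the top of the file relies on the standard container_of() embedding idiom: struct kvm_vcpu is the first member of struct vcpu_vmx, and a pointer to that embedded member can be converted back to the enclosing structure. Below is a minimal standalone sketch of that idiom; the *_demo struct names, the local container_of macro, and main() are illustrative stand-ins for this note only, not part of vmx.c.

/* Standalone userspace sketch of the container_of() idiom used by to_vmx(). */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kvm_vcpu_demo { int vcpu_id; };     /* stand-in for struct kvm_vcpu */

struct vcpu_vmx_demo {                     /* stand-in for struct vcpu_vmx */
        struct kvm_vcpu_demo vcpu;         /* embedded member, as in vmx.c */
        int launched;
};

static struct vcpu_vmx_demo *to_vmx_demo(struct kvm_vcpu_demo *vcpu)
{
        return container_of(vcpu, struct vcpu_vmx_demo, vcpu);
}

int main(void)
{
        struct vcpu_vmx_demo vmx = { .vcpu = { .vcpu_id = 0 }, .launched = 1 };
        struct kvm_vcpu_demo *inner = &vmx.vcpu;

        /* Recover the containing vcpu_vmx from the embedded kvm_vcpu. */
        printf("launched = %d\n", to_vmx_demo(inner)->launched);
        return 0;
}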