
Commit 4f350c6

jsmattsonjr authored and bonzini committed
kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly
When emulating a nested VM-entry from L1 to L2, several control field
validation checks are deferred to the hardware. Should one of these
validation checks fail, vcpu_vmx_run will set the vmx->fail flag. When
this happens, the L2 guest state is not loaded (even in part), and
execution should continue in L1 with the next instruction after the
VMLAUNCH/VMRESUME. The VMCS12 is not modified (except for the
VM-instruction error field), the VMCS12 MSR save/load lists are not
processed, and the CPU state is not loaded from the VMCS12 host area.
Moreover, the vmcs02 exit reason is stale, so it should not be
consulted for any reason.

Signed-off-by: Jim Mattson <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
1 parent b060ca3 commit 4f350c6
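The diff below restructures nested_vmx_vmexit() so that everything that consumes vmcs12 or stale vmcs02 state is skipped when vmx->fail is set. As a reading aid, here is a small, self-contained C model of the resulting two-path flow; struct vmx_stub and nested_vmexit_model() are illustrative stand-ins for this sketch, not KVM code, and the puts() calls merely name the real helpers that run on each path.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the bits of struct vcpu_vmx the sketch needs. */
struct vmx_stub {
        bool fail;  /* set by vcpu_vmx_run on a deferred early VM-entry failure */
};

static void nested_vmexit_model(struct vmx_stub *vmx)
{
        /* Work common to both paths: leave guest mode, switch back to vmcs01. */
        puts("leave_guest_mode, vmx_switch_vmcs(vmcs01)");

        if (!vmx->fail) {
                /*
                 * Ordinary nested VM-exit: vmcs12 is updated, the VM-exit MSR
                 * store list runs, and L1 host state is loaded from vmcs12.
                 */
                puts("prepare_vmcs12, nested_vmx_store_msr, load_vmcs12_host_state");
                return;
        }

        /*
         * Deferred early VM-entry failure: vmcs12 is untouched except for the
         * VM-instruction error, and L1 resumes after the VMLAUNCH/VMRESUME.
         */
        puts("nested_vmx_failValid(VMXERR_ENTRY_INVALID_CONTROL_FIELD)");
        puts("skip_emulated_instruction");
        vmx->fail = false;
}

int main(void)
{
        struct vmx_stub ok = { .fail = false };
        struct vmx_stub bad = { .fail = true };

        nested_vmexit_model(&ok);   /* normal L2 -> L1 exit */
        nested_vmexit_model(&bad);  /* early VMLAUNCH/VMRESUME failure */
        return 0;
}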

File tree

1 file changed: +75 -59 lines changed

arch/x86/kvm/vmx.c

Lines changed: 75 additions & 59 deletions
@@ -8344,12 +8344,14 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
-                                vmcs_readl(EXIT_QUALIFICATION),
-                                vmx->idt_vectoring_info,
-                                intr_info,
-                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
-                                KVM_ISA_VMX);
+        if (vmx->nested.nested_run_pending)
+                return false;
+
+        if (unlikely(vmx->fail)) {
+                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
+                                    vmcs_read32(VM_INSTRUCTION_ERROR));
+                return true;
+        }
 
         /*
          * The host physical addresses of some pages of guest memory
@@ -8363,14 +8365,12 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
          */
         nested_mark_vmcs12_pages_dirty(vcpu);
 
-        if (vmx->nested.nested_run_pending)
-                return false;
-
-        if (unlikely(vmx->fail)) {
-                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
-                                    vmcs_read32(VM_INSTRUCTION_ERROR));
-                return true;
-        }
+        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+                                vmcs_readl(EXIT_QUALIFICATION),
+                                vmx->idt_vectoring_info,
+                                intr_info,
+                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+                                KVM_ISA_VMX);
 
         switch (exit_reason) {
         case EXIT_REASON_EXCEPTION_NMI:
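The two hunks above hoist the nested_run_pending and vmx->fail checks ahead of nested_mark_vmcs12_pages_dirty() and the tracepoint, so stale vmcs02 exit information is never traced for a failed entry. Reconstructed purely from these hunks (an excerpt for reading, not a separately verified copy of the file; intr_info is a pre-existing local set up earlier in the function, outside the hunks), the prologue of nested_vmx_exit_reflected() now reads:

static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
{
        /* intr_info is initialized earlier in the function (not shown here) */
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

        if (vmx->nested.nested_run_pending)
                return false;

        if (unlikely(vmx->fail)) {
                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
                                    vmcs_read32(VM_INSTRUCTION_ERROR));
                return true;
        }

        /* ... nested_mark_vmcs12_pages_dirty(vcpu) and its comment ... */

        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
                                vmcs_readl(EXIT_QUALIFICATION),
                                vmx->idt_vectoring_info,
                                intr_info,
                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
                                KVM_ISA_VMX);

        switch (exit_reason) {
        ...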
@@ -11395,46 +11395,30 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-        u32 vm_inst_error = 0;
 
         /* trying to cancel vmlaunch/vmresume is a bug */
         WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+        /*
+         * The only expected VM-instruction error is "VM entry with
+         * invalid control field(s)."  Anything else indicates a
+         * problem with L0.
+         */
+        WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
+                                   VMXERR_ENTRY_INVALID_CONTROL_FIELD));
+
         leave_guest_mode(vcpu);
-        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
-                       exit_qualification);
 
-        if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
-                                 vmcs12->vm_exit_msr_store_count))
-                nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+        if (likely(!vmx->fail)) {
+                prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+                               exit_qualification);
 
-        if (unlikely(vmx->fail))
-                vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+                if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+                                         vmcs12->vm_exit_msr_store_count))
+                        nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+        }
 
         vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-
-        /*
-         * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
-         * the VM-exit interrupt information (valid interrupt) is always set to
-         * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
-         * kvm_cpu_has_interrupt().  See the commit message for details.
-         */
-        if (nested_exit_intr_ack_set(vcpu) &&
-            exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
-            kvm_cpu_has_interrupt(vcpu)) {
-                int irq = kvm_cpu_get_interrupt(vcpu);
-                WARN_ON(irq < 0);
-                vmcs12->vm_exit_intr_info = irq |
-                        INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
-        }
-
-        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
-                                       vmcs12->exit_qualification,
-                                       vmcs12->idt_vectoring_info_field,
-                                       vmcs12->vm_exit_intr_info,
-                                       vmcs12->vm_exit_intr_error_code,
-                                       KVM_ISA_VMX);
-
         vm_entry_controls_reset_shadow(vmx);
         vm_exit_controls_reset_shadow(vmx);
         vmx_segment_cache_clear(vmx);
@@ -11443,8 +11427,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         if (VMCS02_POOL_SIZE == 0)
                 nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
 
-        load_vmcs12_host_state(vcpu, vmcs12);
-
         /* Update any VMCS fields that might have changed while L2 ran */
         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
         vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
@@ -11493,23 +11475,57 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
          */
         kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-        if (unlikely(vmx->fail)) {
-                /*
-                 * After an early L2 VM-entry failure, we're now back
-                 * in L1 which thinks it just finished a VMLAUNCH or
-                 * VMRESUME instruction, so we need to set the failure
-                 * flag and the VM-instruction error field of the VMCS
-                 * accordingly.
-                 */
-                vmx->fail = 0;
-                nested_vmx_failValid(vcpu, vm_inst_error);
-        }
-
         if (enable_shadow_vmcs)
                 vmx->nested.sync_shadow_vmcs = true;
 
         /* in case we halted in L2 */
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+        if (likely(!vmx->fail)) {
+                /*
+                 * TODO: SDM says that with acknowledge interrupt on
+                 * exit, bit 31 of the VM-exit interrupt information
+                 * (valid interrupt) is always set to 1 on
+                 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
+                 * need kvm_cpu_has_interrupt().  See the commit
+                 * message for details.
+                 */
+                if (nested_exit_intr_ack_set(vcpu) &&
+                    exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+                    kvm_cpu_has_interrupt(vcpu)) {
+                        int irq = kvm_cpu_get_interrupt(vcpu);
+                        WARN_ON(irq < 0);
+                        vmcs12->vm_exit_intr_info = irq |
+                                INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
+                }
+
+                trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+                                               vmcs12->exit_qualification,
+                                               vmcs12->idt_vectoring_info_field,
+                                               vmcs12->vm_exit_intr_info,
+                                               vmcs12->vm_exit_intr_error_code,
+                                               KVM_ISA_VMX);
+
+                load_vmcs12_host_state(vcpu, vmcs12);
+
+                return;
+        }
+
+        /*
+         * After an early L2 VM-entry failure, we're now back
+         * in L1 which thinks it just finished a VMLAUNCH or
+         * VMRESUME instruction, so we need to set the failure
+         * flag and the VM-instruction error field of the VMCS
+         * accordingly.
+         */
+        nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+        /*
+         * The emulated instruction was already skipped in
+         * nested_vmx_run, but the updated RIP was never
+         * written back to the vmcs01.
+         */
+        skip_emulated_instruction(vcpu);
+        vmx->fail = 0;
 }
 
 /*
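As the commit message notes, on a deferred early failure L1 continues with the next instruction after VMLAUNCH/VMRESUME and observes VMfailValid. The following minimal, self-contained sketch models how an L1 hypervisor typically decodes that outcome; the helper name and vmread callback are hypothetical, and only the RFLAGS convention (CF set for VMfailInvalid, ZF set for VMfailValid) and the VM-instruction error field encoding 0x4400 come from the SDM.

#include <stdint.h>

#define VM_INSTRUCTION_ERROR_FIELD 0x4400u  /* VMCS field encoding (SDM) */

/* RFLAGS bits an L1 guest inspects right after VMLAUNCH/VMRESUME. */
struct vmentry_flags {
        int cf;  /* VMfailInvalid: no current VMCS */
        int zf;  /* VMfailValid: error code is in the current VMCS */
};

/*
 * Hypothetical L1-side helper: returns 0 on a successful entry, or the
 * VM-instruction error (e.g. 7, "VM entry with invalid control field(s)")
 * read back through the caller-supplied vmread callback.
 */
uint32_t l1_decode_vmentry(struct vmentry_flags f,
                           uint32_t (*vmread)(uint32_t encoding))
{
        if (f.cf)
                return UINT32_MAX;                         /* VMfailInvalid */
        if (f.zf)
                return vmread(VM_INSTRUCTION_ERROR_FIELD); /* VMfailValid */
        return 0;                                          /* entry succeeded */
}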
