diff --git a/core/cpu.c b/core/cpu.c
index 5f82fd7c..39a19094 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -40,10 +40,6 @@
 #include "include/intr.h"
 #include "include/ept.h"
 
-static cpuid_cache_t cache = {
-    .initialized = 0
-};
-
 static void cpu_vmentry_failed(struct vcpu_t *vcpu, vmx_result_t result);
 static int cpu_vmexit_handler(struct vcpu_t *vcpu, exit_reason_t exit_reason,
                               struct hax_tunnel *htun);
@@ -66,15 +62,7 @@ static int cpu_nx_enable(void)
 
 bool cpu_has_feature(uint32_t feature)
 {
-    if (!cache.initialized) {
-        cpuid_host_init(&cache);
-    }
-    return cpuid_host_has_feature(&cache, feature);
-}
-
-void cpu_init_feature_cache(void)
-{
-    cpuid_host_init(&cache);
+    return cpuid_host_has_feature(feature);
 }
 
 void cpu_init_vmx(void *arg)
diff --git a/core/cpuid.c b/core/cpuid.c
index 4a279786..49ae9311 100644
--- a/core/cpuid.c
+++ b/core/cpuid.c
@@ -32,6 +32,15 @@
 
 #include "include/ia32.h"
 
+#define CPUID_CACHE_SIZE  6
+
+typedef struct cpuid_cache_t {
+    uint32_t data[CPUID_CACHE_SIZE];  // Host cached features
+    cpuid_t host_supported;           // Physical CPU supported features
+    cpuid_t hax_supported;            // Hypervisor supported features
+    bool initialized;
+} cpuid_cache_t;
+
 typedef union cpuid_feature_t {
     struct {
         uint32_t index        : 5;
@@ -46,6 +55,14 @@ typedef union cpuid_feature_t {
     uint32_t value;
 } cpuid_feature_t;
 
+static cpuid_cache_t cache = {0};
+
+static hax_cpuid_entry * find_cpuid_entry(hax_cpuid *cpuid_info,
+                                          uint32_t function, uint32_t index);
+static void cpuid_set_0000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+static void cpuid_set_8000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+static void cpuid_set_fixed_features(cpuid_t *cpuid);
+
 void cpuid_query_leaf(cpuid_args_t *args, uint32_t leaf)
 {
     args->eax = leaf;
@@ -59,10 +76,10 @@ void cpuid_query_subleaf(cpuid_args_t *args, uint32_t leaf, uint32_t subleaf)
     asm_cpuid(args);
 }
 
-void cpuid_host_init(cpuid_cache_t *cache)
+void cpuid_host_init(void)
 {
     cpuid_args_t res;
-    uint32_t *data = cache->data;
+    uint32_t *data = cache.data;
 
     cpuid_query_leaf(&res, 0x00000001);
     data[0] = res.ecx;
@@ -76,19 +93,19 @@ void cpuid_host_init(cpuid_cache_t *cache)
     data[4] = res.ecx;
     data[5] = res.edx;
 
-    cache->initialized = 1;
+    cache.initialized = true;
 }
 
-bool cpuid_host_has_feature(cpuid_cache_t *cache, uint32_t feature_key)
+bool cpuid_host_has_feature(uint32_t feature_key)
 {
     cpuid_feature_t feature;
     uint32_t value;
 
     feature.value = feature_key;
-    if (!cache->initialized || feature.index >= CPUID_CACHE_SIZE) {
+    if (!cache.initialized || feature.index >= CPUID_CACHE_SIZE) {
         return cpuid_host_has_feature_uncached(feature_key);
     }
-    value = cache->data[feature.index];
+    value = cache.data[feature.index];
     if (value & (1 << feature.bit)) {
         return true;
     }
@@ -114,3 +131,248 @@ bool cpuid_host_has_feature_uncached(uint32_t feature_key)
     }
     return false;
 }
+
+// Builds the two masks used to filter guest CPUID requests: `host_supported`
+// (probed bit by bit through cpuid_host_has_feature()) and `hax_supported`
+// (the fixed list of features below). host feature_8000_0001_ecx stays 0.
+void cpuid_init_supported_features(void)
+{
+    uint32_t bit, flag, function, x86_feature;
+
+    // Initialize host supported features
+    for (bit = 0; bit < sizeof(uint32_t) * 8; ++bit) {
+        // Unsigned literal: `1 << 31` would overflow a signed int.
+        flag = 1U << bit;
+
+        function = 0x01;
+        x86_feature = FEATURE_KEY_LEAF(0, function, CPUID_REG_ECX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_1_ecx |= flag;
+        }
+
+        x86_feature = FEATURE_KEY_LEAF(1, function, CPUID_REG_EDX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_1_edx |= flag;
+        }
+
+        function = 0x80000001;
+        x86_feature = FEATURE_KEY_LEAF(5, function, CPUID_REG_EDX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_8000_0001_edx |= flag;
+        }
+    }
+
+    hax_log(HAX_LOGI, "%s: host supported features:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cache.host_supported.feature_1_ecx,
+            cache.host_supported.feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, "
+            "feature_8000_0001_edx: %08x\n",
+            cache.host_supported.feature_8000_0001_ecx,
+            cache.host_supported.feature_8000_0001_edx);
+
+    // Initialize HAXM supported features
+    cache.hax_supported = (cpuid_t){
+        .feature_1_ecx =
+            FEATURE(SSE3)       |
+            FEATURE(SSSE3)      |
+            FEATURE(SSE41)      |
+            FEATURE(SSE42)      |
+            FEATURE(CMPXCHG16B) |
+            FEATURE(MOVBE)      |
+            FEATURE(AESNI)      |
+            FEATURE(PCLMULQDQ)  |
+            FEATURE(POPCNT),
+        .feature_1_edx =
+            FEATURE(PAT)        |
+            FEATURE(FPU)        |
+            FEATURE(VME)        |
+            FEATURE(DE)         |
+            FEATURE(TSC)        |
+            FEATURE(MSR)        |
+            FEATURE(PAE)        |
+            FEATURE(MCE)        |
+            FEATURE(CX8)        |
+            FEATURE(APIC)       |
+            FEATURE(SEP)        |
+            FEATURE(MTRR)       |
+            FEATURE(PGE)        |
+            FEATURE(MCA)        |
+            FEATURE(CMOV)       |
+            FEATURE(CLFSH)      |
+            FEATURE(MMX)        |
+            FEATURE(FXSR)       |
+            FEATURE(SSE)        |
+            FEATURE(SSE2)       |
+            FEATURE(SS)         |
+            FEATURE(PSE)        |
+            FEATURE(HTT),
+        .feature_8000_0001_ecx = 0,
+        .feature_8000_0001_edx =
+            FEATURE(NX)         |
+            FEATURE(SYSCALL)    |
+            FEATURE(RDTSCP)     |
+            FEATURE(EM64T)
+    };
+
+    hax_log(HAX_LOGI, "%s: HAXM supported features:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cache.hax_supported.feature_1_ecx,
+            cache.hax_supported.feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, "
+            "feature_8000_0001_edx: %08x\n",
+            cache.hax_supported.feature_8000_0001_ecx,
+            cache.hax_supported.feature_8000_0001_edx);
+}
+
+// Resets a guest feature set to the HAXM defaults with an all-ones mask.
+void cpuid_guest_init(cpuid_t *cpuid)
+{
+    *cpuid = cache.hax_supported;
+    cpuid->features_mask = ~0ULL;
+}
+
+void cpuid_get_features_mask(cpuid_t *cpuid, uint64_t *features_mask)
+{
+    *features_mask = cpuid->features_mask;
+}
+
+void cpuid_set_features_mask(cpuid_t *cpuid, uint64_t features_mask)
+{
+    cpuid->features_mask = features_mask;
+}
+
+void cpuid_get_guest_features(cpuid_t *cpuid, uint32_t *cpuid_1_features_ecx,
+                              uint32_t *cpuid_1_features_edx,
+                              uint32_t *cpuid_8000_0001_features_ecx,
+                              uint32_t *cpuid_8000_0001_features_edx)
+{
+    *cpuid_1_features_ecx         = cpuid->feature_1_ecx;
+    *cpuid_1_features_edx         = cpuid->feature_1_edx;
+    *cpuid_8000_0001_features_ecx = cpuid->feature_8000_0001_ecx;
+    *cpuid_8000_0001_features_edx = cpuid->feature_8000_0001_edx;
+}
+
+// Applies user-supplied CPUID entries to the guest feature set, one handler
+// per supported leaf (0x01 and 0x80000001); other entries are not consulted.
+void cpuid_set_guest_features(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    static void (*cpuid_set_guest_feature[])(cpuid_t *, hax_cpuid *) = {
+        cpuid_set_0000_0001,
+        cpuid_set_8000_0001
+    };
+    static size_t count = sizeof(cpuid_set_guest_feature) /
+                          sizeof(cpuid_set_guest_feature[0]);
+    size_t i;
+
+    hax_log(HAX_LOGI, "%s: before:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cpuid->feature_1_ecx, cpuid->feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, feature_8000_0001_edx: %08x"
+            "\n", cpuid->feature_8000_0001_ecx, cpuid->feature_8000_0001_edx);
+
+    for (i = 0; i < count; ++i) {
+        cpuid_set_guest_feature[i](cpuid, cpuid_info);
+    }
+
+    hax_log(HAX_LOGI, "%s: after:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cpuid->feature_1_ecx, cpuid->feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, feature_8000_0001_edx: %08x"
+            "\n", cpuid->feature_8000_0001_ecx, cpuid->feature_8000_0001_edx);
+}
+
+// Returns the first entry matching (function, index), or NULL if none does.
+static hax_cpuid_entry * find_cpuid_entry(hax_cpuid *cpuid_info,
+                                          uint32_t function, uint32_t index)
+{
+    uint32_t i;
+    hax_cpuid_entry *entry, *found = NULL;
+
+    for (i = 0; i < cpuid_info->total; ++i) {
+        entry = &cpuid_info->entries[i];
+        if (entry->function == function && entry->index == index) {
+            found = entry;
+            break;
+        }
+    }
+
+    return found;
+}
+
+// Leaf 0x01: takes ECX/EDX from the user entry, clears every bit that is not
+// supported by both the host and HAXM, then forces the fixed features on.
+static void cpuid_set_0000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    const uint32_t kFunction = 0x01;
+    hax_cpuid_entry *entry;
+
+    entry = find_cpuid_entry(cpuid_info, kFunction, 0);
+    if (entry == NULL)
+        return;
+
+    hax_log(HAX_LOGI, "%s: function: %08x, index: %u, flags: %08x\n",
+            __func__, entry->function, entry->index, entry->flags);
+    hax_log(HAX_LOGI, "%s: eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
+            __func__, entry->eax, entry->ebx, entry->ecx, entry->edx);
+
+    cpuid->feature_1_ecx = entry->ecx;
+    cpuid->feature_1_edx = entry->edx;
+
+    // Filter the unsupported features
+    cpuid->feature_1_ecx &= cache.host_supported.feature_1_ecx &
+                            cache.hax_supported.feature_1_ecx;
+    cpuid->feature_1_edx &= cache.host_supported.feature_1_edx &
+                            cache.hax_supported.feature_1_edx;
+
+    // Set fixed supported features
+    cpuid_set_fixed_features(cpuid);
+
+    if (entry->ecx != cpuid->feature_1_ecx ||
+        entry->edx != cpuid->feature_1_edx) {
+        hax_log(HAX_LOGW, "%s: filtered or unchanged flags: ecx: %08x, "
+                "edx: %08x\n", __func__, entry->ecx ^ cpuid->feature_1_ecx,
+                entry->edx ^ cpuid->feature_1_edx);
+    }
+}
+
+// Leaf 0x80000001: takes EDX from the user entry and clears every bit that is
+// not supported by both the host and HAXM. ECX is left as it was.
+static void cpuid_set_8000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    const uint32_t kFunction = 0x80000001;
+    hax_cpuid_entry *entry;
+
+    entry = find_cpuid_entry(cpuid_info, kFunction, 0);
+    if (entry == NULL)
+        return;
+
+    hax_log(HAX_LOGI, "%s: function: %08x, index: %u, flags: %08x\n",
+            __func__, entry->function, entry->index, entry->flags);
+    hax_log(HAX_LOGI, "%s: eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
+            __func__, entry->eax, entry->ebx, entry->ecx, entry->edx);
+
+    cpuid->feature_8000_0001_edx = entry->edx;
+
+    // Filter the unsupported features
+    cpuid->feature_8000_0001_edx &=
+            cache.host_supported.feature_8000_0001_edx &
+            cache.hax_supported.feature_8000_0001_edx;
+
+    if (entry->edx != cpuid->feature_8000_0001_edx) {
+        hax_log(HAX_LOGW, "%s: filtered or unchanged flags: edx: %08x\n",
+                __func__, entry->edx ^ cpuid->feature_8000_0001_edx);
+    }
+}
+
+// Forces on the leaf 0x01 EDX bits that are always reported to the guest.
+static void cpuid_set_fixed_features(cpuid_t *cpuid)
+{
+    const uint32_t kFixedFeatures =
+        FEATURE(MCE)  |
+        FEATURE(APIC) |
+        FEATURE(MTRR) |
+        FEATURE(PAT);
+
+    cpuid->feature_1_edx |= kFixedFeatures;
+}
diff --git a/core/hax.c b/core/hax.c
index cb554996..9bc5ea79 100644
--- a/core/hax.c
+++ b/core/hax.c
@@ -373,6 +373,7 @@ int hax_get_capability(void *buf, int bufLeng, int *outLength)
         cap->winfo |= HAX_CAP_TUNNEL_PAGE;
         cap->winfo |= HAX_CAP_RAM_PROTECTION;
         cap->winfo |= HAX_CAP_DEBUG;
+        cap->winfo |= HAX_CAP_CPUID;
         if (cpu_data->vmx_info._ept_cap) {
             cap->winfo |= HAX_CAP_EPT;
         }
@@ -565,7 +566,10 @@ int hax_module_init(void)
         hax_clear_page(hax_cpu_data[cpu_id]->hstate.hfxpage);
         hax_cpu_data[cpu_id]->cpu_id = cpu_id;
     }
-    cpu_init_feature_cache();
+
+    cpuid_host_init();
+    cpuid_init_supported_features();
+
     if (hax_vmx_init() < 0)
         goto out_2;
 
diff --git a/core/include/cpuid.h b/core/include/cpuid.h
index 6536b3d6..2c8ad89b 100644
--- a/core/include/cpuid.h
+++ b/core/include/cpuid.h
@@ -31,20 +31,14 @@
 #ifndef HAX_CORE_CPUID_H_
 #define HAX_CORE_CPUID_H_
 
+#include "../../include/hax.h"
 #include "../../include/hax_types.h"
 
-#define CPUID_CACHE_SIZE  6
-
 #define CPUID_REG_EAX  0
 #define CPUID_REG_ECX  1
 #define CPUID_REG_EDX  2
 #define CPUID_REG_EBX  3
 
-typedef struct cpuid_cache_t {
-    uint32_t data[CPUID_CACHE_SIZE];
-    bool initialized;
-} cpuid_cache_t;
-
 typedef union cpuid_args_t {
     struct {
         uint32_t eax;
@@ -55,6 +49,14 @@ typedef union cpuid_args_t {
     uint32_t regs[4];
 } cpuid_args_t;
 
+typedef struct cpuid_t {
+    uint64_t features_mask;
+    uint32_t feature_1_ecx;
+    uint32_t feature_1_edx;
+    uint32_t feature_8000_0001_ecx;
+    uint32_t feature_8000_0001_edx;
+} cpuid_t;
+
 /*
  * X86 Features
  * ============
@@ -254,8 +256,18 @@ enum {
 void cpuid_query_leaf(cpuid_args_t *args, uint32_t leaf);
 void cpuid_query_subleaf(cpuid_args_t *args, uint32_t leaf, uint32_t subleaf);
 
-void cpuid_host_init(cpuid_cache_t *cache);
-bool cpuid_host_has_feature(cpuid_cache_t *cache, uint32_t feature_key);
+void cpuid_host_init(void);
+bool cpuid_host_has_feature(uint32_t feature_key);
 bool cpuid_host_has_feature_uncached(uint32_t feature_key);
 
+void cpuid_init_supported_features(void);
+void cpuid_guest_init(cpuid_t *cpuid);
+void cpuid_get_features_mask(cpuid_t *cpuid, uint64_t *features_mask);
+void cpuid_set_features_mask(cpuid_t *cpuid, uint64_t features_mask);
+void cpuid_get_guest_features(cpuid_t *cpuid, uint32_t *cpuid_1_features_ecx,
+                              uint32_t *cpuid_1_features_edx,
+                              uint32_t *cpuid_8000_0001_features_ecx,
+                              uint32_t *cpuid_8000_0001_features_edx);
+void cpuid_set_guest_features(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+
 #endif /* HAX_CORE_CPUID_H_ */
diff --git a/core/include/hax_core_interface.h b/core/include/hax_core_interface.h
index 0ad425d7..65ba0611 100644
--- a/core/include/hax_core_interface.h
+++ b/core/include/hax_core_interface.h
@@ -45,6 +45,7 @@ int vcpu_put_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_get_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_set_regs(struct vcpu_t *vcpu, struct vcpu_state_t *vs);
 int vcpu_get_regs(struct vcpu_t *vcpu, struct vcpu_state_t *vs);
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info);
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug);
 
 void * get_vcpu_host(struct vcpu_t *vcpu);
diff --git a/core/include/vcpu.h b/core/include/vcpu.h
index 33bb326d..99ca496e 100644
--- a/core/include/vcpu.h
+++ b/core/include/vcpu.h
@@ -31,6 +31,7 @@
 #ifndef HAX_CORE_VCPU_H_
 #define HAX_CORE_VCPU_H_
 
+#include "cpuid.h"
 #include "emulate.h"
 #include "vmx.h"
 #include "mtrr.h"
@@ -216,7 +217,6 @@ struct vcpu_t {
     uint64_t pae_pdptes[4];
 
     uint64_t cr_pat;
-    uint64_t cpuid_features_flag_mask;
 
     /* Debugging */
     uint32_t debug_control;
@@ -233,6 +233,14 @@ struct vcpu_t {
     struct em_context_t emulate_ctxt;
     struct vcpu_post_mmio post_mmio;
     struct mmio_fetch_cache mmio_fetch;
+
+    // Guest CPUID feature set
+    // * The feature set is the same for every vCPU of a VM, so a CPUID
+    //   instruction executed on any vCPU returns the same result.
+    // * All vCPUs point to a single buffer, which is allocated by vCPU 0 (see
+    //   vcpu_alloc_cpuid()). Features set through any vCPU therefore apply to
+    //   all vCPUs.
+    cpuid_t *guest_cpuid;
 };
 
 #define vmx(v, field) v->vmx.field
@@ -258,6 +266,7 @@ int vcpu_get_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_put_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_get_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t *val);
 int vcpu_put_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t val);
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info);
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug);
 
 /* The declaration for OS wrapper code */
diff --git a/core/vcpu.c b/core/vcpu.c
index 1556d943..9f3cd1fd 100644
--- a/core/vcpu.c
+++ b/core/vcpu.c
@@ -123,6 +123,10 @@ static void vcpu_enter_fpu_state(struct vcpu_t *vcpu);
 static int vcpu_set_apic_base(struct vcpu_t *vcpu, uint64_t val);
 static bool vcpu_is_bsp(struct vcpu_t *vcpu);
 
+static void vcpu_init_cpuid(struct vcpu_t *vcpu);
+static int vcpu_alloc_cpuid(struct vcpu_t *vcpu);
+static void vcpu_free_cpuid(struct vcpu_t *vcpu);
+
 static uint32_t get_seg_present(uint32_t seg)
 {
     mword ldtr_base;
@@ -457,9 +461,15 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id)
     if (!vcpu_vtlb_alloc(vcpu))
         goto fail_6;
 
-    if (hax_vcpu_create_host(vcpu, vm_host, vm->vm_id, vcpu_id))
+    // vcpu_alloc_cpuid() and vcpu_free_cpuid() decide what to do based on
+    // vcpu->vcpu_id, so it has to be valid before either of them runs.
+    vcpu->vcpu_id = vcpu_id;
+    if (!vcpu_alloc_cpuid(vcpu))
         goto fail_7;
 
+    if (hax_vcpu_create_host(vcpu, vm_host, vm->vm_id, vcpu_id))
+        goto fail_8;
+
     vcpu->prev_cpu_id = (uint32_t)(~0ULL);
     vcpu->cpu_id = hax_cpu_id();
     vcpu->vcpu_id = vcpu_id;
@@ -488,6 +498,8 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id)
     hax_log(HAX_LOGD, "vcpu %d is created.\n", vcpu->vcpu_id);
     return vcpu;
 
+fail_8:
+    vcpu_free_cpuid(vcpu);
 fail_7:
     vcpu_vtlb_free(vcpu);
 fail_6:
@@ -542,6 +554,7 @@ static int _vcpu_teardown(struct vcpu_t *vcpu)
     hax_vfree(vcpu->state, sizeof(struct vcpu_state_t));
     vcpu_vtlb_free(vcpu);
     hax_mutex_free(vcpu->tmutex);
+    vcpu_free_cpuid(vcpu);
     hax_vfree(vcpu, sizeof(struct vcpu_t));
 
     hax_log(HAX_LOGI, "vcpu %d is teardown.\n", vcpu_id);
@@ -574,7 +587,6 @@ static void vcpu_init(struct vcpu_t *vcpu)
     // TODO: mtrr ?
 
     vcpu->cr_pat = 0x0007040600070406ULL;
-    vcpu->cpuid_features_flag_mask = 0xffffffffffffffffULL;
     vcpu->cur_state = GS_VALID;
     vmx(vcpu, entry_exception_vector) = ~0u;
     vmx(vcpu, cr0_mask) = 0;
@@ -630,6 +642,9 @@ static void vcpu_init(struct vcpu_t *vcpu)
         vcpu->gstate.apic_base |= APIC_BASE_BSP;
     }
 
+    // Initialize guest CPUID
+    vcpu_init_cpuid(vcpu);
+
     hax_mutex_unlock(vcpu->tmutex);
 }
 
@@ -2536,50 +2551,16 @@ static void handle_cpuid_virtual(struct vcpu_t *vcpu, uint32_t a, uint32_t c)
     uint32_t hw_family;
     uint32_t hw_model;
     uint8_t physical_address_size;
+    uint32_t cpuid_1_features_ecx, cpuid_1_features_edx,
+             cpuid_8000_0001_features_ecx, cpuid_8000_0001_features_edx;
 
-    static uint32_t cpuid_1_features_edx =
-            FEATURE(PAT)        |
-            FEATURE(FPU)        |
-            FEATURE(VME)        |
-            FEATURE(DE)         |
-            FEATURE(TSC)        |
-            FEATURE(MSR)        |
-            FEATURE(PAE)        |
-            FEATURE(MCE)        |
-            FEATURE(CX8)        |
-            FEATURE(APIC)       |
-            FEATURE(SEP)        |
-            FEATURE(MTRR)       |
-            FEATURE(PGE)        |
-            FEATURE(MCA)        |
-            FEATURE(CMOV)       |
-            FEATURE(CLFSH)      |
-            FEATURE(MMX)        |
-            FEATURE(FXSR)       |
-            FEATURE(SSE)        |
-            FEATURE(SSE2)       |
-            FEATURE(SS)         |
-            FEATURE(PSE)        |
-            FEATURE(HTT);
-
-    static uint32_t cpuid_1_features_ecx =
-            FEATURE(SSE3)       |
-            FEATURE(SSSE3)      |
-            FEATURE(SSE41)      |
-            FEATURE(SSE42)      |
-            FEATURE(CMPXCHG16B) |
-            FEATURE(MOVBE)      |
-            FEATURE(AESNI)      |
-            FEATURE(PCLMULQDQ)  |
-            FEATURE(POPCNT);
-
-    static uint32_t cpuid_8000_0001_features_edx =
-            FEATURE(NX)         |
-            FEATURE(SYSCALL)    |
-            FEATURE(RDTSCP)     |
-            FEATURE(EM64T);
-
-    static uint32_t cpuid_8000_0001_features_ecx = 0;
+    // To fully support CPUID instructions (opcode = 0F A2) by software, it is
+    // recommended to add opcode_table_0FA2[] in core/emulate.c to emulate
+    // (Refer to Intel SDM Vol. 2A 3.2 CPUID).
+    cpuid_get_guest_features(vcpu->guest_cpuid, &cpuid_1_features_ecx,
+                             &cpuid_1_features_edx,
+                             &cpuid_8000_0001_features_ecx,
+                             &cpuid_8000_0001_features_edx);
 
     switch (a) {
         case 0: {  // Maximum Basic Information
@@ -3454,7 +3435,7 @@ static int handle_msr_read(struct vcpu_t *vcpu, uint32_t msr, uint64_t *val)
             break;
         }
         case IA32_CPUID_FEATURE_MASK: {
-            *val = vcpu->cpuid_features_flag_mask;
+            cpuid_get_features_mask(vcpu->guest_cpuid, val);
             break;
         }
         case IA32_EBC_FREQUENCY_ID: {
@@ -3606,7 +3587,7 @@ static int handle_msr_write(struct vcpu_t *vcpu, uint32_t msr, uint64_t val,
             break;
         }
         case IA32_CPUID_FEATURE_MASK: {
-            vcpu->cpuid_features_flag_mask = val;
+            cpuid_set_features_mask(vcpu->guest_cpuid, val);
             break;
         }
         case IA32_EFER: {
@@ -4177,6 +4158,31 @@ int vcpu_set_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t val)
     return handle_msr_write(vcpu, entry, val, true);
 }
 
+// Updates the guest CPUID feature set from the user-supplied entries.
+// Returns 0 on success, -EINVAL if `total` is 0 or exceeds
+// HAX_MAX_CPUID_ENTRIES, and -EFAULT if the vCPU is running.
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info)
+{
+    hax_log(HAX_LOGI, "%s: vCPU #%u is setting guest CPUID.\n", __func__,
+            vcpu->vcpu_id);
+
+    if (cpuid_info->total == 0 || cpuid_info->total > HAX_MAX_CPUID_ENTRIES) {
+        hax_log(HAX_LOGW, "%s: No entry or exceeds maximum: total = %u.\n",
+                __func__, cpuid_info->total);
+        return -EINVAL;
+    }
+
+    if (vcpu->is_running) {
+        hax_log(HAX_LOGW, "%s: Cannot set CPUID: vcpu->is_running = %llu.\n",
+                __func__, vcpu->is_running);
+        return -EFAULT;
+    }
+
+    cpuid_set_guest_features(vcpu->guest_cpuid, cpuid_info);
+
+    return 0;
+}
+
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug)
 {
     bool hbreak_enabled = false;
@@ -4496,3 +4502,62 @@ static bool vcpu_is_bsp(struct vcpu_t *vcpu)
     // TODO: add an API to set bootstrap processor
     return (vcpu->vm->bsp_vcpu_id == vcpu->vcpu_id);
 }
+
+// Points vcpu->guest_cpuid at the VM-wide feature set: vCPU 0 fills its own
+// buffer with the HAXM defaults, every other vCPU borrows vCPU 0's buffer.
+static void vcpu_init_cpuid(struct vcpu_t *vcpu)
+{
+    struct vcpu_t *vcpu_0;
+
+    if (vcpu->vcpu_id != 0) {
+        vcpu_0 = hax_get_vcpu(vcpu->vm->vm_id, 0, 0);
+        hax_assert(vcpu_0 != NULL);
+        vcpu->guest_cpuid = vcpu_0->guest_cpuid;
+        hax_log(HAX_LOGI, "%s: referenced vcpu[%u].guest_cpuid to vcpu[%u].\n",
+                __func__, vcpu->vcpu_id, vcpu_0->vcpu_id);
+        return;
+    }
+
+    cpuid_guest_init(vcpu->guest_cpuid);
+    hax_log(HAX_LOGI, "%s: initialized vcpu[%u].guest_cpuid with default "
+            "feature set.\n", __func__, vcpu->vcpu_id);
+}
+
+// Allocates the CPUID buffer. Returns 1 on success and 0 on failure.
+static int vcpu_alloc_cpuid(struct vcpu_t *vcpu)
+{
+    // Only the first vCPU will allocate the CPUID memory, and other vCPUs will
+    // share this memory.
+    if (vcpu->vcpu_id != 0)
+        return 1;
+
+    vcpu->guest_cpuid = hax_vmalloc(sizeof(cpuid_t), HAX_MEM_NONPAGE);
+    if (vcpu->guest_cpuid == NULL)
+        return 0;
+
+    return 1;
+}
+
+// Releases vcpu->guest_cpuid: vCPU 0 frees the buffer, other vCPUs only drop
+// their pointer. NOTE(review): nothing visible here stops vCPU 0 from being
+// torn down while other vCPUs still point at its buffer -- confirm ordering.
+static void vcpu_free_cpuid(struct vcpu_t *vcpu)
+{
+    if (vcpu->vcpu_id != 0) {
+        vcpu->guest_cpuid = NULL;
+        hax_log(HAX_LOGI, "%s: dereferenced vcpu[%u].guest_cpuid from vcpu[0]."
+                "\n", __func__, vcpu->vcpu_id);
+        return;
+    }
+
+    if (vcpu->guest_cpuid == NULL) {
+        hax_log(HAX_LOGW, "%s: already freed vcpu[%u].guest_cpuid.\n",
+                __func__, vcpu->vcpu_id);
+        return;
+    }
+
+    hax_vfree(vcpu->guest_cpuid, sizeof(cpuid_t));
+    vcpu->guest_cpuid = NULL;
+    hax_log(HAX_LOGI, "%s: freed vcpu[%u].guest_cpuid.\n", __func__,
+            vcpu->vcpu_id);
+}
diff --git a/docs/api.md b/docs/api.md
index 585bbb29..73aa2687 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -97,6 +97,7 @@ itself as well as the host environment.
   #define HAX_CAP_TUNNEL_PAGE        (1 << 5)
   #define HAX_CAP_DEBUG              (1 << 7)
   #define HAX_CAP_IMPLICIT_RAMBLOCK  (1 << 8)
+  #define HAX_CAP_CPUID              (1 << 9)
   ```
 * (Output) `wstatus`: The first set of capability flags reported to the caller.
 The following bits may be set, while others are reserved:
@@ -124,6 +125,7 @@ feature.
   * `HAX_CAP_64BIT_SETRAM`: If set, `HAX_VM_IOCTL_SET_RAM2` is available.
   * `HAX_CAP_IMPLICIT_RAMBLOCK`: If set, `HAX_VM_IOCTL_SET_RAM2` supports the
 `HAX_RAM_INFO_STANDALONE` flag.
+  * `HAX_CAP_CPUID`: If set, `HAX_VCPU_IOCTL_SET_CPUID` is available.
 * (Output) `win_refcount`: (Windows only)
 * (Output) `mem_quota`: If the global memory cap setting is enabled (q.v.
 `HAX_IOCTL_SET_MEMLIMIT`), reports the current quota on memory allocation (the
@@ -689,3 +691,78 @@ Injects an interrupt into this VCPU.
 * Error codes:
   * `STATUS_INVALID_PARAMETER` (Windows): The input buffer provided by the
 caller is smaller than the size of `uint32_t`.
+
+#### HAX\_VCPU\_IOCTL\_SET\_CPUID
+Defines how the VCPU responds to the CPU identification (CPUID) instruction.
+
+HAXM initializes a minimal feature set for guest VCPUs in kernel space. This
+ensures that most modern CPUs can support these basic CPUID features. Only the
+supported CPUID instructions in the feature set will be passed to the physical
+CPU for processing.
+
+This IOCTL is used to dynamically adjust the supported feature set of CPUID for
+guest VCPUs so as to leverage the latest features from modern CPUs. The features
+to be enabled will be incorporated into the feature set, while the features to
+be disabled will be removed. If the physical CPU does not support some specified
+CPUID features, the request to enable them will be ignored. Usually, this IOCTL
+is invoked when the VM is initially configured.
+
+All VCPUs in a VM share the same feature set. This avoids the confusion that
+would arise if different VCPUs of a multi-VCPU guest returned different results
+for the same CPUID instruction. Send this IOCTL to any VCPU to set the CPUID
+features; the change then applies to all VCPUs.
+
+* Since: Capability `HAX_CAP_CPUID`
+* Parameter: `struct hax_cpuid cpuid`, where
+  ```
+  struct hax_cpuid {
+      uint32_t total;
+      uint32_t pad;
+      hax_cpuid_entry entries[0];
+  } __attribute__ ((__packed__));
+  ```
+  where
+  ```
+  #define HAX_MAX_CPUID_ENTRIES 0x40
+  struct hax_cpuid_entry {
+      uint32_t function;
+      uint32_t index;
+      uint32_t flags;
+      uint32_t eax;
+      uint32_t ebx;
+      uint32_t ecx;
+      uint32_t edx;
+      uint32_t pad[3];
+  } __attribute__ ((__packed__));
+  ```
+  `hax_cpuid` is a variable-length type. The accessible memory of `entries` is
+  decided by the actual allocation from user space. On macOS and Linux, the
+  argument passed to `ioctl()` must be the address of a pointer to `hax_cpuid`,
+  not the address of the `hax_cpuid` itself.
+  * (Input) `total`: Number of elements in `entries`. The valid value should be
+in the range (0, `HAX_MAX_CPUID_ENTRIES`].
+  * (Input) `pad`: Ignored.
+  * (Input) `entries`: Array of `struct hax_cpuid_entry`. This array contains
+the CPUID feature set of the guest VCPU that is pre-configured by the VM in user
+space.
+
+  For each `struct hax_cpuid_entry` in `entries`:
+  * (Input) `function`: CPUID function code, i.e., initial EAX value.
+  * (Input) `index`: Sub-leaf index.
+  * (Input) `flags`: Feature flags.
+  * (Input) `eax`: EAX register value.
+  * (Input) `ebx`: EBX register value.
+  * (Input) `ecx`: ECX register value.
+  * (Input) `edx`: EDX register value.
+  * (Input) `pad`: Ignored.
+* Error codes:
+  * `STATUS_INVALID_PARAMETER` (Windows): The input buffer provided by the
+caller is smaller than the size of `struct hax_cpuid`, or too small to hold
+`total` entries.
+  * `STATUS_UNSUCCESSFUL` (Windows): Failed to set CPUID features.
+  * `-E2BIG` (macOS, Linux): The input value of `total` is greater than
+`HAX_MAX_CPUID_ENTRIES`.
+  * `-EFAULT` (macOS, Linux): Failed to copy the contents of `entries` to kernel
+memory, or the VCPU is currently running.
+  * `-EINVAL` (macOS, Linux): The input value of `total` is 0.
+  * `-ENOMEM` (macOS, Linux): Failed to allocate kernel memory for the entries.
diff --git a/include/darwin/hax_interface_mac.h b/include/darwin/hax_interface_mac.h
index 43a9719d..2487cdf0 100644
--- a/include/darwin/hax_interface_mac.h
+++ b/include/darwin/hax_interface_mac.h
@@ -70,6 +70,11 @@
 
 #define HAX_IOCTL_VCPU_DEBUG  _IOW(0, 0xc9, struct hax_debug_t)
 
+// The ioctl data type is `hax_cpuid *` (a pointer) because `hax_cpuid` is a
+// variable-length type. When ioctl() is invoked, the user argument must be the
+// address of a pointer to `hax_cpuid`.
+#define HAX_VCPU_IOCTL_SET_CPUID  _IOW(0, 0xca, struct hax_cpuid *)
+
 #define HAX_KERNEL64_CS  0x80
 #define HAX_KERNEL32_CS  0x08
 #ifdef __i386__
diff --git a/include/hax_interface.h b/include/hax_interface.h
index 25c08b31..eb642de4 100644
--- a/include/hax_interface.h
+++ b/include/hax_interface.h
@@ -194,6 +194,7 @@ struct hax_module_version {
 #define HAX_CAP_RAM_PROTECTION     (1 << 6)
 #define HAX_CAP_DEBUG              (1 << 7)
 #define HAX_CAP_IMPLICIT_RAMBLOCK  (1 << 8)
+#define HAX_CAP_CPUID              (1 << 9)
 
 struct hax_capabilityinfo {
     /*
@@ -297,4 +298,28 @@ struct hax_debug_t {
     uint64_t dr[8];
 } PACKED;
 
+#define HAX_MAX_CPUID_ENTRIES 0x40
+
+typedef struct hax_cpuid_entry {
+    uint32_t function;
+    uint32_t index;
+    uint32_t flags;
+    uint32_t eax;
+    uint32_t ebx;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t pad[3];
+} hax_cpuid_entry;
+
+// `hax_cpuid` is a variable-length type. The size of `hax_cpuid` itself is only
+// 8 bytes. `entries` is just a body placeholder, which will not actually occupy
+// memory. The accessible memory of `entries` is decided by the allocation from
+// user space, and the array length is specified by `total`.
+
+typedef struct hax_cpuid {
+    uint32_t total;
+    uint32_t pad;
+    hax_cpuid_entry entries[0];
+} hax_cpuid;
+
 #endif  // HAX_INTERFACE_H_
diff --git a/include/linux/hax_interface_linux.h b/include/linux/hax_interface_linux.h
index ecb13fd0..97810934 100644
--- a/include/linux/hax_interface_linux.h
+++ b/include/linux/hax_interface_linux.h
@@ -70,6 +70,7 @@
 #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION  _IOW(0, 0x84, struct hax_qemu_version)
 
 #define HAX_IOCTL_VCPU_DEBUG  _IOW(0, 0xc9, struct hax_debug_t)
+#define HAX_VCPU_IOCTL_SET_CPUID  _IOW(0, 0xca, struct hax_cpuid *)
 
 #define HAX_KERNEL64_CS  0x80
 #define HAX_KERNEL32_CS  0x08
diff --git a/platforms/darwin/com_intel_hax_ui.c b/platforms/darwin/com_intel_hax_ui.c
index 22fb15be..8d803da4 100644
--- a/platforms/darwin/com_intel_hax_ui.c
+++ b/platforms/darwin/com_intel_hax_ui.c
@@ -79,6 +79,47 @@ static int hax_vcpu_major = 0;
 #define HAX_VM_DEVFS_FMT_COMPAT  "hax_vm*/vm%02d"
 #define HAX_VM_DEVFS_FMT         "hax_vm/vm%02d"
 
+// Copies a variable-length ioctl argument from user space: an arg_t header
+// followed by header.body_len elements of body_t. `src` holds the user address
+// of the argument. Expand only inside a `switch` case with `ret` in scope: on
+// failure the macro sets `ret` and `break`s. It declares the locals `uaddr`,
+// `size` and `header`; unload_user_data() relies on `size`. The element count
+// in the copied buffer is pinned to the value that was validated.
+#define load_user_data(dest, src, body_len, body_max, arg_t, body_t)          \
+    user_addr_t uaddr = (user_addr_t)(*(arg_t **)(src));                      \
+    size_t size;                                                              \
+    arg_t header;                                                             \
+    (dest) = NULL;                                                            \
+    if (copyin(uaddr, &header, sizeof(arg_t))) {                              \
+        hax_log(HAX_LOGE, "%s: argument header read error.\n", __func__);     \
+        ret = -EFAULT;                                                        \
+        break;                                                                \
+    }                                                                         \
+    if (header.body_len > (body_max)) {                                       \
+        hax_log(HAX_LOGW, "%s: %u exceeds argument body maximum %d.\n",       \
+                __func__, header.body_len, (body_max));                       \
+        ret = -E2BIG;                                                         \
+        break;                                                                \
+    }                                                                         \
+    size = sizeof(arg_t) + header.body_len * sizeof(body_t);                  \
+    (dest) = hax_vmalloc(size, HAX_MEM_NONPAGE);                              \
+    if ((dest) == NULL) {                                                     \
+        hax_log(HAX_LOGE, "%s: failed to allocate memory.\n", __func__);      \
+        ret = -ENOMEM;                                                        \
+        break;                                                                \
+    }                                                                         \
+    if (copyin(uaddr, (dest), size)) {                                        \
+        hax_log(HAX_LOGE, "%s: argument read error.\n", __func__);            \
+        unload_user_data(dest);                                               \
+        ret = -EFAULT;                                                        \
+        break;                                                                \
+    }                                                                         \
+    (dest)->body_len = header.body_len;
+
+#define unload_user_data(dest)                                                \
+    if ((dest) != NULL)                                                       \
+        hax_vfree((dest), size);
+
 static void handle_unknown_ioctl(dev_t dev, ulong cmd, struct proc *p);
 
 static struct vcpu_t * get_vcpu_by_dev(dev_t dev) {
@@ -243,6 +284,14 @@ static int hax_vcpu_ioctl(dev_t dev, ulong cmd, caddr_t data, int flag,
             vcpu_debug(cvcpu, hax_debug);
             break;
         }
+        case HAX_VCPU_IOCTL_SET_CPUID: {
+            struct hax_cpuid *cpuid;
+            load_user_data(cpuid, data, total, HAX_MAX_CPUID_ENTRIES, hax_cpuid,
+                           hax_cpuid_entry);
+            ret = vcpu_set_cpuid(cvcpu, cpuid);
+            unload_user_data(cpuid);
+            break;
+        }
         default: {
             handle_unknown_ioctl(dev, cmd, p);
             ret = -ENOSYS;
diff --git a/platforms/linux/components.c b/platforms/linux/components.c
index 0f4140b9..a9adcd96 100644
--- a/platforms/linux/components.c
+++ b/platforms/linux/components.c
@@ -42,6 +42,52 @@
 #define HAX_VM_DEVFS_FMT    "hax_vm/vm%02d"
 #define HAX_VCPU_DEVFS_FMT  "hax_vm%02d/vcpu%02d"
 
+// Copies a variable-length ioctl argument from user space: an arg_t header
+// followed by header.body_len elements of body_t. `src` is the user address of
+// a pointer to the argument. Expand only inside a `switch` case with `ret` in
+// scope: on failure the macro sets `ret` and `break`s. It declares the locals
+// `from`, `size` and `header`; unload_user_data() relies on `size`. The element
+// count in the copied buffer is pinned to the value that was validated.
+#define load_user_data(dest, src, body_len, body_max, arg_t, body_t)          \
+    arg_t __user *from = NULL;                                                \
+    size_t size;                                                              \
+    arg_t header;                                                             \
+    (dest) = NULL;                                                            \
+    if (copy_from_user(&from, (src), sizeof(from))) {                         \
+        hax_log(HAX_LOGE, "%s: argument pointer read error.\n", __func__);    \
+        ret = -EFAULT;                                                        \
+        break;                                                                \
+    }                                                                         \
+    if (copy_from_user(&header, from, sizeof(arg_t))) {                       \
+        hax_log(HAX_LOGE, "%s: argument header read error.\n", __func__);     \
+        ret = -EFAULT;                                                        \
+        break;                                                                \
+    }                                                                         \
+    if (header.body_len > (body_max)) {                                       \
+        hax_log(HAX_LOGW, "%s: %u exceeds argument body maximum %d.\n",       \
+                __func__, header.body_len, (body_max));                       \
+        ret = -E2BIG;                                                         \
+        break;                                                                \
+    }                                                                         \
+    size = sizeof(arg_t) + header.body_len * sizeof(body_t);                  \
+    (dest) = hax_vmalloc(size, HAX_MEM_NONPAGE);                              \
+    if ((dest) == NULL) {                                                     \
+        hax_log(HAX_LOGE, "%s: failed to allocate memory.\n", __func__);      \
+        ret = -ENOMEM;                                                        \
+        break;                                                                \
+    }                                                                         \
+    if (copy_from_user((dest), from, size)) {                                 \
+        hax_log(HAX_LOGE, "%s: argument read error.\n", __func__);            \
+        unload_user_data(dest);                                               \
+        ret = -EFAULT;                                                        \
+        break;                                                                \
+    }                                                                         \
+    (dest)->body_len = header.body_len;
+
+#define unload_user_data(dest)                                                \
+    if ((dest) != NULL)                                                       \
+        hax_vfree((dest), size);
+
 typedef struct hax_vm_linux_t {
     struct vm_t *cvm;
     int id;
@@ -445,6 +491,14 @@ static long hax_vcpu_ioctl(struct file *filp, unsigned int cmd,
         vcpu_debug(cvcpu, &hax_debug);
         break;
     }
+    case HAX_VCPU_IOCTL_SET_CPUID: {
+        struct hax_cpuid *cpuid;
+        load_user_data(cpuid, argp, total, HAX_MAX_CPUID_ENTRIES, hax_cpuid,
+                       hax_cpuid_entry);
+        ret = vcpu_set_cpuid(cvcpu, cpuid);
+        unload_user_data(cpuid);
+        break;
+    }
     default:
         // TODO: Print information about the process that sent the ioctl.
         hax_log(HAX_LOGE, "Unknown VCPU IOCTL 0x%lx\n", cmd);
diff --git a/platforms/windows/hax_entry.c b/platforms/windows/hax_entry.c
index 2d184b6b..16167d14 100644
--- a/platforms/windows/hax_entry.c
+++ b/platforms/windows/hax_entry.c
@@ -431,6 +431,19 @@ NTSTATUS HaxVcpuControl(PDEVICE_OBJECT DeviceObject,
             vcpu_debug(cvcpu, (struct hax_debug_t*)inBuf);
             break;
         }
+        case HAX_VCPU_IOCTL_SET_CPUID: {
+            hax_cpuid *cpuid = (hax_cpuid *)inBuf;
+            if (inBufLength < sizeof(hax_cpuid) ||
+                    cpuid->total > (inBufLength - sizeof(hax_cpuid)) /
+                    sizeof(hax_cpuid_entry)) {
+                ret = STATUS_INVALID_PARAMETER;
+                goto done;
+            }
+            if (vcpu_set_cpuid(cvcpu, cpuid)) {
+                ret = STATUS_UNSUCCESSFUL;
+            }
+            break;
+        }
         default:
             hax_log(HAX_LOGE, "Unknow vcpu ioctl %lx\n",
                     irpSp->Parameters.DeviceIoControl.IoControlCode);
diff --git a/platforms/windows/hax_entry.h b/platforms/windows/hax_entry.h
index 52613f7d..4659fe32 100644
--- a/platforms/windows/hax_entry.h
+++ b/platforms/windows/hax_entry.h
@@ -165,5 +165,7 @@ extern PDRIVER_OBJECT HaxDriverObject;
 
 #define HAX_IOCTL_VCPU_DEBUG \
         CTL_CODE(HAX_DEVICE_TYPE, 0x916, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_CPUID \
+        CTL_CODE(HAX_DEVICE_TYPE, 0x917, METHOD_BUFFERED, FILE_ANY_ACCESS)
 
 #endif  // HAX_WINDOWS_HAX_ENTRY_H_