diff --git a/core/cpu.c b/core/cpu.c
index 5f82fd7c..39a19094 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -40,10 +40,6 @@
 #include "include/intr.h"
 #include "include/ept.h"
 
-static cpuid_cache_t cache = {
-    .initialized = 0
-};
-
 static void cpu_vmentry_failed(struct vcpu_t *vcpu, vmx_result_t result);
 static int cpu_vmexit_handler(struct vcpu_t *vcpu, exit_reason_t exit_reason,
                               struct hax_tunnel *htun);
@@ -66,15 +62,7 @@ static int cpu_nx_enable(void)
 
 bool cpu_has_feature(uint32_t feature)
 {
-    if (!cache.initialized) {
-        cpuid_host_init(&cache);
-    }
-    return cpuid_host_has_feature(&cache, feature);
-}
-
-void cpu_init_feature_cache(void)
-{
-    cpuid_host_init(&cache);
+    return cpuid_host_has_feature(feature);
 }
 
 void cpu_init_vmx(void *arg)
diff --git a/core/cpuid.c b/core/cpuid.c
index 4a279786..49ae9311 100644
--- a/core/cpuid.c
+++ b/core/cpuid.c
@@ -32,6 +32,15 @@
 
 #include "include/ia32.h"
 
+#define CPUID_CACHE_SIZE 6
+
+typedef struct cpuid_cache_t {
+    uint32_t data[CPUID_CACHE_SIZE];  // Host cached features
+    cpuid_t  host_supported;          // Physical CPU supported features
+    cpuid_t  hax_supported;           // Hypervisor supported features
+    bool     initialized;             // Set to true by cpuid_host_init()
+} cpuid_cache_t;
+
 typedef union cpuid_feature_t {
     struct {
         uint32_t index        : 5;
@@ -46,6 +55,14 @@ typedef union cpuid_feature_t {
     uint32_t value;
 } cpuid_feature_t;
 
+static cpuid_cache_t cache = {0};  // Single file-scope CPUID feature cache
+
+static hax_cpuid_entry * find_cpuid_entry(hax_cpuid *cpuid_info,
+                                          uint32_t function, uint32_t index);
+static void cpuid_set_0000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+static void cpuid_set_8000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+static void cpuid_set_fixed_features(cpuid_t *cpuid);
+
 void cpuid_query_leaf(cpuid_args_t *args, uint32_t leaf)
 {
     args->eax = leaf;
@@ -59,10 +76,10 @@ void cpuid_query_subleaf(cpuid_args_t *args, uint32_t leaf, uint32_t subleaf)
     asm_cpuid(args);
 }
 
-void cpuid_host_init(cpuid_cache_t *cache)
+void cpuid_host_init(void)
 {
     cpuid_args_t res;
-    uint32_t *data = cache->data;
+    uint32_t *data = cache.data;
 
     cpuid_query_leaf(&res, 0x00000001);
     data[0] = res.ecx;
@@ -76,19 +93,19 @@ void cpuid_host_init(cpuid_cache_t *cache)
     data[4] = res.ecx;
     data[5] = res.edx;
 
-    cache->initialized = 1;
+    cache.initialized = true;
 }
 
-bool cpuid_host_has_feature(cpuid_cache_t *cache, uint32_t feature_key)
+bool cpuid_host_has_feature(uint32_t feature_key)
 {
     cpuid_feature_t feature;
     uint32_t value;
 
     feature.value = feature_key;
-    if (!cache->initialized || feature.index >= CPUID_CACHE_SIZE) {
+    if (!cache.initialized || feature.index >= CPUID_CACHE_SIZE) {
         return cpuid_host_has_feature_uncached(feature_key);
     }
-    value = cache->data[feature.index];
+    value = cache.data[feature.index];
     if (value & (1 << feature.bit)) {
         return true;
     }
@@ -114,3 +131,235 @@ bool cpuid_host_has_feature_uncached(uint32_t feature_key)
     }
     return false;
 }
+
+void cpuid_init_supported_features(void)
+{
+    uint32_t bit, flag, function, x86_feature;
+    // Fills cache.host_supported and cache.hax_supported and logs both sets.
+    // Host set: test every bit of the three probed feature words.
+    for (bit = 0; bit < sizeof(uint32_t) * 8; ++bit) {
+        flag = 1U << bit;  // 1U keeps the shift into bit 31 well-defined
+
+        function = 0x01;
+        x86_feature = FEATURE_KEY_LEAF(0, function, CPUID_REG_ECX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_1_ecx |= flag;
+        }
+
+        x86_feature = FEATURE_KEY_LEAF(1, function, CPUID_REG_EDX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_1_edx |= flag;
+        }
+
+        function = 0x80000001;
+        x86_feature = FEATURE_KEY_LEAF(5, function, CPUID_REG_EDX, bit);
+        if (cpuid_host_has_feature(x86_feature)) {
+            cache.host_supported.feature_8000_0001_edx |= flag;
+        }
+    }
+    // NOTE: host_supported.feature_8000_0001_ecx is not probed by the loop.
+    hax_log(HAX_LOGI, "%s: host supported features:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cache.host_supported.feature_1_ecx,
+            cache.host_supported.feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, "
+            "feature_8000_0001_edx: %08x\n",
+            cache.host_supported.feature_8000_0001_ecx,
+            cache.host_supported.feature_8000_0001_edx);
+
+    // Initialize HAXM supported features
+    cache.hax_supported = (cpuid_t){
+        .feature_1_ecx =
+            FEATURE(SSE3)       |
+            FEATURE(SSSE3)      |
+            FEATURE(SSE41)      |
+            FEATURE(SSE42)      |
+            FEATURE(CMPXCHG16B) |
+            FEATURE(MOVBE)      |
+            FEATURE(AESNI)      |
+            FEATURE(PCLMULQDQ)  |
+            FEATURE(POPCNT),
+        .feature_1_edx =
+            FEATURE(PAT)        |
+            FEATURE(FPU)        |
+            FEATURE(VME)        |
+            FEATURE(DE)         |
+            FEATURE(TSC)        |
+            FEATURE(MSR)        |
+            FEATURE(PAE)        |
+            FEATURE(MCE)        |
+            FEATURE(CX8)        |
+            FEATURE(APIC)       |
+            FEATURE(SEP)        |
+            FEATURE(MTRR)       |
+            FEATURE(PGE)        |
+            FEATURE(MCA)        |
+            FEATURE(CMOV)       |
+            FEATURE(CLFSH)      |
+            FEATURE(MMX)        |
+            FEATURE(FXSR)       |
+            FEATURE(SSE)        |
+            FEATURE(SSE2)       |
+            FEATURE(SS)         |
+            FEATURE(PSE)        |
+            FEATURE(HTT),
+        .feature_8000_0001_ecx = 0,
+        .feature_8000_0001_edx =
+            FEATURE(NX)         |
+            FEATURE(SYSCALL)    |
+            FEATURE(RDTSCP)     |
+            FEATURE(EM64T)
+    };
+
+    hax_log(HAX_LOGI, "%s: HAXM supported features:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cache.hax_supported.feature_1_ecx,
+            cache.hax_supported.feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, "
+            "feature_8000_0001_edx: %08x\n",
+            cache.hax_supported.feature_8000_0001_ecx,
+            cache.hax_supported.feature_8000_0001_edx);
+}
+
+void cpuid_guest_init(cpuid_t *cpuid)
+{
+    *cpuid = cache.hax_supported;  // Start from HAXM's supported feature set
+    cpuid->features_mask = ~0ULL;  // All mask bits set
+}
+
+void cpuid_get_features_mask(cpuid_t *cpuid, uint64_t *features_mask)
+{
+    *features_mask = cpuid->features_mask;  // Copy the stored mask to caller
+}
+
+void cpuid_set_features_mask(cpuid_t *cpuid, uint64_t features_mask)
+{
+    cpuid->features_mask = features_mask;  // Stored as is, no validation
+}
+
+void cpuid_get_guest_features(cpuid_t *cpuid, uint32_t *cpuid_1_features_ecx,
+                              uint32_t *cpuid_1_features_edx,
+                              uint32_t *cpuid_8000_0001_features_ecx,
+                              uint32_t *cpuid_8000_0001_features_edx)
+{
+    *cpuid_1_features_ecx         = cpuid->feature_1_ecx;  // Unmasked copies
+    *cpuid_1_features_edx         = cpuid->feature_1_edx;
+    *cpuid_8000_0001_features_ecx = cpuid->feature_8000_0001_ecx;
+    *cpuid_8000_0001_features_edx = cpuid->feature_8000_0001_edx;
+}
+
+void cpuid_set_guest_features(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    static void (*cpuid_set_guest_feature[])(cpuid_t *, hax_cpuid *) = {
+        cpuid_set_0000_0001,
+        cpuid_set_8000_0001
+    };
+    static const size_t count = sizeof(cpuid_set_guest_feature) /
+                                sizeof(cpuid_set_guest_feature[0]);
+    size_t i;  // Same type as count, so the loop test is not mixed-sign
+    // Runs every per-leaf handler on cpuid_info, logging before and after.
+    hax_log(HAX_LOGI, "%s: before:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cpuid->feature_1_ecx, cpuid->feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, feature_8000_0001_edx: %08x"
+            "\n", cpuid->feature_8000_0001_ecx, cpuid->feature_8000_0001_edx);
+
+    for (i = 0; i < count; ++i) {
+        cpuid_set_guest_feature[i](cpuid, cpuid_info);
+    }
+
+    hax_log(HAX_LOGI, "%s: after:\n", __func__);
+    hax_log(HAX_LOGI, "feature_1_ecx: %08x, feature_1_edx: %08x\n",
+            cpuid->feature_1_ecx, cpuid->feature_1_edx);
+    hax_log(HAX_LOGI, "feature_8000_0001_ecx: %08x, feature_8000_0001_edx: %08x"
+            "\n", cpuid->feature_8000_0001_ecx, cpuid->feature_8000_0001_edx);
+}
+
+static hax_cpuid_entry * find_cpuid_entry(hax_cpuid *cpuid_info,
+                                          uint32_t function, uint32_t index)
+{
+    // Linear search of the caller-supplied entries for the one whose function
+    // and index both match; returns NULL when there is no such entry.
+    uint32_t i;  // Unsigned like cpuid_info->total, avoiding a mixed-sign test
+    hax_cpuid_entry *entry;
+
+    for (i = 0; i < cpuid_info->total; ++i) {
+        entry = &cpuid_info->entries[i];
+        if (entry->function == function && entry->index == index)
+            return entry;
+    }
+
+    return NULL;
+}
+
+static void cpuid_set_0000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    const uint32_t kFunction = 0x01;
+    hax_cpuid_entry *entry;
+    // Applies the function 0x01 entry, if present; otherwise changes nothing.
+    entry = find_cpuid_entry(cpuid_info, kFunction, 0);
+    if (entry == NULL)
+        return;
+
+    hax_log(HAX_LOGI, "%s: function: %08x, index: %u, flags: %08x\n",
+            __func__, entry->function, entry->index, entry->flags);
+    hax_log(HAX_LOGI, "%s: eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
+            __func__, entry->eax, entry->ebx, entry->ecx, entry->edx);
+
+    cpuid->feature_1_ecx = entry->ecx;
+    cpuid->feature_1_edx = entry->edx;
+
+    // Filter the unsupported features
+    cpuid->feature_1_ecx &= cache.host_supported.feature_1_ecx &
+                            cache.hax_supported.feature_1_ecx;
+    cpuid->feature_1_edx &= cache.host_supported.feature_1_edx &
+                            cache.hax_supported.feature_1_edx;
+
+    // Set fixed supported features
+    cpuid_set_fixed_features(cpuid);
+    // The XOR values logged below are the bits that differ from the request.
+    if (entry->ecx != cpuid->feature_1_ecx ||
+        entry->edx != cpuid->feature_1_edx) {
+        hax_log(HAX_LOGW, "%s: filtered or unchanged flags: ecx: %08x, "
+                "edx: %08x\n", __func__, entry->ecx ^ cpuid->feature_1_ecx,
+                entry->edx ^ cpuid->feature_1_edx);
+    }
+}
+
+static void cpuid_set_8000_0001(cpuid_t *cpuid, hax_cpuid *cpuid_info)
+{
+    const uint32_t kFunction = 0x80000001;
+    hax_cpuid_entry *entry;
+    // Applies the 0x80000001 entry, if present. Only EDX is taken from it.
+    entry = find_cpuid_entry(cpuid_info, kFunction, 0);
+    if (entry == NULL)
+        return;
+
+    hax_log(HAX_LOGI, "%s: function: %08x, index: %u, flags: %08x\n",
+            __func__, entry->function, entry->index, entry->flags);
+    hax_log(HAX_LOGI, "%s: eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n",
+            __func__, entry->eax, entry->ebx, entry->ecx, entry->edx);
+
+    cpuid->feature_8000_0001_edx = entry->edx;
+
+    // Filter the unsupported features
+    cpuid->feature_8000_0001_edx &=
+        cache.host_supported.feature_8000_0001_edx &
+        cache.hax_supported.feature_8000_0001_edx;
+
+    if (entry->edx != cpuid->feature_8000_0001_edx) {
+        hax_log(HAX_LOGW, "%s: filtered or unchanged flags: edx: %08x\n",
+                __func__, entry->edx ^ cpuid->feature_8000_0001_edx);
+    }
+}
+
+static void cpuid_set_fixed_features(cpuid_t *cpuid)
+{
+    // Forces the bits below on in feature_1_edx. cpuid_set_0000_0001() calls
+    // this after filtering, so the filter cannot clear them.
+    cpuid->feature_1_edx |=
+        FEATURE(MCE)  |
+        FEATURE(APIC) |
+        FEATURE(MTRR) |
+        FEATURE(PAT);
+}
diff --git a/core/hax.c b/core/hax.c
index cb554996..9bc5ea79 100644
--- a/core/hax.c
+++ b/core/hax.c
@@ -373,6 +373,7 @@ int hax_get_capability(void *buf, int bufLeng, int *outLength)
         cap->winfo |= HAX_CAP_TUNNEL_PAGE;
         cap->winfo |= HAX_CAP_RAM_PROTECTION;
         cap->winfo |= HAX_CAP_DEBUG;
+        cap->winfo |= HAX_CAP_CPUID;
         if (cpu_data->vmx_info._ept_cap) {
             cap->winfo |= HAX_CAP_EPT;
         }
@@ -565,7 +566,10 @@ int hax_module_init(void)
         hax_clear_page(hax_cpu_data[cpu_id]->hstate.hfxpage);
         hax_cpu_data[cpu_id]->cpu_id = cpu_id;
     }
-    cpu_init_feature_cache();
+
+    cpuid_host_init();
+    cpuid_init_supported_features();
+
     if (hax_vmx_init() < 0)
         goto out_2;
 
diff --git a/core/include/cpuid.h b/core/include/cpuid.h
index 6536b3d6..2c8ad89b 100644
--- a/core/include/cpuid.h
+++ b/core/include/cpuid.h
@@ -31,20 +31,14 @@
 #ifndef HAX_CORE_CPUID_H_
 #define HAX_CORE_CPUID_H_
 
+#include "../../include/hax.h"
 #include "../../include/hax_types.h"
 
-#define CPUID_CACHE_SIZE 6
-
 #define CPUID_REG_EAX 0
 #define CPUID_REG_ECX 1
 #define CPUID_REG_EDX 2
 #define CPUID_REG_EBX 3
 
-typedef struct cpuid_cache_t {
-    uint32_t data[CPUID_CACHE_SIZE];
-    bool initialized;
-} cpuid_cache_t;
-
 typedef union cpuid_args_t {
     struct {
         uint32_t eax;
@@ -55,6 +49,14 @@ typedef union cpuid_args_t {
     uint32_t regs[4];
 } cpuid_args_t;
 
+typedef struct cpuid_t {
+    uint64_t features_mask;          // Backs the IA32_CPUID_FEATURE_MASK MSR
+    uint32_t feature_1_ecx;          // Function 0x01 ECX feature bits
+    uint32_t feature_1_edx;          // Function 0x01 EDX feature bits
+    uint32_t feature_8000_0001_ecx;  // Function 0x80000001 ECX feature bits
+    uint32_t feature_8000_0001_edx;  // Function 0x80000001 EDX feature bits
+} cpuid_t;
+
 /*
  * X86 Features
  * ============
@@ -254,8 +256,18 @@ enum {
 void cpuid_query_leaf(cpuid_args_t *args, uint32_t leaf);
 void cpuid_query_subleaf(cpuid_args_t *args, uint32_t leaf, uint32_t subleaf);
 
-void cpuid_host_init(cpuid_cache_t *cache);
-bool cpuid_host_has_feature(cpuid_cache_t *cache, uint32_t feature_key);
+void cpuid_host_init(void);
+bool cpuid_host_has_feature(uint32_t feature_key);
 bool cpuid_host_has_feature_uncached(uint32_t feature_key);
 
+void cpuid_init_supported_features(void);
+void cpuid_guest_init(cpuid_t *cpuid);
+void cpuid_get_features_mask(cpuid_t *cpuid, uint64_t *features_mask);
+void cpuid_set_features_mask(cpuid_t *cpuid, uint64_t features_mask);
+void cpuid_get_guest_features(cpuid_t *cpuid, uint32_t *cpuid_1_features_ecx,
+                              uint32_t *cpuid_1_features_edx,
+                              uint32_t *cpuid_8000_0001_features_ecx,
+                              uint32_t *cpuid_8000_0001_features_edx);
+void cpuid_set_guest_features(cpuid_t *cpuid, hax_cpuid *cpuid_info);
+
 #endif /* HAX_CORE_CPUID_H_ */
diff --git a/core/include/hax_core_interface.h b/core/include/hax_core_interface.h
index 0ad425d7..65ba0611 100644
--- a/core/include/hax_core_interface.h
+++ b/core/include/hax_core_interface.h
@@ -45,6 +45,7 @@ int vcpu_put_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_get_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_set_regs(struct vcpu_t *vcpu, struct vcpu_state_t *vs);
 int vcpu_get_regs(struct vcpu_t *vcpu, struct vcpu_state_t *vs);
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info);
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug);
 
 void * get_vcpu_host(struct vcpu_t *vcpu);
diff --git a/core/include/vcpu.h b/core/include/vcpu.h
index 33bb326d..99ca496e 100644
--- a/core/include/vcpu.h
+++ b/core/include/vcpu.h
@@ -31,6 +31,7 @@
 #ifndef HAX_CORE_VCPU_H_
 #define HAX_CORE_VCPU_H_
 
+#include "cpuid.h"
 #include "emulate.h"
 #include "vmx.h"
 #include "mtrr.h"
@@ -216,7 +217,6 @@ struct vcpu_t {
     uint64_t pae_pdptes[4];
 
     uint64_t cr_pat;
-    uint64_t cpuid_features_flag_mask;
 
     /* Debugging */
     uint32_t debug_control;
@@ -233,6 +233,14 @@ struct vcpu_t {
     struct em_context_t emulate_ctxt;
     struct vcpu_post_mmio post_mmio;
     struct mmio_fetch_cache mmio_fetch;
+
+    // Guest CPUID feature set
+    // * The CPUID feature set is always the same for every vCPU. A CPUID
+    //   instruction executed on any vCPU will get the same result.
+    // * All vCPUs share a single buffer, which is allocated by the first vCPU
+    //   created by the VM. If any vCPU sets features through this field, the
+    //   change is visible to all vCPUs.
+    cpuid_t *guest_cpuid;
 };
 
 #define vmx(v, field) v->vmx.field
@@ -258,6 +266,7 @@ int vcpu_get_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_put_fpu(struct vcpu_t *vcpu, struct fx_layout *fl);
 int vcpu_get_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t *val);
 int vcpu_put_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t val);
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info);
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug);
 
 /* The declaration for OS wrapper code */
diff --git a/core/vcpu.c b/core/vcpu.c
index 1556d943..9f3cd1fd 100644
--- a/core/vcpu.c
+++ b/core/vcpu.c
@@ -123,6 +123,10 @@ static void vcpu_enter_fpu_state(struct vcpu_t *vcpu);
 static int vcpu_set_apic_base(struct vcpu_t *vcpu, uint64_t val);
 static bool vcpu_is_bsp(struct vcpu_t *vcpu);
 
+static void vcpu_init_cpuid(struct vcpu_t *vcpu);
+static int vcpu_alloc_cpuid(struct vcpu_t *vcpu);
+static void vcpu_free_cpuid(struct vcpu_t *vcpu);
+
 static uint32_t get_seg_present(uint32_t seg)
 {
     mword ldtr_base;
@@ -457,9 +461,12 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id)
     if (!vcpu_vtlb_alloc(vcpu))
         goto fail_6;
 
-    if (hax_vcpu_create_host(vcpu, vm_host, vm->vm_id, vcpu_id))
+    if (!vcpu_alloc_cpuid(vcpu))
         goto fail_7;
 
+    if (hax_vcpu_create_host(vcpu, vm_host, vm->vm_id, vcpu_id))
+        goto fail_8;
+
     vcpu->prev_cpu_id = (uint32_t)(~0ULL);
     vcpu->cpu_id = hax_cpu_id();
     vcpu->vcpu_id = vcpu_id;
@@ -488,6 +495,8 @@ struct vcpu_t *vcpu_create(struct vm_t *vm, void *vm_host, int vcpu_id)
 
     hax_log(HAX_LOGD, "vcpu %d is created.\n", vcpu->vcpu_id);
     return vcpu;
+fail_8:
+    vcpu_free_cpuid(vcpu);
 fail_7:
     vcpu_vtlb_free(vcpu);
 fail_6:
@@ -542,6 +551,7 @@ static int _vcpu_teardown(struct vcpu_t *vcpu)
     hax_vfree(vcpu->state, sizeof(struct vcpu_state_t));
     vcpu_vtlb_free(vcpu);
     hax_mutex_free(vcpu->tmutex);
+    vcpu_free_cpuid(vcpu);
     hax_vfree(vcpu, sizeof(struct vcpu_t));
 
     hax_log(HAX_LOGI, "vcpu %d is teardown.\n", vcpu_id);
@@ -574,7 +584,6 @@ static void vcpu_init(struct vcpu_t *vcpu)
 
     // TODO: mtrr ?
     vcpu->cr_pat = 0x0007040600070406ULL;
-    vcpu->cpuid_features_flag_mask = 0xffffffffffffffffULL;
     vcpu->cur_state = GS_VALID;
     vmx(vcpu, entry_exception_vector) = ~0u;
     vmx(vcpu, cr0_mask) = 0;
@@ -630,6 +639,9 @@ static void vcpu_init(struct vcpu_t *vcpu)
         vcpu->gstate.apic_base |= APIC_BASE_BSP;
     }
 
+    // Initialize guest CPUID
+    vcpu_init_cpuid(vcpu);
+
     hax_mutex_unlock(vcpu->tmutex);
 }
 
@@ -2536,50 +2548,16 @@ static void handle_cpuid_virtual(struct vcpu_t *vcpu, uint32_t a, uint32_t c)
     uint32_t hw_family;
     uint32_t hw_model;
     uint8_t physical_address_size;
+    uint32_t cpuid_1_features_ecx, cpuid_1_features_edx,
+             cpuid_8000_0001_features_ecx, cpuid_8000_0001_features_edx;
 
-    static uint32_t cpuid_1_features_edx =
-            FEATURE(PAT)        |
-            FEATURE(FPU)        |
-            FEATURE(VME)        |
-            FEATURE(DE)         |
-            FEATURE(TSC)        |
-            FEATURE(MSR)        |
-            FEATURE(PAE)        |
-            FEATURE(MCE)        |
-            FEATURE(CX8)        |
-            FEATURE(APIC)       |
-            FEATURE(SEP)        |
-            FEATURE(MTRR)       |
-            FEATURE(PGE)        |
-            FEATURE(MCA)        |
-            FEATURE(CMOV)       |
-            FEATURE(CLFSH)      |
-            FEATURE(MMX)        |
-            FEATURE(FXSR)       |
-            FEATURE(SSE)        |
-            FEATURE(SSE2)       |
-            FEATURE(SS)         |
-            FEATURE(PSE)        |
-            FEATURE(HTT);
-
-    static uint32_t cpuid_1_features_ecx =
-            FEATURE(SSE3)       |
-            FEATURE(SSSE3)      |
-            FEATURE(SSE41)      |
-            FEATURE(SSE42)      |
-            FEATURE(CMPXCHG16B) |
-            FEATURE(MOVBE)      |
-            FEATURE(AESNI)      |
-            FEATURE(PCLMULQDQ)  |
-            FEATURE(POPCNT);
-
-    static uint32_t cpuid_8000_0001_features_edx =
-            FEATURE(NX)         |
-            FEATURE(SYSCALL)    |
-            FEATURE(RDTSCP)     |
-            FEATURE(EM64T);
-
-    static uint32_t cpuid_8000_0001_features_ecx = 0;
+    // To fully support CPUID instructions (opcode = 0F A2) by software, it is
+    // recommended to add opcode_table_0FA2[] in core/emulate.c to emulate
+    // (Refer to Intel SDM Vol. 2A 3.2 CPUID).
+    cpuid_get_guest_features(vcpu->guest_cpuid, &cpuid_1_features_ecx,
+                             &cpuid_1_features_edx,
+                             &cpuid_8000_0001_features_ecx,
+                             &cpuid_8000_0001_features_edx);
 
     switch (a) {
         case 0: {                       // Maximum Basic Information
@@ -3454,7 +3432,7 @@ static int handle_msr_read(struct vcpu_t *vcpu, uint32_t msr, uint64_t *val)
             break;
         }
         case IA32_CPUID_FEATURE_MASK: {
-            *val = vcpu->cpuid_features_flag_mask;
+            cpuid_get_features_mask(vcpu->guest_cpuid, val);
             break;
         }
         case IA32_EBC_FREQUENCY_ID: {
@@ -3606,7 +3584,7 @@ static int handle_msr_write(struct vcpu_t *vcpu, uint32_t msr, uint64_t val,
             break;
         }
         case IA32_CPUID_FEATURE_MASK: {
-            vcpu->cpuid_features_flag_mask = val;
+            cpuid_set_features_mask(vcpu->guest_cpuid, val);
             break;
         }
         case IA32_EFER: {
@@ -4177,6 +4155,28 @@ int vcpu_set_msr(struct vcpu_t *vcpu, uint64_t entry, uint64_t val)
     return handle_msr_write(vcpu, entry, val, true);
 }
 
+int vcpu_set_cpuid(struct vcpu_t *vcpu, hax_cpuid *cpuid_info)
+{
+    hax_log(HAX_LOGI, "%s: vCPU #%u is setting guest CPUID.\n", __func__,
+            vcpu->vcpu_id);
+    // Returns 0, -EINVAL (bad entry count) or -EFAULT (vCPU is running).
+    if (cpuid_info->total == 0 || cpuid_info->total > HAX_MAX_CPUID_ENTRIES) {
+        hax_log(HAX_LOGW, "%s: No entry or exceeds maximum: total = %u.\n",
+                __func__, cpuid_info->total);
+        return -EINVAL;
+    }
+
+    if (vcpu->is_running) {
+        hax_log(HAX_LOGW, "%s: Cannot set CPUID: vcpu->is_running = %llu.\n",
+                __func__, vcpu->is_running);
+        return -EFAULT;
+    }
+
+    cpuid_set_guest_features(vcpu->guest_cpuid, cpuid_info);
+
+    return 0;
+}
+
 void vcpu_debug(struct vcpu_t *vcpu, struct hax_debug_t *debug)
 {
     bool hbreak_enabled = false;
@@ -4496,3 +4496,56 @@ static bool vcpu_is_bsp(struct vcpu_t *vcpu)
     // TODO: add an API to set bootstrap processor
     return (vcpu->vm->bsp_vcpu_id == vcpu->vcpu_id);
 }
+
+static void vcpu_init_cpuid(struct vcpu_t *vcpu)
+{
+    struct vcpu_t *vcpu_0;
+    // vCPU #0 fills its own buffer; other vCPUs alias vCPU #0's pointer.
+    if (vcpu->vcpu_id == 0) {
+        cpuid_guest_init(vcpu->guest_cpuid);
+        hax_log(HAX_LOGI, "%s: initialized vcpu[%u].guest_cpuid with default "
+                "feature set.\n", __func__, vcpu->vcpu_id);
+        return;
+    }
+
+    vcpu_0 = hax_get_vcpu(vcpu->vm->vm_id, 0, 0);
+    hax_assert(vcpu_0 != NULL);
+    vcpu->guest_cpuid = vcpu_0->guest_cpuid;
+    hax_log(HAX_LOGI, "%s: referenced vcpu[%u].guest_cpuid to vcpu[%u].\n",
+            __func__, vcpu->vcpu_id, vcpu_0->vcpu_id);
+}
+
+static int vcpu_alloc_cpuid(struct vcpu_t *vcpu)
+{
+    // Returns 1 on success and 0 if the allocation fails. Only vCPU #0 owns a
+    // cpuid_t buffer; every other vCPU succeeds without allocating anything.
+    // NOTE(review): the vcpu_create() call site appears to run before
+    // vcpu->vcpu_id is assigned -- confirm the ID is valid at this point.
+    if (vcpu->vcpu_id == 0) {
+        vcpu->guest_cpuid = hax_vmalloc(sizeof(cpuid_t), HAX_MEM_NONPAGE);
+        if (vcpu->guest_cpuid == NULL)
+            return 0;
+    }
+    return 1;
+}
+
+static void vcpu_free_cpuid(struct vcpu_t *vcpu)
+{
+    // Releases this vCPU's reference to the guest CPUID buffer.
+    // * vCPU #0 frees the memory and clears its pointer.
+    // * Any other vCPU only clears its pointer.
+    // * On vCPU #0, a NULL pointer is reported with a warning and left alone.
+    if (vcpu->vcpu_id != 0) {
+        vcpu->guest_cpuid = NULL;
+        hax_log(HAX_LOGI, "%s: dereferenced vcpu[%u].guest_cpuid from vcpu[0]."
+                "\n", __func__, vcpu->vcpu_id);
+    } else if (vcpu->guest_cpuid == NULL) {
+        hax_log(HAX_LOGW, "%s: already freed vcpu[%u].guest_cpuid.\n",
+                __func__, vcpu->vcpu_id);
+    } else {
+        hax_vfree(vcpu->guest_cpuid, sizeof(cpuid_t));
+        vcpu->guest_cpuid = NULL;
+        hax_log(HAX_LOGI, "%s: freed vcpu[%u].guest_cpuid.\n", __func__,
+                vcpu->vcpu_id);
+    }
+}
diff --git a/docs/api.md b/docs/api.md
index 585bbb29..73aa2687 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -97,6 +97,7 @@ itself as well as the host environment.
   #define HAX_CAP_TUNNEL_PAGE        (1 << 5)
   #define HAX_CAP_DEBUG              (1 << 7)
   #define HAX_CAP_IMPLICIT_RAMBLOCK  (1 << 8)
+  #define HAX_CAP_CPUID              (1 << 9)
   ```
   * (Output) `wstatus`: The first set of capability flags reported to the
 caller. The following bits may be set, while others are reserved:
@@ -124,6 +125,7 @@ feature.
     * `HAX_CAP_64BIT_SETRAM`: If set, `HAX_VM_IOCTL_SET_RAM2` is available.
     * `HAX_CAP_IMPLICIT_RAMBLOCK`: If set, `HAX_VM_IOCTL_SET_RAM2` supports the
 `HAX_RAM_INFO_STANDALONE` flag.
+    * `HAX_CAP_CPUID`: If set, `HAX_VCPU_IOCTL_SET_CPUID` is available.
   * (Output) `win_refcount`: (Windows only)
   * (Output) `mem_quota`: If the global memory cap setting is enabled (q.v.
 `HAX_IOCTL_SET_MEMLIMIT`), reports the current quota on memory allocation (the
@@ -689,3 +691,75 @@ Injects an interrupt into this VCPU.
 * Error codes:
   * `STATUS_INVALID_PARAMETER` (Windows): The input buffer provided by the
 caller is smaller than the size of `uint32_t`.
+
+#### HAX\_VCPU\_IOCTL\_SET\_CPUID
+Defines the VCPU responses to the CPU identification (CPUID) instructions.
+
+HAXM initializes a minimal feature set for guest VCPUs in kernel space. This
+ensures that most modern CPUs can support these basic CPUID features. Only the
+supported CPUID instructions in the feature set will be passed to the physical
+CPU for processing.
+
+This IOCTL is used to dynamically adjust the supported feature set of CPUID for
+guest VCPUs so as to leverage the latest features from modern CPUs. The features
+to be enabled will be incorporated into the feature set, while the features to
+be disabled will be removed. If the physical CPU does not support some specified
+CPUID features, the enabling operation will be ignored. Usually, this IOCTL is
+invoked when the VM is initially configured.
+
+All VCPUs in a VM share the same feature set. This avoids the confusion that
+would arise in a multi-VCPU guest if different VCPUs executing the same CPUID
+instruction produced different results. Sending this IOCTL to any one VCPU
+updates the CPUID features of all VCPUs in the VM.
+
+* Since: Capability `HAX_CAP_CPUID`
+* Parameter: `struct hax_cpuid cpuid`, where
+  ```
+  struct hax_cpuid {
+      uint32_t total;
+      uint32_t pad;
+      hax_cpuid_entry entries[0];
+  } __attribute__ ((__packed__));
+  ```
+  where
+  ```
+  #define HAX_MAX_CPUID_ENTRIES 0x40
+  struct hax_cpuid_entry {
+      uint32_t function;
+      uint32_t index;
+      uint32_t flags;
+      uint32_t eax;
+      uint32_t ebx;
+      uint32_t ecx;
+      uint32_t edx;
+      uint32_t pad[3];
+  } __attribute__ ((__packed__));
+  ```
+  `hax_cpuid` is a variable-length type. The size of `entries` is determined by
+  the caller's allocation in user space. On macOS, the data argument passed to
+  `ioctl()` must be the address of a pointer to `hax_cpuid`, not the address of
+  the structure itself.
+  * (Input) `total`: Number of CPUIDs in entries. The valid value should be in
+the range (0, `HAX_MAX_CPUID_ENTRIES`].
+  * (Input) `pad`: Ignored.
+  * (Input) `entries`: Array of `struct hax_cpuid_entry`. This array contains
+the CPUID feature set of the guest VCPU that is pre-configured by the VM in user
+space.
+
+  For each entry in `struct hax_cpuid_entry`
+  * (Input) `function`: CPUID function code, i.e., initial EAX value.
+  * (Input) `index`: Sub-leaf index.
+  * (Input) `flags`: Feature flags.
+  * (Input) `eax`: EAX register value.
+  * (Input) `ebx`: EBX register value.
+  * (Input) `ecx`: ECX register value.
+  * (Input) `edx`: EDX register value.
+  * (Input) `pad`: Ignored.
+* Error codes:
+  * `STATUS_INVALID_PARAMETER` (Windows): The input buffer provided by the
+caller is smaller than the size of `struct hax_cpuid`.
+  * `STATUS_UNSUCCESSFUL` (Windows): Failed to set CPUID features.
+  * `-E2BIG` (macOS): The input value of `total` is greater than
+`HAX_MAX_CPUID_ENTRIES`.
+  * `-EFAULT` (macOS): Failed to copy contents in `entries` to the memory in
+kernel space.
diff --git a/include/darwin/hax_interface_mac.h b/include/darwin/hax_interface_mac.h
index 43a9719d..2487cdf0 100644
--- a/include/darwin/hax_interface_mac.h
+++ b/include/darwin/hax_interface_mac.h
@@ -70,6 +70,11 @@
 
 #define HAX_IOCTL_VCPU_DEBUG _IOW(0, 0xc9, struct hax_debug_t)
 
+// The ioctl data type is `hax_cpuid *` rather than `hax_cpuid` because
+// `hax_cpuid` is a variable-length type. When calling ioctl(), pass the
+// address of a pointer to `hax_cpuid` as the data argument.
+#define HAX_VCPU_IOCTL_SET_CPUID _IOW(0, 0xca, struct hax_cpuid *)
+
 #define HAX_KERNEL64_CS 0x80
 #define HAX_KERNEL32_CS 0x08
 #ifdef __i386__
diff --git a/include/hax_interface.h b/include/hax_interface.h
index 25c08b31..eb642de4 100644
--- a/include/hax_interface.h
+++ b/include/hax_interface.h
@@ -194,6 +194,7 @@ struct hax_module_version {
 #define HAX_CAP_RAM_PROTECTION     (1 << 6)
 #define HAX_CAP_DEBUG              (1 << 7)
 #define HAX_CAP_IMPLICIT_RAMBLOCK  (1 << 8)
+#define HAX_CAP_CPUID              (1 << 9)
 
 struct hax_capabilityinfo {
     /*
@@ -297,4 +298,28 @@ struct hax_debug_t {
     uint64_t dr[8];
 } PACKED;
 
+#define HAX_MAX_CPUID_ENTRIES 0x40
+
+typedef struct hax_cpuid_entry {
+    uint32_t function;  // CPUID function code, i.e., initial EAX value
+    uint32_t index;     // Sub-leaf index
+    uint32_t flags;     // Feature flags
+    uint32_t eax;       // EAX register value
+    uint32_t ebx;       // EBX register value
+    uint32_t ecx;       // ECX register value
+    uint32_t edx;       // EDX register value
+    uint32_t pad[3];    // Ignored
+} hax_cpuid_entry;
+
+// `hax_cpuid` is a variable-length type. The size of `hax_cpuid` itself is only
+// 8 bytes. `entries` is just a body placeholder, which will not actually occupy
+// memory. The accessible memory of `entries` is decided by the allocation from
+// user space, and the array length is specified by `total`.
+
+typedef struct hax_cpuid {
+    uint32_t total;              // Number of elements in entries
+    uint32_t pad;                // Ignored
+    hax_cpuid_entry entries[0];  // Variable-length body, sized by the caller
+} hax_cpuid;
+
 #endif  // HAX_INTERFACE_H_
diff --git a/include/linux/hax_interface_linux.h b/include/linux/hax_interface_linux.h
index ecb13fd0..97810934 100644
--- a/include/linux/hax_interface_linux.h
+++ b/include/linux/hax_interface_linux.h
@@ -70,6 +70,7 @@
 #define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version)
 
 #define HAX_IOCTL_VCPU_DEBUG _IOW(0, 0xc9, struct hax_debug_t)
+#define HAX_VCPU_IOCTL_SET_CPUID _IOW(0, 0xca, struct hax_cpuid *)
 
 #define HAX_KERNEL64_CS 0x80
 #define HAX_KERNEL32_CS 0x08
diff --git a/platforms/darwin/com_intel_hax_ui.c b/platforms/darwin/com_intel_hax_ui.c
index 22fb15be..8d803da4 100644
--- a/platforms/darwin/com_intel_hax_ui.c
+++ b/platforms/darwin/com_intel_hax_ui.c
@@ -79,6 +79,60 @@ static int hax_vcpu_major = 0;
 #define HAX_VM_DEVFS_FMT_COMPAT   "hax_vm*/vm%02d"
 #define HAX_VM_DEVFS_FMT          "hax_vm/vm%02d"
 
+// Copies a variable-length ioctl argument from user space into a new buffer.
+//
+// |src| is the ioctl data, which holds the user-space address of an |arg_t|
+// header followed by |header.body_len| elements of type |body_t|. On success,
+// |dest| points to a hax_vmalloc()'d copy of the header and body, which must
+// be released with unload_user_data(). On failure, |dest| is NULL, |ret| is
+// set to -EFAULT, -E2BIG or -ENOMEM, and `break` is executed.
+//
+// This is not a single statement: it declares the locals |uaddr|, |size| and
+// |header| in the enclosing block and assigns the caller's |ret|, so it can
+// be expanded at most once per block, directly inside a `switch` case.
+#define load_user_data(dest, src, body_len, body_max, arg_t, body_t)          \
+        user_addr_t uaddr = (user_addr_t)(*(arg_t **)(src));                  \
+        size_t size;                                                          \
+        arg_t header;                                                         \
+        (dest) = NULL;                                                        \
+        if (copyin(uaddr, &header, sizeof(arg_t))) {                          \
+            hax_log(HAX_LOGE, "%s: argument header read error.\n", __func__); \
+            ret = -EFAULT;                                                    \
+            break;                                                            \
+        }                                                                     \
+        if (header.body_len > (body_max)) {                                   \
+            hax_log(HAX_LOGW, "%s: %u exceeds argument body maximum %d.\n",   \
+                    __func__, header.body_len, (body_max));                   \
+            ret = -E2BIG;                                                     \
+            break;                                                            \
+        }                                                                     \
+        size = sizeof(arg_t) + header.body_len * sizeof(body_t);              \
+        (dest) = hax_vmalloc(size, HAX_MEM_NONPAGE);                          \
+        if ((dest) == NULL) {                                                 \
+            hax_log(HAX_LOGE, "%s: failed to allocate memory.\n", __func__);  \
+            ret = -ENOMEM;                                                    \
+            break;                                                            \
+        }                                                                     \
+        if (copyin(uaddr, (dest), size)) {                                    \
+            hax_log(HAX_LOGE, "%s: argument read error.\n", __func__);        \
+            unload_user_data(dest);                                           \
+            (dest) = NULL;                                                    \
+            ret = -EFAULT;                                                    \
+            break;                                                            \
+        }                                                                     \
+        /* The header was fetched from user space twice and may have changed  \
+         * in between; keep the length that was validated above. */           \
+        (dest)->body_len = header.body_len;
+
+// Frees the buffer allocated by load_user_data(). Relies on the |size| local
+// declared by load_user_data(), so both must be expanded in the same block.
+#define unload_user_data(dest)           \
+        do {                             \
+            if ((dest) != NULL) {        \
+                hax_vfree((dest), size); \
+            }                            \
+        } while (0)
+
 static void handle_unknown_ioctl(dev_t dev, ulong cmd, struct proc *p);
 
 static struct vcpu_t * get_vcpu_by_dev(dev_t dev) {
@@ -243,6 +277,14 @@ static int hax_vcpu_ioctl(dev_t dev, ulong cmd, caddr_t data, int flag,
             vcpu_debug(cvcpu, hax_debug);
             break;
         }
+        case HAX_VCPU_IOCTL_SET_CPUID: {
+            struct hax_cpuid *cpuid;  // Assigned by load_user_data()
+            load_user_data(cpuid, data, total, HAX_MAX_CPUID_ENTRIES, hax_cpuid,
+                           hax_cpuid_entry);  // Sets ret, breaks on error
+            ret = vcpu_set_cpuid(cvcpu, cpuid);
+            unload_user_data(cpuid);  // Frees the buffer allocated above
+            break;
+        }
         default: {
             handle_unknown_ioctl(dev, cmd, p);
             ret = -ENOSYS;
diff --git a/platforms/linux/components.c b/platforms/linux/components.c
index 0f4140b9..a9adcd96 100644
--- a/platforms/linux/components.c
+++ b/platforms/linux/components.c
@@ -42,6 +42,68 @@
 #define HAX_VM_DEVFS_FMT    "hax_vm/vm%02d"
 #define HAX_VCPU_DEVFS_FMT  "hax_vm%02d/vcpu%02d"
 
+// Copies a variable-length ioctl argument from user space into a new buffer.
+//
+// |src| is the ioctl argument: the user-space address of a pointer to an
+// |arg_t| header followed by |header.body_len| elements of type |body_t|. The
+// pointer itself lives in user memory, so it is fetched with copy_from_user()
+// rather than dereferenced directly. On success, |dest| points to a
+// hax_vmalloc()'d copy of the header and body, which must be released with
+// unload_user_data(). On failure, |dest| is NULL, |ret| is set to -EFAULT,
+// -E2BIG or -ENOMEM, and `break` is executed.
+//
+// This is not a single statement: it declares the locals |from|, |size| and
+// |header| in the enclosing block and assigns the caller's |ret|, so it can
+// be expanded at most once per block, directly inside a `switch` case.
+#define load_user_data(dest, src, body_len, body_max, arg_t, body_t)          \
+        arg_t __user *from;                                                   \
+        size_t size;                                                          \
+        arg_t header;                                                         \
+        (dest) = NULL;                                                        \
+        if (copy_from_user(&from, (void __user *)(src), sizeof(from))) {      \
+            hax_log(HAX_LOGE, "%s: argument pointer read error.\n",           \
+                    __func__);                                                \
+            ret = -EFAULT;                                                    \
+            break;                                                            \
+        }                                                                     \
+        if (copy_from_user(&header, from, sizeof(arg_t))) {                   \
+            hax_log(HAX_LOGE, "%s: argument header read error.\n", __func__); \
+            ret = -EFAULT;                                                    \
+            break;                                                            \
+        }                                                                     \
+        if (header.body_len > (body_max)) {                                   \
+            hax_log(HAX_LOGW, "%s: %u exceeds argument body maximum %d.\n",   \
+                    __func__, header.body_len, (body_max));                   \
+            ret = -E2BIG;                                                     \
+            break;                                                            \
+        }                                                                     \
+        size = sizeof(arg_t) + header.body_len * sizeof(body_t);              \
+        (dest) = hax_vmalloc(size, HAX_MEM_NONPAGE);                          \
+        if ((dest) == NULL) {                                                 \
+            hax_log(HAX_LOGE, "%s: failed to allocate memory.\n", __func__);  \
+            ret = -ENOMEM;                                                    \
+            break;                                                            \
+        }                                                                     \
+        if (copy_from_user((dest), from, size)) {                             \
+            hax_log(HAX_LOGE, "%s: argument read error.\n", __func__);        \
+            unload_user_data(dest);                                           \
+            (dest) = NULL;                                                    \
+            ret = -EFAULT;                                                    \
+            break;                                                            \
+        }                                                                     \
+        /* The header was fetched from user space twice and may have changed  \
+         * in between; keep the length that was validated above. */           \
+        (dest)->body_len = header.body_len;
+
+// Frees the buffer allocated by load_user_data(). Relies on the |size| local
+// declared by load_user_data(), so both must be expanded in the same block.
+#define unload_user_data(dest)           \
+        do {                             \
+            if ((dest) != NULL) {        \
+                hax_vfree((dest), size); \
+            }                            \
+        } while (0)
+
 typedef struct hax_vm_linux_t {
     struct vm_t *cvm;
     int id;
@@ -445,6 +479,14 @@ static long hax_vcpu_ioctl(struct file *filp, unsigned int cmd,
         vcpu_debug(cvcpu, &hax_debug);
         break;
     }
+    case HAX_VCPU_IOCTL_SET_CPUID: {
+        struct hax_cpuid *cpuid;  // Assigned by load_user_data()
+        load_user_data(cpuid, argp, total, HAX_MAX_CPUID_ENTRIES, hax_cpuid,
+                       hax_cpuid_entry);  // Sets ret, breaks on error
+        ret = vcpu_set_cpuid(cvcpu, cpuid);
+        unload_user_data(cpuid);  // Frees the buffer allocated above
+        break;
+    }
     default:
         // TODO: Print information about the process that sent the ioctl.
         hax_log(HAX_LOGE, "Unknown VCPU IOCTL 0x%lx\n", cmd);
diff --git a/platforms/windows/hax_entry.c b/platforms/windows/hax_entry.c
index 2d184b6b..16167d14 100644
--- a/platforms/windows/hax_entry.c
+++ b/platforms/windows/hax_entry.c
@@ -431,6 +431,23 @@ NTSTATUS HaxVcpuControl(PDEVICE_OBJECT DeviceObject,
             vcpu_debug(cvcpu, (struct hax_debug_t*)inBuf);
             break;
         }
+        case HAX_VCPU_IOCTL_SET_CPUID: {
+            hax_cpuid *cpuid = (hax_cpuid *)inBuf;
+            // The header must be present before |total| can be read, and
+            // |total| must be bounded before it is used in the size
+            // computation, which could otherwise wrap on 32-bit builds.
+            if (inBufLength < sizeof(hax_cpuid) ||
+                    cpuid->total > HAX_MAX_CPUID_ENTRIES ||
+                    inBufLength < sizeof(hax_cpuid) + cpuid->total *
+                    sizeof(hax_cpuid_entry)) {
+                ret = STATUS_INVALID_PARAMETER;
+                goto done;
+            }
+            if (vcpu_set_cpuid(cvcpu, cpuid)) {
+                ret = STATUS_UNSUCCESSFUL;
+            }
+            break;
+        }
         default:
             hax_log(HAX_LOGE, "Unknow vcpu ioctl %lx\n",
                     irpSp->Parameters.DeviceIoControl.IoControlCode);
diff --git a/platforms/windows/hax_entry.h b/platforms/windows/hax_entry.h
index 52613f7d..4659fe32 100644
--- a/platforms/windows/hax_entry.h
+++ b/platforms/windows/hax_entry.h
@@ -165,5 +165,7 @@ extern PDRIVER_OBJECT HaxDriverObject;
 
 #define HAX_IOCTL_VCPU_DEBUG \
         CTL_CODE(HAX_DEVICE_TYPE, 0x916, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_CPUID \
+        CTL_CODE(HAX_DEVICE_TYPE, 0x917, METHOD_BUFFERED, FILE_ANY_ACCESS)
 
 #endif // HAX_WINDOWS_HAX_ENTRY_H_