diff --git a/definitions/src/generate_asm_constants.rs b/definitions/src/generate_asm_constants.rs
index 29a135a5..f582bd6f 100644
--- a/definitions/src/generate_asm_constants.rs
+++ b/definitions/src/generate_asm_constants.rs
@@ -150,6 +150,10 @@ fn main() {
         "#define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_LOAD_RESERVATION_ADDRESS {}",
         (&m.load_reservation_address as *const u64 as usize) - m_address
     );
+    println!(
+        "#define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_VERSION {}",
+        (&m.version as *const u32 as usize) - m_address
+    );
     println!(
         "#define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_MEMORY_SIZE {}",
         (&m.memory_size as *const u64 as usize) - m_address
diff --git a/src/machine/asm/cdefinitions_generated.h b/src/machine/asm/cdefinitions_generated.h
index c006ce62..3d6e5156 100644
--- a/src/machine/asm/cdefinitions_generated.h
+++ b/src/machine/asm/cdefinitions_generated.h
@@ -44,6 +44,7 @@
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_CHAOS_MODE 296
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_CHAOS_SEED 300
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_LOAD_RESERVATION_ADDRESS 304
+#define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_VERSION 316
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_MEMORY_SIZE 320
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_FRAMES_SIZE 328
 #define CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_FLAGS_SIZE 336
diff --git a/src/machine/asm/execute_aarch64.S b/src/machine/asm/execute_aarch64.S
index 1d2b36c8..69776f11 100644
--- a/src/machine/asm/execute_aarch64.S
+++ b/src/machine/asm/execute_aarch64.S
@@ -87,6 +87,16 @@
 #define WRITE_RS3(v) \
   str v, REGISTER_ADDRESS(RS3)
 
+/*
+ * This is added to replicate a bug in x64 assembly VM
+ */
+#define LOAD_PC(reg1, reg1w, reg2, reg2w, temp_regw) \
+  ldr reg1, [MACHINE, CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_PC] SEP \
+  ubfx reg2, reg1, 0, 32 SEP \
+  ldrb temp_regw, [MACHINE, CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_VERSION] SEP \
+  cmp temp_regw, 2 SEP \
+  csel reg2, reg2, reg1, lo
+
 #define NEXT_INST \
   ldr TEMP1, [INST_ARGS] SEP \
   add INST_ARGS, INST_ARGS, 8 SEP \
@@ -305,8 +315,7 @@ ckb_vm_x64_execute:
   add MEMORY_OFFSET_ADDRESS, MEMORY_OFFSET_ADDRESS, CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_MEMORY_L
 
 .CKB_VM_ASM_LABEL_OP_CUSTOM_TRACE_END:
-  ldr TEMP2, [MACHINE, CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_PC]
-  mov TEMP3, TEMP2
+  LOAD_PC(TEMP2, TEMP2w, TEMP3, TEMP3w, TEMP4w)
   lsr TEMP2, TEMP2, 2
   and TEMP2, TEMP2, 8191
   mov TEMP1, CKB_VM_ASM_TRACE_STRUCT_SIZE
@@ -346,8 +355,7 @@ ckb_vm_x64_execute:
   ldarb TEMP2w, [PAUSE]
   cmp TEMP2, ZERO_VALUE
   bne .exit_pause
-  ldr TEMP2, [MACHINE, CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_PC]
-  mov TEMP3, TEMP2
+  LOAD_PC(TEMP2, TEMP2w, TEMP3, TEMP3w, TEMP4w)
   lsr TEMP2, TEMP2, 2
   and TEMP2, TEMP2, 8191
   mov TEMP1, CKB_VM_ASM_TRACE_STRUCT_SIZE
diff --git a/src/machine/asm/execute_x64.S b/src/machine/asm/execute_x64.S
index 19cc35da..1d79fcbe 100644
--- a/src/machine/asm/execute_x64.S
+++ b/src/machine/asm/execute_x64.S
@@ -349,6 +349,19 @@
 #define WRITE_RS3(v) \
   movq v, REGISTER_ADDRESS(RS3); \
 
+/*
+ * This macro is added to cope with an implementation bug in the x64 assembly code,
+ * where address bigger than u32 limit will be truncated unexpectedly.
+ *
+ * Useful tip: https://stackoverflow.com/a/66416462
+ */
+#define LOAD_PC(reg1, reg1d, reg2, reg2d, temp_regd) \
+  movq PC_ADDRESS, reg1; \
+  mov reg1d, reg2d; \
+  movzbl CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_VERSION(MACHINE), temp_regd; \
+  cmp $2, temp_regd; \
+  cmovae reg1, reg2
+
 /*
  * Notice we could replace the last 3 instructions with the following:
  *
@@ -419,8 +432,7 @@ ckb_vm_x64_execute:
   movq CKB_VM_ASM_ASM_CORE_MACHINE_OFFSET_MEMORY_SIZE(MACHINE), MEMORY_SIZE
 .p2align 3
 .CKB_VM_ASM_LABEL_OP_CUSTOM_TRACE_END:
-  movq PC_ADDRESS, %rax
-  mov %eax, %ecx
+  LOAD_PC(%rax, %eax, %rcx, %ecx, TEMP3d)
   shr $2, %eax
   andq $8191, %rax
   imul $CKB_VM_ASM_TRACE_STRUCT_SIZE, %eax
@@ -440,7 +452,6 @@ ckb_vm_x64_execute:
   addq %rdx, PC_ADDRESS
   /* Prefetch trace info for the consecutive block */
   movq PC_ADDRESS, %rax
-  mov %eax, %ecx
   shr $2, %eax
   andq $8191, %rax
   imul $CKB_VM_ASM_TRACE_STRUCT_SIZE, %eax
@@ -453,8 +464,7 @@ ckb_vm_x64_execute:
   movzbl 0(PAUSE), %eax
   cmp $0, %rax
   jnz .exit_pause
-  movq PC_ADDRESS, %rax
-  mov %eax, %ecx
+  LOAD_PC(%rax, %eax, %rcx, %ecx, TEMP3d)
   shr $2, %eax
   andq $8191, %rax
   imul $CKB_VM_ASM_TRACE_STRUCT_SIZE, %eax
diff --git a/src/machine/trace.rs b/src/machine/trace.rs
index be25817a..8be3e94c 100644
--- a/src/machine/trace.rs
+++ b/src/machine/trace.rs
@@ -8,7 +8,7 @@ use super::{
         },
         Error,
     },
-    CoreMachine, DefaultMachine, Machine, SupportMachine,
+    CoreMachine, DefaultMachine, Machine, SupportMachine, VERSION2,
 };
 use bytes::Bytes;
 
@@ -147,7 +147,13 @@ impl<Inner: SupportMachine> TraceMachine<Inner> {
             }
             let pc = self.machine.pc().to_u64();
             let slot = calculate_slot(pc);
-            if pc != self.traces[slot].address || self.traces[slot].instruction_count == 0 {
+            // This is to replicate a bug in x64 VM
+            let address_match = if self.machine.version() < VERSION2 {
+                (pc as u32 as u64) == self.traces[slot].address
+            } else {
+                pc == self.traces[slot].address
+            };
+            if (!address_match) || self.traces[slot].instruction_count == 0 {
                 self.traces[slot] = Trace::default();
                 let mut current_pc = pc;
                 let mut i = 0;
diff --git a/tests/programs/asm_trace_bug b/tests/programs/asm_trace_bug
new file mode 100644
index 00000000..3b2f5031
Binary files /dev/null and b/tests/programs/asm_trace_bug differ
diff --git a/tests/test_versions.rs b/tests/test_versions.rs
index 74548dc6..40921bcd 100644
--- a/tests/test_versions.rs
+++ b/tests/test_versions.rs
@@ -1,10 +1,11 @@
 #![cfg(has_asm)]
+use ckb_vm::cost_model::constant_cycles;
 use ckb_vm::machine::asm::{AsmCoreMachine, AsmMachine};
-use ckb_vm::machine::{VERSION0, VERSION1};
+use ckb_vm::machine::{VERSION0, VERSION1, VERSION2};
 use ckb_vm::memory::{FLAG_DIRTY, FLAG_FREEZED};
 use ckb_vm::{
     CoreMachine, DefaultCoreMachine, DefaultMachine, DefaultMachineBuilder, Error, Memory,
-    SparseMemory, WXorXMemory, ISA_IMC, RISCV_PAGESIZE,
+    SparseMemory, TraceMachine, WXorXMemory, ISA_A, ISA_B, ISA_IMC, ISA_MOP, RISCV_PAGESIZE,
 };
 use std::fs;
 
@@ -331,3 +332,81 @@ pub fn test_asm_version1_cadd_hints() {
     assert!(result.is_ok());
     assert_eq!(result.unwrap(), 0);
 }
+
+#[test]
+pub fn test_asm_version1_asm_trace_bug() {
+    let buffer = fs::read("tests/programs/asm_trace_bug").unwrap().into();
+
+    let mut machine = {
+        let asm_core = AsmCoreMachine::new(ISA_IMC | ISA_A | ISA_B | ISA_MOP, VERSION1, 2000);
+        let machine = DefaultMachineBuilder::<Box<AsmCoreMachine>>::new(asm_core)
+            .instruction_cycle_func(Box::new(constant_cycles))
+            .build();
+        AsmMachine::new(machine)
+    };
+    machine.load_program(&buffer, &[]).unwrap();
+    let result = machine.run();
+
+    assert_eq!(result, Err(Error::CyclesExceeded));
+}
+
+#[test]
+pub fn test_asm_version2_asm_trace_bug() {
+    let buffer = fs::read("tests/programs/asm_trace_bug").unwrap().into();
+
+    let mut machine = {
+        let asm_core = AsmCoreMachine::new(ISA_IMC | ISA_A | ISA_B | ISA_MOP, VERSION2, 2000);
+        let machine = DefaultMachineBuilder::<Box<AsmCoreMachine>>::new(asm_core)
+            .instruction_cycle_func(Box::new(constant_cycles))
+            .build();
+        AsmMachine::new(machine)
+    };
+    machine.load_program(&buffer, &[]).unwrap();
+    let result = machine.run();
+
+    assert_eq!(result, Err(Error::MemOutOfBound));
+}
+
+#[test]
+pub fn test_trace_version1_asm_trace_bug() {
+    let buffer = fs::read("tests/programs/asm_trace_bug").unwrap().into();
+
+    let mut machine = {
+        let core_machine = DefaultCoreMachine::<u64, WXorXMemory<SparseMemory<u64>>>::new(
+            ISA_IMC | ISA_A | ISA_B | ISA_MOP,
+            VERSION1,
+            2000,
+        );
+        TraceMachine::new(
+            DefaultMachineBuilder::new(core_machine)
+                .instruction_cycle_func(Box::new(constant_cycles))
+                .build(),
+        )
+    };
+    machine.load_program(&buffer, &[]).unwrap();
+    let result = machine.run();
+
+    assert_eq!(result, Err(Error::CyclesExceeded));
+}
+
+#[test]
+pub fn test_trace_version2_asm_trace_bug() {
+    let buffer = fs::read("tests/programs/asm_trace_bug").unwrap().into();
+
+    let mut machine = {
+        let core_machine = DefaultCoreMachine::<u64, WXorXMemory<SparseMemory<u64>>>::new(
+            ISA_IMC | ISA_A | ISA_B | ISA_MOP,
+            VERSION2,
+            2000,
+        );
+        TraceMachine::new(
+            DefaultMachineBuilder::new(core_machine)
+                .instruction_cycle_func(Box::new(constant_cycles))
+                .build(),
+        )
+    };
+    machine.load_program(&buffer, &[]).unwrap();
+    let result = machine.run();
+
+    assert_eq!(result, Err(Error::MemOutOfBound));
+}