diff --git a/bochs/CHANGES b/bochs/CHANGES
index ebd9b17fc..e6557162e 100644
--- a/bochs/CHANGES
+++ b/bochs/CHANGES
@@ -10,7 +10,7 @@ Brief summary :
   ! Implemented Linear Address Separation (LASS) extension
   ! Implemented User-Level Interrupt (UINTR) extension
   ! Implemented recently published Intel instruction sets:
-    - MOVDIRI, AVX512 BF16, AVX IFMA52, VNNI-INT8, VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3, SM4, SHA512, WRMSRNS, WAITPKG, SERIALIZE
+    - MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3, SM4, SHA512, WRMSRNS, WAITPKG, SERIALIZE
   - Improved 64-bit guest support in Bochs internal debugger, added new internal debugger commands
   - Bochs debugger enhanced with new commands (setpmem, loadmem, deref, ...)
     Enhanced magic breakpoint capabilities. Refer to user documentation for more details.
@@ -34,7 +34,7 @@ Detailed change log :
   - Implemented Linear Address Separation (LASS) extension
   - Implemented User-Level Interrupt (UINTR) extension
   - Implemented recently published Intel instruction sets:
-    - MOVDIRI, AVX512 BF16, AVX IFMA52, VNNI-INT8, VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3, SM4, SHA512, WRMSRNS, WAITPKG, SERIALIZE
+    - MOVDIRI/MOVDIR64B, AVX512 BF16, AVX IFMA52, VNNI-INT8/VNNI-INT16, AVX-NE-CONVERT, CMPCCXADD, SM3, SM4, SHA512, WRMSRNS, WAITPKG, SERIALIZE
 
 - Bochs Debugger and Instrumentation
   - Updated Bochs instrumentation examples for new disassembler introduced in Bochs 2.7 release.
diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h
index 0274e3992..42344229c 100644
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@@ -2624,6 +2624,8 @@ public: // for now...
 #endif
   /* CET instructions */
 
+  BX_SMF void MOVDIR64B(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+
 #if BX_SUPPORT_AVX
   /* AVX */
   BX_SMF void VZEROUPPER(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
diff --git a/bochs/cpu/cpudb/intel/tigerlake.cc b/bochs/cpu/cpudb/intel/tigerlake.cc
index 3adbd95aa..cd25e071e 100644
--- a/bochs/cpu/cpudb/intel/tigerlake.cc
+++ b/bochs/cpu/cpudb/intel/tigerlake.cc
@@ -133,6 +133,7 @@ tigerlake_t::tigerlake_t(BX_CPU_C *cpu):
   enable_cpu_extension(BX_ISA_UMIP);
   enable_cpu_extension(BX_ISA_RDPID);
   enable_cpu_extension(BX_ISA_MOVDIRI);
+  enable_cpu_extension(BX_ISA_MOVDIR64B);
   enable_cpu_extension(BX_ISA_SCA_MITIGATIONS);
 }
 
@@ -548,7 +549,7 @@ void tigerlake_t::get_std_cpuid_leaf_7(Bit32u subfunction, cpuid_function_t *lea
     //   [25:25] CLDEMOTE: CLDEMOTE instruction support
     //   [26:26] reserved
     // * [27:27] MOVDIRI: MOVDIRI instruction support
-    // ! [28:28] MOVDIRI64: MOVDIRI64 instruction support (not implemented yet)
+    // * [28:28] MOVDIR64B: MOVDIR64B instruction support
     //   [29:29] ENQCMD: Enqueue Stores support
     //   [30:30] SGX_LC: SGX Launch Configuration
     //   [31:31] PKS: Protection keys for supervisor-mode pages
diff --git a/bochs/cpu/cpuid.cc b/bochs/cpu/cpuid.cc
index 897af8abf..e9027c7ce 100644
--- a/bochs/cpu/cpuid.cc
+++ b/bochs/cpu/cpuid.cc
@@ -1040,7 +1040,10 @@ Bit32u bx_cpuid_t::get_std_cpuid_leaf_7_ecx(Bit32u extra) const
   if (is_cpu_extension_supported(BX_ISA_MOVDIRI))
     ecx |= BX_CPUID_STD7_SUBLEAF0_ECX_MOVDIRI;
 
-  // [28:28] MOVDIR64B: MOVDIR64B instruction support - not supported
+  // [28:28] MOVDIR64B: MOVDIR64B instruction support
+  if (is_cpu_extension_supported(BX_ISA_MOVDIR64B))
+    ecx |= BX_CPUID_STD7_SUBLEAF0_ECX_MOVDIR64B;
+
   // [29:29] ENQCMD: Enqueue Stores support - not supported
   // [30:30] SGX_LC: SGX Launch Configuration - not supported
 
diff --git a/bochs/cpu/decoder/features.h b/bochs/cpu/decoder/features.h
index 794fd3adc..d86064966 100644
--- a/bochs/cpu/decoder/features.h
+++ b/bochs/cpu/decoder/features.h
@@ -142,3 +142,4 @@ x86_feature(BX_ISA_SERIALIZE, "serialize")                              /* SERIA
 x86_feature(BX_ISA_LASS, "lass")                                        /* Linear Address Space Separation support */
 x86_feature(BX_ISA_UINTR, "uintr")                                      /* User Level Interrupts support */
 x86_feature(BX_ISA_MOVDIRI, "movdiri")                                  /* MOVDIRI instruction support */
+x86_feature(BX_ISA_MOVDIR64B, "movdir64b")                              /* MOVDIR64B instruction support */
diff --git a/bochs/cpu/decoder/fetchdecode32.cc b/bochs/cpu/decoder/fetchdecode32.cc
index f43c19311..095b05b73 100644
--- a/bochs/cpu/decoder/fetchdecode32.cc
+++ b/bochs/cpu/decoder/fetchdecode32.cc
@@ -890,7 +890,7 @@ static BxOpcodeDecodeDescriptor32 decode32_descriptor[] =
 #endif
    /* 0F 38 F6 */ { &decoder32_modrm, BxOpcodeTable0F38F6 },
    /* 0F 38 F7 */ { &decoder_ud32, NULL },
-   /* 0F 38 F8 */ { &decoder_ud32, NULL },
+   /* 0F 38 F8 */ { &decoder32_modrm, BxOpcodeTable0F38F8 },
    /* 0F 38 F9 */ { &decoder32_modrm, BxOpcodeTable0F38F9 },
    /* 0F 38 FA */ { &decoder_ud32, NULL },
    /* 0F 38 FB */ { &decoder_ud32, NULL },
diff --git a/bochs/cpu/decoder/fetchdecode64.cc b/bochs/cpu/decoder/fetchdecode64.cc
index 264f644fd..b36af906d 100644
--- a/bochs/cpu/decoder/fetchdecode64.cc
+++ b/bochs/cpu/decoder/fetchdecode64.cc
@@ -902,7 +902,7 @@ static BxOpcodeDecodeDescriptor64 decode64_descriptor[] =
 #endif
    /* 0F 38 F6 */ { &decoder64_modrm, BxOpcodeTable0F38F6 },
    /* 0F 38 F7 */ { &decoder_ud64, NULL },
-   /* 0F 38 F8 */ { &decoder_ud64, NULL },
+   /* 0F 38 F8 */ { &decoder64_modrm, BxOpcodeTable0F38F8 },
    /* 0F 38 F9 */ { &decoder64_modrm, BxOpcodeTable0F38F9 },
    /* 0F 38 FA */ { &decoder_ud64, NULL },
    /* 0F 38 FB */ { &decoder_ud64, NULL },
diff --git a/bochs/cpu/decoder/fetchdecode_opmap_0f38.h b/bochs/cpu/decoder/fetchdecode_opmap_0f38.h
index 8cb3181a8..47fac51cb 100644
--- a/bochs/cpu/decoder/fetchdecode_opmap_0f38.h
+++ b/bochs/cpu/decoder/fetchdecode_opmap_0f38.h
@@ -215,6 +215,14 @@ static const Bit64u BxOpcodeTable0F38F6[] = {
   last_opcode(ATTR_SSE_PREFIX_F3, BX_IA_ADOX_GdEd)
 };
 
+// opcode 0F 38 F8 - MOVDIR64B (66 prefix, memory operand form only)
+static const Bit64u BxOpcodeTable0F38F8[] = {
+#if BX_SUPPORT_X86_64
+  form_opcode(ATTR_OS64 | ATTR_MOD_MEM | ATTR_SSE_PREFIX_66, BX_IA_MOVDIR64B_GqMdq), // NOTE(review): SDM sizes the register operand by address size, not operand size - confirm ATTR_OS64 is the intended selector
+#endif
+  last_opcode(            ATTR_MOD_MEM | ATTR_SSE_PREFIX_66, BX_IA_MOVDIR64B_GdMdq)
+};
+
 // opcode 0F 38 F9
 static const Bit64u BxOpcodeTable0F38F9[] = {
 #if BX_SUPPORT_X86_64
diff --git a/bochs/cpu/decoder/ia_opcodes.def b/bochs/cpu/decoder/ia_opcodes.def
index 6c1d2d2d0..8e6789e3e 100644
--- a/bochs/cpu/decoder/ia_opcodes.def
+++ b/bochs/cpu/decoder/ia_opcodes.def
@@ -2745,6 +2745,12 @@ bx_define_opcode(BX_IA_MOVDIRI_MdGd, "movdiri", "movdiril", &BX_CPU_C::MOV32_EdG
 bx_define_opcode(BX_IA_MOVDIRI_MqGq, "movdiri", "movdiriq", &BX_CPU_C::MOV_EqGqM, NULL, BX_ISA_MOVDIRI, OP_Mq, OP_Gq, OP_NONE, OP_NONE, 0)
 #endif
 
+// MOVDIR64B (opcode 66 0F 38 F8 /r): both forms dispatch to BX_CPU_C::MOVDIR64B and are gated by BX_ISA_MOVDIR64B
+bx_define_opcode(BX_IA_MOVDIR64B_GdMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOVDIR64B, NULL, BX_ISA_MOVDIR64B, OP_Gd, OP_M, OP_NONE, OP_NONE, 0)
+#if BX_SUPPORT_X86_64
+bx_define_opcode(BX_IA_MOVDIR64B_GqMdq, "movdir64b", "movdir64b", &BX_CPU_C::MOVDIR64B, NULL, BX_ISA_MOVDIR64B, OP_Gq, OP_M, OP_NONE, OP_NONE, 0)
+#endif
+
 #if BX_SUPPORT_AVX
 bx_define_opcode(BX_IA_KADDW_KGwKHwKEw, "kaddw", "kaddw", &BX_CPU_C::BxError, &BX_CPU_C::KADDW_KGwKHwKEwR, BX_ISA_AVX512_DQ, OP_KGw, OP_KHw, OP_KEw, OP_NONE, BX_PREPARE_OPMASK)
 bx_define_opcode(BX_IA_KADDQ_KGqKHqKEq, "kaddq", "kaddq", &BX_CPU_C::BxError, &BX_CPU_C::KADDQ_KGqKHqKEqR, BX_ISA_AVX512_BW, OP_KGq, OP_KHq, OP_KEq, OP_NONE, BX_PREPARE_OPMASK)
diff --git a/bochs/cpu/proc_ctrl.cc b/bochs/cpu/proc_ctrl.cc
index 7d904d22b..d34a4da62 100644
--- a/bochs/cpu/proc_ctrl.cc
+++ b/bochs/cpu/proc_ctrl.cc
@@ -322,6 +322,26 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::CLZERO(bxInstruction_c *i)
   BX_NEXT_INSTR(i);
 }
 
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::MOVDIR64B(bxInstruction_c *i)
+{
+#if BX_CPU_LEVEL >= 6
+  // MOVDIR64B reg, m512: the ModRM memory operand is the 64-byte SOURCE (no
+  // alignment requirement); the register holds the DESTINATION offset in ES.
+  BxPackedZmmRegister zmm; // zmm is always made available even if EVEX is not compiled in
+  bx_address src_eaddr = BX_CPU_RESOLVE_ADDR(i);
+  read_virtual_zmmword(i->seg(), src_eaddr, &zmm);
+
+#if BX_SUPPORT_X86_64
+  bx_address dst_eaddr = BX_READ_64BIT_REG(i->dst()) & i->asize_mask();
+#else
+  bx_address dst_eaddr = BX_READ_32BIT_REG(i->dst()) & i->asize_mask();
+#endif
+  // destination is always ES-relative (no override) and must be 64-byte aligned
+  write_virtual_zmmword_aligned(BX_SEG_REG_ES, dst_eaddr, &zmm);
+#endif
+  BX_NEXT_INSTR(i);
+}
+
 void BX_CPU_C::handleCpuModeChange(void)
 {
   unsigned mode = BX_CPU_THIS_PTR cpu_mode;