diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 1d541285..44181f7b 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -589,7 +589,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
         code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask));
         code.andps(tmp, result);
-        FCODE(ucomis)(result, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
+        FCODE(ucomis)(tmp, code.MConst(xword, fsize == 32 ? f32_smallest_normal : f64_smallest_normal));
         code.jz(fallback, code.T_NEAR);
         code.L(end);
 
diff --git a/tests/A64/a64.cpp b/tests/A64/a64.cpp
index e49767c7..c068ebf9 100644
--- a/tests/A64/a64.cpp
+++ b/tests/A64/a64.cpp
@@ -428,3 +428,25 @@ TEST_CASE("A64: FMLA.4S (0x80800000)", "[a64]") {
 
     REQUIRE(jit.GetVector(11) == Vector{0xc79b271e7fc00000, 0x7fc0000080000000});
 }
+
+// Regression test for FMADD under flush-to-zero, where x64 and AArch64 behave differently:
+// AArch64 performs rounding after flushing-to-zero.
+// x64 performs rounding before flushing-to-zero.
+TEST_CASE("A64: FMADD (0x80800000)", "[a64]") {
+    A64TestEnv env;
+    Dynarmic::A64::Jit jit{Dynarmic::A64::UserConfig{&env}};
+
+    env.code_mem.emplace_back(0x1f0f7319); // FMADD S25, S24, S15, S28
+    env.code_mem.emplace_back(0x14000000); // B .
+
+    jit.SetPC(0);
+    jit.SetVector(24, {0x00800000, 0}); // S24 (first multiplicand): smallest positive normal binary32
+    jit.SetVector(15, {0x0ba98d27, 0}); // S15 (second multiplicand): a very small positive normal binary32
+    jit.SetVector(28, {0x80800000, 0}); // S28 (addend): negative smallest normal binary32
+    jit.SetFpcr(0x01000000); // Only bit 24 set; per the Arm ARM this is FPCR.FZ (flush-to-zero)
+
+    env.ticks_left = 2; // Presumably one tick per instruction (FMADD, then B .) -- confirm against A64TestEnv
+    jit.Run();
+
+    REQUIRE(jit.GetVector(25) == Vector{0x80000000, 0}); // Low 32 bits of V25 (S25) must be -0.0, i.e. flushed with sign kept
+}