From 61b0c908b864d6c357552c51491e889ea2a0d2e3 Mon Sep 17 00:00:00 2001
From: Brendan Fletcher
Date: Wed, 11 Dec 2024 19:16:41 -0500
Subject: [PATCH] ARM: implement multiplication carry flag algorithm

---
Reviewer notes (commentary only; this section is neither commit message nor
diff content).

What the hunks below do:

* TickMultiply changes its return type from void to bool. It returns true
  when its mask has reached zero at exit, which the added comment calls
  "full ticks used".
* MultiplyCarrySimple(multiplier) is true only when the top two bits of the
  multiplier are binary 10.
* MultiplyCarryLo(multiplicand, multiplier, accum) performs four booth steps
  per pass and repeats until the sign-extended low bits it has consumed are
  equal to the whole multiplier. The flag is bit 31 of the final carry word.
* MultiplyCarryHi(multiplicand, multiplier, accum_hi) uses only the last
  three booth factors, after shifting the multiplicand right by 6 and the
  multiplier right by 26 (signed or unsigned shift chosen by sign_extend).
* Thumb MUL used to force C to 0. It now takes C from MultiplyCarrySimple
  when TickMultiply returned true and from MultiplyCarryLo otherwise.
* ARM_Multiply makes the same choice inside its set_flags branch and passes
  the accumulate operand (0 when not accumulating) to MultiplyCarryLo.
* ARM_MultiplyLong switches result from s64 to u64, builds the 64-bit
  accumulate value with a single 32-bit shift, and inside its set_flags
  branch takes C from MultiplyCarryHi (high accumulator word) when
  TickMultiply returned true, else from MultiplyCarryLo (low accumulator
  word).

NOTE(review): in MultiplyCarryLo, shift starts at 29 and drops by 2 per
  booth step, so a fourth pass of the do/while loop would end with a shift
  count of -1. Every call site in this patch reaches MultiplyCarryLo only
  when TickMultiply returned false; presumably that guarantees the loop
  exits within three passes. Confirm against the part of TickMultiply that
  this patch does not show.
NOTE(review): the (s32)x >> n expressions assume an arithmetic right shift
  for negative values. Confirm for every supported compiler.
NOTE(review): both "template" lines below have no parameter list although
  MultiplyCarryHi reads sign_extend, and the From: header has no address.
  These look lost in extraction. The TickMultiply and MultiplyCarryHi calls
  may have lost template arguments the same way. Restore from the upstream
  commit before applying.
NOTE(review): this copy had its line breaks collapsed. Added and removed
  lines were split at their markers; indentation and the position of blank
  context lines were reconstructed to satisfy each hunk's line counts.
  Verify against the target tree.

 src/nba/src/arm/handlers/arithmetic.inl | 106 +++++++++++++++++++++++-
 src/nba/src/arm/handlers/handler16.inl  |  12 ++-
 src/nba/src/arm/handlers/handler32.inl  |  33 ++++++--
 3 files changed, 138 insertions(+), 13 deletions(-)

diff --git a/src/nba/src/arm/handlers/arithmetic.inl b/src/nba/src/arm/handlers/arithmetic.inl
index 749c53ce..e2eb68e2 100644
--- a/src/nba/src/arm/handlers/arithmetic.inl
+++ b/src/nba/src/arm/handlers/arithmetic.inl
@@ -3,6 +3,18 @@
  *
  * Licensed under GPLv3 or any later version.
  * Refer to the included LICENSE file.
+ *
+ * Multiplication carry flag algorithm has been altered from its original form according to its GPL-compatible license, as follows:
+ *
+ * Copyright (C) 2024 zaydlang, calc84maniac
+ *
+ * This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
  */
 
 void SetZeroAndSignFlag(u32 value) {
@@ -11,7 +23,7 @@ void SetZeroAndSignFlag(u32 value) {
 }
 
 template
-void TickMultiply(u32 multiplier) {
+bool TickMultiply(u32 multiplier) {
   u32 mask = 0xFFFFFF00;
 
   bus.Idle();
@@ -28,6 +40,98 @@ void TickMultiply(u32 multiplier) {
     mask <<= 8;
     bus.Idle();
   }
+
+  // Return true if full ticks used.
+  return mask == 0;
+}
+
+bool MultiplyCarrySimple(u32 multiplier) {
+  // Carry comes directly from final injected booth carry bit.
+  // Final booth addend is negative only if upper 2 bits are 10.
+  return (multiplier >> 30) == 2;
+}
+
+bool MultiplyCarryLo(u32 multiplicand, u32 multiplier, u32 accum = 0) {
+  // Set low bit of multiplicand to cause negation to invert the upper bits.
+  // This bit cannot propagate to the resulting carry bit.
+  multiplicand |= 1;
+
+  // Optimized first iteration.
+  u32 booth = (s32)(multiplier << 31) >> 31;
+  u32 carry = multiplicand * booth;
+  u32 sum = carry + accum;
+
+  int shift = 29;
+  do {
+    // Process 8 multiplier bits using 4 booth iterations.
+    for (int i = 0; i < 4; i++, shift -= 2) {
+      // Get next booth factor (-2 to 2, shifted left by 30-shift).
+      u32 next_booth = (s32)(multiplier << shift) >> shift;
+      u32 factor = next_booth - booth;
+      booth = next_booth;
+      // Get scaled value of booth addend.
+      u32 addend = multiplicand * factor;
+      // Accumulate addend with carry-save add.
+      accum ^= carry ^ addend;
+      sum += addend;
+      carry = sum - accum;
+    }
+  } while (booth != multiplier);
+
+  // Carry flag comes from bit 31 of carry-save adder's final carry.
+  return carry >> 31;
+}
+
+template
+bool MultiplyCarryHi(u32 multiplicand, u32 multiplier, u32 accum_hi = 0) {
+  // Only last 3 booth iterations are relevant to output carry.
+  // Reduce scale of both inputs to get upper bits of 64-bit booth addends
+  // in upper bits of 32-bit values, while handling sign extension.
+  if (sign_extend) {
+    multiplicand = (s32)multiplicand >> 6;
+    multiplier = (s32)multiplier >> 26;
+  } else {
+    multiplicand >>= 6;
+    multiplier >>= 26;
+  }
+  // Set low bit of multiplicand to cause negation to invert the upper bits.
+  // This bit cannot propagate to the resulting carry bit.
+  multiplicand |= 1;
+
+  // Pre-populate magic bit 61 for carry.
+  u32 carry = ~accum_hi & 0x20000000;
+  // Pre-populate magic bits 63-60 for accum (with carry magic pre-added).
+  u32 accum = accum_hi - 0x08000000;
+
+  // Get factors for last 3 booth iterations.
+  u32 booth0 = (s32)(multiplier << 27) >> 27;
+  u32 booth1 = (s32)(multiplier << 29) >> 29;
+  u32 booth2 = (s32)(multiplier << 31) >> 31;
+  u32 factor0 = multiplier - booth0;
+  u32 factor1 = booth0 - booth1;
+  u32 factor2 = booth1 - booth2;
+
+  // Get scaled value of 3rd-last booth addend.
+  u32 addend = multiplicand * factor2;
+  // Finalize bits 61-60 of accum magic using its sign.
+  accum -= addend & 0x10000000;
+  // Get scaled value of 2nd-last booth addend.
+  addend = multiplicand * factor1;
+  // Finalize bits 63-62 of accum magic using its sign.
+  accum -= addend & 0x40000000;
+
+  // Get carry from carry-save add in bit 61 and propagate it to bit 62.
+  u32 sum = accum + (addend & 0x20000000);
+  // Subtract out carry magic to get actual accum magic.
+  accum -= carry;
+
+  // Get scaled value of last booth addend.
+  addend = multiplicand * factor0;
+  // Add to bit 62 and propagate carry.
+  sum += addend & 0x40000000;
+
+  // Cancel out accum magic bit 63 to get carry bit 63.
+  return (sum ^ accum) >> 31;
 }
 
 u32 ADD(u32 op1, u32 op2, bool set_flags) {
diff --git a/src/nba/src/arm/handlers/handler16.inl b/src/nba/src/arm/handlers/handler16.inl
index 8862f070..19b3bf0e 100644
--- a/src/nba/src/arm/handlers/handler16.inl
+++ b/src/nba/src/arm/handlers/handler16.inl
@@ -183,12 +183,18 @@ void Thumb_ALU(u16 instruction) {
       break;
     }
     case ThumbDataOp::MUL: {
-      TickMultiply(state.reg[dst]);
+      u32 lhs = state.reg[src];
+      u32 rhs = state.reg[dst];
+      bool full = TickMultiply(rhs);
 
       pipe.access = Access::Code | Access::Nonsequential;
-      state.reg[dst] *= state.reg[src];
+      state.reg[dst] = lhs * rhs;
       SetZeroAndSignFlag(state.reg[dst]);
-      state.cpsr.f.c = 0;
+      if (full) {
+        state.cpsr.f.c = MultiplyCarrySimple(rhs);
+      } else {
+        state.cpsr.f.c = MultiplyCarryLo(lhs, rhs);
+      }
       break;
     }
     case ThumbDataOp::BIC: {
diff --git a/src/nba/src/arm/handlers/handler32.inl b/src/nba/src/arm/handlers/handler32.inl
index 79afe774..b99d00d9 100644
--- a/src/nba/src/arm/handlers/handler32.inl
+++ b/src/nba/src/arm/handlers/handler32.inl
@@ -247,15 +247,22 @@ void ARM_Multiply(u32 instruction) {
   auto rhs = GetReg(op2);
   auto result = lhs * rhs;
 
-  TickMultiply(rhs);
+  bool full = TickMultiply(rhs);
 
+  u32 accum = 0;
   if (accumulate) {
-    result += GetReg(op3);
+    accum = GetReg(op3);
+    result += accum;
     bus.Idle();
   }
 
   if (set_flags) {
     SetZeroAndSignFlag(result);
+    if (full) {
+      state.cpsr.f.c = MultiplyCarrySimple(rhs);
+    } else {
+      state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum);
+    }
   }
 
   SetReg(dst, result);
@@ -273,7 +280,7 @@ void ARM_MultiplyLong(u32 instruction) {
   int dst_lo = (instruction >> 12) & 0xF;
   int dst_hi = (instruction >> 16) & 0xF;
 
-  s64 result;
+  u64 result;
 
   pipe.access = Access::Code | Access::Nonsequential;
   state.r15 += 4;
@@ -284,18 +291,21 @@ void ARM_MultiplyLong(u32 instruction) {
   if (sign_extend) {
     result = s64(s32(lhs)) * s64(s32(rhs));
   } else {
-    result = s64(u64(lhs) * u64(rhs));
+    result = u64(lhs) * u64(rhs);
   }
 
-  TickMultiply(rhs);
+  bool full = TickMultiply(rhs);
   bus.Idle();
 
+  u32 accum_lo = 0;
+  u32 accum_hi = 0;
   if (accumulate) {
-    s64 value = GetReg(dst_hi);
+    accum_lo = GetReg(dst_lo);
+    accum_hi = GetReg(dst_hi);
 
-    value <<= 16;
-    value <<= 16;
-    value |= GetReg(dst_lo);
+    u64 value = accum_hi;
+    value <<= 32;
+    value |= accum_lo;
 
     result += value;
     bus.Idle();
@@ -306,6 +316,11 @@ void ARM_MultiplyLong(u32 instruction) {
   if (set_flags) {
     state.cpsr.f.n = result_hi >> 31;
     state.cpsr.f.z = result == 0;
+    if (full) {
+      state.cpsr.f.c = MultiplyCarryHi(lhs, rhs, accum_hi);
+    } else {
+      state.cpsr.f.c = MultiplyCarryLo(lhs, rhs, accum_lo);
+    }
   }
 
   SetReg(dst_lo, result & 0xFFFFFFFF);