forked from LuaJIT/LuaJIT
-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Disable FMA by default. Use -Ofma or jit.opt.start("+fma") to enable.
See the discussion in the corresponding ticket for the rationale. (cherry picked from commit de2e1ca) For the modulo operation, the arm64 VM uses `fmsub` [1] instruction, which is the fused multiply-add (FMA [2]) operation (more precisely, multiply-sub). Hence, it may produce different results compared to the unfused one. This patch fixes the behaviour by using the unfused instructions by default. However, the new JIT optimization flag (fma) is introduced to make it possible to take advantage of the FMA optimizations. Sergey Kaplun: * added the description and the test for the problem [1]: https://developer.arm.com/documentation/dui0801/g/A64-Floating-point-Instructions/FMSUB [2]: https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation Part of tarantool/tarantool#10709 Reviewed-by: Sergey Bronnikov <[email protected]> Signed-off-by: Sergey Kaplun <[email protected]> (cherry picked from commit 58b013a)
- Loading branch information
Showing
10 changed files
with
151 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
test/tarantool-tests/lj-918-fma-numerical-accuracy-jit.test.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
local tap = require('tap') | ||
|
||
-- Test file to demonstrate consistent behaviour for JIT and the | ||
-- VM regarding FMA optimization (disabled by default). | ||
-- XXX: The VM behaviour is checked in the | ||
-- <lj-918-fma-numerical-accuracy.test.lua>. | ||
-- See also: https://github.com/LuaJIT/LuaJIT/issues/918. | ||
-- | ||
-- The skip condition below is true when `jit.status()` reports | ||
-- that the JIT compiler is off: the test needs JIT-compiled code | ||
-- to compare against. | ||
local test = tap.test('lj-918-fma-numerical-accuracy-jit'):skipcond({ | ||
['Test requires JIT enabled'] = not jit.status(), | ||
}) | ||
|
||
test:plan(1) | ||
|
||
-- Divisor that turns the integer mantissa values below into the | ||
-- fractional part of the significand. | ||
local _2pow52 = 2 ^ 52 | ||
|
||
-- XXX: Before this commit the LuaJIT arm64 VM used the `fmsub` | ||
-- [1] instruction for the modulo operation, which is a fused | ||
-- multiply-add (FMA [2]) operation (more precisely, | ||
-- multiply-sub). Hence, it may produce results that differ from | ||
-- the unfused computation. For the test, let's just use 2 | ||
-- numbers in modulo for which the single rounding is different | ||
-- from the double rounding. The numbers from the original issue | ||
-- are good enough. | ||
-- | ||
-- [1]: https://developer.arm.com/documentation/dui0801/g/A64-Floating-point-Instructions/FMSUB | ||
-- [2]: https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation | ||
-- | ||
-- IEEE754 components to double: | ||
-- sign * (2 ^ (exp - 1023)) * (mantissa / _2pow52 + normal), | ||
-- where `normal` is the leading 1 added to the fraction (the | ||
-- `+ 1` term in the expressions below). | ||
local a = 1 * (2 ^ (1083 - 1023)) * (4080546448249347 / _2pow52 + 1) | ||
-- Sanity check: the rebuilt value equals the decimal literal. | ||
assert(a == 2197541395358679800) | ||
|
||
local b = -1 * (2 ^ (1052 - 1023)) * (3927497732209973 / _2pow52 + 1) | ||
-- Sanity check: the rebuilt value equals the decimal literal. | ||
assert(b == -1005065126.3690554) | ||
|
||
-- Holds the value of `a % b` computed on each loop iteration. | ||
local results = {} | ||
|
||
-- Lower the hot loop threshold so that the short loop below is | ||
-- expected to be compiled. Presumably the first iteration(s) are | ||
-- executed by the interpreter and the remaining ones by the | ||
-- compiled trace -- TODO confirm. | ||
jit.opt.start('hotloop=1') | ||
for i = 1, 4 do | ||
results[i] = a % b | ||
end | ||
|
||
-- XXX: The test doesn't fail before this commit, but it is | ||
-- required to be sure that the JIT and the VM results stay | ||
-- consistent after the commit: every collected value must be the | ||
-- same. | ||
test:samevalues(results, 'consistent behaviour between the JIT and the VM') | ||
|
||
test:done(true) |
43 changes: 43 additions & 0 deletions
43
test/tarantool-tests/lj-918-fma-numerical-accuracy.test.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
local tap = require('tap') | ||
|
||
-- Test file to demonstrate possible numerical inaccuracy if FMA | ||
-- optimization takes place. | ||
-- XXX: The JIT consistency is checked in the | ||
-- <lj-918-fma-numerical-accuracy-jit.test.lua>. | ||
-- See also: https://github.com/LuaJIT/LuaJIT/issues/918. | ||
local test = tap.test('lj-918-fma-numerical-accuracy') | ||
|
||
-- Two checks: the modulo of two literals and the modulo of two | ||
-- local variables. | ||
test:plan(2) | ||
|
||
-- Divisor that turns the integer mantissa values below into the | ||
-- fractional part of the significand. | ||
local _2pow52 = 2 ^ 52 | ||
|
||
-- XXX: Before this commit the LuaJIT arm64 VM used the `fmsub` | ||
-- [1] instruction for the modulo operation, which is a fused | ||
-- multiply-add (FMA [2]) operation (more precisely, | ||
-- multiply-sub). Hence, it may produce results that differ from | ||
-- the unfused computation. For the test, let's just use 2 | ||
-- numbers in modulo for which the single rounding is different | ||
-- from the double rounding. The numbers from the original issue | ||
-- are good enough. | ||
-- | ||
-- [1]: https://developer.arm.com/documentation/dui0801/g/A64-Floating-point-Instructions/FMSUB | ||
-- [2]: https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation | ||
-- | ||
-- IEEE754 components to double: | ||
-- sign * (2 ^ (exp - 1023)) * (mantissa / _2pow52 + normal), | ||
-- where `normal` is the leading 1 added to the fraction (the | ||
-- `+ 1` term in the expressions below). | ||
local a = 1 * (2 ^ (1083 - 1023)) * (4080546448249347 / _2pow52 + 1) | ||
-- Sanity check: the rebuilt value equals the decimal literal. | ||
assert(a == 2197541395358679800) | ||
|
||
local b = -1 * (2 ^ (1052 - 1023)) * (3927497732209973 / _2pow52 + 1) | ||
-- Sanity check: the rebuilt value equals the decimal literal. | ||
assert(b == -1005065126.3690554) | ||
|
||
-- These tests fail on ARM64 before this patch or with FMA | ||
-- optimization enabled. | ||
-- The first test may not fail if the compiler doesn't generate | ||
-- an ARM64 FMA operation in `lj_vm_foldarith()`. | ||
-- | ||
-- Both operands are literals here, so the modulo is expected to | ||
-- be evaluated while the chunk is parsed (see the test name). | ||
test:is(2197541395358679800 % -1005065126.3690554, -606337536, | ||
'FMA in the lj_vm_foldarith() during parsing') | ||
|
||
-- The operands are the local variables `a` and `b` (the same | ||
-- values as the literals above), so the modulo is presumably | ||
-- computed at run time by the VM -- TODO confirm. | ||
test:is(a % b, -606337536, 'FMA in the VM') | ||
|
||
test:done(true) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
local tap = require('tap') | ||
-- Test file to check that the `fma` JIT optimization flag is | ||
-- disabled by default and can be enabled via `jit.opt.start()`. | ||
-- The skip condition is true when `jit.status()` reports that the | ||
-- JIT compiler is off. | ||
local test = tap.test('lj-918-fma-optimization'):skipcond({ | ||
['Test requires JIT enabled'] = not jit.status(), | ||
}) | ||
|
||
-- Three checks: default state, flag recognition, enabled state. | ||
test:plan(3) | ||
|
||
-- Tell whether `flag` is listed among the values returned by | ||
-- `jit.status()`. | ||
-- @param flag  value to look for, e.g. 'fma' | ||
-- @return true if one of the returned values equals `flag`, | ||
--         false otherwise | ||
local function jit_opt_is_on(flag) | ||
-- Pack all results of `jit.status()` into a table to scan them. | ||
for _, opt in ipairs({jit.status()}) do | ||
if opt == flag then | ||
return true | ||
end | ||
end | ||
return false | ||
end | ||
|
||
-- The flag must not be reported before anything enables it. | ||
test:ok(not jit_opt_is_on('fma'), 'FMA is disabled by default') | ||
|
||
-- Use pcall so that an error raised by `jit.opt.start()` becomes | ||
-- a failed check below instead of aborting the test. The second | ||
-- result (the error message, if any) is intentionally ignored. | ||
local ok, _ = pcall(jit.opt.start, '+fma') | ||
|
||
test:ok(ok, 'fma flag is recognized') | ||
|
||
-- After '+fma' the flag must show up in the `jit.status()` values. | ||
test:ok(jit_opt_is_on('fma'), 'FMA is enabled after jit.opt.start()') | ||
|
||
test:done(true) |