diff --git a/CMakeLists.txt b/CMakeLists.txt index 2227730f6..e119ec401 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,6 +130,7 @@ option(BUILD_NUMBER_AREALS "Set to ON to build static areal tests" option(BUILD_NUMBER_UNUM1S "Set to ON to build static unum type 1 tests" OFF) option(BUILD_NUMBER_UNUM2S "Set to ON to build static unum type 2 tests" OFF) option(BUILD_NUMBER_POSITS "Set to ON to build static unum type 3 posit tests" OFF) +option(BUILD_NUMBER_POSITOS "Set to ON to build static unum type 3 posito tests" OFF) option(BUILD_NUMBER_VALIDS "Set to ON to build static unum type 3 valid tests" OFF) option(BUILD_NUMBER_LNS "Set to ON to build static lns tests" OFF) option(BUILD_NUMBER_DBNS "Set to ON to build static dbns tests" OFF) @@ -639,6 +640,7 @@ if(BUILD_NUMBER_STATICS) set(BUILD_NUMBER_UNUM1S ON) set(BUILD_NUMBER_UNUM2S ON) set(BUILD_NUMBER_POSITS ON) + set(BUILD_NUMBER_POSITOS ON) set(BUILD_NUMBER_VALIDS ON) set(BUILD_NUMBER_LNS ON) set(BUILD_NUMBER_DBNS ON) @@ -763,6 +765,10 @@ add_subdirectory("static/posit/specialized") add_subdirectory("static/posit2") endif(BUILD_NUMBER_POSITS) +if(BUILD_NUMBER_POSITOS) +add_subdirectory("static/posito") +endif(BUILD_NUMBER_POSITOS) + if(BUILD_NUMBER_VALIDS) add_subdirectory("static/valid") endif(BUILD_NUMBER_VALIDS) diff --git a/include/universal/number/posit/manipulators.hpp b/include/universal/number/posit/manipulators.hpp index 0d3f1b18d..cba580804 100644 --- a/include/universal/number/posit/manipulators.hpp +++ b/include/universal/number/posit/manipulators.hpp @@ -24,7 +24,7 @@ namespace sw { namespace universal { return str.str(); } - // Generate a type field descriptor for this cfloat + // Generate a type field descriptor for this posit template, bool> = true > @@ -118,7 +118,7 @@ std::string component_values_to_string(const posit& p) { return str.str(); } -// generate a binary string for cfloat +// generate a binary string for posit template inline std::string to_hex(const posit& v, bool nibbleMarker = false, bool hexPrefix = true) { char hexChar[16] = { diff --git a/include/universal/number/posit/posit.hpp b/include/universal/number/posit/posit.hpp index fd253a56e..742c5b0e3 100644 --- a/include/universal/number/posit/posit.hpp +++ b/include/universal/number/posit/posit.hpp @@ -36,8 +36,12 @@ #if !defined(POSIT_THROW_ARITHMETIC_EXCEPTION) // default is to use NaR as a signalling error #define POSIT_THROW_ARITHMETIC_EXCEPTION 0 +#if !defined(VALUE_THROW_ARITHMETIC_EXCEPTION) #define VALUE_THROW_ARITHMETIC_EXCEPTION 0 +#endif +#if !defined(BITBLOCK_THROW_ARITHMETIC_EXCEPTION) #define BITBLOCK_THROW_ARITHMETIC_EXCEPTION 0 +#endif #else // for the composite value<> class assume the same behavior as requested for posits #define VALUE_THROW_ARITHMETIC_EXCEPTION POSIT_THROW_ARITHMETIC_EXCEPTION diff --git a/include/universal/number/posito/ReadMe.txt b/include/universal/number/posito/ReadMe.txt new file mode 100644 index 000000000..824afbf02 --- /dev/null +++ b/include/universal/number/posito/ReadMe.txt @@ -0,0 +1,86 @@ +======================================================================== + Posit Project Overview +======================================================================== + +Random 32 bits which is intentionally zero heavy +----------------- +Sign +| +| +v +1100 1000 1100 0100 0100 0010 0001 0001 + +How to decode this? + +Here is some pseudocode that should work on any bit sequence. Suppose + + n is the number of bits (which in the example is 32), + es is the number of exponent bits (es = 3 in the example since n = 32), + p is the bit sequence treated as a signed integer. + +Also suppose, though it is not the convention, that the bits are numbered from left to right starting with 1, +so the sign bit is bit 1 and the rightmost (least significant) bit is bit n. That numbering makes the pseudocode +more readable. Assume we have a quick machine instruction FindFirstOne(p) that finds the position of the first 1 bit +in p, returning n+1 if all bits are zero, and a similar instruction FindFirstZero(p). The function Bits(j..k, p) +extracts bits j through k of p, clipped to bit n. If j is greater than n, Bits(j..k, p) returns zero. + +We seek x, the value represented by p. The green text indicates what would happen for the 32 bits in the example. + +(* Check for exception values: *) +If Bits(2..n, p) == 0, (* Check for a string of all zeros after the first bit. *) + If Bits(1..1, p) == 0, (* If the first bit is also zero, then x is zero. *) + x ← 0; + Else, (* The first bit must be one, indicating x is unsigned infinity. *) + x ← ±∞; + End If; + End; (* Can cut out early. *) +End If; + +The example sequence fails the exception value tests, so continue. + +(* Process sign bit: *) +If Bits(1..1, p) == 1, sign ← –1; p ← –p; (* 2's complement negate p. *) + Else sign ← 1; +End If; + +The first bit is a 1, so sign = -1 and p gets negated. Flip all the bits and add 1: +0011 0111 0011 1011 1011 1101 1110 1111 + +(* Process regime bits. The second bit, r, determines the sign of the regime. *) +r ← Bits(2..2, p) +Bits(1..1, p) ← r (* Duplicate first regime bit into sign bit so the "FindFirst..." instructions work. *) +If r == 0, (* regime value is negative *) + k = FindFirstOne(p); + shift ← –(2^es) × (k – 2); (* Actually a shift of k-2 by a shift by es, so this is fast. *) +Else (* regime value is zero or positive *) + k = FindFirstZero(p); + shift ← (2^es) × (k – 3); +End If; + +The example 001... has r = 0 so the regime value is negative. The first 1 occurs in bit 3, +so the regime represents –1 and contributes a shift of –(2^es) = –8. This leaves k pointing to bit 3. +The exponent will be in the next es bits after bit 3, which will fine-tune the shift roughed out by the regime bits. + +(* Add the exponent bits, as an unsigned integer, to the shift. *) +shift ← shift + Bits(k+1..k+es, p); + +The three exponent bits in the example are the underlined ones: 0011 0111 0011 1011 1011 1101 1110 1111. +As an unsigned binary integer, 101 means decimal 5, so the shift is –8 + 5 = –3. + +(* The remaining bits are the fraction, with a hidden bit of 1, so we're done. It looks like a regular float: *) +x ← sign × 2^shift × 1.Bits(k+es+1..n) +End + +The fraction bits in the example are underlined: 0011 0111 0011 1011 1011 1101 1110 1111. That fraction, +plus the hidden bit 1, is 121355759/(2^26), or exactly 1.80834172666072845458984375. +Scale this by 2^(shift) = 2^(–3) = 1/8, and the sign, to get –0.22604271583259105682373046875. +Notice that there are three more bits in the fraction than there are in an IEEE 32-bit float, +giving almost a full decimal more accuracy. + +I didn't actually run the above pseudocode so it may contain a typo or two, but it is closely based on the +Mathematica code for decoding a posit and that has been debugged and in use for months. +It is possible to decode the bit string _without_ doing 2's complement negation if the sign bit is zero +(the hidden bit for negative posits represents –2, not 1!), which according to Isaac Yonemoto leads to +simpler hardware... but it creates a more cryptic pseudocode explanation, so I used the 2's complement negation here. + +///////////////////////////////////////////////////////////////////////////// diff --git a/include/universal/number/posito/attributes.hpp b/include/universal/number/posito/attributes.hpp new file mode 100644 index 000000000..7867a3c6b --- /dev/null +++ b/include/universal/number/posito/attributes.hpp @@ -0,0 +1,28 @@ +#pragma once +// attributes.hpp: functions to query number system attributes +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +// functions to provide details about properties of a posit configuration + +using namespace sw::universal::internal; + +// calculate the scale of a posit +template +inline int scale(const posito& p) { + regime _regime; + exponent _exponent; + internal::bitblock tmp(p.get()); + tmp = sign(p) ? internal::twos_complement(tmp) : tmp; + int k = decode_regime(tmp); + unsigned nrRegimeBits = _regime.assign_regime_pattern(k); // get the regime bits + _exponent.extract_exponent_bits(tmp, nrRegimeBits); // get the exponent bits + // return the scale + return _regime.scale() + _exponent.scale(); +} + +}} // namespace sw::universal diff --git a/include/universal/number/posito/exceptions.hpp b/include/universal/number/posito/exceptions.hpp new file mode 100644 index 000000000..12879d253 --- /dev/null +++ b/include/universal/number/posito/exceptions.hpp @@ -0,0 +1,74 @@ +#pragma once +// exceptions.hpp: exception hierarchy for exceptions during posit calculations +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/// POSIT ARITHMETIC EXCEPTIONS + +// base class for posit arithmetic exceptions +struct posito_arithmetic_exception : public universal_arithmetic_exception { + posito_arithmetic_exception(const std::string& error) + : universal_arithmetic_exception(std::string("posit arithmetic exception: ") + error) {}; +}; + +////////////////////////////////////////////////////////////////////////////////////////////////// +/// specialized exceptions to aid application level exception handling + +// not_a_real is thrown when a rvar is NaR +struct posito_nar : public posito_arithmetic_exception { + posito_nar() : posito_arithmetic_exception("nar (not a real)") {} +}; + +// divide_by_zero is thrown when the denominator in a division operator is 0 +struct posito_divide_by_zero : public posito_arithmetic_exception { + posito_divide_by_zero() : posito_arithmetic_exception("divide by zero") {} +}; + +// divide_by_nar is thrown when the denominator in a division operator is NaR +struct posito_divide_by_nar : public posito_arithmetic_exception { + posito_divide_by_nar() : posito_arithmetic_exception("divide by nar") {} +}; + +// numerator_is_nar is thrown when the numerator in a division operator is NaR +struct posito_numerator_is_nar : public posito_arithmetic_exception { + posito_numerator_is_nar() : posito_arithmetic_exception("numerator is nar") {} +}; + +// operand_is_nar is thrown when an rvar in a binary operator is NaR +struct posito_operand_is_nar : public posito_arithmetic_exception { + posito_operand_is_nar() : posito_arithmetic_exception("operand is nar") {} +}; + +// thrown when division yields no signficant fraction bits +struct posito_division_result_is_zero : public posito_arithmetic_exception { + posito_division_result_is_zero() : posito_arithmetic_exception("division yielded no significant bits") {} +}; + +// thrown when division yields NaR +struct posito_division_result_is_infinite : public posito_arithmetic_exception { + posito_division_result_is_infinite() : posito_arithmetic_exception("division yielded infinite") {} +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +/// POSIT INTERNAL OPERATION EXCEPTIONS + +struct posito_internal_exception : public universal_internal_exception { + posito_internal_exception(const std::string& error) + : universal_internal_exception(std::string("posit internal exception: ") + error) {}; +}; + +struct posito_hpos_too_large : public posito_internal_exception { + posito_hpos_too_large() : posito_internal_exception("position of hidden bit too large for given posit") {} +}; + +struct posito_rbits_too_large : public posito_internal_exception { + posito_rbits_too_large() :posito_internal_exception("number of remaining bits too large for this fraction") {} +}; + +}} // namespace sw::universal diff --git a/include/universal/number/posito/fdp.hpp b/include/universal/number/posito/fdp.hpp new file mode 100644 index 000000000..d02c58d30 --- /dev/null +++ b/include/universal/number/posito/fdp.hpp @@ -0,0 +1,87 @@ +#pragma once +// fdp.hpp : include file containing templated C++ interfaces to fused dot product +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + +namespace sw { namespace universal { + +/// ////////////////////////////////////////////////////////////////// +/// fused dot product operators +/// fdp_qc fused dot product with quire continuation +/// fdp_stride fused dot product with non-negative stride +/// fdp fused dot product of two vectors + +// Fused dot product with quire continuation +template +void fdp_qc(Qy& sum_of_products, size_t n, const Vector& x, size_t incx, const Vector& y, size_t incy) { + size_t ix, iy; + for (ix = 0, iy = 0; ix < n && iy < n; ix = ix + incx, iy = iy + incy) { + sum_of_products += sw::universal::quire_mul(x[ix], y[iy]); + } +} + +// Resolved fused dot product, with the option to control capacity bits in the quire +template +enable_if_posito, value_type > // as return type +fdp_stride(size_t n, const Vector& x, size_t incx, const Vector& y, size_t incy) { + constexpr unsigned nbits = Vector::value_type::nbits; + constexpr unsigned es = Vector::value_type::es; + constexpr unsigned capacity = 20; // support vectors up to 1M elements + quire q = 0; + size_t ix, iy; + for (ix = 0, iy = 0; ix < n && iy < n; ix = ix + incx, iy = iy + incy) { + q += sw::universal::quire_mul(x[ix], y[iy]); + if (sw::universal::_trace_quire_add) std::cout << q << '\n'; + } + typename Vector::value_type sum; + convert(q.to_value(), sum); // one and only rounding step of the fused-dot product + return sum; +} + +#if defined(_MSC_VER) +/* Microsoft Visual Studio. --------------------------------- */ +// Specialized resolved fused dot product that assumes unit stride and a standard vector +template +enable_if_posito, value_type > // as return type +fdp(const Vector& x, const Vector& y) { + constexpr unsigned nbits = Vector::value_type::nbits; + constexpr unsigned es = Vector::value_type::es; + constexpr unsigned capacity = 20; // support vectors up to 1M elements + quire q(0); + size_t ix, iy, n = size(x); + for (ix = 0, iy = 0; ix < n && iy < n; ++ix, ++iy) { + q += sw::universal::quire_mul(x[ix], y[iy]); + } + typename Vector::value_type sum; + convert(q.to_value(), sum); // one and only rounding step of the fused-dot product + return sum; +} +#else + +// template constexpr auto size(const std::vector& v) -> decltype(v.size()) { return (v.size()); } + +// Specialized resolved fused dot product that assumes unit stride and a standard vector +template +enable_if_posito, value_type > // as return type +fdp(const Vector& x, const Vector& y) { + constexpr unsigned nbits = Vector::value_type::nbits; + constexpr unsigned es = Vector::value_type::es; + constexpr unsigned capacity = 20; // support vectors up to 1M elements + quire q(0); + size_t ix, iy, n = size(x); + for (ix = 0, iy = 0; ix < n && iy < n; ++ix, ++iy) { + q += sw::universal::quire_mul(x[ix], y[iy]); + } + typename Vector::value_type sum; + convert(q.to_value(), sum); // one and only rounding step of the fused-dot product + return sum; +} +#endif + +}} // namespace sw::universal + diff --git a/include/universal/number/posito/manipulators.hpp b/include/universal/number/posito/manipulators.hpp new file mode 100644 index 000000000..9608ff8c3 --- /dev/null +++ b/include/universal/number/posito/manipulators.hpp @@ -0,0 +1,216 @@ +#pragma once +// manipulators.hpp: definitions of helper functions for posit type manipulation +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include // base class for color printing in shells + +// This file contains functions that manipulate a posit type +// using posit number system knowledge. + +namespace sw { namespace universal { + + + // Generate a type tag for this posit, for example, posit<8,1> + template + std::string type_tag(const posito & = {}) { + std::stringstream str; + str << "posito<" + << std::setw(3) << nbits << ", " + << std::setw(1) << es << '>'; + return str.str(); + } + + // Generate a type field descriptor for this cfloat + template, bool> = true + > + inline std::string type_field(const PositoType & = {}) { + std::stringstream s; +// unsigned nbits = PositoType::nbits; // total bits + unsigned ebits = PositoType::es; // exponent bits + unsigned fbits = PositoType::fbits; // integer bits + s << "fields(s:1|r:[2]+|e:" << ebits << "|m:" << fbits << ')'; + return s.str(); + } + +// Generate a string representing the posit components: sign, regime, exponent, faction, and value +template +std::string components(const posito& p) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); + std::stringstream str; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(p.get(), _sign, _regime, _exponent, _fraction); + + // TODO: hardcoded field width is governed by pretty printing posit tables, which by construction will always be small posits + str << std::setw(14) << p.get() << " " << std::setw(14) << decoded(p) + << " Sign : " << std::setw(2) << _sign + << " Regime : " << std::setw(3) << _regime.regime_k() + << " Exponent : " << std::setw(5) << exponent_value(p) + << " Fraction : " << std::setw(8) << std::setprecision(21) << _fraction.value() + << " Value : " << std::setw(16) << p; + + return str.str(); +} + +// generate a binary string for posit +template +inline std::string to_hex(const posito& v, bool nibbleMarker = false, bool hexPrefix = true) { + char hexChar[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + }; + std::stringstream s; + if (hexPrefix) s << "0x" << std::hex; + int nrNibbles = int(1ull + ((nbits - 1ull) >> 2ull)); + for (int n = nrNibbles - 1; n >= 0; --n) { + uint8_t nibble = v.nibble(unsigned(n)); + s << hexChar[nibble]; + if (nibbleMarker && n > 0 && (n % 4) == 0) s << '\''; + } + return s.str(); +} + +// generate a posit format ASCII format nbits.esxNN...NNp +template +inline std::string hex_print(const posito& p) { + // we need to transform the posit into a string + std::stringstream str; + str << nbits << '.' << es << 'x' << to_hex(p.get()) << 'p'; + return str.str(); +} + +template +std::string pretty_print(const posito& p, int printPrecision = std::numeric_limits::max_digits10) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); + std::stringstream str; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(p.get(), _sign, _regime, _exponent, _fraction); + str << ( _sign ? "s1 r" : "s0 r" ); + bitblock r = _regime.get(); + int regimeBits = (int)_regime.nrBits(); + int nrOfRegimeBitsProcessed = 0; + for (int i = nbits - 2; i >= 0; --i) { + if (regimeBits > nrOfRegimeBitsProcessed++) { + str << (r[static_cast(i)] ? "1" : "0"); + } + } + str << " e"; + bitblock e = _exponent.get(); + int exponentBits = (int)_exponent.nrBits(); + int nrOfExponentBitsProcessed = 0; + for (int i = int(es) - 1; i >= 0; --i) { + if (exponentBits > nrOfExponentBitsProcessed++) { + str << (e[static_cast(i)] ? "1" : "0"); + } + } + str << " f"; + bitblock f = _fraction.get(); + int fractionBits = (int)_fraction.nrBits(); + int nrOfFractionBitsProcessed = 0; + //for (int i = int(p.fbits) - 1; i >= 0; --i) { // this does not look correct + for (int i = int(fbits) - 1; i >= 0; --i) { + if (fractionBits > nrOfFractionBitsProcessed++) { + str << (f[static_cast(i)] ? "1" : "0"); + } + } + str << " q"; + str << quadrant(p) << " v" + << std::setprecision(printPrecision) << p + << std::setprecision(0); + return str.str(); +} + +template +std::string info_print(const posito& p, int printPrecision = 17) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); + std::stringstream str; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(p.get(), _sign, _regime, _exponent, _fraction); + + str << "raw: " << p.get() << " " // << " decoded: " << decoded(p) << " " + << quadrant(p) << " " + << (_sign ? "s1 r" : "s0 r") + << _regime << " e" + << _exponent << " f" + << _fraction << " : value " + << std::setprecision(printPrecision) << p + << std::setprecision(0); + return str.str(); +} + +template +std::string color_print(const posito& p) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); + std::stringstream str; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + extract_fields(p.get(), _sign, _regime, _exponent, _fraction); + + Color red(ColorCode::FG_RED); + Color yellow(ColorCode::FG_YELLOW); + Color blue(ColorCode::FG_BLUE); + Color magenta(ColorCode::FG_MAGENTA); + Color cyan(ColorCode::FG_CYAN); + Color white(ColorCode::FG_WHITE); + Color def(ColorCode::FG_DEFAULT); + str << red << (p.isneg() ? "1" : "0"); + + if (p.isnar()) { + for (unsigned i = 0; i < nbits - 1; ++i) str << yellow << '0'; + } + else { + bitblock r = _regime.get(); + int regimeBits = (int)_regime.nrBits(); + int nrOfRegimeBitsProcessed = 0; + for (unsigned i = 0; i < nbits - 1; ++i) { + unsigned bitIndex = nbits - 2u - i; + if (regimeBits > nrOfRegimeBitsProcessed++) { + str << yellow << (r[bitIndex] ? '1' : '0'); + } + } + } + + int exponentBits = (int)_exponent.nrBits(); + int nrOfExponentBitsProcessed = 0; + if constexpr (es > 0) { + for (unsigned i = 0; i < es; ++i) { + bitblock e = _exponent.get(); + unsigned bitIndex = es - 1u - i; + if (exponentBits > nrOfExponentBitsProcessed++) { + str << cyan << (e[bitIndex] ? '1' : '0'); + } + } + } + + bitblock::fbits> f = _fraction.get(); + f = (_sign ? twos_complement(f) : f); + int fractionBits = (int)_fraction.nrBits(); + int nrOfFractionBitsProcessed = 0; + for (unsigned i = 0; i < p.fbits; ++i) { + unsigned bitIndex = p.fbits - 1u - i; + if (fractionBits > nrOfFractionBitsProcessed++) { + str << magenta << (f[bitIndex] ? "1" : "0"); + } + } + + str << def; + return str.str(); +} + +}} // namespace sw::universal + diff --git a/include/universal/number/posito/numeric_limits.hpp b/include/universal/number/posito/numeric_limits.hpp new file mode 100644 index 000000000..f8b161232 --- /dev/null +++ b/include/universal/number/posito/numeric_limits.hpp @@ -0,0 +1,70 @@ +#pragma once +// numeric_limits.hpp: definition of numeric_limits for posito types +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace std { + + template + class numeric_limits< sw::universal::posito > { + public: + using Posit = sw::universal::posito; + static constexpr bool is_specialized = true; + static constexpr Posit min() { // return minimum value + return Posit(sw::universal::SpecificValue::minpos); + } + static constexpr Posit max() { // return maximum value + return Posit(sw::universal::SpecificValue::maxpos); + } + static constexpr Posit lowest() { // return most negative value + return Posit(sw::universal::SpecificValue::maxneg); + } + static constexpr Posit epsilon() { // return smallest effective increment from 1.0 + Posit one{ 1.0f }, incr{ 1.0f }; + return ++incr - one; + } + static constexpr Posit round_error() { // return largest rounding error + return Posit(0.5); + } + static constexpr Posit denorm_min() { // return minimum denormalized value + return Posit(sw::universal::SpecificValue::minpos); + } + static constexpr Posit infinity() { // return positive infinity + return Posit(sw::universal::SpecificValue::maxpos); + } + static constexpr Posit quiet_NaN() { // return non-signaling NaN + return Posit(NAR); + } + static constexpr Posit signaling_NaN() { // return signaling NaN + return Posit(NAR); + } + + static constexpr int digits = ((es + 2) > nbits) ? 0 : (int(nbits) - 3 - int(es) + 1); + static constexpr int digits10 = static_cast(digits / 3.3f); + static constexpr int max_digits10 = static_cast(digits / 3.3f) + 1; + static constexpr bool is_signed = true; + static constexpr bool is_integer = false; + static constexpr bool is_exact = false; + static constexpr int radix = 2; + + static constexpr int min_exponent = static_cast(2 - int(nbits)) * (1 << es); + static constexpr int min_exponent10 = static_cast(min_exponent / 3.3f); + static constexpr int max_exponent = (nbits - 2) * (1 << es); + static constexpr int max_exponent10 = static_cast(max_exponent / 3.3f); + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = true; + static constexpr float_denorm_style has_denorm = denorm_absent; + static constexpr bool has_denorm_loss = false; + + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = false; + static constexpr bool is_modulo = false; + static constexpr bool traps = false; + static constexpr bool tinyness_before = false; + static constexpr float_round_style round_style = round_to_nearest; + }; + +} diff --git a/include/universal/number/posito/posito.hpp b/include/universal/number/posito/posito.hpp new file mode 100644 index 000000000..a843ef9f0 --- /dev/null +++ b/include/universal/number/posito/posito.hpp @@ -0,0 +1,78 @@ +// posito.hpp: arbitrary configuration fixed-size posit standard header +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#ifndef _POSITO_STANDARD_HEADER_ +#define _POSITO_STANDARD_HEADER_ + +//////////////////////////////////////////////////////////////////////////////////////// +/// COMPILATION DIRECTIVES TO DIFFERENT COMPILERS +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// BEHAVIORAL COMPILATION SWITCHES + +//////////////////////////////////////////////////////////////////////////////////////// +// enable/disable error-free posit format I/O +#if !defined(POSITO_ERROR_FREE_IO_FORMAT) +// default is to print double values in human-readable form +#define POSITO_ERROR_FREE_IO_FORMAT 0 +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable/disable the ability to use literals in binary logic and arithmetic operators +#if !defined(POSITO_ENABLE_LITERALS) +// default is to enable them +#define POSITO_ENABLE_LITERALS 1 +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +// enable throwing specific exceptions for posit arithmetic errors +// left to application to enable +#if !defined(POSITO_THROW_ARITHMETIC_EXCEPTION) +// default is to use NaR as a signalling error +#define POSITO_THROW_ARITHMETIC_EXCEPTION 0 +#define VALUE_THROW_ARITHMETIC_EXCEPTION 0 +#define BITBLOCK_THROW_ARITHMETIC_EXCEPTION 0 +#else +// for the composite value<> class assume the same behavior as requested for posits +#define VALUE_THROW_ARITHMETIC_EXCEPTION POSITO_THROW_ARITHMETIC_EXCEPTION +#if !defined(BITBLOCK_THROW_ARITHMETIC_EXCEPTION) +#define BITBLOCK_THROW_ARITHMETIC_EXCEPTION POSITO_THROW_ARITHMETIC_EXCEPTION +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////// +/// END OF BEHAVIOR SWITCHES /// +//////////////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////////////// +/// define generic number system traits to be specialized by the posit library +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////////////// +/// INCLUDE FILES that make up the library +#include // some shared functions that do not depend on the posit vs posito separation +#include +#include +#include +#include +#include +#include + +// useful functions to work with posits +#include +#include + +/////////////////////////////////////////////////////////////////////////////////////// +/// the posit exact dot product +//#include + +#endif diff --git a/include/universal/number/posito/posito_fwd.hpp b/include/universal/number/posito/posito_fwd.hpp new file mode 100644 index 000000000..158c2a18d --- /dev/null +++ b/include/universal/number/posito/posito_fwd.hpp @@ -0,0 +1,28 @@ +#pragma once +// posito_fwd.hpp : forward declarations of the posit/quire environment +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + + namespace internal { + template class bitblock; + // generalized floating point type + template class value; + } + + // posito types + template class posito; + template inline int scale(const posito&); + template inline internal::bitblock extract_significant(const posito&); + template posito abs(const posito&); + template posito sqrt(const posito&); + template posito& convert(const internal::value&, posito&); + + template int decode_regime(const internal::bitblock&); + template constexpr int calculate_k(int scale); + +}} // namespace sw::universal + diff --git a/include/universal/number/posito/posito_impl.hpp b/include/universal/number/posito/posito_impl.hpp new file mode 100644 index 000000000..00eec8056 --- /dev/null +++ b/include/universal/number/posito/posito_impl.hpp @@ -0,0 +1,2439 @@ +#pragma once +// posito_impl.hpp: implementation of arbitrary configuration fixed-size posits +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include +#include +#include +#include + +#if POSITO_THROW_ARITHMETIC_EXCEPTION +// propagate this behavior down to constituent classes +#ifndef BITBLOCK_THROW_ARITHMETIC_EXCEPTION +#define BITBLOCK_THROW_ARITHMETIC_EXCEPTION 1 +#endif +#endif + +// calling environment should define behavioral flags +// typically set in the library aggregation include file +// but can be set by individual programs when including posit.hpp +// For example: +// - define to non-zero if you want to enable arithmetic and logic literals +// #define POSITO_ENABLE_LITERALS 1 +// - define to non-zero if you want to throw exceptions on arithmetic errors +// #define POSITO_THROW_ARITHMETIC_EXCEPTION 1 + +#if POSITO_THROW_ARITHMETIC_EXCEPTION +// Posits encode error conditions as NaR (Not a Real) +// propagating the error through arithmetic operations is preferred +#include "exceptions.hpp" +#endif // POSITO_THROW_ARITHMETIC_EXCEPTION + +// TODO: these need to be redesigned to enable constexpr and improve performance: roadmap V3 Q1 2021 +#include +#include +#include +#include +// posit environment +#include +#include +#include +#include +#include + +namespace sw { namespace universal { + +// inject internal namespace +using namespace sw::universal::internal; + +// needed to avoid double rounding situations during arithmetic: TODO: does that mean the condensed version below should be removed? +template +inline posito& convert_(bool _sign, int _scale, const bitblock& fraction_in, posito& p) { + if (_trace_conversion) std::cout << "------------------- CONVERT ------------------" << std::endl; + if (_trace_conversion) std::cout << "sign " << (_sign ? "-1 " : " 1 ") << "scale " << std::setw(3) << _scale << " fraction " << fraction_in << std::endl; + + p.clear(); + // construct the posito + // interpolation rule checks + if (check_inward_projection_range(_scale)) { // regime dominated + if (_trace_conversion) std::cout << "inward projection" << std::endl; + // we are projecting to minpos/maxpos + int k = calculate_unconstrained_k(_scale); + k < 0 ? p.setBitblock(minpos_pattern(_sign)) : p.setBitblock(maxpos_pattern(_sign)); + // we are done + if (_trace_rounding) std::cout << "projection rounding "; + } + else { + constexpr unsigned pt_len = nbits + 3 + es; + bitblock pt_bits; + bitblock regime; + bitblock exponent; + bitblock fraction; + bitblock sticky_bit; + + bool s = _sign; + int e = _scale; + bool r = (e >= 0); + + unsigned run = unsigned(r ? 1 + (e >> es) : -(e >> es)); + regime.set(0, 1 ^ r); + for (unsigned i = 1; i <= run; i++) regime.set(i, r); + + unsigned esval = e % (uint32_t(1) << es); + exponent = convert_to_bitblock(esval); + int nbits_plus_one = static_cast(nbits) + 1; + int sign_regime_es = 2 + int(run) + static_cast(es); + unsigned nf = (unsigned)std::max(0, (nbits_plus_one - sign_regime_es)); + //unsigned nf = (unsigned)std::max(0, (static_cast(nbits + 1) - (2 + run + static_cast(es)))); + // TODO: what needs to be done if nf > fbits? + //assert(nf <= input_fbits); + // copy the most significant nf fraction bits into fraction + unsigned lsb = nf <= fbits ? 0 : nf - fbits; + for (unsigned i = lsb; i < nf; ++i) fraction[i] = fraction_in[static_cast(fbits) - nf + i]; + + bool sb = anyAfter(fraction_in, static_cast(fbits) - 1ll - static_cast(nf)); + + // construct the untruncated posito + // pt = BitOr[BitShiftLeft[reg, es + nf + 1], BitShiftLeft[esval, nf + 1], BitShiftLeft[fv, 1], sb]; + regime <<= (es + nf + 1ull); + exponent <<= (nf + 1ull); + fraction <<= 1; + sticky_bit.set(0, sb); + + pt_bits |= regime; + pt_bits |= exponent; + pt_bits |= fraction; + pt_bits |= sticky_bit; + + unsigned len = 1 + std::max((nbits + 1), (2 + run + es)); + bool blast = pt_bits.test(len - nbits); + bool bafter = pt_bits.test(len - nbits - 1); + bool bsticky = anyAfter(pt_bits, int(len) - static_cast(nbits) - 1 - 1); + + bool rb = (blast & bafter) | (bafter & bsticky); + + bitblock ptt; + pt_bits <<= pt_len - len; + truncate(pt_bits, ptt); + if (rb) increment_bitset(ptt); + if (s) ptt = twos_complement(ptt); + p.setBitblock(ptt); + } + return p; +} + +// convert a floating point value to a specific posito configuration. Semantically, p = v, return reference to p +template +inline posito& convert(const internal::value& v, posito& p) { + if (_trace_conversion) std::cout << "------------------- CONVERT ------------------" << std::endl; + if (_trace_conversion) std::cout << "sign " << (v.sign() ? "-1 " : " 1 ") << "scale " << std::setw(3) << v.scale() << " fraction " << v.fraction() << std::endl; + + if (v.iszero()) { + p.setzero(); + return p; + } + if (v.isnan() || v.isinf()) { + p.setnar(); + return p; + } + return convert_(v.sign(), v.scale(), v.fraction(), p); +} + +// quadrant returns a two character string indicating the quadrant of the projective reals the posito resides: from 0, SE, NE, NaR, NW, SW +template +std::string quadrant(const posito& p) { + posito pOne(1), pMinusOne(-1); + if (p.isneg()) { + // west + if (p > pMinusOne) { + return "SW"; + } + else { + return "NW"; + } + } + else { + // east + if (p < pOne) { + return "SE"; + } + else { + return "NE"; + } + } +} + +// Construct posito from its components +template +posito& construct(bool s, const regime& r, const exponent& e, const fraction& f, posito& p) { + // generate raw bit representation + bitblock raw_bits = s ? twos_complement(collect(s, r, e, f)) : collect(s, r, e, f); + raw_bits.set(nbits - 1, s); + p.set(raw_bits); + return p; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// class posito represents posit numbers of arbitrary configuration and their basic arithmetic operations (add/sub, mul/div) +template +class posito { + +// static_assert(sizeof(long double) == 16, "Posit library requires compiler support for 128 bit long double."); +// static_assert((sizeof(long double) == 16) && (std::numeric_limits::digits < 113), "C++ math library for long double does not support 128-bit quad precision floats."); + +public: + static constexpr unsigned nbits = _nbits; + static constexpr unsigned es = _es; + static constexpr unsigned sbits = 1; // number of sign bits: specified + static constexpr unsigned rbits = nbits - sbits; // maximum number of regime bits: derived + static constexpr unsigned ebits = es; // maximum number of exponent bits: specified + static constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); // maximum number of fraction bits: derived + static constexpr unsigned fhbits = fbits + 1; // maximum number of fraction + one hidden bit + + static constexpr unsigned abits = fhbits + 3; // size of the addend + static constexpr unsigned mbits = 2 * fhbits; // size of the multiplier output + static constexpr unsigned divbits = 3 * fhbits + 4; // size of the divider output + + // constexpr posito() { setzero(); } + constexpr posito() : _bits{} {} + + constexpr posito(const posito&) = default; + constexpr posito(posito&&) = default; + + posito& operator=(const posito&) = default; + posito& operator=(posito&&) = default; + + /// Construct posito from another posito + template + posito(const posito& a) noexcept { + *this = a.to_value(); + } + + // specific value constructor + constexpr posito(const SpecificValue code) noexcept { + switch (code) { + case SpecificValue::infpos: + case SpecificValue::maxpos: + maxpos(); + break; + case SpecificValue::minpos: + minpos(); + break; + case SpecificValue::zero: + default: + zero(); + break; + case SpecificValue::minneg: + minneg(); + break; + case SpecificValue::infneg: + case SpecificValue::maxneg: + maxneg(); + break; + case SpecificValue::snan: + case SpecificValue::qnan: + case SpecificValue::nar: + setnar(); + break; + } + } + + // initializers for native types, allow for implicit conversion (Peter) + constexpr posito(signed char initial_value) noexcept { *this = initial_value; } + constexpr posito(short initial_value) noexcept { *this = initial_value; } + constexpr posito(int initial_value) noexcept { *this = initial_value; } + constexpr posito(long initial_value) noexcept { *this = initial_value; } + constexpr posito(long long initial_value) noexcept { *this = initial_value; } + constexpr posito(char initial_value) noexcept { *this = initial_value; } + constexpr posito(unsigned short initial_value) noexcept { *this = initial_value; } + constexpr posito(unsigned int initial_value) noexcept { *this = initial_value; } + constexpr posito(unsigned long initial_value) noexcept { *this = initial_value; } + constexpr posito(unsigned long long initial_value) noexcept { *this = initial_value; } + constexpr posito(float initial_value) noexcept { *this = initial_value; } + constexpr posito(double initial_value) noexcept { *this = initial_value; } + constexpr posito(long double initial_value) noexcept { *this = initial_value; } + + // assignment operators for native types + posito& operator=(signed char rhs) noexcept { + internal::value<8*sizeof(signed char)-1> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(short rhs) noexcept { + internal::value<8*sizeof(short)-1> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(int rhs) noexcept { + internal::value<8*sizeof(int)-1> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(long rhs) noexcept { + internal::value<8*sizeof(long)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(long long rhs) noexcept { + internal::value<8*sizeof(long long)-1> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(char rhs) noexcept { + internal::value<8*sizeof(char)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(unsigned short rhs) noexcept { + internal::value<8*sizeof(unsigned short)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(unsigned int rhs) noexcept { + internal::value<8*sizeof(unsigned int)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(unsigned long rhs) noexcept { + internal::value<8*sizeof(unsigned long)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(unsigned long long rhs) noexcept { + internal::value<8*sizeof(unsigned long long)> v(rhs); + if (v.iszero()) { + setzero(); + return *this; + } + else { + convert(v, *this); + } + return *this; + } + posito& operator=(float rhs) noexcept { + return convert_ieee754(rhs); + } + constexpr posito& operator=(double rhs) noexcept { + convert_ieee754(rhs); + return *this; + } + posito& operator=(long double rhs) noexcept { + return convert_ieee754(rhs); + } + +#ifdef ADAPTER_POSITO_AND_INTEGER + // convenience assignment operator + template + posito& operator=(const integer& rhs) { + convert_i2p(rhs, *this); + return *this; + } +#endif + + // assignment for value type + template + posito& operator=(const internal::value& rhs) { + clear(); + convert(rhs, *this); + return *this; + } + + // negation operator + posito operator-() const { + if (iszero()) { + return *this; + } + if (isnar()) { + return *this; + } + posito negated(0); // TODO: artificial initialization to pass -Wmaybe-uninitialized + bitblock raw_bits = twos_complement(_bits); + negated.setBitblock(raw_bits); + return negated; + } + // prefix/postfix operators + posito& operator++() noexcept { + increment_posit(); + return *this; + } + posito operator++(int) noexcept { + posito tmp(*this); + operator++(); + return tmp; + } + posito& operator--() noexcept { + decrement_posit(); + return *this; + } + posito operator--(int) noexcept { + posito tmp(*this); + operator--(); + return tmp; + } + + // we model a hw pipeline with register assignments, functional block, and conversion + posito& operator+=(const posito& rhs) { + if (_trace_add) std::cout << "---------------------- ADD -------------------" << std::endl; + // special case handling of the inputs +#if POSITO_THROW_ARITHMETIC_EXCEPTION + if (isnar() || rhs.isnar()) { + throw posito{}; + } +#else + if (isnar() || rhs.isnar()) { + setnar(); + return *this; + } +#endif + if (iszero()) { + *this = rhs; + return *this; + } + if (rhs.iszero()) return *this; + + // arithmetic operation + internal::value sum; + internal::value a, b; + // transform the inputs into (sign,scale,fraction) triples + normalize(a); + rhs.normalize(b); + module_add(a, b, sum); // add the two inputs + + // special case handling of the result + if (sum.iszero()) { + setzero(); + } + else if (sum.isinf()) { + setnar(); + } + else { + convert(sum, *this); + } + return *this; + } + posito& operator+=(double rhs) { + return *this += posito(rhs); + } + posito& operator-=(const posito& rhs) { + if (_trace_sub) std::cout << "---------------------- SUB -------------------" << std::endl; + // special case handling of the inputs +#if POSITO_THROW_ARITHMETIC_EXCEPTION + if (isnar() || rhs.isnar()) { + throw posito{}; + } +#else + if (isnar() || rhs.isnar()) { + setnar(); + return *this; + } +#endif + if (iszero()) { + *this = -rhs; + return *this; + } + if (rhs.iszero()) return *this; + + // arithmetic operation + internal::value difference; + internal::value a, b; + // transform the inputs into (sign,scale,fraction) triples + normalize(a); + rhs.normalize(b); + module_subtract(a, b, difference); // add the two inputs + + // special case handling of the result + if (difference.iszero()) { + setzero(); + } + else if (difference.isinf()) { + setnar(); + } + else { + convert(difference, *this); + } + return *this; + } + posito& operator-=(double rhs) { + return *this -= posito(rhs); + } + posito& operator*=(const posito& rhs) { + static_assert(fhbits > 0, "posito configuration does not support multiplication"); + if (_trace_mul) std::cout << "---------------------- MUL -------------------" << std::endl; + // special case handling of the inputs +#if POSITO_THROW_ARITHMETIC_EXCEPTION + if (isnar() || rhs.isnar()) { + throw posito{}; + } +#else + if (isnar() || rhs.isnar()) { + setnar(); + return *this; + } +#endif + if (iszero() || rhs.iszero()) { + setzero(); + return *this; + } + + // arithmetic operation + internal::value product; + internal::value a, b; + // transform the inputs into (sign,scale,fraction) triples + normalize(a); + rhs.normalize(b); + + module_multiply(a, b, product); // multiply the two inputs + + // special case handling on the output + if (product.iszero()) { + setzero(); + } + else if (product.isinf()) { + setnar(); + } + else { + convert(product, *this); + } + return *this; + } + posito& operator*=(double rhs) { + return *this *= posito(rhs); + } + posito& operator/=(const posito& rhs) { + if (_trace_div) std::cout << "---------------------- DIV -------------------" << std::endl; +#if POSITO_THROW_ARITHMETIC_EXCEPTION + if (rhs.iszero()) { + throw posito{}; // not throwing is a quiet signalling NaR + } + if (rhs.isnar()) { + throw posito{}; + } + if (isnar()) { + throw posito{}; + } + if (iszero() || isnar()) { + return *this; + } +#else + // not throwing is a quiet signalling NaR + if (rhs.iszero()) { + setnar(); + return *this; + } + if (rhs.isnar()) { + setnar(); + return *this; + } + if (iszero() || isnar()) { + return *this; + } +#endif + internal::value ratio; + internal::value a, b; + // transform the inputs into (sign,scale,fraction) triples + normalize(a); + rhs.normalize(b); + + module_divide(a, b, ratio); + + // special case handling on the output +#if POSITO_THROW_ARITHMETIC_EXCEPTION + if (ratio.iszero()) { + throw posito{}; + } + else if (ratio.isinf()) { + throw posito{}; + } + else { + convert(ratio, *this); + } +#else + if (ratio.iszero()) { + setzero(); // this shouldn't happen as we should project back onto minpos + } + else if (ratio.isinf()) { + setnar(); // this shouldn't happen as we should project back onto maxpos + } + else { + convert(ratio, *this); + } +#endif + + return *this; + } + posito& operator/=(double rhs) { + return *this /= posito(rhs); + } + + posito reciprocate() const { + if (_trace_reciprocate) std::cout << "-------------------- RECIPROCATE ----------------" << std::endl; + posito p; + // special case of NaR (Not a Real) + if (isnar()) { + p.setnar(); + return p; + } + if (iszero()) { + p.setnar(); + return p; + } + // compute the reciprocal + bool old_sign = _bits[nbits-1]; + internal::bitblock raw_bits; + if (ispowerof2()) { + raw_bits = twos_complement(_bits); + raw_bits.set(nbits-1, old_sign); + p.setBitblock(raw_bits); + } + else { + bool s{ false }; + regime r; + exponent e; + fraction f; + decode(_bits, s, r, e, f); + + constexpr unsigned operand_size = fhbits; + internal::bitblock one; + one.set(operand_size - 1, true); + internal::bitblock frac; + copy_into(f.get(), 0, frac); + frac.set(operand_size - 1, true); + constexpr unsigned reciprocal_size = 3 * fbits + 4; + internal::bitblock reciprocal; + divide_with_fraction(one, frac, reciprocal); + if (_trace_reciprocate) { + std::cout << "one " << one << std::endl; + std::cout << "frac " << frac << std::endl; + std::cout << "recip " << reciprocal << std::endl; + } + + // radix point falls at operand size == reciprocal_size - operand_size - 1 + reciprocal <<= operand_size - 1; + if (_trace_reciprocate) std::cout << "frac " << reciprocal << std::endl; + int new_scale = -scale(*this); + int msb = findMostSignificantBit(reciprocal); + if (msb > 0) { + int shift = static_cast(reciprocal_size - static_cast(msb)); + reciprocal <<= static_cast(shift); + new_scale -= (shift-1); + if (_trace_reciprocate) std::cout << "result " << reciprocal << std::endl; + } + //std::bitset tr; + //truncate(reciprocal, tr); + //std::cout << "tr " << tr << std::endl; + + // the following is failing for some reason + // value v(old_sign, new_scale, reciprocal); + // convert(v, p); + // instead the following works + convert_(old_sign, new_scale, reciprocal, p); + } + return p; + } + // absolute value is simply the 2's complement when negative + posito abs() const { + posito p; + if (isneg()) { + p.setBitblock(twos_complement(_bits)); + } + else { + p.setBitblock(_bits); + } + return p; + } + + // conversion operators + // Maybe remove explicit, MTL compiles, but we have lots of double computation then + explicit operator unsigned short() const { return to_ushort(); } + explicit operator unsigned int() const { return to_uint(); } + explicit operator unsigned long() const { return to_ulong(); } + explicit operator unsigned long long() const { return to_ulong_long(); } + explicit operator short() const { return to_short(); } + explicit operator int() const { return to_int(); } + explicit operator long() const { return to_long(); } + explicit operator long long() const { return to_long_long(); } + explicit operator float() const { return to_float(); } + explicit operator double() const { return to_double(); } + explicit operator long double() const { return to_long_double(); } + + // Selectors + bool sign() const { return _bits[nbits - 1]; } + bool isnar() const { + if (_bits[nbits - 1] == false) return false; + bitblock tmp(_bits); + tmp.reset(nbits - 1); + return tmp.none() ? true : false; + } + bool iszero() const { return _bits.none() ? true : false; } + bool isone() const { // pattern 010000.... + bitblock tmp(_bits); + tmp.set(nbits - 2, false); + return _bits[nbits - 2] & tmp.none(); + } + bool isminusone() const { // pattern 110000... + bitblock tmp(_bits); + tmp.set(nbits - 1, false); + tmp.set(nbits - 2, false); + return _bits[nbits - 1] & _bits[nbits - 2] & tmp.none(); + } + bool isneg() const { return _bits[nbits - 1]; } + bool ispos() const { return !_bits[nbits - 1]; } + bool ispowerof2() const { + bool s{ false }; + regime r; + exponent e; + fraction f; + decode(_bits, s, r, e, f); + return f.none(); + } + bool isinteger() const { return true; } // return (floor(*this) == *this) ? true : false; } + + bitblock get() const { return _bits; } + unsigned long long bits() const { return _bits.to_ullong(); } + constexpr bool test(unsigned bitIndex) const noexcept { + return (bitIndex < nbits ? _bits[bitIndex] : false); + } + constexpr bool at(unsigned bitIndex) const noexcept { + return (bitIndex < nbits ? _bits[bitIndex] : false); + } + constexpr uint8_t nibble(unsigned n) const noexcept { + uint8_t nibbleBits{ 0 }; + if (n < (1 + ((nbits - 1) >> 2))) { + unsigned baseNibbleIndex = 4 * n; + unsigned mask = 0x1; + for (unsigned i = baseNibbleIndex; i < nbits && i < baseNibbleIndex + 4; ++i) { + nibbleBits |= (test(i) ? mask : 0); + mask <<= 1; + } + } + return nibbleBits; + } + // Modifiers + constexpr void clear() { _bits.reset(); } + constexpr void setzero() { clear(); } + constexpr void setnar() { + _bits.reset(); + _bits.set(nbits - 1, true); + } + + posito& minpos() noexcept { clear(); return ++(*this); } + posito& maxpos() noexcept { setnar(); return --(*this); } + posito& zero() noexcept { clear(); return *this; } + posito& minneg() noexcept { clear(); return --(*this); } + posito& maxneg() noexcept { setnar(); return ++(*this); } + + // set the posito bits explicitely + constexpr posito& setBitblock(const bitblock& raw_bits) { + _bits = raw_bits; + return *this; + } + // Set the raw bits of the posito given an unsigned value starting from the lsb. Handy for enumerating a posito state space + constexpr posito& setbits(uint64_t value) { + clear(); + bitblock raw_bits; + uint64_t mask = 1; + for ( unsigned i = 0; i < nbits; i++ ) { + raw_bits.set(i,(value & mask)); + mask <<= 1; + } + _bits = raw_bits; + return *this; + } + + // currently, size is tied to fbits size of posito config. Is there a need for a case that captures a user-defined sized fraction? + internal::value to_value() const { + using namespace sw::universal::internal; + bool _sign{ false }; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(_bits, _sign, _regime, _exponent, _fraction); + return internal::value(_sign, _regime.scale() + _exponent.scale(), _fraction.get(), iszero(), isnar()); + } + void normalize(internal::value& v) const { + using namespace sw::universal::internal; + bool _sign{ false }; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(_bits, _sign, _regime, _exponent, _fraction); + v.set(_sign, _regime.scale() + _exponent.scale(), _fraction.get(), iszero(), isnar()); + } + template + void normalize_to(internal::value& v) const { + using namespace sw::universal::internal; + bool _sign{ false }; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(_bits, _sign, _regime, _exponent, _fraction); + bitblock _fr; + bitblock _src = _fraction.get(); + int tgt, src; + for (tgt = int(tgt_fbits) - 1, src = int(fbits) - 1; tgt >= 0 && src >= 0; tgt--, src--) _fr[tgt] = _src[src]; + v.set(_sign, _regime.scale() + _exponent.scale(), _fr, iszero(), isnar()); + } + + // step up to the next posito in a lexicographical order + void increment_posit() { + increment_bitset(_bits); + } + // step down to the previous posito in a lexicographical order + void decrement_posit() { + decrement_bitset(_bits); + } + + // return human readable type configuration for this posito + inline std::string cfg() { + std::stringstream ss; + ss << "posito<" << nbits << ", " << es << ">"; + return ss.str(); + } + +private: + internal::bitblock _bits; // raw bit representation + + // HELPER methods + + // Conversion functions +#if POSITO_THROW_ARITHMETIC_EXCEPTION + short to_short() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return short(to_float()); + } + int to_int() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return int(to_double()); + } + long to_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return long(to_long_double()); + } + long long to_long_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return (long long)(to_long_double()); + } + unsigned short to_ushort() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return (unsigned short)(to_float()); + } + unsigned int to_uint() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return (unsigned int)(to_double()); + } + unsigned long to_ulong() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return (unsigned long)(to_long_double()); + } + unsigned long long to_ulong_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return (unsigned long long)(to_long_double()); + } +#else + short to_short() const { return short(to_float()); } + int to_int() const { return int(to_double()); } + long to_long() const { return long(to_long_double()); } + long long to_long_long() const { return (long long)(to_long_double()); } + unsigned short to_ushort() const { return (unsigned short)(to_float()); } + unsigned int to_uint() const { return (unsigned int)(to_double()); } + unsigned long to_ulong() const { return (unsigned long)(to_long_double()); } + unsigned long long to_ulong_long() const { return (unsigned long long)(to_long_double()); } +#endif + float to_float() const { + return (float)to_double(); + } + double to_double() const { + if (iszero()) return 0.0; + if (isnar()) return std::numeric_limits::quiet_NaN(); + bool _sign{ false }; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(_bits, _sign, _regime, _exponent, _fraction); + double s = (_sign ? -1.0 : 1.0); + double r = double(_regime.value()); + double e = double(_exponent.value()); + double f = (1.0 + double(_fraction.value())); + return s * r * e * f; + } + long double to_long_double() const { + if (iszero()) return 0.0l; + if (isnar()) return std::numeric_limits::quiet_NaN();; + bool _sign{ false }; + regime _regime; + exponent _exponent; + fraction _fraction; + decode(_bits, _sign, _regime, _exponent, _fraction); + long double s = (_sign ? -1.0l : 1.0l); + long double r = _regime.value(); + long double e = _exponent.value(); + long double f = (1.0l + _fraction.value()); + return s * r * e * f; + } + template + constexpr posito& convert_ieee754(const T& rhs) { + constexpr int dfbits = std::numeric_limits::digits - 1; + internal::value v(static_cast(rhs)); + + // special case processing + if (v.iszero()) { + setzero(); + return *this; + } + if (v.isinf() || v.isnan()) { // posito encode for FP_INFINITE and NaN as NaR (Not a Real) + setnar(); + return *this; + } + + convert(v, *this); + return *this; + } + + // friend functions + // template parameters need names different from class template parameters (for gcc and clang) + template + friend std::ostream& operator<< (std::ostream& ostr, const posito& p); + template + friend std::istream& operator>> (std::istream& istr, posito& p); + + // posito - posito logic functions + template + friend bool operator==(const posito& lhs, const posito& rhs); + template + friend bool operator!=(const posito& lhs, const posito& rhs); + template + friend bool operator< (const posito& lhs, const posito& rhs); + template + friend bool operator> (const posito& lhs, const posito& rhs); + template + friend bool operator<=(const posito& lhs, const posito& rhs); + template + friend bool operator>=(const posito& lhs, const posito& rhs); + +#if POSITO_ENABLE_LITERALS + // posito - literal logic functions + + // posito - signed char + template + friend bool operator==(const posito& lhs, signed char rhs); + template + friend bool operator!=(const posito& lhs, signed char rhs); + template + friend bool operator< (const posito& lhs, signed char rhs); + template + friend bool operator> (const posito& lhs, signed char rhs); + template + friend bool operator<=(const posito& lhs, signed char rhs); + template + friend bool operator>=(const posito& lhs, signed char rhs); + + // posito - char + template + friend bool operator==(const posito& lhs, char rhs); + template + friend bool operator!=(const posito& lhs, char rhs); + template + friend bool operator< (const posito& lhs, char rhs); + template + friend bool operator> (const posito& lhs, char rhs); + template + friend bool operator<=(const posito& lhs, char rhs); + template + friend bool operator>=(const posito& lhs, char rhs); + + // posito - short + template + friend bool operator==(const posito& lhs, short rhs); + template + friend bool operator!=(const posito& lhs, short rhs); + template + friend bool operator< (const posito& lhs, short rhs); + template + friend bool operator> (const posito& lhs, short rhs); + template + friend bool operator<=(const posito& lhs, short rhs); + template + friend bool operator>=(const posito& lhs, short rhs); + + // posito - unsigned short + template + friend bool operator==(const posito& lhs, unsigned short rhs); + template + friend bool operator!=(const posito& lhs, unsigned short rhs); + template + friend bool operator< (const posito& lhs, unsigned short rhs); + template + friend bool operator> (const posito& lhs, unsigned short rhs); + template + friend bool operator<=(const posito& lhs, unsigned short rhs); + template + friend bool operator>=(const posito& lhs, unsigned short rhs); + + // posito - int + template + friend bool operator==(const posito& lhs, int rhs); + template + friend bool operator!=(const posito& lhs, int rhs); + template + friend bool operator< (const posito& lhs, int rhs); + template + friend bool operator> (const posito& lhs, int rhs); + template + friend bool operator<=(const posito& lhs, int rhs); + template + friend bool operator>=(const posito& lhs, int rhs); + + // posito - unsigned int + template + friend bool operator==(const posito& lhs, unsigned int rhs); + template + friend bool operator!=(const posito& lhs, unsigned int rhs); + template + friend bool operator< (const posito& lhs, unsigned int rhs); + template + friend bool operator> (const posito& lhs, unsigned int rhs); + template + friend bool operator<=(const posito& lhs, unsigned int rhs); + template + friend bool operator>=(const posito& lhs, unsigned int rhs); + + // posito - long + template + friend bool operator==(const posito& lhs, long rhs); + template + friend bool operator!=(const posito& lhs, long rhs); + template + friend bool operator< (const posito& lhs, long rhs); + template + friend bool operator> (const posito& lhs, long rhs); + template + friend bool operator<=(const posito& lhs, long rhs); + template + friend bool operator>=(const posito& lhs, long rhs); + + // posito - unsigned long long + template + friend bool operator==(const posito& lhs, unsigned long rhs); + template + friend bool operator!=(const posito& lhs, unsigned long rhs); + template + friend bool operator< (const posito& lhs, unsigned long rhs); + template + friend bool operator> (const posito& lhs, unsigned long rhs); + template + friend bool operator<=(const posito& lhs, unsigned long rhs); + template + friend bool operator>=(const posito& lhs, unsigned long rhs); + + // posito - long long + template + friend bool operator==(const posito& lhs, long long rhs); + template + friend bool operator!=(const posito& lhs, long long rhs); + template + friend bool operator< (const posito& lhs, long long rhs); + template + friend bool operator> (const posito& lhs, long long rhs); + template + friend bool operator<=(const posito& lhs, long long rhs); + template + friend bool operator>=(const posito& lhs, long long rhs); + + // posito - unsigned long long + template + friend bool operator==(const posito& lhs, unsigned long long rhs); + template + friend bool operator!=(const posito& lhs, unsigned long long rhs); + template + friend bool operator< (const posito& lhs, unsigned long long rhs); + template + friend bool operator> (const posito& lhs, unsigned long long rhs); + template + friend bool operator<=(const posito& lhs, unsigned long long rhs); + template + friend bool operator>=(const posito& lhs, unsigned long long rhs); + + // posito - float + template + friend bool operator==(const posito& lhs, float rhs); + template + friend bool operator!=(const posito& lhs, float rhs); + template + friend bool operator< (const posito& lhs, float rhs); + template + friend bool operator> (const posito& lhs, float rhs); + template + friend bool operator<=(const posito& lhs, float rhs); + template + friend bool operator>=(const posito& lhs, float rhs); + + // posito - double + template + friend bool operator==(const posito& lhs, double rhs); + template + friend bool operator!=(const posito& lhs, double rhs); + template + friend bool operator< (const posito& lhs, double rhs); + template + friend bool operator> (const posito& lhs, double rhs); + template + friend bool operator<=(const posito& lhs, double rhs); + template + friend bool operator>=(const posito& lhs, double rhs); + + // posito - long double + template + friend bool operator==(const posito& lhs, long double rhs); + template + friend bool operator!=(const posito& lhs, long double rhs); + template + friend bool operator< (const posito& lhs, long double rhs); + template + friend bool operator> (const posito& lhs, long double rhs); + template + friend bool operator<=(const posito& lhs, long double rhs); + template + friend bool operator>=(const posito& lhs, long double rhs); + + // literal - posito logic functions + + // signed char - posito + template + friend bool operator==(signed char lhs, const posito& rhs); + template + friend bool operator!=(signed char lhs, const posito& rhs); + template + friend bool operator< (signed char lhs, const posito& rhs); + template + friend bool operator> (signed char lhs, const posito& rhs); + template + friend bool operator<=(signed char lhs, const posito& rhs); + template + friend bool operator>=(signed char lhs, const posito& rhs); + + // char - posito + template + friend bool operator==(char lhs, const posito& rhs); + template + friend bool operator!=(char lhs, const posito& rhs); + template + friend bool operator< (char lhs, const posito& rhs); + template + friend bool operator> (char lhs, const posito& rhs); + template + friend bool operator<=(char lhs, const posito& rhs); + template + friend bool operator>=(char lhs, const posito& rhs); + + // short - posito + template + friend bool operator==(short lhs, const posito& rhs); + template + friend bool operator!=(short lhs, const posito& rhs); + template + friend bool operator< (short lhs, const posito& rhs); + template + friend bool operator> (short lhs, const posito& rhs); + template + friend bool operator<=(short lhs, const posito& rhs); + template + friend bool operator>=(short lhs, const posito& rhs); + + // unsigned short - posito + template + friend bool operator==(unsigned short lhs, const posito& rhs); + template + friend bool operator!=(unsigned short lhs, const posito& rhs); + template + friend bool operator< (unsigned short lhs, const posito& rhs); + template + friend bool operator> (unsigned short lhs, const posito& rhs); + template + friend bool operator<=(unsigned short lhs, const posito& rhs); + template + friend bool operator>=(unsigned short lhs, const posito& rhs); + + // int - posito + template + friend bool operator==(int lhs, const posito& rhs); + template + friend bool operator!=(int lhs, const posito& rhs); + template + friend bool operator< (int lhs, const posito& rhs); + template + friend bool operator> (int lhs, const posito& rhs); + template + friend bool operator<=(int lhs, const posito& rhs); + template + friend bool operator>=(int lhs, const posito& rhs); + + // unsigned int - posito + template + friend bool operator==(unsigned int lhs, const posito& rhs); + template + friend bool operator!=(unsigned int lhs, const posito& rhs); + template + friend bool operator< (unsigned int lhs, const posito& rhs); + template + friend bool operator> (unsigned int lhs, const posito& rhs); + template + friend bool operator<=(unsigned int lhs, const posito& rhs); + template + friend bool operator>=(unsigned int lhs, const posito& rhs); + + // long - posito + template + friend bool operator==(long lhs, const posito& rhs); + template + friend bool operator!=(long lhs, const posito& rhs); + template + friend bool operator< (long lhs, const posito& rhs); + template + friend bool operator> (long lhs, const posito& rhs); + template + friend bool operator<=(long lhs, const posito& rhs); + template + friend bool operator>=(long lhs, const posito& rhs); + + // unsigned long - posito + template + friend bool operator==(unsigned long lhs, const posito& rhs); + template + friend bool operator!=(unsigned long lhs, const posito& rhs); + template + friend bool operator< (unsigned long lhs, const posito& rhs); + template + friend bool operator> (unsigned long lhs, const posito& rhs); + template + friend bool operator<=(unsigned long lhs, const posito& rhs); + template + friend bool operator>=(unsigned long lhs, const posito& rhs); + + // long long - posito + template + friend bool operator==(long long lhs, const posito& rhs); + template + friend bool operator!=(long long lhs, const posito& rhs); + template + friend bool operator< (long long lhs, const posito& rhs); + template + friend bool operator> (long long lhs, const posito& rhs); + template + friend bool operator<=(long long lhs, const posito& rhs); + template + friend bool operator>=(long long lhs, const posito& rhs); + + // unsigned long long - posito + template + friend bool operator==(unsigned long long lhs, const posito& rhs); + template + friend bool operator!=(unsigned long long lhs, const posito& rhs); + template + friend bool operator< (unsigned long long lhs, const posito& rhs); + template + friend bool operator> (unsigned long long lhs, const posito& rhs); + template + friend bool operator<=(unsigned long long lhs, const posito& rhs); + template + friend bool operator>=(unsigned long long lhs, const posito& rhs); + + // float - posito + template + friend bool operator==(float lhs, const posito& rhs); + template + friend bool operator!=(float lhs, const posito& rhs); + template + friend bool operator< (float lhs, const posito& rhs); + template + friend bool operator> (float lhs, const posito& rhs); + template + friend bool operator<=(float lhs, const posito& rhs); + template + friend bool operator>=(float lhs, const posito& rhs); + + // double - posito + template + friend bool operator==(double lhs, const posito& rhs); + template + friend bool operator!=(double lhs, const posito& rhs); + template + friend bool operator< (double lhs, const posito& rhs); + template + friend bool operator> (double lhs, const posito& rhs); + template + friend bool operator<=(double lhs, const posito& rhs); + template + friend bool operator>=(double lhs, const posito& rhs); + + // long double - posito + template + friend bool operator==(long double lhs, const posito& rhs); + template + friend bool operator!=(long double lhs, const posito& rhs); + template + friend bool operator< (long double lhs, const posito& rhs); + template + friend bool operator> (long double lhs, const posito& rhs); + template + friend bool operator<=(long double lhs, const posito& rhs); + template + friend bool operator>=(long double lhs, const posito& rhs); + +#endif // POSITO_ENABLE_LITERALS + +}; + +////////////////// convenience/shim functions + +template +inline bool isnar(const posito& p) { + return p.isnar(); +} +template +inline bool iszero(const posito& p) { + return p.iszero(); +} +template +inline bool ispos(const posito& p) { + return p.ispos(); +} +template +inline bool isneg(const posito& p) { + return p.isneg(); +} +template +inline bool isone(const posito& p) { + return p.isone(); +} +template +inline bool isminusone(const posito& p) { + return p.isminusone(); +} +template +inline bool ispowerof2(const posito& p) { + return p.ispowerof2(); +} + +////////////////// POSITO operators + +// stream operators + +// generate a posito format ASCII format nbits.esxNN...NNp +template +inline std::ostream& operator<<(std::ostream& ostr, const posito& p) { + // to make certain that setw and left/right operators work properly + // we need to transform the posito into a string + std::stringstream ss; +#if POSITO_ERROR_FREE_IO_FORMAT + ss << nbits << '.' << es << 'x' << to_hex(p.get()) << 'p'; +#else + std::streamsize prec = ostr.precision(); + std::streamsize width = ostr.width(); + std::ios_base::fmtflags ff; + ff = ostr.flags(); + ss.flags(ff); +// ss << std::showpos << std::setw(width) << std::setprecision(prec) << (long double)p; + // TODO: how do you react to fmtflags being set, such as hexfloat or showpos? + // it appears that the fmtflags are opaque and not a user-visible feature + ss << std::setw(width) << std::setprecision(prec); + ss << to_string(p, prec); // TODO: we need a true native serialization function +#endif + return ostr << ss.str(); +} + +// read an ASCII float or posito format: nbits.esxNN...NNp, for example: 32.2x80000000p +template +inline std::istream& operator>> (std::istream& istr, posito& p) { + std::string txt; + istr >> txt; + if (!parse(txt, p)) { + std::cerr << "unable to parse -" << txt << "- into a posito value\n"; + } + return istr; +} + +// generate a posito format ASCII format nbits.esxNN...NNp +template +inline std::string hex_format(const posito& p) { + // we need to transform the posito into a string + std::stringstream ss; + ss << nbits << '.' << es << 'x' << to_hex(p.get()) << 'p'; + return ss.str(); +} + +// convert a posito value to a string using "nar" as designation of NaR +template +inline std::string to_string(const posito& p, std::streamsize precision = 17) { + if (p.isnar()) { + return std::string("nar"); + } + std::stringstream ss; + ss << std::setprecision(precision) << (long double)p; + return ss.str(); +} + +// binary representation of a posito with delimiters: i.e. 0.10.00.000000 => sign.regime.exp.fraction +template +inline std::string to_binary(const posito& number, bool nibbleMarker = false) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); // maximum number of fraction bits: derived + bool s{ false }; + regime r; + exponent e; + fraction f; + bitblock raw = number.get(); + std::stringstream ss; + extract_fields(raw, s, r, e, f); + + ss << (s ? "0b1." : "0b0."); + ss << to_string(r, false, nibbleMarker) << "." + << to_string(e, false, nibbleMarker) << "." + << to_string(f, false, nibbleMarker); + + return ss.str(); +} + +template +inline std::string to_triple(const posito& number, bool nibbleMarker = false) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); // maximum number of fraction bits: derived + bool s{ false }; + regime r; + exponent e; + fraction f; + bitblock raw = number.get(); + std::stringstream ss; + extract_fields(raw, s, r, e, f); + + ss << (s ? "(-, " : "(+, "); + ss << scale(number) + << ", " + << to_string(f, false, nibbleMarker) + << ')'; + + return ss.str(); +} + +// numerical helpers + +template +inline posito ulp(const posito& a) { + posito b(a); + return ++b - a; +} + +// binary exponent representation: i.e. 1.0101010e2^-37 +template +inline std::string to_base2_scientific(const posito& number) { + constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); // maximum number of fraction bits: derived + bool s{ false }; + scale(number); + regime r; + exponent e; + fraction f; + bitblock raw = number.get(); + std::stringstream ss; + extract_fields(raw, s, r, e, f); + ss << (s ? "-" : "+") << "1." << to_string(f, true) << "e2^" << std::showpos << r.scale() + e.scale(); + return ss.str(); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// posito - posito binary logic operators + +template +inline bool operator==(const posito& lhs, const posito& rhs) { + return lhs._bits == rhs._bits; +} +template +inline bool operator!=(const posito& lhs, const posito& rhs) { + return !operator==(lhs, rhs); +} +template +inline bool operator< (const posito& lhs, const posito& rhs) { + return twosComplementLessThan(lhs._bits, rhs._bits); +} +template +inline bool operator> (const posito& lhs, const posito& rhs) { + return operator< (rhs, lhs); +} +template +inline bool operator<=(const posito& lhs, const posito& rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} +template +inline bool operator>=(const posito& lhs, const posito& rhs) { + return !operator< (lhs, rhs); +} + +// posito - posito binary arithmetic operators +// BINARY ADDITION +template +inline posito operator+(const posito& lhs, const posito& rhs) { + posito sum = lhs; + return sum += rhs; +} +// BINARY SUBTRACTION +template +inline posito operator-(const posito& lhs, const posito& rhs) { + posito diff = lhs; + return diff -= rhs; +} +// BINARY MULTIPLICATION +template +inline posito operator*(const posito& lhs, const posito& rhs) { + posito mul = lhs; + return mul *= rhs; +} +// BINARY DIVISION +template +inline posito operator/(const posito& lhs, const posito& rhs) { + posito ratio(lhs); + return ratio /= rhs; +} + +#if POSITO_ENABLE_LITERALS + +// posito - signed char logic operators +template +inline bool operator==(const posito& lhs, signed char rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, signed char rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, signed char rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, signed char rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, signed char rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, signed char rhs) { + return !operator<(lhs, posito(rhs)); +} + +// signed char - posito logic operators +template +inline bool operator==(signed char lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(signed char lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (signed char lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (signed char lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(signed char lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(signed char lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - char logic operators +template +inline bool operator==(const posito& lhs, char rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, char rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, char rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, char rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, char rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, char rhs) { + return !operator<(lhs, posito(rhs)); +} + +// char - posito logic operators +template +inline bool operator==(char lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(char lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (char lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (char lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(char lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(char lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - short logic operators +template +inline bool operator==(const posito& lhs, short rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, short rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, short rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, short rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, short rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, short rhs) { + return !operator<(lhs, posito(rhs)); +} + +// short - posito logic operators +template +inline bool operator==(short lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(short lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (short lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (short lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(short lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(short lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - unsigned short logic operators +template +inline bool operator==(const posito& lhs, unsigned short rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, unsigned short rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, unsigned short rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, unsigned short rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, unsigned short rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, unsigned short rhs) { + return !operator<(lhs, posito(rhs)); +} + +// unsigned short - posito logic operators +template +inline bool operator==(unsigned short lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(unsigned short lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (unsigned short lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (unsigned short lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(unsigned short lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(unsigned short lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - int logic operators +template +inline bool operator==(const posito& lhs, int rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, int rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, int rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, int rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, int rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, int rhs) { + return !operator<(lhs, posito(rhs)); +} + +// int - posito logic operators +template +inline bool operator==(int lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(int lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (int lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (int lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(int lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(int lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - unsigned int logic operators +template +inline bool operator==(const posito& lhs, unsigned int rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, unsigned int rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, unsigned int rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, unsigned int rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, unsigned int rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, unsigned int rhs) { + return !operator<(lhs, posito(rhs)); +} + +// unsigned int - posito logic operators +template +inline bool operator==(unsigned int lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(unsigned int lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (unsigned int lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (unsigned int lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(unsigned int lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(unsigned int lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - long logic operators +template +inline bool operator==(const posito& lhs, long rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, long rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, long rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, long rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, long rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, long rhs) { + return !operator<(lhs, posito(rhs)); +} + +// long - posito logic operators +template +inline bool operator==(long lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(long lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (long lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (long lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(long lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(long lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - unsigned long logic operators +template +inline bool operator==(const posito& lhs, unsigned long rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, unsigned long rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, unsigned long rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, unsigned long rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, unsigned long rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, unsigned long rhs) { + return !operator<(lhs, posito(rhs)); +} + +// unsigned long - posito logic operators +template +inline bool operator==(unsigned long lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(unsigned long lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (unsigned long lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (unsigned long lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(unsigned long lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(unsigned long lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - unsigned long long logic operators +template +inline bool operator==(const posito& lhs, unsigned long long rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, unsigned long long rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, unsigned long long rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, unsigned long long rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, unsigned long long rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, unsigned long long rhs) { + return !operator<(lhs, posito(rhs)); +} + +// unsigned long long - posito logic operators +template +inline bool operator==(unsigned long long lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(unsigned long long lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (unsigned long long lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (unsigned long long lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(unsigned long long lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(unsigned long long lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - long long logic operators +template +inline bool operator==(const posito& lhs, long long rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, long long rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, long long rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, long long rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, long long rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, long long rhs) { + return !operator<(lhs, posito(rhs)); +} + +// long long - posito logic operators +template +inline bool operator==(long long lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(long long lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (long long lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (long long lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(long long lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(long long lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - float logic operators +template +inline bool operator==(const posito& lhs, float rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, float rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, float rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, float rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, float rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, float rhs) { + return !operator<(lhs, posito(rhs)); +} + +// float - posito logic operators +template +inline bool operator==(float lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(float lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (float lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (float lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(float lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(float lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - double logic operators +template +inline bool operator==(const posito& lhs, double rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, double rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, double rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, double rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, double rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, double rhs) { + return !operator<(lhs, posito(rhs)); +} + +// double - posito logic operators +template +inline bool operator==(double lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(double lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (double lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (double lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(double lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(double lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// posito - long double logic operators +template +inline bool operator==(const posito& lhs, long double rhs) { + return lhs == posito(rhs); +} +template +inline bool operator!=(const posito& lhs, long double rhs) { + return !operator==(lhs, posito(rhs)); +} +template +inline bool operator< (const posito& lhs, long double rhs) { + return twosComplementLessThan(lhs._bits, posito(rhs)._bits); +} +template +inline bool operator> (const posito& lhs, long double rhs) { + return operator< (posito(rhs), lhs); +} +template +inline bool operator<=(const posito& lhs, long double rhs) { + return !operator>(lhs, posito(rhs)); +} +template +inline bool operator>=(const posito& lhs, long double rhs) { + return !operator<(lhs, posito(rhs)); +} + +// long double - posito logic operators +template +inline bool operator==(long double lhs, const posito& rhs) { + return posito(lhs) == rhs; +} +template +inline bool operator!=(long double lhs, const posito& rhs) { + return !operator==(posito(lhs), rhs); +} +template +inline bool operator< (long double lhs, const posito& rhs) { + return twosComplementLessThan(posito(lhs)._bits, rhs._bits); +} +template +inline bool operator> (long double lhs, const posito& rhs) { + return operator< (posito(lhs), rhs); +} +template +inline bool operator<=(long double lhs, const posito& rhs) { + return !operator>(posito(lhs), rhs); +} +template +inline bool operator>=(long double lhs, const posito& rhs) { + return !operator<(posito(lhs), rhs); +} + +// BINARY ADDITION +template +inline posito operator+(const posito& lhs, double rhs) { + posito sum = lhs; + sum += posito(rhs); + return sum; +} + +// More generic alternative to avoid ambiguities with intrinsic + +template > +inline posito operator+(const posito& lhs, Value rhs) { + posito sum = lhs; + sum += posito(rhs); + return sum; +} + +template +inline posito operator+(double lhs, const posito& rhs) { + posito sum(lhs); + sum += rhs; + return sum; +} + +// BINARY SUBTRACTION +template +inline posito operator-(double lhs, const posito& rhs) { + posito diff(lhs); + diff -= rhs; + return diff; +} + +// More generic alternative to avoid ambiguities with intrinsic + +template > +inline posito operator-(const posito& lhs, Value rhs) { + posito diff = lhs; + diff -= posito(rhs); + return diff; +} + +template +inline posito operator-(const posito& lhs, double rhs) { + posito diff(lhs); + diff -= posito(rhs); + return diff; +} +// BINARY MULTIPLICATION +template +inline posito operator*(double lhs, const posito& rhs) { + posito mul(lhs); + mul *= rhs; + return mul; +} + +template > +inline posito operator*(Value lhs, const posito& rhs) { + posito mul(lhs); + mul *= rhs; + return mul; +} + +template +inline posito operator*(const posito& lhs, double rhs) { + posito mul(lhs); + mul *= posito(rhs); + return mul; +} + +// BINARY DIVISION +template +inline posito operator/(double lhs, const posito& rhs) { + posito ratio(lhs); + ratio /= rhs; + return ratio; +} + +template > +inline posito operator/(Value lhs, const posito& rhs) { + posito ratio(lhs); + ratio /= rhs; + return ratio; +} + +template +inline posito operator/(const posito& lhs, double rhs) { + posito ratio(lhs); + ratio /= posito(rhs); + return ratio; +} + +template > +inline posito operator/(const posito& lhs, Value rhs) { + posito ratio(lhs); + ratio /= posito(rhs); + return ratio; +} + +#endif // POSITO_ENABLE_LITERALS + +// Magnitude of a posito (expensive as we are creating a new posito). +template +posito abs(const posito& p) { + return p.abs(); +} +template +posito fabs(const posito& v) { + posito p(v); + return p.abs(); +} + +// Atomic fused operators + +// FMA: fused multiply-add: a*b + c +template +internal::value<1 + 2 * (nbits - es)> fma(const posito& a, const posito& b, const posito& c) { + constexpr unsigned fbits = nbits - 3 - es; + constexpr unsigned fhbits = fbits + 1; // size of fraction + hidden bit + constexpr unsigned mbits = 2 * fhbits; // size of the multiplier output + constexpr unsigned abits = mbits + 4; // size of the addend + + internal::value product; + internal::value sum; + internal::value va, vb, ctmp; + + // special case handling of input arguments + if (a.isnar() || b.isnar() || c.isnar()) { + sum.setnan(); + return sum; + } + + if (a.iszero() || b.iszero()) { // product will only become non-zero if neither a and b are zero + if (c.iszero()) { + sum.setzero(); + } + else { + ctmp.set(sign(c), scale(c), extract_fraction(c), c.iszero(), c.isnar()); + sum.template right_extend(ctmp); // right-extend the c input argument and assign to sum + } + } + else { // else clause guarantees that the product is non-zero + // first, the multiply: transform the inputs into (sign,scale,fraction) triples + va.set(sign(a), scale(a), extract_fraction(a), a.iszero(), a.isnar());; + vb.set(sign(b), scale(b), extract_fraction(b), b.iszero(), b.isnar());; + + module_multiply(va, vb, product); // multiply the two inputs + + // second, the add : at this point we are guaranteed that product is non-zero and non-nar + if (c.iszero()) { + sum.template right_extend(product); // right-extend the product and assign to sum + } + else { + ctmp.set(sign(c), scale(c), extract_fraction(c), c.iszero(), c.isnar()); + internal::value vc; + vc.template right_extend(ctmp); // right-extend the c argument and assign to adder input + module_add(product, vc, sum); + } + } + + return sum; +} + +// FAM: fused add-multiply: (a + b) * c +template +internal::value<2 * (nbits - 2 - es)> fam(const posito& a, const posito& b, const posito& c) { + constexpr unsigned fbits = nbits - 3 - es; + constexpr unsigned abits = fbits + 4; // size of the addend + constexpr unsigned fhbits = fbits + 1; // size of fraction + hidden bit + constexpr unsigned mbits = 2 * fhbits; // size of the multiplier output + + internal::value va, vb; + internal::value sum, vc; + internal::value product; + + // special case + if (c.iszero()) return product; + + // first the add + if (!a.iszero() || !b.iszero()) { + // transform the inputs into (sign,scale,fraction) triples + va.set(sign(a), scale(a), extract_fraction(a), a.iszero(), a.isnar());; + vb.set(sign(b), scale(b), extract_fraction(b), b.iszero(), b.isnar());; + + module_add(va, vb, sum); // multiply the two inputs + if (sum.iszero()) return product; // product is still zero + } + // second, the multiply + vc.set(c.get_size(), scale(c), extract_fraction(c), c.iszero(), c.isnar()); + module_multiply(sum, vc, product); + return product; +} + +// FMMA: fused multiply-multiply-add: (a * b) +/- (c * d) +template +internal::value fmma(const posito& a, const posito& b, const posito& c, const posito& d, bool opIsAdd = true) +{ + // todo: implement + internal::value result; + return result; +} + +}} // namespace sw::universal + + diff --git a/include/universal/number/posito/posito_parse.hpp b/include/universal/number/posito/posito_parse.hpp new file mode 100644 index 000000000..25909405c --- /dev/null +++ b/include/universal/number/posito/posito_parse.hpp @@ -0,0 +1,63 @@ +#pragma once +// posito_parse.hpp: parsing a posito in posit format +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + +namespace sw { namespace universal { + +// read a posito ASCII format and make a memory posit out of it +template +bool parse(std::string& txt, posito& p) { + bool bSuccess = false; + // check if the txt is of the native posit form: nbits.esXhexvalue + std::regex posit_regex("[\\d]+\\.[0123456789][xX][\\w]+[p]*"); + if (std::regex_match(txt, posit_regex)) { + // found a posit representation + std::string nbitsStr, esStr, bitStr; + auto it = txt.begin(); + for (; it != txt.end(); it++) { + if (*it == '.') break; + nbitsStr.append(1, *it); + } + for (it++; it != txt.end(); it++) { + if (*it == 'x' || *it == 'X') break; + esStr.append(1, *it); + } + for (it++; it != txt.end(); it++) { + if (*it == 'p') break; + bitStr.append(1, *it); + } + unsigned nbits_in = nbits; + { + std::istringstream ss(nbitsStr); + ss >> nbits_in; + } + uint64_t raw; + std::istringstream ss(bitStr); + ss >> std::hex >> raw; + //std::cout << "[" << nbitsStr << "] [" << esStr << "] [" << bitStr << "] = " << raw << std::endl; + // if not aligned, setbits takes the least significant nbits, so we need to shift to pick up the most significant nbits + if (nbits < nbits_in) { + raw >>= (nbits_in - nbits); + } + p.setbits(raw); + bSuccess = true; + } + else { + // assume it is a float/double/long double representation + std::istringstream ss(txt); + double d; + ss >> d; + p = d; + bSuccess = true; + } + return bSuccess; +} + +}} // namespace sw::universal diff --git a/include/universal/number/posito/table.hpp b/include/universal/number/posito/table.hpp new file mode 100644 index 000000000..017cf741c --- /dev/null +++ b/include/universal/number/posito/table.hpp @@ -0,0 +1,91 @@ +#pragma once +// table.hpp: generate a table of encoding and values for fixed-size arbitrary configuration posits +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. + +namespace sw { namespace universal { + +// generate a full binary representation table for a given posit configuration +template +void GeneratePositTable(std::ostream& ostr, bool csvFormat = false) { + static constexpr unsigned fbits = (es + 2 >= nbits ? 0 : nbits - 3 - es); + const unsigned size = (1 << nbits); + posito p; + if (csvFormat) { + ostr << "\"Generate Posit Lookup table for a POSIT<" << nbits << "," << es << "> in CSV format\"" << std::endl; + ostr << "#, Binary, Decoded, k, sign, scale, regime, exponent, fraction, value, posit\n"; + for (unsigned i = 0; i < size; i++) { + p.setbits(i); + bool s; + regime r; + exponent e; + fraction f; + decode(p.get(), s, r, e, f); + ostr << i << "," + << p.get() << "," + << decoded(p) << "," + << r.regime_k() << "," + << s << "," + << scale(p) << "," + << std::right << r << "," + << std::right << e << "," + << std::right << f << "," + << to_string(p, 22) << "," + << p + << '\n'; + } + ostr << std::endl; + } + else { + ostr << "Generate Posit Lookup table for a POSIT<" << nbits << "," << es << "> in TXT format" << std::endl; + + const unsigned index_column = 5; + const unsigned bin_column = 16; + const unsigned k_column = 8; + const unsigned sign_column = 8; + const unsigned scale_column = 8; + const unsigned regime_column = 16; + const unsigned exponent_column = 16; + const unsigned fraction_column = 16; + const unsigned value_column = 30; + const unsigned posit_format_column = 16; + + ostr << std::setw(index_column) << " # " + << std::setw(bin_column) << "Binary" + << std::setw(bin_column) << "Decoded" + << std::setw(k_column) << "k" + << std::setw(sign_column) << "sign" + << std::setw(scale_column) << "scale" + << std::setw(regime_column) << "regime" + << std::setw(exponent_column) << "exponent" + << std::setw(fraction_column) << "fraction" + << std::setw(value_column) << "value" + << std::setw(posit_format_column) << "posit_format" + << std::endl; + for (unsigned i = 0; i < size; i++) { + p.setbits(i); + bool s; + regime r; + exponent e; + fraction f; + decode(p.get(), s, r, e, f); + ostr << std::setw(4) << i << ": " + << std::setw(bin_column) << p.get() + << std::setw(bin_column) << decoded(p) + << std::setw(k_column) << r.regime_k() + << std::setw(sign_column) << s + << std::setw(scale_column) << scale(p) + << std::setw(regime_column) << std::right << to_string(r) + << std::setw(exponent_column) << std::right << to_string(e) + << std::setw(fraction_column) << std::right << to_string(f) + << std::setw(value_column) << to_string(p, 22) << " " + << std::setw(posit_format_column) << std::right << p + << std::endl; + } + } +} + +}} // namespace sw::universal + diff --git a/include/universal/traits/posito_traits.hpp b/include/universal/traits/posito_traits.hpp new file mode 100644 index 000000000..09ff70ebe --- /dev/null +++ b/include/universal/traits/posito_traits.hpp @@ -0,0 +1,29 @@ +#pragma once +// posito_traits.hpp : traits for positos +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +namespace sw { namespace universal { + +// define a trait for posito types +template +struct is_posito_trait + : false_type +{ +}; +template +struct is_posito_trait< sw::universal::posito > + : true_type +{ +}; + +template +constexpr bool is_posito = is_posito_trait<_Ty>::value; + +template +using enable_if_posito = std::enable_if_t, Type>; + +}} // namespace sw::universal diff --git a/static/posito/CMakeLists.txt b/static/posito/CMakeLists.txt new file mode 100644 index 000000000..cf5330c17 --- /dev/null +++ b/static/posito/CMakeLists.txt @@ -0,0 +1,3 @@ +file (GLOB API_SRC "./api/*.cpp") + +compile_all("true" "posito" "Number Systems/static/floating-point/tapered/posito/api" "${API_SRC}") diff --git a/static/posito/api/api.cpp b/static/posito/api/api.cpp new file mode 100644 index 000000000..9d8018a92 --- /dev/null +++ b/static/posito/api/api.cpp @@ -0,0 +1,340 @@ +// api.cpp: class interface tests for arbitrary configuration posito types +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include + +// Configure the posito template environment +// enable/disable arithmetic exceptions +#define POSITO_THROW_ARITHMETIC_EXCEPTION 1 +// enable support for native literals in logic and arithmetic operations +#define POSITO_ENABLE_LITERALS 1 +// enable/disable error-free serialization I/O +#define POSITO_ERROR_FREE_IO_FORMAT 0 +// minimum set of include files to reflect source code dependencies +#include +#include + +int main() +try { + using namespace sw::universal; + + int nrOfFailedTestCases = 0; + + std::cout << "posito class interface tests\n"; + + ///////////////////////////////////////////////////////////////////////////////////// + //// posito construction, initialization, assignment and comparisions + + std::cout << "*** posito construction, initialization, assignment, and comparisons\n"; + { + // maxpos of a posito<8,0> = 64 + posito<8, 0> a(-64), b(128), c(64), d(-64); + // b initialized to -128 in saturating arithmetic becomes -64 + if (0 != (c + d)) ++nrOfFailedTestCases; //cout << to_binary(c + d) << endl; + if (a != -b) ++nrOfFailedTestCases; + + if (a != (d - 32)) ++nrOfFailedTestCases; // saturating to maxneg + if (a != (d - 0.5)) ++nrOfFailedTestCases; // saturating to maxneg + std::cout << to_binary(a) << " : " << a << '\n'; + std::cout << to_binary(b) << " : " << b << '\n'; + std::cout << to_binary(c) << " : " << c << '\n'; + std::cout << to_binary(d) << " : " << d << '\n'; + } + + std::cout << "*** type tag to identify the type without having to depend on demangle\n"; + { + using Posit = posito<16, 2>; + Posit a{ 0 }; + std::cout << "type identifier : " << type_tag(a) << '\n'; + std::cout << "standard posito : " << type_tag(posito< 8, 2>()) << '\n'; + std::cout << "standard posito : " << type_tag(posito< 16, 2>()) << '\n'; + std::cout << "standard posito : " << type_tag(posito< 32, 2>()) << '\n'; + std::cout << "standard posito : " << type_tag(posito< 64, 2>()) << '\n'; + std::cout << "standard posito : " << type_tag(posito<128, 2>()) << '\n'; + std::cout << "standard posito : " << type_tag(posito<256, 2>()) << '\n'; + } + + std::cout << "*** special cases\n"; + { + using Posit = posito<8, 0>; + Posit a; + a.setnar(); ReportValue(a, "NaR : "); + a.maxpos(); ReportValue(a, "maxpos : "); + a = 1; ReportValue(a, " 1 : "); + a.minpos(); ReportValue(a, "minpos : "); + a.setzero(); ReportValue(a, "zero : "); + a.minneg(); ReportValue(a, "minneg : "); + a = -1; ReportValue(a, " -1 : "); + a.maxneg(); ReportValue(a, "maxneg : "); + } + + std::cout << "*** binary, color, and value printing\n"; + { + using Posit = posito<5, 1>; + Posit a; + for (unsigned i = 0; i < 32; ++i) { + a.setbits(i); + std::cout << to_binary(a) << " : " << color_print(a) << " : " << a << '\n'; + } + } + + std::cout << "*** pretty and info printing\n"; + { + using Posit = posito<5, 1>; + Posit a; + for (unsigned i = 0; i < 32; ++i) { + a.setbits(i); + std::cout << std::left << std::setw(30) << pretty_print(a) << " : " << info_print(a) << '\n'; + } + } + + ///////////////////////////////////////////////////////////////////////////////////// + //// improving efficiency for positos through explicit BlockType specification + + { + int start = nrOfFailedTestCases; + + posito<16, 2> a{ 0 }, b{ -0.984375f }, c{ 0.984375 }, d{ -0.984375 }; + if (a != (c + d)) ++nrOfFailedTestCases; + if (a != (-b - c)) ++nrOfFailedTestCases; + // cout << to_binary(a, true) << ' ' << to_binary(b, true) << ' ' << to_binary(c, true) << ' ' << to_binary(d, true) << endl; + if (nrOfFailedTestCases - start > 0) { + std::cout << "FAIL : construction " << to_binary(a) << ' ' << to_binary(b) << ' ' << to_binary(c) << ' ' << to_binary(d) << '\n'; + std::cout << a << ' ' << b << ' ' << c << ' ' << d << '\n'; + } + } +#undef LATER +#ifdef LATER + ///////////////////////////////////////////////////////////////////////////////////// + // selectors + + { + int start = nrOfFailedTestCases; + + if (nrOfFailedTestCases - start > 0) { + cout << "FAIL : selectors\n"; + } + } + + ///////////////////////////////////////////////////////////////////////////////////// + // modifiers + + { + int start = nrOfFailedTestCases; + // state/bit management + + if (nrOfFailedTestCases - start > 0) { + cout << "FAIL : modifiers\n"; + } + } + + ///////////////////////////////////////////////////////////////////////////// + // complements + { + int start = nrOfFailedTestCases; + + if (nrOfFailedTestCases - start > 0) { + cout << "FAIL : complements 1\n"; + } + } + + //////////////////////////////////////////////////////////////////////////////////// + // parsing of text input + { + /* TODO: implement parse + constexpr size_t nbits = 128; + constexpr size_t rbits = 64; + parse a, b, c, d; + a.assign("123456789.987654321"); + parse("123456789.987654321", b); + */ + } + + /////////////////////////////////////////////////////////////////////////////////// + // arithmetic + { + int start = nrOfFailedTestCases; + constexpr size_t nbits = 16; + constexpr size_t es = 8; + posito a, b, c, d; + maxpos(a); + maxneg(b); + minpos(c); + minneg(d); + if ((c + d) != 0) ++nrOfFailedTestCases; + + if ((a + c) != b) ++nrOfFailedTestCases; + if (nrOfFailedTestCases - start > 0) { + cout << "FAIL: min/max\n"; + cout << to_binary(c + d) << " vs " << to_binary(posito(0)) << endl; + cout << to_binary(a + c) << " vs " << to_binary(b) << endl; + } + } + + /////////////////////////////////////////////////////////////////////////////////// + // logic, in particular, all the literal constant combinations + { + int start = nrOfFailedTestCases; + constexpr size_t nbits = 8; + constexpr size_t es = 4; + posito a, b, c, d; + a = 1; + b = 2l; + c = 3ll; + d = 0ull; + // unsigned literals + if (a != 1u) ++nrOfFailedTestCases; + if (b != 2ul) ++nrOfFailedTestCases; + if (c != 3ull) ++nrOfFailedTestCases; + if (1u != a) ++nrOfFailedTestCases; + if (2ul != b) ++nrOfFailedTestCases; + if (3ull != c) ++nrOfFailedTestCases; + if (d != c - b - a) ++nrOfFailedTestCases; + // signed literals + if (-a != -1) ++nrOfFailedTestCases; + if (-b != -2l) ++nrOfFailedTestCases; + if (-c != -3ll) ++nrOfFailedTestCases; + if (-1 != -a) ++nrOfFailedTestCases; + if (-2l != -b) ++nrOfFailedTestCases; + if (-3ll != -c) ++nrOfFailedTestCases; + + // less than unsigned literal + d = 4.0f; + if (d < 1u) ++nrOfFailedTestCases; + if (d < 2ul) ++nrOfFailedTestCases; + if (d < 3ull) ++nrOfFailedTestCases; + d = 0.0; + if (1u < d) ++nrOfFailedTestCases; + if (2ul < d) ++nrOfFailedTestCases; + if (3ull < d) ++nrOfFailedTestCases; + + // greater than unsigned literal + if (d > 1u) ++nrOfFailedTestCases; + if (d > 2ul) ++nrOfFailedTestCases; + if (d > 3ull) ++nrOfFailedTestCases; + d = 4ll; + if (1u > d) ++nrOfFailedTestCases; + if (2ul > d) ++nrOfFailedTestCases; + if (3ull > d) ++nrOfFailedTestCases; + + // less than or equal unsigned literal + if (d <= 1u) ++nrOfFailedTestCases; + if (d <= 2ul) ++nrOfFailedTestCases; + if (d <= 3ull) ++nrOfFailedTestCases; + d = 0.0f; + if (1u <= d) ++nrOfFailedTestCases; + if (2ul <= d) ++nrOfFailedTestCases; + if (3ull <= d) ++nrOfFailedTestCases; + + // greater than or equal unsigned literal + if (d >= 1u) ++nrOfFailedTestCases; + if (d >= 2ul) ++nrOfFailedTestCases; + if (d >= 3ull) ++nrOfFailedTestCases; + d = 4.0; + if (1u >= d) ++nrOfFailedTestCases; + if (2ul >= d) ++nrOfFailedTestCases; + if (3ull >= d) ++nrOfFailedTestCases; + + // comparisons with signed literals + // less than signed literal + d = 4.0f; + if (d < 1) ++nrOfFailedTestCases; + if (d < 2l) ++nrOfFailedTestCases; + if (d < 3ll) ++nrOfFailedTestCases; + d = 0.0; + if (1 < d) ++nrOfFailedTestCases; + if (2l < d) ++nrOfFailedTestCases; + if (3ll < d) ++nrOfFailedTestCases; + + // greater than signed literal + if (d > 1) ++nrOfFailedTestCases; + if (d > 2l) ++nrOfFailedTestCases; + if (d > 3ll) ++nrOfFailedTestCases; + d = 4ll; + if (1 > d) ++nrOfFailedTestCases; + if (2l > d) ++nrOfFailedTestCases; + if (3ll > d) ++nrOfFailedTestCases; + + // less than or equal signed literal + if (d <= 1) ++nrOfFailedTestCases; + if (d <= 2l) ++nrOfFailedTestCases; + if (d <= 3ll) ++nrOfFailedTestCases; + d = 0.0f; + if (1 <= d) ++nrOfFailedTestCases; + if (2l <= d) ++nrOfFailedTestCases; + if (3ll <= d) ++nrOfFailedTestCases; + + // greater than or equal signed literal + if (d >= 1) ++nrOfFailedTestCases; + if (d >= 2l) ++nrOfFailedTestCases; + if (d >= 3ll) ++nrOfFailedTestCases; + d = 4.0; + if (1 >= d) ++nrOfFailedTestCases; + if (2l >= d) ++nrOfFailedTestCases; + if (3ll >= d) ++nrOfFailedTestCases; + if (nrOfFailedTestCases - start > 0) { + cout << "FAIL: logic operators\n"; + } + } + +#ifdef SHOW_STATE_SPACE + { + constexpr size_t nbits = 7; + constexpr size_t es = 4; + constexpr size_t NR_VALUES = (1 << nbits); + + posito a, b, c, d; + for (size_t i = 0; i < NR_VALUES; ++i) { + a.set_raw_bits(i); + float f = float(a); + b = int(f); + c = f; + d = double(a); + if (a != c && a != d) ++nrOfFailedTestCases; + cout << setw(3) << i << ' ' << to_binary(a) << ' ' << setw(10) << a << ' ' << setw(3) << int(f) << ' ' << to_binary(b) << ' ' << b << ' ' << to_binary(c) << ' ' << to_binary(d) << endl; + } + } + + { + constexpr size_t nbits = 8; + constexpr size_t es = 4; + posito a, b, c, d; + + for (int i = -16; i < 16; ++i) { + a = i; + cout << to_binary(i) << ' ' << a << ' ' << to_binary(a) << ' ' << to_binary(-a) << ' ' << -a << ' ' << to_binary(-i) << endl; + } + } +#endif // SHOW_STATE_SPACE +#endif // LATER + + if (nrOfFailedTestCases > 0) { + std::cout << "FAIL\n"; + } + else { + std::cout << "PASS\n"; + } + return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); +} +catch (char const* msg) { + std::cerr << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::posito_arithmetic_exception& err) { + std::cerr << "Uncaught posito arithmetic exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::posito_internal_exception& err) { + std::cerr << "Uncaught posito internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Uncaught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/tools/cmake/summary.cmake b/tools/cmake/summary.cmake index b10cb00ba..0d75f3d50 100644 --- a/tools/cmake/summary.cmake +++ b/tools/cmake/summary.cmake @@ -128,6 +128,7 @@ function(universal_print_configuration_summary) universal_status(" BUILD_NUMBER_UNUM1S : ${BUILD_NUMBER_UNUM1S}") universal_status(" BUILD_NUMBER_UNUM2S : ${BUILD_NUMBER_UNUM2S}") universal_status(" BUILD_NUMBER_POSITS : ${BUILD_NUMBER_POSITS}") + universal_status(" BUILD_NUMBER_POSITOS : ${BUILD_NUMBER_POSITOS}") universal_status(" BUILD_NUMBER_VALIDS : ${BUILD_NUMBER_VALIDS}") universal_status(" BUILD_NUMBER_LNS : ${BUILD_NUMBER_LNS}") universal_status(" BUILD_NUMBER_DBNS : ${BUILD_NUMBER_DBNS}")