From 55435aa40cdc8811acadb252fcee1251e5eafd78 Mon Sep 17 00:00:00 2001 From: Mantas Mikaitis Date: Mon, 29 Apr 2024 11:57:37 +0100 Subject: [PATCH] Allow customizable emin instead of default emin=1-emax This allows fp8-e4m3 to be made fully OCP compliant, where it is specified with emax = 8 and emin = -6. --- mex/cpfloat.c | 1 + src/cpfloat_definitions.h | 13 +++++++++++++ src/cpfloat_template.h | 14 +++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/mex/cpfloat.c b/mex/cpfloat.c index f843956..2dc10d5 100644 --- a/mex/cpfloat.c +++ b/mex/cpfloat.c @@ -78,6 +78,7 @@ void mexFunction(int nlhs, !strcmp(fpopts->format, "E4M3")) { fpopts->precision = 4; fpopts->emax = 8; + fpopts->emin = -6; } else if (!strcmp(fpopts->format, "q52") || !strcmp(fpopts->format, "fp8-e5m2") || !strcmp(fpopts->format, "E5M2")) { diff --git a/src/cpfloat_definitions.h b/src/cpfloat_definitions.h index d49c817..ab9e94e 100644 --- a/src/cpfloat_definitions.h +++ b/src/cpfloat_definitions.h @@ -202,6 +202,19 @@ typedef struct { * exponent is larger than the maximum allowed by the storage format. */ cpfloat_exponent_t emax; + /** + * @brief Minimum exponent of target format. + * + * @details The minimum values allowed are -126 and -1022 if the storage format + * is `float` or `double`, respectively. Smaller values are increased to the + * minimum allowed value without warning. This field is ignored unless + * `explim` is set to `CPFLOAT_EXPRANGE_TARG`. + * + * The validation functions cpfloatf_validate_optstruct() and + * cpfloat_validate_optstruct() return an error code if the required minimum + * exponent is smaller than the minimum allowed by the storage format. + */ + cpfloat_exponent_t emin; /** * @brief Support for subnormal numbers in target format. 
* diff --git a/src/cpfloat_template.h b/src/cpfloat_template.h index 1b12db0..91734d9 100644 --- a/src/cpfloat_template.h +++ b/src/cpfloat_template.h @@ -106,6 +106,7 @@ optstruct *init_optstruct() { fpopts->bitseed = NULL; fpopts->randseedf = NULL; fpopts->randseed = NULL; + fpopts->emin = -99999; return fpopts; } @@ -279,6 +280,10 @@ static inline int VALIDATE_INPUT(const optstruct *fpopts) { if (fpopts->flip != CPFLOAT_NO_SOFTERR && (fpopts->p > 1 || fpopts->p < 0)) return 5; + /* Return -6 if emin is invalid (either nonnegative or too small). */ + if (fpopts->emin < DEFEMIN || fpopts->emin >= 0) + return -6; + /* Return 0 or warning value. */ return retval; } @@ -304,7 +309,14 @@ static inline FPPARAMS COMPUTE_GLOBAL_PARAMS(const optstruct *fpopts, } /* Derived floating point parameters. */ - int emin = 1-emax; + int emin = fpopts->emin; + /* If emin is not set by user, set it to the default 1-emax. */ + if (emin == -99999) + emin = 1-emax; + if (emin < DEFEMIN) { + emax = DEFEMIN; + *retval = -6; + } FPTYPE xmin = ldexp(1., emin); /* Smallest pos. normal. */ FPTYPE xmins = ldexp(1., emin-precision+1); /* Smallest pos. subnormal. */ FPTYPE ftzthreshold = (fpopts->subnormal == CPFLOAT_SUBN_USE) ? xmins : xmin;