From e08743d9771c96afa6dce3d2326aab3554db6337 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 12 Jul 2023 23:02:36 +0200 Subject: [PATCH 1/6] Update to use safe scaling algorithm from Reference-LAPACK PR 527 --- interface/rotg.c | 62 ++++++++++++++++++++++++++++++++++------------- interface/zrotg.c | 40 ++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/interface/rotg.c b/interface/rotg.c index 69443a5a06..3ccf4f7ebe 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -1,9 +1,11 @@ #include +#include #include "common.h" #ifdef FUNCTION_PROFILE #include "functable.h" #endif + #ifndef CBLAS void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ @@ -14,17 +16,27 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ #endif +#ifdef DOUBLE + long double safmin = DBL_MIN; +#else + long double safmin = FLT_MIN; +#endif + #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da = *DA; long double db = *DB; long double c; long double s; - long double r, roe, z; + long double r, z; + long double sigma, dascal,dbscal; long double ada = fabsl(da); long double adb = fabsl(db); - long double scale = ada + adb; + long double maxab = MAX(ada,adb); + long double safmax; + long double scale; + #ifndef CBLAS PRINT_DEBUG_NAME; @@ -32,17 +44,25 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ PRINT_DEBUG_CNAME; #endif - roe = db; - if (ada > adb) roe = da; - - if (scale == ZERO) { + if (adb == ZERO) { *C = ONE; *S = ZERO; - *DA = ZERO; *DB = ZERO; + } else if (ada == ZERO) { + *C = ZERO; + *S = ONE; + *DA = *DB; + *DB = ONE; } else { - r = sqrt(da * da + db * db); - if (roe < 0) r = -r; + safmax = 1./safmin; + scale = MIN(MAX(safmin,maxab), safmax); + if (ada > adb) + sigma = copysign(1.,da); + else + sigma = copysign(1.,db); + dascal = da / scale; + dbscal = db / scale; + r = sigma * (scale * sqrt(dascal * dascal + dbscal * dbscal)); c = da / r; s = db / r; z = ONE; @@ -65,11 +85,22 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ FLOAT db = *DB; FLOAT c = *C; FLOAT s = *S; - FLOAT r, roe, z; + FLOAT sigma; + FLOAT r, z; FLOAT ada = fabs(da); FLOAT adb = fabs(db); - FLOAT scale = ada + adb; + FLOAT maxab = MAX(ada,adb); + long double safmax ; + FLOAT scale ; + + safmax = 1./safmin; + scale = MIN(MAX(safmin,maxab), safmax); + + if (ada > adb) + sigma = sign(1.,da); + else + sigma = sign(1.,db); #ifndef CBLAS PRINT_DEBUG_NAME; @@ -77,20 +108,17 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ PRINT_DEBUG_CNAME; #endif - roe = db; - if (ada > adb) roe = da; - if (scale == ZERO) { + if (adb == ZERO) { *C = ONE; *S = ZERO; - *DA = ZERO; + DA = ZERO; *DB = ZERO; } else { FLOAT aa = da / scale; FLOAT bb = db / scale; - r = scale * sqrt(aa * aa + bb * bb); - if (roe < 0) r = -r; + r = sigma * scale * sqrt(aa * aa + bb * bb); c = da / r; s = db / r; z = ONE; diff --git a/interface/zrotg.c b/interface/zrotg.c index 123f4da85b..7c4e2ed08a 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -1,9 +1,11 @@ #include +#include #include "common.h" #ifdef FUNCTION_PROFILE #include "functable.h" #endif + #ifndef CBLAS void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ @@ -14,6 +16,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT *S = (FLOAT*) VS; #endif /* CBLAS */ +#ifdef DOUBLE + long double safmin = DBL_MIN; +#else + long double safmin = FLT_MIN; +#endif + #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da_r = *(DA + 0); @@ -23,6 +31,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { long double r; long double ada = fabsl(da_r) + fabsl(da_i); + long double adb = sqrt(db_r * db_r + db_i * db_i); PRINT_DEBUG_NAME; @@ -38,10 +47,24 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(DA + 1) = db_i; } else { long double alpha_r, alpha_i; + long double safmax = 1./safmin; + long double sigma; + long double maxab = MAX(ada,adb); + long double scale = MIN(MAX(safmin,maxab), safmax); - ada = sqrt(da_r * da_r + da_i * da_i); - r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i); + long double aa_r = da_r / scale; + long double aa_i = da_i / scale; + long double bb_r = db_r / scale; + long double bb_i = db_i / scale; + + if (ada > adb) + sigma = copysign(1.,da_r); + else + sigma = copysign(1.,db_r); + + r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); + alpha_r = da_r / ada; alpha_i = da_i / ada; @@ -60,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT r; FLOAT ada = fabs(da_r) + fabs(da_i); - FLOAT adb; + FLOAT ada = fabs(db_r) + fabs(db_i); PRINT_DEBUG_NAME; @@ -75,6 +98,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(DA + 0) = db_r; *(DA + 1) = db_i; } else { + long double safmax = 1./safmin; FLOAT scale; FLOAT aa_r, aa_i, bb_r, bb_i; FLOAT alpha_r, alpha_i; @@ -108,14 +132,20 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { scale = (bb_i / bb_r); adb = bb_r * sqrt(ONE + scale * scale); } - scale = ada + adb; + FLOAT maxab = MAX(ada,adb); + scale = MIN(MAX(safmin,maxab), safmax); aa_r = da_r / scale; aa_i = da_i / scale; bb_r = db_r / scale; bb_i = db_i / scale; - r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); + if (ada > adb) + sigma = copysign(1.,da_r); + else + sigma = copysign(1.,db_r); + + r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); alpha_r = da_r / ada; alpha_i = da_i / ada; From 0f2ce93904272255a0e0a60d8d296ab5974ef235 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 13 Jul 2023 10:56:59 +0200 Subject: [PATCH 2/6] typo fix --- interface/rotg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/rotg.c b/interface/rotg.c index 3ccf4f7ebe..530dce16ae 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -112,7 +112,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ if (adb == ZERO) { *C = ONE; *S = ZERO; - DA = ZERO; + *DA = ZERO; *DB = ZERO; } else { FLOAT aa = da / scale; From 7c75c8b2fe3fc4b2e6c5c09f63d7ce2b8d5b5c18 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 13 Jul 2023 21:40:12 +0200 Subject: [PATCH 3/6] fix truncated edit --- interface/rotg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/interface/rotg.c b/interface/rotg.c index 530dce16ae..8d6df531ab 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -112,8 +112,12 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ if (adb == ZERO) { *C = ONE; *S = ZERO; - *DA = ZERO; *DB = ZERO; + else if (ada == ZERO) { + *C = ZERO; + *S = ONE; + *DA = *DB; + *DB = ONE; } else { FLOAT aa = da / scale; FLOAT bb = db / scale; From 5e1103b8d7cd3ff60dc7ebddbcc8f47c9c927f98 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 13 Jul 2023 23:35:38 +0200 Subject: [PATCH 4/6] Update rotg.c --- interface/rotg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/interface/rotg.c b/interface/rotg.c index 8d6df531ab..8d40d9c53c 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -98,9 +98,9 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ scale = MIN(MAX(safmin,maxab), safmax); if (ada > adb) - sigma = sign(1.,da); + sigma = copysign(1.,da); else - sigma = sign(1.,db); + sigma = copysign(1.,db); #ifndef CBLAS PRINT_DEBUG_NAME; @@ -113,7 +113,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ *C = ONE; *S = ZERO; *DB = ZERO; - else if (ada == ZERO) { + } else if (ada == ZERO) { *C = ZERO; *S = ONE; *DA = *DB; From 04cdf5efb4a13010874bed854df4264a5730819e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 14 Jul 2023 00:05:00 +0200 Subject: [PATCH 5/6] fix typo and missing declaration --- interface/zrotg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/zrotg.c b/interface/zrotg.c index 7c4e2ed08a..dd765f05f0 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -83,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT r; FLOAT ada = fabs(da_r) + fabs(da_i); - FLOAT ada = fabs(db_r) + fabs(db_i); + FLOAT adb = fabs(db_r) + fabs(db_i); PRINT_DEBUG_NAME; @@ -99,7 +99,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(DA + 1) = db_i; } else { long double safmax = 1./safmin; - FLOAT scale; + FLOAT scale, sigma; FLOAT aa_r, aa_i, bb_r, bb_i; FLOAT alpha_r, alpha_i; From a2a184572ce12d5e361a974328281d5243323de8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 31 Aug 2023 23:42:12 +0200 Subject: [PATCH 6/6] update zrotg --- interface/zrotg.c | 271 ++++++++++++++++++++++++---------------------- 1 file changed, 143 insertions(+), 128 deletions(-) diff --git a/interface/zrotg.c b/interface/zrotg.c index dd765f05f0..af6f85c1ca 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -18,20 +18,26 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { #ifdef DOUBLE long double safmin = DBL_MIN; + long double rtmin = sqrt(DBL_MIN/DBL_EPSILON); #else long double safmin = FLT_MIN; + long double rtmin = sqrt(FLT_MIN/FLT_EPSILON); #endif -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) - long double da_r = *(DA + 0); - long double da_i = *(DA + 1); - long double db_r = *(DB + 0); - long double db_i = *(DB + 1); - long double r; + FLOAT da_r = *(DA+0); + FLOAT da_i = *(DA+1); + FLOAT db_r = *(DB+0); + FLOAT db_i = *(DB+1); + //long double r; + FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT)); + FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT)); + long double d; - long double ada = fabsl(da_r) + fabsl(da_i); - long double adb = sqrt(db_r * db_r + db_i * db_i); + FLOAT ada = da_r * da_r + da_i * da_i; + FLOAT adb = db_r * db_r + db_i * db_i; + FLOAT adart = sqrt( da_r * da_r + da_i * da_i); + FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i); PRINT_DEBUG_NAME; @@ -39,128 +45,137 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FUNCTION_PROFILE_START(); - if (ada == ZERO) { - *C = ZERO; - *(S + 0) = ONE; + if (db_r == ZERO && db_i == ZERO) { + *C = ONE; + *(S + 0) = ZERO; *(S + 1) = ZERO; - *(DA + 0) = db_r; - *(DA + 1) = db_i; - } else { - long double alpha_r, alpha_i; - long double safmax = 1./safmin; - long double sigma; - long double maxab = MAX(ada,adb); - long double scale = MIN(MAX(safmin,maxab), safmax); - - - long double aa_r = da_r / scale; - long double aa_i = da_i / scale; - long double bb_r = db_r / scale; - long double bb_i = db_i / scale; - - if (ada > adb) - sigma = copysign(1.,da_r); - else - sigma = copysign(1.,db_r); - - r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); - - - alpha_r = da_r / ada; - alpha_i = da_i / ada; - - *(C + 0) = ada / r; - *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; - *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; - *(DA + 0) = alpha_r * r; - *(DA + 1) = alpha_i * r; + return; } -#else - FLOAT da_r = *(DA + 0); - FLOAT da_i = *(DA + 1); - FLOAT db_r = *(DB + 0); - FLOAT db_i = *(DB + 1); - FLOAT r; - - FLOAT ada = fabs(da_r) + fabs(da_i); - FLOAT adb = fabs(db_r) + fabs(db_i); - - PRINT_DEBUG_NAME; - - IDEBUG_START; - - FUNCTION_PROFILE_START(); - - if (ada == ZERO) { - *C = ZERO; - *(S + 0) = ONE; - *(S + 1) = ZERO; - *(DA + 0) = db_r; - *(DA + 1) = db_i; - } else { - long double safmax = 1./safmin; - FLOAT scale, sigma; - FLOAT aa_r, aa_i, bb_r, bb_i; - FLOAT alpha_r, alpha_i; - - aa_r = fabs(da_r); - aa_i = fabs(da_i); - - if (aa_i > aa_r) { - aa_r = fabs(da_i); - aa_i = fabs(da_r); - } - - if (aa_r == ZERO) { - ada = 0.; - } else { - scale = (aa_i / aa_r); - ada = aa_r * sqrt(ONE + scale * scale); - } - - bb_r = fabs(db_r); - bb_i = fabs(db_i); - - if (bb_i > bb_r) { - bb_r = fabs(bb_i); - bb_i = fabs(bb_r); - } - - if (bb_r == ZERO) { - adb = 0.; - } else { - scale = (bb_i / bb_r); - adb = bb_r * sqrt(ONE + scale * scale); - } - FLOAT maxab = MAX(ada,adb); - scale = MIN(MAX(safmin,maxab), safmax); - - aa_r = da_r / scale; - aa_i = da_i / scale; - bb_r = db_r / scale; - bb_i = db_i / scale; - if (ada > adb) - sigma = copysign(1.,da_r); - else - sigma = copysign(1.,db_r); - - r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); - - alpha_r = da_r / ada; - alpha_i = da_i / ada; - - *(C + 0) = ada / r; - *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; - *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; - *(DA + 0) = alpha_r * r; - *(DA + 1) = alpha_i * r; - } + long double safmax = 1./safmin; +#if defined DOUBLE + long double rtmax = safmax /DBL_EPSILON; +#else + long double rtmax = safmax /FLT_EPSILON; #endif - - FUNCTION_PROFILE_END(4, 4, 4); - - IDEBUG_END; - - return; + *(S1 + 0) = *(DB + 0); + *(S1 + 1) = *(DB + 1) *-1; + if (da_r == ZERO && da_i == ZERO) { + *C = ZERO; + if (db_r == ZERO) { + (*DA) = fabsl(db_i); + *S = *S1 /da_r; + *(S+1) = *(S1+1) /da_r; + return; + } else if ( db_i == ZERO) { + *DA = fabsl(db_r); + *S = *S1 /da_r; + *(S+1) = *(S1+1) /da_r; + return; + } else { + long double g1 = MAX( fabsl(db_r), fabsl(db_i)); + rtmax =sqrt(safmax/2.); + if (g1 > rtmin && g1 < rtmax) { // unscaled + d = sqrt(adb); + *S = *S1 /d; + *(S+1) = *(S1+1) /d; + *DA = d ; + *(DA+1) = ZERO; + return; + } else { // scaled algorithm + long double u = MIN ( safmax, MAX ( safmin, g1)); + FLOAT gs_r = db_r/u; + FLOAT gs_i = db_i/u; + d = sqrt ( gs_r*gs_r + gs_i*gs_i); + *S = gs_r / d; + *(S + 1) = (gs_i * -1) / d; + *DA = d * u; + *(DA+1) = ZERO; + return; + } + } + } else { + FLOAT f1 = MAX ( fabsl(da_r), fabsl(da_i)); + FLOAT g1 = MAX ( fabsl(db_r), fabsl(db_i)); + rtmax = sqrt(safmax / 4.); + if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax) { //unscaled + long double h = ada + adb; + double adahsq = sqrt(ada * h); + if (ada >= h *safmin) { + *C = sqrt(ada/h); + *R = *DA / *C; + *(R+1) = *(DA+1) / *(C+1); + rtmax *= 2.; + if ( ada > rtmin && h < rtmax) { // no risk of intermediate overflow + *S = *S1 * (*DA / adahsq) - *(S1+1)* (*(DA+1)/adahsq); + *(S+1) = *S1 * (*(DA+1) / adahsq) + *(S1+1) * (*DA/adahsq); + } else { + *S = *S1 * (*R/h) - *(S1+1) * (*(R+1)/h); + *(S+1) = *S1 * (*(R+1)/h) + *(S1+1) * (*(R)/h); + } + } else { + *C = ada / adahsq; + if (*C >= safmin) + *R = *DA / *C; + else + *R = *DA * (h / adahsq); + *S = *S1 * ada / adahsq; + *(S+1) = *(S1+1) * ada / adahsq; + } + *DA=*R; + *(DA+1)=*(R+1); + return; + } else { // scaled + FLOAT fs_r, fs_i, gs_r, gs_i; + long double v,w,f2,g2,h; + long double u = MIN ( safmax, MAX ( safmin, MAX(f1,g1))); + gs_r = db_r/u; + gs_i = db_i/u; + g2 = sqrt ( gs_r*gs_r + gs_i*gs_i); + if (f1 /u < rtmin) { + v = MIN (safmax, MAX (safmin, f1)); + w = v / u; + fs_r = *DA/ v; + fs_i = *(DA+1) / v; + f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); + h = f2 * w * w + g2; + } else { // use same scaling for both + w = 1.; + fs_r = *DA/ u; + fs_i = *(DA+1) / u; + f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); + h = f2 + g2; + } + if ( f2 >= h * safmin) { + *C = sqrt ( f2 / h ); + *DA = fs_r / *C; + *(DA+1) = fs_i / *C; + rtmax *= 2; + if ( f2 > rtmin && h < rtmax) { + *S = gs_r * (fs_r /sqrt(f2*h)) - gs_i * (fs_i / sqrt(f2*h)); + *(S+1) = gs_r * (fs_i /sqrt(f2*h)) + gs_i * -1. * (fs_r / sqrt(f2*h)); + } else { + *S = gs_r * (*DA/h) - gs_i * (*(DA+1) / h); + *(S+1) = gs_r * (*(DA+1) /h) + gs_i * -1. * (*DA / h); + } + } else { // intermediates might overflow + d = sqrt ( f2 * h); + *C = f2 /d; + if (*C >= safmin) { + *DA = fs_r / *C; + *(DA+1) = fs_i / *C; + } else { + *DA = fs_r * (h / d); + *(DA+1) = fs_i / (h / d); + } + *S = gs_r * (fs_r /d) - gs_i * (fs_i / d); + *(S+1) = gs_r * (fs_i /d) + gs_i * -1. * (fs_r / d); + } + *C *= w; + *DA *= u; + *(DA+1) *= u; + return; + } + } } + \ No newline at end of file