Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to use safe scaling algorithm from Reference-LAPACK PR 527 #4143

Merged
merged 8 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 44 additions & 16 deletions interface/rotg.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include <math.h>
#include <float.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif


#ifndef CBLAS

void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
Expand All @@ -14,35 +16,53 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

#endif

#ifdef DOUBLE
long double safmin = DBL_MIN;
#else
long double safmin = FLT_MIN;
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da = *DA;
long double db = *DB;
long double c;
long double s;
long double r, roe, z;
long double r, z;
long double sigma, dascal,dbscal;

long double ada = fabsl(da);
long double adb = fabsl(db);
long double scale = ada + adb;
long double maxab = MAX(ada,adb);
long double safmax;
long double scale;


#ifndef CBLAS
PRINT_DEBUG_NAME;
#else
PRINT_DEBUG_CNAME;
#endif

roe = db;
if (ada > adb) roe = da;

if (scale == ZERO) {
if (adb == ZERO) {
*C = ONE;
*S = ZERO;
*DA = ZERO;
*DB = ZERO;
} else if (ada == ZERO) {
*C = ZERO;
*S = ONE;
*DA = *DB;
*DB = ONE;
} else {
r = sqrt(da * da + db * db);
if (roe < 0) r = -r;
safmax = 1./safmin;
scale = MIN(MAX(safmin,maxab), safmax);
if (ada > adb)
sigma = copysign(1.,da);
else
sigma = copysign(1.,db);
dascal = da / scale;
dbscal = db / scale;
r = sigma * (scale * sqrt(dascal * dascal + dbscal * dbscal));
c = da / r;
s = db / r;
z = ONE;
Expand All @@ -65,22 +85,31 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT db = *DB;
FLOAT c = *C;
FLOAT s = *S;
FLOAT r, roe, z;
FLOAT sigma;
FLOAT r, z;

FLOAT ada = fabs(da);
FLOAT adb = fabs(db);
FLOAT scale = ada + adb;
FLOAT maxab = MAX(ada,adb);
long double safmax ;
FLOAT scale ;

safmax = 1./safmin;
scale = MIN(MAX(safmin,maxab), safmax);

if (ada > adb)
sigma = sign(1.,da);
else
sigma = sign(1.,db);

#ifndef CBLAS
PRINT_DEBUG_NAME;
#else
PRINT_DEBUG_CNAME;
#endif

roe = db;
if (ada > adb) roe = da;

if (scale == ZERO) {
if (adb == ZERO) {
*C = ONE;
*S = ZERO;
*DA = ZERO;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about this change. If the input vector is [a,0], then the rotation is the identity and the rotated vector is [a,0]. Maybe your initial intention was to delete the whole line, as in the other #if branch?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, looks like a cut-and-paste gone wrong. Thanks for your vigilance - I still haven't run any tests with this.

Expand All @@ -89,8 +118,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
FLOAT aa = da / scale;
FLOAT bb = db / scale;

r = scale * sqrt(aa * aa + bb * bb);
if (roe < 0) r = -r;
r = sigma * scale * sqrt(aa * aa + bb * bb);
c = da / r;
s = db / r;
z = ONE;
Expand Down
40 changes: 35 additions & 5 deletions interface/zrotg.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#include <math.h>
#include <float.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif


#ifndef CBLAS
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){

Expand All @@ -14,6 +16,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT *S = (FLOAT*) VS;
#endif /* CBLAS */

#ifdef DOUBLE
long double safmin = DBL_MIN;
#else
long double safmin = FLT_MIN;
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)

long double da_r = *(DA + 0);
Expand All @@ -23,6 +31,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
long double r;

long double ada = fabsl(da_r) + fabsl(da_i);
long double adb = sqrt(db_r * db_r + db_i * db_i);

PRINT_DEBUG_NAME;

Expand All @@ -38,10 +47,24 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
*(DA + 1) = db_i;
} else {
long double alpha_r, alpha_i;
long double safmax = 1./safmin;
long double sigma;
long double maxab = MAX(ada,adb);
long double scale = MIN(MAX(safmin,maxab), safmax);

ada = sqrt(da_r * da_r + da_i * da_i);

r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i);
long double aa_r = da_r / scale;
long double aa_i = da_i / scale;
long double bb_r = db_r / scale;
long double bb_i = db_i / scale;

if (ada > adb)
sigma = copysign(1.,da_r);
else
sigma = copysign(1.,db_r);

r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);


alpha_r = da_r / ada;
alpha_i = da_i / ada;
Expand All @@ -60,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
FLOAT r;

FLOAT ada = fabs(da_r) + fabs(da_i);
FLOAT adb;
FLOAT ada = fabs(db_r) + fabs(db_i);

PRINT_DEBUG_NAME;

Expand All @@ -75,6 +98,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
*(DA + 0) = db_r;
*(DA + 1) = db_i;
} else {
long double safmax = 1./safmin;
FLOAT scale;
FLOAT aa_r, aa_i, bb_r, bb_i;
FLOAT alpha_r, alpha_i;
Expand Down Expand Up @@ -108,14 +132,20 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
scale = (bb_i / bb_r);
adb = bb_r * sqrt(ONE + scale * scale);
}
scale = ada + adb;
FLOAT maxab = MAX(ada,adb);
scale = MIN(MAX(safmin,maxab), safmax);

aa_r = da_r / scale;
aa_i = da_i / scale;
bb_r = db_r / scale;
bb_i = db_i / scale;

r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
if (ada > adb)
sigma = copysign(1.,da_r);
else
sigma = copysign(1.,db_r);

r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);

alpha_r = da_r / ada;
alpha_i = da_i / ada;
Expand Down