diff --git a/example/sz_openmp.c b/example/sz_openmp.c index 8ae132e4..873f9139 100755 --- a/example/sz_openmp.c +++ b/example/sz_openmp.c @@ -453,8 +453,25 @@ int main(int argc, char* argv[]) } else if(parallelMode==1)//openMP { - printf("Error: current openMP version supports only float datatype\n"); - exit(0); + if(r5>0) + r3 = r5*r4*r3; + else if(r4>0) + r3 = r4*r3; + if(confparams_cpr->errorBoundMode!=ABS) + { + printf("Error: current version supports only absolute error bound (errorBoundMode=%d)\n", confparams_cpr->errorBoundMode); + exit(0); + } + + cost_start_omp(); + if(r2==0) + bytes = SZ_compress_double_1D_MDQ_openmp(data, r1, confparams_cpr->absErrBound, &outSize); + else if(r3==0) + bytes = SZ_compress_double_2D_MDQ_openmp(data, r2, r1, confparams_cpr->absErrBound, &outSize); + else //3d + bytes = SZ_compress_double_3D_MDQ_openmp(data, r3, r2, r1, confparams_cpr->absErrBound, &outSize); + printf("outSize=%zu\n", outSize); + cost_end_omp(); } if(cmpPath == NULL) diff --git a/sz/include/sz_double.h b/sz/include/sz_double.h index bc87c1db..1c0cd92a 100644 --- a/sz/include/sz_double.h +++ b/sz/include/sz_double.h @@ -29,6 +29,8 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision); unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision); +size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, double * P0, double * P1, int * type, double * unpredictable_data); + unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dataLength, double realPrecision); unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision); unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision); diff --git a/sz/include/sz_omp.h b/sz/include/sz_omp.h index 9634de88..cb83acbe 100644 --- a/sz/include/sz_omp.h +++ b/sz/include/sz_omp.h @@ -22,17 +22,21 @@ extern "C" { #endif unsigned char * SZ_compress_float_1D_MDQ_openmp(float *oriData, size_t r1, double realPrecision, size_t * comp_size); - unsigned char * SZ_compress_float_2D_MDQ_openmp(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); - unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size); void decompressDataSeries_float_1D_openmp(float** data, size_t r1, unsigned char* comp_data); - void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); - void decompressDataSeries_float_2D_openmp(float** data, size_t r1, size_t r2, unsigned char* comp_data); +unsigned char * SZ_compress_double_1D_MDQ_openmp(double *oriData, size_t r1, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_2D_MDQ_openmp(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size); +unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size); + +void decompressDataSeries_double_1D_openmp(double** data, size_t r1, unsigned char* comp_data); +void decompressDataSeries_double_2D_openmp(double** data, size_t r1, size_t r2, unsigned char* comp_data); +void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data); + //void Huffman_init_openmp(HuffmanTree* huffmanTree, int *s, size_t length, int thread_num); void Huffman_init_openmp(HuffmanTree* huffmanTree, int *s, size_t length, int thread_num, size_t * freq); diff --git a/sz/include/szd_double.h b/sz/include/szd_double.h index 1fda9553..3fcf48bc 100644 --- a/sz/include/szd_double.h +++ b/sz/include/szd_double.h @@ -32,6 +32,8 @@ void getSnapshotData_double_4D(double** data, size_t r1, size_t r2, size_t r3, s void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data, double* hist_data); void decompressDataSeries_double_3D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data, double* hist_data); +size_t decompressDataSeries_double_3D_RA_block(double * data, double mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, int * type, double * unpredictable_data); + int SZ_decompress_args_double(double** newData, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, unsigned char* cmpBytes, size_t cmpSize, int compressionType, double* hist_data); #ifdef __cplusplus diff --git a/sz/src/sz_double.c b/sz/src/sz_double.c index a920a4d5..4de4027d 100644 --- a/sz/src/sz_double.c +++ b/sz/src/sz_double.c @@ -330,6 +330,7 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal checkRadius = (exe_params->intvCapacity-1)*realPrecision; double interval = 2*realPrecision; + double recip_realPrecision = 1/realPrecision; for(i=2;i=pred) { type[i] = exe_params->intvRadius+state; @@ -493,6 +494,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si decData = (double*)(multisteps->hist_data); #endif + double recip_realPrecision = 1/realPrecision; unsigned int quantization_intervals; if(exe_params->optQuantMode==1) { @@ -560,7 +562,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si pred1D = P1[0]; diff = spaceFillingValue[1] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -588,7 +590,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si pred1D = 2*P1[j-1] - P1[j-2]; diff = spaceFillingValue[j] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -620,7 +622,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si pred1D = P1[0]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -650,7 +652,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -780,6 +782,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si decData = (double*)(multisteps->hist_data); #endif + double recip_realPrecision = 1/realPrecision; unsigned int quantization_intervals; if(exe_params->optQuantMode==1) { @@ -849,7 +852,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred1D = P1[0]; diff = spaceFillingValue[1] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -877,7 +880,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred1D = 2*P1[j-1] - P1[j-2]; diff = spaceFillingValue[j] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -909,7 +912,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred1D = P1[index-r3]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -939,7 +942,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -973,7 +976,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred1D = P1[0]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1003,7 +1006,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred2D = P0[j-1] + P1[j] - P1[j-1]; diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1036,7 +1039,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1067,7 +1070,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; diff = spaceFillingValue[index] - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1186,6 +1189,7 @@ char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** new TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, double valueRangeSize, double medianValue_d) { + double recip_realPrecision = 1/realPrecision; unsigned int quantization_intervals; if(exe_params->optQuantMode==1) { @@ -1287,7 +1291,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si pred1D = 2*P1[index2D-1] - P1[index2D-2]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1316,7 +1320,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si pred1D = P1[index2D-r4]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1344,7 +1348,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1376,7 +1380,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si pred1D = P1[index2D]; diff = spaceFillingValue[index] - pred1D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1404,7 +1408,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si pred2D = P0[index2D-1] + P1[index2D] - P1[index2D-1]; diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1433,7 +1437,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si pred2D = P0[index2D-r4] + P1[index2D] - P1[index2D-r4]; diff = spaceFillingValue[index] - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -1461,7 +1465,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si diff = spaceFillingValue[index] - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < exe_params->intvCapacity) { @@ -4342,6 +4346,302 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t return powerOf2; } +size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean, size_t dim_0, size_t dim_1, size_t dim_2, size_t block_dim_0, size_t block_dim_1, size_t block_dim_2, double realPrecision, double * P0, double * P1, int * type, double * unpredictable_data) +{ + double recip_realPrecision = 1/realPrecision; + size_t dim0_offset = dim_1 * dim_2; + size_t dim1_offset = dim_2; + + mean[0] = block_ori_data[0]; + + size_t unpredictable_count = 0; + size_t r1, r2, r3; + r1 = block_dim_0; + r2 = block_dim_1; + r3 = block_dim_2; + + double * cur_data_pos = block_ori_data; + double curData; + double pred1D, pred2D, pred3D; + double itvNum; + double diff; + size_t i, j, k; + size_t r23 = r2*r3; + // Process Row-0 data 0 + pred1D = mean[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[0] = (int) (itvNum/2) + exe_params->intvRadius; + P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[0])>realPrecision){ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[0] = 0; + P1[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-0 data 1*/ + pred1D = P1[0]; + curData = cur_data_pos[1]; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[1] = (int) (itvNum/2) + exe_params->intvRadius; + P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[1])>realPrecision){ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[1] = 0; + P1[1] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*P1[j-1] - P1[j-2]; + curData = cur_data_pos[j]; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity){ + if (diff < 0) itvNum = -itvNum; + type[j] = (int) (itvNum/2) + exe_params->intvRadius; + P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[j])>realPrecision){ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else{ + type[j] = 0; + P1[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = P1[index-r3]; + curData = *cur_data_pos; + diff = curData - pred1D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1]; + + curData = cur_data_pos[j]; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P1[index])>realPrecision) + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P1[index] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = P1[0]; + curData = *cur_data_pos; + diff = curData - pred1D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[0])>realPrecision) + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[0] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = P0[j-1] + P1[j] - P1[j-1]; + curData = cur_data_pos[j]; + diff = curData - pred2D; + itvNum = fabs(diff)*recip_realPrecision + 1; + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[j])>realPrecision) + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[j] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + + cur_data_pos += dim1_offset; + /* Process Row-1 --> Row-r2-1 */ + size_t index2D; + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + index2D = i*r3; + pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3]; + curData = *cur_data_pos; + diff = curData - pred2D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + index2D = i*r3 + j; + pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1]; + curData = cur_data_pos[j]; + diff = curData - pred3D; + + itvNum = fabs(diff)*recip_realPrecision + 1; + + if (itvNum < exe_params->intvCapacity) + { + if (diff < 0) itvNum = -itvNum; + type[index] = (int) (itvNum/2) + exe_params->intvRadius; + P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision; + + //ganrantee comporession error against the case of machine-epsilon + if(fabs(curData-P0[index2D])>realPrecision) + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + else + { + type[index] = 0; + P0[index2D] = curData; + unpredictable_data[unpredictable_count ++] = curData; + } + } + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + double *Pt; + Pt = P1; + P1 = P0; + P0 = Pt; + } + + return unpredictable_count; +} + unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision) { size_t i; @@ -4791,9 +5091,6 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou if(mean_count > 0) mean = sum / mean_count; } - - double tmp_realPrecision = realPrecision; - // use two prediction buffers for higher performance double * unpredictable_data = result_unpredictable_data; unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); @@ -4825,8 +5122,10 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou int * coeff_result_type = (int *) malloc(num_blocks*3*sizeof(int)); double * coeff_unpred_data[3]; double * coeff_unpredictable_data = (double *) malloc(num_blocks*3*sizeof(double)); - double precision[3]; + double precision[3], recip_precision[3]; precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c; + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c; + double recip_realPrecision = 1/realPrecision; for(int i=0; i<3; i++){ coeff_type[i] = coeff_result_type + i * num_blocks; coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; @@ -4888,7 +5187,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou for(int e=0; e<3; e++){ cur_coeff = reg_params_pos[e*num_blocks]; diff = cur_coeff - last_coeffcients[e]; - itvNum = fabs(diff)/precision[e] + 1; + itvNum = fabs(diff)*recip_precision[e] + 1; if (itvNum < coeff_intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; @@ -4920,7 +5219,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -4946,7 +5245,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -4978,7 +5277,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5006,7 +5305,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5060,14 +5359,14 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou { pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; diff = curData - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision; if(type[index] <= intvRadius) type[index] -= 1; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -5100,14 +5399,14 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou { pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; diff = curData - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision; if(type[index] <= intvRadius) type[index] -= 1; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -5197,7 +5496,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou for(int e=0; e<3; e++){ cur_coeff = reg_params_pos[e*num_blocks]; diff = cur_coeff - last_coeffcients[e]; - itvNum = fabs(diff)/precision[e] + 1; + itvNum = fabs(diff)*recip_precision[e] + 1; if (itvNum < coeff_intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; @@ -5229,7 +5528,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5256,7 +5555,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5288,7 +5587,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5317,7 +5616,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2]; diff = curData - pred; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; @@ -5365,13 +5664,13 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; diff = curData - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -5398,13 +5697,13 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou pred2D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim0_offset - 1]; diff = curData - pred2D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -5684,8 +5983,6 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou if(mean_count > 0) mean = sum / mean_count; } - double tmp_realPrecision = realPrecision; - // use two prediction buffers for higher performance double * unpredictable_data = result_unpredictable_data; unsigned char * indicator = (unsigned char *) malloc(num_blocks * sizeof(unsigned char)); @@ -5721,8 +6018,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou int * coeff_result_type = (int *) malloc(num_blocks*4*sizeof(int)); double * coeff_unpred_data[4]; double * coeff_unpredictable_data = (double *) malloc(num_blocks*4*sizeof(double)); - double precision[4]; + double precision[4], recip_precision[4]; precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d; + recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c, recip_precision[3] = 1/precision_d; + double recip_realPrecision = 1/realPrecision; + for(int i=0; i<4; i++){ coeff_type[i] = coeff_result_type + i * num_blocks; coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks; @@ -5808,7 +6108,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou for(int e=0; e<4; e++){ cur_coeff = reg_params_pos[e*num_blocks]; diff = cur_coeff - last_coeffcients[e]; - itvNum = fabs(diff)/precision[e] + 1; + itvNum = fabs(diff)*recip_precision[e] + 1; if (itvNum < coeff_intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; @@ -5841,13 +6141,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; diff = curData - pred; - itvNum = fabs(diff)/tmp_realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - pred)>tmp_realPrecision){ + if(fabs(curData - pred)>realPrecision){ type[index] = 0; pred = curData; unpredictable_data[block_unpredictable_count ++] = curData; @@ -5885,13 +6185,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; diff = curData - pred; - itvNum = fabs(diff)/tmp_realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - pred)>tmp_realPrecision){ + if(fabs(curData - pred)>realPrecision){ type[index] = 0; pred = curData; unpredictable_data[block_unpredictable_count ++] = curData; @@ -5952,14 +6252,14 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; diff = curData - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision; if(type[index] <= intvRadius) type[index] -= 1; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -6005,14 +6305,14 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; diff = curData - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision; if(type[index] <= intvRadius) type[index] -= 1; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -6149,7 +6449,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou for(int e=0; e<4; e++){ cur_coeff = reg_params_pos[e*num_blocks]; diff = cur_coeff - last_coeffcients[e]; - itvNum = fabs(diff)/precision[e] + 1; + itvNum = fabs(diff)*recip_precision[e] + 1; if (itvNum < coeff_intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius; @@ -6183,13 +6483,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; diff = curData - pred; - itvNum = fabs(diff)/tmp_realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - pred)>tmp_realPrecision){ + if(fabs(curData - pred)>realPrecision){ type[index] = 0; pred = curData; unpredictable_data[block_unpredictable_count ++] = curData; @@ -6227,13 +6527,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou curData = *cur_data_pos; pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3]; diff = curData - pred; - itvNum = fabs(diff)/tmp_realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision; + pred = pred + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - pred)>tmp_realPrecision){ + if(fabs(curData - pred)>realPrecision){ type[index] = 0; pred = curData; unpredictable_data[block_unpredictable_count ++] = curData; @@ -6286,13 +6586,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; diff = curData - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; @@ -6330,13 +6630,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou pred3D = cur_pb_pos[-1] + cur_pb_pos[-strip_dim1_offset]+ cur_pb_pos[-strip_dim0_offset] - cur_pb_pos[-strip_dim1_offset - 1] - cur_pb_pos[-strip_dim0_offset - 1] - cur_pb_pos[-strip_dim0_offset - strip_dim1_offset] + cur_pb_pos[-strip_dim0_offset - strip_dim1_offset - 1]; diff = curData - pred3D; - itvNum = fabs(diff)/realPrecision + 1; + itvNum = fabs(diff)*recip_realPrecision + 1; if (itvNum < intvCapacity_sz){ if (diff < 0) itvNum = -itvNum; type[index] = (int) (itvNum/2) + intvRadius; - *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision; + *cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision; //ganrantee comporession error against the case of machine-epsilon - if(fabs(curData - *cur_pb_pos)>tmp_realPrecision){ + if(fabs(curData - *cur_pb_pos)>realPrecision){ type[index] = 0; *cur_pb_pos = curData; unpredictable_data[unpredictable_count ++] = curData; diff --git a/sz/src/sz_omp.c b/sz/src/sz_omp.c index c171de4c..555efe79 100644 --- a/sz/src/sz_omp.c +++ b/sz/src/sz_omp.c @@ -11,13 +11,6 @@ #include #include -unsigned char * SZ_compress_float_1D_MDQ_openmp(float *oriData, size_t r1, double realPrecision, size_t * comp_size){ - return NULL; -} -unsigned char * SZ_compress_float_2D_MDQ_openmp(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ - return NULL; -} - double sz_wtime(){ #ifdef _OPENMP return omp_get_wtime(); @@ -51,6 +44,13 @@ void sz_set_num_threads(int nthreads){ #endif } +unsigned char * SZ_compress_float_1D_MDQ_openmp(float *oriData, size_t r1, double realPrecision, size_t * comp_size){ + return NULL; +} +unsigned char * SZ_compress_float_2D_MDQ_openmp(float *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ + return NULL; +} + unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size){ float elapsed_time = 0.0; @@ -319,10 +319,10 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_ void decompressDataSeries_float_1D_openmp(float** data, size_t r1, unsigned char* comp_data){ } + void decompressDataSeries_float_2D_openmp(float** data, size_t r1, size_t r2, unsigned char* comp_data){ } - void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){ if(confparams_dec==NULL) @@ -508,6 +508,438 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si SZ_ReleaseHuffman(huffmanTree); } +//Double Precision + +unsigned char * SZ_compress_double_1D_MDQ_openmp(double *oriData, size_t r1, double realPrecision, size_t * comp_size){ + return NULL; +} + +unsigned char * SZ_compress_double_2D_MDQ_openmp(double *oriData, size_t r1, size_t r2, double realPrecision, size_t * comp_size){ + return NULL; +} + +unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){ + + float elapsed_time = 0.0; + + elapsed_time = -sz_wtime(); + unsigned int quantization_intervals; + if(exe_params->optQuantMode==1) + { + // quantization_intervals = optimize_intervals_float_3D(oriData, r1, realPrecision); + quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision); + //quantization_intervals = 32768; + printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision); + // exit(0); + updateQuantizationInfo(quantization_intervals); + } + else{ + quantization_intervals = exe_params->intvCapacity; + } + elapsed_time += sz_wtime(); + printf("opt interval time: %.4f\n", elapsed_time); + + elapsed_time = -sz_wtime(); + int thread_num = sz_get_max_threads(); + int thread_order = (int)log2(thread_num); + size_t num_x = 0, num_y = 0, num_z = 0; + { + int block_thread_order = thread_order / 3; + switch(thread_order % 3){ + case 0:{ + num_x = 1 << block_thread_order; + num_y = 1 << block_thread_order; + num_z = 1 << block_thread_order; + break; + } + case 1:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << block_thread_order; + num_z = 1 << block_thread_order; + break; + } + case 2:{ + num_x = 1 << (block_thread_order + 1); + num_y = 1 << (block_thread_order + 1); + num_z = 1 << block_thread_order; + break; + } + } + thread_num = num_x * num_y * num_z; + } + sz_set_num_threads(thread_num); + // calculate block dims + printf("number of blocks: %zu %zu %zu\n", num_x, num_y, num_z); + + size_t split_index_x, split_index_y, split_index_z; + size_t early_blockcount_x, early_blockcount_y, early_blockcount_z; + size_t late_blockcount_x, late_blockcount_y, late_blockcount_z; + SZ_COMPUTE_BLOCKCOUNT(r1, num_x, split_index_x, early_blockcount_x, late_blockcount_x); + SZ_COMPUTE_BLOCKCOUNT(r2, num_y, split_index_y, early_blockcount_y, late_blockcount_y); + SZ_COMPUTE_BLOCKCOUNT(r3, num_z, split_index_z, early_blockcount_z, late_blockcount_z); + + size_t max_num_block_elements = early_blockcount_x * early_blockcount_y * early_blockcount_z; + size_t num_blocks = num_x * num_y * num_z; + size_t num_elements = r1 * r2 * r3; + // printf("max_num_block_elements %d num_blocks %d\n", max_num_block_elements, num_blocks); + + size_t dim0_offset = r2 * r3; + size_t dim1_offset = r3; + + // printf("malloc blockinfo array start\n"); + // fflush(stdout); + + size_t buffer_size = early_blockcount_y * early_blockcount_z * sizeof(double); + int * result_type = (int *) malloc(num_elements * sizeof(int)); + size_t unpred_data_max_size = max_num_block_elements; + double * result_unpredictable_data = (double *) malloc(unpred_data_max_size * sizeof(double) * num_blocks); + unsigned int * unpredictable_count = (unsigned int *) malloc(num_blocks * sizeof(unsigned int)); + double * mean = malloc(num_blocks * sizeof(double)); + double * buffer0, * buffer1; + buffer0 = (double *) malloc(buffer_size * thread_num); + buffer1 = (double *) malloc(buffer_size * thread_num); + unsigned char * result = (unsigned char *) malloc(num_elements * (sizeof(int) + sizeof(double))); + size_t * unpred_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + unsigned char * encoding_buffer = (unsigned char *) malloc(max_num_block_elements * sizeof(int) * num_blocks); + size_t * block_offset = (size_t *) malloc(num_blocks * sizeof(size_t)); + size_t *freq = (size_t *)malloc(thread_num*quantization_intervals*4*sizeof(size_t)); + memset(freq, 0, thread_num*quantization_intervals*4*sizeof(size_t)); + + size_t stateNum = quantization_intervals*2; + HuffmanTree* huffmanTree = createHuffmanTree(stateNum); + + int num_yz = num_y * num_z; + #pragma omp parallel for + for(int t=0; tcode[i]) nodeCount++; + nodeCount = nodeCount*2-1; + unsigned char *treeBytes; + unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes); + + unsigned int meta_data_offset = 3 + 1 + MetaDataByteLength; + size_t total_unpred = 0; + for(int i=0; iintvRadius = (int)((tdps->intervals - 1)/ 2); + + unsigned int tree_size = bytesToInt_bigEndian(comp_data_pos); + comp_data_pos += sizeof(unsigned int); + size_t huffman_nodes = bytesToInt_bigEndian(comp_data_pos); + huffmanTree->allNodes = huffman_nodes; + // printf("Reconstruct huffman tree with node count %ld\n", nodeCount); + // fflush(stdout); + node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree, comp_data_pos+4, huffmanTree->allNodes); + + comp_data_pos += 4 + tree_size; + unsigned int * unpred_count = (unsigned int *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(unsigned int); + double * mean_pos = (double *) comp_data_pos; + comp_data_pos += num_blocks * sizeof(double); + double * result_unpredictable_data = (double *) comp_data_pos; + size_t total_unpred = 0; + for(int i=0; iintvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-0 data 1*/ + pred1D = cur_data_pos[0]; + type_ = type[1]; + if (type_ != 0){ + cur_data_pos[1] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[1] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 2 --> data r3-1 */ + for (j = 2; j < r3; j++){ + pred1D = 2*cur_data_pos[j-1] - cur_data_pos[j-2]; + type_ = type[j]; + if (type_ != 0){ + cur_data_pos[j] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + size_t index; + for (i = 1; i < r2; i++) + { + /* Process row-i data 0 */ + index = i*r3; + pred1D = last_row_pos[0]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process row-i data 1 --> data r3-1*/ + for (j = 1; j < r3; j++) + { + index = i*r3+j; + pred2D = cur_data_pos[j-1] + last_row_pos[j] - last_row_pos[j-1]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + // printf("pred2D %.2f cur_data %.2f last_row_data %.2f %.2f, result %.2f\n", pred2D, cur_data_pos[j-1], last_row_pos[j], last_row_pos[j-1], cur_data_pos[j]); + // getchar(); + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + /////////////////////////// Process layer-1 --> layer-r1-1 /////////////////////////// + + for (k = 1; k < r1; k++) + { + /* Process Row-0 data 0*/ + index = k*r23; + pred1D = cur_data_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred1D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + /* Process Row-0 data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3+j; + index ++; + pred2D = cur_data_pos[j-1] + cur_data_pos[j - dim0_offset] - cur_data_pos[j - 1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + + /* Process Row-1 --> Row-r2-1 */ + for (i = 1; i < r2; i++) + { + /* Process Row-i data 0 */ + index = k*r23 + i*r3; + pred2D = last_row_pos[0] + cur_data_pos[- dim0_offset] - last_row_pos[- dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[0] = pred2D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[0] = unpredictable_data[unpredictable_count ++]; + } + + /* Process Row-i data 1 --> data r3-1 */ + for (j = 1; j < r3; j++) + { + //index = k*r2*r3 + i*r3 + j; + index ++; + pred3D = cur_data_pos[j-1] + last_row_pos[j]+ cur_data_pos[j - dim0_offset] - last_row_pos[j-1] - last_row_pos[j - dim0_offset] - cur_data_pos[j-1 - dim0_offset] + last_row_pos[j-1 - dim0_offset]; + type_ = type[index]; + if (type_ != 0){ + cur_data_pos[j] = pred3D + 2 * (type_ - exe_params->intvRadius) * realPrecision; + } + else{ + cur_data_pos[j] = unpredictable_data[unpredictable_count ++]; + } + } + last_row_pos = cur_data_pos; + cur_data_pos += dim1_offset; + } + cur_data_pos += dim0_offset - r2 * dim1_offset; + } + + return unpredictable_count; +} + void decompressDataSeries_double_2D_nonblocked_with_blocked_regression(double** data, size_t r1, size_t r2, unsigned char* comp_data, double* hist_data){ size_t dim0_offset = r2;