From 1d4dec8669cbbd7542abf3d709d96256dbd41b09 Mon Sep 17 00:00:00 2001
From: disheng222 <disheng222@gmail.com>
Date: Fri, 11 Feb 2022 16:49:40 -0600
Subject: [PATCH] revise the SZ package for the builtin integration with HDF5
 filter

---
 CMakeLists.txt                  |   72 +-
 example/sz.c                    |    1 +
 example/sz_gsl.c                |    1 +
 example/sz_openmp.c             |    1 +
 example/sz_pastri.c             |    1 +
 example/sz_randomaccess.c       |    1 +
 example/testdouble_compress.c   |    2 +-
 example/testdouble_decompress.c |    4 +-
 example/testfloat_compress.c    |    2 +-
 example/testfloat_decompress.c  |    4 +-
 example/testint_compress.c      |    2 +-
 example/testint_decompress.c    |    4 +-
 sz/CMakeLists.txt               |   29 +-
 sz/include/ByteToolkit.h        |   52 +-
 sz/include/CompressElement.h    |   22 +-
 sz/include/DynamicByteArray.h   |    6 +-
 sz/include/DynamicIntArray.h    |    4 +-
 sz/include/dataCompression.h    |   36 +-
 sz/include/dictionary.h         |    5 +-
 sz/include/rw.h                 |    5 +
 sz/include/sz.h                 |   40 +-
 sz/include/sz_api.h             |   94 ++
 sz/include/sz_double.h          |   14 +-
 sz/src/Huffman.c                |  144 +--
 sz/src/conf.c                   |  146 +--
 sz/src/dataCompression.c        |  181 ++--
 sz/src/dictionary.c             |   10 +-
 sz/src/rw.c                     |   95 +-
 sz/src/rwf.c                    |   16 +-
 sz/src/sz.c                     |  389 ++++----
 sz/src/sz_double.c              | 1007 ++++++++++----------
 sz/src/sz_double_pwr.c          |  533 +++++------
 sz/src/sz_double_ts.c           |   79 +-
 sz/src/sz_float.c               | 1585 ++++++++++++++++---------------
 sz/src/sz_float_pwr.c           |  537 +++++------
 sz/src/sz_float_ts.c            |   89 +-
 sz/src/sz_int16.c               |  297 +++---
 sz/src/sz_int32.c               |  265 +++---
 sz/src/sz_int64.c               |  263 ++---
 sz/src/sz_int8.c                |  297 +++---
 sz/src/sz_omp.c                 |   83 +-
 sz/src/sz_uint16.c              |  301 +++---
 sz/src/sz_uint32.c              |  263 ++---
 sz/src/sz_uint64.c              |  263 ++---
 sz/src/sz_uint8.c               |  297 +++---
 zlib/CMakeLists.txt             |   27 +-
 zlib/zconf.h                    |    4 +-
 zstd/CMakeLists.txt             |   27 +-
 48 files changed, 3939 insertions(+), 3661 deletions(-)
 create mode 100644 sz/include/sz_api.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3268fc56..10978507 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,10 +31,29 @@ endif()
 #compile with C-99 and standard C++14
 set(CMAKE_C_STANDARD 99)
 set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
+#  Check for the existence of certain header files and timing functions
+include (CheckIncludeFiles)
+include (CheckFunctionExists)
+CHECK_INCLUDE_FILES ("unistd.h"        HAVE_UNISTD_H)
+CHECK_INCLUDE_FILES ("sys/time.h"      HAVE_SYS_TIME_H)
+
+# WIN32/MINGW are CMake built-ins (WINDOWS is not); native Windows skips the probe
+if (WIN32 AND NOT MINGW)
+  set (HAVE_GETTIMEOFDAY 1)
+endif ()
+if (MINGW OR NOT WIN32)
+  CHECK_FUNCTION_EXISTS (gettimeofday           HAVE_GETTIMEOFDAY)
+  CHECK_FUNCTION_EXISTS (clock_gettime          HAVE_CLOCK_GETTIME)
+endif ()
+
+# Generate the config.h file containing user settings needed by compilation
+configure_file (config.h.cmake ${CMAKE_BINARY_DIR}/config.h @ONLY)
+
 #generate tags for the project if tags exist
 option(BUILD_CTAGS "enable ctags generation target" OFF)
 if(BUILD_CTAGS)
@@ -51,34 +70,47 @@ endif()
 option(BUILD_SHARED_LIBS "build shared libraries over static libraries" ON)
 
 #find dependencies
+option(SZ_FIND_DEPS "find dependent libraries or build ext libraries" ON)
 include(GNUInstallDirs)
-find_package(PkgConfig)
-pkg_search_module(ZSTD  IMPORTED_TARGET libzstd)
-
-#by default pass no 3rd party exports
-set(thirdparty_export "")
-
-if(ZSTD_FOUND)
-  set(ZSTD_dep PkgConfig::ZSTD)
-else()
-  add_subdirectory(zstd)
-  set(ZSTD_dep zstd)
-  list(APPEND thirdparty_export "zstd")
-endif()
+if(SZ_FIND_DEPS)
+  find_package(PkgConfig)
+  pkg_search_module(ZSTD  IMPORTED_TARGET libzstd)
+
+  #by default pass no 3rd party exports
+  set(thirdparty_export "")
+
+  if(ZSTD_FOUND)
+    set(ZSTD_dep PkgConfig::ZSTD)
+  else()
+    add_subdirectory(zstd)
+    set(ZSTD_dep zstd)
+    list(APPEND thirdparty_export "zstd")
+  endif()
 
-find_package(ZLIB)
-if(ZLIB_FOUND)
-  set(ZLIB_dep ZLIB::ZLIB)
+  find_package(ZLIB)
+  if(ZLIB_FOUND)
+    set(ZLIB_dep ZLIB::ZLIB)
+  else()
+    add_subdirectory(zlib)
+    set(ZLIB_dep ZLIB)
+    list(APPEND thirdparty_export "ZLIB")
+  endif()
 else()
-  add_subdirectory(zlib)
-  set(ZLIB_dep ZLIB)
-  list(APPEND thirdparty_export "ZLIB")
+  #system lookup disabled: always build the in-tree zstd and zlib
+  add_subdirectory(zstd)
+  set(ZSTD_dep zstd)
+  add_subdirectory(zlib)
+  set(ZLIB_dep ZLIB)
+  set(thirdparty_export "zstd" "ZLIB")
 endif()
 
 find_package(OpenMP)
 
 add_subdirectory(sz)
-add_subdirectory(example)
+option(BUILD_SZ_EXAMPLES "build sz example" OFF)
+if(BUILD_SZ_EXAMPLES)
+  add_subdirectory(example)
+endif()
 
 option(BUILD_PYTHON_WRAPPER "build python wrapper" OFF)
 if(BUILD_PYTHON_WRAPPER)
diff --git a/example/sz.c b/example/sz.c
index be4b4fb0..9ebcf3fe 100644
--- a/example/sz.c
+++ b/example/sz.c
@@ -3,6 +3,7 @@
 #include <math.h>
 #include "sz.h"
 #include "rw.h"
+#include <sys/time.h>
 
 struct timeval startTime;
 struct timeval endTime;  /* Start and end times */
diff --git a/example/sz_gsl.c b/example/sz_gsl.c
index 4137ce2c..33742e7f 100644
--- a/example/sz_gsl.c
+++ b/example/sz_gsl.c
@@ -4,6 +4,7 @@
 #include <gsl/gsl_wavelet.h>
 #include "sz.h"
 #include "rw.h"
+#include <sys/time.h>
 
 struct timeval startTime;
 struct timeval endTime;  /* Start and end times */
diff --git a/example/sz_openmp.c b/example/sz_openmp.c
index ca0fd078..c39e722f 100755
--- a/example/sz_openmp.c
+++ b/example/sz_openmp.c
@@ -4,6 +4,7 @@
 #include "sz.h"
 #include "rw.h"
 #include "sz_omp.h"
+#include <sys/time.h>
 
 struct timeval startTime;
 struct timeval endTime;  /* Start and end times */
diff --git a/example/sz_pastri.c b/example/sz_pastri.c
index 80541754..e2360b07 100755
--- a/example/sz_pastri.c
+++ b/example/sz_pastri.c
@@ -3,6 +3,7 @@
 #include <math.h>
 #include "sz.h"
 #include "rw.h"
+#include <sys/time.h>
 
 struct timeval startTime;
 struct timeval endTime;  /* Start and end times */
diff --git a/example/sz_randomaccess.c b/example/sz_randomaccess.c
index 613ca97b..f11fb799 100755
--- a/example/sz_randomaccess.c
+++ b/example/sz_randomaccess.c
@@ -3,6 +3,7 @@
 #include <math.h>
 #include "sz.h"
 #include "rw.h"
+#include <sys/time.h>
 
 struct timeval startTime;
 struct timeval endTime;  /* Start and end times */
diff --git a/example/testdouble_compress.c b/example/testdouble_compress.c
index 95a93211..7d3bc87c 100644
--- a/example/testdouble_compress.c
+++ b/example/testdouble_compress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "sz.h"
diff --git a/example/testdouble_decompress.c b/example/testdouble_decompress.c
index 8a913305..e94e119a 100644
--- a/example/testdouble_decompress.c
+++ b/example/testdouble_decompress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -104,7 +104,7 @@ int main(int argc, char * argv[])
     
 
     char oriFilePath[650];
-    strncpy(oriFilePath, zipFilePath, (unsigned)strlen(zipFilePath)-3);
+    strcpy(oriFilePath, zipFilePath);
     oriFilePath[strlen(zipFilePath)-3] = '\0';
     double *ori_data = readDoubleData(oriFilePath, &totalNbEle, &status);
     if(status!=SZ_SCES)
diff --git a/example/testfloat_compress.c b/example/testfloat_compress.c
index d4db6b58..f01376e4 100644
--- a/example/testfloat_compress.c
+++ b/example/testfloat_compress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "sz.h"
diff --git a/example/testfloat_decompress.c b/example/testfloat_decompress.c
index cc4653fb..5ecfd40a 100644
--- a/example/testfloat_decompress.c
+++ b/example/testfloat_decompress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -106,7 +106,7 @@ int main(int argc, char * argv[])
     //SZ_Finalize();
     
     char oriFilePath[640];
-    strncpy(oriFilePath, zipFilePath, (unsigned)strlen(zipFilePath)-3);
+    strcpy(oriFilePath, zipFilePath);
     oriFilePath[strlen(zipFilePath)-3] = '\0';
     float *ori_data = readFloatData(oriFilePath, &totalNbEle, &status);
     if(status!=SZ_SCES)
diff --git a/example/testint_compress.c b/example/testint_compress.c
index 82e419a0..24cc75cd 100644
--- a/example/testint_compress.c
+++ b/example/testint_compress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/example/testint_decompress.c b/example/testint_decompress.c
index f47c33eb..4a44858c 100644
--- a/example/testint_decompress.c
+++ b/example/testint_decompress.c
@@ -7,7 +7,7 @@
  *      See COPYRIGHT in top-level directory.
  */
 
-
+#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -274,7 +274,7 @@ void assessDeCompressionData(int dataType, char* zipFilePath, void* decompressed
 	size_t i, totalNbEle;
 	int status;
     char oriFilePath[640];
-    strncpy(oriFilePath, zipFilePath, (unsigned)strlen(zipFilePath)-3);
+    strcpy(oriFilePath, zipFilePath);
     oriFilePath[strlen(zipFilePath)-3] = '\0';
 	int64_t *data = (int64_t*)malloc(sizeof(int64_t)*nbEle);//decompressed data
 	int64_t *ori_data = (int64_t*)malloc(sizeof(int64_t)*nbEle); //original data
diff --git a/sz/CMakeLists.txt b/sz/CMakeLists.txt
index 746ae622..7fb9bfab 100644
--- a/sz/CMakeLists.txt
+++ b/sz/CMakeLists.txt
@@ -1,4 +1,10 @@
-add_library (SZ
+if (BUILD_SHARED_LIBS)
+  set (BUILD_EXT_LIBS_TYPE "SHARED")
+else ()
+  set (BUILD_EXT_LIBS_TYPE "STATIC")
+endif ()
+
+add_library (SZ ${BUILD_EXT_LIBS_TYPE}
   src/ArithmeticCoding.c
   src/ByteToolkit.c
   src/CacheTable.c
@@ -60,10 +66,31 @@ add_library (SZ
 )
 
 target_include_directories(SZ 
+  PRIVATE 
+  ${CMAKE_BINARY_DIR}
   PUBLIC 
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/sz>
   )
+# Choose the output file name of the SZ library. Only a static build on
+# native Windows (WIN32 but not MinGW) gets the "lib" prefix; every other
+# configuration uses the plain "SZ" name. BUILD_EXT_LIBS_TYPE is handed to
+# if() by name (not as ${...}) so that its value is compared directly and is
+# never looked up a second time as if it were itself a variable name.
+if (NOT (BUILD_EXT_LIBS_TYPE MATCHES "SHARED") AND WIN32 AND NOT MINGW)
+  set (LIB_RELEASE_NAME "libSZ")
+  set (LIB_DEBUG_NAME "libSZ${CMAKE_DEBUG_POSTFIX}")
+else ()
+  set (LIB_RELEASE_NAME "SZ")
+  set (LIB_DEBUG_NAME "SZ${CMAKE_DEBUG_POSTFIX}")
+endif ()
+set_target_properties (SZ PROPERTIES
+    OUTPUT_NAME                ${LIB_RELEASE_NAME}
+#    OUTPUT_NAME_DEBUG          ${LIB_DEBUG_NAME}
+    OUTPUT_NAME_RELEASE        ${LIB_RELEASE_NAME}
+    OUTPUT_NAME_MINSIZEREL     ${LIB_RELEASE_NAME}
+    OUTPUT_NAME_RELWITHDEBINFO ${LIB_RELEASE_NAME}
+  )
 
 target_link_libraries (SZ PUBLIC ${ZLIB_dep} ${ZSTD_dep} m)
 
diff --git a/sz/include/ByteToolkit.h b/sz/include/ByteToolkit.h
index e88bf020..e8a62bbe 100644
--- a/sz/include/ByteToolkit.h
+++ b/sz/include/ByteToolkit.h
@@ -18,29 +18,29 @@ extern "C" {
 
 //ByteToolkit.c
 
-unsigned short bytesToUInt16_bigEndian(unsigned char* bytes);
-unsigned int bytesToUInt32_bigEndian(unsigned char* bytes);
-unsigned long bytesToUInt64_bigEndian(unsigned char* b);
+extern unsigned short bytesToUInt16_bigEndian(unsigned char* bytes);
+extern unsigned int bytesToUInt32_bigEndian(unsigned char* bytes);
+extern unsigned long bytesToUInt64_bigEndian(unsigned char* b);
 
-short bytesToInt16_bigEndian(unsigned char* bytes);
-int bytesToInt32_bigEndian(unsigned char* bytes);
-long bytesToInt64_bigEndian(unsigned char* b);
-int bytesToInt_bigEndian(unsigned char* bytes);
+extern short bytesToInt16_bigEndian(unsigned char* bytes);
+extern int bytesToInt32_bigEndian(unsigned char* bytes);
+extern long bytesToInt64_bigEndian(unsigned char* b);
+extern int bytesToInt_bigEndian(unsigned char* bytes);
 
-void intToBytes_bigEndian(unsigned char *b, unsigned int num);
+extern void intToBytes_bigEndian(unsigned char *b, unsigned int num);
 
-void int64ToBytes_bigEndian(unsigned char *b, uint64_t num);
-void int32ToBytes_bigEndian(unsigned char *b, uint32_t num);
-void int16ToBytes_bigEndian(unsigned char *b, uint16_t num);
+extern void int64ToBytes_bigEndian(unsigned char *b, uint64_t num);
+extern void int32ToBytes_bigEndian(unsigned char *b, uint32_t num);
+extern void int16ToBytes_bigEndian(unsigned char *b, uint16_t num);
 
-long bytesToLong_bigEndian(unsigned char* b);
-void longToBytes_bigEndian(unsigned char *b, unsigned long num);
+extern long bytesToLong_bigEndian(unsigned char* b);
+extern void longToBytes_bigEndian(unsigned char *b, unsigned long num);
 long doubleToOSEndianLong(double value);
 int floatToOSEndianInt(float value);
-short getExponent_float(float value);
-short getPrecisionReqLength_float(float precision);
-short getExponent_double(double value);
-short getPrecisionReqLength_double(double precision);
+extern short getExponent_float(float value);
+extern short getPrecisionReqLength_float(float precision);
+extern short getExponent_double(double value);
+extern short getPrecisionReqLength_double(double precision);
 unsigned char numberOfLeadingZeros_Int(int i);
 unsigned char numberOfLeadingZeros_Long(long i);
 unsigned char getLeadingNumbers_Int(int v1, int v2);
@@ -49,15 +49,15 @@ short bytesToShort(unsigned char* bytes);
 void shortToBytes(unsigned char* b, short value);
 int bytesToInt(unsigned char* bytes);
 long bytesToLong(unsigned char* bytes);
-float bytesToFloat(unsigned char* bytes);
-void floatToBytes(unsigned char *b, float num);
-double bytesToDouble(unsigned char* bytes);
-void doubleToBytes(unsigned char *b, double num);
+extern float bytesToFloat(unsigned char* bytes);
+extern void floatToBytes(unsigned char *b, float num);
+extern double bytesToDouble(unsigned char* bytes);
+extern void doubleToBytes(unsigned char *b, double num);
 int extractBytes(unsigned char* byteArray, size_t k, int validLength);
 int getMaskRightCode(int m);
-int getLeftMovingCode(int kMod8);
-int getRightMovingSteps(int kMod8, int resiBitLength);
-int getRightMovingCode(int kMod8, int resiBitLength);
+extern int getLeftMovingCode(int kMod8);
+extern int getRightMovingSteps(int kMod8, int resiBitLength);
+extern int getRightMovingCode(int kMod8, int resiBitLength);
 short* convertByteDataToShortArray(unsigned char* bytes, size_t byteLength);
 unsigned short* convertByteDataToUShortArray(unsigned char* bytes, size_t byteLength);
 
@@ -68,8 +68,8 @@ void convertUIntArrayToBytes(unsigned int* states, size_t stateLength, unsigned
 void convertLongArrayToBytes(int64_t* states, size_t stateLength, unsigned char* bytes);
 void convertULongArrayToBytes(uint64_t* states, size_t stateLength, unsigned char* bytes);
 
-size_t bytesToSize(unsigned char* bytes);
-void sizeToBytes(unsigned char* outBytes, size_t size);
+extern size_t bytesToSize(unsigned char* bytes);
+extern void sizeToBytes(unsigned char* outBytes, size_t size);
 
 void put_codes_to_output(unsigned int buf, int bitSize, unsigned char** p, int* lackBits, size_t *outSize);
 
diff --git a/sz/include/CompressElement.h b/sz/include/CompressElement.h
index 1d388c12..1bbb91fe 100644
--- a/sz/include/CompressElement.h
+++ b/sz/include/CompressElement.h
@@ -40,33 +40,33 @@ typedef struct LossyCompressionElement
 	unsigned char integerMidBytes[8];
 	int integerMidBytes_Length; //they are mid_bits actually
 	//char curBytes[8];
-	//int curBytes_Length; //4 for single_precision or 8 for double_precision	
+	//int curBytes_Length; //4 for single_precision or 8 for double_precision
 	int resMidBitsLength;
 	int residualMidBits;
 } LossyCompressionElement;
 
 char* decompressGroupIDArray(unsigned char* bytes, size_t dataLength);
 
-short computeGroupNum_float(float value);
-short computeGroupNum_double(double value);
+extern short computeGroupNum_float(float value);
+extern short computeGroupNum_double(double value);
 
-void listAdd_double(double last3CmprsData[3], double value);
-void listAdd_float(float last3CmprsData[3], float value);
-void listAdd_int(int64_t last3CmprsData[3], int64_t value);
+extern void listAdd_double(double last3CmprsData[3], double value);
+extern void listAdd_float(float last3CmprsData[3], float value);
+extern void listAdd_int(int64_t last3CmprsData[3], int64_t value);
 void listAdd_int32(int32_t last3CmprsData[3], int32_t value);
-void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID);
-void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID);
+extern void listAdd_float_group(float *groups, int *flags, char groupNum, float oriValue, float decValue, char* curGroupID);
+extern void listAdd_double_group(double *groups, int *flags, char groupNum, double oriValue, double decValue, char* curGroupID);
 
 int validPrediction_double(double minErr, double precision);
 int validPrediction_float(float minErr, float precision);
 double* generateGroupErrBounds(int errorBoundMode, double realPrecision, double pwrErrBound);
 int generateGroupMaxIntervalCount(double* groupErrBounds);
 
-void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes, 
+void new_LossyCompressionElement(LossyCompressionElement *lce, int leadingNum, unsigned char* intMidBytes,
 		int intMidBytes_Length, int resiMidBitsLength, int resiBits);
-void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes, 
+void updateLossyCompElement_Double(unsigned char* curBytes, unsigned char* preBytes,
 		int reqBytesLength, int resiBitsLength,  LossyCompressionElement *lce);
-void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes, 
+extern void updateLossyCompElement_Float(unsigned char* curBytes, unsigned char* preBytes,
 		int reqBytesLength, int resiBitsLength,  LossyCompressionElement *lce);
 
 #ifdef __cplusplus
diff --git a/sz/include/DynamicByteArray.h b/sz/include/DynamicByteArray.h
index 71709794..9f50a2ac 100644
--- a/sz/include/DynamicByteArray.h
+++ b/sz/include/DynamicByteArray.h
@@ -16,7 +16,7 @@ extern "C" {
 
 #include <stdio.h>
 typedef struct DynamicByteArray
-{	
+{
 	unsigned char* array;
 	size_t size;
 	size_t capacity;
@@ -26,8 +26,8 @@ void new_DBA(DynamicByteArray **dba, size_t cap);
 void convertDBAtoBytes(DynamicByteArray *dba, unsigned char** bytes);
 void free_DBA(DynamicByteArray *dba);
 unsigned char getDBA_Data(DynamicByteArray *dba, size_t pos);
-void addDBA_Data(DynamicByteArray *dba, unsigned char value);
-void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length);
+extern void addDBA_Data(DynamicByteArray *dba, unsigned char value);
+extern void memcpyDBA_Data(DynamicByteArray *dba, unsigned char* data, size_t length);
 
 #ifdef __cplusplus
 }
diff --git a/sz/include/DynamicIntArray.h b/sz/include/DynamicIntArray.h
index c821c571..b9c0a4f3 100644
--- a/sz/include/DynamicIntArray.h
+++ b/sz/include/DynamicIntArray.h
@@ -16,7 +16,7 @@ extern "C" {
 
 #include <stdio.h>
 typedef struct DynamicIntArray
-{	
+{
 	unsigned char* array; //char* (one byte) is enough, don't have to be int*
 	size_t size;
 	size_t capacity;
@@ -26,7 +26,7 @@ void new_DIA(DynamicIntArray **dia, size_t cap);
 void convertDIAtoInts(DynamicIntArray *dia, unsigned char **data);
 void free_DIA(DynamicIntArray *dia);
 int getDIA_Data(DynamicIntArray *dia, size_t pos);
-void addDIA_Data(DynamicIntArray *dia, int value);
+extern void addDIA_Data(DynamicIntArray *dia, int value);
 
 #ifdef __cplusplus
 }
diff --git a/sz/include/dataCompression.h b/sz/include/dataCompression.h
index 0e499395..a59f7d03 100644
--- a/sz/include/dataCompression.h
+++ b/sz/include/dataCompression.h
@@ -52,30 +52,30 @@ float max_f(float a, float b);
 double getRealPrecision_double(double valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
 double getRealPrecision_float(float valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
 double getRealPrecision_int(long valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
-void symTransform_8bytes(unsigned char data[8]);
-void symTransform_2bytes(unsigned char data[2]);
-void symTransform_4bytes(unsigned char data[4]);
+extern void symTransform_8bytes(unsigned char data[8]);
+extern void symTransform_2bytes(unsigned char data[2]);
+extern void symTransform_4bytes(unsigned char data[4]);
 
-void compressInt8Value(int8_t tgtValue, int8_t minValue, int byteSize, unsigned char* bytes);
-void compressInt16Value(int16_t tgtValue, int16_t minValue, int byteSize, unsigned char* bytes);
-void compressInt32Value(int32_t tgtValue, int32_t minValue, int byteSize, unsigned char* bytes);
-void compressInt64Value(int64_t tgtValue, int64_t minValue, int byteSize, unsigned char* bytes);
+extern void compressInt8Value(int8_t tgtValue, int8_t minValue, int byteSize, unsigned char* bytes);
+extern void compressInt16Value(int16_t tgtValue, int16_t minValue, int byteSize, unsigned char* bytes);
+extern void compressInt32Value(int32_t tgtValue, int32_t minValue, int byteSize, unsigned char* bytes);
+extern void compressInt64Value(int64_t tgtValue, int64_t minValue, int byteSize, unsigned char* bytes);
 
-void compressUInt8Value(uint8_t tgtValue, uint8_t minValue, int byteSize, unsigned char* bytes);
-void compressUInt16Value(uint16_t tgtValue, uint16_t minValue, int byteSize, unsigned char* bytes);
-void compressUInt32Value(uint32_t tgtValue, uint32_t minValue, int byteSize, unsigned char* bytes);
-void compressUInt64Value(uint64_t tgtValue, uint64_t minValue, int byteSize, unsigned char* bytes);
+extern void compressUInt8Value(uint8_t tgtValue, uint8_t minValue, int byteSize, unsigned char* bytes);
+extern void compressUInt16Value(uint16_t tgtValue, uint16_t minValue, int byteSize, unsigned char* bytes);
+extern void compressUInt32Value(uint32_t tgtValue, uint32_t minValue, int byteSize, unsigned char* bytes);
+extern void compressUInt64Value(uint64_t tgtValue, uint64_t minValue, int byteSize, unsigned char* bytes);
 
-void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue, 
+extern void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue,
 		int reqLength, int reqBytesLength, int resiBitsLength);
 void compressSingleFloatValue_MSST19(FloatValueCompressElement *vce, float tgtValue, float precision, int reqLength, int reqBytesLength, int resiBitsLength);
-void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue, 
+void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue,
 		int reqLength, int reqBytesLength, int resiBitsLength);
 void compressSingleDoubleValue_MSST19(DoubleValueCompressElement *vce, double tgtValue, double precision, int reqLength, int reqBytesLength, int resiBitsLength);
-                              
+
 int compIdenticalLeadingBytesCount_double(unsigned char* preBytes, unsigned char* curBytes);
-int compIdenticalLeadingBytesCount_float(unsigned char* preBytes, unsigned char* curBytes);
-void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray, 
+extern int compIdenticalLeadingBytesCount_float(unsigned char* preBytes, unsigned char* curBytes);
+extern void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray,
 		DynamicIntArray *resiBitArray, LossyCompressionElement *lce);
 
 int getPredictionCoefficients(int layers, int dimension, int **coeff_array, int *status);
@@ -85,13 +85,13 @@ int computeBlockEdgeSize_2D(int segmentSize);
 int initRandomAccessBytes(unsigned char* raBytes);
 
 int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData);
-int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 
+int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
 int reqLength, int reqBytesLength, int resiBitsLength, float medianValue);
 
 void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, float medianValue, float** decData);
 
 int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData);
-int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 
+int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
 int reqLength, int reqBytesLength, int resiBitsLength, double medianValue);
 
 void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exactMidBytes, unsigned char* residualMidBits, size_t nbEle, int reqLength, double medianValue, double** decData);
diff --git a/sz/include/dictionary.h b/sz/include/dictionary.h
index 0cf326ad..c30dd2a2 100644
--- a/sz/include/dictionary.h
+++ b/sz/include/dictionary.h
@@ -18,10 +18,13 @@
                                 Includes
  ---------------------------------------------------------------------------*/
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 
 /*---------------------------------------------------------------------------
                                 New types
diff --git a/sz/include/rw.h b/sz/include/rw.h
index 846243de..4004d57c 100644
--- a/sz/include/rw.h
+++ b/sz/include/rw.h
@@ -23,6 +23,11 @@
 extern "C" {
 #endif
 
+#if defined(_MSC_VER) /* MSVC Compiler Case */
+#define F_OK    0       /* Test for existence.  */
+#define access _access
+#endif
+
 int checkFileExistance(char* filePath);
 
 float** create2DArray_float(size_t m, size_t n);
diff --git a/sz/include/sz.h b/sz/include/sz.h
index a9cc0e2f..25ec7f29 100644
--- a/sz/include/sz.h
+++ b/sz/include/sz.h
@@ -12,7 +12,9 @@
 
 #include <stdio.h>
 #include <stdint.h>
+#ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>      /* For gettimeofday(), in microseconds */
+#endif
 #include <time.h>          /* For time(), in seconds */
 #include "CompressElement.h"
 #include "DynamicByteArray.h"
@@ -81,7 +83,7 @@ extern "C" {
 //typedef unsigned long uint64_t;
 
 #include "defines.h"
-	
+
 //Note: the following setting should be consistent with stateNum in Huffman.h
 //#define intvCapacity 65536
 //#define intvRadius 32768
@@ -163,7 +165,7 @@ typedef struct sz_params
 {
 	int dataType;
 	unsigned int max_quant_intervals; //max number of quantization intervals for quantization
-	unsigned int quantization_intervals; 
+	unsigned int quantization_intervals;
 	unsigned int maxRangeRadius;
 	int sol_ID;// it's SZ or SZ_Transpose, unless the setting is PASTRI compression mode (./configure --enable-pastri)
 	int losslessCompressor;
@@ -179,20 +181,20 @@ typedef struct sz_params
 	double pw_relBoundRatio; //point-wise relative error bound
 	int segment_size; //only used for 2D/3D data compression with pw_relBoundRatio (deprecated)
 	int pwr_type; //only used for 2D/3D data compression with pw_relBoundRatio
-	
+
 	int protectValueRange; //0 or 1
 	float fmin, fmax;
 	double dmin, dmax;
-	
+
 	int snapshotCmprStep; //perform single-snapshot-based compression if time_step == snapshotCmprStep
 	int predictionMode;
 
 	int accelerate_pw_rel_compression;
 	int plus_bits;
-	
+
 	int randomAccess;
 	int withRegression;
-	
+
 } sz_params;
 
 typedef struct sz_metadata
@@ -208,10 +210,10 @@ typedef struct sz_metadata
 
 typedef struct sz_exedata
 {
-	char optQuantMode;	//opt Quantization (0: fixed ; 1: optimized)	
+	char optQuantMode;	//opt Quantization (0: fixed ; 1: optimized)
 	int intvCapacity; // the number of intervals for the linear-scaling quantization
 	int intvRadius;  // the number of intervals for the radius of the quantization range (intvRadius=intvCapacity/2)
-	unsigned int SZ_SIZE_TYPE; //the length (# bytes) of the size_t in the system at runtime //4 or 8: sizeof(size_t) 
+	unsigned int SZ_SIZE_TYPE; //the length (# bytes) of the size_t in the system at runtime //4 or 8: sizeof(size_t)
 } sz_exedata;
 
 /*We use a linked list to maintain time-step meta info for time-step based compression*/
@@ -223,7 +225,7 @@ typedef struct sz_tsc_metainfo
 	FILE *metadata_file;
 	unsigned char* bit_array; //sihuan added
 	size_t intersect_size; //sihuan added
-	int64_t* hist_index; //sihuan added: prestep index 
+	int64_t* hist_index; //sihuan added: prestep index
 
 } sz_tsc_metadata;
 
@@ -242,9 +244,9 @@ extern SZ_VarSet* sz_varset;
 extern sz_multisteps *multisteps; //compression based on multiple time steps (time-dimension based compression)
 extern sz_tsc_metadata *sz_tsc;
 
-//for pastri 
+//for pastri
 #ifdef PASTRI
-extern pastri_params pastri_par; 
+extern pastri_params pastri_par;
 #endif
 
 //sz.h
@@ -273,22 +275,22 @@ size_t *outSize, int errBoundMode, double absErr_Bound, double relBoundRatio);
 
 unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
-unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound, 
+unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound,
 double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
-int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, 
-int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio, 
+int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize,
+int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
-int SZ_compress_args3(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+int SZ_compress_args3(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
 size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
 size_t e5, size_t e4, size_t e3, size_t e2, size_t e1);
 
-unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
-int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 unsigned char *SZ_compress_rev(int dataType, void *data, void *reservedValue, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
@@ -305,8 +307,8 @@ void filloutDimArray(size_t* dim, size_t r5, size_t r4, size_t r3, size_t r2, si
 
 size_t compute_total_batch_size();
 
-void SZ_registerVar(int var_id, char* varName, int dataType, void* data, 
-			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, 
+void SZ_registerVar(int var_id, char* varName, int dataType, void* data,
+			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,
 			size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
 
 int SZ_deregisterVar_ID(int var_id);
diff --git a/sz/include/sz_api.h b/sz/include/sz_api.h
new file mode 100644
index 00000000..9e8e89eb
--- /dev/null
+++ b/sz/include/sz_api.h
@@ -0,0 +1,94 @@
+#if defined (__cplusplus)
+extern "C" { /* C linkage for C++ callers; closed at the very end of this header */
+#endif
+/* SZ API header: the sz_params struct, the key globals and the core init/compress/decompress entry points. */
+#ifndef SZ_API_H
+#define SZ_API_H
+
+/* =====   SZLIB_API : controls the visibility / DLL linkage of library symbols   ===== */
+#ifndef SZLIB_VISIBILITY /* may be pre-defined by the build to override the default below */
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define SZLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define SZLIB_VISIBILITY
+#  endif
+#endif /* SZLIB_VISIBILITY */
+#if defined(SZ_DLL_EXPORT) && (SZ_DLL_EXPORT==1)
+#  define SZLIB_API __declspec(dllexport) SZLIB_VISIBILITY
+#elif defined(SZ_DLL_IMPORT) && (SZ_DLL_IMPORT==1)
+#  define SZLIB_API __declspec(dllimport) SZLIB_VISIBILITY /* Not required, but lets the compiler generate better code by saving a function pointer load from the IAT and an indirect jump. */
+#else
+#  define SZLIB_API SZLIB_VISIBILITY
+#endif
+/* NOTE(review): SZLIB_API is defined here but not attached to any declaration in this header -- confirm whether that is intended. */
+#include "defines.h"
+#include "ByteToolkit.h"
+
+/* Array metadata and compression parameters; this is the structure handed to SZ_Init_Params(). */
+typedef struct sz_params
+{
+    int dataType; // NOTE(review): presumably the same type code that is passed as dataType to SZ_compress() -- confirm
+    unsigned int max_quant_intervals; //upper bound on the number of quantization intervals (SZ_ReadConf defaults it to 65536)
+    unsigned int quantization_intervals; //0 in the built-in defaults; SZ_ReadConf rejects odd values
+    unsigned int maxRangeRadius; //set to max_quant_intervals/2 by the built-in defaults in SZ_ReadConf
+    int sol_ID;// SZ or SZ_Transpose, unless PASTRI compression mode is selected (./configure --enable-pastri)
+    int losslessCompressor; //ZSTD_COMPRESSOR (the SZ_ReadConf default) or GZIP_COMPRESSOR
+    int sampleDistance; //2 bytes
+    float predThreshold;  // 2 bytes
+    int szMode; //* 0 (best speed), 1 (better compression with Zstd/Gzip) or 3 (temporal-dimension based compression)
+    int gzipMode; //* Z_NO_COMPRESSION, Z_BEST_SPEED, Z_BEST_COMPRESSION or Z_DEFAULT_COMPRESSION; SZ_ReadConf also stores the parsed zstdMode level in this field
+    int  errorBoundMode; //4 bits (0.5 byte): ABS, REL, ABS_AND_REL, ABS_OR_REL, PSNR, or PW_REL
+    double absErrBound; //absolute error bound
+    double relBoundRatio; //value-range-based relative error bound ratio
+    double psnr; //PSNR
+    double normErr; // NOTE(review): presumably the norm-error target consumed by computeABSErrBoundFromNORM_ERR() -- confirm
+    double pw_relBoundRatio; //point-wise relative error bound
+    int segment_size; //only used for 2D/3D data compression with pw_relBoundRatio (deprecated)
+    int pwr_type; //only used for 2D/3D data compression with pw_relBoundRatio (e.g. SZ_PWR_MIN_TYPE or SZ_PWR_AVG_TYPE)
+
+    int protectValueRange; //0 or 1
+    float fmin, fmax; // NOTE(review): presumably the float value range kept when protectValueRange is 1 -- confirm
+    double dmin, dmax; // NOTE(review): presumably the double value range kept when protectValueRange is 1 -- confirm
+
+    int snapshotCmprStep; //perform single-snapshot-based compression if time_step == snapshotCmprStep
+    int predictionMode;
+
+    int accelerate_pw_rel_compression; //SZ_ReadConf defaults this to 1
+    int plus_bits; //SZ_ReadConf initializes this to 3
+
+    int randomAccess; //0: no random access, 1: support random access
+    int withRegression; //SZ_WITH_LINEAR_REGRESSION or SZ_NO_REGRESSION
+
+} sz_params;
+
+//-------------------key global variables--------------
+extern int dataEndianType; //*endian type of the data read from disk (SZ_ReadConf defaults it to LITTLE_ENDIAN_DATA when no config file is given)
+extern int sysEndianType; //*set automatically: SZ_ReadConf probes the host byte order
+/* confparams_cpr is allocated and filled by SZ_ReadConf(); confparams_dec is presumably its decompression-side counterpart (TODO confirm). */
+extern sz_params *confparams_cpr;
+extern sz_params *confparams_dec;
+/* Library set-up. NOTE(review): presumably configFilePath is forwarded to SZ_ReadConf(), which accepts NULL for built-in defaults -- confirm. */
+int SZ_Init(const char *configFilePath);
+/* NOTE(review): presumably the same set-up driven by an in-memory sz_params instead of a configuration file -- confirm. */
+int SZ_Init_Params(sz_params *params);
+/* Dimension helpers; r5..r1 are the per-dimension sizes. NOTE(review): presumably unused dimensions are passed as 0 -- confirm. */
+size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+
+int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+int filterDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t* correctedDimension);
+/* Compression. NOTE(review): presumably returns the compressed byte stream and stores its length in *outSize -- confirm. */
+unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+
+unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound,
+double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+/* Decompression. NOTE(review): presumably SZ_decompress() returns a new array while SZ_decompress_args() fills decompressed_array -- confirm. */
+void *SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength, void* decompressed_array, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1);
+/* Library tear-down; declared with (void) so that C compilers see a real prototype and reject calls that pass arguments. */
+void SZ_Finalize(void);
+
+#endif  /* SZ_API_H */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/sz/include/sz_double.h b/sz/include/sz_double.h
index 1004f118..2d87075f 100644
--- a/sz/include/sz_double.h
+++ b/sz/include/sz_double.h
@@ -17,8 +17,8 @@ extern "C" {
 #include <stdio.h>
 unsigned char* SZ_skip_compress_double(double* data, size_t dataLength, size_t* outSize);
 
-void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue);
-short computeReqLength_double_MSST19(double realPrecision);
+extern void computeReqLength_double(double realPrecision, short radExpo, int* reqLength, double* medianValue);
+extern short computeReqLength_double_MSST19(double realPrecision);
 
 unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision);
 unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision);
@@ -34,7 +34,7 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dataLength, double realPrecision);
 unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision);
 unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision);
-TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData, 
+TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
 size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d);
 void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize);
 
@@ -55,12 +55,12 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 
 void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *oriData, size_t dataLength, size_t *outSize);
 
-/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio);*/
 
-int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio);
 
 void SZ_compress_args_double_NoCkRnge_1D_subblock(unsigned char* compressedBytes, double *oriData, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d,
diff --git a/sz/src/Huffman.c b/sz/src/Huffman.c
index f0f95134..a3fac513 100644
--- a/sz/src/Huffman.c
+++ b/sz/src/Huffman.c
@@ -15,17 +15,17 @@
 
 
 HuffmanTree* createHuffmanTree(int stateNum)
-{			
+{
 	HuffmanTree *huffmanTree = (HuffmanTree*)malloc(sizeof(HuffmanTree));
 	memset(huffmanTree, 0, sizeof(HuffmanTree));
 	huffmanTree->stateNum = stateNum;
 	huffmanTree->allNodes = 2*stateNum;
-	
+
 	huffmanTree->pool = (struct node_t*)malloc(huffmanTree->allNodes*2*sizeof(struct node_t));
 	huffmanTree->qqq = (node*)malloc(huffmanTree->allNodes*2*sizeof(node));
 	huffmanTree->code = (unsigned long**)malloc(huffmanTree->stateNum*sizeof(unsigned long*));
 	huffmanTree->cout = (unsigned char *)malloc(huffmanTree->stateNum*sizeof(unsigned char));
-	
+
 	memset(huffmanTree->pool, 0, huffmanTree->allNodes*2*sizeof(struct node_t));
 	memset(huffmanTree->qqq, 0, huffmanTree->allNodes*2*sizeof(node));
     memset(huffmanTree->code, 0, huffmanTree->stateNum*sizeof(unsigned long*));
@@ -33,8 +33,8 @@ HuffmanTree* createHuffmanTree(int stateNum)
 	huffmanTree->qq = huffmanTree->qqq - 1;
 	huffmanTree->n_nodes = 0;
     huffmanTree->n_inode = 0;
-    huffmanTree->qend = 1;	
-    
+    huffmanTree->qend = 1;
+
     return huffmanTree;
 }
 
@@ -45,18 +45,18 @@ HuffmanTree* createDefaultHuffmanTree()
 
     return createHuffmanTree(stateNum);
 }
- 
+
 node new_node(HuffmanTree* huffmanTree, size_t freq, unsigned int c, node a, node b)
 {
 	node n = huffmanTree->pool + huffmanTree->n_nodes++;
-	if (freq) 
+	if (freq)
 	{
 		n->c = c;
 		n->freq = freq;
 		n->t = 1;
 	}
 	else {
-		n->left = a; 
+		n->left = a;
 		n->right = b;
 		n->freq = a->freq + b->freq;
 		n->t = 0;
@@ -64,14 +64,14 @@ node new_node(HuffmanTree* huffmanTree, size_t freq, unsigned int c, node a, nod
 	}
 	return n;
 }
- 
+
 node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t)
 {
 	huffmanTree->pool[huffmanTree->n_nodes].c = c;
 	huffmanTree->pool[huffmanTree->n_nodes].t = t;
 	return huffmanTree->pool + huffmanTree->n_nodes++;
-} 
- 
+}
+
 /* priority queue */
 void qinsert(HuffmanTree *huffmanTree, node n)
 {
@@ -83,7 +83,7 @@ void qinsert(HuffmanTree *huffmanTree, node n)
 	}
 	huffmanTree->qq[i] = n;
 }
- 
+
 node qremove(HuffmanTree* huffmanTree)
 {
 	int i, l;
@@ -92,7 +92,7 @@ node qremove(HuffmanTree* huffmanTree)
 	if (huffmanTree->qend < 2) return 0;
 	huffmanTree->qend --;
 	huffmanTree->qq[i] = huffmanTree->qq[huffmanTree->qend];
-	
+
 	while ((l = (i<<1)) < huffmanTree->qend)  //l=(i*2)
 	{
 		if (l + 1 < huffmanTree->qend && huffmanTree->qq[l + 1]->freq < huffmanTree->qq[l]->freq) l++;
@@ -101,18 +101,18 @@ node qremove(HuffmanTree* huffmanTree)
 			p = huffmanTree->qq[i];
 			huffmanTree->qq[i] = huffmanTree->qq[l];
 			huffmanTree->qq[l] = p;
-			i = l;			
-		}	
+			i = l;
+		}
 		else
 		{
 			break;
 		}
-		
+
 	}
-	
+
 	return n;
 }
- 
+
 /* walk the tree and put 0s and 1s */
 /**
  * @out1 should be set to 0.
@@ -143,7 +143,7 @@ void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, u
 		out1 = out1 | 0;
 		build_code(huffmanTree, n->left, len + 1, out1, 0);
 		out1 = out1 | 1;
-		build_code(huffmanTree, n->right, len + 1, out1, 0);		
+		build_code(huffmanTree, n->right, len + 1, out1, 0);
 	}
 	else
 	{
@@ -152,7 +152,7 @@ void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, u
 		out2 = out2 | 0;
 		build_code(huffmanTree, n->left, len + 1, out1, out2);
 		out2 = out2 | 1;
-		build_code(huffmanTree, n->right, len + 1, out1, out2);	
+		build_code(huffmanTree, n->right, len + 1, out1, out2);
 	}
 }
 
@@ -201,7 +201,7 @@ void init_static(HuffmanTree* huffmanTree, int *s, size_t length)
 	build_code(huffmanTree, huffmanTree->qq[1], 0, 0, 0);
 	free(freq);
 }
- 
+
 void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize)
 {
 	size_t i = 0;
@@ -210,13 +210,13 @@ void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out,
 	unsigned char *p = out;
 	int lackBits = 0;
 	//long totalBitSize = 0, maxBitSize = 0, bitSize21 = 0, bitSize32 = 0;
-	for (i = 0;i<length;i++) 
+	for (i = 0;i<length;i++)
 	{
 		state = s[i];
-		bitSize = huffmanTree->cout[state];	
-		
-		//printf("%d %d : %d %u\n",i, state, bitSize, (code[state])[0] >> (64-cout[state])); 
-		//debug: compute the average bitSize and the count that is over 32... 	
+		bitSize = huffmanTree->cout[state];
+
+		//printf("%d %d : %d %u\n",i, state, bitSize, (code[state])[0] >> (64-cout[state]));
+		//debug: compute the average bitSize and the count that is over 32...
 		/*if(bitSize>=21)
 			bitSize21++;
 		if(bitSize>=32)
@@ -306,23 +306,23 @@ void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out,
 	printf("bitSize32 ratio = %f\n", ((float)bitSize32)/length);
 	printf("avg bit size = %f\n", ((float)totalBitSize)/length);*/
 }
- 
+
 void decode(unsigned char *s, size_t targetLength, node t, int *out)
 {
 	size_t i = 0, byteIndex = 0, count = 0;
-	int r; 
+	int r;
 	node n = t;
-	
+
 	if(n->t) //root->t==1 means that all state values are the same (constant)
 	{
 		for(count=0;count<targetLength;count++)
 			out[count] = n->c;
 		return;
 	}
-	
+
 	for(i=0;count<targetLength;i++)
 	{
-		
+
 		byteIndex = i>>3; //i/8
 		r = i%8;
 		if(((s[byteIndex] >> (7-r)) & 0x01) == 0)
@@ -331,9 +331,9 @@ void decode(unsigned char *s, size_t targetLength, node t, int *out)
 			n = n->right;
 
 		if (n->t) {
-			//putchar(n->c); 
+			//putchar(n->c);
 			out[count] = n->c;
-			n = t; 
+			n = t;
 			count++;
 		}
 	}
@@ -457,7 +457,7 @@ void pad_tree_uchar(HuffmanTree* huffmanTree, unsigned char* L, unsigned char* R
 		R[i] = huffmanTree->n_inode;
 		pad_tree_uchar(huffmanTree, L,R,C,t, huffmanTree->n_inode, rroot);
 	}
-}  
+}
 
 void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short* R, unsigned int* C, unsigned char* t, unsigned int i, node root)
 {
@@ -476,7 +476,7 @@ void pad_tree_ushort(HuffmanTree* huffmanTree, unsigned short* L, unsigned short
 		huffmanTree->n_inode++;
 		R[i] = huffmanTree->n_inode;
 		pad_tree_ushort(huffmanTree,L,R,C,t,huffmanTree->n_inode, rroot);
-	}	
+	}
 }
 
 void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, unsigned int* C, unsigned char* t, unsigned int i, node root)
@@ -498,8 +498,8 @@ void pad_tree_uint(HuffmanTree* huffmanTree, unsigned int* L, unsigned int* R, u
 		pad_tree_uint(huffmanTree,L,R,C,t,huffmanTree->n_inode, rroot);
 	}
 }
- 
-unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out) 
+
+unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int nodeCount, unsigned char** out)
 {
 	if(nodeCount<=256)
 	{
@@ -514,7 +514,7 @@ unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int n
 
 		pad_tree_uchar(huffmanTree,L,R,C,t,0,huffmanTree->qq[1]);
 
-		unsigned int totalSize = 1+3*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int);	
+		unsigned int totalSize = 1+3*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int);
 		*out = (unsigned char*)malloc(totalSize*sizeof(unsigned char));
 		(*out)[0] = (unsigned char)sysEndianType;
 		memcpy(*out+1, L, nodeCount*sizeof(unsigned char));
@@ -534,14 +534,14 @@ unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int n
 		memset(L, 0, nodeCount*sizeof(unsigned short));
 		unsigned short* R = (unsigned short*)malloc(nodeCount*sizeof(unsigned short));
 		memset(R, 0, nodeCount*sizeof(unsigned short));
-		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));	
-		memset(C, 0, nodeCount*sizeof(unsigned int));		
+		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
+		memset(C, 0, nodeCount*sizeof(unsigned int));
 		unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char));
-		memset(t, 0, nodeCount*sizeof(unsigned char));		
+		memset(t, 0, nodeCount*sizeof(unsigned char));
 		pad_tree_ushort(huffmanTree,L,R,C,t,0,huffmanTree->qq[1]);
 		unsigned int totalSize = 1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned char) + nodeCount*sizeof(unsigned int);
 		*out = (unsigned char*)malloc(totalSize);
-		(*out)[0] = (unsigned char)sysEndianType;		
+		(*out)[0] = (unsigned char)sysEndianType;
 		memcpy(*out+1, L, nodeCount*sizeof(unsigned short));
 		memcpy((*out)+1+nodeCount*sizeof(unsigned short),R,nodeCount*sizeof(unsigned short));
 		memcpy((*out)+1+2*nodeCount*sizeof(unsigned short),C,nodeCount*sizeof(unsigned int));
@@ -549,7 +549,7 @@ unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int n
 		free(L);
 		free(R);
 		free(C);
-		free(t);		
+		free(t);
 		return totalSize;
 	}
 	else //nodeCount>65536
@@ -558,16 +558,16 @@ unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int n
 		memset(L, 0, nodeCount*sizeof(unsigned int));
 		unsigned int* R = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
 		memset(R, 0, nodeCount*sizeof(unsigned int));
-		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));	
+		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
 		memset(C, 0, nodeCount*sizeof(unsigned int));
 		unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char));
 		memset(t, 0, nodeCount*sizeof(unsigned char));
 		pad_tree_uint(huffmanTree, L,R,C,t,0,huffmanTree->qq[1]);
-		
+
 		//debug
 		//node root = new_node2(0,0);
-		//unpad_tree_uint(L,R,C,t,0,root);		
-		
+		//unpad_tree_uint(L,R,C,t,0,root);
+
 		unsigned int totalSize = 1+3*nodeCount*sizeof(unsigned int)+nodeCount*sizeof(unsigned char);
 		*out = (unsigned char*)malloc(totalSize);
 		(*out)[0] = (unsigned char)sysEndianType;
@@ -579,7 +579,7 @@ unsigned int convert_HuffTree_to_bytes_anyStates(HuffmanTree* huffmanTree, int n
 		free(R);
 		free(C);
 		free(t);
-		return totalSize;		
+		return totalSize;
 	}
 }
 
@@ -677,11 +677,11 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 					p+=sizeof(unsigned int);
 				else
 					break;
-			}		
+			}
 		}
 		memcpy(L, bytes+1, nodeCount*sizeof(unsigned char));
 		memcpy(R, bytes+1+nodeCount*sizeof(unsigned char), nodeCount*sizeof(unsigned char));
-		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned char), nodeCount*sizeof(unsigned int));	
+		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned char), nodeCount*sizeof(unsigned int));
 		memcpy(t, bytes+1+2*nodeCount*sizeof(unsigned char)+nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned char));
 		node root = new_node2(huffmanTree, C[0],t[0]);
 		unpad_tree_uchar(huffmanTree,L,R,C,t,0,root);
@@ -697,17 +697,17 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 		memset(L, 0, nodeCount*sizeof(unsigned short));
 		unsigned short* R = (unsigned short*)malloc(nodeCount*sizeof(unsigned short));
 		memset(R, 0, nodeCount*sizeof(unsigned short));
-		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));	
-		memset(C, 0, nodeCount*sizeof(unsigned int));		
+		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
+		memset(C, 0, nodeCount*sizeof(unsigned int));
 		unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char));
-		memset(t, 0, nodeCount*sizeof(unsigned char));	
-				
-		unsigned char cmpSysEndianType = bytes[0];	
+		memset(t, 0, nodeCount*sizeof(unsigned char));
+
+		unsigned char cmpSysEndianType = bytes[0];
 		if(cmpSysEndianType!=(unsigned char)sysEndianType)
 		{
 			unsigned char* p = (unsigned char*)(bytes+1);
 			size_t i = 0, size = 2*nodeCount*sizeof(unsigned short);
-			
+
 			while(1)
 			{
 				symTransform_2bytes(p);
@@ -717,7 +717,7 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 				else
 					break;
 			}
-			
+
 			size = nodeCount*sizeof(unsigned int);
 			while(1)
 			{
@@ -726,23 +726,23 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 				if(i<size)
 					p+=sizeof(unsigned int);
 				else
-					break;				
+					break;
 			}
 		}
 
 		memcpy(L, bytes+1, nodeCount*sizeof(unsigned short));
 		memcpy(R, bytes+1+nodeCount*sizeof(unsigned short), nodeCount*sizeof(unsigned short));
-		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned short), nodeCount*sizeof(unsigned int));	
+		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned short), nodeCount*sizeof(unsigned int));
 
-		memcpy(t, bytes+1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned char));	
+		memcpy(t, bytes+1+2*nodeCount*sizeof(unsigned short)+nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned char));
 
 		node root = new_node2(huffmanTree,0,0);
 		unpad_tree_ushort(huffmanTree,L,R,C,t,0,root);
 		free(L);
 		free(R);
 		free(C);
-		free(t);		
-		return root;				
+		free(t);
+		return root;
 	}
 	else //nodeCount>65536
 	{
@@ -750,7 +750,7 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 		memset(L, 0, nodeCount*sizeof(unsigned int));
 		unsigned int* R = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
 		memset(R, 0, nodeCount*sizeof(unsigned int));
-		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));	
+		unsigned int* C = (unsigned int*)malloc(nodeCount*sizeof(unsigned int));
 		memset(C, 0, nodeCount*sizeof(unsigned int));
 		unsigned char* t = (unsigned char*)malloc(nodeCount*sizeof(unsigned char));
 		memset(t, 0, nodeCount*sizeof(unsigned char));
@@ -772,10 +772,10 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 
 		memcpy(L, bytes+1, nodeCount*sizeof(unsigned int));
 		memcpy(R, bytes+1+nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned int));
-		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned int));	
-	
-		memcpy(t, bytes+1+3*nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned char));			
-					
+		memcpy(C, bytes+1+2*nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned int));
+
+		memcpy(t, bytes+1+3*nodeCount*sizeof(unsigned int), nodeCount*sizeof(unsigned char));
+
 		node root = new_node2(huffmanTree,0,0);
 		unpad_tree_uint(huffmanTree,L,R,C,t,0,root);
 		free(L);
@@ -788,13 +788,13 @@ node reconstruct_HuffTree_from_bytes_anyStates(HuffmanTree *huffmanTree, unsigne
 
 void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned char **out, size_t *outSize)
 {
-	size_t i; 
+	size_t i;
 	int nodeCount = 0;
 	unsigned char *treeBytes, buffer[4];
-	
+
 	init(huffmanTree, s, length);
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 	unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree,nodeCount, &treeBytes);
 	//printf("treeByteSize = %d\n", treeByteSize);
@@ -856,7 +856,7 @@ int encode_withTree_MSST19(HuffmanTree* huffmanTree, int *s, size_t length, unsi
 
 /**
  * @par *out rememmber to allocate targetLength short_type data for it beforehand.
- * 
+ *
  * */
 void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out)
 {
@@ -926,7 +926,7 @@ void SZ_ReleaseHuffman(HuffmanTree* huffmanTree)
 	free(huffmanTree->code);
 	huffmanTree->code = NULL;
 	free(huffmanTree->cout);
-	huffmanTree->cout = NULL;	
+	huffmanTree->cout = NULL;
 	free(huffmanTree);
 	huffmanTree = NULL;
 }
diff --git a/sz/src/conf.c b/sz/src/conf.c
index 39107a21..794cda44 100644
--- a/sz/src/conf.c
+++ b/sz/src/conf.c
@@ -23,15 +23,15 @@
     file and sets other required parameters.
 
  **/
- 
+
 /*struct node_t *pool;
 node *qqq;
 node *qq;
 int n_nodes = 0, qend;
 unsigned long **code;
 unsigned char *cout;
-int n_inode;*/ 
- 
+int n_inode;*/
+
 unsigned int roundUpToPowerOf2(unsigned int base)
 {
   base -= 1;
@@ -43,42 +43,42 @@ unsigned int roundUpToPowerOf2(unsigned int base)
   base = base | (base >> 16);
 
   return base + 1;
-} 
- 
+}
+
 void updateQuantizationInfo(int quant_intervals)
 {
 	exe_params->intvCapacity = quant_intervals;
 	exe_params->intvRadius = quant_intervals/2;
-} 
- 
+}
+
 double computeABSErrBoundFromPSNR(double psnr, double threshold, double value_range)
 {
 	double v1 = psnr + 10 * log10(1-2.0/3.0*threshold);
 	double v2 = v1/(-20);
 	double v3 = pow(10, v2);
 	return value_range * v3;
-} 
+}
 
 double computeABSErrBoundFromNORM_ERR(double normErr, size_t nbEle)
 {
 	return sqrt(3.0/nbEle)*normErr;
-} 
+}
+
 
- 
 /*-------------------------------------------------------------------------*/
 /**
- * 
- * 
+ *
+ *
  * @return the status of loading conf. file: 1 (success) or 0 (error code);
  * */
 int SZ_ReadConf(const char* sz_cfgFile) {
     // Check access to SZ configuration file and load dictionary
     //record the setting in confparams_cpr
-    confparams_cpr = (sz_params*)malloc(sizeof(sz_params));   
-    memset(confparams_cpr,0,sizeof(sz_params)); 
+    confparams_cpr = (sz_params*)malloc(sizeof(sz_params));
+    memset(confparams_cpr,0,sizeof(sz_params));
     exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
-    memset(exe_params,0,sizeof(sz_exedata)); 
-    
+    memset(exe_params,0,sizeof(sz_exedata));
+
     int x = 1;
     char sol_name[256];
     char *modeBuf;
@@ -88,65 +88,65 @@ int SZ_ReadConf(const char* sz_cfgFile) {
     char *par;
 
 	char *y = (char*)&x;
-	
+
 	if(*y==1)
 		sysEndianType = LITTLE_ENDIAN_SYSTEM;
 	else //=0
 		sysEndianType = BIG_ENDIAN_SYSTEM;
-    
+
     confparams_cpr->plus_bits = 3;
-    
+
     if(sz_cfgFile == NULL)
     {
 		dataEndianType = LITTLE_ENDIAN_DATA;
 		confparams_cpr->sol_ID = SZ;
 		confparams_cpr->max_quant_intervals = 65536;
 		confparams_cpr->maxRangeRadius = confparams_cpr->max_quant_intervals/2;
-				
+
 		exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2;
 		exe_params->intvRadius = confparams_cpr->maxRangeRadius;
-		
+
 		confparams_cpr->quantization_intervals = 0;
 		exe_params->optQuantMode = 1;
 		confparams_cpr->predThreshold = 0.99;
 		confparams_cpr->sampleDistance = 100;
-		
+
 		confparams_cpr->szMode = SZ_BEST_COMPRESSION;
 		confparams_cpr->losslessCompressor = ZSTD_COMPRESSOR; //other option: GZIP_COMPRESSOR;
 		if(confparams_cpr->losslessCompressor==ZSTD_COMPRESSOR)
 			confparams_cpr->gzipMode = 3; //fast mode
 		else
 			confparams_cpr->gzipMode = 1; //high speed mode
-		
+
 		confparams_cpr->errorBoundMode = PSNR;
 		confparams_cpr->psnr = 90;
 		confparams_cpr->absErrBound = 1E-4;
 		confparams_cpr->relBoundRatio = 1E-4;
 		confparams_cpr->accelerate_pw_rel_compression = 1;
-		
+
 		confparams_cpr->pw_relBoundRatio = 1E-3;
 		confparams_cpr->segment_size = 36;
-		
+
 		confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
-		
+
 		confparams_cpr->snapshotCmprStep = 5;
-		
+
 		confparams_cpr->withRegression = SZ_WITH_LINEAR_REGRESSION;
-	
+
 		confparams_cpr->randomAccess = 0; //0: no random access , 1: support random access
-	
+
 		confparams_cpr->protectValueRange = 0;
-	
+
 		return SZ_SCES;
 	}
-    
+
     if (access(sz_cfgFile, F_OK) != 0)
     {
         printf("[SZ] Configuration file NOT accessible.\n");
         return SZ_NSCS;
     }
-    
-    //printf("[SZ] Reading SZ configuration file (%s) ...\n", sz_cfgFile);    
+
+    //printf("[SZ] Reading SZ configuration file (%s) ...\n", sz_cfgFile);
     ini = iniparser_load(sz_cfgFile);
     if (ini == NULL)
     {
@@ -167,10 +167,10 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 	}
 
 	// Reading/setting detection parameters
-	
+
 	par = iniparser_getstring(ini, "ENV:sol_name", NULL);
 	snprintf(sol_name, 256, "%s", par);
-	
+
     if(strcmp(sol_name, "SZ")==0)
 		confparams_cpr->sol_ID = SZ;
 	else if(strcmp(sol_name, "PASTRI")==0)
@@ -182,12 +182,12 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		iniparser_freedict(ini);
 		return SZ_NSCS;
 	}
-	
+
 	if(confparams_cpr->sol_ID==SZ || confparams_cpr->sol_ID==SZ_Transpose)
 	{
 		int max_quant_intervals = iniparser_getint(ini, "PARAMETER:max_quant_intervals", 65536);
 		confparams_cpr->max_quant_intervals = max_quant_intervals;
-		
+
 		int quantization_intervals = (int)iniparser_getint(ini, "PARAMETER:quantization_intervals", 0);
 		confparams_cpr->quantization_intervals = quantization_intervals;
 		if(quantization_intervals>0)
@@ -202,26 +202,26 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 
 			exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2;
 			exe_params->intvRadius = confparams_cpr->maxRangeRadius;
-			
+
 			exe_params->optQuantMode = 1;
 		}
-		
+
 		if(quantization_intervals%2!=0)
 		{
 			printf("Error: quantization_intervals must be an even number!\n");
 			iniparser_freedict(ini);
 			return SZ_NSCS;
 		}
-		
+
 		confparams_cpr->predThreshold = (float)iniparser_getdouble(ini, "PARAMETER:predThreshold", 0);
 		confparams_cpr->sampleDistance = (int)iniparser_getint(ini, "PARAMETER:sampleDistance", 0);
-		
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:szMode", NULL);
 		if(modeBuf==NULL)
 		{
 			printf("[SZ] Error: Null szMode setting (please check sz.config file)\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;					
+			return SZ_NSCS;
 		}
 		else if(strcmp(modeBuf, "SZ_BEST_SPEED")==0)
 			confparams_cpr->szMode = SZ_BEST_SPEED;
@@ -233,9 +233,9 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		{
 			printf("[SZ] Error: Wrong szMode setting (please check sz.config file)\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;	
+			return SZ_NSCS;
 		}
-		
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:losslessCompressor", "ZSTD_COMPRESSOR");
 		if(strcmp(modeBuf, "GZIP_COMPRESSOR")==0)
 			confparams_cpr->losslessCompressor = GZIP_COMPRESSOR;
@@ -246,22 +246,22 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 			printf("[SZ] Error: Wrong losslessCompressor setting (please check sz.config file)\n");\
 			printf("No Such a lossless compressor: %s\n", modeBuf);
 			iniparser_freedict(ini);
-			return SZ_NSCS;	
-		}		
-		
+			return SZ_NSCS;
+		}
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:withLinearRegression", "YES");
 		if(strcmp(modeBuf, "YES")==0 || strcmp(modeBuf, "yes")==0)
 			confparams_cpr->withRegression = SZ_WITH_LINEAR_REGRESSION;
 		else
 			confparams_cpr->withRegression = SZ_NO_REGRESSION;
-		
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:gzipMode", "Gzip_BEST_SPEED");
 		if(modeBuf==NULL)
 		{
 			printf("[SZ] Error: Null Gzip mode setting (please check sz.config file)\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;					
-		}		
+			return SZ_NSCS;
+		}
 		else if(strcmp(modeBuf, "Gzip_NO_COMPRESSION")==0)
 			confparams_cpr->gzipMode = 0;
 		else if(strcmp(modeBuf, "Gzip_BEST_SPEED")==0)
@@ -275,14 +275,14 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 			printf("[SZ] Error: Wrong gzip Mode (please check sz.config file)\n");
 			return SZ_NSCS;
 		}
-		
-		modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED");		
+
+		modeBuf = iniparser_getstring(ini, "PARAMETER:zstdMode", "Zstd_HIGH_SPEED");
 		if(modeBuf==NULL)
 		{
 			printf("[SZ] Error: Null Zstd mode setting (please check sz.config file)\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;					
-		}		
+			return SZ_NSCS;
+		}
 		else if(strcmp(modeBuf, "Zstd_BEST_SPEED")==0)
 			confparams_cpr->gzipMode = 1;
 		else if(strcmp(modeBuf, "Zstd_HIGH_SPEED")==0)
@@ -290,32 +290,32 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		else if(strcmp(modeBuf, "Zstd_HIGH_COMPRESSION")==0)
 			confparams_cpr->gzipMode = 19;
 		else if(strcmp(modeBuf, "Zstd_BEST_COMPRESSION")==0)
-			confparams_cpr->gzipMode = 22;			
+			confparams_cpr->gzipMode = 22;
 		else if(strcmp(modeBuf, "Zstd_DEFAULT_COMPRESSION")==0)
 			confparams_cpr->gzipMode = 3;
 		else
 		{
 			printf("[SZ] Error: Wrong zstd Mode (please check sz.config file)\n");
 			return SZ_NSCS;
-		}		
-		
+		}
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:protectValueRange", "YES");
 		if(strcmp(modeBuf, "YES")==0)
 			confparams_cpr->protectValueRange = 1;
 		else
 			confparams_cpr->protectValueRange = 0;
-		
+
 		confparams_cpr->randomAccess = (int)iniparser_getint(ini, "PARAMETER:randomAccess", 0);
-		
+
 		//TODO
 		confparams_cpr->snapshotCmprStep = (int)iniparser_getint(ini, "PARAMETER:snapshotCmprStep", 5);
-				
+
 		errBoundMode = iniparser_getstring(ini, "PARAMETER:errorBoundMode", NULL);
 		if(errBoundMode==NULL)
 		{
 			printf("[SZ] Error: Null error bound setting (please check sz.config file)\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;				
+			return SZ_NSCS;
 		}
 		else if(strcmp(errBoundMode,"ABS")==0||strcmp(errBoundMode,"abs")==0)
 			confparams_cpr->errorBoundMode=ABS;
@@ -347,7 +347,7 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 			iniparser_freedict(ini);
 			return SZ_NSCS;
 		}
-		
+
 		confparams_cpr->absErrBound = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 0);
 		confparams_cpr->relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:relBoundRatio", 0);
 		confparams_cpr->psnr = (double)iniparser_getdouble(ini, "PARAMETER:psnr", 0);
@@ -355,9 +355,9 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		confparams_cpr->pw_relBoundRatio = (double)iniparser_getdouble(ini, "PARAMETER:pw_relBoundRatio", 0);
 		confparams_cpr->segment_size = (int)iniparser_getint(ini, "PARAMETER:segment_size", 0);
 		confparams_cpr->accelerate_pw_rel_compression = (int)iniparser_getint(ini, "PARAMETER:accelerate_pw_rel_compression", 1);
-		
+
 		modeBuf = iniparser_getstring(ini, "PARAMETER:pwr_type", "MIN");
-		
+
 		if(strcmp(modeBuf, "MIN")==0)
 			confparams_cpr->pwr_type = SZ_PWR_MIN_TYPE;
 		else if(strcmp(modeBuf, "AVG")==0)
@@ -368,24 +368,24 @@ int SZ_ReadConf(const char* sz_cfgFile) {
 		{
 			printf("[SZ] Error: Wrong pwr_type setting (please check sz.config file).\n");
 			iniparser_freedict(ini);
-			return SZ_NSCS;	
+			return SZ_NSCS;
 		}
 		else //by default
 			confparams_cpr->pwr_type = SZ_PWR_AVG_TYPE;
-    
+
 		//initialization for Huffman encoding
-		//SZ_Reset();	
+		//SZ_Reset();
 	}
 	else if(confparams_cpr->sol_ID == PASTRI)
 	{//load parameters for PSTRI
-		pastri_par.bf[0] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_0", 0);		
-		pastri_par.bf[1] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_1", 0);		
-		pastri_par.bf[2] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_2", 0);		
+		pastri_par.bf[0] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_0", 0);
+		pastri_par.bf[1] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_1", 0);
+		pastri_par.bf[2] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_2", 0);
 		pastri_par.bf[3] = (int)iniparser_getint(ini, "PARAMETER:basisFunction_3", 0);
-		pastri_par.numBlocks = (int)iniparser_getint(ini, "PARAMETER:numBlocks", 0);		
+		pastri_par.numBlocks = (int)iniparser_getint(ini, "PARAMETER:numBlocks", 0);
 		confparams_cpr->absErrBound = pastri_par.originalEb = (double)iniparser_getdouble(ini, "PARAMETER:absErrBound", 1E-3);
 	}
-	
+
     iniparser_freedict(ini);
     return SZ_SCES;
 }
@@ -429,7 +429,7 @@ int checkVersion2(char* version)
 	int major = version[0];
 	int minor = version[1];
 	int revision = version[2];
-	
+
 	int preVersion = 20108;
 	int givenVersion = computeVersion(major, minor, revision);
 	//int currentVersion = computeVersion(SZ_VER_MAJOR, SZ_VER_MINOR, SZ_VER_REVISION);
diff --git a/sz/src/dataCompression.c b/sz/src/dataCompression.c
index 0051c542..96ab3ad8 100644
--- a/sz/src/dataCompression.c
+++ b/sz/src/dataCompression.c
@@ -7,10 +7,13 @@
  *      See COPYRIGHT in top-level directory.
  */
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include "sz.h"
 #include "DynamicByteArray.h"
 #include "DynamicIntArray.h"
@@ -38,7 +41,7 @@ long computeRangeSize_int(void* oriData, int dataType, size_t size, int64_t* val
 	if(dataType==SZ_UINT8)
 	{
 		unsigned char* data = (unsigned char*)oriData;
-		unsigned char data_; 
+		unsigned char data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
@@ -52,48 +55,48 @@ long computeRangeSize_int(void* oriData, int dataType, size_t size, int64_t* val
 	else if(dataType == SZ_UINT16)
 	{
 		unsigned short* data = (unsigned short*)oriData;
-		unsigned short data_; 
+		unsigned short data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 	else if(dataType == SZ_INT16)
-	{ 
+	{
 		short* data = (short*)oriData;
-		short data_; 
+		short data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 	else if(dataType == SZ_UINT32)
 	{
 		unsigned int* data = (unsigned int*)oriData;
-		unsigned int data_; 
+		unsigned int data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 	else if(dataType == SZ_INT32)
 	{
 		int* data = (int*)oriData;
-		int data_; 
+		int data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 	else if(dataType == SZ_UINT64)
 	{
-		unsigned long* data = (unsigned long*)oriData;
-		unsigned long data_; 
+		uint64_t* data = (uint64_t*)oriData;
+		uint64_t data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 	else if(dataType == SZ_INT64)
 	{
-		long* data = (long *)oriData;
-		long data_; 
+		int64_t* data = (int64_t*)oriData;
+		int64_t data_;
 		min = data[0], max = min;
 		computeMinMax(data);
 	}
 
 	*valueRangeSize = max - min;
-	return min;	
+	return min;
 }
 
 float computeRangeSize_float(float* oriData, size_t size, float* valueRangeSize, float* medianValue)
@@ -156,7 +159,7 @@ double computeRangeSize_double(double* oriData, size_t size, double* valueRangeS
 		else if(max<data)
 			max = data;
 	}
-	
+
 	*valueRangeSize = max - min;
 	*medianValue = min + *valueRangeSize/2;
 	return min;
@@ -287,7 +290,7 @@ double getRealPrecision_double(double valueRangeSize, int errBoundMode, double a
 	int state = SZ_SCES;
 	double precision = 0;
 	if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL)
-		precision = absErrBound; 
+		precision = absErrBound;
 	else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL)
 		precision = relBoundRatio*valueRangeSize;
 	else if(errBoundMode==ABS_AND_REL)
@@ -310,7 +313,7 @@ double getRealPrecision_float(float valueRangeSize, int errBoundMode, double abs
 	int state = SZ_SCES;
 	double precision = 0;
 	if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL)
-		precision = absErrBound; 
+		precision = absErrBound;
 	else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL)
 		precision = relBoundRatio*valueRangeSize;
 	else if(errBoundMode==ABS_AND_REL)
@@ -333,7 +336,7 @@ double getRealPrecision_int(long valueRangeSize, int errBoundMode, double absErr
 	int state = SZ_SCES;
 	double precision = 0;
 	if(errBoundMode==ABS||errBoundMode==ABS_OR_PW_REL||errBoundMode==ABS_AND_PW_REL)
-		precision = absErrBound; 
+		precision = absErrBound;
 	else if(errBoundMode==REL||errBoundMode==REL_OR_PW_REL||errBoundMode==REL_AND_PW_REL)
 		precision = relBoundRatio*valueRangeSize;
 	else if(errBoundMode==ABS_AND_REL)
@@ -360,11 +363,11 @@ void symTransform_8bytes(unsigned char data[8])
 	tmp = data[1];
 	data[1] = data[6];
 	data[6] = tmp;
-	
+
 	tmp = data[2];
 	data[2] = data[5];
 	data[5] = tmp;
-	
+
 	tmp = data[3];
 	data[3] = data[4];
 	data[4] = tmp;
@@ -448,25 +451,25 @@ inline void compressUInt64Value(uint64_t tgtValue, uint64_t minValue, int byteSi
 	memcpy(bytes, tmpBytes + 8 - byteSize, byteSize);
 }
 
-inline void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue, 
+inline void compressSingleFloatValue(FloatValueCompressElement *vce, float tgtValue, float precision, float medianValue,
 		int reqLength, int reqBytesLength, int resiBitsLength)
-{		
+{
 	float normValue = tgtValue - medianValue;
 
 	lfloat lfBuf;
 	lfBuf.value = normValue;
-			
+
 	int ignBytesLength = 32 - reqLength;
 	if(ignBytesLength<0)
 		ignBytesLength = 0;
-	
+
 	int tmp_int = lfBuf.ivalue;
 	intToBytes_bigEndian(vce->curBytes, tmp_int);
-		
+
 	lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength;
-	
+
 	//float tmpValue = lfBuf.value;
-	
+
 	vce->data = lfBuf.value+medianValue;
 	vce->curValue = tmp_int;
 	vce->reqBytesLength = reqBytesLength;
@@ -519,25 +522,25 @@ void compressSingleDoubleValue_MSST19(DoubleValueCompressElement *vce, double tg
     vce->resiBitsLength = resiBitsLength;
 }
 
-void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue, 
+void compressSingleDoubleValue(DoubleValueCompressElement *vce, double tgtValue, double precision, double medianValue,
 		int reqLength, int reqBytesLength, int resiBitsLength)
-{		
+{
 	double normValue = tgtValue - medianValue;
 
 	ldouble lfBuf;
 	lfBuf.value = normValue;
-			
+
 	int ignBytesLength = 64 - reqLength;
 	if(ignBytesLength<0)
 		ignBytesLength = 0;
 
 	long tmp_long = lfBuf.lvalue;
 	longToBytes_bigEndian(vce->curBytes, tmp_long);
-				
+
 	lfBuf.lvalue = (lfBuf.lvalue >> ignBytesLength)<<ignBytesLength;
-	
+
 	//double tmpValue = lfBuf.value;
-	
+
 	vce->data = lfBuf.value+medianValue;
 	vce->curValue = tmp_long;
 	vce->reqBytesLength = reqBytesLength;
@@ -569,7 +572,7 @@ inline int compIdenticalLeadingBytesCount_float(unsigned char* preBytes, unsigne
 }
 
 //TODO double-check the correctness...
-inline void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray, 
+inline void addExactData(DynamicByteArray *exactMidByteArray, DynamicIntArray *exactLeadNumArray,
 		DynamicIntArray *resiBitArray, LossyCompressionElement *lce)
 {
 	int i;
@@ -617,35 +620,35 @@ int getPredictionCoefficients(int layers, int dimension, int **coeff_array, int
 					(*coeff_array)[1] = -3;
 					(*coeff_array)[2] = 1;
 					break;
-			}	
+			}
 			break;
 		case 2:
 			switch(layers)
 			{
 				case 1:
-				
+
 					break;
 				case 2:
-				
+
 					break;
 				case 3:
-				
+
 					break;
-			}				
+			}
 			break;
 		case 3:
 			switch(layers)
 			{
 				case 1:
-				
+
 					break;
 				case 2:
-				
+
 					break;
 				case 3:
-				
+
 					break;
-			}			
+			}
 			break;
 		default:
 			printf("Error: dimension must be no greater than 3 in the current version.\n");
@@ -675,7 +678,7 @@ int computeBlockEdgeSize_3D(int segmentSize)
 		if(i*i*i>segmentSize)
 			break;
 	}
-	return i;	
+	return i;
 	//return (int)(pow(segmentSize, 1.0/3)+1);
 }
 
@@ -705,20 +708,20 @@ int initRandomAccessBytes(unsigned char* raBytes)
 	return k;
 }
 
-//The following functions are float-precision version of dealing with the unpredictable data points 
+//The following functions are float-precision version of dealing with the unpredictable data points
 int generateLossyCoefficients_float(float* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, float* medianValue, float* decData)
 {
 	float valueRangeSize;
-	
+
 	computeRangeSize_float(oriData, nbEle, &valueRangeSize, medianValue);
 	short radExpo = getExponent_float(valueRangeSize/2);
-	
+
 	int reqLength;
 	computeReqLength_float(precision, radExpo, &reqLength, medianValue);
-	
+
 	*reqBytesLength = reqLength/8;
 	*resiBitsLength = reqLength%8;
-	
+
 	size_t i = 0;
 	for(i = 0;i < nbEle;i++)
 	{
@@ -726,39 +729,39 @@ int generateLossyCoefficients_float(float* oriData, double precision, size_t nbE
 
 		lfloat lfBuf;
 		lfBuf.value = normValue;
-				
+
 		int ignBytesLength = 32 - reqLength;
 		if(ignBytesLength<0)
 			ignBytesLength = 0;
-			
+
 		lfBuf.ivalue = (lfBuf.ivalue >> ignBytesLength) << ignBytesLength;
-		
+
 		//float tmpValue = lfBuf.value;
-		
+
 		decData[i] = lfBuf.value + *medianValue;
 	}
 	return reqLength;
-}	
-		
+}
+
 /**
  * @param float* oriData: inplace argument (input / output)
- * 
- * */		
-int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 
+ *
+ * */
+int compressExactDataArray_float(float* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
 int reqLength, int reqBytesLength, int resiBitsLength, float medianValue)
 {
 	//allocate memory for coefficient compression arrays
 	DynamicIntArray *exactLeadNumArray;
-	new_DIA(&exactLeadNumArray, DynArrayInitLen);	
+	new_DIA(&exactLeadNumArray, DynArrayInitLen);
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	unsigned char preDataBytes[4] = {0,0,0,0};	
+	unsigned char preDataBytes[4] = {0,0,0,0};
 
 	//allocate memory for vce and lce
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
-	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));	
+	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
 
 	size_t i = 0;
 	for(i = 0;i < nbEle;i++)
@@ -774,14 +777,14 @@ int reqLength, int reqBytesLength, int resiBitsLength, float medianValue)
 	convertDIAtoInts(resiBitArray, resiArray);
 
 	size_t midArraySize = exactMidByteArray->size;
-	
+
 	free(vce);
 	free(lce);
-	
+
 	free_DIA(exactLeadNumArray);
 	free_DBA(exactMidByteArray);
 	free_DIA(resiBitArray);
-	
+
 	return midArraySize;
 }
 
@@ -792,12 +795,12 @@ void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exact
 	float exactData = 0;
 	unsigned char preBytes[4] = {0,0,0,0};
 	unsigned char curBytes[4];
-	int resiBits; 
-	unsigned char leadingNum;		
-	
+	int resiBits;
+	unsigned char leadingNum;
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
-	
+
 	for(i = 0; i<nbEle;i++)
 	{
 		// compute resiBits
@@ -826,7 +829,7 @@ void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exact
 			k += resiBitsLength;
 		}
 
-		// recover the exact data	
+		// recover the exact data
 		memset(curBytes, 0, 4);
 		leadingNum = leadNum[l++];
 		memcpy(curBytes, preBytes, leadingNum);
@@ -840,23 +843,23 @@ void decompressExactDataArray_float(unsigned char* leadNum, unsigned char* exact
 		exactData = bytesToFloat(curBytes);
 		(*decData)[i] = exactData + medianValue;
 		memcpy(preBytes,curBytes,4);
-	}	
+	}
 }
 
 //double-precision version of dealing with unpredictable data points in sz 2.0
 int generateLossyCoefficients_double(double* oriData, double precision, size_t nbEle, int* reqBytesLength, int* resiBitsLength, double* medianValue, double* decData)
 {
 	double valueRangeSize;
-	
+
 	computeRangeSize_double(oriData, nbEle, &valueRangeSize, medianValue);
 	short radExpo = getExponent_double(valueRangeSize/2);
-	
+
 	int reqLength;
 	computeReqLength_double(precision, radExpo, &reqLength, medianValue);
-	
+
 	*reqBytesLength = reqLength/8;
 	*resiBitsLength = reqLength%8;
-	
+
 	size_t i = 0;
 	for(i = 0;i < nbEle;i++)
 	{
@@ -864,37 +867,37 @@ int generateLossyCoefficients_double(double* oriData, double precision, size_t n
 
 		ldouble ldBuf;
 		ldBuf.value = normValue;
-				
+
 		int ignBytesLength = 64 - reqLength;
 		if(ignBytesLength<0)
 			ignBytesLength = 0;
-			
+
 		ldBuf.lvalue = (ldBuf.lvalue >> ignBytesLength) << ignBytesLength;
-		
+
 		decData[i] = ldBuf.value + *medianValue;
 	}
 	return reqLength;
-}	
-		
+}
+
 /**
  * @param double* oriData: inplace argument (input / output)
- * 
- * */		
-int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray, 
+ *
+ * */
+int compressExactDataArray_double(double* oriData, double precision, size_t nbEle, unsigned char** leadArray, unsigned char** midArray, unsigned char** resiArray,
 int reqLength, int reqBytesLength, int resiBitsLength, double medianValue)
 {
 	//allocate memory for coefficient compression arrays
 	DynamicIntArray *exactLeadNumArray;
-	new_DIA(&exactLeadNumArray, DynArrayInitLen);	
+	new_DIA(&exactLeadNumArray, DynArrayInitLen);
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0};	
+	unsigned char preDataBytes[8] = {0,0,0,0,0,0,0,0};
 
 	//allocate memory for vce and lce
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
-	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));	
+	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
 
 	size_t i = 0;
 	for(i = 0;i < nbEle;i++)
@@ -910,14 +913,14 @@ int reqLength, int reqBytesLength, int resiBitsLength, double medianValue)
 	convertDIAtoInts(resiBitArray, resiArray);
 
 	size_t midArraySize = exactMidByteArray->size;
-	
+
 	free(vce);
 	free(lce);
-	
+
 	free_DIA(exactLeadNumArray);
 	free_DBA(exactMidByteArray);
 	free_DIA(resiBitArray);
-	
+
 	return midArraySize;
 }
 
@@ -928,12 +931,12 @@ void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exac
 	double exactData = 0;
 	unsigned char preBytes[8] = {0,0,0,0,0,0,0,0};
 	unsigned char curBytes[8];
-	int resiBits; 
-	unsigned char leadingNum;		
-	
+	int resiBits;
+	unsigned char leadingNum;
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
-	
+
 	for(i = 0; i<nbEle;i++)
 	{
 		// compute resiBits
@@ -962,7 +965,7 @@ void decompressExactDataArray_double(unsigned char* leadNum, unsigned char* exac
 			k += resiBitsLength;
 		}
 
-		// recover the exact data	
+		// recover the exact data
 		memset(curBytes, 0, 8);
 		leadingNum = leadNum[l++];
 		memcpy(curBytes, preBytes, leadingNum);
diff --git a/sz/src/dictionary.c b/sz/src/dictionary.c
index 3f0f5cfa..788a2393 100644
--- a/sz/src/dictionary.c
+++ b/sz/src/dictionary.c
@@ -18,7 +18,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 
 /** Maximum value size for integers and doubles. */
 #define MAXVALSZ    1024
@@ -38,7 +40,7 @@
 static void * mem_double(void * ptr, int size)
 {
     void * newptr ;
- 
+
     newptr = calloc(2*size, 1);
     if (newptr==NULL) {
         return NULL ;
@@ -224,7 +226,7 @@ int dictionary_set(dictionary * d, const char * key, const char * val)
     unsigned    hash ;
 
     if (d==NULL || key==NULL) return -1 ;
-    
+
     /* Compute hash for this key */
     hash = dictionary_hash(key) ;
     /* Find if value is already in dictionary */
@@ -367,7 +369,7 @@ int main(int argc, char *argv[])
     /* Allocate dictionary */
     printf("allocating...\n");
     d = dictionary_new(0);
-    
+
     /* Set values in dictionary */
     printf("setting %d values...\n", NVALS);
     for (i=0 ; i<NVALS ; i++) {
diff --git a/sz/src/rw.c b/sz/src/rw.c
index 5ead7f15..3a1c61e7 100644
--- a/sz/src/rw.c
+++ b/sz/src/rw.c
@@ -7,11 +7,14 @@
  *      See COPYRIGHT in top-level directory.
  */
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 
 #include "rw.h"
 #include "sz.h"
@@ -24,7 +27,7 @@ int checkFileExistance(char* filePath)
 	} else {
 		// file doesn't exist
 		return 0;
-	}	
+	}
 }
 
 float** create2DArray_float(size_t m, size_t n)
@@ -41,7 +44,7 @@ void free2DArray_float(float** data, size_t m)
 	size_t i = 0;
 	for(i=0;i<m;i++)
 		free(data[i]);
-	free(data);	
+	free(data);
 }
 
 float*** create3DArray_float(size_t p, size_t m, size_t n)
@@ -66,7 +69,7 @@ void free3DArray_float(float*** data, size_t p, size_t m)
 			free(data[i][j]);
 		free(data[i]);
 	}
-	free(data);	
+	free(data);
 }
 
 double** create2DArray_double(size_t m, size_t n)
@@ -75,7 +78,7 @@ double** create2DArray_double(size_t m, size_t n)
 	double **data = (double**)malloc(sizeof(double*)*m);
 	for(i=0;i<m;i++)
 			data[i] = (double*)malloc(sizeof(double)*n);
-			
+
 	return data;
 }
 
@@ -84,7 +87,7 @@ void free2DArray_double(double** data, size_t m)
 	size_t i;
 	for(i=0;i<m;i++)
 		free(data[i]);
-	free(data);	
+	free(data);
 }
 
 double*** create3DArray_double(size_t p, size_t m, size_t n)
@@ -109,7 +112,7 @@ void free3DArray_double(double*** data, size_t p, size_t m)
 			free(data[i][j]);
 		free(data[i]);
 	}
-	free(data);	
+	free(data);
 }
 
 size_t checkFileSize(char *srcFilePath, int *status)
@@ -141,9 +144,9 @@ unsigned char *readByteData(char *srcFilePath, size_t *byteLength, int *status)
 	fseek(pFile, 0, SEEK_END);
     *byteLength = ftell(pFile);
     fclose(pFile);
-    
+
     unsigned char *byteBuf = ( unsigned char *)malloc((*byteLength)*sizeof(unsigned char)); //sizeof(char)==1
-    
+
     pFile = fopen(srcFilePath, "rb");
     if (pFile == NULL)
     {
@@ -169,7 +172,7 @@ double *readDoubleData(char *srcFilePath, size_t *nbEle, int *status)
 	else
 	{
 		size_t i,j;
-		
+
 		size_t byteLength;
 		unsigned char* bytes = readByteData(srcFilePath, &byteLength, &state);
 		if(state==SZ_FERR)
@@ -179,7 +182,7 @@ double *readDoubleData(char *srcFilePath, size_t *nbEle, int *status)
 		}
 		double *daBuf = (double *)malloc(byteLength);
 		*nbEle = byteLength/8;
-		
+
 		ldouble buf;
 		for(i = 0;i<*nbEle;i++)
 		{
@@ -431,7 +434,7 @@ float *readFloatData(char *srcFilePath, size_t *nbEle, int *status)
 	else
 	{
 		size_t i,j;
-		
+
 		size_t byteLength;
 		unsigned char* bytes = readByteData(srcFilePath, &byteLength, &state);
 		if(state == SZ_FERR)
@@ -441,7 +444,7 @@ float *readFloatData(char *srcFilePath, size_t *nbEle, int *status)
 		}
 		float *daBuf = (float *)malloc(byteLength);
 		*nbEle = byteLength/4;
-		
+
 		lfloat buf;
 		for(i = 0;i<*nbEle;i++)
 		{
@@ -469,9 +472,9 @@ double *readDoubleData_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
     inSize = ftell(pFile);
     *nbEle = inSize/8; //only support double in this version
     fclose(pFile);
-    
+
     double *daBuf = (double *)malloc(inSize);
-    
+
     pFile = fopen(srcFilePath, "rb");
     if (pFile == NULL)
     {
@@ -535,7 +538,7 @@ int16_t *readInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/2; 
+	*nbEle = inSize/2;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -556,7 +559,7 @@ int16_t *readInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
 	fread(daBuf, 2, *nbEle, pFile);
 	fclose(pFile);
 	*status = SZ_SCES;
-	return daBuf;	
+	return daBuf;
 }
 
 uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status)
@@ -571,7 +574,7 @@ uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *sta
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/2; 
+	*nbEle = inSize/2;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -592,7 +595,7 @@ uint16_t *readUInt16Data_systemEndian(char *srcFilePath, size_t *nbEle, int *sta
 	fread(daBuf, 2, *nbEle, pFile);
 	fclose(pFile);
 	*status = SZ_SCES;
-	return daBuf;	
+	return daBuf;
 }
 
 int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status)
@@ -607,7 +610,7 @@ int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/4; 
+	*nbEle = inSize/4;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -628,7 +631,7 @@ int32_t *readInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
 	fread(daBuf, 4, *nbEle, pFile);
 	fclose(pFile);
 	*status = SZ_SCES;
-	return daBuf;	
+	return daBuf;
 }
 
 uint32_t *readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status)
@@ -643,7 +646,7 @@ uint32_t *readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *sta
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/4; 
+	*nbEle = inSize/4;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -664,7 +667,7 @@ uint32_t *readUInt32Data_systemEndian(char *srcFilePath, size_t *nbEle, int *sta
 	fread(daBuf, 4, *nbEle, pFile);
 	fclose(pFile);
 	*status = SZ_SCES;
-	return daBuf;	
+	return daBuf;
 }
 
 int64_t *readInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *status)
@@ -679,7 +682,7 @@ int64_t *readInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *statu
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/8; 
+	*nbEle = inSize/8;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -715,7 +718,7 @@ uint64_t *readUInt64Data_systemEndian(char *srcFilePath, size_t *nbEle, int *sta
 	}
 	fseek(pFile, 0, SEEK_END);
 	inSize = ftell(pFile);
-	*nbEle = inSize/8; 
+	*nbEle = inSize/8;
 	fclose(pFile);
 
 	if(inSize<=0)
@@ -751,17 +754,17 @@ float *readFloatData_systemEndian(char *srcFilePath, size_t *nbEle, int *status)
     }
 	fseek(pFile, 0, SEEK_END);
     inSize = ftell(pFile);
-    *nbEle = inSize/4; 
+    *nbEle = inSize/4;
     fclose(pFile);
-    
+
     if(inSize<=0)
     {
 		printf("Error: input file is wrong!\n");
 		*status = SZ_FERR;
 	}
-    
+
     float *daBuf = (float *)malloc(inSize);
-    
+
     pFile = fopen(srcFilePath, "rb");
     if (pFile == NULL)
     {
@@ -784,7 +787,7 @@ void writeByteData(unsigned char *bytes, size_t byteLength, char *tgtFilePath, i
         *status = SZ_FERR;
         return;
     }
-    
+
     fwrite(bytes, 1, byteLength, pFile); //write outSize bytes
     fclose(pFile);
     *status = SZ_SCES;
@@ -801,13 +804,13 @@ void writeDoubleData(double *data, size_t nbEle, char *tgtFilePath, int *status)
         *status = SZ_FERR;
         return;
     }
-    
+
     for(i = 0;i<nbEle;i++)
 	{
 		sprintf(s,"%.20G\n",data[i]);
 		fputs(s, pFile);
 	}
-    
+
     fclose(pFile);
     *status = SZ_SCES;
 }
@@ -823,7 +826,7 @@ void writeFloatData(float *data, size_t nbEle, char *tgtFilePath, int *status)
         *status = SZ_FERR;
         return;
     }
-   
+
     for(i = 0;i<nbEle;i++)
 	{
 		//printf("i=%d\n",i);
@@ -831,7 +834,7 @@ void writeFloatData(float *data, size_t nbEle, char *tgtFilePath, int *status)
 		sprintf(s,"%.30G\n",data[i]);
 		fputs(s, pFile);
 	}
-    
+
     fclose(pFile);
     *status = SZ_SCES;
 }
@@ -847,7 +850,7 @@ void writeData(void *data, int dataType, size_t nbEle, char *tgtFilePath, int *s
 	else if(dataType == SZ_DOUBLE)
 	{
 		double* dataArray = (double *)data;
-		writeDoubleData(dataArray, nbEle, tgtFilePath, &state);	
+		writeDoubleData(dataArray, nbEle, tgtFilePath, &state);
 	}
 	else
 	{
@@ -860,7 +863,7 @@ void writeData(void *data, int dataType, size_t nbEle, char *tgtFilePath, int *s
 
 void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *status)
 {
-	size_t i = 0; 
+	size_t i = 0;
 	int state = SZ_SCES;
 	lfloat buf;
 	unsigned char* bytes = (unsigned char*)malloc(nbEle*sizeof(float));
@@ -870,7 +873,7 @@ void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *s
 		bytes[i*4+0] = buf.byte[0];
 		bytes[i*4+1] = buf.byte[1];
 		bytes[i*4+2] = buf.byte[2];
-		bytes[i*4+3] = buf.byte[3];					
+		bytes[i*4+3] = buf.byte[3];
 	}
 
 	size_t byteLength = nbEle*sizeof(float);
@@ -881,7 +884,7 @@ void writeFloatData_inBytes(float *data, size_t nbEle, char* tgtFilePath, int *s
 
 void writeDoubleData_inBytes(double *data, size_t nbEle, char* tgtFilePath, int *status)
 {
-	size_t i = 0, index = 0; 
+	size_t i = 0, index = 0;
 	int state = SZ_SCES;
 	ldouble buf;
 	unsigned char* bytes = (unsigned char*)malloc(nbEle*sizeof(double));
@@ -973,7 +976,7 @@ void writeULongData_inBytes(uint64_t *states, size_t stateLength, char *tgtFileP
 
 unsigned short* readShortData(char *srcFilePath, size_t *dataLength, int *status)
 {
-	size_t byteLength = 0; 
+	size_t byteLength = 0;
 	int state = SZ_SCES;
 	unsigned char * bytes = readByteData(srcFilePath, &byteLength, &state);
 	*dataLength = byteLength/2;
@@ -1016,19 +1019,19 @@ void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2,
 {
 	size_t i, nbEle = computeDataLength(r5, r4, r3, r2, r1);
 	int dim = computeDimension(r5, r4, r3, r2, r1);
-	
+
 	FILE *pFile = fopen(tgtFilePath, "wb");
 	if (pFile == NULL)
 	{
 		printf("Failed to open input file. 3\n");
 		*status = SZ_NSCS;
 		return;
-	}	
+	}
 	fputs("PF\n", pFile);
 	char strBuf[256];
 	switch(dim)
 	{
-	case 1: 
+	case 1:
 		sprintf(strBuf, "%zu\n", r1);
 		break;
 	case 2:
@@ -1050,8 +1053,8 @@ void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2,
 	else
 		fputs("1.0\n", pFile);
 
-	size_t byteLength = nbEle*sizeof(float);	
-	lfloat buf;	
+	size_t byteLength = nbEle*sizeof(float);
+	lfloat buf;
 	unsigned char* bytes = (unsigned char*)malloc(byteLength);
 	for(i=0;i<nbEle;i++)
 	{
@@ -1061,10 +1064,10 @@ void convertToPFM_float(float *data, size_t r5, size_t r4, size_t r3, size_t r2,
 		bytes[i*4+2] = buf.byte[2];
 		bytes[i*4+3] = buf.byte[3];
 	}
-	
+
 	fwrite(bytes, 1, byteLength, pFile); //write outSize bytes
 	fclose(pFile);
-	
+
 	free(bytes);
 	*status = SZ_SCES;
 }*/
diff --git a/sz/src/rwf.c b/sz/src/rwf.c
index 17e0fb4b..9af15ff7 100644
--- a/sz/src/rwf.c
+++ b/sz/src/rwf.c
@@ -14,7 +14,7 @@
 
 void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize)
 {
-	int i; 
+	int i;
 	int status;
 	char s[*len+1];
 	for(i=0;i<*len;i++)
@@ -25,7 +25,7 @@ void checkfilesizec_(char *srcFilePath, int *len, size_t *filesize)
 
 void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *byteLength)
 {
-	size_t i; 
+	size_t i;
 	int ierr;
     char s[*len+1];
     for(i=0;i<*len;i++)
@@ -38,12 +38,12 @@ void readbytefile_(char *srcFilePath, int *len, unsigned char *bytes, size_t *by
 
 void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle)
 {
-	size_t i; 
+	size_t i;
 	int ierr;
     char s[*len+1];
     for(i=0;i<*len;i++)
         s[i]=srcFilePath[i];
-    s[*len]='\0';	
+    s[*len]='\0';
 	double *tmp_data = readDoubleData(s, nbEle, &ierr);
 	memcpy(data, tmp_data, *nbEle);
 	free(tmp_data);
@@ -51,7 +51,7 @@ void readdoublefile_(char *srcFilePath, int *len, double *data, size_t *nbEle)
 
 void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle)
 {
-	size_t i; 
+	size_t i;
 	int ierr;
     char s[*len+1];
     for(i=0;i<*len;i++)
@@ -64,7 +64,7 @@ void readfloatfile_(char *srcFilePath, int *len, float *data, size_t *nbEle)
 
 void writebytefile_(unsigned char *bytes, size_t *byteLength, char *tgtFilePath, int *len)
 {
-	size_t i; 
+	size_t i;
 	int ierr;
     char s[*len+1];
     for(i=0;i<*len;i++)
@@ -80,13 +80,13 @@ void writedoublefile_(double *data, size_t *nbEle, char *tgtFilePath, int *len)
     char s[*len+1];
     for(i=0;i<*len;i++)
         s[i]=tgtFilePath[i];
-    s[*len]='\0';	
+    s[*len]='\0';
 	writeDoubleData(data, *nbEle, s, &ierr);
 }
 
 void writefloatfile_(float *data, size_t *nbEle, char *tgtFilePath, int *len)
 {
-	size_t i; 
+	size_t i;
 	int ierr;
     char s[*len+1];
     for(i=0;i<*len;i++)
diff --git a/sz/src/sz.c b/sz/src/sz.c
index acc43c1b..6a46773c 100644
--- a/sz/src/sz.c
+++ b/sz/src/sz.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include "sz.h"
 #include "CompressElement.h"
 #include "DynamicByteArray.h"
@@ -34,7 +37,7 @@ int sysEndianType; //*sysEndianType is actually set automatically.
 
 //the confparams should be separate between compression and decopmression, in case of mutual-affection when calling compression/decompression alternatively
 sz_params *confparams_cpr = NULL; //used for compression
-sz_params *confparams_dec = NULL; //used for decompression 
+sz_params *confparams_dec = NULL; //used for decompression
 
 sz_exedata *exe_params = NULL;
 
@@ -59,9 +62,9 @@ int SZ_Init(const char *configFilePath)
 	int loadFileResult = SZ_LoadConf(configFilePath);
 	if(loadFileResult==SZ_NSCS)
 		return SZ_NSCS;
-	
+
 	exe_params->SZ_SIZE_TYPE = sizeof(size_t);
-	
+
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 	{
 		initSZ_TSC();
@@ -78,7 +81,7 @@ int SZ_Init_Params(sz_params *params)
 
 	if(params->max_quant_intervals > 0)
 		params->maxRangeRadius = params->max_quant_intervals/2;
-		
+
 	memcpy(confparams_cpr, params, sizeof(sz_params));
 
 	if(params->quantization_intervals%2!=0)
@@ -93,57 +96,57 @@ int SZ_Init_Params(sz_params *params)
 int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	int dimension;
-	if(r1==0) 
+	if(r1==0)
 	{
 		dimension = 0;
 	}
-	else if(r2==0) 
+	else if(r2==0)
 	{
 		dimension = 1;
 	}
-	else if(r3==0) 
+	else if(r3==0)
 	{
 		dimension = 2;
 	}
-	else if(r4==0) 
+	else if(r4==0)
 	{
 		dimension = 3;
 	}
-	else if(r5==0) 
+	else if(r5==0)
 	{
 		dimension = 4;
 	}
-	else 
+	else
 	{
 		dimension = 5;
 	}
-	return dimension;	
+	return dimension;
 }
 
 size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	size_t dataLength;
-	if(r1==0) 
+	if(r1==0)
 	{
 		dataLength = 0;
 	}
-	else if(r2==0) 
+	else if(r2==0)
 	{
 		dataLength = r1;
 	}
-	else if(r3==0) 
+	else if(r3==0)
 	{
 		dataLength = r1*r2;
 	}
-	else if(r4==0) 
+	else if(r4==0)
 	{
 		dataLength = r1*r2*r3;
 	}
-	else if(r5==0) 
+	else if(r5==0)
 	{
 		dataLength = r1*r2*r3*r4;
 	}
-	else 
+	else
 	{
 		dataLength = r1*r2*r3*r4*r5;
 	}
@@ -280,7 +283,7 @@ int filterDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_
 
 /*-------------------------------------------------------------------------*/
 /**
-    @brief      Perform Compression 
+    @brief      Perform Compression
     @param      data           data to be compressed
     @param      outSize        the size (in bytes) after compression
     @param		r5,r4,r3,r2,r1	the sizes of each dimension (supporting only 5 dimensions at most in this version.
@@ -288,7 +291,7 @@ int filterDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_
 
  **/
 /*-------------------------------------------------------------------------*/
-unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound, 
+unsigned char* SZ_compress_args(int dataType, void *data, size_t *outSize, int errBoundMode, double absErrBound,
 double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	if(confparams_cpr == NULL)
@@ -302,9 +305,9 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 	{
 		exe_params->intvCapacity = confparams_cpr->maxRangeRadius*2;
 		exe_params->intvRadius = confparams_cpr->maxRangeRadius;
-		exe_params->optQuantMode = 1;		
+		exe_params->optQuantMode = 1;
 	}
-	
+
 	//correct dimension if needed
 	size_t _r[5];
 	filterDimension(r5, r4, r3, r2, r1, _r);
@@ -313,23 +316,23 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 	size_t _r3 = _r[2];
 	size_t _r2 = _r[1];
 	size_t _r1 = _r[0];
-	
+
 	confparams_cpr->dataType = dataType;
 	if(dataType==SZ_FLOAT)
 	{
 		unsigned char *newByteData = NULL;
-		
-		SZ_compress_args_float(-1, confparams_cpr->withRegression, &newByteData, (float *)data, _r5, _r4, _r3, _r2, _r1, 
+
+		SZ_compress_args_float(-1, confparams_cpr->withRegression, &newByteData, (float *)data, _r5, _r4, _r3, _r2, _r1,
 		outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio);
-		
+
 		return newByteData;
 	}
 	else if(dataType==SZ_DOUBLE)
 	{
 		unsigned char *newByteData;
-		SZ_compress_args_double(-1, confparams_cpr->withRegression, &newByteData, (double *)data, _r5, _r4, _r3, _r2, _r1, 
+		SZ_compress_args_double(-1, confparams_cpr->withRegression, &newByteData, (double *)data, _r5, _r4, _r3, _r2, _r1,
 		outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio);
-		
+
 		return newByteData;
 	}
 	else if(dataType==SZ_INT64)
@@ -337,7 +340,7 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 		unsigned char *newByteData;
 		SZ_compress_args_int64(&newByteData, data, _r5, _r4, _r3, _r2, _r1, outSize, errBoundMode, absErrBound, relBoundRatio);
 		return newByteData;
-	}		
+	}
 	else if(dataType==SZ_INT32) //int type
 	{
 		unsigned char *newByteData;
@@ -348,7 +351,7 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 	{
 		unsigned char *newByteData;
 		SZ_compress_args_int16(&newByteData, data, _r5, _r4, _r3, _r2, _r1, outSize, errBoundMode, absErrBound, relBoundRatio);
-		return newByteData;		
+		return newByteData;
 	}
 	else if(dataType==SZ_INT8)
 	{
@@ -361,7 +364,7 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 		unsigned char *newByteData;
 		SZ_compress_args_uint64(&newByteData, data, _r5, _r4, _r3, _r2, _r1, outSize, errBoundMode, absErrBound, relBoundRatio);
 		return newByteData;
-	}		
+	}
 	else if(dataType==SZ_UINT32) //int type
 	{
 		unsigned char *newByteData;
@@ -372,14 +375,14 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 	{
 		unsigned char *newByteData;
 		SZ_compress_args_uint16(&newByteData, data, _r5, _r4, _r3, _r2, _r1, outSize, errBoundMode, absErrBound, relBoundRatio);
-		return newByteData;		
+		return newByteData;
 	}
 	else if(dataType==SZ_UINT8)
 	{
 		unsigned char *newByteData;
-		SZ_compress_args_uint8(&newByteData, data, r5, r4, r3, r2, r1, outSize, errBoundMode, absErrBound, relBoundRatio);
+		SZ_compress_args_uint8(&newByteData, data, _r5, _r4, _r3, _r2, _r1, outSize, errBoundMode, absErrBound, relBoundRatio); //filtered dims, as for every other data type
 		return newByteData;
-	} 	
+	}
 	else
 	{
 		printf("Error: dataType can only be SZ_FLOAT, SZ_DOUBLE, SZ_INT8/16/32/64 or SZ_UINT8/16/32/64.\n");
@@ -387,17 +390,17 @@ double relBoundRatio, double pwrBoundRatio, size_t r5, size_t r4, size_t r3, siz
 	}
 }
 
-int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, 
-int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio, 
+int SZ_compress_args2(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize,
+int errBoundMode, double absErrBound, double relBoundRatio, double pwrBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	unsigned char* bytes = SZ_compress_args(dataType, data, outSize, errBoundMode, absErrBound, relBoundRatio, pwrBoundRatio, r5, r4, r3, r2, r1);
     memcpy(compressed_bytes, bytes, *outSize);
-    free(bytes); 
+    free(bytes);
 	return SZ_SCES;
 }
 
-int SZ_compress_args3(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+int SZ_compress_args3(int dataType, void *data, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1,
 size_t s5, size_t s4, size_t s3, size_t s2, size_t s1,
 size_t e5, size_t e4, size_t e3, size_t e2, size_t e1)
@@ -405,34 +408,34 @@ size_t e5, size_t e4, size_t e3, size_t e2, size_t e1)
 	confparams_cpr->dataType = dataType;
 	if(dataType==SZ_FLOAT)
 	{
-		SZ_compress_args_float_subblock(compressed_bytes, (float *)data, 
+		SZ_compress_args_float_subblock(compressed_bytes, (float *)data,
 		r5, r4, r3, r2, r1,
 		s5, s4, s3, s2, s1,
 		e5, e4, e3, e2, e1,
 		outSize, errBoundMode, absErrBound, relBoundRatio);
-		
+
 		return SZ_SCES;
 	}
 	else if(dataType==SZ_DOUBLE)
 	{
-		SZ_compress_args_double_subblock(compressed_bytes, (double *)data, 
+		SZ_compress_args_double_subblock(compressed_bytes, (double *)data,
 		r5, r4, r3, r2, r1,
 		s5, s4, s3, s2, s1,
 		e5, e4, e3, e2, e1,
 		outSize, errBoundMode, absErrBound, relBoundRatio);
-		
+
 		return SZ_SCES;
 	}
 	else
 	{
 		printf("Error (in SZ_compress_args3): dataType can only be SZ_FLOAT or SZ_DOUBLE.\n");
 		return SZ_NSCS;
-	}	
+	}
 }
 
 unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
-{	
-	unsigned char *newByteData = SZ_compress_args(dataType, data, outSize, confparams_cpr->errorBoundMode, confparams_cpr->absErrBound, confparams_cpr->relBoundRatio, 
+{
+	unsigned char *newByteData = SZ_compress_args(dataType, data, outSize, confparams_cpr->errorBoundMode, confparams_cpr->absErrBound, confparams_cpr->relBoundRatio,
 	confparams_cpr->pw_relBoundRatio, r5, r4, r3, r2, r1);
 	return newByteData;
 }
@@ -440,7 +443,7 @@ unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5,
 //////////////////
 /*-------------------------------------------------------------------------*/
 /**
-    @brief      Perform Compression 
+    @brief      Perform Compression
     @param      data           data to be compressed
     @param		reservedValue  the reserved value
     @param      outSize        the size (in bytes) after compression
@@ -449,18 +452,18 @@ unsigned char *SZ_compress(int dataType, void *data, size_t *outSize, size_t r5,
 
  **/
 /*-------------------------------------------------------------------------*/
-unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+unsigned char *SZ_compress_rev_args(int dataType, void *data, void *reservedValue, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	unsigned char *newByteData;
 	//TODO
 	printf("SZ compression with reserved data is TO BE DONE LATER.\n");
 	exit(0);
-	
-	return newByteData;	
+
+	return newByteData;
 }
 
-int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio, 
+int SZ_compress_rev_args2(int dataType, void *data, void *reservedValue, unsigned char* compressed_bytes, size_t *outSize, int errBoundMode, double absErrBound, double relBoundRatio,
 size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	confparams_cpr->dataType = dataType;
@@ -476,7 +479,7 @@ unsigned char *SZ_compress_rev(int dataType, void *data, void *reservedValue, si
 	//TODO
 	printf("SZ compression with reserved data is TO BE DONE LATER.\n");
 	exit(0);
-	
+
 	return newByteData;
 }
 
@@ -489,25 +492,25 @@ void *SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_
 		exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
 	memset(exe_params, 0, sizeof(sz_exedata));
 	exe_params->SZ_SIZE_TYPE = 8;
-	
+
 	int x = 1;
 	char *y = (char*)&x;
 	if(*y==1)
 		sysEndianType = LITTLE_ENDIAN_SYSTEM;
 	else //=0
 		sysEndianType = BIG_ENDIAN_SYSTEM;
-	
+
 	if(dataType == SZ_FLOAT)
 	{
 		float *newFloatData;
 		SZ_decompress_args_float(&newFloatData, r5, r4, r3, r2, r1, bytes, byteLength, 0, NULL);
-		return newFloatData;	
+		return newFloatData;
 	}
 	else if(dataType == SZ_DOUBLE)
 	{
 		double *newDoubleData;
 		SZ_decompress_args_double(&newDoubleData, r5, r4, r3, r2, r1, bytes, byteLength, 0, NULL);
-		return newDoubleData;	
+		return newDoubleData;
 	}
 	else if(dataType == SZ_INT8)
 	{
@@ -557,22 +560,22 @@ void *SZ_decompress(int dataType, unsigned char *bytes, size_t byteLength, size_
 		SZ_decompress_args_uint64(&newUInt64Data, r5, r4, r3, r2, r1, bytes, byteLength);
 		return newUInt64Data;
 	}
-	else 
+	else
 	{
 		printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n");
-		return NULL;	
+		return NULL;
 	}
 }
 
 /**
- * 
- * 
+ *
+ *
  * return number of elements or -1 if any errors
  * */
 size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength, void* decompressed_array, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	//size_t i;
-	
+
 	//correct dimension if needed
 	size_t _r[5];
 	filterDimension(r5, r4, r3, r2, r1, _r);
@@ -580,18 +583,18 @@ size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength,
 	size_t _r4 = _r[3];
 	size_t _r3 = _r[2];
 	size_t _r2 = _r[1];
-	size_t _r1 = _r[0];	
-	
+	size_t _r1 = _r[0];
+
 	size_t nbEle = computeDataLength(_r5,_r4,_r3,_r2,_r1);
-	
+
 	if(dataType == SZ_FLOAT)
 	{
 		float* data = (float *)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		float* data_array = (float *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(float));
 		//for(i=0;i<nbEle;i++)
-		//	data_array[i] = data[i];	
-		free(data); //this free operation seems to not work with BlueG/Q system.	
+		//	data_array[i] = data[i];
+		free(data); //this free operation seems to not work with BlueG/Q system.
 	}
 	else if (dataType == SZ_DOUBLE)
 	{
@@ -600,7 +603,7 @@ size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength,
 		memcpy(data_array, data, nbEle*sizeof(double));
 		//for(i=0;i<nbEle;i++)
 		//	data_array[i] = data[i];
-		free(data); //this free operation seems to not work with BlueG/Q system.	
+		free(data); //this free operation seems to not work with BlueG/Q system.
 	}
 	else if(dataType == SZ_INT8)
 	{
@@ -614,21 +617,21 @@ size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength,
 		int16_t* data = (int16_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		int16_t* data_array = (int16_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(int16_t));
-		free(data);	
+		free(data);
 	}
 	else if(dataType == SZ_INT32)
 	{
 		int32_t* data = (int32_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		int32_t* data_array = (int32_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(int32_t));
-		free(data);	
+		free(data);
 	}
 	else if(dataType == SZ_INT64)
 	{
 		int64_t* data = (int64_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		int64_t* data_array = (int64_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(int64_t));
-		free(data);		
+		free(data);
 	}
 	else if(dataType == SZ_UINT8)
 	{
@@ -642,26 +645,26 @@ size_t SZ_decompress_args(int dataType, unsigned char *bytes, size_t byteLength,
 		uint16_t* data = (uint16_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		uint16_t* data_array = (uint16_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(uint16_t));
-		free(data);		
+		free(data);
 	}
 	else if(dataType == SZ_UINT32)
 	{
 		uint32_t* data = (uint32_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		uint32_t* data_array = (uint32_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(uint32_t));
-		free(data);		
+		free(data);
 	}
 	else if(dataType == SZ_UINT64)
 	{
 		uint64_t* data = (uint64_t*)SZ_decompress(dataType, bytes, byteLength, _r5, _r4, _r3, _r2, _r1);
 		uint64_t* data_array = (uint64_t *)decompressed_array;
 		memcpy(data_array, data, nbEle*sizeof(uint64_t));
-		free(data);			
+		free(data);
 	}
 	else
-	{ 
+	{
 		printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n");
-		return SZ_NSCS; //indicating error		
+		return SZ_NSCS; //indicating error
 	}
 
 	return nbEle;
@@ -679,40 +682,40 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes)
 	isConstant = sameRByte & 0x01;
 	//confparams_dec->szMode = (sameRByte & 0x06)>>1;
 	isLossless = (sameRByte & 0x10)>>4;
-	
+
 	int isRegressionBased = (sameRByte >> 7) & 0x01;
-	
+
 	if(exe_params==NULL)
 	{
 		exe_params = (sz_exedata *)malloc(sizeof(struct sz_exedata));
 		memset(exe_params, 0, sizeof(struct sz_exedata));
 	}
 	exe_params->SZ_SIZE_TYPE = ((sameRByte & 0x40)>>6)==1?8:4;
-	
+
 	if(confparams_dec==NULL)
 	{
 		confparams_dec = (sz_params*)malloc(sizeof(sz_params));
 		memset(confparams_dec, 0, sizeof(sz_params));
-	}	
-	
+	}
+
 	convertBytesToSZParams(&(bytes[index]), confparams_dec);
 	/*sz_params* params = convertBytesToSZParams(&(bytes[index]));
 	if(confparams_dec!=NULL)
 		free(confparams_dec);
-	confparams_dec = params;*/	
+	confparams_dec = params;*/
 	if(confparams_dec->dataType==SZ_FLOAT)
 		index += MetaDataByteLength;
 	else if(confparams_dec->dataType==SZ_DOUBLE)
 		index += MetaDataByteLength_double;
-	
+
 	if(confparams_dec->dataType!=SZ_FLOAT && confparams_dec->dataType!= SZ_DOUBLE) //if this type is an Int type
 		index++; //jump to the dataLength info byte address
-	dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8	
+	dataSeriesLength = bytesToSize(&(bytes[index]));// 4 or 8
 	index += exe_params->SZ_SIZE_TYPE;
 	//index += 4; //max_quant_intervals
 
 	sz_metadata* metadata = (sz_metadata*)malloc(sizeof(struct sz_metadata));
-	
+
 	metadata->versionNumber[0] = versions[0];
 	metadata->versionNumber[1] = versions[1];
 	metadata->versionNumber[2] = versions[2];
@@ -720,9 +723,9 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes)
 	metadata->isLossless = isLossless;
 	metadata->sizeType = exe_params->SZ_SIZE_TYPE;
 	metadata->dataSeriesLength = dataSeriesLength;
-	
+
 	metadata->conf_params = confparams_dec;
-	
+
 	int defactoNBBins = 0; //real # bins
 	if(isConstant==0 && isLossless==0)
 	{
@@ -740,15 +743,15 @@ sz_metadata* SZ_getMetadata(unsigned char* bytes)
 				segmentL = exe_params->SZ_SIZE_TYPE;
 				pwrErrBoundBytesL = 4;
 			}
-			
+
 			int mdbl = confparams_dec->dataType==SZ_FLOAT?MetaDataByteLength:MetaDataByteLength_double;
-			int offset_typearray = 3 + 1 + mdbl + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + confparams_dec->dataType*4) + 1 + 8 
+			int offset_typearray = 3 + 1 + mdbl + exe_params->SZ_SIZE_TYPE + 4 + radExpoL + segmentL + pwrErrBoundBytesL + 4 + (4 + confparams_dec->dataType*4) + 1 + 8
 					+ exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + exe_params->SZ_SIZE_TYPE + 4;
-			defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray);			
+			defactoNBBins = bytesToInt_bigEndian(bytes+offset_typearray);
 		}
 
-	}	
-	
+	}
+
 	metadata->defactoNBBins = defactoNBBins;
 	return metadata;
 }
@@ -759,9 +762,9 @@ void SZ_printMetadata(sz_metadata* metadata)
 	printf("Version:                        \t %d.%d.%d\n", metadata->versionNumber[0], metadata->versionNumber[1], metadata->versionNumber[2]);
 	printf("Constant data?:                 \t %s\n", metadata->isConstant==1?"YES":"NO");
 	printf("Lossless?:                      \t %s\n", metadata->isLossless==1?"YES":"NO");
-	printf("Size type (size of # elements): \t %d bytes\n", metadata->sizeType); 
+	printf("Size type (size of # elements): \t %d bytes\n", metadata->sizeType);
 	printf("Num of elements:                \t %zu\n", metadata->dataSeriesLength);
-		
+
 	sz_params* params = metadata->conf_params;
 
 	if(params->sol_ID == SZ)
@@ -775,39 +778,39 @@ void SZ_printMetadata(sz_metadata* metadata)
 	case SZ_FLOAT:
 		printf("Data type:                      \t FLOAT\n");
 		printf("min value of raw data:          \t %f\n", params->fmin);
-		printf("max value of raw data:          \t %f\n", params->fmax);		
+		printf("max value of raw data:          \t %f\n", params->fmax);
 		break;
 	case SZ_DOUBLE:
 		printf("Data type:                      \t DOUBLE\n");
 		printf("min value of raw data:          \t %f\n", params->dmin);
-		printf("max value of raw data:          \t %f\n", params->dmax);	
+		printf("max value of raw data:          \t %f\n", params->dmax);
 		break;
 	case SZ_INT8:
 		printf("Data type:                      \t INT8\n");
-		break;	
+		break;
 	case SZ_INT16:
 		printf("Data type:                      \t INT16\n");
 		break;
 	case SZ_INT32:
 		printf("Data type:                      \t INT32\n");
-		break;	
+		break;
 	case SZ_INT64:
 		printf("Data type:                      \t INT64\n");
-		break;	
+		break;
 	case SZ_UINT8:
 		printf("Data type:                      \t UINT8\n");
-		break;	
+		break;
 	case SZ_UINT16:
 		printf("Data type:                      \t UINT16\n");
 		break;
 	case SZ_UINT32:
 		printf("Data type:                      \t UINT32\n");
-		break;	
+		break;
 	case SZ_UINT64:
 		printf("Data type:                      \t UINT64\n");
-		break;				
+		break;
 	}
-	
+
 	if(exe_params->optQuantMode==1)
 	{
 		printf("quantization_intervals:         \t 0\n");
@@ -817,9 +820,9 @@ void SZ_printMetadata(sz_metadata* metadata)
 	else
 	{
 		printf("quantization_intervals:         \t %d\n", params->quantization_intervals);
-		printf("max_quant_intervals:            \t - %d\n", params->max_quant_intervals);		
+		printf("max_quant_intervals:            \t - %d\n", params->max_quant_intervals);
 	}
-	
+
 	printf("dataEndianType (prior raw data):\t %s\n", dataEndianType==BIG_ENDIAN_DATA?"BIG_ENDIAN":"LITTLE_ENDIAN");
 	printf("sysEndianType (at compression): \t %s\n", sysEndianType==1?"BIG_ENDIAN":"LITTLE_ENDIAN");
 	printf("sampleDistance:                 \t %d\n", params->sampleDistance);
@@ -840,12 +843,12 @@ void SZ_printMetadata(sz_metadata* metadata)
 		break;
 	case Z_DEFAULT_COMPRESSION:
-		printf("gzipMode:                       \t Z_BEST_SPEED\n");
+		printf("gzipMode:                       \t Z_DEFAULT_COMPRESSION\n");
-		break;	
+		break;
 	case Z_BEST_COMPRESSION:
 		printf("gzipMode:                       \t Z_BEST_COMPRESSION\n");
 		break;
 	}
-	
+
 	switch(params->errorBoundMode)
 	{
 	case ABS:
@@ -890,7 +893,7 @@ void SZ_printMetadata(sz_metadata* metadata)
 		printf("range_relBoundRatio:            \t %f\n", params->relBoundRatio);
 		break;
 	}
-	
+
 	if(params->errorBoundMode>=PW_REL && params->errorBoundMode<=REL_OR_PW_REL)
 	{
 		printf("pw_relBoundRatio:               \t %f\n", params->pw_relBoundRatio);
@@ -940,7 +943,7 @@ void filloutDimArray(size_t* dim, size_t r5, size_t r4, size_t r3, size_t r2, si
 		dim[1] = r4;
 		dim[2] = r3;
 		dim[3] = r2;
-		dim[4] = r1;		
+		dim[4] = r1;
 	}
 }
 
@@ -960,15 +963,15 @@ size_t compute_total_batch_size()
 	return totalSize;
 }
 
-void SZ_registerVar(int var_id, char* varName, int dataType, void* data, 
-			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio, 
+void SZ_registerVar(int var_id, char* varName, int dataType, void* data,
+			int errBoundMode, double absErrBound, double relBoundRatio, double pwRelBoundRatio,
 			size_t r5, size_t r4, size_t r3, size_t r2, size_t r1)
 {
 	if(sz_tsc==NULL)
 		initSZ_TSC();
-		
+
 	//char str[256];
-	SZ_batchAddVar(var_id, varName, dataType, data, 
+	SZ_batchAddVar(var_id, varName, dataType, data,
 			errBoundMode, absErrBound, relBoundRatio, pwRelBoundRatio, r5, r4, r3, r2, r1);
 	//sprintf(str, "%d: %s : %zuX%zuX%zuX%zu%zu : %d : %f : %f : %f\n", sz_varset->count - 1, varName, r5, r4, r3, r2, r1, errBoundMode, absErrBound, relBoundRatio, pwRelBoundRatio);
 	//fputs(str, sz_tsc->metadata_file);
@@ -994,13 +997,13 @@ int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned cha
 {
 	confparams_cpr->szMode = SZ_TEMPORAL_COMPRESSION;
 	confparams_cpr->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE;
-	
+
 	SZ_VarSet* vset = sz_varset;
-	int i = 0, j = 0, totalSize = 0;	
+	int i = 0, j = 0, totalSize = 0;
 
 	SZ_Variable* vp[256];
 
-	SZ_Variable* v = vset->header->next;	
+	SZ_Variable* v = vset->header->next;
 	for(i = 0;i<vset->count;i++)
 	{
 		int found = checkVarID(v->var_id, var_ids, var_count);
@@ -1015,7 +1018,7 @@ int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned cha
 			{
 				SZ_compress_args_double(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (double*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio);
 			}
-		
+
 			totalSize += v->compressedSize;
 			v->compressType = multisteps->compressionType;
 			vp[j] = v;
@@ -1023,9 +1026,9 @@ int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned cha
 		}
 		v = v->next;
 	}
-	
+
 	*outSize = sizeof(int) + sizeof(unsigned short) + totalSize + var_count*(3*sizeof(unsigned char)+sizeof(size_t));
-	*newByteData = (unsigned char*)malloc(*outSize); 
+	*newByteData = (unsigned char*)malloc(*outSize);
 	unsigned char* p = *newByteData;
 
 	intToBytes_bigEndian(p, sz_tsc->currentStep);
@@ -1043,14 +1046,14 @@ int SZ_compress_ts_select_var(int cmprType, unsigned char* var_ids, unsigned cha
 		*p = (unsigned char)v->dataType; //1 byte
 		p++;
 		sizeToBytes(p, v->compressedSize); //size_t
-		p += sizeof(size_t);							
+		p += sizeof(size_t);
 		memcpy(p, v->compressedBytes, v->compressedSize); //outSize_[i]
 		p += v->compressedSize;
 	}
 
-	sz_tsc->currentStep ++;	
-	
-	return SZ_SCES;	
+	sz_tsc->currentStep ++;
+
+	return SZ_SCES;
 }
 
 /**
@@ -1060,16 +1063,16 @@ int SZ_compress_ts(int cmprType, unsigned char** newByteData, size_t *outSize)
 {
 	confparams_cpr->szMode = SZ_TEMPORAL_COMPRESSION;
 	confparams_cpr->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE;
-	
+
 	SZ_VarSet* vset = sz_varset;
-	
+
 	//char *metadata_str = (char*)malloc(vset->count*256);
 	//memset(metadata_str, 0, vset->count*256);
 	//sprintf(metadata_str, "step %d", sz_tsc->currentStep);
-	
+
 	int i = 0, totalSize = 0;
-	
-	SZ_Variable* v = vset->header->next;	
+
+	SZ_Variable* v = vset->header->next;
 	for(i=0;i<vset->count;i++)
 	{
 		multisteps = v->multisteps; //assign the v's multisteps to the global variable 'multisteps', which will be used in the following compression.
@@ -1083,27 +1086,27 @@ int SZ_compress_ts(int cmprType, unsigned char** newByteData, size_t *outSize)
 			SZ_compress_args_double(cmprType, confparams_cpr->withRegression, &(v->compressedBytes), (double*)v->data, v->r5, v->r4, v->r3, v->r2, v->r1, &(v->compressedSize), v->errBoundMode, v->absErrBound, v->relBoundRatio, v->pwRelBoundRatio);
 		}
 		//sprintf(metadata_str, "%s:%d,%d,%zu", metadata_str, i, multisteps->lastSnapshotStep, outSize_[i]);
-		
+
 		totalSize += v->compressedSize;
 		v->compressType = multisteps->compressionType;
 		v = v->next;
 	}
-	
+
 	//sprintf(metadata_str, "%s\n", metadata_str);
 	//fputs(metadata_str, sz_tsc->metadata_file);
 	//free(metadata_str);
-	
-	//sizeof(int)==current time step; 2*sizeof(char)+sizeof(size_t)=={compressionType + datatype + compression_data_size}; 
+
+	//sizeof(int)==current time step; 2*sizeof(char)+sizeof(size_t)=={compressionType + datatype + compression_data_size};
 	//sizeof(char)==# variables
 	*outSize = sizeof(int) + sizeof(unsigned short) + totalSize + vset->count*(3*sizeof(unsigned char)+sizeof(size_t));
-	*newByteData = (unsigned char*)malloc(*outSize); 
+	*newByteData = (unsigned char*)malloc(*outSize);
 	unsigned char* p = *newByteData;
 
 	intToBytes_bigEndian(p, sz_tsc->currentStep);
 	p+=4;
 	shortToBytes(p, vset->count);
 	p+=2;
-	
+
 	v = vset->header->next;
 
 	for(i=0;i<vset->count;i++)
@@ -1116,15 +1119,15 @@ int SZ_compress_ts(int cmprType, unsigned char** newByteData, size_t *outSize)
 		p++;
 		sizeToBytes(p, v->compressedSize); //size_t
 		p += sizeof(size_t);
-		
+
 		memcpy(p, v->compressedBytes, v->compressedSize); //outSize_[i]
 		p += v->compressedSize;
 		v = v->next;
 	}
 
-	sz_tsc->currentStep ++;	
+	sz_tsc->currentStep ++;
 	//free(outSize_);
-	
+
 	return SZ_SCES;
 }
 
@@ -1135,29 +1138,29 @@ void SZ_decompress_ts(unsigned char *bytes, size_t bytesLength)
 	memset(confparams_dec, 0, sizeof(sz_params));
 	confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION;
 	confparams_dec->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE;
-	
+
 	if(exe_params==NULL)
 		exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
 	memset(exe_params, 0, sizeof(sz_exedata));
-	
+
 	int x = 1;
 	char *y = (char*)&x;
 	if(*y==1)
 		sysEndianType = LITTLE_ENDIAN_SYSTEM;
 	else //=0
 		sysEndianType = BIG_ENDIAN_SYSTEM;
-	
+
 	int i = 0;
 	size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0;
 	unsigned char* q = bytes;
-	sz_tsc->currentStep = bytesToInt_bigEndian(q); 
+	sz_tsc->currentStep = bytesToInt_bigEndian(q);
 	q += 4;
 	unsigned short nbVars = (unsigned short)bytesToShort(q);
 	q += 2;
-	
+
 	float *newFloatData = NULL;
-	double *newDoubleData = NULL;	
-	
+	double *newDoubleData = NULL;
+
 	for(i=0;i<nbVars;i++)
 	{
 		unsigned char var_id = *(q++);
@@ -1167,7 +1170,7 @@ void SZ_decompress_ts(unsigned char *bytes, size_t bytesLength)
 		unsigned char dataType = *(q++);
 		size_t cmpSize = bytesToSize(q);
 		q += sizeof(size_t);
-		
+
 		if(p==NULL)
 			q += cmpSize;
 		else
@@ -1178,9 +1181,9 @@ void SZ_decompress_ts(unsigned char *bytes, size_t bytesLength)
 			r3 = p->r3;
 			r2 = p->r2;
 			r1 = p->r1;
-			size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);				
-			
-			unsigned char* cmpBytes = q;			
+			size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);
+
+			unsigned char* cmpBytes = q;
 			switch(dataType)
 			{
 			case SZ_FLOAT:
@@ -1195,12 +1198,12 @@ void SZ_decompress_ts(unsigned char *bytes, size_t bytesLength)
 					break;
 			default:
 					printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n");
-					return;	
+					return;
 			}
-			
-			q += cmpSize;			
+
+			q += cmpSize;
 		}
-	}	
+	}
 }
 
 void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count, unsigned char *bytes, size_t bytesLength)
@@ -1210,29 +1213,29 @@ void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count
 	memset(confparams_dec, 0, sizeof(sz_params));
 	confparams_dec->szMode = SZ_TEMPORAL_COMPRESSION;
 	confparams_dec->predictionMode = SZ_PREVIOUS_VALUE_ESTIMATE;
-	
+
 	if(exe_params==NULL)
 		exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
 	memset(exe_params, 0, sizeof(sz_exedata));
-	
+
 	int x = 1;
 	char *y = (char*)&x;
 	if(*y==1)
 		sysEndianType = LITTLE_ENDIAN_SYSTEM;
 	else //=0
 		sysEndianType = BIG_ENDIAN_SYSTEM;
-	
+
 	int i = 0;
 	size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0;
 	unsigned char* q = bytes;
-	sz_tsc->currentStep = bytesToInt_bigEndian(q); 
+	sz_tsc->currentStep = bytesToInt_bigEndian(q);
 	q += 4;
 	unsigned short nbVars = (unsigned short)bytesToShort(q);
 	q += 2;
-	
+
 	float *newFloatData = NULL;
-	double *newDoubleData = NULL;	
-	
+	double *newDoubleData = NULL;
+
 	for(i=0;i<nbVars;i++)
 	{
 		unsigned char var_id = *(q++);
@@ -1243,7 +1246,7 @@ void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count
 		unsigned char dataType = *(q++);
 		size_t cmpSize = bytesToSize(q);
 		q += sizeof(size_t);
-		
+
 		if(p==NULL || selected == 0) //p==NULL means the variable was not registered during compression ; selected==0 means that the variable is not selected
 			q += cmpSize;
 		else // p!=NULL && selected == 1
@@ -1254,9 +1257,9 @@ void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count
 			r3 = p->r3;
 			r2 = p->r2;
 			r1 = p->r1;
-			size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);				
-			
-			unsigned char* cmpBytes = q;			
+			size_t dataLen = computeDataLength(r5, r4, r3, r2, r1);
+
+			unsigned char* cmpBytes = q;
 			switch(dataType)
 			{
 			case SZ_FLOAT:
@@ -1271,19 +1274,19 @@ void SZ_decompress_ts_select_var(unsigned char* var_ids, unsigned char var_count
 					break;
 			default:
 					printf("Error: data type cannot be the types other than SZ_FLOAT or SZ_DOUBLE\n");
-					return;	
+					return;
 			}
-			
-			q += cmpSize;			
+
+			q += cmpSize;
 		}
-	}	
+	}
 }
 #endif
 
 
 void SZ_Finalize()
 {
-#ifdef HAVE_TIMECMPR		
+#ifdef HAVE_TIMECMPR
 	if(sz_varset!=NULL)
 		SZ_freeVarSet(SZ_MAINTAIN_VAR_DATA);
 #endif
@@ -1297,14 +1300,14 @@ void SZ_Finalize()
 	{
 		free(confparams_cpr);
 		confparams_cpr = NULL;
-	}	
+	}
 	if(exe_params!=NULL)
 	{
 		free(exe_params);
 		exe_params = NULL;
 	}
-	
-//#ifdef HAVE_TIMECMPR	
+
+//#ifdef HAVE_TIMECMPR
 //	if(sz_tsc!=NULL && sz_tsc->metadata_file!=NULL)
 //		fclose(sz_tsc->metadata_file);
 //#endif
@@ -1331,9 +1334,9 @@ static void sz_maybe_init_with_user_params(struct sz_params* userPara, struct sz
 
 
 /**
- * 
- * The interface for the user-customized compression method 
- * 
+ *
+ * The interface for the user-customized compression method
+ *
  * @param char* comprName : the name of the specific compression approach
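- * @param void* userPara : the pointer of the user-customized data stracture containing the cusotmized compressors' requried input parameters
+ * @param void* userPara : the pointer to the user-customized data structure containing the customized compressor's required input parameters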
  * @param int dataType : data type (SZ_FLOAT, SZ_DOUBLE, SZ_INT8, SZ_UINT8, SZ_INT16, SZ_UINT16, ....)
@@ -1345,7 +1348,7 @@ static void sz_maybe_init_with_user_params(struct sz_params* userPara, struct sz
  * @param size_t r1 : the size of dimension 1
  * @param size_t outSize : the number of bytes after compression
  * @param int *status : the execution status of the compression operation (success: SZ_SCES or fail: SZ_NSCS)
- * 
+ *
  * */
 unsigned char* SZ_compress_customize(const char* cmprName, void* userPara, int dataType, void* data, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, int *status)
 {
@@ -1360,9 +1363,9 @@ unsigned char* SZ_compress_customize(const char* cmprName, void* userPara, int d
 	{
 		sz_maybe_init_with_user_params(userPara, confparams_cpr);
 		confparams_cpr->withRegression = SZ_NO_REGRESSION;
-		
+
 		result = SZ_compress(dataType, data, outSize, r5, r4, r3, r2, r1);
-		*status = SZ_SCES;		
+		*status = SZ_SCES;
     }
     else if(strcmp(cmprName, "SZ_Transpose")==0)
     {
@@ -1389,38 +1392,36 @@ unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* user
 	unsigned char* result = NULL;
 	if(strcmp(cmprName, "SZ2.0")==0 || strcmp(cmprName, "SZ2.1")==0 || strcmp(cmprName, "SZ")==0)
 	{
-		SZ_Init(NULL);
 		struct sz_params* para = (struct sz_params*)userPara;
-		
+
 		if(dataType==SZ_FLOAT)
-		{	
-			SZ_compress_args_float(-1, SZ_WITH_LINEAR_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1, 
+		{
+			SZ_compress_args_float(-1, SZ_WITH_LINEAR_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1,
 			outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio);
 		}
 		else if(dataType==SZ_DOUBLE)
 		{
-			SZ_compress_args_double(-1, SZ_WITH_LINEAR_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1, 
+			SZ_compress_args_double(-1, SZ_WITH_LINEAR_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1,
 			outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio);
-		}		
+		}
 
 		*status = SZ_SCES;
 		return result;
 	}
 	else if(strcmp(cmprName, "SZ1.4")==0)
 	{
-		SZ_Init(NULL);
 		struct sz_params* para = (struct sz_params*)userPara;
-		
+
 		if(dataType==SZ_FLOAT)
-		{	
-			SZ_compress_args_float(-1, SZ_NO_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1, 
+		{
+			SZ_compress_args_float(-1, SZ_NO_REGRESSION, &result, (float *)data, r5, r4, r3, r2, r1,
 			outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio);
 		}
 		else if(dataType==SZ_DOUBLE)
 		{
-			SZ_compress_args_double(-1, SZ_NO_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1, 
+			SZ_compress_args_double(-1, SZ_NO_REGRESSION, &result, (double *)data, r5, r4, r3, r2, r1,
 			outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio);
-		}		
+		}
 
 		*status = SZ_SCES;
 		return result;
@@ -1429,11 +1430,11 @@ unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* user
     {
 		void* transData = transposeData(data, dataType, r5, r4, r3, r2, r1);
 		struct sz_params* para = (struct sz_params*)userPara;
-	
+
 		size_t n = computeDataLength(r5, r4, r3, r2, r1);
-		
+
 		result = SZ_compress_args(dataType, transData, outSize, para->errorBoundMode, para->absErrBound, para->relBoundRatio, para->pw_relBoundRatio, 0, 0, 0, 0, n);
-		
+
 		*status = SZ_SCES;
 	}
     else if(strcmp(cmprName, "ExaFEL")==0){  //not sure if this part is thread safe!
@@ -1451,9 +1452,9 @@ unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* user
 
 
 /**
- * 
- * The interface for the user-customized decompression method 
- * 
+ *
+ * The interface for the user-customized decompression method
+ *
  * @param char* comprName : the name of the specific compression approach
  * @param void* userPara : the pointer of the user-customized data stracture containing the cusotmized compressors' requried input parameters
  * @param int dataType : data type (SZ_FLOAT, SZ_DOUBLE, SZ_INT8, SZ_UINT8, SZ_INT16, SZ_UINT16, ....)
@@ -1464,7 +1465,7 @@ unsigned char* SZ_compress_customize_threadsafe(const char* cmprName, void* user
  * @param size_t r2 : the size of dimension 2
  * @param size_t r1 : the size of dimension 1
  * @param int *status : the execution status of the compression operation (success: SZ_SCES or fail: SZ_NSCS)
- * 
+ *
  * */
 void* SZ_decompress_customize(const char* cmprName, void* userPara, int dataType, unsigned char* bytes, size_t byteLength, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, int *status)
 {
@@ -1490,7 +1491,7 @@ void* SZ_decompress_customize(const char* cmprName, void* userPara, int dataType
 	{
 		*status = SZ_NSCS;
 	}
-	return result;	
+	return result;
 }
 
 
diff --git a/sz/src/sz_double.c b/sz/src/sz_double.c
index dec83fd3..a99b16cd 100644
--- a/sz/src/sz_double.c
+++ b/sz/src/sz_double.c
@@ -7,11 +7,14 @@
  *      See COPYRIGHT in top-level directory.
  */
 
+#include "config.h"
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -58,7 +61,7 @@ inline short computeReqLength_double_MSST19(double realPrecision)
 }
 
 unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	double pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -101,7 +104,7 @@ unsigned int optimize_intervals_double_1D(double *oriData, size_t dataLength, do
 }
 
 unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	double pred_value = 0, pred_err;
@@ -121,7 +124,7 @@ unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2,
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -134,7 +137,7 @@ unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2,
 			break;
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
-		i = confparams_cpr->maxRangeRadius-1;	
+		i = confparams_cpr->maxRangeRadius-1;
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
 	//printf("confparams_cpr->maxRangeRadius = %d, accIntervals=%d, powerOf2=%d\n", confparams_cpr->maxRangeRadius, accIntervals, powerOf2);
@@ -147,7 +150,7 @@ unsigned int optimize_intervals_double_2D(double *oriData, size_t r1, size_t r2,
 }
 
 unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -164,16 +167,16 @@ unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2,
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = fabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
 					if(radiusIndex>=confparams_cpr->maxRangeRadius)
 						radiusIndex = confparams_cpr->maxRangeRadius - 1;
 					intervals[radiusIndex]++;
-				}				
+				}
 			}
-			
+
 		}
 	}
 	//compute the appropriate number
@@ -187,7 +190,7 @@ unsigned int optimize_intervals_double_3D(double *oriData, size_t r1, size_t r2,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
 
@@ -254,21 +257,21 @@ unsigned int optimize_intervals_double_4D(double *oriData, size_t r1, size_t r2,
 	return powerOf2;
 }
 
-TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData, 
+TightDataPointStorageD* SZ_compress_double_1D_MDQ(double *oriData,
 size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_d)
 {
 #ifdef HAVE_TIMECMPR
-	double* decData = NULL;	
+	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 		quantization_intervals = optimize_intervals_double_1D_opt(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	//updateQuantizationInfo(quantization_intervals);	
+	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
 
 	size_t i;
@@ -276,43 +279,43 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 	double medianValue = medianValue_d;
 	short radExpo = getExponent_double(valueRangeSize/2);
 
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
 
 	unsigned char preDataBytes[8];
 	longToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	double last3CmprsData[3] = {0};
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
-	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));			
-				
-	//add the first data	
+	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
+
+	//add the first data
 	type[0] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif		
-		
+#endif
+
 	//add the second data
 	type[1] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
@@ -320,7 +323,7 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = vce->data;
 #endif
@@ -334,12 +337,12 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 
 	double recip_realPrecision = 1/realPrecision;
 	for(i=2;i<dataLength;i++)
-	{				
+	{
 		//printf("%.30G\n",last3CmprsData[0]);
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		//pred = last3CmprsData[0];
-		predAbsErr = fabs(curData - pred);	
+		predAbsErr = fabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr*recip_realPrecision+1)*0.5;
@@ -354,67 +357,67 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 				pred = pred - state*interval;
 			}
 			//listAdd_double(last3CmprsData, pred);
-#ifdef HAVE_TIMECMPR					
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
-				decData[i] = pred;			
-#endif	
+				decData[i] = pred;
+#endif
 			continue;
 		}
-		
+
 		//unpredictable data processing
-		type[i] = 0;		
+		type[i] = 0;
 		compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,8);
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-							
+
 		//listAdd_double(last3CmprsData, vce->data);
 		pred = vce->data;
-		
+
 #ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[i] = vce->data;
-#endif	
-		
+#endif
+
 	}//end of for
-		
+
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
-	
-//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", 
+
+//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
 //			exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
 	free(type);
 	free(vce);
-	free(lce);	
-	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);	
-	
-	return tdps;	
+	free(lce);
+	free(exactMidByteArray); //only the wrapper struct is freed here; exactMidByteArray->array is released later by free_TightDataPointStorageD(tdps)
+
+	return tdps;
 }
 
 void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize)
-{	
+{
 	int doubleSize = sizeof(double);
 	size_t k = 0, i;
 	size_t totalByteLength = 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
 	/*No need to malloc because newByteData should always already be allocated with no less totalByteLength.*/
 	//*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
-	
+
 	if(exe_params->SZ_SIZE_TYPE==4)//1
 		(*newByteData)[k++] = 16; //00010000
 	else
@@ -439,11 +442,11 @@ void SZ_compress_args_double_StoreOriData(double* oriData, size_t dataLength, un
 }
 
 
-char SZ_compress_args_double_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, double *oriData, 
+char SZ_compress_args_double_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, double *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
 {
-	char compressionType = 0;	
-	TightDataPointStorageD* tdps = NULL; 	
+	char compressionType = 0;
+	TightDataPointStorageD* tdps = NULL;
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 	{
@@ -453,49 +456,49 @@ size_t dataLength, double realPrecision, size_t *outSize, double valueRangeSize,
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
-			{	
+			{
 				tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
 				compressionType = 0; //snapshot-based compression
 				multisteps->lastSnapshotStep = timestep;
-			}					
+			}
 		}
 		else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION)
 		{
 			tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-			compressionType = 1; //time-series based compression 			
+			compressionType = 1; //time-series based compression
 		}
 
 	}
 	else
 #endif
-		tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);			
-	
+		tdps = SZ_compress_double_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_d);
+
 	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
-	
+
 	if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength)
 		SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize);
-	
-	free_TightDataPointStorageD(tdps);	
+
+	free_TightDataPointStorageD(tdps);
 	return compressionType;
 }
 
 TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_d)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	double recip_realPrecision = 1/realPrecision;
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
@@ -506,50 +509,50 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
-	
-	size_t i,j; 
+
+	size_t i,j;
 	int reqLength;
 	double pred1D, pred2D;
 	double diff = 0.0;
 	double itvNum = 0;
 	double *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (double*)malloc(r2*sizeof(double));
 	memset(P0, 0, r2*sizeof(double));
 	P1 = (double*)malloc(r2*sizeof(double));
 	memset(P1, 0, r2*sizeof(double));
-		
+
 	double medianValue = medianValue_d;
 	short radExpo = getExponent_double(valueRangeSize/2);
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
-	
+
 	unsigned char preDataBytes[8];
 	longToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
@@ -557,10 +560,10 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif	
+#endif
 
 	/* Process Row-0 data 1*/
 	pred1D = P1[0];
@@ -583,7 +586,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -611,16 +614,16 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -643,11 +646,11 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -673,10 +676,10 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 		double *Pt;
@@ -684,19 +687,19 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 		P1 = P0;
 		P0 = Pt;
 	}
-		
-	if(r2!=1)	
+
+	if(r2!=1)
 		free(P0);
 	free(P1);
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
 /*	int sum =0;
@@ -708,33 +711,33 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ(double *oriData, size_t r1, si
 		printf("%d ", type[i]);
 	printf("\n");*/
 
-//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", 
+//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
 //			exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
-	
+
 //	for(i = 3800;i<3844;i++)
 //		printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
 char SZ_compress_args_double_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
 {
 	size_t dataLength = r1*r2;
-	char compressionType = 0;	
-	TightDataPointStorageD* tdps = NULL; 	
+	char compressionType = 0;
+	TightDataPointStorageD* tdps = NULL;
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 	{
@@ -744,36 +747,36 @@ char SZ_compress_args_double_NoCkRngeNoGzip_2D(int cmprType, unsigned char** new
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
-			{	
+			{
 				tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
 				compressionType = 0; //snapshot-based compression
 				multisteps->lastSnapshotStep = timestep;
-			}					
+			}
 		}
 		else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION)
 		{
 			tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-			compressionType = 1; //time-series based compression 			
+			compressionType = 1; //time-series based compression
 		}
 	}
 	else
 #endif
-		tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);	
-	
+		tdps = SZ_compress_double_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_d);
+
 	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
-	
+
 	if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength)
-		SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize);	
-	
+		SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize);
+
 	free_TightDataPointStorageD(tdps);
 	return compressionType;
 }
@@ -784,7 +787,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif		
+#endif
 
 	double recip_realPrecision = 1/realPrecision;
 	unsigned int quantization_intervals;
@@ -792,12 +795,12 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 	{
 		quantization_intervals = optimize_intervals_double_3D_opt(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	int intvRadius = quantization_intervals/2;	
-		
-	size_t i,j,k; 
+	int intvRadius = quantization_intervals/2;
+
+	size_t i,j,k;
 	int reqLength;
 	double pred1D, pred2D, pred3D;
 	double diff = 0.0;
@@ -813,7 +816,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 
 	double medianValue = medianValue_d;
 	short radExpo = getExponent_double(valueRangeSize/2);
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
@@ -849,7 +852,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[0] = P1[0];
 #endif
@@ -875,7 +878,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -903,10 +906,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r2-1 */
@@ -935,10 +938,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[index] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P1[index];
-#endif		
+#endif
 
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++)
@@ -965,10 +968,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P1[index] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P1[index];
-#endif			
+#endif
 		}
 	}
 
@@ -999,7 +1002,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
@@ -1029,10 +1032,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 	    /* Process Row-1 --> Row-r2-1 */
@@ -1062,10 +1065,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[index2D] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[index2D];
-#endif			
+#endif
 
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
@@ -1093,10 +1096,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 					P0[index2D] = vce->data;
 				}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					decData[index] = P0[index2D];
-#endif				
+#endif
 			}
 		}
 
@@ -1116,7 +1119,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitsLength, 
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
 //	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
@@ -1130,18 +1133,18 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ(double *oriData, size_t r1, si
 	free_DIA(resiBitArray);
 	free(type);
 	free(vce);
-	free(lce);	
-	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);	
-	
-	return tdps;	
+	free(lce);
+	free(exactMidByteArray); //only the wrapper struct is freed here; exactMidByteArray->array is released later by free_TightDataPointStorageD(tdps)
+
+	return tdps;
 }
 
 
 char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, double valueRangeSize, double medianValue_d)
 {
 	size_t dataLength = r1*r2*r3;
-	char compressionType = 0;	
-	TightDataPointStorageD* tdps = NULL; 	
+	char compressionType = 0;
+	TightDataPointStorageD* tdps = NULL;
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 	{
@@ -1151,11 +1154,11 @@ char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** new
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
-			{	
-				if(confparams_cpr->withRegression == SZ_NO_REGRESSION)	
+			{
+				if(confparams_cpr->withRegression == SZ_NO_REGRESSION)
 					tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
 				else
 					*newByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
@@ -1165,31 +1168,31 @@ char SZ_compress_args_double_NoCkRngeNoGzip_3D(int cmprType, unsigned char** new
 		}
 		else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION)
 		{
-			if(confparams_cpr->withRegression == SZ_NO_REGRESSION)	
+			if(confparams_cpr->withRegression == SZ_NO_REGRESSION)
 				tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
 			else
 				*newByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_double_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_d);
-			compressionType = 1; //time-series based compression 			
-		}		
+			compressionType = 1; //time-series based compression
+		}
 	}
 	else
 #endif
-		tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);		
-	
+		tdps = SZ_compress_double_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_d);
+
 	if(tdps!=NULL)
 	{
 		convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
 		if(*outSize>3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1 + sizeof(double)*dataLength)
 			SZ_compress_args_double_StoreOriData(oriData, dataLength, newByteData, outSize);
 		free_TightDataPointStorageD(tdps);
-	}	
-	
+	}
+
 	return compressionType;
 }
 
@@ -1206,7 +1209,7 @@ TightDataPointStorageD* SZ_compress_double_4D_MDQ(double *oriData, size_t r1, si
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	double pred1D, pred2D, pred3D;
 	double diff = 0.0;
@@ -1539,14 +1542,14 @@ char SZ_compress_args_double_NoCkRngeNoGzip_4D(unsigned char** newByteData, doub
 }
 
 /*MSST19*/
-TightDataPointStorageD* SZ_compress_double_1D_MDQ_MSST19(double *oriData, 
+TightDataPointStorageD* SZ_compress_double_1D_MDQ_MSST19(double *oriData,
 size_t dataLength, double realPrecision, double valueRangeSize, double medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif	
+#endif
 
 	//struct ClockPoint clockPointBuild;
 	//TimeDurationStart("build", &clockPointBuild);
@@ -1557,14 +1560,14 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 		quantization_intervals = exe_params->intvCapacity;
 	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
-	
+
 	double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals);
 	double inv = 2.0-pow(2, -(confparams_cpr->plus_bits));
     for(int i=0; i<quantization_intervals; i++){
         double test = pow((1+realPrecision), inv*(i - intvRadius));
         precisionTable[i] = test;
     }
-    
+
 	struct TopLevelTableWideInterval levelTable;
     MultiLevelCacheTableWideIntervalBuild(&levelTable, precisionTable, quantization_intervals, realPrecision, confparams_cpr->plus_bits);
 
@@ -1573,25 +1576,25 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 	double medianValue = medianValue_f;
 	//double medianInverse = 1 / medianValue_f;
 	//short radExpo = getExponent_double(realPrecision);
-	
-	reqLength = computeReqLength_double_MSST19(realPrecision);	
+
+	reqLength = computeReqLength_double_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, dataLength/2/8);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, dataLength/2);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[8];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	double last3CmprsData[3] = {0};
@@ -1600,8 +1603,8 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleDoubleValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
@@ -1609,11 +1612,11 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif		
-		
+#endif
+
 	//add the second data
 	type[1] = 0;
 	compressSingleDoubleValue_MSST19(vce, spaceFillingValue[1], realPrecision, reqLength, reqBytesLength, resiBitsLength);
@@ -1622,7 +1625,7 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = vce->data;
 #endif
@@ -1676,30 +1679,30 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 #ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[i] = vce->data;
-#endif	
-		
+#endif
+
 	}//end of for
-		
+
 //	printf("miss:%d, hit:%d\n", miss, hit);
 
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
+
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
 			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
     tdps->plus_bits = confparams_cpr->plus_bits;
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
 	freeTopLevelTableWideInterval(&levelTable);
@@ -1709,17 +1712,17 @@ size_t dataLength, double realPrecision, double valueRangeSize, double medianVal
 TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision, double valueRangeSize, double medianValue_f)
 {
 #ifdef HAVE_TIMECMPR
-	double* decData = NULL;	
+	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_double_2D_opt_MSST19(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
@@ -1734,44 +1737,44 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 	struct TopLevelTableWideInterval levelTable;
 	MultiLevelCacheTableWideIntervalBuild(&levelTable, precisionTable, quantization_intervals, realPrecision, confparams_cpr->plus_bits);
 
-	size_t i,j; 
+	size_t i,j;
 	int reqLength;
 	double pred1D, pred2D;
 	//double diff = 0.0;
 	//double itvNum = 0;
 	double *P0, *P1;
 	double predRelErrRatio;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (double*)malloc(r2*sizeof(double));
 	memset(P0, 0, r2*sizeof(double));
 	P1 = (double*)malloc(r2*sizeof(double));
 	memset(P1, 0, r2*sizeof(double));
-		
+
 	double medianValue = medianValue_f;
 	//double medianValueInverse = 1 / medianValue_f;
 	//short radExpo = getExponent_double(valueRangeSize/2);
-	reqLength = computeReqLength_double_MSST19(realPrecision);	
+	reqLength = computeReqLength_double_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	double* spaceFillingValue = oriData; //
 
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
 	unsigned char preDataBytes[8];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
@@ -1788,7 +1791,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
     for(int i=0; i<=range; i++){
         tables[i] = levelTable.subTables[i].table;
     }
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	compressSingleDoubleValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength);
@@ -1796,10 +1799,10 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif	
+#endif
 
 	double curData;
 	int state;
@@ -1832,7 +1835,7 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -1866,16 +1869,16 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -1904,11 +1907,11 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -1940,10 +1943,10 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 		double *Pt;
@@ -1951,19 +1954,19 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
 	free(P1);
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 	tdps->plus_bits = confparams_cpr->plus_bits;
 
@@ -1976,23 +1979,23 @@ TightDataPointStorageD* SZ_compress_double_2D_MDQ_MSST19(double *oriData, size_t
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
 	freeTopLevelTableWideInterval(&levelTable);
-	return tdps;	
+	return tdps;
 }
 
 TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double valueRangeSize, double medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
-#endif		
+#endif
 
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_double_3D_opt_MSST19(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
@@ -2023,7 +2026,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 	double medianValue = medianValue_f;
 	//double medianValueInverse = 1/ medianValue_f;
 	//short radExpo = getExponent_double(valueRangeSize/2);
-	reqLength = computeReqLength_double_MSST19(realPrecision);	
+	reqLength = computeReqLength_double_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
@@ -2040,7 +2043,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 
 	unsigned char preDataBytes[8];
 	longToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
@@ -2073,7 +2076,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[0] = P1[0];
 #endif
@@ -2109,7 +2112,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 		P1[1] = vce->data;
 		//miss++;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -2146,10 +2149,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 			P1[j] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r2-1 */
@@ -2157,7 +2160,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		curData = spaceFillingValue[index];
         predRelErrRatio = curData / pred1D;
@@ -2186,10 +2189,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 			P1[index] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P1[index];
-#endif		
+#endif
 
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++)
@@ -2228,10 +2231,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 				P1[index] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P1[index];
-#endif			
+#endif
 		}
 	}
 
@@ -2270,7 +2273,7 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 			P0[0] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
@@ -2309,10 +2312,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 				P0[j] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 	    /* Process Row-1 --> Row-r2-1 */
@@ -2351,10 +2354,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 				P0[index2D] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[index2D];
-#endif			
+#endif
 
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
@@ -2393,10 +2396,10 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 					P0[index2D] = vce->data;
 					//miss++;
 				}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					decData[index] = P0[index2D];
-#endif				
+#endif
 			}
 		}
 
@@ -2416,20 +2419,20 @@ TightDataPointStorageD* SZ_compress_double_3D_MDQ_MSST19(double *oriData, size_t
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitsLength, 
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 	tdps->plus_bits = confparams_cpr->plus_bits;
 
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
 	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
-	freeTopLevelTableWideInterval(&levelTable);	
-	return tdps;	
+	freeTopLevelTableWideInterval(&levelTable);
+	return tdps;
 }
 void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *oriData, size_t dataLength, size_t *outSize)
 {
@@ -2438,7 +2441,7 @@ void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *or
 	tdps->typeArray = NULL;
 	tdps->leadNumArray = NULL;
 	tdps->residualMidBits = NULL;
-	
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactMidBytes = (unsigned char*)malloc(sizeof(unsigned char)*8);
@@ -2447,7 +2450,7 @@ void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *or
 	double value = oriData[0];
 	doubleToBytes(tdps->exactMidBytes, value);
 	tdps->exactMidBytes_size = 8;
-	
+
 	size_t tmpOutSize;
 	//unsigned char *tmpByteData;
 	convertTDPStoFlatBytes_double(tdps, newByteData, &tmpOutSize);
@@ -2456,21 +2459,21 @@ void SZ_compress_args_double_withinRange(unsigned char** newByteData, double *or
 	//*newByteData = (unsigned char*)malloc(sizeof(unsigned char)*16); //for floating-point data (1+3+4+4)
 	//memcpy(*newByteData, tmpByteData, 16);
 	*outSize = tmpOutSize;//12==3+1+8(double_size)+MetaDataByteLength_double
-	free_TightDataPointStorageD(tdps);	
+	free_TightDataPointStorageD(tdps);
 }
 
-/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+/*int SZ_compress_args_double_wRngeNoGzip(unsigned char** newByteData, double *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	double valueRangeSize = 0, medianValue = 0;
-	
+
 	double min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
 	double max = min+valueRangeSize;
 	double realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
@@ -2482,7 +2485,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 			if(errBoundMode>=PW_REL)
 			{
 				SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
-				//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);				
+				//SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
 			}
 			else
 				SZ_compress_args_double_NoCkRngeNoGzip_1D(newByteData, oriData, r1, realPrecision, outSize, valueRangeSize, medianValue);
@@ -2512,35 +2515,35 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 	return status;
 }*/
 
-int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_double(int cmprType, int withRegression, unsigned char** newByteData, double *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
 {
 	confparams_cpr->dataType = SZ_DOUBLE;
 	confparams_cpr->errorBoundMode = errBoundMode;
 	if(errBoundMode==PW_REL)
 	{
-		confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;	
+		confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
 	}
-		
+
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
-	
+
 	if(dataLength <= MIN_NUM_OF_ELEMENTS)
 	{
 		*newByteData = SZ_skip_compress_double(oriData, dataLength, outSize);
 		return status;
 	}
-	
+
 	double valueRangeSize = 0, medianValue = 0;
-	
+
 	unsigned char * signs = NULL;
 	bool positive = true;
 	double nearZero = 0.0;
 	double min = 0;
 	if(pwRelBoundRatio < 0.000009999)
 		confparams_cpr->accelerate_pw_rel_compression = 0;
-		
+
 	if(confparams_cpr->errorBoundMode == PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1)
 	{
 		signs = (unsigned char *) malloc(dataLength);
@@ -2548,13 +2551,13 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 		min = computeRangeSize_double_MSST19(oriData, dataLength, &valueRangeSize, &medianValue, signs, &positive, &nearZero);
 	}
 	else
-		min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);	
+		min = computeRangeSize_double(oriData, dataLength, &valueRangeSize, &medianValue);
 	double max = min+valueRangeSize;
 	confparams_cpr->dmin = min;
 	confparams_cpr->dmax = max;
-	
-	double realPrecision = 0; 
-	
+
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -2564,27 +2567,27 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 	{
 		confparams_cpr->errorBoundMode = ABS;
 		realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromNORM_ERR(confparams_cpr->normErr, dataLength);
-		//printf("realPrecision=%lf\n", realPrecision);				
-	}	
+		//printf("realPrecision=%lf\n", realPrecision);
+	}
 	else
 	{
 		realPrecision = getRealPrecision_double(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
 		confparams_cpr->absErrBound = realPrecision;
-	}	
+	}
 	if(valueRangeSize <= realPrecision)
 	{
 #ifdef HAVE_WRITESTATS
 		writeConstantFlag(1);
-#endif			
+#endif
 		if(confparams_cpr->errorBoundMode>=PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1)
-			free(signs);		
+			free(signs);
 		SZ_compress_args_double_withinRange(newByteData, oriData, dataLength, outSize);
 	}
 	else
 	{
 #ifdef HAVE_WRITESTATS
 		writeConstantFlag(0);
-#endif			
+#endif
 		size_t tmpOutSize = 0;
 		unsigned char* tmpByteData;
 		if (r2==0)
@@ -2621,14 +2624,14 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 			}
 			else
 #ifdef HAVE_TIMECMPR
-				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)			
+				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
 #endif
-				{	
+				{
 					if(withRegression == SZ_NO_REGRESSION)
 						SZ_compress_args_double_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
-					else 
+					else
 					{
 						tmpByteData = SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
 						if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1)
@@ -2655,15 +2658,15 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 				{
 					if(withRegression == SZ_NO_REGRESSION)
 						SZ_compress_args_double_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
-					else 
+					else
 					{
 						tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
 						if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1)
 							SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 					}
 				}
-					
-					
+
+
 		}
 		else
 		if (r5==0)
@@ -2677,33 +2680,33 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 			}
 			else
 #ifdef HAVE_TIMECMPR
-				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)			
+				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					multisteps->compressionType = SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
-#endif	
+#endif
 				{
 					if(withRegression == SZ_NO_REGRESSION)
 						SZ_compress_args_double_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
-					else 
+					else
 					{
-						tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);								
+						tmpByteData = SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize);
 						if(tmpOutSize>=dataLength*sizeof(double) + 3 + MetaDataByteLength_double + exe_params->SZ_SIZE_TYPE + 1)
 							SZ_compress_args_double_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 					}
 				}
-		
+
 		}
 		else
 		{
 			printf("Error: doesn't support 5 dimensions for now.\n");
 			status = SZ_DERR;
 		}
-				
+
 		//Call Gzip to do the further compression.
 		if(confparams_cpr->szMode==SZ_BEST_SPEED)
 		{
 			*outSize = tmpOutSize;
-			*newByteData = tmpByteData;			
+			*newByteData = tmpByteData;
 		}
 		else if(confparams_cpr->szMode==SZ_BEST_COMPRESSION || confparams_cpr->szMode==SZ_DEFAULT_COMPRESSION || confparams_cpr->szMode==SZ_TEMPORAL_COMPRESSION)
 		{
@@ -2713,7 +2716,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the double compression.\n");
-			status = SZ_MERR;	
+			status = SZ_MERR;
 		}
 	}
 
@@ -3133,7 +3136,7 @@ size_t r1, size_t s1, size_t e1)
 	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
 
-	size_t i; 
+	size_t i;
 	int reqLength;
 	double medianValue = medianValue_d;
 	short radExpo = getExponent_double(valueRangeSize/2);
@@ -3258,7 +3261,7 @@ size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j; 
+	size_t i,j;
 	int reqLength;
 	double pred1D, pred2D;
 	double diff = 0.0;
@@ -3466,7 +3469,7 @@ size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, siz
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	double pred1D, pred2D, pred3D;
 	double diff = 0.0;
@@ -3805,7 +3808,7 @@ size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, siz
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	double pred1D, pred2D, pred3D;
 	double diff = 0.0;
@@ -4142,7 +4145,7 @@ size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, siz
 }
 
 unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	double pred_value = 0;
 	double pred_err;
@@ -4157,14 +4160,14 @@ unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dat
 		if(*data_pos == 0){
         		data_pos += confparams_cpr->sampleDistance;
         		continue;
-		}			
+		}
 		tempIndex++;
 		totalSampleSize++;
 		pred_value = data_pos[-1];
 		pred_err = fabs((double)*data_pos / pred_value);
 		radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5);
 		if(radiusIndex>=confparams_cpr->maxRangeRadius)
-			radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+			radiusIndex = confparams_cpr->maxRangeRadius - 1;
 		intervals[radiusIndex]++;
 
 		data_pos += confparams_cpr->sampleDistance;
@@ -4180,19 +4183,19 @@ unsigned int optimize_intervals_double_1D_opt_MSST19(double *oriData, size_t dat
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<64)
 		powerOf2 = 64;
-	
+
 	free(intervals);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	double pred_value = 0, pred_err;
@@ -4210,7 +4213,7 @@ unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1,
 		if(*data_pos == 0){
         		data_pos += confparams_cpr->sampleDistance;
         		continue;
-		}			
+		}
 		totalSampleSize++;
 		pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
 		pred_err = fabs(pred_value / *data_pos);
@@ -4252,7 +4255,7 @@ unsigned int optimize_intervals_double_2D_opt_MSST19(double *oriData, size_t r1,
 }
 
 unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -4271,7 +4274,7 @@ unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1,
 		if(*data_pos == 0){
         		data_pos += confparams_cpr->sampleDistance;
         		continue;
-		}	
+		}
 		totalSampleSize++;
 		pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
 		pred_err = fabs(*data_pos / pred_value);
@@ -4295,7 +4298,7 @@ unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1,
 			if(offset_count == 0) offset_count ++;
 		}
 		else data_pos += confparams_cpr->sampleDistance;
-	}	
+	}
 	//compute the appropriate number
 	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 	size_t sum = 0;
@@ -4315,7 +4318,7 @@ unsigned int optimize_intervals_double_3D_opt_MSST19(double *oriData, size_t r1,
 	free(intervals);
 	return powerOf2;
 }
-unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){	
+unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision){
 	size_t i;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -4353,7 +4356,7 @@ unsigned int optimize_intervals_double_3D_opt(double *oriData, size_t r1, size_t
 			if(offset_count == 0) offset_count ++;
 		}
 		else data_pos += confparams_cpr->sampleDistance;
-	}	
+	}
 	//compute the appropriate number
 	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 	size_t sum = 0;
@@ -4405,11 +4408,11 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 		type[0] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabs(curData-P1[0])>realPrecision){	
+		if(fabs(curData-P1[0])>realPrecision){
 			type[0] = 0;
 			P1[0] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[0] = 0;
@@ -4427,11 +4430,11 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 		type[1] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabs(curData-P1[1])>realPrecision){	
+		if(fabs(curData-P1[1])>realPrecision){
 			type[1] = 0;
-			P1[1] = curData;	
+			P1[1] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[1] = 0;
@@ -4449,11 +4452,11 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 			type[j] = (int) (itvNum/2) + exe_params->intvRadius;
 			P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
-			if(fabs(curData-P1[j])>realPrecision){	
+			if(fabs(curData-P1[j])>realPrecision){
 				type[j] = 0;
-				P1[j] = curData;	
+				P1[j] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else{
 			type[j] = 0;
@@ -4468,7 +4471,7 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		curData = *cur_data_pos;
 		diff = curData - pred1D;
@@ -4480,14 +4483,14 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 			if (diff < 0) itvNum = -itvNum;
 			type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 			P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-			
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P1[index])>realPrecision)
-			{	
+			{
 				type[index] = 0;
-				P1[index] = curData;	
+				P1[index] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else
 		{
@@ -4512,14 +4515,14 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 				if (diff < 0) itvNum = -itvNum;
 				type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 				P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-				
+
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P1[index])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P1[index] = curData;	
+					P1[index] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
-				}				
+				}
 			}
 			else
 			{
@@ -4549,11 +4552,11 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 			P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P0[0])>realPrecision)
-			{	
+			{
 				type[index] = 0;
-				P0[0] = curData;	
+				P0[0] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else
 		{
@@ -4577,9 +4580,9 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 				P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[j])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P0[j] = curData;	
+					P0[j] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
 				}
 			}
@@ -4598,7 +4601,7 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			curData = *cur_data_pos;
 			diff = curData - pred2D;
@@ -4612,11 +4615,11 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 				P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[index2D])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P0[index2D] = curData;	
+					P0[index2D] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
-				}				
+				}
 			}
 			else
 			{
@@ -4628,7 +4631,7 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
 			{
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -4642,14 +4645,14 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 					if (diff < 0) itvNum = -itvNum;
 					type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 					P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-					
+
 					//ganrantee comporession error against the case of machine-epsilon
 					if(fabs(curData-P0[index2D])>realPrecision)
-					{	
+					{
 						type[index] = 0;
-						P0[index2D] = curData;	
+						P0[index2D] = curData;
 						unpredictable_data[unpredictable_count ++] = curData;
-					}					
+					}
 				}
 				else
 				{
@@ -4671,7 +4674,7 @@ size_t SZ_compress_double_3D_MDQ_RA_block(double * block_ori_data, double * mean
 }
 
 unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	double pred_value = 0, pred_err;
@@ -4726,7 +4729,7 @@ unsigned int optimize_intervals_double_2D_opt(double *oriData, size_t r1, size_t
 }
 
 unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	double pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -4740,7 +4743,7 @@ unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength
 		pred_err = fabs(pred_value - *data_pos);
 		radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 		if(radiusIndex>=confparams_cpr->maxRangeRadius)
-			radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+			radiusIndex = confparams_cpr->maxRangeRadius - 1;
 		intervals[radiusIndex]++;
 
 		data_pos += confparams_cpr->sampleDistance;
@@ -4756,20 +4759,20 @@ unsigned int optimize_intervals_double_1D_opt(double *oriData, size_t dataLength
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	return powerOf2;
 }
 
 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/
 unsigned int optimize_intervals_double_2D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq)
-{	
+{
 	double mean = 0.0;
 	size_t len = r1 * r2;
 	size_t mean_distance = (int) (sqrt(len));
@@ -4892,7 +4895,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 		quantization_intervals = optimize_intervals_double_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -4914,7 +4917,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 	size_t num_blocks = num_x * num_y;
 	size_t num_elements = r1 * r2;
 
-	size_t dim0_offset = r2;	
+	size_t dim0_offset = r2;
 
 	int * result_type = (int *) malloc(num_elements * sizeof(int));
 	size_t unpred_data_max_size = max_num_block_elements;
@@ -4945,7 +4948,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 				double fx = 0.0;
 				double fy = 0.0;
 				double f = 0;
-				double sum_x; 
+				double sum_x;
 				double curData;
 				for(size_t i=0; i<current_blockcount_x; i++){
 					sum_x = 0;
@@ -5048,7 +5051,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 			for(size_t j=0; j<num_y; j++){
 				offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
 				current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
-				
+
 				/*sampling: decide which predictor to use (regression or lorenzo)*/
 				{
 					double * cur_data_pos;
@@ -5063,7 +5066,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 						cur_data_pos = data_pos + i * dim0_offset + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 						err_reg += fabs(pred_reg - curData);
 
@@ -5071,9 +5074,9 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 						cur_data_pos = data_pos + i*dim0_offset + bmi;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-						err_reg += fabs(pred_reg - curData);								
+						err_reg += fabs(pred_reg - curData);
 					}
 					use_reg = (err_reg < err_sz);
 				}
@@ -5092,11 +5095,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 								last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+								if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 									coeff_type[e][coeff_index] = 0;
-									last_coeffcients[e] = cur_coeff;	
+									last_coeffcients[e] = cur_coeff;
 									coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-								}					
+								}
 							}
 							else{
 								coeff_type[e][coeff_index] = 0;
@@ -5124,18 +5127,18 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5150,11 +5153,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5164,7 +5167,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 
 							// assign value to block surfaces
 							pb_pos[ii * strip_dim0_offset + jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos += dim0_offset - current_blockcount_y;
@@ -5182,11 +5185,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5195,7 +5198,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 							}
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5210,11 +5213,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5227,13 +5230,13 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
 
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 					} // end ii == -1
 					unpredictable_count = block_unpredictable_count;
 					total_unpred += unpredictable_count;
-					unpredictable_data += unpredictable_count;					
+					unpredictable_data += unpredictable_count;
 					reg_count ++;
 				}// end use_reg
 				else{
@@ -5265,11 +5268,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 									*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 									if(type[index] <= intvRadius) type[index] -= 1;
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(curData - *cur_pb_pos)>realPrecision){	
+									if(fabs(curData - *cur_pb_pos)>realPrecision){
 										type[index] = 0;
-										*cur_pb_pos = curData;	
+										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
-									}					
+									}
 								}
 								else{
 									type[index] = 0;
@@ -5305,11 +5308,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 									*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 									if(type[index] <= intvRadius) type[index] -= 1;
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(curData - *cur_pb_pos)>realPrecision){	
+									if(fabs(curData - *cur_pb_pos)>realPrecision){
 										type[index] = 0;
-										*cur_pb_pos = curData;	
+										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
-									}					
+									}
 								}
 								else{
 									type[index] = 0;
@@ -5372,7 +5375,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 						cur_data_pos = data_pos + i * dim0_offset + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
 						err_sz += fabs(pred_sz - curData) + noise;
 						err_reg += fabs(pred_reg - curData);
 
@@ -5380,9 +5383,9 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 						cur_data_pos = data_pos + i*dim0_offset + bmi;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += fabs(pred_sz - curData) + noise;
-						err_reg += fabs(pred_reg - curData);								
+						err_reg += fabs(pred_reg - curData);
 					}
 					use_reg = (err_reg < err_sz);
 				}
@@ -5401,11 +5404,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 								last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+								if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 									coeff_type[e][coeff_index] = 0;
-									last_coeffcients[e] = cur_coeff;	
+									last_coeffcients[e] = cur_coeff;
 									coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-								}					
+								}
 							}
 							else{
 								coeff_type[e][coeff_index] = 0;
@@ -5433,18 +5436,18 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5460,11 +5463,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5474,7 +5477,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 
 							// assign value to block surfaces
 							pb_pos[ii * strip_dim0_offset + jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos += dim0_offset - current_blockcount_y;
@@ -5492,11 +5495,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5505,7 +5508,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 							}
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5521,11 +5524,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>realPrecision){	
+								if(fabs(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5538,13 +5541,13 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
 
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 					} // end ii == -1
 					unpredictable_count = block_unpredictable_count;
 					total_unpred += unpredictable_count;
-					unpredictable_data += unpredictable_count;					
+					unpredictable_data += unpredictable_count;
 					reg_count ++;
 				}// end use_reg
 				else{
@@ -5569,11 +5572,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - *cur_pb_pos)>realPrecision){	
+								if(fabs(curData - *cur_pb_pos)>realPrecision){
 									type[index] = 0;
-									*cur_pb_pos = curData;	
+									*cur_pb_pos = curData;
 									unpredictable_data[unpredictable_count ++] = curData;
-								}					
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5602,11 +5605,11 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 								type[index] = (int) (itvNum/2) + intvRadius;
 								*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - *cur_pb_pos)>realPrecision){	
+								if(fabs(curData - *cur_pb_pos)>realPrecision){
 									type[index] = 0;
-									*cur_pb_pos = curData;	
+									*cur_pb_pos = curData;
 									unpredictable_data[unpredictable_count ++] = curData;
-								}					
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5635,7 +5638,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 			tmp = cur_pb_buf;
 			cur_pb_buf = next_pb_buf;
 			next_pb_buf = tmp;
-		}// end i		
+		}// end i
 	}
 	free(prediction_buffer_1);
 	free(prediction_buffer_2);
@@ -5647,7 +5650,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 	size_t i = 0;
 	init(huffmanTree, result_type, num_elements);
 	for (i = 0; i < stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -5662,7 +5665,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 
 	sizeToBytes(result_pos, num_elements);
 	result_pos += exe_params->SZ_SIZE_TYPE;
-	
+
 	intToBytes_bigEndian(result_pos, block_size);
 	result_pos += sizeof(int);
 	doubleToBytes(result_pos, realPrecision);
@@ -5684,8 +5687,8 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
-	//convert the lead/mid/resi to byte stream 	
+
+	//convert the lead/mid/resi to byte stream
 	if(reg_count>0){
 		for(int e=0; e<3; e++){
 			int stateNum = 2*coeff_intvCapacity_sz;
@@ -5694,7 +5697,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -5706,7 +5709,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -5735,14 +5738,14 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 	writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount);
 	writeBlockInfo(use_mean, block_size, reg_count, num_blocks);
 	writeUnpredictDataCounts(total_unpred, num_elements);
-#endif	
+#endif
 
 	size_t totalEncodeSize = result_pos - result;
 	free(indicator);
 	free(result_unpredictable_data);
 	free(result_type);
 	free(reg_params);
-	
+
 	SZ_ReleaseHuffman(huffmanTree);
 	*comp_size = totalEncodeSize;
 
@@ -5751,7 +5754,7 @@ unsigned char * SZ_compress_double_2D_MDQ_nonblocked_with_blocked_regression(dou
 	return result;
 }
 unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, double * dense_pos, double * max_freq, double * mean_freq)
-{	
+{
 	double mean = 0.0;
 	size_t len = r1 * r2 * r3;
 	size_t mean_distance = (int) (sqrt(len));
@@ -5839,7 +5842,7 @@ unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriDat
 		}
 		else data_pos += sampleDistance;
 		sample_count ++;
-	}	
+	}
 	*max_freq = freq_count * 1.0/ sample_count;
 
 	//compute the appropriate number
@@ -5883,7 +5886,7 @@ unsigned int optimize_intervals_double_3D_with_freq_and_dense_pos(double *oriDat
 
 unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(double *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t * comp_size){
 
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	double* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (double*)(multisteps->hist_data);
@@ -5917,7 +5920,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	size_t num_elements = r1 * r2 * r3;
 
 	size_t dim0_offset = r2 * r3;
-	size_t dim1_offset = r3;	
+	size_t dim1_offset = r3;
 
 	int * result_type = (int *) malloc(num_elements * sizeof(int));
 	memset(result_type, 0, num_elements*sizeof(int));
@@ -5947,7 +5950,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 				offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
 				offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
 				offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
-	
+
 				data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
 				/*Calculate regression coefficients*/
 				{
@@ -5956,7 +5959,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 					double fy = 0.0;
 					double fz = 0.0;
 					double f = 0;
-					double sum_x, sum_y; 
+					double sum_x, sum_y;
 					double curData;
 					for(size_t i=0; i<current_blockcount_x; i++){
 						sum_x = 0;
@@ -5990,7 +5993,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 			}
 		}
 	}
-	
+
 	//Compress coefficient arrays
 	double precision_a, precision_b, precision_c, precision_d;
 	double rel_param_err = 0.025;
@@ -6004,7 +6007,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 		quantization_intervals = optimize_intervals_double_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -6045,7 +6048,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	double * cur_pb_buf_pos;
 	double * next_pb_buf_pos;
 	int intvCapacity = quantization_intervals;// exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	int use_reg = 0;
 	double noise = realPrecision * 1.22;
 
@@ -6061,7 +6064,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	double precision[4], recip_precision[4];
 	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
 	recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c, recip_precision[3] = 1/precision_d;
-	
+
 	for(int i=0; i<4; i++){
 		coeff_type[i] = coeff_result_type + i * num_blocks;
 		coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
@@ -6111,7 +6114,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 
@@ -6119,21 +6122,21 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 						}
@@ -6153,11 +6156,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -6178,7 +6181,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6186,29 +6189,29 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>realPrecision){	
+										if(fabs(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif									
-									
+#endif
+
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
 										pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos += dim1_offset - current_blockcount_z;
@@ -6222,7 +6225,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6230,11 +6233,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>realPrecision){	
+										if(fabs(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
@@ -6246,7 +6249,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif		
+#endif
 
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
@@ -6263,7 +6266,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 						unpredictable_count = block_unpredictable_count;
 						strip_unpredictable_count += unpredictable_count;
 						unpredictable_data += unpredictable_count;
-						
+
 						reg_count ++;
 					}
 					else{
@@ -6298,11 +6301,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 											*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 											if(type[index] <= intvRadius) type[index] -= 1;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabs(curData - *cur_pb_pos)>realPrecision){	
+											if(fabs(curData - *cur_pb_pos)>realPrecision){
 												type[index] = 0;
-												*cur_pb_pos = curData;	
+												*cur_pb_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -6310,13 +6313,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 											unpredictable_data[unpredictable_count ++] = curData;
 										}
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif											
-									
+#endif
+
 									index ++;
 									cur_pb_pos ++;
 									cur_data_pos ++;
@@ -6351,11 +6354,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 											*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 											if(type[index] <= intvRadius) type[index] -= 1;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabs(curData - *cur_pb_pos)>realPrecision){	
+											if(fabs(curData - *cur_pb_pos)>realPrecision){
 												type[index] = 0;
-												*cur_pb_pos = curData;	
+												*cur_pb_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -6363,14 +6366,14 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 											unpredictable_data[unpredictable_count ++] = curData;
 										}
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t ii = current_blockcount_x - 1;
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif										
-									
+#endif
+
 									next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
 									index ++;
 									cur_pb_pos ++;
@@ -6385,7 +6388,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 						// change indicator
 						indicator_pos[k] = 1;
 					}// end SZ
-					
+
 					reg_params_pos ++;
 					data_pos += current_blockcount_z;
 					pb_pos += current_blockcount_z;
@@ -6435,8 +6438,8 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 					size_t offset_z = 0;
 					offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
 					size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
-#endif							
-					
+#endif
+
 					/*sampling*/
 					{
 						// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
@@ -6450,7 +6453,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 
@@ -6458,21 +6461,21 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 						}
@@ -6494,11 +6497,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -6520,7 +6523,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6528,11 +6531,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>realPrecision){	
+										if(fabs(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
@@ -6544,13 +6547,13 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif			
+#endif
 
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
 										pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos += dim1_offset - current_blockcount_z;
@@ -6564,7 +6567,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6572,11 +6575,11 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>realPrecision){	
+										if(fabs(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
@@ -6604,7 +6607,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 						}
 						unpredictable_count = block_unpredictable_count;
 						strip_unpredictable_count += unpredictable_count;
-						unpredictable_data += unpredictable_count;						
+						unpredictable_data += unpredictable_count;
 						reg_count ++;
 					}
 					else{
@@ -6631,24 +6634,24 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - *cur_pb_pos)>realPrecision){	
+										if(fabs(curData - *cur_pb_pos)>realPrecision){
 											type[index] = 0;
-											*cur_pb_pos = curData;	
+											*cur_pb_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
 										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif	
-									
+#endif
+
 									index ++;
 									cur_pb_pos ++;
 									cur_data_pos ++;
@@ -6675,25 +6678,25 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - *cur_pb_pos)>realPrecision){	
+										if(fabs(curData - *cur_pb_pos)>realPrecision){
 											type[index] = 0;
-											*cur_pb_pos = curData;	
+											*cur_pb_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
 										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t ii = current_blockcount_x - 1;
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif											
-									
+#endif
+
 									// assign value to next prediction buffer
 									next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
 									index ++;
@@ -6709,7 +6712,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 						// change indicator
 						indicator_pos[k] = 1;
 					}// end SZ
-					
+
 					reg_params_pos ++;
 					data_pos += current_blockcount_z;
 					pb_pos += current_blockcount_z;
@@ -6741,7 +6744,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	init(huffmanTree, result_type, num_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -6752,9 +6755,9 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+ num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(double) + total_unpred * sizeof(double) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -6778,7 +6781,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	result_pos += sizeof(double);
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
 	if(reg_count > 0){
 		for(int e=0; e<4; e++){
@@ -6788,7 +6791,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -6800,7 +6803,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -6816,7 +6819,7 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -6835,8 +6838,8 @@ unsigned char * SZ_compress_double_3D_MDQ_nonblocked_with_blocked_regression(dou
 	writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount);
 	writeBlockInfo(use_mean, block_size, reg_count, num_blocks);
 	writeUnpredictDataCounts(total_unpred, num_elements);
-#endif	
-	
+#endif
+
 	SZ_ReleaseHuffman(huffmanTree);
 	*comp_size = totalEncodeSize;
 	return result;
diff --git a/sz/src/sz_double_pwr.c b/sz/src/sz_double_pwr.c
index dc037db7..d3699165 100644
--- a/sz/src/sz_double_pwr.c
+++ b/sz/src/sz_double_pwr.c
@@ -9,10 +9,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -28,7 +31,7 @@
 void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, double* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0;
-	double realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio; 
+	double realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio;
 	double approxPrecision;
 	unsigned char realPrecBytes[8];
 	double curPrecision;
@@ -43,14 +46,14 @@ void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, do
 			if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 			{
 				realPrecision = sum/confparams_cpr->segment_size;
-				sum = 0;			
+				sum = 0;
 			}
 			realPrecision *= confparams_cpr->pw_relBoundRatio;
 			if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-				realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+				realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 			else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 				realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-				
+
 			doubleToBytes(realPrecBytes, realPrecision);
 			memset(&realPrecBytes[2], 0, 6);
 			approxPrecision = bytesToDouble(realPrecBytes);
@@ -59,26 +62,26 @@ void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, do
 			//put the two bytes in pwrErrBoundBytes
 			pwrErrBoundBytes[k++] = realPrecBytes[0];
 			pwrErrBoundBytes[k++] = realPrecBytes[1];
-			
+
 			realPrecision = fabs(curValue);
 		}
-		
+
 		if(curValue!=0)
 		{
 			curPrecision = fabs(curValue);
-			
+
 			switch(confparams_cpr->pwr_type)
 			{
-			case SZ_PWR_MIN_TYPE: 
+			case SZ_PWR_MIN_TYPE:
 				if(realPrecision>curPrecision)
-					realPrecision = curPrecision;	
+					realPrecision = curPrecision;
 				break;
 			case SZ_PWR_AVG_TYPE:
 				sum += curPrecision;
 				break;
 			case SZ_PWR_MAX_TYPE:
 				if(realPrecision<curPrecision)
-					realPrecision = curPrecision;					
+					realPrecision = curPrecision;
 				break;
 			}
 		}
@@ -86,12 +89,12 @@ void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, do
 	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 	{
 		int size = dataLength%confparams_cpr->segment_size==0?confparams_cpr->segment_size:dataLength%confparams_cpr->segment_size;
-		realPrecision = sum/size;		
-	}	
+		realPrecision = sum/size;
+	}
 	if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 	else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
-		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;	
+		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
 	doubleToBytes(realPrecBytes, realPrecision);
 	memset(&realPrecBytes[2], 0, 6);
 	approxPrecision = bytesToDouble(realPrecBytes);
@@ -103,9 +106,9 @@ void compute_segment_precisions_double_1D(double *oriData, size_t dataLength, do
 }
 
 unsigned int optimize_intervals_double_1D_pwr(double *oriData, size_t dataLength, double* pwrErrBound)
-{	
+{
 	size_t i = 0, j = 0;
-	double realPrecision = pwrErrBound[j++];	
+	double realPrecision = pwrErrBound[j++];
 	unsigned long radiusIndex;
 	double pred_value = 0, pred_err;
 	int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
@@ -122,7 +125,7 @@ unsigned int optimize_intervals_double_1D_pwr(double *oriData, size_t dataLength
 			pred_err = fabs(pred_value - oriData[i]);
 			radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -139,25 +142,25 @@ unsigned int optimize_intervals_double_1D_pwr(double *oriData, size_t dataLength
 		i = confparams_cpr->maxRangeRadius-1;
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
-void compute_segment_precisions_double_2D(double *oriData, double* pwrErrBound, 
+void compute_segment_precisions_double_2D(double *oriData, double* pwrErrBound,
 size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0, p = 0, index = 0, J = 0; //I=-1,J=-1 if they are needed
-	double realPrecision; 
+	double realPrecision;
 	double approxPrecision;
 	unsigned char realPrecBytes[8];
 	double curValue, curAbsValue;
-	double* statAbsValues = (double*)malloc(R2*sizeof(double));	
-	
+	double* statAbsValues = (double*)malloc(R2*sizeof(double));
+
 	double max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.
 	double min = fabs(Min)<fabs(Max)?fabs(Min):fabs(Max);
 	for(i=0;i<R2;i++)
@@ -174,7 +177,7 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 		for(j=0;j<r2;j++)
 		{
 			index = i*r2+j;
-			curValue = oriData[index];				
+			curValue = oriData[index];
 			if(((i%edgeSize==edgeSize-1 || i==r1-1) &&j%edgeSize==0&&j>0) || (i%edgeSize==0&&j==0&&i>0))
 			{
 				if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
@@ -182,7 +185,7 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 					int a = edgeSize, b = edgeSize;
 					if(j==0)
 					{
-						if(r2%edgeSize==0) 
+						if(r2%edgeSize==0)
 							b = edgeSize;
 						else
 							b = r2%edgeSize;
@@ -200,10 +203,10 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 					realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];
 
 				if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-					realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+					realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 				else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 					realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-					
+
 				doubleToBytes(realPrecBytes, realPrecision);
 				memset(&realPrecBytes[2], 0, 6);
 				approxPrecision = bytesToDouble(realPrecBytes);
@@ -211,45 +214,45 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 				pwrErrBound[p++] = approxPrecision;
 				//put the two bytes in pwrErrBoundBytes
 				pwrErrBoundBytes[k++] = realPrecBytes[0];
-				pwrErrBoundBytes[k++] = realPrecBytes[1];	
-				
+				pwrErrBoundBytes[k++] = realPrecBytes[1];
+
 				if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 					statAbsValues[J] = max;
 				else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
 					statAbsValues[J] = min;
 				else
-					statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE		
-			}	
+					statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE
+			}
 			if(j==0)
 				J = 0;
 			else if(j%edgeSize==0)
-				J++;			
+				J++;
 			if(curValue!=0)
 			{
 				curAbsValue = fabs(curValue);
-				
+
 				switch(confparams_cpr->pwr_type)
 				{
-				case SZ_PWR_MIN_TYPE: 
+				case SZ_PWR_MIN_TYPE:
 					if(statAbsValues[J]>curAbsValue)
-						statAbsValues[J] = curAbsValue;	
+						statAbsValues[J] = curAbsValue;
 					break;
 				case SZ_PWR_AVG_TYPE:
 					statAbsValues[J] += curAbsValue;
 					break;
 				case SZ_PWR_MAX_TYPE:
 					if(statAbsValues[J]<curAbsValue)
-						statAbsValues[J] = curAbsValue;					
+						statAbsValues[J] = curAbsValue;
 					break;
 				}
 			}
 		}
 	}
-		
+
 	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 	{
 		int a = edgeSize, b = edgeSize;
-		if(r2%edgeSize==0) 
+		if(r2%edgeSize==0)
 			b = edgeSize;
 		else
 			b = r2%edgeSize;
@@ -260,13 +263,13 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b);
 	}
 	else
-		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];		
+		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];
 
 	if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 	else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-		
+
 	doubleToBytes(realPrecBytes, realPrecision);
 	realPrecBytes[2] = realPrecBytes[3] = 0;
 	approxPrecision = bytesToDouble(realPrecBytes);
@@ -274,15 +277,15 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 	pwrErrBound[p++] = approxPrecision;
 	//put the two bytes in pwrErrBoundBytes
 	pwrErrBoundBytes[k++] = realPrecBytes[0];
-	pwrErrBoundBytes[k++] = realPrecBytes[1];	
-	
+	pwrErrBoundBytes[k++] = realPrecBytes[1];
+
 	free(statAbsValues);
 }
 
 unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t r2, size_t R2, size_t edgeSize, double* pwrErrBound)
-{	
+{
 	size_t i = 0,j = 0, index, I=0, J=0;
-	double realPrecision = pwrErrBound[0];	
+	double realPrecision = pwrErrBound[0];
 	unsigned long radiusIndex;
 	double pred_value = 0, pred_err;
 	int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
@@ -293,7 +296,7 @@ unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t
 	{
 		ir2 = i*r2;
 		if(i%edgeSize==0)
-		{	
+		{
 			I++;
 			J = 0;
 		}
@@ -302,7 +305,7 @@ unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t
 			index = ir2+j;
 			if(j%edgeSize==0)
 				J++;
-				
+
 			if((i+j)%confparams_cpr->sampleDistance==0)
 			{
 				realPrecision = pwrErrBound[I*R2+J];
@@ -312,7 +315,7 @@ unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -337,18 +340,18 @@ unsigned int optimize_intervals_double_2D_pwr(double *oriData, size_t r1, size_t
 	return powerOf2;
 }
 
-void compute_segment_precisions_double_3D(double *oriData, double* pwrErrBound, 
+void compute_segment_precisions_double_3D(double *oriData, double* pwrErrBound,
 size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* pwrErrBoundBytes, double Min, double Max, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0, p = 0, q = 0, index = 0, J = 0, K = 0; //I=-1,J=-1 if they are needed
 	size_t r23 = r2*r3, ir, jr;
-	double realPrecision; 
+	double realPrecision;
 	double approxPrecision;
 	unsigned char realPrecBytes[8];
 	double curValue, curAbsValue;
-	
+
 	double** statAbsValues = create2DArray_double(R2, R3);
-	double max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.	
+	double max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.
 	double min = fabs(Min)<fabs(Max)?fabs(Min):fabs(Max);
 	for(i=0;i<R2;i++)
 		for(j=0;j<R3;j++)
@@ -362,7 +365,7 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 		}
 	for(i=0;i<r1;i++)
 	{
-		ir = i*r23;		
+		ir = i*r23;
 		if(i%edgeSize==0&&i>0)
 		{
 			realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
@@ -379,7 +382,7 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 				statAbsValues[J][K] = max;
 			else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
 				statAbsValues[J][K] = min;
-		}		
+		}
 		for(j=0;j<r2;j++)
 		{
 			jr = j*r3;
@@ -398,18 +401,18 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 				if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 					statAbsValues[J][K] = max;
 				else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
-					statAbsValues[J][K] = min;			
+					statAbsValues[J][K] = min;
 			}
-			
+
 			if(j==0)
 				J = 0;
 			else if(j%edgeSize==0)
-				J++;					
-			
+				J++;
+
 			for(k=0;k<r3;k++)
 			{
-				index = ir+jr+k;				
-				curValue = oriData[index];				
+				index = ir+jr+k;
+				curValue = oriData[index];
 				if((i%edgeSize==edgeSize-1 || i == r1-1)&&(j%edgeSize==edgeSize-1||j==r2-1)&&k%edgeSize==0&&k>0)
 				{
 					realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
@@ -422,18 +425,18 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 					//printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k);
 					pwrErrBoundBytes[q++] = realPrecBytes[0];
 					pwrErrBoundBytes[q++] = realPrecBytes[1];
-					
+
 					if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 						statAbsValues[J][K] = max;
 					else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
-						statAbsValues[J][K] = min;	
-				}	
+						statAbsValues[J][K] = min;
+				}
 
 				if(k==0)
 					K = 0;
 				else if(k%edgeSize==0)
 					K++;
-					
+
 				if(curValue!=0)
 				{
 					curAbsValue = fabs(curValue);
@@ -452,10 +455,10 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 						}
 					}
 				}
-			}			
+			}
 		}
-	}	
-	
+	}
+
 	realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
 	doubleToBytes(realPrecBytes, realPrecision);
 	memset(&realPrecBytes[2], 0, 6);
@@ -465,14 +468,14 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 	//put the two bytes in pwrErrBoundBytes
 	pwrErrBoundBytes[q++] = realPrecBytes[0];
 	pwrErrBoundBytes[q++] = realPrecBytes[1];
-	
+
 	free2DArray_double(statAbsValues, R2);
 }
 
 unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, double* pwrErrBound)
-{	
+{
 	size_t i,j,k, ir,jr,index, I = 0,J=0,K=0;
-	double realPrecision = pwrErrBound[0];		
+	double realPrecision = pwrErrBound[0];
 	unsigned long radiusIndex;
 	size_t r23=r2*r3;
 	size_t R23 = R2*R3;
@@ -484,7 +487,7 @@ unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t
 	{
 		ir = i*r23;
 		if(i%edgeSize==0)
-		{	
+		{
 			I++;
 			J = 0;
 		}
@@ -492,19 +495,19 @@ unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t
 		{
 			jr = j*r3;
 			if(j%edgeSize==0)
-			{	
+			{
 				J++;
 				K = 0;
-			}			
+			}
 			for(k=1;k<r3;k++)
 			{
 				index = ir+jr+k;
 				if(k%edgeSize==0)
-					K++;		
+					K++;
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
-					realPrecision = pwrErrBound[I*R23+J*R2+K];					
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					realPrecision = pwrErrBound[I*R23+J*R2+K];
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = fabs(pred_value - oriData[index]);
 					radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
@@ -531,87 +534,87 @@ unsigned int optimize_intervals_double_3D_pwr(double *oriData, size_t r1, size_t
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
-void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, 
+void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision,
 size_t dataLength, size_t *outSize, double min, double max)
 {
 	size_t pwrLength = dataLength%confparams_cpr->segment_size==0?dataLength/confparams_cpr->segment_size:dataLength/confparams_cpr->segment_size+1;
 	double* pwrErrBound = (double*)malloc(sizeof(double)*pwrLength);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*pwrLength*2;
 	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
-	
+
 	compute_segment_precisions_double_1D(oriData, dataLength, pwrErrBound, pwrErrBoundBytes, globalPrecision);
 
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
-		quantization_intervals = optimize_intervals_double_1D_pwr(oriData, dataLength, pwrErrBound);	
+		quantization_intervals = optimize_intervals_double_1D_pwr(oriData, dataLength, pwrErrBound);
 		updateQuantizationInfo(quantization_intervals);
 	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	size_t i = 0, j = 0;
 	int reqLength;
-	double realPrecision = pwrErrBound[j++];	
+	double realPrecision = pwrErrBound[j++];
 	double medianValue = 0;
 	double radius = fabs(max)<fabs(min)?fabs(min):fabs(max);
 	short radExpo = getExponent_double(radius);
-	
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
-	
+
 	unsigned char preDataBytes[8] = {0};
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	double last3CmprsData[3] = {0};
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-						
-	//add the first data	
+
+	//add the first data
 	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
 	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
-	//printf("%.30G\n",last3CmprsData[0]);	
-		
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	//add the second data
 	type[1] = 0;
-	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);			
+	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
 	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_double(last3CmprsData, vce->data);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius;
 	double curData;
@@ -620,7 +623,7 @@ size_t dataLength, size_t *outSize, double min, double max)
 	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	double interval = 2*realPrecision;
 	int updateReqLength = 0; //a marker: 1 means already updated
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
@@ -633,7 +636,7 @@ size_t dataLength, size_t *outSize, double min, double max)
 		}
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = fabs(curData - pred);	
+		predAbsErr = fabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -647,41 +650,41 @@ size_t dataLength, size_t *outSize, double min, double max)
 				type[i] = exe_params->intvRadius-state;
 				pred = pred - state*interval;
 			}
-			listAdd_double(last3CmprsData, pred);			
+			listAdd_double(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		if(updateReqLength==0)
 		{
-			computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);				
+			computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 			reqBytesLength = reqLength/8;
 			resiBitsLength = reqLength%8;
-			updateReqLength = 1;		
+			updateReqLength = 1;
 		}
-		
+
 		type[i] = 0;
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
-		
+
 		compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,8);
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 
-		listAdd_double(last3CmprsData, vce->data);	
+		listAdd_double(last3CmprsData, vce->data);
 	}//end of for
-		
+
 //	char* expSegmentsInBytes;
 //	int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+
+	new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
 
 //sdi:Debug
@@ -693,17 +696,17 @@ size_t dataLength, size_t *outSize, double min, double max)
 //	writeUShortData(type, dataLength, "compressStateBytes.sb");
 //	unsigned short type_[dataLength];
 //	SZ_Reset();
-//	decode_withTree(tdps->typeArray, tdps->typeArray_size, type_);	
+//	decode_withTree(tdps->typeArray, tdps->typeArray_size, type_);
 //	printf("tdps->typeArray_size=%d\n", tdps->typeArray_size);
-		
+
 	//free memory
 	free_DBA(resiBitLengthArray);
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
 	free(type);
-		
+
 	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
-	
+
 	int doubleSize=sizeof(double);
 	if(*outSize>dataLength*doubleSize)
 	{
@@ -711,24 +714,24 @@ size_t dataLength, size_t *outSize, double min, double max)
 		tdps->isLossless = 1;
 		size_t totalByteLength = 3 + exe_params->SZ_SIZE_TYPE + 1 + doubleSize*dataLength;
 		*newByteData = (unsigned char*)malloc(totalByteLength);
-		
+
 		unsigned char dsLengthBytes[exe_params->SZ_SIZE_TYPE];
 		intToBytes_bigEndian(dsLengthBytes, dataLength);//4
 		for (i = 0; i < 3; i++)//3
 			(*newByteData)[k++] = versionNumber[i];
-		
+
 		if(exe_params->SZ_SIZE_TYPE==4)
 		{
-			(*newByteData)[k++] = 16;	//=00010000	
+			(*newByteData)[k++] = 16;	//=00010000
 		}
-		else 
+		else
 		{
 			(*newByteData)[k++] = 80;
 		}
 		for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//4 or 8
 			(*newByteData)[k++] = dsLengthBytes[i];
 
-		
+
 		if(sysEndianType==BIG_ENDIAN_SYSTEM)
 			memcpy((*newByteData)+4+exe_params->SZ_SIZE_TYPE, oriData, dataLength*doubleSize);
 		else
@@ -739,9 +742,9 @@ size_t dataLength, size_t *outSize, double min, double max)
 		}
 		*outSize = totalByteLength;
 	}
-	
+
 	free(pwrErrBound);
-	
+
 	free(vce);
 	free(lce);
 	free_TightDataPointStorageD(tdps);
@@ -750,9 +753,9 @@ size_t dataLength, size_t *outSize, double min, double max)
 
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
 void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, size_t r1, size_t r2,
 size_t *outSize, double min, double max)
@@ -764,7 +767,7 @@ size_t *outSize, double min, double max)
 	double* pwrErrBound = (double*)malloc(sizeof(double)*R1*R2);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*2;
 	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
-	
+
 	compute_segment_precisions_double_2D(oriData, pwrErrBound, r1, r2, R2, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
@@ -773,57 +776,57 @@ size_t *outSize, double min, double max)
 		updateQuantizationInfo(quantization_intervals);
 	}
 	else
-		quantization_intervals = exe_params->intvCapacity;	
+		quantization_intervals = exe_params->intvCapacity;
 	//printf("quantization_intervals=%d\n",quantization_intervals);
-	
-	size_t i=0,j=0,I=0,J=0; 
+
+	size_t i=0,j=0,I=0,J=0;
 	int reqLength;
-	double realPrecision = pwrErrBound[I*R2+J];	
+	double realPrecision = pwrErrBound[I*R2+J];
 	double pred1D, pred2D;
 	double diff = 0.0;
 	double itvNum = 0;
 	double *P0, *P1;
-	
+
 	P0 = (double*)malloc(r2*sizeof(double));
 	memset(P0, 0, r2*sizeof(double));
 	P1 = (double*)malloc(r2*sizeof(double));
 	memset(P1, 0, r2*sizeof(double));
-		
+
 	double medianValue = 0;
 	double radius = fabs(max)<fabs(min)?fabs(min):fabs(max);
 	short radExpo = getExponent_double(radius);
 	int updateReqLength = 1;
-	
+
 	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
-	
+
 	unsigned char preDataBytes[8];
 	longToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -846,7 +849,7 @@ size_t *outSize, double min, double max)
 		P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
 	}
 	else
-	{		
+	{
 		type[1] = 0;
 
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -902,7 +905,7 @@ size_t *outSize, double min, double max)
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		J = 0;
@@ -910,7 +913,7 @@ size_t *outSize, double min, double max)
 			I++;
 		realPrecision = pwrErrBound[I*R2+J]; //J==0
 		updateReqLength = 0;
-		
+
 		pred1D = P1[0];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -931,7 +934,7 @@ size_t *outSize, double min, double max)
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
 			}
-			
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -941,7 +944,7 @@ size_t *outSize, double min, double max)
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -990,62 +993,62 @@ size_t *outSize, double min, double max)
 		P1 = P0;
 		P0 = Pt;
 	}
-		
-	if(r2!=1)	
+
+	if(r2!=1)
 		free(P0);
 	free(P1);
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+
+	new_TightDataPointStorageD2(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
-	
+
 	//free memory
 	free_DBA(resiBitLengthArray);
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
 	free(type);
-		
+
 	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
 
 	free(pwrErrBound);
-	
+
 	free(vce);
 	free(lce);
-	free_TightDataPointStorageD(tdps);	
+	free_TightDataPointStorageD(tdps);
 	free(exactMidByteArray);
 }
 
-void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision, 
+void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, double *oriData, double globalPrecision,
 size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 {
 	size_t dataLength=r1*r2*r3;
-	
+
 	int blockEdgeSize = computeBlockEdgeSize_3D(confparams_cpr->segment_size);
 	size_t R1 = 1+(r1-1)/blockEdgeSize;
 	size_t R2 = 1+(r2-1)/blockEdgeSize;
 	size_t R3 = 1+(r3-1)/blockEdgeSize;
 	double* pwrErrBound = (double*)malloc(sizeof(double)*R1*R2*R3);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*R3*2;
-	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);	
-	
-	compute_segment_precisions_double_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);	
+	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
+
+	compute_segment_precisions_double_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_double_3D_pwr(oriData, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBound);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	size_t i=0,j=0,k=0, I = 0, J = 0, K = 0;
 	int reqLength;
-	double realPrecision = pwrErrBound[0];		
+	double realPrecision = pwrErrBound[0];
 	double pred1D, pred2D, pred3D;
 	double diff = 0.0;
 	double itvNum = 0;
@@ -1059,13 +1062,13 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 	double medianValue = 0;
 	short radExpo = getExponent_double(radius);
 	int updateReqLength = 0;
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
 
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
 
@@ -1120,8 +1123,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 			reqBytesLength = reqLength/8;
 			resiBitsLength = reqLength%8;
 			updateReqLength = 1;
-		}		
-		
+		}
+
 		type[1] = 0;
 
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1140,7 +1143,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 			J++;
 			realPrecision = pwrErrBound[J];
 			updateReqLength = 0;
-		}		
+		}
 		pred1D = 2*P1[j-1] - P1[j-2];
 		diff = spaceFillingValue[j] - pred1D;
 
@@ -1160,7 +1163,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}			
+			}
 
 			type[j] = 0;
 
@@ -1179,7 +1182,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 
 		J = 0;
 		if(i%blockEdgeSize==0)
@@ -1206,8 +1209,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}		
-						
+			}
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1221,14 +1224,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++) //note that this j refers to fastest dimension (lowest order)
 		{
-			index = i*r3+j;		
+			index = i*r3+j;
 			if(j%blockEdgeSize==0)
 			{
 				J++;
 				realPrecision = pwrErrBound[I*R3+J];
 				updateReqLength = 0;
-			}			
-		
+			}
+
 			pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
 
 			diff = spaceFillingValue[index] - pred2D;
@@ -1249,8 +1252,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1269,14 +1272,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 	for (k = 1; k < r1; k++)
 	{
 		/* Process Row-0 data 0*/
-		index = k*r23;			
+		index = k*r23;
 		I = 0;
 		J = 0;
 		if(k%blockEdgeSize==0)
 			K++;
 		realPrecision = pwrErrBound[K*R23]; //J==0
 		updateReqLength = 0;
-		
+
 		pred1D = P1[0];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -1296,8 +1299,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}					
-			
+			}
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1312,14 +1315,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 	    /* Process Row-0 data 1 --> data r3-1 */
 		for (j = 1; j < r3; j++)
 		{
-			index = k*r23+j;	
+			index = k*r23+j;
 
 			if(j%blockEdgeSize==0)
 			{
 				J++;
 				realPrecision = pwrErrBound[K*R23+J];
-				updateReqLength = 0;			
-			}					
+				updateReqLength = 0;
+			}
 			pred2D = P0[j-1] + P1[j] - P1[j-1];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -1339,8 +1342,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1363,9 +1366,9 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 			if(i%blockEdgeSize==0)
 				I++;
 			realPrecision = pwrErrBound[K*R23+I*R3+J]; //J==0
-			updateReqLength = 0;			
-			
-			index2D = i*r3;		
+			updateReqLength = 0;
+
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -1385,8 +1388,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1401,13 +1404,13 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 			for (j = 1; j < r3; j++)
 			{
 				index = k*r23 + i*r3 + j;
-				
+
 				if(j%blockEdgeSize==0)
 				{
 					J++;
 					realPrecision = pwrErrBound[K*R23+I*R3+J];
-					updateReqLength = 0;			
-				}							
+					updateReqLength = 0;
+				}
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
 				diff = spaceFillingValue[index] - pred3D;
@@ -1428,8 +1431,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 						reqBytesLength = reqLength/8;
 						resiBitsLength = reqLength%8;
 						updateReqLength = 1;
-					}							
-					
+					}
+
 					type[index] = 0;
 
 					addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1458,7 +1461,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, double min, double max)
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
 
 	convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
@@ -1503,24 +1506,24 @@ void compressGroupIDArray_double(char* groupID, TightDataPointStorageD* tdps)
 	for(i=1; i<dataLength;i++)
 	{
 		curGroupIDValue = groupID[i];
-		standGroupID[i] = (curGroupIDValue - lastGroupIDValue) + offset; 
+		standGroupID[i] = (curGroupIDValue - lastGroupIDValue) + offset;
 		lastGroupIDValue = curGroupIDValue;
 	}
-	
+
 	unsigned char* out = NULL;
 	size_t outSize;
-	
+
 	HuffmanTree* huffmanTree = SZ_Reset();
 	encode_withTree(huffmanTree, standGroupID, dataLength, &out, &outSize);
 	SZ_ReleaseHuffman(huffmanTree);
-	
+
 	tdps->pwrErrBoundBytes = out; //groupIDArray
 	tdps->pwrErrBoundBytes_size = outSize;
-	
+
 	free(standGroupID);
 }
 
-TightDataPointStorageD* SZ_compress_double_1D_MDQ_pwrGroup(double* oriData, size_t dataLength, int errBoundMode, 
+TightDataPointStorageD* SZ_compress_double_1D_MDQ_pwrGroup(double* oriData, size_t dataLength, int errBoundMode,
 double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f)
 {
 	size_t i;
@@ -1543,52 +1546,52 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS
 	getPrecisionReqLength_double(realPrecision);
 	short radExpo = getExponent_double(valueRangeSize/2);
 	short lastGroupNum = 0, groupNum, grpNum = 0;
-	
+
 	double* groupErrorBounds = generateGroupErrBounds(errBoundMode, realPrecision, pwrErrRatio);
 	exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds);
-	
+
 	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	char *groupID = (char*) malloc(dataLength*sizeof(char));
 	char *gp = groupID;
-		
-	double* spaceFillingValue = oriData; 
-	
+
+	double* spaceFillingValue = oriData;
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[8];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	int state;
 	double curData, decValue;
 	double pred;
 	double predAbsErr;
 	double interval = 0;
-	
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-	
+
 	curData = spaceFillingValue[0];
 	groupNum = computeGroupNum_double(vce->data);
-	
+
 	if(curData > 0 && groupNum >= 0)
 	{
 		groups = posGroups;
@@ -1613,17 +1616,17 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS
 		flags = &neg_01_flag;
 		grpNum = 0;
 	}
-		
+
 	listAdd_double_group(groups, flags, groupNum, spaceFillingValue[0], vce->data, gp);
 	gp++;
-	
+
 	for(i=1;i<dataLength;i++)
 	{
 		curData = oriData[i];
 		//printf("i=%d, posGroups[3]=%f, negGroups[3]=%f\n", i, posGroups[3], negGroups[3]);
-		
+
 		groupNum = computeGroupNum_double(curData);
-		
+
 		if(curData > 0 && groupNum >= 0)
 		{
 			groups = posGroups;
@@ -1656,25 +1659,25 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS
 			updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,8);
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-			listAdd_double_group(groups, flags, lastGroupNum, curData, vce->data, gp);	//set the group number to be last one in order to get the groupID array as smooth as possible.		
+			listAdd_double_group(groups, flags, lastGroupNum, curData, vce->data, gp);	//set the group number to be last one in order to get the groupID array as smooth as possible.
 		}
 		else if(flags[grpNum]==0) //the dec value may not be in the same group
-		{	
+		{
 			type[i] = 0;
 			compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 			updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,8);
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			//decGroupNum = computeGroupNum_double(vce->data);
-			
+
 			//if(decGroupNum < groupNum)
 			//	decValue = curData>0?pow(2, groupNum):-pow(2, groupNum);
 			//else if(decGroupNum > groupNum)
 			//	decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1);
 			//else
 			//	decValue = vce->data;
-			
-			decValue = vce->data;	
+
+			decValue = vce->data;
 			listAdd_double_group(groups, flags, groupNum, curData, decValue, gp);
 			lastGroupNum = curData>0?groupNum + 2: -(groupNum+2);
 		}
@@ -1696,7 +1699,7 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS
 				decValue = pred - state*interval;
 			}
 			//decGroupNum = computeGroupNum_double(pred);
-			
+
 			if((decValue>0&&curData<0)||(decValue<0&&curData>=0))
 				decValue = 0;
 			//else
@@ -1706,64 +1709,64 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeS
 			//	else if(decGroupNum > groupNum)
 			//		decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1);
 			//	else
-			//		decValue = pred;				
+			//		decValue = pred;
 			//}
-			
+
 			if(fabs(curData-decValue)>realGroupPrecision)
-			{	
+			{
 				type[i] = 0;
 				compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,8);
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 
-				decValue = vce->data;	
+				decValue = vce->data;
 			}
-			
-			listAdd_double_group(groups, flags, groupNum, curData, decValue, gp);			
-			lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2);			
+
+			listAdd_double_group(groups, flags, groupNum, curData, decValue, gp);
+			lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2);
 		}
-		gp++;	
+		gp++;
 
 	}
-	
+
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
+
 	//combineTypeAndGroupIDArray(nbBins, dataLength, &type, groupID);
 
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
-			realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo);	
-	
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
+			realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo);
+
 	compressGroupIDArray_double(groupID, tdps);
-	
+
 	free(posGroups);
 	free(negGroups);
 	free(posFlags);
 	free(negFlags);
 	free(groupID);
 	free(groupErrorBounds);
-	
+
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
-	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageD(tdps);	
-	
+	free(lce);
+	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageD(tdps);
+
 	return tdps;
 }
 
 void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, double *oriData,
 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, double valueRangeSize, double medianValue_f, size_t *outSize)
 {
-        TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode, 
-        absErrBound, relBoundRatio, pwrErrRatio, 
+        TightDataPointStorageD* tdps = SZ_compress_double_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode,
+        absErrBound, relBoundRatio, pwrErrRatio,
         valueRangeSize, medianValue_f);
 
         convertTDPStoFlatBytes_double(tdps, newByteData, outSize);
@@ -1805,7 +1808,7 @@ void SZ_compress_args_double_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newBy
 	}
 
 	double valueRangeSize, medianValue_f;
-	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
 	for(size_t i=0; i<dataLength; i++){
@@ -1866,7 +1869,7 @@ void SZ_compress_args_double_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newBy
 	}
 
 	double valueRangeSize, medianValue_f;
-	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
 	for(size_t i=0; i<dataLength; i++){
@@ -1928,7 +1931,7 @@ void SZ_compress_args_double_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newBy
 	}
 
 	double valueRangeSize, medianValue_f;
-	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_double(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 2.23e-16;
 	for(size_t i=0; i<dataLength; i++){
diff --git a/sz/src/sz_double_ts.c b/sz/src/sz_double_ts.c
index 3c9b184e..e83a27c6 100644
--- a/sz/src/sz_double_ts.c
+++ b/sz/src/sz_double_ts.c
@@ -2,16 +2,19 @@
  *  @file sz_double_ts.c
  *  @author Sheng Di and Dingwen Tao
  *  @date Aug, 2016
- *  @brief 
+ *  @brief Compression of double-precision data that uses the previous time step's values as the prediction.
  *  (C) 2016 by Mathematics and Computer Science (MCS), Argonne National Laboratory.
  *      See COPYRIGHT in top-level directory.
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -23,7 +26,7 @@
 #include "sz_double_ts.h"
 
 unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength, double* preData, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	double pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -37,7 +40,7 @@ unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength,
 			pred_err = fabs(pred_value - oriData[i]);
 			radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -52,13 +55,13 @@ unsigned int optimize_intervals_double_1D_ts(double *oriData, size_t dataLength,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	return powerOf2;
 }
@@ -71,59 +74,59 @@ double realPrecision, double valueRangeSize, double medianValue_d)
 	//double* decData = (double*)malloc(sizeof(double)*dataLength);
 	//memset(decData, 0, sizeof(double)*dataLength);
 	double* decData = preStepData;
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 		quantization_intervals = optimize_intervals_double_1D_ts(oriData, dataLength, preStepData, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 
 	size_t i;
 	int reqLength;
 	double medianValue = medianValue_d;
 	short radExpo = getExponent_double(valueRangeSize/2);
 
-	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_double(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	double* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
 
 	unsigned char preDataBytes[8];
 	longToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	DoubleValueCompressElement *vce = (DoubleValueCompressElement*)malloc(sizeof(DoubleValueCompressElement));
-	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));			
-				
-	//add the first data	
+	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
+
+	//add the first data
 	type[0] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	decData[0] = vce->data;
-		
+
 	//add the second data
 	type[1] = 0;
 	compressSingleDoubleValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,8);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-	decData[1] = vce->data;	
-	
+	decData[1] = vce->data;
+
 	int state = 0;
 	double checkRadius = 0;
 	double curData = 0;
@@ -136,7 +139,7 @@ double realPrecision, double valueRangeSize, double medianValue_d)
 	{
 		curData = spaceFillingValue[i];
 		pred = preStepData[i];
-		predAbsErr = fabs(curData - pred);	
+		predAbsErr = fabs(curData - pred);
 		if(predAbsErr<=checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -150,41 +153,41 @@ double realPrecision, double valueRangeSize, double medianValue_d)
 				type[i] = exe_params->intvRadius-state;
 				pred = pred - state*interval;
 			}
-			decData[i] = pred;	
+			decData[i] = pred;
 			continue;
 		}
-		
+
 		//unpredictable data processing
-		type[i] = 0;		
+		type[i] = 0;
 		compressSingleDoubleValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Double(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,8);
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		decData[i] = vce->data;
 	}//end of for
-		
+
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageD* tdps;
-			
-	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageD(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
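-	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageD(tdps);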
-		
+
 	//memcpy(preStepData, decData, dataLength*sizeof(double)); //update the data
 	//free(decData);
-	
+
 	return tdps;
 }
 
diff --git a/sz/src/sz_float.c b/sz/src/sz_float.c
index 6c851c51..7988ef50 100644
--- a/sz/src/sz_float.c
+++ b/sz/src/sz_float.c
@@ -8,11 +8,14 @@
  */
 
 
+#include "config.h"
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -46,10 +49,10 @@ void computeReqLength_float(double realPrecision, short radExpo, int* reqLength,
 	if(*reqLength<9)
 		*reqLength = 9;
 	if(*reqLength>32)
-	{	
+	{
 		*reqLength = 32;
 		*medianValue = 0;
-	}			
+	}
 }
 
 inline short computeReqLength_float_MSST19(double realPrecision)
@@ -59,7 +62,7 @@ inline short computeReqLength_float_MSST19(double realPrecision)
 }
 
 unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	float pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -74,7 +77,7 @@ unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, doub
 			pred_err = fabs(pred_value - oriData[i]);
 			radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -89,20 +92,20 @@ unsigned int optimize_intervals_float_1D(float *oriData, size_t dataLength, doub
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	float pred_value = 0, pred_err;
@@ -129,7 +132,7 @@ unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, d
 
 			//	if (max < oriData[index]) max = oriData[index];
 			//	if (min > oriData[index]) min = oriData[index];
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -193,7 +196,7 @@ unsigned int optimize_intervals_float_2D(float *oriData, size_t r1, size_t r2, d
 }
 
 unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -210,11 +213,11 @@ unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, s
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = fabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -247,7 +250,7 @@ unsigned int optimize_intervals_float_3D(float *oriData, size_t r1, size_t r2, s
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	//	struct timeval costStart, costEnd;
 	//	double cost_est = 0;
 	//
@@ -347,65 +350,65 @@ unsigned int optimize_intervals_float_4D(float *oriData, size_t r1, size_t r2, s
 	return powerOf2;
 }
 
-TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData, 
+TightDataPointStorageF* SZ_compress_float_1D_MDQ(float *oriData,
 size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 		quantization_intervals = optimize_intervals_float_1D_opt(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	//updateQuantizationInfo(quantization_intervals);	
+	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
 
 	size_t i;
 	int reqLength;
 	float medianValue = medianValue_f;
 	short radExpo = getExponent_float(valueRangeSize/2);
-	
-	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);	
+
+	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	float last3CmprsData[3] = {0};
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif		
-		
+#endif
+
 	//add the second data
 	type[1] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
@@ -413,7 +416,7 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = vce->data;
 #endif
@@ -424,15 +427,15 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 	float predAbsErr;
 	checkRadius = (quantization_intervals-1)*realPrecision;
 	float interval = 2*realPrecision;
-	
+
 	float recip_precision = 1/realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
-	{	
+	{
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		//pred = last3CmprsData[0];
-		predAbsErr = fabsf(curData - pred);	
+		predAbsErr = fabsf(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = ((int)(predAbsErr*recip_precision+1))>>1;
@@ -446,36 +449,36 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 				type[i] = intvRadius-state;
 				pred = pred - state*interval;
 			}
-				
-			//double-check the prediction error in case of machine-epsilon impact	
+
+			//double-check the prediction error in case of machine-epsilon impact
 			if(fabs(curData-pred)>realPrecision)
-			{	
-				type[i] = 0;				
+			{
+				type[i] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);		
-				
-				//listAdd_float(last3CmprsData, vce->data);	
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				//listAdd_float(last3CmprsData, vce->data);
 				pred = vce->data;
-#ifdef HAVE_TIMECMPR					
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					decData[i] = vce->data;
-#endif					
+#endif
 			}
 			else
 			{
 				//listAdd_float(last3CmprsData, pred);
-#ifdef HAVE_TIMECMPR					
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
-					decData[i] = pred;			
-#endif	
-			}	
+					decData[i] = pred;
+#endif
+			}
 			continue;
 		}
-		
-		//unpredictable data processing		
-		type[i] = 0;		
+
+		//unpredictable data processing
+		type[i] = 0;
 		compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,4);
@@ -486,20 +489,20 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 #ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[i] = vce->data;
-#endif	
-		
+#endif
+
 	}//end of for
-		
+
 //	char* expSegmentsInBytes;
 //	int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
+
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
 			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
@@ -508,26 +511,26 @@ size_t dataLength, float realPrecision, float valueRangeSize, float medianValue_
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%zu, sum=%d\n",quantization_intervals, exactDataNum, sum);
-*/	
+*/
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
 void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, unsigned char** newByteData, size_t *outSize)
-{	
-	int floatSize=sizeof(float);	
+{
+	int floatSize=sizeof(float);
 	size_t k = 0, i;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength;
 	/*No need to malloc because newByteData should always already be allocated with no less totalByteLength.*/
 	//*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -536,14 +539,14 @@ void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, unsi
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;	
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*floatSize);
 	else
@@ -551,15 +554,15 @@ void SZ_compress_args_float_StoreOriData(float* oriData, size_t dataLength, unsi
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=floatSize)
 			floatToBytes(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-char SZ_compress_args_float_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, float *oriData, 
+char SZ_compress_args_float_NoCkRngeNoGzip_1D(int cmprType, unsigned char** newByteData, float *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
-{		
-	char compressionType = 0;	
-	TightDataPointStorageF* tdps = NULL;	
+{
+	char compressionType = 0;
+	TightDataPointStorageF* tdps = NULL;
 
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
@@ -570,10 +573,10 @@ size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize,
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
-			{	
+			{
 				tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
 				compressionType = 0; //snapshot-based compression
 				multisteps->lastSnapshotStep = timestep;
@@ -583,23 +586,23 @@ size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize,
 		{
 			tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-			compressionType = 1; //time-series based compression 			
-		}		
+			compressionType = 1; //time-series based compression
+		}
 	}
 	else
 #endif
-		tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);	
+		tdps = SZ_compress_float_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, medianValue_f);
 
 	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
-	
+
 	if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength)
 		SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize);
-	
+
 	free_TightDataPointStorageF(tdps);
 	return compressionType;
 }
@@ -607,64 +610,64 @@ size_t dataLength, double realPrecision, size_t *outSize, float valueRangeSize,
 TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size_t r2, float realPrecision, float valueRangeSize, float medianValue_f)
 {
 #ifdef HAVE_TIMECMPR
-	float* decData = NULL;	
+	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	float recip_realPrecision = 1/realPrecision;
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_float_2D_opt(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
-	
-	size_t i,j; 
+
+	size_t i,j;
 	int reqLength;
 	float pred1D, pred2D;
 	float diff = 0.0;
 	float itvNum = 0;
 	float *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (float*)malloc(r2*sizeof(float));
 	memset(P0, 0, r2*sizeof(float));
 	P1 = (float*)malloc(r2*sizeof(float));
 	memset(P1, 0, r2*sizeof(float));
-		
+
 	float medianValue = medianValue_f;
 	short radExpo = getExponent_float(valueRangeSize/2);
-	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	float* spaceFillingValue = oriData; //
 
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
@@ -672,10 +675,10 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif	
+#endif
 
 	float curData;
 
@@ -690,19 +693,19 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 	{
 		if (diff < 0) itvNum = -itvNum;
 		type[1] = (int) (itvNum/2) + intvRadius;
-		P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision;	
+		P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision;
 
 		//ganrantee comporession error against the case of machine-epsilon
 		if(fabs(spaceFillingValue[1]-P1[1])>realPrecision)
-		{	
-			type[1] = 0;			
+		{
+			type[1] = 0;
 			compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 			updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,4);
-			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-			
+			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
 			P1[1] = vce->data;
-		}		
+		}
 	}
 	else
 	{
@@ -713,7 +716,7 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -732,17 +735,17 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 			if (diff < 0) itvNum = -itvNum;
 			type[j] = (int) (itvNum/2) + intvRadius;
 			P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision;
-		
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P1[j])>realPrecision)
-			{	
-				type[j] = 0;				
+			{
+				type[j] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-				
-				P1[j] = vce->data;	
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				P1[j] = vce->data;
 			}
 		}
 		else
@@ -754,16 +757,16 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -780,14 +783,14 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P0[0])>realPrecision)
-			{	
-				type[index] = 0;				
+			{
+				type[index] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-				
-				P0[0] = vce->data;	
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				P0[0] = vce->data;
 			}
 		}
 		else
@@ -799,11 +802,11 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -820,18 +823,18 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 				if (diff < 0) itvNum = -itvNum;
 				type[index] = (int) (itvNum/2) + intvRadius;
 				P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
-			
+
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[j])>realPrecision)
-				{	
-					type[index] = 0;					
+				{
+					type[index] = 0;
 					compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 					updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 					memcpy(preDataBytes,vce->curBytes,4);
-					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-					
-					P0[j] = vce->data;	
-				}			
+					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+					P0[j] = vce->data;
+				}
 			}
 			else
 			{
@@ -842,10 +845,10 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 		float *Pt;
@@ -853,27 +856,27 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
+	free(P1);
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
-//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n", 
+//	printf("exactDataNum=%d, expSegmentsInBytes_size=%d, exactMidByteArray->size=%d\n",
 //			exactDataNum, expSegmentsInBytes_size, exactMidByteArray->size);
-	
+
 //	for(i = 3800;i<3844;i++)
 //		printf("exactLeadNumArray->array[%d]=%d\n",i,exactLeadNumArray->array[i]);
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
@@ -881,20 +884,20 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ(float *oriData, size_t r1, size
 	free(vce);
 	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
 char SZ_compress_args_float_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
-{	
+{
 	size_t dataLength = r1*r2;
-	char compressionType = 0;	
-	TightDataPointStorageF* tdps = NULL; 
+	char compressionType = 0;
+	TightDataPointStorageF* tdps = NULL;
 
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
@@ -905,48 +908,48 @@ char SZ_compress_args_float_NoCkRngeNoGzip_2D(int cmprType, unsigned char** newB
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
-			{	
+			{
 				tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
 				compressionType = 0; //snapshot-based compression
 				multisteps->lastSnapshotStep = timestep;
-			}					
+			}
 		}
 		else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION)
 		{
 			tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-			compressionType = 1; //time-series based compression 			
+			compressionType = 1; //time-series based compression
 		}
 	}
 	else
 #endif
-		tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);	
+		tdps = SZ_compress_float_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, medianValue_f);
 
 	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
 
 	if(*outSize>3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + sizeof(float)*dataLength)
 		SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize);
-	
-	free_TightDataPointStorageF(tdps);	
-	
+
+	free_TightDataPointStorageF(tdps);
+
 	return compressionType;
 }
 
 TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, float valueRangeSize, float medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif		
+#endif
 
 	float recip_realPrecision = 1/realPrecision;
 	unsigned int quantization_intervals;
@@ -954,12 +957,12 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 	{
 		quantization_intervals = optimize_intervals_float_3D_opt(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	int intvRadius = quantization_intervals/2;	
-		
-	size_t i,j,k; 
+	int intvRadius = quantization_intervals/2;
+
+	size_t i,j,k;
 	int reqLength;
 	float pred1D, pred2D, pred3D;
 	float diff = 0.0;
@@ -973,7 +976,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 
 	float medianValue = medianValue_f;
 	short radExpo = getExponent_float(valueRangeSize/2);
-	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);	
+	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
@@ -1008,7 +1011,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[0] = P1[0];
 #endif
@@ -1027,18 +1030,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 		if (diff < 0) itvNum = -itvNum;
 		type[1] = (int) (itvNum/2) + intvRadius;
 		P1[1] = pred1D + 2 * (type[1] - intvRadius) * realPrecision;
-		
+
 		//ganrantee comporession error against the case of machine-epsilon
 		if(fabs(curData-P1[1])>realPrecision)
-		{	
-			type[1] = 0;			
+		{
+			type[1] = 0;
 			compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 			updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,4);
-			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-			
-			P1[1] = vce->data;	
-		}				
+			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+			P1[1] = vce->data;
+		}
 	}
 	else
 	{
@@ -1049,7 +1052,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -1068,18 +1071,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			if (diff < 0) itvNum = -itvNum;
 			type[j] = (int) (itvNum/2) + intvRadius;
 			P1[j] = pred1D + 2 * (type[j] - intvRadius) * realPrecision;
-			
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P1[j])>realPrecision)
-			{	
-				type[j] = 0;				
+			{
+				type[j] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-				
-				P1[j] = vce->data;	
-			}			
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				P1[j] = vce->data;
+			}
 		}
 		else
 		{
@@ -1090,10 +1093,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r2-1 */
@@ -1101,7 +1104,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		curData = spaceFillingValue[index];
 		diff = curData - pred1D;
@@ -1113,18 +1116,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			if (diff < 0) itvNum = -itvNum;
 			type[index] = (int) (itvNum/2) + intvRadius;
 			P1[index] = pred1D + 2 * (type[index] - intvRadius) * realPrecision;
-			
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P1[index])>realPrecision)
-			{	
-				type[index] = 0;				
+			{
+				type[index] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-				
-				P1[index] = vce->data;	
-			}			
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				P1[index] = vce->data;
+			}
 		}
 		else
 		{
@@ -1135,10 +1138,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[index] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P1[index];
-#endif		
+#endif
 
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++)
@@ -1156,18 +1159,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				if (diff < 0) itvNum = -itvNum;
 				type[index] = (int) (itvNum/2) + intvRadius;
 				P1[index] = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
-				
+
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P1[index])>realPrecision)
-				{	
-					type[index] = 0;					
+				{
+					type[index] = 0;
 					compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 					updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 					memcpy(preDataBytes,vce->curBytes,4);
-					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-					
-					P1[index] = vce->data;	
-				}				
+					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+					P1[index] = vce->data;
+				}
 			}
 			else
 			{
@@ -1178,10 +1181,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P1[index] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P1[index];
-#endif			
+#endif
 		}
 	}
 
@@ -1203,18 +1206,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			if (diff < 0) itvNum = -itvNum;
 			type[index] = (int) (itvNum/2) + intvRadius;
 			P0[0] = pred1D + 2 * (type[index] - intvRadius) * realPrecision;
-			
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabs(curData-P0[0])>realPrecision)
-			{	
-				type[index] = 0;				
+			{
+				type[index] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-				
-				P0[0] = vce->data;	
-			}			
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+				P0[0] = vce->data;
+			}
 		}
 		else
 		{
@@ -1225,7 +1228,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
@@ -1248,14 +1251,14 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				P0[j] = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[j])>realPrecision)
-				{	
-					type[index] = 0;					
+				{
+					type[index] = 0;
 					compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 					updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 					memcpy(preDataBytes,vce->curBytes,4);
-					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-					
-					P0[j] = vce->data;	
+					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+					P0[j] = vce->data;
 				}
 			}
 			else
@@ -1267,10 +1270,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 	    /* Process Row-1 --> Row-r2-1 */
@@ -1279,7 +1282,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			curData = spaceFillingValue[index];
 			diff = spaceFillingValue[index] - pred2D;
@@ -1293,15 +1296,15 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				P0[index2D] = pred2D + 2 * (type[index] - intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[index2D])>realPrecision)
-				{	
-					type[index] = 0;					
+				{
+					type[index] = 0;
 					compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 					updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 					memcpy(preDataBytes,vce->curBytes,4);
-					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-					
-					P0[index2D] = vce->data;	
-				}				
+					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+					P0[index2D] = vce->data;
+				}
 			}
 			else
 			{
@@ -1312,17 +1315,17 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[index2D] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[index2D];
-#endif			
+#endif
 
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -1336,18 +1339,18 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 					if (diff < 0) itvNum = -itvNum;
 					type[index] = (int) (itvNum/2) + intvRadius;
 					P0[index2D] = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
-					
+
 					//ganrantee comporession error against the case of machine-epsilon
 					if(fabs(curData-P0[index2D])>realPrecision)
-					{	
-						type[index] = 0;						
+					{
+						type[index] = 0;
 						compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 						updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 						memcpy(preDataBytes,vce->curBytes,4);
-						addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);	
-						
-						P0[index2D] = vce->data;	
-					}					
+						addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
+
+						P0[index2D] = vce->data;
+					}
 				}
 				else
 				{
@@ -1358,10 +1361,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 					addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 					P0[index2D] = vce->data;
 				}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					decData[index] = P0[index2D];
-#endif				
+#endif
 			}
 		}
 
@@ -1381,7 +1384,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitsLength, 
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
 //sdi:Debug
@@ -1397,24 +1400,24 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ(float *oriData, size_t r1, size
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
 	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * @cmprType compressionType (SZ_FORCE_SNAPSHOT_COMPRESSION, SZ_FORCE_TEMPORAL_COMPRESSION or SZ_PEORI_TEMPORAL_COMPRESSION)
- * 
+ *
  * */
 char SZ_compress_args_float_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newByteData, float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, float valueRangeSize, float medianValue_f)
 {
 	size_t dataLength = r1*r2*r3;
-	char compressionType = 0;	
-	TightDataPointStorageF* tdps = NULL; 
+	char compressionType = 0;
+	TightDataPointStorageF* tdps = NULL;
 
 #ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
@@ -1425,31 +1428,31 @@ char SZ_compress_args_float_NoCkRngeNoGzip_3D(int cmprType, unsigned char** newB
 			if(timestep % confparams_cpr->snapshotCmprStep != 0)
 			{
 				tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-				compressionType = 1; //time-series based compression 
+				compressionType = 1; //time-series based compression
 			}
 			else
 			{
-				if(confparams_cpr->withRegression == SZ_NO_REGRESSION)	
+				if(confparams_cpr->withRegression == SZ_NO_REGRESSION)
 					tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
 				else
 					*newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
 				compressionType = 0; //snapshot-based compression
 				multisteps->lastSnapshotStep = timestep;
-			}					
+			}
 		}
 		else if(cmprType == SZ_FORCE_SNAPSHOT_COMPRESSION)
 		{
-			if(confparams_cpr->withRegression == SZ_NO_REGRESSION)	
+			if(confparams_cpr->withRegression == SZ_NO_REGRESSION)
 				tdps = SZ_compress_float_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, medianValue_f);
 			else
 				*newByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r1, r2, r3, realPrecision, outSize);
 			compressionType = 0; //snapshot-based compression
-			multisteps->lastSnapshotStep = timestep;			
+			multisteps->lastSnapshotStep = timestep;
 		}
 		else if(cmprType == SZ_FORCE_TEMPORAL_COMPRESSION)
 		{
 			tdps = SZ_compress_float_1D_MDQ_ts(oriData, dataLength, multisteps, realPrecision, valueRangeSize, medianValue_f);
-			compressionType = 1; //time-series based compression 			
+			compressionType = 1; //time-series based compression
 		}
 	}
 	else
@@ -1480,7 +1483,7 @@ TightDataPointStorageF* SZ_compress_float_4D_MDQ(float *oriData, size_t r1, size
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	float pred1D, pred2D, pred3D;
 	float diff = 0.0;
@@ -1807,19 +1810,19 @@ char SZ_compress_args_float_NoCkRngeNoGzip_4D(unsigned char** newByteData, float
 		SZ_compress_args_float_StoreOriData(oriData, dataLength, newByteData, outSize);
 
 	free_TightDataPointStorageF(tdps);
-	
+
 	return 0;
 }
 
 /*MSST19*/
-TightDataPointStorageF* SZ_compress_float_1D_MDQ_MSST19(float *oriData, 
+TightDataPointStorageF* SZ_compress_float_1D_MDQ_MSST19(float *oriData,
 size_t dataLength, double realPrecision, float valueRangeSize, float medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif	
+#endif
 
 	//struct ClockPoint clockPointBuild;
 	//TimeDurationStart("build", &clockPointBuild);
@@ -1830,7 +1833,7 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 		quantization_intervals = exe_params->intvCapacity;
 	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
-	
+
 	double* precisionTable = (double*)malloc(sizeof(double) * quantization_intervals);
 	double inv = 2.0-pow(2, -(confparams_cpr->plus_bits));
     for(int i=0; i<quantization_intervals; i++){
@@ -1848,25 +1851,25 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 	float medianValue = medianValue_f;
 	//float medianInverse = 1 / medianValue_f;
 	//short radExpo = getExponent_float(valueRangeSize/2);
-	
-	reqLength = computeReqLength_float_MSST19(realPrecision);	
+
+	reqLength = computeReqLength_float_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, dataLength/2/8);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, dataLength/2);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	float last3CmprsData[3] = {0};
@@ -1875,8 +1878,8 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleFloatValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
@@ -1884,11 +1887,11 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif		
-		
+#endif
+
 	//add the second data
 	type[1] = 0;
 	compressSingleFloatValue_MSST19(vce, spaceFillingValue[1], realPrecision, reqLength, reqBytesLength, resiBitsLength);
@@ -1897,7 +1900,7 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = vce->data;
 #endif
@@ -1951,30 +1954,30 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 #ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[i] = vce->data;
-#endif	
-		
+#endif
+
 	}//end of for
-		
+
 //	printf("miss:%d, hit:%d\n", miss, hit);
 
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
+
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
 			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
     tdps->plus_bits = confparams_cpr->plus_bits;
-	
+
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
 	freeTopLevelTableWideInterval(&levelTable);
@@ -1984,17 +1987,17 @@ size_t dataLength, double realPrecision, float valueRangeSize, float medianValue
 TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision, float valueRangeSize, float medianValue_f)
 {
 #ifdef HAVE_TIMECMPR
-	float* decData = NULL;	
+	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif	
-	
+#endif
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_float_2D_opt_MSST19(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
@@ -2009,44 +2012,44 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 	struct TopLevelTableWideInterval levelTable;
 	MultiLevelCacheTableWideIntervalBuild(&levelTable, precisionTable, quantization_intervals, realPrecision, confparams_cpr->plus_bits);
 
-	size_t i,j; 
+	size_t i,j;
 	int reqLength;
 	float pred1D, pred2D;
 	//float diff = 0.0;
 	//double itvNum = 0;
 	float *P0, *P1;
 	double predRelErrRatio;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (float*)malloc(r2*sizeof(float));
 	memset(P0, 0, r2*sizeof(float));
 	P1 = (float*)malloc(r2*sizeof(float));
 	memset(P1, 0, r2*sizeof(float));
-		
+
 	float medianValue = medianValue_f;
 	//float medianValueInverse = 1 / medianValue_f;
 	//short radExpo = getExponent_float(valueRangeSize/2);
-	reqLength = computeReqLength_double_MSST19(realPrecision);	
+	reqLength = computeReqLength_double_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	float* spaceFillingValue = oriData; //
 
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
@@ -2063,7 +2066,7 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
     for(int i=0; i<=range; i++){
         tables[i] = levelTable.subTables[i].table;
     }
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	compressSingleFloatValue_MSST19(vce, spaceFillingValue[0], realPrecision, reqLength, reqBytesLength, resiBitsLength);
@@ -2071,10 +2074,10 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[0] = vce->data;
-#endif	
+#endif
 
 	float curData;
 	int state;
@@ -2107,7 +2110,7 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		P1[1] = vce->data;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -2141,16 +2144,16 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P1[j] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -2179,11 +2182,11 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -2215,10 +2218,10 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				P0[j] = vce->data;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 		float *Pt;
@@ -2226,19 +2229,19 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
 	free(P1);
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
+
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 	tdps->plus_bits = confparams_cpr->plus_bits;
 
@@ -2250,24 +2253,24 @@ TightDataPointStorageF* SZ_compress_float_2D_MDQ_MSST19(float *oriData, size_t r
 	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
-	freeTopLevelTableWideInterval(&levelTable);	
-	return tdps;	
+	freeTopLevelTableWideInterval(&levelTable);
+	return tdps;
 }
 
 TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float valueRangeSize, float medianValue_f)
 {
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
-#endif		
+#endif
 
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_float_3D_opt_MSST19(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
@@ -2298,7 +2301,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 	float medianValue = medianValue_f;
 	//float medianValueInverse = 1/ medianValue_f;
 	//short radExpo = getExponent_float(valueRangeSize/2);
-	reqLength = computeReqLength_float_MSST19(realPrecision);	
+	reqLength = computeReqLength_float_MSST19(realPrecision);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
@@ -2315,7 +2318,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
@@ -2348,7 +2351,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	P1[0] = vce->data;
 	//miss++;
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[0] = P1[0];
 #endif
@@ -2384,7 +2387,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 		P1[1] = vce->data;
 		//miss++;
 	}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData[1] = P1[1];
 #endif
@@ -2421,10 +2424,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 			P1[j] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[j] = P1[j];
-#endif		
+#endif
 	}
 
 	/* Process Row-1 --> Row-r2-1 */
@@ -2432,7 +2435,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		curData = spaceFillingValue[index];
         predRelErrRatio = curData / pred1D;
@@ -2461,10 +2464,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 			P1[index] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P1[index];
-#endif		
+#endif
 
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++)
@@ -2505,10 +2508,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 				P1[index] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P1[index];
-#endif			
+#endif
 		}
 	}
 
@@ -2547,7 +2550,7 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 			P0[0] = vce->data;
 			//miss++;
 		}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 		if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 			decData[index] = P0[0];
 #endif
@@ -2586,10 +2589,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 				P0[j] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[j];
-#endif			
+#endif
 		}
 
 	    /* Process Row-1 --> Row-r2-1 */
@@ -2628,10 +2631,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 				P0[index2D] = vce->data;
 				//miss++;
 			}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 			if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 				decData[index] = P0[index2D];
-#endif			
+#endif
 
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
@@ -2670,10 +2673,10 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 					P0[index2D] = vce->data;
 					//miss++;
 				}
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					decData[index] = P0[index2D];
-#endif				
+#endif
 			}
 		}
 
@@ -2693,20 +2696,20 @@ TightDataPointStorageF* SZ_compress_float_3D_MDQ_MSST19(float *oriData, size_t r
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitsLength, 
+			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 	tdps->plus_bits = confparams_cpr->plus_bits;
 
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
 	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
 	free(precisionTable);
-	freeTopLevelTableWideInterval(&levelTable);	
-	return tdps;	
+	freeTopLevelTableWideInterval(&levelTable);
+	return tdps;
 }
 
 
@@ -2714,10 +2717,10 @@ void SZ_compress_args_float_withinRange(unsigned char** newByteData, float *oriD
 {
 	TightDataPointStorageF* tdps = (TightDataPointStorageF*) malloc(sizeof(TightDataPointStorageF));
 	tdps->rtypeArray = NULL;
-	tdps->typeArray = NULL;	
+	tdps->typeArray = NULL;
 	tdps->leadNumArray = NULL;
 	tdps->residualMidBits = NULL;
-	
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactMidBytes = (unsigned char*)malloc(sizeof(unsigned char)*4);
@@ -2726,7 +2729,7 @@ void SZ_compress_args_float_withinRange(unsigned char** newByteData, float *oriD
 	float value = oriData[0];
 	floatToBytes(tdps->exactMidBytes, value);
 	tdps->exactMidBytes_size = 4;
-	
+
 	size_t tmpOutSize;
 	//unsigned char *tmpByteData;
 	convertTDPStoFlatBytes_float(tdps, newByteData, &tmpOutSize);
@@ -2734,22 +2737,22 @@ void SZ_compress_args_float_withinRange(unsigned char** newByteData, float *oriD
 	//*newByteData = (unsigned char*)malloc(sizeof(unsigned char)*12); //for floating-point data (1+3+4+4)
 	//memcpy(*newByteData, tmpByteData, 12);
 	*outSize = tmpOutSize; //8+SZ_SIZE_TYPE; //8==3+1+4(float_size)
-	free_TightDataPointStorageF(tdps);	
+	free_TightDataPointStorageF(tdps);
 }
 
 /*
-int SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_float_wRngeNoGzip(unsigned char** newByteData, float *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	float valueRangeSize = 0, medianValue = 0;
-	
+
 	float min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
 	float max = min+valueRangeSize;
 	double realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
@@ -2760,7 +2763,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 		if(r5==0&&r4==0&&r3==0&&r2==0)
 		{
 			if(errBoundMode>=PW_REL)
-			{	
+			{
 				SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(newByteData, oriData, pwrErrRatio, r1, outSize, min, max);
 				//SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(newByteData, oriData, r1, absErr_Bound, relBoundRatio, pwrErrRatio, valueRangeSize, medianValue, outSize);
 			}
@@ -2793,27 +2796,27 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwrErrRatio)
 }
 */
 
-int SZ_compress_args_float(int cmprType, int withRegression, unsigned char** newByteData, float *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_float(int cmprType, int withRegression, unsigned char** newByteData, float *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRatio)
 {
 	confparams_cpr->dataType = SZ_FLOAT;
 	confparams_cpr->errorBoundMode = errBoundMode; //this is used to print the metadata if needed...
 	if(errBoundMode==PW_REL)
 	{
-		confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;	
+		confparams_cpr->pw_relBoundRatio = pwRelBoundRatio;
 	}
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
-	
+
 	if(dataLength <= MIN_NUM_OF_ELEMENTS)
 	{
 		*newByteData = SZ_skip_compress_float(oriData, dataLength, outSize);
 		return status;
 	}
-	
+
 	float valueRangeSize = 0, medianValue = 0;
-	
+
 	unsigned char * signs = NULL;
 	bool positive = true;
 	float nearZero = 0.0;
@@ -2827,13 +2830,13 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 		min = computeRangeSize_float_MSST19(oriData, dataLength, &valueRangeSize, &medianValue, signs, &positive, &nearZero);
 	}
 	else
-		min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);	
+		min = computeRangeSize_float(oriData, dataLength, &valueRangeSize, &medianValue);
 	float max = min+valueRangeSize;
 	confparams_cpr->fmin = min;
 	confparams_cpr->fmax = max;
-	
-	double realPrecision = 0; 
-	
+
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -2844,30 +2847,30 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 	{
 		confparams_cpr->errorBoundMode = ABS;
 		realPrecision = confparams_cpr->absErrBound = computeABSErrBoundFromNORM_ERR(confparams_cpr->normErr, dataLength);
-		//printf("realPrecision=%lf\n", realPrecision);				
+		//printf("realPrecision=%lf\n", realPrecision);
 	}
 	else
 	{
 		realPrecision = getRealPrecision_float(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
 		confparams_cpr->absErrBound = realPrecision;
-	}	
+	}
 	if(valueRangeSize <= realPrecision)
 	{
 #ifdef HAVE_WRITESTATS
 		writeConstantFlag(1);
-#endif	
+#endif
 		if(confparams_cpr->errorBoundMode>=PW_REL && confparams_cpr->accelerate_pw_rel_compression == 1)
-			free(signs);		
+			free(signs);
 		SZ_compress_args_float_withinRange(newByteData, oriData, dataLength, outSize);
 	}
 	else
 	{
 #ifdef HAVE_WRITESTATS
 		writeConstantFlag(0);
-#endif			
+#endif
 		size_t tmpOutSize = 0;
 		unsigned char* tmpByteData;
-		
+
 		if (r2==0)
 		{
 			if(confparams_cpr->errorBoundMode>=PW_REL)
@@ -2883,25 +2886,25 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
-#endif				
+#endif
 					{
-#ifdef HAVE_RANDOMACCESS						
+#ifdef HAVE_RANDOMACCESS
 						if(confparams_cpr->randomAccess == 0)
 						{
-#endif							
+#endif
 							SZ_compress_args_float_NoCkRngeNoGzip_1D(cmprType, &tmpByteData, oriData, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 							if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1)
 								SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 #ifdef HAVE_RANDOMACCESS
 						}
 						else
-							tmpByteData = SZ_compress_float_1D_MDQ_decompression_random_access_with_blocked_regression(oriData, r1, realPrecision, &tmpOutSize);			
-#endif							
+							tmpByteData = SZ_compress_float_1D_MDQ_decompression_random_access_with_blocked_regression(oriData, r1, realPrecision, &tmpOutSize);
+#endif
 					}
 		}
 		else
 		if (r3==0)
-		{			
+		{
 			if(confparams_cpr->errorBoundMode>=PW_REL)
 			{
 				if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768)
@@ -2911,28 +2914,28 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 			}
 			else
 #ifdef HAVE_TIMECMPR
-				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)				
+				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
 #endif
 				{
-#ifdef HAVE_RANDOMACCESS															
+#ifdef HAVE_RANDOMACCESS
 					if(confparams_cpr->randomAccess == 0)
 					{
-#endif							
+#endif
 						if(withRegression == SZ_NO_REGRESSION)
 							SZ_compress_args_float_NoCkRngeNoGzip_2D(cmprType, &tmpByteData, oriData, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 						else // SZ 2.1 (2D)
 						{
 							tmpByteData = SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);//SZ 2.1 (2D)
 							if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1)
-								SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);						
+								SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 						}
-#ifdef HAVE_RANDOMACCESS							
-					}					
-					else 
-						tmpByteData = SZ_compress_float_2D_MDQ_decompression_random_access_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize); 
-#endif	
+#ifdef HAVE_RANDOMACCESS
+					}
+					else
+						tmpByteData = SZ_compress_float_2D_MDQ_decompression_random_access_with_blocked_regression(oriData, r2, r1, realPrecision, &tmpOutSize);
+#endif
 				}
 		}
 		else
@@ -2947,28 +2950,28 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 			}
 			else
 #ifdef HAVE_TIMECMPR
-				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)				
+				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 						multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
 #endif
 				{
-#ifdef HAVE_RANDOMACCESS															
+#ifdef HAVE_RANDOMACCESS
 					if(confparams_cpr->randomAccess == 0)
 					{
-#endif						
+#endif
 						if(withRegression == SZ_NO_REGRESSION)
 							SZ_compress_args_float_NoCkRngeNoGzip_3D(cmprType, &tmpByteData, oriData, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 						else  //SZ 2.1 (3D)
 						{
 							tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize); //SZ 2.1 (3D)
 							if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1)
-								SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);		
+								SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 						}
-#ifdef HAVE_RANDOMACCESS							
-					}					
+#ifdef HAVE_RANDOMACCESS
+					}
 					else
-						tmpByteData = SZ_compress_float_3D_MDQ_decompression_random_access_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);	
-#endif					
+						tmpByteData = SZ_compress_float_3D_MDQ_decompression_random_access_with_blocked_regression(oriData, r3, r2, r1, realPrecision, &tmpOutSize);
+#endif
 				}
 		}
 		else
@@ -2979,22 +2982,22 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 				if(confparams_cpr->accelerate_pw_rel_compression && confparams_cpr->maxRangeRadius <= 32768)
 					SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log_MSST19(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, valueRangeSize, signs, &positive, min, max, nearZero);
 				else
-					SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);				
+					SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(&tmpByteData, oriData, pwRelBoundRatio, r4*r3, r2, r1, &tmpOutSize, min, max);
 			}
 			else
 #ifdef HAVE_TIMECMPR
-				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)				
+				if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 					multisteps->compressionType = SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
 				else
 #endif
 				{
 					if(withRegression == SZ_NO_REGRESSION)
 						SZ_compress_args_float_NoCkRngeNoGzip_4D(&tmpByteData, oriData, r4, r3, r2, r1, realPrecision, &tmpOutSize, valueRangeSize, medianValue);
-					else 
+					else
 					{
 						tmpByteData = SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(oriData, r4*r3, r2, r1, realPrecision, &tmpOutSize); //SZ 2.1 4D
 						if(tmpOutSize>=dataLength*sizeof(float) + 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1)
-							SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);						
+							SZ_compress_args_float_StoreOriData(oriData, dataLength, &tmpByteData, &tmpOutSize);
 					}
 				}
 		}
@@ -3017,10 +3020,10 @@ int errBoundMode, double absErr_Bound, double relBoundRatio, double pwRelBoundRa
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the float compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
 
@@ -3435,7 +3438,7 @@ size_t r1, size_t s1, size_t e1)
 	//updateQuantizationInfo(quantization_intervals);
 	int intvRadius = quantization_intervals/2;
 
-	size_t i; 
+	size_t i;
 	int reqLength;
 	float medianValue = medianValue_f;
 	short radExpo = getExponent_float(valueRangeSize/2);
@@ -3558,7 +3561,7 @@ size_t r1, size_t r2, size_t s1, size_t s2, size_t e1, size_t e2)
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j; 
+	size_t i,j;
 	int reqLength;
 	float pred1D, pred2D;
 	float diff = 0.0;
@@ -3769,7 +3772,7 @@ size_t r1, size_t r2, size_t r3, size_t s1, size_t s2, size_t s3, size_t e1, siz
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	float pred1D, pred2D, pred3D;
 	float diff = 0.0;
@@ -4110,7 +4113,7 @@ size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, siz
 		quantization_intervals = exe_params->intvCapacity;
 	int intvRadius = quantization_intervals/2;
 
-	size_t i,j,k; 
+	size_t i,j,k;
 	int reqLength;
 	float pred1D, pred2D, pred3D;
 	float diff = 0.0;
@@ -4448,7 +4451,7 @@ size_t r1, size_t r2, size_t r3, size_t r4, size_t s1, size_t s2, size_t s3, siz
 }
 
 unsigned int optimize_intervals_float_1D_opt_MSST19(float *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	float pred_value = 0;
 	double pred_err;
@@ -4463,14 +4466,14 @@ unsigned int optimize_intervals_float_1D_opt_MSST19(float *oriData, size_t dataL
 		if(*data_pos == 0){
     		data_pos += confparams_cpr->sampleDistance;
             continue;
-		}	
+		}
 	    tempIndex++;
 		totalSampleSize++;
 		pred_value = data_pos[-1];
 		pred_err = fabs((double)*data_pos / pred_value);
 		radiusIndex = (unsigned long)fabs(log2(pred_err)/divider+0.5);
 		if(radiusIndex>=confparams_cpr->maxRangeRadius)
-			radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+			radiusIndex = confparams_cpr->maxRangeRadius - 1;
 		intervals[radiusIndex]++;
 
 		data_pos += confparams_cpr->sampleDistance;
@@ -4486,19 +4489,19 @@ unsigned int optimize_intervals_float_1D_opt_MSST19(float *oriData, size_t dataL
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_float_2D_opt_MSST19(float *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	float pred_value = 0, pred_err;
@@ -4516,7 +4519,7 @@ unsigned int optimize_intervals_float_2D_opt_MSST19(float *oriData, size_t r1, s
 		if(*data_pos == 0){
         	data_pos += confparams_cpr->sampleDistance;
         	continue;
-		}		
+		}
 		totalSampleSize++;
 		pred_value = data_pos[-1] + data_pos[-r2] - data_pos[-r2-1];
 		pred_err = fabs(pred_value / *data_pos);
@@ -4558,7 +4561,7 @@ unsigned int optimize_intervals_float_2D_opt_MSST19(float *oriData, size_t r1, s
 }
 
 unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -4577,7 +4580,7 @@ unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, s
 		if(*data_pos == 0){
     		data_pos += confparams_cpr->sampleDistance;
         	continue;
-		}		
+		}
 		totalSampleSize++;
 		pred_value = data_pos[-1] + data_pos[-r3] + data_pos[-r23] - data_pos[-1-r23] - data_pos[-r3-1] - data_pos[-r3-r23] + data_pos[-r3-r23-1];
 		pred_err = fabsf(*data_pos / pred_value);
@@ -4601,7 +4604,7 @@ unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, s
 			if(offset_count == 0) offset_count ++;
 		}
 		else data_pos += confparams_cpr->sampleDistance;
-	}	
+	}
 	//compute the appropriate number
 	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 	size_t sum = 0;
@@ -4624,7 +4627,7 @@ unsigned int optimize_intervals_float_3D_opt_MSST19(float *oriData, size_t r1, s
 
 
 unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -4662,7 +4665,7 @@ unsigned int optimize_intervals_float_3D_opt(float *oriData, size_t r1, size_t r
 			if(offset_count == 0) offset_count ++;
 		}
 		else data_pos += confparams_cpr->sampleDistance;
-	}	
+	}
 	//compute the appropriate number
 	size_t targetCount = totalSampleSize*confparams_cpr->predThreshold;
 	size_t sum = 0;
@@ -4728,11 +4731,11 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		type[0] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabsf(curData-P1[0])>realPrecision){	
+		if(fabsf(curData-P1[0])>realPrecision){
 			type[0] = 0;
 			P1[0] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[0] = 0;
@@ -4750,11 +4753,11 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		type[1] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabsf(curData-P1[1])>realPrecision){	
+		if(fabsf(curData-P1[1])>realPrecision){
 			type[1] = 0;
-			P1[1] = curData;	
+			P1[1] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[1] = 0;
@@ -4772,11 +4775,11 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			type[j] = (int) (itvNum/2) + exe_params->intvRadius;
 			P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
-			if(fabsf(curData-P1[j])>realPrecision){	
+			if(fabsf(curData-P1[j])>realPrecision){
 				type[j] = 0;
-				P1[j] = curData;	
+				P1[j] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else{
 			type[j] = 0;
@@ -4791,7 +4794,7 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		curData = *cur_data_pos;
 		diff = curData - pred1D;
@@ -4803,14 +4806,14 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			if (diff < 0) itvNum = -itvNum;
 			type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 			P1[index] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-			
+
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabsf(curData-P1[index])>realPrecision)
-			{	
+			{
 				type[index] = 0;
-				P1[index] = curData;	
+				P1[index] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else
 		{
@@ -4835,14 +4838,14 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 				if (diff < 0) itvNum = -itvNum;
 				type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 				P1[index] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-				
+
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabsf(curData-P1[index])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P1[index] = curData;	
+					P1[index] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
-				}				
+				}
 			}
 			else
 			{
@@ -4872,11 +4875,11 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			P0[0] = pred1D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
 			if(fabsf(curData-P0[0])>realPrecision)
-			{	
+			{
 				type[index] = 0;
-				P0[0] = curData;	
+				P0[0] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else
 		{
@@ -4900,9 +4903,9 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 				P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabsf(curData-P0[j])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P0[j] = curData;	
+					P0[j] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
 				}
 			}
@@ -4921,7 +4924,7 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			curData = *cur_data_pos;
 			diff = curData - pred2D;
@@ -4935,11 +4938,11 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 				P0[index2D] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabsf(curData-P0[index2D])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P0[index2D] = curData;	
+					P0[index2D] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
-				}				
+				}
 			}
 			else
 			{
@@ -4951,7 +4954,7 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			/* Process Row-i data 1 --> data r3-1 */
 			for (j = 1; j < r3; j++)
 			{
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -4965,14 +4968,14 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 					if (diff < 0) itvNum = -itvNum;
 					type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 					P0[index2D] = pred3D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-					
+
 					//ganrantee comporession error against the case of machine-epsilon
 					if(fabsf(curData-P0[index2D])>realPrecision)
-					{	
+					{
 						type[index] = 0;
-						P0[index2D] = curData;	
+						P0[index2D] = curData;
 						unpredictable_data[unpredictable_count ++] = curData;
-					}					
+					}
 				}
 				else
 				{
@@ -4995,7 +4998,7 @@ size_t SZ_compress_float_3D_MDQ_RA_block(float * block_ori_data, float * mean, s
 
 
 unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i;
 	size_t radiusIndex;
 	float pred_value = 0, pred_err;
@@ -5050,7 +5053,7 @@ unsigned int optimize_intervals_float_2D_opt(float *oriData, size_t r1, size_t r
 }
 
 unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	float pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -5064,7 +5067,7 @@ unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength,
 		pred_err = fabs(pred_value - *data_pos);
 		radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 		if(radiusIndex>=confparams_cpr->maxRangeRadius)
-			radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+			radiusIndex = confparams_cpr->maxRangeRadius - 1;
 		intervals[radiusIndex]++;
 
 		data_pos += confparams_cpr->sampleDistance;
@@ -5080,13 +5083,13 @@ unsigned int optimize_intervals_float_1D_opt(float *oriData, size_t dataLength,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	return powerOf2;
 }
@@ -5113,7 +5116,7 @@ size_t SZ_compress_float_1D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		itvNum = fabs(diff)/realPrecision + 1;
 		if (itvNum < exe_params->intvCapacity){
 			if (diff < 0) itvNum = -itvNum;
-			type[type_index] = (int) (itvNum/2) + exe_params->intvRadius;	
+			type[type_index] = (int) (itvNum/2) + exe_params->intvRadius;
 			last_over_thres = pred1D + 2 * (type[type_index] - exe_params->intvRadius) * realPrecision;
 			if(fabs(curData-last_over_thres)>realPrecision){
 				type[type_index] = 0;
@@ -5160,11 +5163,11 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		type[0] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[0] = pred1D + 2 * (type[0] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabs(curData-P1[0])>realPrecision){	
+		if(fabs(curData-P1[0])>realPrecision){
 			type[0] = 0;
 			P1[0] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[0] = 0;
@@ -5182,11 +5185,11 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 		type[1] = (int) (itvNum/2) + exe_params->intvRadius;
 		P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
 		//ganrantee comporession error against the case of machine-epsilon
-		if(fabs(curData-P1[1])>realPrecision){	
+		if(fabs(curData-P1[1])>realPrecision){
 			type[1] = 0;
-			P1[1] = curData;	
+			P1[1] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
-		}		
+		}
 	}
 	else{
 		type[1] = 0;
@@ -5206,11 +5209,11 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			type[j] = (int) (itvNum/2) + exe_params->intvRadius;
 			P1[j] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
-			if(fabs(curData-P1[j])>realPrecision){	
+			if(fabs(curData-P1[j])>realPrecision){
 				type[j] = 0;
-				P1[j] = curData;	
+				P1[j] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else{
 			type[j] = 0;
@@ -5222,7 +5225,7 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		curData = *cur_data_pos;
@@ -5234,18 +5237,18 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 			type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 			P0[0] = pred1D + 2 * (type[j] - exe_params->intvRadius) * realPrecision;
 			//ganrantee comporession error against the case of machine-epsilon
-			if(fabs(curData-P0[0])>realPrecision){	
+			if(fabs(curData-P0[0])>realPrecision){
 				type[index] = 0;
-				P0[0] = curData;	
+				P0[0] = curData;
 				unpredictable_data[unpredictable_count ++] = curData;
-			}			
+			}
 		}
 		else{
 			type[index] = 0;
 			P0[0] = curData;
 			unpredictable_data[unpredictable_count ++] = curData;
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -5259,14 +5262,14 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 				if (diff < 0) itvNum = -itvNum;
 				type[index] = (int) (itvNum/2) + exe_params->intvRadius;
 				P0[j] = pred2D + 2 * (type[index] - exe_params->intvRadius) * realPrecision;
-				
+
 				//ganrantee comporession error against the case of machine-epsilon
 				if(fabs(curData-P0[j])>realPrecision)
-				{	
+				{
 					type[index] = 0;
-					P0[j] = curData;	
+					P0[j] = curData;
 					unpredictable_data[unpredictable_count ++] = curData;
-				}				
+				}
 			}
 			else
 			{
@@ -5287,7 +5290,7 @@ size_t SZ_compress_float_2D_MDQ_RA_block(float * block_ori_data, float * mean, s
 
 /*The above code is for sz 1.4.13; the following code is for sz 2.0*/
 static unsigned int optimize_intervals_float_1D_with_freq_and_dense_pos(float *oriData, size_t r1, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
-{	
+{
 	float mean = 0.0;
 	size_t len = r1;
 	size_t mean_distance = (int) (sqrt(len));
@@ -5385,7 +5388,7 @@ static unsigned int optimize_intervals_float_1D_with_freq_and_dense_pos(float *o
 }
 
 unsigned int optimize_intervals_float_2D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
-{	
+{
 	float mean = 0.0;
 	size_t len = r1 * r2;
 	size_t mean_distance = (int) (sqrt(len));
@@ -5509,7 +5512,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 		quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -5531,7 +5534,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 	size_t num_blocks = num_x * num_y;
 	size_t num_elements = r1 * r2;
 
-	size_t dim0_offset = r2;	
+	size_t dim0_offset = r2;
 
 	int * result_type = (int *) malloc(num_elements * sizeof(int));
 	size_t unpred_data_max_size = max_num_block_elements;
@@ -5562,7 +5565,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 				float fx = 0.0;
 				float fy = 0.0;
 				float f = 0;
-				float sum_x; 
+				float sum_x;
 				float curData;
 				for(size_t i=0; i<current_blockcount_x; i++){
 					sum_x = 0;
@@ -5645,8 +5648,8 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 	float precision[3], recip_precision[3];
 	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c;
 	//compute the recip_precision
-	recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c;	
-	
+	recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c;
+
 	for(int i=0; i<3; i++){
 		coeff_type[i] = coeff_result_type + i * num_blocks;
 		coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
@@ -5670,7 +5673,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 			for(size_t j=0; j<num_y; j++){
 				offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
 				current_blockcount_y = (j < split_index_y) ? early_blockcount_y : late_blockcount_y;
-				
+
 				/*sampling: decide which predictor to use (regression or lorenzo)*/
 				{
 					float * cur_data_pos;
@@ -5685,7 +5688,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 						cur_data_pos = data_pos + i * dim0_offset + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
 						err_reg += fabsf(pred_reg - curData);
 
@@ -5693,9 +5696,9 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 						cur_data_pos = data_pos + i*dim0_offset + bmi;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
-						err_reg += fabsf(pred_reg - curData);								
+						err_reg += fabsf(pred_reg - curData);
 					}
 					use_reg = (err_reg < err_sz);
 				}
@@ -5714,11 +5717,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 								last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){	
+								if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){
 									coeff_type[e][coeff_index] = 0;
-									last_coeffcients[e] = cur_coeff;	
+									last_coeffcients[e] = cur_coeff;
 									coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-								}					
+								}
 							}
 							else{
 								coeff_type[e][coeff_index] = 0;
@@ -5746,18 +5749,18 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5772,11 +5775,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5786,7 +5789,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 
 							// assign value to block surfaces
 							pb_pos[ii * strip_dim0_offset + jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos += dim0_offset - current_blockcount_y;
@@ -5804,11 +5807,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5817,7 +5820,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 							}
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -5832,11 +5835,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -5849,13 +5852,13 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
 
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 					} // end ii == -1
 					unpredictable_count = block_unpredictable_count;
 					total_unpred += unpredictable_count;
-					unpredictable_data += unpredictable_count;					
+					unpredictable_data += unpredictable_count;
 					reg_count ++;
 				}// end use_reg
 				else{
@@ -5887,11 +5890,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 									*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 									if(type[index] <= intvRadius) type[index] -= 1;
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){	
+									if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){
 										type[index] = 0;
-										*cur_pb_pos = curData;	
+										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
-									}					
+									}
 								}
 								else{
 									type[index] = 0;
@@ -5927,11 +5930,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 									*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 									if(type[index] <= intvRadius) type[index] -= 1;
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){	
+									if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){
 										type[index] = 0;
-										*cur_pb_pos = curData;	
+										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
-									}					
+									}
 								}
 								else{
 									type[index] = 0;
@@ -5994,7 +5997,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 						cur_data_pos = data_pos + i * dim0_offset + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c];
 						err_sz += fabsf(pred_sz - curData) + noise;
 						err_reg += fabsf(pred_reg - curData);
 
@@ -6002,9 +6005,9 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 						cur_data_pos = data_pos + i*dim0_offset + bmi;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-dim0_offset] - cur_data_pos[-dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += fabsf(pred_sz - curData) + noise;
-						err_reg += fabsf(pred_reg - curData);								
+						err_reg += fabsf(pred_reg - curData);
 					}
 					use_reg = (err_reg < err_sz);
 				}
@@ -6023,11 +6026,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 								last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){	
+								if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){
 									coeff_type[e][coeff_index] = 0;
-									last_coeffcients[e] = cur_coeff;	
+									last_coeffcients[e] = cur_coeff;
 									coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-								}					
+								}
 							}
 							else{
 								coeff_type[e][coeff_index] = 0;
@@ -6055,18 +6058,18 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -6082,11 +6085,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -6096,7 +6099,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 
 							// assign value to block surfaces
 							pb_pos[ii * strip_dim0_offset + jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos += dim0_offset - current_blockcount_y;
@@ -6114,11 +6117,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -6127,7 +6130,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 							}
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						/*dealing with the last jj (boundary)*/
@@ -6143,11 +6146,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - pred)>realPrecision){	
+								if(fabsf(curData - pred)>realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
@@ -6160,13 +6163,13 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 							// assign value to next prediction buffer
 							next_pb_pos[jj] = pred;
 
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 					} // end ii == -1
 					unpredictable_count = block_unpredictable_count;
 					total_unpred += unpredictable_count;
-					unpredictable_data += unpredictable_count;					
+					unpredictable_data += unpredictable_count;
 					reg_count ++;
 				}// end use_reg
 				else{
@@ -6191,11 +6194,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){	
+								if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){
 									type[index] = 0;
-									*cur_pb_pos = curData;	
+									*cur_pb_pos = curData;
 									unpredictable_data[unpredictable_count ++] = curData;
-								}					
+								}
 							}
 							else{
 								type[index] = 0;
@@ -6224,11 +6227,11 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 								type[index] = (int) (itvNum/2) + intvRadius;
 								*cur_pb_pos = pred2D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){	
+								if(fabsf(curData - *cur_pb_pos)>tmp_realPrecision){
 									type[index] = 0;
-									*cur_pb_pos = curData;	
+									*cur_pb_pos = curData;
 									unpredictable_data[unpredictable_count ++] = curData;
-								}					
+								}
 							}
 							else{
 								type[index] = 0;
@@ -6257,7 +6260,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 			tmp = cur_pb_buf;
 			cur_pb_buf = next_pb_buf;
 			next_pb_buf = tmp;
-		}// end i		
+		}// end i
 	}
 	free(prediction_buffer_1);
 	free(prediction_buffer_2);
@@ -6269,7 +6272,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 	size_t i = 0;
 	init(huffmanTree, result_type, num_elements);
 	for (i = 0; i < stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -6284,7 +6287,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 
 	sizeToBytes(result_pos, num_elements);
 	result_pos += exe_params->SZ_SIZE_TYPE;
-	
+
 	intToBytes_bigEndian(result_pos, block_size);
 	result_pos += sizeof(int);
 	floatToBytes(result_pos, realPrecision);
@@ -6306,8 +6309,8 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
-	//convert the lead/mid/resi to byte stream 	
+
+	//convert the lead/mid/resi to byte stream
 	if(reg_count>0){
 		for(int e=0; e<3; e++){
 			int stateNum = 2*coeff_intvCapacity_sz;
@@ -6316,7 +6319,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -6328,7 +6331,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -6353,19 +6356,19 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 	size_t typeArray_size = 0;
 	encode(huffmanTree, result_type, num_elements, result_pos, &typeArray_size);
 	result_pos += typeArray_size;
-	
+
 #ifdef HAVE_WRITESTATS
 	writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount);
 	writeBlockInfo(use_mean, block_size, reg_count, num_blocks);
 	writeUnpredictDataCounts(total_unpred, num_elements);
-#endif	
+#endif
 
 	size_t totalEncodeSize = result_pos - result;
 	free(indicator);
 	free(result_unpredictable_data);
 	free(result_type);
 	free(reg_params);
-	
+
 	SZ_ReleaseHuffman(huffmanTree);
 	*comp_size = totalEncodeSize;
 
@@ -6375,7 +6378,7 @@ unsigned char * SZ_compress_float_2D_MDQ_nonblocked_with_blocked_regression(floa
 
 
 unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, float * dense_pos, float * max_freq, float * mean_freq)
-{	
+{
 	float mean = 0.0;
 	size_t len = r1 * r2 * r3;
 	size_t mean_distance = (int) (sqrt(len));
@@ -6463,7 +6466,7 @@ unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData,
 		}
 		else data_pos += sampleDistance;
 		sample_count ++;
-	}	
+	}
 	*max_freq = freq_count * 1.0/ sample_count;
 
 	//compute the appropriate number
@@ -6507,7 +6510,7 @@ unsigned int optimize_intervals_float_3D_with_freq_and_dense_pos(float *oriData,
 // 3D:  modified for higher performance
 unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(float *oriData, size_t r1, size_t r2, size_t r3, float realPrecision, size_t * comp_size){
 
-#ifdef HAVE_TIMECMPR	
+#ifdef HAVE_TIMECMPR
 	float* decData = NULL;
 	if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 		decData = (float*)(multisteps->hist_data);
@@ -6543,7 +6546,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	size_t num_elements = r1 * r2 * r3;
 
 	size_t dim0_offset = r2 * r3;
-	size_t dim1_offset = r3;	
+	size_t dim1_offset = r3;
 
 	int * result_type = (int *) malloc(num_elements * sizeof(int));
 	memset(result_type, 0, num_elements*sizeof(int));
@@ -6573,7 +6576,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 				offset_x = (i < split_index_x) ? i * early_blockcount_x : i * late_blockcount_x + split_index_x;
 				offset_y = (j < split_index_y) ? j * early_blockcount_y : j * late_blockcount_y + split_index_y;
 				offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
-	
+
 				data_pos = oriData + offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
 				/*Calculate regression coefficients*/
 				{
@@ -6582,7 +6585,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 					float fy = 0.0;
 					float fz = 0.0;
 					float f = 0;
-					float sum_x, sum_y; 
+					float sum_x, sum_y;
 					float curData;
 					for(size_t i=0; i<current_blockcount_x; i++){
 						sum_x = 0;
@@ -6616,7 +6619,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 			}
 		}
 	}
-	
+
 	//Compress coefficient arrays
 	float precision_a, precision_b, precision_c, precision_d;
 	float rel_param_err = 0.025;
@@ -6630,7 +6633,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 		quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -6673,7 +6676,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	float * cur_pb_buf_pos;
 	float * next_pb_buf_pos;
 	int intvCapacity = quantization_intervals;// exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	int use_reg = 0;
 	float noise = realPrecision * 1.22;
 
@@ -6689,7 +6692,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	float precision[4], recip_precision[4];
 	precision[0] = precision_a, precision[1] = precision_b, precision[2] = precision_c, precision[3] = precision_d;
 	recip_precision[0] = 1/precision_a, recip_precision[1] = 1/precision_b, recip_precision[2] = 1/precision_c, recip_precision[3] = 1/precision_d;
-	
+
 	for(int i=0; i<4; i++){
 		coeff_type[i] = coeff_result_type + i * num_blocks;
 		coeff_unpred_data[i] = coeff_unpredictable_data + i * num_blocks;
@@ -6737,7 +6740,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
 							err_reg += fabsf(pred_reg - curData);
 
@@ -6745,21 +6748,21 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
-							err_reg += fabsf(pred_reg - curData);								
+							err_reg += fabsf(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
-							err_reg += fabsf(pred_reg - curData);								
+							err_reg += fabsf(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabsf(pred_sz - curData) + noise, fabsf(mean - curData));
 							err_reg += fabsf(pred_reg - curData);
 						}
@@ -6779,11 +6782,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -6804,7 +6807,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabsf(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6812,29 +6815,29 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - pred)>tmp_realPrecision){	
+										if(fabsf(curData - pred)>tmp_realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif									
-									
+#endif
+
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
 										pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos += dim1_offset - current_blockcount_z;
@@ -6848,7 +6851,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabsf(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -6856,11 +6859,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - pred)>realPrecision){	
+										if(fabsf(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
@@ -6872,7 +6875,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif									
+#endif
 
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
@@ -6889,7 +6892,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 						unpredictable_count = block_unpredictable_count;
 						strip_unpredictable_count += unpredictable_count;
 						unpredictable_data += unpredictable_count;
-						
+
 						reg_count ++;
 					}
 					else{
@@ -6924,11 +6927,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 											*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 											if(type[index] <= intvRadius) type[index] -= 1;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabsf(curData - *cur_pb_pos)>realPrecision){	
+											if(fabsf(curData - *cur_pb_pos)>realPrecision){
 												type[index] = 0;
-												*cur_pb_pos = curData;	
+												*cur_pb_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -6940,8 +6943,8 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif																		
-									
+#endif
+
 									index ++;
 									cur_pb_pos ++;
 									cur_data_pos ++;
@@ -6976,11 +6979,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 											*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 											if(type[index] <= intvRadius) type[index] -= 1;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabsf(curData - *cur_pb_pos)>realPrecision){	
+											if(fabsf(curData - *cur_pb_pos)>realPrecision){
 												type[index] = 0;
-												*cur_pb_pos = curData;	
+												*cur_pb_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -6993,8 +6996,8 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif																		
-									
+#endif
+
 									next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
 									index ++;
 									cur_pb_pos ++;
@@ -7009,7 +7012,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 						// change indicator
 						indicator_pos[k] = 1;
 					}// end SZ
-					
+
 					reg_params_pos ++;
 					data_pos += current_blockcount_z;
 					pb_pos += current_blockcount_z;
@@ -7059,7 +7062,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 					size_t offset_z = 0;
 					offset_z = (k < split_index_z) ? k * early_blockcount_z : k * late_blockcount_z + split_index_z;
 					size_t block_offset = offset_x * dim0_offset + offset_y * dim1_offset + offset_z;
-#endif														
+#endif
 					/*sampling*/
 					{
 						// sample point [1, 1, 1] [1, 1, 4] [1, 4, 1] [1, 4, 4] [4, 1, 1] [4, 1, 4] [4, 4, 1] [4, 4, 4]
@@ -7073,7 +7076,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += fabsf(pred_sz - curData) + noise;
 							err_reg += fabsf(pred_reg - curData);
 
@@ -7081,21 +7084,21 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							cur_data_pos = data_pos + i*dim0_offset + i*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * i + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabsf(pred_sz - curData) + noise;
-							err_reg += fabsf(pred_reg - curData);								
+							err_reg += fabsf(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * i + reg_params_pos[params_offset_d];
 							err_sz += fabsf(pred_sz - curData) + noise;
-							err_reg += fabsf(pred_reg - curData);								
+							err_reg += fabsf(pred_reg - curData);
 
 							cur_data_pos = data_pos + i*dim0_offset + bmi*dim1_offset + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-dim1_offset]+ cur_data_pos[-dim0_offset] - cur_data_pos[-dim1_offset - 1] - cur_data_pos[-dim0_offset - 1] - cur_data_pos[-dim0_offset - dim1_offset] + cur_data_pos[-dim0_offset - dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * i + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabsf(pred_sz - curData) + noise;
 							err_reg += fabsf(pred_reg - curData);
 						}
@@ -7117,11 +7120,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabsf(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -7143,7 +7146,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabsf(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -7151,11 +7154,11 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - pred)>realPrecision){	
+										if(fabsf(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
@@ -7167,14 +7170,14 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif																		
+#endif
 
 
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
 										pb_pos[ii * strip_dim0_offset + jj * strip_dim1_offset + kk] = pred;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos += dim1_offset - current_blockcount_z;
@@ -7188,7 +7191,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 							for(size_t jj=0; jj<current_blockcount_y; jj++){
 								for(size_t kk=0; kk<current_blockcount_z; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabsf(diff)*recip_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -7196,23 +7199,23 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - pred)>realPrecision){	
+										if(fabsf(curData - pred)>realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = pred;
-#endif																											
+#endif
 
 									if((jj == current_blockcount_y - 1) || (kk == current_blockcount_z - 1)){
 										// assign value to block surfaces
@@ -7228,7 +7231,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 						}
 						unpredictable_count = block_unpredictable_count;
 						strip_unpredictable_count += unpredictable_count;
-						unpredictable_data += unpredictable_count;						
+						unpredictable_data += unpredictable_count;
 						reg_count ++;
 					}
 					else{
@@ -7255,23 +7258,23 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - *cur_pb_pos)>realPrecision){	
+										if(fabsf(curData - *cur_pb_pos)>realPrecision){
 											type[index] = 0;
-											*cur_pb_pos = curData;	
+											*cur_pb_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
 										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif																											
+#endif
 									index ++;
 									cur_pb_pos ++;
 									cur_data_pos ++;
@@ -7298,25 +7301,25 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_pb_pos = pred3D + 2 * (type[index] - intvRadius) * realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabsf(curData - *cur_pb_pos)>realPrecision){	
+										if(fabsf(curData - *cur_pb_pos)>realPrecision){
 											type[index] = 0;
-											*cur_pb_pos = curData;	
+											*cur_pb_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
 										*cur_pb_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
 									}
-									
+
 #ifdef HAVE_TIMECMPR
 									size_t ii = current_blockcount_x - 1;
 									size_t point_offset = ii*dim0_offset + jj*dim1_offset + kk;
 									if(confparams_cpr->szMode == SZ_TEMPORAL_COMPRESSION)
 										decData[block_offset + point_offset] = *cur_pb_pos;
-#endif																											
-									
+#endif
+
 									// assign value to next prediction buffer
 									next_pb_pos[jj * strip_dim1_offset + kk] = *cur_pb_pos;
 									index ++;
@@ -7332,7 +7335,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 						// change indicator
 						indicator_pos[k] = 1;
 					}// end SZ
-					
+
 					reg_params_pos ++;
 					data_pos += current_blockcount_z;
 					pb_pos += current_blockcount_z;
@@ -7364,7 +7367,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	init(huffmanTree, result_type, num_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -7375,9 +7378,9 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(float) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -7399,12 +7402,12 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	result_pos += sizeof(unsigned char);
 	memcpy(result_pos, &mean, sizeof(float));
 	result_pos += sizeof(float);
-		
+
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
-	
+
 	//printf("reg_count = %d, num_blocks = %d\n", reg_count, num_blocks);
 	if(reg_count > 0){
 		for(int e=0; e<4; e++){
@@ -7414,7 +7417,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -7426,7 +7429,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -7442,7 +7445,7 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -7461,8 +7464,8 @@ unsigned char * SZ_compress_float_3D_MDQ_nonblocked_with_blocked_regression(floa
 	writeHuffmanInfo(treeByteSize, typeArray_size, num_elements*sizeof(float), nodeCount);
 	writeBlockInfo(use_mean, block_size, reg_count, num_blocks);
 	writeUnpredictDataCounts(total_unpred, num_elements);
-#endif	
-	
+#endif
+
 	SZ_ReleaseHuffman(huffmanTree);
 	*comp_size = totalEncodeSize;
 	return result;
@@ -7489,7 +7492,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	size_t num_elements = r1 * r2 * r3;
 
 	size_t dim0_offset = r2 * r3;
-	size_t dim1_offset = r3;	
+	size_t dim1_offset = r3;
 
 	int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int));
 	size_t unpred_data_max_size = max_num_block_elements;
@@ -7536,7 +7539,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 					float fy = 0.0;
 					float fz = 0.0;
 					float f = 0;
-					float sum_x, sum_y; 
+					float sum_x, sum_y;
 					float curData;
 					for(size_t i=0; i<block_size; i++){
 						sum_x = 0;
@@ -7564,7 +7567,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 			}
 		}
 	}
-	
+
 	//Compress coefficient arrays
 	double precision_a, precision_b, precision_c, precision_d;
 	float rel_param_err = 0.025;
@@ -7578,7 +7581,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 		quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -7607,7 +7610,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	unsigned char * indicator_pos = indicator;
 
 	int intvCapacity = quantization_intervals; //exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	int use_reg = 0;
 	float noise = realPrecision * 1.22;
 
@@ -7673,7 +7676,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 
@@ -7681,25 +7684,25 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 						}
-						
+
 						use_reg = (err_reg < err_sz);
 					}
 					if(use_reg){
@@ -7716,11 +7719,11 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -7741,7 +7744,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							for(size_t jj=0; jj<block_size; jj++){
 								for(size_t kk=0; kk<block_size; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)/tmp_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -7749,27 +7752,27 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>tmp_realPrecision){	
+										if(fabs(curData - pred)>tmp_realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos ++;
 							}
 							cur_data_pos += pred_buffer_block_size;
 						}
-						
+
 						total_unpred += block_unpredictable_count;
-						unpredictable_data += block_unpredictable_count;						
+						unpredictable_data += block_unpredictable_count;
 						reg_count ++;
 					}
 					else{
@@ -7801,11 +7804,11 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 											type[index] = (int) (itvNum/2) + intvRadius;
 											*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+											if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 												type[index] = 0;
-												*cur_data_pos = curData;	
+												*cur_data_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -7871,7 +7874,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 
@@ -7879,25 +7882,25 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + bmi*pred_buffer_block_size + bmi;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 						}
-						
+
 						use_reg = (err_reg < err_sz);
 
 					}
@@ -7916,11 +7919,11 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 									coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 									last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+									if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 										coeff_type[e][coeff_index] = 0;
-										last_coeffcients[e] = cur_coeff;	
+										last_coeffcients[e] = cur_coeff;
 										coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-									}					
+									}
 								}
 								else{
 									coeff_type[e][coeff_index] = 0;
@@ -7941,7 +7944,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							for(size_t jj=0; jj<block_size; jj++){
 								for(size_t kk=0; kk<block_size; kk++){
 									curData = *cur_data_pos;
-									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];									
+									pred = last_coeffcients[0] * ii + last_coeffcients[1] * jj + last_coeffcients[2] * kk + last_coeffcients[3];
 									diff = curData - pred;
 									itvNum = fabs(diff)/tmp_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -7949,18 +7952,18 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>tmp_realPrecision){	
+										if(fabs(curData - pred)>tmp_realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos ++;
@@ -7968,7 +7971,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 							cur_data_pos += pred_buffer_block_size;
 						}
 						total_unpred += block_unpredictable_count;
-						unpredictable_data += block_unpredictable_count;						
+						unpredictable_data += block_unpredictable_count;
 						reg_count ++;
 					}
 					else{
@@ -7993,11 +7996,11 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+										if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 											type[index] = 0;
-											*cur_data_pos = curData;	
+											*cur_data_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
@@ -8015,7 +8018,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 						unpredictable_data += unpredictable_count;
 						// change indicator
 						indicator_pos[k] = 1;
-					}// end SZ					
+					}// end SZ
 					reg_params_pos ++;
 					type += block_size * block_size * block_size;
 				}
@@ -8031,7 +8034,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -8042,9 +8045,9 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+ num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -8068,7 +8071,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	result_pos += sizeof(float);
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
 	if(reg_count > 0){
 		for(int e=0; e<4; e++){
@@ -8078,7 +8081,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -8090,7 +8093,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -8106,7 +8109,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -8121,7 +8124,7 @@ unsigned char * SZ_compress_float_3D_MDQ_random_access_with_blocked_regression(f
 	free(result_type);
 	free(reg_params);
 
-	
+
 	SZ_ReleaseHuffman(huffmanTree);
 	*comp_size = totalEncodeSize;
 	return result;
@@ -8192,7 +8195,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 		quantization_intervals = optimize_intervals_float_1D_with_freq_and_dense_pos(oriData, r1, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -8220,7 +8223,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 	unsigned char * indicator_pos = indicator;
 
 	int intvCapacity = quantization_intervals; //exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	float noise = realPrecision * 0.5;
 	reg_params_pos = reg_params;
 
@@ -8248,9 +8251,9 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 					cur_data_pos = pred_buffer + i;
 					curData = *cur_data_pos;
 					pred_sz = cur_data_pos[-1];
-					pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b];							
+					pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b];
 					err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-					err_reg += fabs(pred_reg - curData);								
+					err_reg += fabs(pred_reg - curData);
 				}
 				*indicator_pos = !(err_reg < err_sz);
 			}
@@ -8279,9 +8282,9 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 					cur_data_pos = pred_buffer + i;
 					curData = *cur_data_pos;
 					pred_sz = cur_data_pos[-1];
-					pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b];							
+					pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b];
 					err_sz += fabs(pred_sz - curData) + noise;
-					err_reg += fabs(pred_reg - curData);								
+					err_reg += fabs(pred_reg - curData);
 				}
 				*indicator_pos = !(err_reg < err_sz);
 			}
@@ -8336,11 +8339,11 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 				coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 				last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 				//ganrantee compression error against the case of machine-epsilon
-				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 					coeff_type[e][coeff_index] = 0;
-					last_coeffcients[e] = cur_coeff;	
+					last_coeffcients[e] = cur_coeff;
 					coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-				}					
+				}
 			}
 			else{
 				coeff_type[e][coeff_index] = 0;
@@ -8379,7 +8382,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 				float * cur_data_pos = pred_buffer + 1;
 				for(size_t ii=0; ii<block_size; ii++){
 					curData = *cur_data_pos;
-					pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b];									
+					pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b];
 					diff = curData - pred;
 					itvNum = fabs(diff)/tmp_realPrecision + 1;
 					if (itvNum < intvCapacity){
@@ -8387,11 +8390,11 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 						type[index] = (int) (itvNum/2) + intvRadius;
 						pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 						//ganrantee comporession error against the case of machine-epsilon
-						if(fabs(curData - pred)>tmp_realPrecision){	
+						if(fabs(curData - pred)>tmp_realPrecision){
 							type[index] = 0;
 							pred = curData;
 							unpredictable_data[block_unpredictable_count ++] = curData;
-						}		
+						}
 					}
 					else{
 						type[index] = 0;
@@ -8431,11 +8434,11 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 							type[index] = (int) (itvNum/2) + intvRadius;
 							*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 							//ganrantee comporession error against the case of machine-epsilon
-							if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+							if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 								type[index] = 0;
-								*cur_data_pos = curData;	
+								*cur_data_pos = curData;
 								unpredictable_data[unpredictable_count ++] = curData;
-							}					
+							}
 						}
 						else{
 							type[index] = 0;
@@ -8478,7 +8481,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 				float * cur_data_pos = pred_buffer + 1;
 				for(size_t ii=0; ii<block_size; ii++){
 					curData = *cur_data_pos;
-					pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b];									
+					pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b];
 					diff = curData - pred;
 					itvNum = fabs(diff)/tmp_realPrecision + 1;
 					if (itvNum < intvCapacity){
@@ -8486,11 +8489,11 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 						type[index] = (int) (itvNum/2) + intvRadius;
 						pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 						//ganrantee comporession error against the case of machine-epsilon
-						if(fabs(curData - pred)>tmp_realPrecision){	
+						if(fabs(curData - pred)>tmp_realPrecision){
 							type[index] = 0;
 							pred = curData;
 							unpredictable_data[block_unpredictable_count ++] = curData;
-						}		
+						}
 					}
 					else{
 						type[index] = 0;
@@ -8515,7 +8518,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 				double itvNum, diff;
 				size_t index = 0;
 				for(size_t ii=0; ii<block_size; ii++){
-					curData = *cur_data_pos;					
+					curData = *cur_data_pos;
 					pred3D = cur_data_pos[-1];
 					diff = curData - pred3D;
 					itvNum = fabs(diff)/realPrecision + 1;
@@ -8524,11 +8527,11 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 						type[index] = (int) (itvNum/2) + intvRadius;
 						*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 						//ganrantee comporession error against the case of machine-epsilon
-						if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+						if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 							type[index] = 0;
-							*cur_data_pos = curData;	
+							*cur_data_pos = curData;
 							unpredictable_data[unpredictable_count ++] = curData;
-						}					
+						}
 					}
 					else{
 						type[index] = 0;
@@ -8546,7 +8549,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 			type += block_size;
 			indicator_pos ++;
 		}// end i
-	}	
+	}
 	free(pred_buffer);
 	int stateNum = 2*quantization_intervals;
 	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
@@ -8555,7 +8558,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -8566,9 +8569,9 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize +4*num_blocks*sizeof(int) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -8592,7 +8595,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 	result_pos += sizeof(float);
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
 	if(reg_count > 0){
 		for(int e=0; e<2; e++){
@@ -8602,7 +8605,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -8614,7 +8617,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -8630,7 +8633,7 @@ unsigned char * SZ_compress_float_1D_MDQ_decompression_random_access_with_blocke
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -8744,7 +8747,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 				float fx = 0.0;
 				float fy = 0.0;
 				float f = 0;
-				float sum_x; 
+				float sum_x;
 				float curData;
 				for(size_t i=0; i<block_size; i++){
 					sum_x = 0;
@@ -8770,7 +8773,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 		quantization_intervals = optimize_intervals_float_2D_with_freq_and_dense_pos(oriData, r1, r2, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -8798,7 +8801,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 	unsigned char * indicator_pos = indicator;
 
 	int intvCapacity = quantization_intervals; //exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	float noise = realPrecision * 0.81;
 	reg_params_pos = reg_params;
 
@@ -8836,7 +8839,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 						cur_data_pos = pred_buffer + i*pred_buffer_block_size + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 						err_reg += fabs(pred_reg - curData);
 
@@ -8844,9 +8847,9 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 						cur_data_pos = pred_buffer + i*pred_buffer_block_size + (bmi+1);
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-						err_reg += fabs(pred_reg - curData);								
+						err_reg += fabs(pred_reg - curData);
 					}
 					*indicator_pos = !(err_reg < err_sz);
 				}
@@ -8884,7 +8887,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 						cur_data_pos = pred_buffer + i*pred_buffer_block_size + i;
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c];
 						err_sz += fabs(pred_sz - curData) + noise;
 						err_reg += fabs(pred_reg - curData);
 
@@ -8892,9 +8895,9 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 						cur_data_pos = pred_buffer + i*pred_buffer_block_size + (bmi+1);
 						curData = *cur_data_pos;
 						pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim0_offset - 1];
-						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];							
+						pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c];
 						err_sz += fabs(pred_sz - curData) + noise;
-						err_reg += fabs(pred_reg - curData);								
+						err_reg += fabs(pred_reg - curData);
 					}
 					*indicator_pos = !(err_reg < err_sz);
 				}
@@ -8952,11 +8955,11 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 				coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 				last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 				//ganrantee compression error against the case of machine-epsilon
-				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 					coeff_type[e][coeff_index] = 0;
-					last_coeffcients[e] = cur_coeff;	
+					last_coeffcients[e] = cur_coeff;
 					coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-				}					
+				}
 			}
 			else{
 				coeff_type[e][coeff_index] = 0;
@@ -9003,7 +9006,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 					for(size_t ii=0; ii<block_size; ii++){
 						for(size_t jj=0; jj<block_size; jj++){
 							curData = *cur_data_pos;
-							pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c];									
+							pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c];
 							diff = curData - pred;
 							itvNum = fabs(diff)/tmp_realPrecision + 1;
 							if (itvNum < intvCapacity){
@@ -9011,18 +9014,18 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>tmp_realPrecision){	
+								if(fabs(curData - pred)>tmp_realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos ++;
@@ -9058,11 +9061,11 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 									type[index] = (int) (itvNum/2) + intvRadius;
 									*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 									//ganrantee comporession error against the case of machine-epsilon
-									if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+									if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 										type[index] = 0;
-										*cur_data_pos = curData;	
+										*cur_data_pos = curData;
 										unpredictable_data[unpredictable_count ++] = curData;
-									}					
+									}
 								}
 								else{
 									type[index] = 0;
@@ -9116,7 +9119,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 					for(size_t ii=0; ii<block_size; ii++){
 						for(size_t jj=0; jj<block_size; jj++){
 							curData = *cur_data_pos;
-							pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c];									
+							pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c];
 							diff = curData - pred;
 							itvNum = fabs(diff)/tmp_realPrecision + 1;
 							if (itvNum < intvCapacity){
@@ -9124,18 +9127,18 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 								type[index] = (int) (itvNum/2) + intvRadius;
 								pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - pred)>tmp_realPrecision){	
+								if(fabs(curData - pred)>tmp_realPrecision){
 									type[index] = 0;
 									pred = curData;
 									unpredictable_data[block_unpredictable_count ++] = curData;
-								}		
+								}
 							}
 							else{
 								type[index] = 0;
 								pred = curData;
 								unpredictable_data[block_unpredictable_count ++] = curData;
 							}
-							index ++;	
+							index ++;
 							cur_data_pos ++;
 						}
 						cur_data_pos ++;
@@ -9165,11 +9168,11 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 								type[index] = (int) (itvNum/2) + intvRadius;
 								*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 								//ganrantee comporession error against the case of machine-epsilon
-								if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+								if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 									type[index] = 0;
-									*cur_data_pos = curData;	
+									*cur_data_pos = curData;
 									unpredictable_data[unpredictable_count ++] = curData;
-								}					
+								}
 							}
 							else{
 								type[index] = 0;
@@ -9190,7 +9193,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 				indicator_pos ++;
 			}// end j
 		}// end i
-	}	
+	}
 
 	free(pred_buffer);
 	int stateNum = 2*quantization_intervals;
@@ -9200,7 +9203,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -9211,9 +9214,9 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int) +num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -9237,7 +9240,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 	result_pos += sizeof(float);
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
 	if(reg_count > 0){
 		for(int e=0; e<3; e++){
@@ -9247,7 +9250,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -9259,7 +9262,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -9275,7 +9278,7 @@ unsigned char * SZ_compress_float_2D_MDQ_decompression_random_access_with_blocke
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -9355,7 +9358,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	size_t num_elements = r1 * r2 * r3;
 
 	size_t dim0_offset = r2 * r3;
-	size_t dim1_offset = r3;	
+	size_t dim1_offset = r3;
 
 	int * result_type = (int *) malloc(num_blocks*max_num_block_elements * sizeof(int));
 	size_t unpred_data_max_size = max_num_block_elements;
@@ -9402,7 +9405,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 					float fy = 0.0;
 					float fz = 0.0;
 					float f = 0;
-					float sum_x, sum_y; 
+					float sum_x, sum_y;
 					float curData;
 					for(size_t i=0; i<block_size; i++){
 						sum_x = 0;
@@ -9430,13 +9433,13 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 			}
 		}
 	}
-	
+
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_float_3D_with_freq_and_dense_pos(oriData, r1, r2, r3, realPrecision, &dense_pos, &sz_sample_correct_freq, &mean_flush_freq);
 		if(mean_flush_freq > 0.5 || mean_flush_freq > sz_sample_correct_freq) use_mean = 1;
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -9464,7 +9467,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	unsigned char * indicator_pos = indicator;
 
 	int intvCapacity = quantization_intervals; //exe_params->intvCapacity;
-	int intvRadius = intvCapacity/2; //exe_params->intvRadius;	
+	int intvRadius = intvCapacity/2; //exe_params->intvRadius;
 	float noise = realPrecision * 1.22;
 	reg_params_pos = reg_params;
 
@@ -9511,7 +9514,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 
@@ -9519,21 +9522,21 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + (bmi+1);
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + (bmi+1)*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + (bmi+1)*pred_buffer_block_size + (bmi+1);
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += MIN(fabs(pred_sz - curData) + noise, fabs(mean - curData));
 							err_reg += fabs(pred_reg - curData);
 						}
@@ -9583,7 +9586,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 
@@ -9591,21 +9594,21 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + i*pred_buffer_block_size + (bmi+1);
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * (i-1) + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + (bmi+1)*pred_buffer_block_size + i;
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * (i-1) + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
-							err_reg += fabs(pred_reg - curData);								
+							err_reg += fabs(pred_reg - curData);
 
 							cur_data_pos = pred_buffer + i*pred_buffer_block_size*pred_buffer_block_size + (bmi+1)*pred_buffer_block_size + (bmi+1);
 							curData = *cur_data_pos;
 							pred_sz = cur_data_pos[-1] + cur_data_pos[-strip_dim1_offset]+ cur_data_pos[-strip_dim0_offset] - cur_data_pos[-strip_dim1_offset - 1] - cur_data_pos[-strip_dim0_offset - 1] - cur_data_pos[-strip_dim0_offset - strip_dim1_offset] + cur_data_pos[-strip_dim0_offset - strip_dim1_offset - 1];
-							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];							
+							pred_reg = reg_params_pos[0] * (i-1) + reg_params_pos[params_offset_b] * bmi + reg_params_pos[params_offset_c] * bmi + reg_params_pos[params_offset_d];
 							err_sz += fabs(pred_sz - curData) + noise;
 							err_reg += fabs(pred_reg - curData);
 						}
@@ -9669,11 +9672,11 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 				coeff_type[e][coeff_index] = (int) (itvNum/2) + coeff_intvRadius;
 				last_coeffcients[e] = last_coeffcients[e] + 2 * (coeff_type[e][coeff_index] - coeff_intvRadius) * precision[e];
 				//ganrantee compression error against the case of machine-epsilon
-				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){	
+				if(fabs(cur_coeff - last_coeffcients[e])>precision[e]){
 					coeff_type[e][coeff_index] = 0;
-					last_coeffcients[e] = cur_coeff;	
+					last_coeffcients[e] = cur_coeff;
 					coeff_unpred_data[e][coeff_unpredictable_count[e] ++] = cur_coeff;
-				}					
+				}
 			}
 			else{
 				coeff_type[e][coeff_index] = 0;
@@ -9728,7 +9731,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							for(size_t jj=0; jj<block_size; jj++){
 								for(size_t kk=0; kk<block_size; kk++){
 									curData = *cur_data_pos;
-									pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c] * kk + reg_params_pos[params_offset_d];									
+									pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c] * kk + reg_params_pos[params_offset_d];
 									diff = curData - pred;
 									itvNum = fabs(diff)/tmp_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -9736,18 +9739,18 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>tmp_realPrecision){	
+										if(fabs(curData - pred)>tmp_realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos ++;
@@ -9788,11 +9791,11 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 											type[index] = (int) (itvNum/2) + intvRadius;
 											*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 											//ganrantee comporession error against the case of machine-epsilon
-											if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+											if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 												type[index] = 0;
-												*cur_data_pos = curData;	
+												*cur_data_pos = curData;
 												unpredictable_data[unpredictable_count ++] = curData;
-											}					
+											}
 										}
 										else{
 											type[index] = 0;
@@ -9858,7 +9861,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 							for(size_t jj=0; jj<block_size; jj++){
 								for(size_t kk=0; kk<block_size; kk++){
 									curData = *cur_data_pos;
-									pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c] * kk + reg_params_pos[params_offset_d];									
+									pred = reg_params_pos[0] * ii + reg_params_pos[params_offset_b] * jj + reg_params_pos[params_offset_c] * kk + reg_params_pos[params_offset_d];
 									diff = curData - pred;
 									itvNum = fabs(diff)/tmp_realPrecision + 1;
 									if (itvNum < intvCapacity){
@@ -9866,18 +9869,18 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 										type[index] = (int) (itvNum/2) + intvRadius;
 										pred = pred + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - pred)>tmp_realPrecision){	
+										if(fabs(curData - pred)>tmp_realPrecision){
 											type[index] = 0;
 											pred = curData;
 											unpredictable_data[block_unpredictable_count ++] = curData;
-										}		
+										}
 									}
 									else{
 										type[index] = 0;
 										pred = curData;
 										unpredictable_data[block_unpredictable_count ++] = curData;
 									}
-									index ++;	
+									index ++;
 									cur_data_pos ++;
 								}
 								cur_data_pos ++;
@@ -9886,7 +9889,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 						}
 						reg_params_pos ++;
 						total_unpred += block_unpredictable_count;
-						unpredictable_data += block_unpredictable_count;						
+						unpredictable_data += block_unpredictable_count;
 						*blockwise_unpred_count_pos = block_unpredictable_count;
 					}
 					else{
@@ -9911,11 +9914,11 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 										type[index] = (int) (itvNum/2) + intvRadius;
 										*cur_data_pos = pred3D + 2 * (type[index] - intvRadius) * tmp_realPrecision;
 										//ganrantee comporession error against the case of machine-epsilon
-										if(fabs(curData - *cur_data_pos)>tmp_realPrecision){	
+										if(fabs(curData - *cur_data_pos)>tmp_realPrecision){
 											type[index] = 0;
-											*cur_data_pos = curData;	
+											*cur_data_pos = curData;
 											unpredictable_data[unpredictable_count ++] = curData;
-										}					
+										}
 									}
 									else{
 										type[index] = 0;
@@ -9932,14 +9935,14 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 						total_unpred += unpredictable_count;
 						unpredictable_data += unpredictable_count;
 						*blockwise_unpred_count_pos = unpredictable_count;
-					}// end SZ	
+					}// end SZ
 					blockwise_unpred_count_pos ++;
 					type += block_size * block_size * block_size;
 				}
 				indicator_pos += num_z;
 			}
 		}
-	}	
+	}
 
 	free(pred_buffer);
 	int stateNum = 2*quantization_intervals;
@@ -9949,7 +9952,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	init(huffmanTree, result_type, num_blocks*max_num_block_elements);
 	size_t i = 0;
 	for (i = 0; i < huffmanTree->stateNum; i++)
-		if (huffmanTree->code[i]) nodeCount++; 
+		if (huffmanTree->code[i]) nodeCount++;
 	nodeCount = nodeCount*2-1;
 
 	unsigned char *treeBytes;
@@ -9960,9 +9963,9 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	unsigned char * result = (unsigned char *) calloc(meta_data_offset + exe_params->SZ_SIZE_TYPE + sizeof(double) + sizeof(int) + sizeof(int) + 5*treeByteSize + 4*num_blocks*sizeof(int)+num_blocks * sizeof(unsigned short) + num_blocks * sizeof(unsigned short) + num_blocks * sizeof(float) + total_unpred * sizeof(float) + num_elements * sizeof(int), 1);
 	unsigned char * result_pos = result;
 	initRandomAccessBytes(result_pos);
-	
+
 	result_pos += meta_data_offset;
-	
+
 	sizeToBytes(result_pos,num_elements); //SZ_SIZE_TYPE: 4 or 8
 	result_pos += exe_params->SZ_SIZE_TYPE;
 
@@ -9986,7 +9989,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	result_pos += sizeof(float);
 	size_t indicator_size = convertIntArray2ByteArray_fast_1b_to_result(indicator, num_blocks, result_pos);
 	result_pos += indicator_size;
-	
+
 	//convert the lead/mid/resi to byte stream
 	if(reg_count > 0){
 		for(int e=0; e<4; e++){
@@ -9996,7 +9999,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 			init(huffmanTree, coeff_type[e], reg_count);
 			size_t i = 0;
 			for (i = 0; i < huffmanTree->stateNum; i++)
-				if (huffmanTree->code[i]) nodeCount++; 
+				if (huffmanTree->code[i]) nodeCount++;
 			nodeCount = nodeCount*2-1;
 			unsigned char *treeBytes;
 			unsigned int treeByteSize = convert_HuffTree_to_bytes_anyStates(huffmanTree, nodeCount, &treeBytes);
@@ -10008,7 +10011,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 			result_pos += sizeof(int);
 			intToBytes_bigEndian(result_pos, nodeCount);
 			result_pos += sizeof(int);
-			memcpy(result_pos, treeBytes, treeByteSize);		
+			memcpy(result_pos, treeBytes, treeByteSize);
 			result_pos += treeByteSize;
 			free(treeBytes);
 			size_t typeArray_size = 0;
@@ -10024,7 +10027,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	}
 	free(coeff_result_type);
 	free(coeff_unpredictable_data);
-	
+
 	//record the number of unpredictable data and also store them
 	memcpy(result_pos, &total_unpred, sizeof(size_t));
 	result_pos += sizeof(size_t);
@@ -10052,7 +10055,7 @@ unsigned char * SZ_compress_float_3D_MDQ_decompression_random_access_with_blocke
 	unsigned short * type_array_block_size_pos = type_array_block_size;
 	for(size_t i=0; i<num_x; i++){
 		for(size_t j=0; j<num_y; j++){
-			for(size_t k=0; k<num_z; k++){	
+			for(size_t k=0; k<num_z; k++){
 				size_t typeArray_size = 0;
 				encode(huffmanTree, type, max_num_block_elements, type_array_buffer_pos, &typeArray_size);
 				total_type_array_size += typeArray_size;
diff --git a/sz/src/sz_float_pwr.c b/sz/src/sz_float_pwr.c
index 3ff1e61e..477272e8 100644
--- a/sz/src/sz_float_pwr.c
+++ b/sz/src/sz_float_pwr.c
@@ -9,10 +9,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -28,7 +31,7 @@
 void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, float* pwrErrBound, unsigned char* pwrErrBoundBytes, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0;
-	float realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio; 
+	float realPrecision = oriData[0]!=0?fabs(confparams_cpr->pw_relBoundRatio*oriData[0]):confparams_cpr->pw_relBoundRatio;
 	float approxPrecision;
 	unsigned char realPrecBytes[4];
 	float curPrecision;
@@ -43,15 +46,15 @@ void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, floa
 			if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 			{
 				realPrecision = sum/confparams_cpr->segment_size;
-				sum = 0;			
+				sum = 0;
 			}
 			realPrecision *= confparams_cpr->pw_relBoundRatio;
-			
+
 			if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-				realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+				realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 			else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 				realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-				
+
 			floatToBytes(realPrecBytes, realPrecision);
 			realPrecBytes[2] = realPrecBytes[3] = 0;
 			approxPrecision = bytesToFloat(realPrecBytes);
@@ -60,26 +63,26 @@ void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, floa
 			//put the two bytes in pwrErrBoundBytes
 			pwrErrBoundBytes[k++] = realPrecBytes[0];
 			pwrErrBoundBytes[k++] = realPrecBytes[1];
-			
+
 			realPrecision = fabs(curValue);
 		}
-		
+
 		if(curValue!=0)
 		{
 			curPrecision = fabs(curValue);
-			
+
 			switch(confparams_cpr->pwr_type)
 			{
-			case SZ_PWR_MIN_TYPE: 
+			case SZ_PWR_MIN_TYPE:
 				if(realPrecision>curPrecision)
-					realPrecision = curPrecision;	
+					realPrecision = curPrecision;
 				break;
 			case SZ_PWR_AVG_TYPE:
 				sum += curPrecision;
 				break;
 			case SZ_PWR_MAX_TYPE:
 				if(realPrecision<curPrecision)
-					realPrecision = curPrecision;					
+					realPrecision = curPrecision;
 				break;
 			}
 		}
@@ -87,12 +90,12 @@ void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, floa
 	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 	{
 		int size = dataLength%confparams_cpr->segment_size==0?confparams_cpr->segment_size:dataLength%confparams_cpr->segment_size;
-		realPrecision = sum/size;		
-	}	
+		realPrecision = sum/size;
+	}
 	if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 	else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
-		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;	
+		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
 	floatToBytes(realPrecBytes, realPrecision);
 	realPrecBytes[2] = realPrecBytes[3] = 0;
 	approxPrecision = bytesToFloat(realPrecBytes);
@@ -104,9 +107,9 @@ void compute_segment_precisions_float_1D(float *oriData, size_t dataLength, floa
 }
 
 unsigned int optimize_intervals_float_1D_pwr(float *oriData, size_t dataLength, float* pwrErrBound)
-{	
+{
 	size_t i = 0, j = 0;
-	float realPrecision = pwrErrBound[j++];	
+	float realPrecision = pwrErrBound[j++];
 	unsigned long radiusIndex;
 	float pred_value = 0, pred_err;
 	int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
@@ -123,7 +126,7 @@ unsigned int optimize_intervals_float_1D_pwr(float *oriData, size_t dataLength,
 			pred_err = fabs(pred_value - oriData[i]);
 			radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -141,25 +144,25 @@ unsigned int optimize_intervals_float_1D_pwr(float *oriData, size_t dataLength,
 
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
-void compute_segment_precisions_float_2D(float *oriData, float* pwrErrBound, 
+void compute_segment_precisions_float_2D(float *oriData, float* pwrErrBound,
 size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundBytes, float Min, float Max, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0, p = 0, index = 0, J = 0; //I=-1,J=-1 if they are needed
-	float realPrecision; 
+	float realPrecision;
 	float approxPrecision;
 	unsigned char realPrecBytes[4];
 	float curValue, curAbsValue;
 	float* statAbsValues = (float*)malloc(R2*sizeof(float));
-	
+
 	float max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.
 	float min = fabs(Min)<fabs(Max)?fabs(Min):fabs(Max);
 	for(i=0;i<R2;i++)
@@ -176,7 +179,7 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 		for(j=0;j<r2;j++)
 		{
 			index = i*r2+j;
-			curValue = oriData[index];				
+			curValue = oriData[index];
 			if(((i%edgeSize==edgeSize-1 || i==r1-1) &&j%edgeSize==0&&j>0) || (i%edgeSize==0&&j==0&&i>0))
 			{
 				if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
@@ -184,7 +187,7 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 					int a = edgeSize, b = edgeSize;
 					if(j==0)
 					{
-						if(r2%edgeSize==0) 
+						if(r2%edgeSize==0)
 							b = edgeSize;
 						else
 							b = r2%edgeSize;
@@ -202,56 +205,56 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 					realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];
 
 				if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-					realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+					realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 				else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 					realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-					
+
 				floatToBytes(realPrecBytes, realPrecision);
 				realPrecBytes[2] = realPrecBytes[3] = 0;
 				approxPrecision = bytesToFloat(realPrecBytes);
-				//put the realPrecision in float* pwrErBound		
+				//put the realPrecision in float* pwrErBound
 				pwrErrBound[p++] = approxPrecision;
 				//put the two bytes in pwrErrBoundBytes
 				pwrErrBoundBytes[k++] = realPrecBytes[0];
-				pwrErrBoundBytes[k++] = realPrecBytes[1];	
-				
+				pwrErrBoundBytes[k++] = realPrecBytes[1];
+
 				if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 					statAbsValues[J] = max;
 				else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
 					statAbsValues[J] = min;
 				else
-					statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE	
-			}	
+					statAbsValues[J] = 0; //for SZ_PWR_AVG_TYPE
+			}
 			if(j==0)
 				J = 0;
 			else if(j%edgeSize==0)
-				J++;			
+				J++;
 			if(curValue!=0)
 			{
 				curAbsValue = fabs(curValue);
-				
+
 				switch(confparams_cpr->pwr_type)
 				{
-				case SZ_PWR_MIN_TYPE: 
+				case SZ_PWR_MIN_TYPE:
 					if(statAbsValues[J]>curAbsValue)
-						statAbsValues[J] = curAbsValue;	
+						statAbsValues[J] = curAbsValue;
 					break;
 				case SZ_PWR_AVG_TYPE:
 					statAbsValues[J] += curAbsValue;
 					break;
 				case SZ_PWR_MAX_TYPE:
 					if(statAbsValues[J]<curAbsValue)
-						statAbsValues[J] = curAbsValue;					
+						statAbsValues[J] = curAbsValue;
 					break;
 				}
 			}
 		}
 	}
-		
+
 	if(confparams_cpr->pwr_type==SZ_PWR_AVG_TYPE)
 	{
 		int a = edgeSize, b = edgeSize;
-		if(r2%edgeSize==0) 
+		if(r2%edgeSize==0)
 			b = edgeSize;
 		else
 			b = r2%edgeSize;
@@ -262,13 +265,13 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J]/(a*b);
 	}
 	else
-		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];		
+		realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J];
 
 	if(confparams_cpr->errorBoundMode==ABS_AND_PW_REL||confparams_cpr->errorBoundMode==REL_AND_PW_REL)
-		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision; 
+		realPrecision = realPrecision<globalPrecision?realPrecision:globalPrecision;
 	else if(confparams_cpr->errorBoundMode==ABS_OR_PW_REL||confparams_cpr->errorBoundMode==REL_OR_PW_REL)
 		realPrecision = realPrecision<globalPrecision?globalPrecision:realPrecision;
-		
+
 	floatToBytes(realPrecBytes, realPrecision);
 	realPrecBytes[2] = realPrecBytes[3] = 0;
 	approxPrecision = bytesToFloat(realPrecBytes);
@@ -276,15 +279,15 @@ size_t r1, size_t r2, size_t R2, size_t edgeSize, unsigned char* pwrErrBoundByte
 	pwrErrBound[p++] = approxPrecision;
 	//put the two bytes in pwrErrBoundBytes
 	pwrErrBoundBytes[k++] = realPrecBytes[0];
-	pwrErrBoundBytes[k++] = realPrecBytes[1];	
-	
+	pwrErrBoundBytes[k++] = realPrecBytes[1];
+
 	free(statAbsValues);
 }
 
 unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r2, size_t R2, size_t edgeSize, float* pwrErrBound)
-{	
+{
 	size_t i = 0,j = 0, index, I=0, J=0;
-	float realPrecision = pwrErrBound[0];	
+	float realPrecision = pwrErrBound[0];
 	unsigned long radiusIndex;
 	float pred_value = 0, pred_err;
 	int *intervals = (int*)malloc(confparams_cpr->maxRangeRadius*sizeof(int));
@@ -295,7 +298,7 @@ unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r
 	{
 		ir2 = i*r2;
 		if(i%edgeSize==0)
-		{	
+		{
 			I++;
 			J = 0;
 		}
@@ -304,7 +307,7 @@ unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r
 			index = ir2+j;
 			if(j%edgeSize==0)
 				J++;
-				
+
 			if((i+j)%confparams_cpr->sampleDistance==0)
 			{
 				realPrecision = pwrErrBound[I*R2+J];
@@ -314,7 +317,7 @@ unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -339,20 +342,20 @@ unsigned int optimize_intervals_float_2D_pwr(float *oriData, size_t r1, size_t r
 	return powerOf2;
 }
 
-void compute_segment_precisions_float_3D(float *oriData, float* pwrErrBound, 
+void compute_segment_precisions_float_3D(float *oriData, float* pwrErrBound,
 size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned char* pwrErrBoundBytes, float Min, float Max, double globalPrecision)
 {
 	size_t i = 0, j = 0, k = 0, p = 0, q = 0, index = 0, J = 0, K = 0; //I=-1,J=-1 if they are needed
 	size_t r23 = r2*r3, ir, jr;
-	float realPrecision; 
+	float realPrecision;
 	float approxPrecision;
 	unsigned char realPrecBytes[4];
 	float curValue, curAbsValue;
-	
+
 	float** statAbsValues = create2DArray_float(R2, R3);
-	float max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.	
+	float max = fabs(Min)<fabs(Max)?fabs(Max):fabs(Min); //get the max abs value.
 	float min = fabs(Min)<fabs(Max)?fabs(Min):fabs(Max);
-	
+
 	for(i=0;i<R2;i++)
 		for(j=0;j<R3;j++)
 		{
@@ -365,7 +368,7 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 		}
 	for(i=0;i<r1;i++)
 	{
-		ir = i*r23;		
+		ir = i*r23;
 		if(i%edgeSize==0&&i>0)
 		{
 			realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
@@ -382,8 +385,8 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 				statAbsValues[J][K] = max;
 			else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
 				statAbsValues[J][K] = min;
-			
-		}		
+
+		}
 		for(j=0;j<r2;j++)
 		{
 			jr = j*r3;
@@ -402,18 +405,18 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 				if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 					statAbsValues[J][K] = max;
 				else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
-					statAbsValues[J][K] = min;			
+					statAbsValues[J][K] = min;
 			}
-			
+
 			if(j==0)
 				J = 0;
 			else if(j%edgeSize==0)
-				J++;					
-			
+				J++;
+
 			for(k=0;k<r3;k++)
 			{
-				index = ir+jr+k;				
-				curValue = oriData[index];				
+				index = ir+jr+k;
+				curValue = oriData[index];
 				if((i%edgeSize==edgeSize-1 || i == r1-1)&&(j%edgeSize==edgeSize-1||j==r2-1)&&k%edgeSize==0&&k>0)
 				{
 					realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
@@ -426,18 +429,18 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 					//printf("q=%d, i=%d, j=%d, k=%d\n",q,i,j,k);
 					pwrErrBoundBytes[q++] = realPrecBytes[0];
 					pwrErrBoundBytes[q++] = realPrecBytes[1];
-					
+
 					if(confparams_cpr->pwr_type == SZ_PWR_MIN_TYPE)
 						statAbsValues[J][K] = max;
 					else if(confparams_cpr->pwr_type == SZ_PWR_MAX_TYPE)
-						statAbsValues[J][K] = min;	
-				}	
+						statAbsValues[J][K] = min;
+				}
 
 				if(k==0)
 					K = 0;
 				else if(k%edgeSize==0)
 					K++;
-					
+
 				if(curValue!=0)
 				{
 					curAbsValue = fabs(curValue);
@@ -456,10 +459,10 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 						}
 					}
 				}
-			}			
+			}
 		}
-	}	
-	
+	}
+
 	realPrecision = confparams_cpr->pw_relBoundRatio*statAbsValues[J][K];
 	floatToBytes(realPrecBytes, realPrecision);
 	realPrecBytes[2] = realPrecBytes[3] = 0;
@@ -469,14 +472,14 @@ size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, unsigned
 	//put the two bytes in pwrErrBoundBytes
 	pwrErrBoundBytes[q++] = realPrecBytes[0];
 	pwrErrBoundBytes[q++] = realPrecBytes[1];
-	
+
 	free2DArray_float(statAbsValues, R2);
 }
 
 unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r2, size_t r3, size_t R2, size_t R3, size_t edgeSize, float* pwrErrBound)
-{	
+{
 	size_t i,j,k, ir,jr,index, I = 0,J=0,K=0;
-	float realPrecision = pwrErrBound[0];		
+	float realPrecision = pwrErrBound[0];
 	unsigned long radiusIndex;
 	size_t r23=r2*r3;
 	size_t R23 = R2*R3;
@@ -488,7 +491,7 @@ unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r
 	{
 		ir = i*r23;
 		if(i%edgeSize==0)
-		{	
+		{
 			I++;
 			J = 0;
 		}
@@ -496,19 +499,19 @@ unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r
 		{
 			jr = j*r3;
 			if(j%edgeSize==0)
-			{	
+			{
 				J++;
 				K = 0;
-			}			
+			}
 			for(k=1;k<r3;k++)
 			{
 				index = ir+jr+k;
 				if(k%edgeSize==0)
-					K++;		
+					K++;
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
-					realPrecision = pwrErrBound[I*R23+J*R2+K];					
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					realPrecision = pwrErrBound[I*R23+J*R2+K];
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = fabs(pred_value - oriData[index]);
 					radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
@@ -535,87 +538,87 @@ unsigned int optimize_intervals_float_3D_pwr(float *oriData, size_t r1, size_t r
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
-void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, 
+void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision,
 size_t dataLength, size_t *outSize, float min, float max)
 {
 	size_t pwrLength = dataLength%confparams_cpr->segment_size==0?dataLength/confparams_cpr->segment_size:dataLength/confparams_cpr->segment_size+1;
 	float* pwrErrBound = (float*)malloc(sizeof(float)*pwrLength);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*pwrLength*2;
 	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
-	
+
 	compute_segment_precisions_float_1D(oriData, dataLength, pwrErrBound, pwrErrBoundBytes, globalPrecision);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
-		quantization_intervals = optimize_intervals_float_1D_pwr(oriData, dataLength, pwrErrBound);	
+		quantization_intervals = optimize_intervals_float_1D_pwr(oriData, dataLength, pwrErrBound);
 		updateQuantizationInfo(quantization_intervals);
 	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	size_t i = 0, j = 0;
 	int reqLength;
-	float realPrecision = pwrErrBound[j++];	
+	float realPrecision = pwrErrBound[j++];
 	float medianValue = 0;
 	float radius = fabs(max)<fabs(min)?fabs(min):fabs(max);
 	short radExpo = getExponent_float(radius);
-	
+
 	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
-	
+
 	unsigned char preDataBytes[4] = {0};
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 	float last3CmprsData[3] = {0};
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-						
-	//add the first data	
+
+	//add the first data
 	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
 	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
-	//printf("%.30G\n",last3CmprsData[0]);	
-		
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	//add the second data
 	type[1] = 0;
-	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);			
+	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
 	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	listAdd_float(last3CmprsData, vce->data);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius;
 	float curData;
@@ -624,7 +627,7 @@ size_t dataLength, size_t *outSize, float min, float max)
 	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	double interval = 2*realPrecision;
 	int updateReqLength = 0; //a marker: 1 means already updated
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
@@ -637,7 +640,7 @@ size_t dataLength, size_t *outSize, float min, float max)
 		}
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = fabs(curData - pred);	
+		predAbsErr = fabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -651,41 +654,41 @@ size_t dataLength, size_t *outSize, float min, float max)
 				type[i] = exe_params->intvRadius-state;
 				pred = pred - state*interval;
 			}
-			listAdd_float(last3CmprsData, pred);			
+			listAdd_float(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		if(updateReqLength==0)
 		{
 			computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 			reqBytesLength = reqLength/8;
 			resiBitsLength = reqLength%8;
-			updateReqLength = 1;		
+			updateReqLength = 1;
 		}
-		
+
 		type[i] = 0;
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
-		
+
 		compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,4);
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 
-		listAdd_float(last3CmprsData, vce->data);	
+		listAdd_float(last3CmprsData, vce->data);
 	}//end of for
-		
+
 //	char* expSegmentsInBytes;
 //	int expSegmentsInBytes_size = convertESCToBytes(esc, &expSegmentsInBytes);
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+
+	new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
 
 //sdi:Debug
@@ -697,17 +700,17 @@ size_t dataLength, size_t *outSize, float min, float max)
 //	writeUShortData(type, dataLength, "compressStateBytes.sb");
 //	unsigned short type_[dataLength];
 //	SZ_Reset();
-//	decode_withTree(tdps->typeArray, tdps->typeArray_size, type_);	
+//	decode_withTree(tdps->typeArray, tdps->typeArray_size, type_);
 //	printf("tdps->typeArray_size=%d\n", tdps->typeArray_size);
-	
+
 	//free memory
 	free_DBA(resiBitLengthArray);
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
 	free(type);
-	
+
 	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
-	
+
 	int floatSize=sizeof(float);
 	if(*outSize>dataLength*floatSize)
 	{
@@ -715,24 +718,24 @@ size_t dataLength, size_t *outSize, float min, float max)
 		tdps->isLossless = 1;
 		size_t totalByteLength = 3 + exe_params->SZ_SIZE_TYPE + 1 + floatSize*dataLength;
 		*newByteData = (unsigned char*)malloc(totalByteLength);
-		
+
 		unsigned char dsLengthBytes[exe_params->SZ_SIZE_TYPE];
 		intToBytes_bigEndian(dsLengthBytes, dataLength);//4
 		for (i = 0; i < 3; i++)//3
 			(*newByteData)[k++] = versionNumber[i];
-		
+
 		if(exe_params->SZ_SIZE_TYPE==4)
 		{
-			(*newByteData)[k++] = 16;	//=00010000	
+			(*newByteData)[k++] = 16;	//=00010000
 		}
-		else 
+		else
 		{
 			(*newByteData)[k++] = 80;
 		}
 		for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)//4 or 8
 			(*newByteData)[k++] = dsLengthBytes[i];
 
-		
+
 		if(sysEndianType==BIG_ENDIAN_SYSTEM)
 			memcpy((*newByteData)+4+exe_params->SZ_SIZE_TYPE, oriData, dataLength*floatSize);
 		else
@@ -745,14 +748,14 @@ size_t dataLength, size_t *outSize, float min, float max)
 	}
 
 	free(pwrErrBound);
-	
+
 	free(vce);
 	free(lce);
 	free_TightDataPointStorageF(tdps);
 	free(exactMidByteArray);
 }
 
-void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, size_t r1, size_t r2, 
+void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, size_t r1, size_t r2,
 size_t *outSize, float min, float max)
 {
 	size_t dataLength=r1*r2;
@@ -762,67 +765,67 @@ size_t *outSize, float min, float max)
 	float* pwrErrBound = (float*)malloc(sizeof(float)*R1*R2);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*2;
 	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
-	
+
 	compute_segment_precisions_float_2D(oriData, pwrErrBound, r1, r2, R2, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);
-		
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
-	{	
+	{
 		quantization_intervals = optimize_intervals_float_2D_pwr(oriData, r1, r2, R2, blockEdgeSize, pwrErrBound);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	//printf("quantization_intervals=%d\n",quantization_intervals);
-	
-	size_t i=0,j=0,I=0,J=0; 
+
+	size_t i=0,j=0,I=0,J=0;
 	int reqLength;
-	float realPrecision = pwrErrBound[I*R2+J];	
+	float realPrecision = pwrErrBound[I*R2+J];
 	float pred1D, pred2D;
 	float diff = 0.0;
 	double itvNum = 0;
 	float *P0, *P1;
-	
+
 	P0 = (float*)malloc(r2*sizeof(float));
 	memset(P0, 0, r2*sizeof(float));
 	P1 = (float*)malloc(r2*sizeof(float));
 	memset(P1, 0, r2*sizeof(float));
-		
+
 	float medianValue = 0;
-	float radius = fabs(max)<fabs(min)?fabs(min):fabs(max);	
+	float radius = fabs(max)<fabs(min)?fabs(min):fabs(max);
 	short radExpo = getExponent_float(radius);
 	int updateReqLength = 1;
-	
+
 	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	type[0] = 0;
-	
+
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	/* Process Row-0 data 0*/
 	type[0] = 0;
 	addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -845,7 +848,7 @@ size_t *outSize, float min, float max)
 		P1[1] = pred1D + 2 * (type[1] - exe_params->intvRadius) * realPrecision;
 	}
 	else
-	{		
+	{
 		type[1] = 0;
 
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -901,7 +904,7 @@ size_t *outSize, float min, float max)
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		J = 0;
@@ -909,7 +912,7 @@ size_t *outSize, float min, float max)
 			I++;
 		realPrecision = pwrErrBound[I*R2+J]; //J==0
 		updateReqLength = 0;
-		
+
 		pred1D = P1[0];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -930,7 +933,7 @@ size_t *outSize, float min, float max)
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
 			}
-			
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -940,12 +943,12 @@ size_t *outSize, float min, float max)
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			P0[0] = vce->data;
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
 			index = i*r2+j;
-			
+
 			if(j%blockEdgeSize==0)
 			{
 				J++;
@@ -990,63 +993,63 @@ size_t *outSize, float min, float max)
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
+	free(P1);
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+
+	new_TightDataPointStorageF2(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
-	
+
 	//free memory
 	free_DBA(resiBitLengthArray);
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
 	free(type);
-	
+
 	convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
-	
+
 	free(pwrErrBound);
 
 	free(vce);
 	free(lce);
-	free_TightDataPointStorageF(tdps);	
+	free_TightDataPointStorageF(tdps);
 	free(exactMidByteArray);
 }
 
-void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision, 
+void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr(unsigned char** newByteData, float *oriData, double globalPrecision,
 size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 {
 	size_t dataLength=r1*r2*r3;
-	
+
 	int blockEdgeSize = computeBlockEdgeSize_3D(confparams_cpr->segment_size);
 	size_t R1 = 1+(r1-1)/blockEdgeSize;
 	size_t R2 = 1+(r2-1)/blockEdgeSize;
 	size_t R3 = 1+(r3-1)/blockEdgeSize;
 	float* pwrErrBound = (float*)malloc(sizeof(float)*R1*R2*R3);
 	size_t pwrErrBoundBytes_size = sizeof(unsigned char)*R1*R2*R3*2;
-	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);	
-	
-	compute_segment_precisions_float_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);	
+	unsigned char* pwrErrBoundBytes = (unsigned char*)malloc(pwrErrBoundBytes_size);
+
+	compute_segment_precisions_float_3D(oriData, pwrErrBound, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBoundBytes, min, max, globalPrecision);
 
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_float_3D_pwr(oriData, r1, r2, r3, R2, R3, blockEdgeSize, pwrErrBound);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
 	size_t i=0,j=0,k=0, I = 0, J = 0, K = 0;
 	int reqLength;
-	float realPrecision = pwrErrBound[0];		
+	float realPrecision = pwrErrBound[0];
 	float pred1D, pred2D, pred3D;
 	float diff = 0.0;
 	double itvNum = 0;
@@ -1060,14 +1063,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 	float medianValue = 0;
 	short radExpo = getExponent_float(radius);
 	int updateReqLength = 0;
-	
+
 	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;realPrecision
 
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *resiBitLengthArray;
 	new_DBA(&resiBitLengthArray, DynArrayInitLen);
 
@@ -1084,7 +1087,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
@@ -1122,8 +1125,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 			reqBytesLength = reqLength/8;
 			resiBitsLength = reqLength%8;
 			updateReqLength = 1;
-		}		
-		
+		}
+
 		type[1] = 0;
 
 		addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1142,7 +1145,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 			J++;
 			realPrecision = pwrErrBound[J];
 			updateReqLength = 0;
-		}		
+		}
 		pred1D = 2*P1[j-1] - P1[j-2];
 		diff = spaceFillingValue[j] - pred1D;
 
@@ -1162,7 +1165,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}			
+			}
 
 			type[j] = 0;
 
@@ -1181,7 +1184,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 
 		J = 0;
 		if(i%blockEdgeSize==0)
@@ -1208,8 +1211,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}		
-						
+			}
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1223,14 +1226,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 		/* Process row-i data 1 --> data r3-1*/
 		for (j = 1; j < r3; j++) //note that this j refers to fastest dimension (lowest order)
 		{
-			index = i*r3+j;		
+			index = i*r3+j;
 			if(j%blockEdgeSize==0)
 			{
 				J++;
 				realPrecision = pwrErrBound[I*R3+J];
 				updateReqLength = 0;
-			}			
-		
+			}
+
 			pred2D = P1[index-1] + P1[index-r3] - P1[index-r3-1];
 
 			diff = spaceFillingValue[index] - pred2D;
@@ -1251,8 +1254,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1270,14 +1273,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 	for (k = 1; k < r1; k++)
 	{
 		/* Process Row-0 data 0*/
-		index = k*r23;			
+		index = k*r23;
 		I = 0;
 		J = 0;
 		if(k%blockEdgeSize==0)
 			K++;
 		realPrecision = pwrErrBound[K*R23]; //J==0
 		updateReqLength = 0;
-		
+
 		pred1D = P1[0];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -1297,8 +1300,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 				reqBytesLength = reqLength/8;
 				resiBitsLength = reqLength%8;
 				updateReqLength = 1;
-			}					
-			
+			}
+
 			type[index] = 0;
 
 			addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1312,14 +1315,14 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 	    /* Process Row-0 data 1 --> data r3-1 */
 		for (j = 1; j < r3; j++)
 		{
-			index = k*r23+j;	
+			index = k*r23+j;
 
 			if(j%blockEdgeSize==0)
 			{
 				J++;
 				realPrecision = pwrErrBound[K*R23+J];
-				updateReqLength = 0;			
-			}					
+				updateReqLength = 0;
+			}
 			pred2D = P0[j-1] + P1[j] - P1[j-1];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -1341,8 +1344,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1364,9 +1367,9 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 			if(i%blockEdgeSize==0)
 				I++;
 			realPrecision = pwrErrBound[K*R23+I*R3+J]; //J==0
-			updateReqLength = 0;			
-			
-			index2D = i*r3;		
+			updateReqLength = 0;
+
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -1386,8 +1389,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 					reqBytesLength = reqLength/8;
 					resiBitsLength = reqLength%8;
 					updateReqLength = 1;
-				}						
-				
+				}
+
 				type[index] = 0;
 
 				addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1406,8 +1409,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 				{
 					J++;
 					realPrecision = pwrErrBound[K*R23+I*R3+J];
-					updateReqLength = 0;			
-				}							
+					updateReqLength = 0;
+				}
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
 				diff = spaceFillingValue[index] - pred3D;
@@ -1428,8 +1431,8 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 						reqBytesLength = reqLength/8;
 						resiBitsLength = reqLength%8;
 						updateReqLength = 1;
-					}							
-					
+					}
+
 					type[index] = 0;
 
 					addDBA_Data(resiBitLengthArray, (unsigned char)resiBitsLength);
@@ -1458,7 +1461,7 @@ size_t r1, size_t r2, size_t r3, size_t *outSize, float min, float max)
 			type, exactMidByteArray->array, exactMidByteArray->size,
 			exactLeadNumArray->array,
 			resiBitArray->array, resiBitArray->size,
-			resiBitLengthArray->array, resiBitLengthArray->size, 
+			resiBitLengthArray->array, resiBitLengthArray->size,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, pwrErrBoundBytes, pwrErrBoundBytes_size, radExpo);
 
 //sdi:Debug
@@ -1511,24 +1514,24 @@ void compressGroupIDArray_float(char* groupID, TightDataPointStorageF* tdps)
 	for(i=1; i<dataLength;i++)
 	{
 		curGroupIDValue = groupID[i];
-		standGroupID[i] = (curGroupIDValue - lastGroupIDValue) + offset; 
+		standGroupID[i] = (curGroupIDValue - lastGroupIDValue) + offset;
 		lastGroupIDValue = curGroupIDValue;
 	}
-	
+
 	unsigned char* out = NULL;
 	size_t outSize;
-	
+
 	HuffmanTree* huffmanTree = SZ_Reset();
 	encode_withTree(huffmanTree, standGroupID, dataLength, &out, &outSize);
 	SZ_ReleaseHuffman(huffmanTree);
-	
+
 	tdps->pwrErrBoundBytes = out; //groupIDArray
 	tdps->pwrErrBoundBytes_size = outSize;
-	
+
 	free(standGroupID);
 }
 
-TightDataPointStorageF* SZ_compress_float_1D_MDQ_pwrGroup(float* oriData, size_t dataLength, int errBoundMode, 
+TightDataPointStorageF* SZ_compress_float_1D_MDQ_pwrGroup(float* oriData, size_t dataLength, int errBoundMode,
 double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f)
 {
 	size_t i;
@@ -1551,49 +1554,49 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi
 	getPrecisionReqLength_float(realPrecision);
 	short radExpo = getExponent_float(valueRangeSize/2);
 	short lastGroupNum = 0, groupNum, grpNum = 0;
-	
+
 	double* groupErrorBounds = generateGroupErrBounds(errBoundMode, realPrecision, pwrErrRatio);
 	exe_params->intvRadius = generateGroupMaxIntervalCount(groupErrorBounds);
-	
+
 	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	char *groupID = (char*) malloc(dataLength*sizeof(char));
 	char *gp = groupID;
-		
-	float* spaceFillingValue = oriData; 
-	
+
+	float* spaceFillingValue = oriData;
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-			
+
 	int state;
 	float curData, decValue;
 	float pred;
 	float predAbsErr;
 	double interval = 0;
-	
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-	
+
 	curData = spaceFillingValue[0];
 	groupNum = computeGroupNum_float(vce->data);
 
@@ -1624,14 +1627,14 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi
 
 	listAdd_float_group(groups, flags, groupNum, spaceFillingValue[0], vce->data, gp);
 	gp++;
-	
+
 	for(i=1;i<dataLength;i++)
 	{
 		curData = oriData[i];
 		//printf("i=%d, posGroups[3]=%f, negGroups[3]=%f\n", i, posGroups[3], negGroups[3]);
-		
+
 		groupNum = computeGroupNum_float(curData);
-		
+
 		if(curData > 0 && groupNum >= 0)
 		{
 			groups = posGroups;
@@ -1664,25 +1667,25 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi
 			updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,4);
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-			listAdd_float_group(groups, flags, lastGroupNum, curData, vce->data, gp);	//set the group number to be last one in order to get the groupID array as smooth as possible.		
+			listAdd_float_group(groups, flags, lastGroupNum, curData, vce->data, gp);	//set the group number to be last one in order to get the groupID array as smooth as possible.
 		}
 		else if(flags[grpNum]==0) //the dec value may not be in the same group
-		{	
+		{
 			type[i] = 0;
 			compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 			updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 			memcpy(preDataBytes,vce->curBytes,4);
 			addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 			//decGroupNum = computeGroupNum_float(vce->data);
-			
+
 			//if(decGroupNum < groupNum)
 			//	decValue = curData>0?pow(2, groupNum):-pow(2, groupNum);
 			//else if(decGroupNum > groupNum)
 			//	decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1);
 			//else
 			//	decValue = vce->data;
-			
-			decValue = vce->data;	
+
+			decValue = vce->data;
 			listAdd_float_group(groups, flags, groupNum, curData, decValue, gp);
 			lastGroupNum = curData>0?groupNum + 2: -(groupNum+2);
 		}
@@ -1704,7 +1707,7 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi
 				decValue = pred - state*interval;
 			}
 			//decGroupNum = computeGroupNum_float(pred);
-			
+
 			if((decValue>0&&curData<0)||(decValue<0&&curData>=0))
 				decValue = 0;
 			//else
@@ -1714,64 +1717,64 @@ double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSi
 			//	else if(decGroupNum > groupNum)
 			//		decValue = curData>0?pow(2, groupNum+1):-pow(2, groupNum+1);
 			//	else
-			//		decValue = pred;				
+			//		decValue = pred;
 			//}
-			
+
 			if(fabs(curData-decValue)>realGroupPrecision)
-			{	
+			{
 				type[i] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
 				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 
-				decValue = vce->data;	
+				decValue = vce->data;
 			}
-			
-			listAdd_float_group(groups, flags, groupNum, curData, decValue, gp);			
-			lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2);			
+
+			listAdd_float_group(groups, flags, groupNum, curData, decValue, gp);
+			lastGroupNum = curData>=0?groupNum + 2: -(groupNum+2);
 		}
-		gp++;	
+		gp++;
 
 	}
-	
+
 	int exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
+
 	//combineTypeAndGroupIDArray(nbBins, dataLength, &type, groupID);
 
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
-			resiBitsLength, 
-			realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo);	
-	
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
+			resiBitsLength,
+			realPrecision, medianValue, (char)reqLength, nbBins, NULL, 0, radExpo);
+
 	compressGroupIDArray_float(groupID, tdps);
-	
+
 	free(posGroups);
 	free(negGroups);
 	free(posFlags);
 	free(negFlags);
 	free(groupID);
 	free(groupErrorBounds);
-	
+
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
-	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);	
-	
+	free(lce);
+	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
+
 	return tdps;
 }
 
 void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwrgroup(unsigned char** newByteData, float *oriData,
 size_t dataLength, double absErrBound, double relBoundRatio, double pwrErrRatio, float valueRangeSize, float medianValue_f, size_t *outSize)
 {
-        TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode, 
-        absErrBound, relBoundRatio, pwrErrRatio, 
+        TightDataPointStorageF* tdps = SZ_compress_float_1D_MDQ_pwrGroup(oriData, dataLength, confparams_cpr->errorBoundMode,
+        absErrBound, relBoundRatio, pwrErrRatio,
         valueRangeSize, medianValue_f);
 
         convertTDPStoFlatBytes_float(tdps, newByteData, outSize);
@@ -1813,7 +1816,7 @@ void SZ_compress_args_float_NoCkRngeNoGzip_1D_pwr_pre_log(unsigned char** newByt
 	}
 
 	float valueRangeSize, medianValue_f;
-	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
 	for(size_t i=0; i<dataLength; i++){
@@ -1875,7 +1878,7 @@ void SZ_compress_args_float_NoCkRngeNoGzip_2D_pwr_pre_log(unsigned char** newByt
 	}
 
 	float valueRangeSize, medianValue_f;
-	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
 	for(size_t i=0; i<dataLength; i++){
@@ -1937,7 +1940,7 @@ void SZ_compress_args_float_NoCkRngeNoGzip_3D_pwr_pre_log(unsigned char** newByt
 	}
 
 	float valueRangeSize, medianValue_f;
-	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);	
+	computeRangeSize_float(log_data, dataLength, &valueRangeSize, &medianValue_f);
 	if(fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
 	double realPrecision = log2(1.0 + pwrErrRatio) - max_abs_log_data * 1.2e-7;
 	for(size_t i=0; i<dataLength; i++){
diff --git a/sz/src/sz_float_ts.c b/sz/src/sz_float_ts.c
index ea29245e..72ef818e 100644
--- a/sz/src/sz_float_ts.c
+++ b/sz/src/sz_float_ts.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -23,7 +26,7 @@
 #include "sz_float_ts.h"
 
 unsigned int optimize_intervals_float_1D_ts(float *oriData, size_t dataLength, float* preData, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	float pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -37,7 +40,7 @@ unsigned int optimize_intervals_float_1D_ts(float *oriData, size_t dataLength, f
 			pred_err = fabs(pred_value - oriData[i]);
 			radiusIndex = (unsigned long)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -52,13 +55,13 @@ unsigned int optimize_intervals_float_1D_ts(float *oriData, size_t dataLength, f
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	return powerOf2;
 }
@@ -72,59 +75,59 @@ double realPrecision, float valueRangeSize, float medianValue_f)
 	//float* decData = (float*)malloc(sizeof(float)*dataLength);
 	//memset(decData, 0, sizeof(float)*dataLength);
 	float* decData = preStepData;
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 		quantization_intervals = optimize_intervals_float_1D_ts(oriData, dataLength, preStepData, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 
 	size_t i;
 	int reqLength;
 	float medianValue = medianValue_f;
 	short radExpo = getExponent_float(valueRangeSize/2);
-	
-	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);	
+
+	computeReqLength_float(realPrecision, radExpo, &reqLength, &medianValue);
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	float* spaceFillingValue = oriData; //
-	
+
 	DynamicIntArray *exactLeadNumArray;
 	new_DIA(&exactLeadNumArray, DynArrayInitLen);
-	
+
 	DynamicByteArray *exactMidByteArray;
 	new_DBA(&exactMidByteArray, DynArrayInitLen);
-	
+
 	DynamicIntArray *resiBitArray;
 	new_DIA(&resiBitArray, DynArrayInitLen);
-	
+
 	unsigned char preDataBytes[4];
 	intToBytes_bigEndian(preDataBytes, 0);
-	
+
 	int reqBytesLength = reqLength/8;
 	int resiBitsLength = reqLength%8;
 
 	FloatValueCompressElement *vce = (FloatValueCompressElement*)malloc(sizeof(FloatValueCompressElement));
 	LossyCompressionElement *lce = (LossyCompressionElement*)malloc(sizeof(LossyCompressionElement));
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[0], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 	decData[0] = vce->data;
-		
+
 	//add the second data
 	type[1] = 0;
 	compressSingleFloatValue(vce, spaceFillingValue[1], realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 	updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 	memcpy(preDataBytes,vce->curBytes,4);
 	addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
-	decData[1] = vce->data;	
-	
+	decData[1] = vce->data;
+
 	int state = 0;
 	double checkRadius = 0;
 	float curData = 0;
@@ -132,12 +135,12 @@ double realPrecision, float valueRangeSize, float medianValue_f)
 	float predAbsErr = 0;
 	checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
 		pred = preStepData[i];
-		predAbsErr = fabs(curData - pred);	
+		predAbsErr = fabs(curData - pred);
 		if(predAbsErr<=checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -151,56 +154,56 @@ double realPrecision, float valueRangeSize, float medianValue_f)
 				type[i] = exe_params->intvRadius-state;
 				pred = pred - state*interval;
 			}
-				
-			//double-check the prediction error in case of machine-epsilon impact	
+
+			//double-check the prediction error in case of machine-epsilon impact
 			if(fabs(curData-pred)>realPrecision)
-			{	
-				type[i] = 0;				
+			{
+				type[i] = 0;
 				compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 				updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 				memcpy(preDataBytes,vce->curBytes,4);
-				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);		
+				addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 				decData[i] = vce->data;
 			}
 			else
 			{
 				decData[i] = pred;
 			}
-			
+
 			continue;
 		}
-		
-		//unpredictable data processing		
-		type[i] = 0;		
+
+		//unpredictable data processing
+		type[i] = 0;
 		compressSingleFloatValue(vce, curData, realPrecision, medianValue, reqLength, reqBytesLength, resiBitsLength);
 		updateLossyCompElement_Float(vce->curBytes, preDataBytes, reqBytesLength, resiBitsLength, lce);
 		memcpy(preDataBytes,vce->curBytes,4);
 		addExactData(exactMidByteArray, exactLeadNumArray, resiBitArray, lce);
 		decData[i] = vce->data;
 	}//end of for
-		
+
 	size_t exactDataNum = exactLeadNumArray->size;
-	
+
 	TightDataPointStorageF* tdps;
-			
-	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum, 
-			type, exactMidByteArray->array, exactMidByteArray->size,  
-			exactLeadNumArray->array,  
-			resiBitArray->array, resiBitArray->size, 
+
+	new_TightDataPointStorageF(&tdps, dataLength, exactDataNum,
+			type, exactMidByteArray->array, exactMidByteArray->size,
+			exactLeadNumArray->array,
+			resiBitArray->array, resiBitArray->size,
 			resiBitsLength,
 			realPrecision, medianValue, (char)reqLength, quantization_intervals, NULL, 0, 0);
 
 	//free memory
 	free_DIA(exactLeadNumArray);
 	free_DIA(resiBitArray);
-	free(type);	
+	free(type);
 	free(vce);
-	free(lce);	
+	free(lce);
 	free(exactMidByteArray); //exactMidByteArray->array has been released in free_TightDataPointStorageF(tdps);
-		
+
 	//memcpy(preStepData, decData, dataLength*sizeof(float)); //update the data
 	//free(decData);
-	
+
 	return tdps;
 }
 
diff --git a/sz/src/sz_int16.c b/sz/src/sz_int16.c
index 0d0c2299..21337af8 100644
--- a/sz/src/sz_int16.c
+++ b/sz/src/sz_int16.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, do
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_int16_1D(int16_t *oriData, size_t dataLength, do
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_int16_2D(int16_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_int16_2D(int16_t *oriData, size_t r1, size_t r2,
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_int16_2D(int16_t *oriData, size_t r1, size_t r2,
 }
 
 unsigned int optimize_intervals_int16_3D(int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_int16_3D(int16_t *oriData, size_t r1, size_t r2,
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_int16_3D(int16_t *oriData, size_t r1, size_t r2,
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,42 +234,42 @@ TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLe
 		quantization_intervals = optimize_intervals_int16_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	int16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressInt16Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressInt16Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -281,24 +284,24 @@ TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLe
 				pred = pred - state*interval;
 			}
 			if(pred>SZ_INT16_MAX) pred = SZ_INT16_MAX;
-			if(pred<SZ_INT16_MIN) pred = SZ_INT16_MIN;			
-			listAdd_int(last3CmprsData, pred);					
+			if(pred<SZ_INT16_MIN) pred = SZ_INT16_MIN;
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressInt16Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT16);
 
 //sdi:Debug
@@ -306,23 +309,23 @@ TightDataPointStorageI* SZ_compress_int16_1D_MDQ(int16_t *oriData, size_t dataLe
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_int16_StoreOriData(int16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_int16_StoreOriData(int16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(int16_t);	
+	int intSize=sizeof(int16_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -331,14 +334,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -346,11 +349,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int16ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_int16_NoCkRngeNoGzip_1D(unsigned char** newByteData, int16_t *oriData, 
+void SZ_compress_args_int16_NoCkRngeNoGzip_1D(unsigned char** newByteData, int16_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int16_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int16_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -365,35 +368,35 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int16_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (int16_t*)malloc(r2*sizeof(int16_t));
 	memset(P0, 0, r2*sizeof(int16_t));
 	P1 = (int16_t*)malloc(r2*sizeof(int16_t));
 	memset(P1, 0, r2*sizeof(int16_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	int16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -444,7 +447,7 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 			else if(tmp < SZ_INT16_MIN)
 				P1[j] = SZ_INT16_MIN;
 			else
-				P1[j] = SZ_INT16_MAX;			
+				P1[j] = SZ_INT16_MAX;
 		}
 		else
 		{
@@ -458,7 +461,7 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -476,7 +479,7 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 			else if(tmp < SZ_INT16_MIN)
 				P0[0] = SZ_INT16_MIN;
 			else
-				P0[0] = SZ_INT16_MAX;			
+				P0[0] = SZ_INT16_MAX;
 		}
 		else
 		{
@@ -485,7 +488,7 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 			compressInt16Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -506,7 +509,7 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P0[j] = SZ_INT16_MIN;
 				else
-					P0[j] = SZ_INT16_MAX;						
+					P0[j] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -522,32 +525,32 @@ TightDataPointStorageI* SZ_compress_int16_2D_MDQ(int16_t *oriData, size_t r1, si
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_int16_NoCkRngeNoGzip_2D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int16_NoCkRngeNoGzip_2D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int16_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int16_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -557,30 +560,30 @@ int64_t valueRangeSize, int16_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(int16_t))
 		SZ_compress_args_int16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int16_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (int16_t*)malloc(r23*sizeof(int16_t));
@@ -589,9 +592,9 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -614,7 +617,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 		else if(tmp < SZ_INT16_MIN)
 			P1[1] = SZ_INT16_MIN;
 		else
-			P1[1] = SZ_INT16_MAX;		
+			P1[1] = SZ_INT16_MAX;
 	}
 	else
 	{
@@ -642,7 +645,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 			else if(tmp < SZ_INT16_MIN)
 				P1[j] = SZ_INT16_MIN;
 			else
-				P1[j] = SZ_INT16_MAX;			
+				P1[j] = SZ_INT16_MAX;
 		}
 		else
 		{
@@ -658,7 +661,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -674,7 +677,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 			else if(tmp < SZ_INT16_MIN)
 				P1[index] = SZ_INT16_MIN;
 			else
-				P1[index] = SZ_INT16_MAX;			
+				P1[index] = SZ_INT16_MAX;
 		}
 		else
 		{
@@ -704,7 +707,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P1[index] = SZ_INT16_MIN;
 				else
-					P1[index] = SZ_INT16_MAX;				
+					P1[index] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -769,7 +772,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P0[j] = SZ_INT16_MIN;
 				else
-					P0[j] = SZ_INT16_MAX;				
+					P0[j] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -786,7 +789,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -817,7 +820,7 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -857,22 +860,22 @@ TightDataPointStorageI* SZ_compress_int16_3D_MDQ(int16_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_int16_NoCkRngeNoGzip_3D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int16_NoCkRngeNoGzip_3D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int16_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
@@ -882,8 +885,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(int16_t))
 		SZ_compress_args_int16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -891,35 +894,35 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int16_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (int16_t*)malloc(r34*sizeof(int16_t));
 	P1 = (int16_t*)malloc(r34*sizeof(int16_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -954,7 +957,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 			else if(tmp < SZ_INT16_MIN)
 				P1[index2D] = SZ_INT16_MIN;
 			else
-				P1[index2D] = SZ_INT16_MAX;			
+				P1[index2D] = SZ_INT16_MAX;
 		}
 		else
 		{
@@ -986,7 +989,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P1[index2D] = SZ_INT16_MIN;
 				else
-					P1[index2D] = SZ_INT16_MAX;					
+					P1[index2D] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -1020,7 +1023,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P1[index2D] = SZ_INT16_MIN;
 				else
-					P1[index2D] = SZ_INT16_MAX;					
+					P1[index2D] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -1053,7 +1056,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 					else if(tmp < SZ_INT16_MIN)
 						P1[index2D] = SZ_INT16_MIN;
 					else
-						P1[index2D] = SZ_INT16_MAX;						
+						P1[index2D] = SZ_INT16_MAX;
 				}
 				else
 				{
@@ -1090,7 +1093,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 				else if(tmp < SZ_INT16_MIN)
 					P0[index2D] = SZ_INT16_MIN;
 				else
-					P0[index2D] = SZ_INT16_MAX;					
+					P0[index2D] = SZ_INT16_MAX;
 			}
 			else
 			{
@@ -1122,7 +1125,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 					else if(tmp < SZ_INT16_MIN)
 						P0[index2D] = SZ_INT16_MIN;
 					else
-						P0[index2D] = SZ_INT16_MAX;						
+						P0[index2D] = SZ_INT16_MAX;
 				}
 				else
 				{
@@ -1156,7 +1159,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 					else if(tmp < SZ_INT16_MIN)
 						P0[index2D] = SZ_INT16_MIN;
 					else
-						P0[index2D] = SZ_INT16_MAX;						
+						P0[index2D] = SZ_INT16_MAX;
 				}
 				else
 				{
@@ -1189,7 +1192,7 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 						else if(tmp < SZ_INT16_MIN)
 							P0[index2D] = SZ_INT16_MIN;
 						else
-							P0[index2D] = SZ_INT16_MAX;							
+							P0[index2D] = SZ_INT16_MAX;
 					}
 					else
 					{
@@ -1213,21 +1216,21 @@ TightDataPointStorageI* SZ_compress_int16_4D_MDQ(int16_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_int16_NoCkRngeNoGzip_4D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_int16_NoCkRngeNoGzip_4D(unsigned char** newByteData, int16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int16_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1244,8 +1247,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_int16_withinRange(unsigned char** newByteData, int16_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*2);
@@ -1254,28 +1257,28 @@ void SZ_compress_args_int16_withinRange(unsigned char** newByteData, int16_t *or
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 2;
 	tdps->dataTypeSize = convertDataTypeSize(sizeof(int16_t));
-	
+
 	int16_t value = oriData[0];
 	int16ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(int16_t)+SZ_SIZE_TYPE; //8==3+1+4(int16_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_int16_wRngeNoGzip(unsigned char** newByteData, int16_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int16_wRngeNoGzip(unsigned char** newByteData, int16_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	int16_t minValue = computeRangeSize_int(oriData, SZ_INT16, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_int16_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1303,12 +1306,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_int16(unsigned char** newByteData, int16_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int16(unsigned char** newByteData, int16_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1320,8 +1323,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	int16_t minValue = (int16_t)computeRangeSize_int(oriData, SZ_INT16, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1377,9 +1380,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the int16_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_int32.c b/sz/src/sz_int32.c
index 7b559c94..04cbd241 100644
--- a/sz/src/sz_int32.c
+++ b/sz/src/sz_int32.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, do
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_int32_1D(int32_t *oriData, size_t dataLength, do
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_int32_2D(int32_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_int32_2D(int32_t *oriData, size_t r1, size_t r2,
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_int32_2D(int32_t *oriData, size_t r1, size_t r2,
 }
 
 unsigned int optimize_intervals_int32_3D(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_int32_3D(int32_t *oriData, size_t r1, size_t r2,
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_int32_3D(int32_t *oriData, size_t r1, size_t r2,
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,36 +234,36 @@ TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLe
 		quantization_intervals = optimize_intervals_int32_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	int32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressInt32Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressInt32Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int32_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 //		if(i==2869438)
@@ -268,7 +271,7 @@ TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLe
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -284,23 +287,23 @@ TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLe
 			}
 /*			if(type[i]==0)
 				printf("err:type[%d]=0\n", i);*/
-			listAdd_int(last3CmprsData, pred);					
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressInt32Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT32);
 
 //sdi:Debug
@@ -308,23 +311,23 @@ TightDataPointStorageI* SZ_compress_int32_1D_MDQ(int32_t *oriData, size_t dataLe
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_int32_StoreOriData(int32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_int32_StoreOriData(int32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(int32_t);	
+	int intSize=sizeof(int32_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -333,14 +336,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -348,11 +351,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int32ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_int32_NoCkRngeNoGzip_1D(unsigned char** newByteData, int32_t *oriData, 
+void SZ_compress_args_int32_NoCkRngeNoGzip_1D(unsigned char** newByteData, int32_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int32_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int32_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -367,35 +370,35 @@ TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int32_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int32_t pred1D, pred2D, curValue;
 	int32_t diff = 0.0;
 	double itvNum = 0;
 	int32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (int32_t*)malloc(r2*sizeof(int32_t));
 	memset(P0, 0, r2*sizeof(int32_t));
 	P1 = (int32_t*)malloc(r2*sizeof(int32_t));
 	memset(P1, 0, r2*sizeof(int32_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	int32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -448,7 +451,7 @@ TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, si
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -469,7 +472,7 @@ TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, si
 			compressInt32Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -500,32 +503,32 @@ TightDataPointStorageI* SZ_compress_int32_2D_MDQ(int32_t *oriData, size_t r1, si
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_int32_NoCkRngeNoGzip_2D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int32_NoCkRngeNoGzip_2D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int32_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int32_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -535,30 +538,30 @@ int64_t valueRangeSize, int32_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(int32_t))
 		SZ_compress_args_int32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int32_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int32_t pred1D, pred2D, pred3D, curValue;
 	int32_t diff = 0.0;
 	double itvNum = 0;
 	int32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (int32_t*)malloc(r23*sizeof(int32_t));
@@ -567,9 +570,9 @@ TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, si
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -624,7 +627,7 @@ TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, si
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -730,7 +733,7 @@ TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, si
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -755,7 +758,7 @@ TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, si
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -789,24 +792,24 @@ TightDataPointStorageI* SZ_compress_int32_3D_MDQ(int32_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_int32_NoCkRngeNoGzip_3D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int32_NoCkRngeNoGzip_3D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
-{	
+{
 	TightDataPointStorageI* tdps = SZ_compress_int32_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
 
 	convertTDPStoFlatBytes_int(tdps, newByteData, outSize);
@@ -814,8 +817,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(int32_t))
 		SZ_compress_args_int32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -823,35 +826,35 @@ TightDataPointStorageI* SZ_compress_int32_4D_MDQ(int32_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int32_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int32_t pred1D, pred2D, pred3D, curValue;
 	int32_t diff = 0.0;
 	double itvNum = 0;
 	int32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (int32_t*)malloc(r34*sizeof(int32_t));
 	P1 = (int32_t*)malloc(r34*sizeof(int32_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -1097,21 +1100,21 @@ TightDataPointStorageI* SZ_compress_int32_4D_MDQ(int32_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_int32_NoCkRngeNoGzip_4D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_int32_NoCkRngeNoGzip_4D(unsigned char** newByteData, int32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int32_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1128,8 +1131,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_int32_withinRange(unsigned char** newByteData, int32_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*4);
@@ -1138,28 +1141,28 @@ void SZ_compress_args_int32_withinRange(unsigned char** newByteData, int32_t *or
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 4;
 	tdps->dataTypeSize = convertDataTypeSize(sizeof(int32_t));
-	
+
 	int32_t value = oriData[0];
 	int32ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(int32_t)+SZ_SIZE_TYPE; //8==3+1+4(int32_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_int32_wRngeNoGzip(unsigned char** newByteData, int32_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int32_wRngeNoGzip(unsigned char** newByteData, int32_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	int32_t minValue = computeRangeSize_int(oriData, SZ_INT32, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_int32_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1187,12 +1190,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_int32(unsigned char** newByteData, int32_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int32(unsigned char** newByteData, int32_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1204,8 +1207,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	int32_t minValue = (int32_t)computeRangeSize_int(oriData, SZ_INT32, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1261,9 +1264,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the int32_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_int64.c b/sz/src/sz_int64.c
index 065fb16e..1fce540e 100644
--- a/sz/src/sz_int64.c
+++ b/sz/src/sz_int64.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, do
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_int64_1D(int64_t *oriData, size_t dataLength, do
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_int64_2D(int64_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_int64_2D(int64_t *oriData, size_t r1, size_t r2,
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_int64_2D(int64_t *oriData, size_t r1, size_t r2,
 }
 
 unsigned int optimize_intervals_int64_3D(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_int64_3D(int64_t *oriData, size_t r1, size_t r2,
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_int64_3D(int64_t *oriData, size_t r1, size_t r2,
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,37 +234,37 @@ TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLe
 		quantization_intervals = optimize_intervals_int64_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	int64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressInt64Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressInt64Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred;
 	int64_t predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 //		if(i==2869438)
@@ -269,7 +272,7 @@ TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLe
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -285,23 +288,23 @@ TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLe
 			}
 /*			if(type[i]==0)
 				printf("err:type[%d]=0\n", i);*/
-			listAdd_int(last3CmprsData, pred);					
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressInt64Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT64);
 
 //sdi:Debug
@@ -309,23 +312,23 @@ TightDataPointStorageI* SZ_compress_int64_1D_MDQ(int64_t *oriData, size_t dataLe
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_int64_StoreOriData(int64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_int64_StoreOriData(int64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(int64_t);	
+	int intSize=sizeof(int64_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -334,14 +337,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -349,11 +352,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int64ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_int64_NoCkRngeNoGzip_1D(unsigned char** newByteData, int64_t *oriData, 
+void SZ_compress_args_int64_NoCkRngeNoGzip_1D(unsigned char** newByteData, int64_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int64_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -368,35 +371,35 @@ TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int64_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	int64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (int64_t*)malloc(r2*sizeof(int64_t));
 	memset(P0, 0, r2*sizeof(int64_t));
 	P1 = (int64_t*)malloc(r2*sizeof(int64_t));
 	memset(P1, 0, r2*sizeof(int64_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	int64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -449,7 +452,7 @@ TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, si
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -470,7 +473,7 @@ TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, si
 			compressInt64Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -501,32 +504,32 @@ TightDataPointStorageI* SZ_compress_int64_2D_MDQ(int64_t *oriData, size_t r1, si
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_int64_NoCkRngeNoGzip_2D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int64_NoCkRngeNoGzip_2D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int64_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -536,30 +539,30 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(int64_t))
 		SZ_compress_args_int64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int64_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	int64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (int64_t*)malloc(r23*sizeof(int64_t));
@@ -568,9 +571,9 @@ TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, si
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -625,7 +628,7 @@ TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, si
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -731,7 +734,7 @@ TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, si
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -756,7 +759,7 @@ TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, si
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -790,22 +793,22 @@ TightDataPointStorageI* SZ_compress_int64_3D_MDQ(int64_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_int64_NoCkRngeNoGzip_3D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int64_NoCkRngeNoGzip_3D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int64_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
@@ -815,8 +818,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(int64_t))
 		SZ_compress_args_int64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -824,35 +827,35 @@ TightDataPointStorageI* SZ_compress_int64_4D_MDQ(int64_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int64_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	int64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (int64_t*)malloc(r34*sizeof(int64_t));
 	P1 = (int64_t*)malloc(r34*sizeof(int64_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -1098,21 +1101,21 @@ TightDataPointStorageI* SZ_compress_int64_4D_MDQ(int64_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_int64_NoCkRngeNoGzip_4D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_int64_NoCkRngeNoGzip_4D(unsigned char** newByteData, int64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int64_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1129,8 +1132,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_int64_withinRange(unsigned char** newByteData, int64_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*8);
@@ -1138,28 +1141,28 @@ void SZ_compress_args_int64_withinRange(unsigned char** newByteData, int64_t *or
 	//tdps->exactByteSize = 4;
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 8;
-	
+
 	int64_t value = oriData[0];
 	int64ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(int64_t)+SZ_SIZE_TYPE; //8==3+1+4(int64_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_int64_wRngeNoGzip(unsigned char** newByteData, int64_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int64_wRngeNoGzip(unsigned char** newByteData, int64_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	int64_t minValue = computeRangeSize_int(oriData, SZ_INT64, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_int64_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1187,12 +1190,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_int64(unsigned char** newByteData, int64_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int64(unsigned char** newByteData, int64_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1204,8 +1207,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	int64_t minValue = (int64_t)computeRangeSize_int(oriData, SZ_INT64, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1261,9 +1264,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the int64_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_int8.c b/sz/src/sz_int8.c
index 83febd0d..33c19e5d 100644
--- a/sz/src/sz_int8.c
+++ b/sz/src/sz_int8.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, doub
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_int8_1D(int8_t *oriData, size_t dataLength, doub
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_int8_2D(int8_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_int8_2D(int8_t *oriData, size_t r1, size_t r2, d
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_int8_2D(int8_t *oriData, size_t r1, size_t r2, d
 }
 
 unsigned int optimize_intervals_int8_3D(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_int8_3D(int8_t *oriData, size_t r1, size_t r2, s
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_int8_3D(int8_t *oriData, size_t r1, size_t r2, s
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,42 +234,42 @@ TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLeng
 		quantization_intervals = optimize_intervals_int8_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	int8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressInt8Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressInt8Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -281,24 +284,24 @@ TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLeng
 				pred = pred - state*interval;
 			}
 			if(pred>SZ_INT8_MAX) pred = SZ_INT8_MAX;
-			if(pred<SZ_INT8_MIN) pred = SZ_INT8_MIN;			
-			listAdd_int(last3CmprsData, pred);					
+			if(pred<SZ_INT8_MIN) pred = SZ_INT8_MIN;
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressInt8Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT8);
 
 //sdi:Debug
@@ -306,23 +309,23 @@ TightDataPointStorageI* SZ_compress_int8_1D_MDQ(int8_t *oriData, size_t dataLeng
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_int8_StoreOriData(int8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_int8_StoreOriData(int8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(int8_t);	
+	int intSize=sizeof(int8_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -331,14 +334,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-		
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;			
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -346,11 +349,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			*p = oriData[i];
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_int8_NoCkRngeNoGzip_1D(unsigned char** newByteData, int8_t *oriData, 
+void SZ_compress_args_int8_NoCkRngeNoGzip_1D(unsigned char** newByteData, int8_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, int8_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int8_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -365,35 +368,35 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int8_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (int8_t*)malloc(r2*sizeof(int8_t));
 	memset(P0, 0, r2*sizeof(int8_t));
 	P1 = (int8_t*)malloc(r2*sizeof(int8_t));
 	memset(P1, 0, r2*sizeof(int8_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	int8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -444,7 +447,7 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 			else if(tmp < SZ_INT8_MIN)
 				P1[j] = SZ_INT8_MIN;
 			else
-				P1[j] = SZ_INT8_MAX;			
+				P1[j] = SZ_INT8_MAX;
 		}
 		else
 		{
@@ -458,7 +461,7 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -476,7 +479,7 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 			else if(tmp < SZ_INT8_MIN)
 				P0[0] = SZ_INT8_MIN;
 			else
-				P0[0] = SZ_INT8_MAX;			
+				P0[0] = SZ_INT8_MAX;
 		}
 		else
 		{
@@ -485,7 +488,7 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 			compressInt8Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -506,7 +509,7 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P0[j] = SZ_INT8_MIN;
 				else
-					P0[j] = SZ_INT8_MAX;						
+					P0[j] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -522,32 +525,32 @@ TightDataPointStorageI* SZ_compress_int8_2D_MDQ(int8_t *oriData, size_t r1, size
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_int8_NoCkRngeNoGzip_2D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int8_NoCkRngeNoGzip_2D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int8_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int8_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -557,30 +560,30 @@ int64_t valueRangeSize, int8_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(int8_t))
 		SZ_compress_args_int8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int8_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (int8_t*)malloc(r23*sizeof(int8_t));
@@ -589,9 +592,9 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -614,7 +617,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 		else if(tmp < SZ_INT8_MIN)
 			P1[1] = SZ_INT8_MIN;
 		else
-			P1[1] = SZ_INT8_MAX;		
+			P1[1] = SZ_INT8_MAX;
 	}
 	else
 	{
@@ -642,7 +645,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 			else if(tmp < SZ_INT8_MIN)
 				P1[j] = SZ_INT8_MIN;
 			else
-				P1[j] = SZ_INT8_MAX;			
+				P1[j] = SZ_INT8_MAX;
 		}
 		else
 		{
@@ -658,7 +661,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -674,7 +677,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 			else if(tmp < SZ_INT8_MIN)
 				P1[index] = SZ_INT8_MIN;
 			else
-				P1[index] = SZ_INT8_MAX;			
+				P1[index] = SZ_INT8_MAX;
 		}
 		else
 		{
@@ -704,7 +707,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P1[index] = SZ_INT8_MIN;
 				else
-					P1[index] = SZ_INT8_MAX;				
+					P1[index] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -769,7 +772,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P0[j] = SZ_INT8_MIN;
 				else
-					P0[j] = SZ_INT8_MAX;				
+					P0[j] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -786,7 +789,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -817,7 +820,7 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -857,24 +860,24 @@ TightDataPointStorageI* SZ_compress_int8_3D_MDQ(int8_t *oriData, size_t r1, size
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_int8_NoCkRngeNoGzip_3D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_int8_NoCkRngeNoGzip_3D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
-{	
+{
 	TightDataPointStorageI* tdps = SZ_compress_int8_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
 
 	convertTDPStoFlatBytes_int(tdps, newByteData, outSize);
@@ -882,8 +885,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(int8_t))
 		SZ_compress_args_int8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -891,35 +894,35 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_int8_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	int8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (int8_t*)malloc(r34*sizeof(int8_t));
 	P1 = (int8_t*)malloc(r34*sizeof(int8_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	int8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -954,7 +957,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 			else if(tmp < SZ_INT8_MIN)
 				P1[index2D] = SZ_INT8_MIN;
 			else
-				P1[index2D] = SZ_INT8_MAX;			
+				P1[index2D] = SZ_INT8_MAX;
 		}
 		else
 		{
@@ -986,7 +989,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P1[index2D] = SZ_INT8_MIN;
 				else
-					P1[index2D] = SZ_INT8_MAX;					
+					P1[index2D] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -1020,7 +1023,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P1[index2D] = SZ_INT8_MIN;
 				else
-					P1[index2D] = SZ_INT8_MAX;					
+					P1[index2D] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -1053,7 +1056,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 					else if(tmp < SZ_INT8_MIN)
 						P1[index2D] = SZ_INT8_MIN;
 					else
-						P1[index2D] = SZ_INT8_MAX;						
+						P1[index2D] = SZ_INT8_MAX;
 				}
 				else
 				{
@@ -1090,7 +1093,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 				else if(tmp < SZ_INT8_MIN)
 					P0[index2D] = SZ_INT8_MIN;
 				else
-					P0[index2D] = SZ_INT8_MAX;					
+					P0[index2D] = SZ_INT8_MAX;
 			}
 			else
 			{
@@ -1122,7 +1125,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 					else if(tmp < SZ_INT8_MIN)
 						P0[index2D] = SZ_INT8_MIN;
 					else
-						P0[index2D] = SZ_INT8_MAX;						
+						P0[index2D] = SZ_INT8_MAX;
 				}
 				else
 				{
@@ -1156,7 +1159,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 					else if(tmp < SZ_INT8_MIN)
 						P0[index2D] = SZ_INT8_MIN;
 					else
-						P0[index2D] = SZ_INT8_MAX;						
+						P0[index2D] = SZ_INT8_MAX;
 				}
 				else
 				{
@@ -1189,7 +1192,7 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 						else if(tmp < SZ_INT8_MIN)
 							P0[index2D] = SZ_INT8_MIN;
 						else
-							P0[index2D] = SZ_INT8_MAX;							
+							P0[index2D] = SZ_INT8_MAX;
 					}
 					else
 					{
@@ -1213,21 +1216,21 @@ TightDataPointStorageI* SZ_compress_int8_4D_MDQ(int8_t *oriData, size_t r1, size
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_INT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_int8_NoCkRngeNoGzip_4D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_int8_NoCkRngeNoGzip_4D(unsigned char** newByteData, int8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_int8_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1244,8 +1247,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_int8_withinRange(unsigned char** newByteData, int8_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char));
@@ -1253,29 +1256,29 @@ void SZ_compress_args_int8_withinRange(unsigned char** newByteData, int8_t *oriD
 	//tdps->exactByteSize = 4;
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 1;
-	
+
 	int8_t value = oriData[0];
 	//intToBytes_bigEndian(tdps->exactDataBytes, value);
 	memcpy(tdps->exactDataBytes, &value, 1);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(int8_t)+SZ_SIZE_TYPE; //8==3+1+4(int8_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_int8_wRngeNoGzip(unsigned char** newByteData, int8_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int8_wRngeNoGzip(unsigned char** newByteData, int8_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	int8_t minValue = computeRangeSize_int(oriData, SZ_INT8, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_int8_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1303,12 +1306,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_int8(unsigned char** newByteData, int8_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_int8(unsigned char** newByteData, int8_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1320,8 +1323,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	int8_t minValue = (int8_t)computeRangeSize_int(oriData, SZ_INT8, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1377,7 +1380,7 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the int8_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
 
diff --git a/sz/src/sz_omp.c b/sz/src/sz_omp.c
index e3619ecd..68561f55 100644
--- a/sz/src/sz_omp.c
+++ b/sz/src/sz_omp.c
@@ -15,10 +15,19 @@ double sz_wtime(){
 #ifdef _OPENMP
     return omp_get_wtime();
 #else
+#if defined(HAVE_CLOCK_GETTIME)
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
 
-    return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0;
+    return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
+#elif defined(HAVE_GETTIMEOFDAY)
+    struct timeval now_tv;
+    gettimeofday(&now_tv, NULL);
+
+    return (double)now_tv.tv_sec + ((double)now_tv.tv_usec / 1000000.0);
+#else
+    return (double)time(NULL);
+#endif
 #endif
 }
 
@@ -65,9 +74,9 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 #ifdef DEBUG
 		printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision);
 #endif
-		// exit(0);		
+		// exit(0);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -127,7 +136,7 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 
 	size_t dim0_offset = r2 * r3;
 	size_t dim1_offset = r3;
-	
+
 	// printf("malloc blockinfo array start\n");
 	// fflush(stdout);
 
@@ -146,10 +155,10 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 	size_t * block_offset = (size_t *) malloc(num_blocks * sizeof(size_t));
 	size_t *freq = (size_t *)malloc(thread_num*quantization_intervals*4*sizeof(size_t));
 	memset(freq, 0, thread_num*quantization_intervals*4*sizeof(size_t));
-	
+
 	size_t stateNum = quantization_intervals*2;
-	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);	
-	
+	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+
 	int num_yz = num_y * num_z;
 	#pragma omp parallel for
 	for(int t=0; t<thread_num; t++){
@@ -234,13 +243,13 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 	memcpy(result_pos, unpredictable_count, num_blocks * sizeof(unsigned int));
 	result_pos += num_blocks * sizeof(unsigned int);
 	memcpy(result_pos, mean, num_blocks * sizeof(float));
-	result_pos += num_blocks * sizeof(float);	
+	result_pos += num_blocks * sizeof(float);
 	// printf("unpred offset: %ld\n", result_pos - result);
 	// store unpredicable data
 	// float * unpred_pos = (float *) result_pos;
 	// for(int t=0; t<thread_num; t++){
 	// 	float * unpredictable_data = result_unpredictable_data + t * unpred_data_max_size;
-	// 	memcpy(result_pos, unpredictable_data, unpredictable_count[t] * sizeof(float));		
+	// 	memcpy(result_pos, unpredictable_data, unpredictable_count[t] * sizeof(float));
 	// 	result_pos += unpredictable_count[t]*sizeof(float);
 	// }
 	unpred_offset[0] = 0;
@@ -251,7 +260,7 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 	for(int t=0; t<thread_num; t++){
 		int id = sz_get_thread_num();
 		float * unpredictable_data = result_unpredictable_data + id * unpred_data_max_size;
-		memcpy(result_pos + unpred_offset[id] * sizeof(float), unpredictable_data, unpredictable_count[id] * sizeof(float));		
+		memcpy(result_pos + unpred_offset[id] * sizeof(float), unpredictable_data, unpredictable_count[id] * sizeof(float));
 	}
 	result_pos += total_unpred * sizeof(float);
 
@@ -299,7 +308,7 @@ unsigned char * SZ_compress_float_3D_MDQ_openmp(float *oriData, size_t r1, size_
 	#pragma omp parallel for
 	for(int t=0; t<thread_num; t++){
 		int id = sz_get_thread_num();
-		memcpy(result_pos + block_offset[id], encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);		
+		memcpy(result_pos + block_offset[id], encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);
 	}
 	result_pos += block_offset[thread_num - 1] + block_pos[thread_num - 1];
 
@@ -343,14 +352,14 @@ void decompressDataSeries_float_2D_openmp(float** data, size_t r1, size_t r2, un
 }
 
 void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, size_t r3, unsigned char* comp_data){
-	
+
 	if(confparams_dec==NULL)
 		confparams_dec = (sz_params*)malloc(sizeof(sz_params));
 	memset(confparams_dec, 0, sizeof(sz_params));
 	if(exe_params==NULL)
 		exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
-	memset(exe_params, 0, sizeof(sz_exedata));	
-	
+	memset(exe_params, 0, sizeof(sz_exedata));
+
 	// printf("num_block_elements %d num_blocks %d\n", max_num_block_elements, num_blocks);
 	// fflush(stdout);
 	double elapsed_time = 0.0;
@@ -359,7 +368,7 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
 	size_t dim0_offset = r2 * r3;
 	size_t dim1_offset = r3;
 	size_t num_elements = r1 * r2 * r3;
-	
+
 	unsigned char * comp_data_pos = comp_data;
 	//int meta_data_offset = 3 + 1 + MetaDataByteLength;
 	//comp_data_pos += meta_data_offset;
@@ -394,7 +403,7 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
 			}
 		}
 	}
-	
+
 #ifdef DEBUG
 	printf("number of blocks: %zu %zu %zu, thread_num %d\n", num_x, num_y, num_z, thread_num);
 #endif
@@ -453,7 +462,7 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
 	// 	printf("%.2f ", result_unpredictable_data[i]);
 	// }
 	// printf("\ntotal_unpred num: %d\n", total_unpred);
-	
+
 	// for(int i=0; i<num_blocks; i++){
 	// 	printf("%d unpred offset %ld\n", i, unpred_offset[i]);
 	// 	for(int tmp=0; tmp<10; tmp++){
@@ -526,7 +535,7 @@ void decompressDataSeries_float_3D_openmp(float** data, size_t r1, size_t r2, si
 		// }
 		// printf("\n\n");
 		decompressDataSeries_float_3D_RA_block(data_pos, mean, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, realPrecision, type, unpredictable_data);
-	}	
+	}
 	elapsed_time += sz_wtime();
 #ifdef DEBUG
 	printf("Parallel decompress elapsed time: %.4f\n", elapsed_time);
@@ -562,9 +571,9 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
 #ifdef DEBUG
 		printf("3D number of bins: %d\nerror bound %.20f\n", quantization_intervals, realPrecision);
 #endif
-		// exit(0);		
+		// exit(0);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else{
 		quantization_intervals = exe_params->intvCapacity;
 	}
@@ -624,7 +633,7 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
 
 	size_t dim0_offset = r2 * r3;
 	size_t dim1_offset = r3;
-	
+
 	// printf("malloc blockinfo array start\n");
 	// fflush(stdout);
 
@@ -643,10 +652,10 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
 	size_t * block_offset = (size_t *) malloc(num_blocks * sizeof(size_t));
 	size_t *freq = (size_t *)malloc(thread_num*quantization_intervals*4*sizeof(size_t));
 	memset(freq, 0, thread_num*quantization_intervals*4*sizeof(size_t));
-	
+
 	size_t stateNum = quantization_intervals*2;
-	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);	
-	
+	HuffmanTree* huffmanTree = createHuffmanTree(stateNum);
+
 	int num_yz = num_y * num_z;
 	#pragma omp parallel for
 	for(int t=0; t<thread_num; t++){
@@ -728,18 +737,18 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
 	memcpy(result_pos, unpredictable_count, num_blocks * sizeof(unsigned int));
 	result_pos += num_blocks * sizeof(unsigned int);
 	memcpy(result_pos, mean, num_blocks * sizeof(double));
-	result_pos += num_blocks * sizeof(double);	
+	result_pos += num_blocks * sizeof(double);
 
 	unpred_offset[0] = 0;
 	for(int t=1; t<thread_num; t++){
 		unpred_offset[t] = unpredictable_count[t-1] + unpred_offset[t-1];
 	}
-	
+
 	#pragma omp parallel for
 	for(int t=0; t<thread_num; t++){
 		int id = sz_get_thread_num();
 		double * unpredictable_data = result_unpredictable_data + id * unpred_data_max_size;
-		memcpy(result_pos + unpred_offset[id] * sizeof(double), unpredictable_data, unpredictable_count[id] * sizeof(double));		
+		memcpy(result_pos + unpred_offset[id] * sizeof(double), unpredictable_data, unpredictable_count[id] * sizeof(double));
 	}
 	result_pos += total_unpred * sizeof(double);
 
@@ -787,7 +796,7 @@ unsigned char * SZ_compress_double_3D_MDQ_openmp(double *oriData, size_t r1, siz
 	#pragma omp parallel for
 	for(int t=0; t<thread_num; t++){
 		int id = sz_get_thread_num();
-		memcpy(result_pos + block_offset[id], encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);		
+		memcpy(result_pos + block_offset[id], encoding_buffer + t * max_num_block_elements * sizeof(int), block_pos[t]);
 	}
 	result_pos += block_offset[thread_num - 1] + block_pos[thread_num - 1];
 
@@ -837,8 +846,8 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
 	memset(confparams_dec, 0, sizeof(sz_params));
 	if(exe_params==NULL)
 		exe_params = (sz_exedata*)malloc(sizeof(sz_exedata));
-	memset(exe_params, 0, sizeof(sz_exedata));	
-	
+	memset(exe_params, 0, sizeof(sz_exedata));
+
 	// printf("num_block_elements %d num_blocks %d\n", max_num_block_elements, num_blocks);
 	// fflush(stdout);
 	double elapsed_time = 0.0;
@@ -847,7 +856,7 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
 	size_t dim0_offset = r2 * r3;
 	size_t dim1_offset = r3;
 	size_t num_elements = r1 * r2 * r3;
-	
+
 	unsigned char * comp_data_pos = comp_data;
 	//int meta_data_offset = 3 + 1 + MetaDataByteLength;
 	//comp_data_pos += meta_data_offset;
@@ -882,7 +891,7 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
 			}
 		}
 	}
-	
+
 #ifdef DEBUG
 	printf("number of blocks: %zu %zu %zu, thread_num %d\n", num_x, num_y, num_z, thread_num);
 #endif
@@ -988,7 +997,7 @@ void decompressDataSeries_double_3D_openmp(double** data, size_t r1, size_t r2,
 		double mean = mean_pos[id];
 
 		decompressDataSeries_double_3D_RA_block(data_pos, mean, r1, r2, r3, current_blockcount_x, current_blockcount_y, current_blockcount_z, realPrecision, type, unpredictable_data);
-	}	
+	}
 	elapsed_time += sz_wtime();
 #ifdef DEBUG
 	printf("Parallel decompress elapsed time: %.4f\n", elapsed_time);
@@ -1032,12 +1041,12 @@ void Huffman_init_openmp(HuffmanTree* huffmanTree, int *s, size_t length, int th
 	}
 
 	for (i = 0; i < huffmanTree->allNodes; i++)
-		if (freq[i]) 
+		if (freq[i])
 			qinsert(huffmanTree, new_node(huffmanTree, freq[i], i, 0, 0));
- 
-	while (huffmanTree->qend > 2) 
+
+	while (huffmanTree->qend > 2)
 		qinsert(huffmanTree, new_node(huffmanTree, 0, 0, qremove(huffmanTree), qremove(huffmanTree)));
- 
+
 	build_code(huffmanTree, huffmanTree->qq[1], 0, 0, 0);
 	// free(freq);
 }
diff --git a/sz/src/sz_uint16.c b/sz/src/sz_uint16.c
index ae557da3..a9752ba9 100644
--- a/sz/src/sz_uint16.c
+++ b/sz/src/sz_uint16.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength,
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_uint16_1D(uint16_t *oriData, size_t dataLength,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_uint16_2D(uint16_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_uint16_2D(uint16_t *oriData, size_t r1, size_t r
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_uint16_2D(uint16_t *oriData, size_t r1, size_t r
 }
 
 unsigned int optimize_intervals_uint16_3D(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_uint16_3D(uint16_t *oriData, size_t r1, size_t r
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_uint16_3D(uint16_t *oriData, size_t r1, size_t r
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,42 +234,42 @@ TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t data
 		quantization_intervals = optimize_intervals_uint16_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	uint16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressUInt16Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressUInt16Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -281,24 +284,24 @@ TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t data
 				pred = pred - state*interval;
 			}
 			if(pred>SZ_UINT16_MAX) pred = SZ_UINT16_MAX;
-			if(pred<SZ_UINT16_MIN) pred = SZ_UINT16_MIN;			
-			listAdd_int(last3CmprsData, pred);					
+			if(pred<SZ_UINT16_MIN) pred = SZ_UINT16_MIN;
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressUInt16Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT16);
 
 //sdi:Debug
@@ -306,39 +309,39 @@ TightDataPointStorageI* SZ_compress_uint16_1D_MDQ(uint16_t *oriData, size_t data
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
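-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)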
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_uint16_StoreOriData(uint16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_uint16_StoreOriData(uint16_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(uint16_t);	
+	int intSize=sizeof(uint16_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
-		(*newByteData)[k++] = versionNumber[i];	
+		(*newByteData)[k++] = versionNumber[i];
 
 	if(exe_params->SZ_SIZE_TYPE==4)//1
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;	
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -346,11 +349,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int16ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_uint16_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint16_t *oriData, 
+void SZ_compress_args_uint16_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint16_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint16_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint16_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -365,35 +368,35 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint16_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (uint16_t*)malloc(r2*sizeof(uint16_t));
 	memset(P0, 0, r2*sizeof(uint16_t));
 	P1 = (uint16_t*)malloc(r2*sizeof(uint16_t));
 	memset(P1, 0, r2*sizeof(uint16_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	uint16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -444,7 +447,7 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 			else if(tmp < SZ_UINT16_MIN)
 				P1[j] = SZ_UINT16_MIN;
 			else
-				P1[j] = SZ_UINT16_MAX;			
+				P1[j] = SZ_UINT16_MAX;
 		}
 		else
 		{
@@ -458,7 +461,7 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -476,7 +479,7 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 			else if(tmp < SZ_UINT16_MIN)
 				P0[0] = SZ_UINT16_MIN;
 			else
-				P0[0] = SZ_UINT16_MAX;			
+				P0[0] = SZ_UINT16_MAX;
 		}
 		else
 		{
@@ -485,7 +488,7 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 			compressUInt16Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -506,7 +509,7 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P0[j] = SZ_UINT16_MIN;
 				else
-					P0[j] = SZ_UINT16_MAX;						
+					P0[j] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -522,32 +525,32 @@ TightDataPointStorageI* SZ_compress_uint16_2D_MDQ(uint16_t *oriData, size_t r1,
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
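-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)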
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_uint16_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint16_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, uint16_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint16_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -557,30 +560,30 @@ int64_t valueRangeSize, uint16_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(uint16_t))
 		SZ_compress_args_uint16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint16_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (uint16_t*)malloc(r23*sizeof(uint16_t));
@@ -589,9 +592,9 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -614,7 +617,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 		else if(tmp < SZ_UINT16_MIN)
 			P1[1] = SZ_UINT16_MIN;
 		else
-			P1[1] = SZ_UINT16_MAX;		
+			P1[1] = SZ_UINT16_MAX;
 	}
 	else
 	{
@@ -642,7 +645,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 			else if(tmp < SZ_UINT16_MIN)
 				P1[j] = SZ_UINT16_MIN;
 			else
-				P1[j] = SZ_UINT16_MAX;			
+				P1[j] = SZ_UINT16_MAX;
 		}
 		else
 		{
@@ -658,7 +661,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -674,7 +677,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 			else if(tmp < SZ_UINT16_MIN)
 				P1[index] = SZ_UINT16_MIN;
 			else
-				P1[index] = SZ_UINT16_MAX;			
+				P1[index] = SZ_UINT16_MAX;
 		}
 		else
 		{
@@ -704,7 +707,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P1[index] = SZ_UINT16_MIN;
 				else
-					P1[index] = SZ_UINT16_MAX;				
+					P1[index] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -769,7 +772,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P0[j] = SZ_UINT16_MIN;
 				else
-					P0[j] = SZ_UINT16_MAX;				
+					P0[j] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -786,7 +789,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -817,7 +820,7 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -857,24 +860,24 @@ TightDataPointStorageI* SZ_compress_uint16_3D_MDQ(uint16_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
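-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)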
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_uint16_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint16_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
-{	
+{
 	TightDataPointStorageI* tdps = SZ_compress_uint16_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
 
 	convertTDPStoFlatBytes_int(tdps, newByteData, outSize);
@@ -882,8 +885,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(uint16_t))
 		SZ_compress_args_uint16_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -891,35 +894,35 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint16_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint16_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (uint16_t*)malloc(r34*sizeof(uint16_t));
 	P1 = (uint16_t*)malloc(r34*sizeof(uint16_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint16_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -954,7 +957,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 			else if(tmp < SZ_UINT16_MIN)
 				P1[index2D] = SZ_UINT16_MIN;
 			else
-				P1[index2D] = SZ_UINT16_MAX;			
+				P1[index2D] = SZ_UINT16_MAX;
 		}
 		else
 		{
@@ -986,7 +989,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P1[index2D] = SZ_UINT16_MIN;
 				else
-					P1[index2D] = SZ_UINT16_MAX;					
+					P1[index2D] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -1020,7 +1023,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P1[index2D] = SZ_UINT16_MIN;
 				else
-					P1[index2D] = SZ_UINT16_MAX;					
+					P1[index2D] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -1053,7 +1056,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 					else if(tmp < SZ_UINT16_MIN)
 						P1[index2D] = SZ_UINT16_MIN;
 					else
-						P1[index2D] = SZ_UINT16_MAX;						
+						P1[index2D] = SZ_UINT16_MAX;
 				}
 				else
 				{
@@ -1090,7 +1093,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 				else if(tmp < SZ_UINT16_MIN)
 					P0[index2D] = SZ_UINT16_MIN;
 				else
-					P0[index2D] = SZ_UINT16_MAX;					
+					P0[index2D] = SZ_UINT16_MAX;
 			}
 			else
 			{
@@ -1122,7 +1125,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 					else if(tmp < SZ_UINT16_MIN)
 						P0[index2D] = SZ_UINT16_MIN;
 					else
-						P0[index2D] = SZ_UINT16_MAX;						
+						P0[index2D] = SZ_UINT16_MAX;
 				}
 				else
 				{
@@ -1156,7 +1159,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 					else if(tmp < SZ_UINT16_MIN)
 						P0[index2D] = SZ_UINT16_MIN;
 					else
-						P0[index2D] = SZ_UINT16_MAX;						
+						P0[index2D] = SZ_UINT16_MAX;
 				}
 				else
 				{
@@ -1189,7 +1192,7 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 						else if(tmp < SZ_UINT16_MIN)
 							P0[index2D] = SZ_UINT16_MIN;
 						else
-							P0[index2D] = SZ_UINT16_MAX;							
+							P0[index2D] = SZ_UINT16_MAX;
 					}
 					else
 					{
@@ -1213,21 +1216,21 @@ TightDataPointStorageI* SZ_compress_uint16_4D_MDQ(uint16_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT16);
-			
+
 	//free memory
-	free(type);	
+	free(type);
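-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)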
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_uint16_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_uint16_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint16_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint16_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1244,8 +1247,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_uint16_withinRange(unsigned char** newByteData, uint16_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*2);
@@ -1254,28 +1257,28 @@ void SZ_compress_args_uint16_withinRange(unsigned char** newByteData, uint16_t *
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 2;
 	tdps->dataTypeSize = convertDataTypeSize(sizeof(uint16_t));
-	
+
 	uint16_t value = oriData[0];
 	int16ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(uint16_t)+SZ_SIZE_TYPE; //8==3+1+4(uint16_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_uint16_wRngeNoGzip(unsigned char** newByteData, uint16_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint16_wRngeNoGzip(unsigned char** newByteData, uint16_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	uint16_t minValue = computeRangeSize_int(oriData, SZ_UINT16, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_uint16_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1303,12 +1306,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_uint16(unsigned char** newByteData, uint16_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint16(unsigned char** newByteData, uint16_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1320,8 +1323,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	uint16_t minValue = (uint16_t)computeRangeSize_int(oriData, SZ_UINT16, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1377,9 +1380,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the uint16_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_uint32.c b/sz/src/sz_uint32.c
index 6f27510f..e30ad373 100644
--- a/sz/src/sz_uint32.c
+++ b/sz/src/sz_uint32.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength,
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_uint32_1D(uint32_t *oriData, size_t dataLength,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_uint32_2D(uint32_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_uint32_2D(uint32_t *oriData, size_t r1, size_t r
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_uint32_2D(uint32_t *oriData, size_t r1, size_t r
 }
 
 unsigned int optimize_intervals_uint32_3D(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_uint32_3D(uint32_t *oriData, size_t r1, size_t r
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_uint32_3D(uint32_t *oriData, size_t r1, size_t r
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,36 +234,36 @@ TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t data
 		quantization_intervals = optimize_intervals_uint32_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	uint32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//store the first two values exactly; both are pushed into last3CmprsData
 	type[0] = 0;
 	compressUInt32Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressUInt32Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	uint32_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 //		if(i==2869438)
@@ -268,7 +271,7 @@ TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t data
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -284,23 +287,23 @@ TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t data
 			}
 /*			if(type[i]==0)
 				printf("err:type[%d]=0\n", i);*/
-			listAdd_int(last3CmprsData, pred);					
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data: set type[i]=0 and append the exact value to exactDataByteArray
 		type[i] = 0;
 		compressUInt32Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT32);
 
 //sdi:Debug
@@ -308,23 +311,23 @@ TightDataPointStorageI* SZ_compress_uint32_1D_MDQ(uint32_t *oriData, size_t data
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
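-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)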
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_uint32_StoreOriData(uint32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_uint32_StoreOriData(uint32_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(uint32_t);	
+	int intSize=sizeof(uint32_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -333,14 +336,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -348,11 +351,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int32ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_uint32_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint32_t *oriData, 
+void SZ_compress_args_uint32_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint32_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint32_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint32_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -367,35 +370,35 @@ TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint32_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (uint32_t*)malloc(r2*sizeof(uint32_t));
 	memset(P0, 0, r2*sizeof(uint32_t));
 	P1 = (uint32_t*)malloc(r2*sizeof(uint32_t));
 	memset(P1, 0, r2*sizeof(uint32_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	uint32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -448,7 +451,7 @@ TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1,
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -469,7 +472,7 @@ TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1,
 			compressUInt32Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -500,32 +503,32 @@ TightDataPointStorageI* SZ_compress_uint32_2D_MDQ(uint32_t *oriData, size_t r1,
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
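-	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
+	free(exactDataByteArray); //frees only the wrapper; exactDataByteArray->array was passed to tdps above and is released later in free_TightDataPointStorageI(tdps)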
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_uint32_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint32_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, uint32_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint32_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -535,30 +538,30 @@ int64_t valueRangeSize, uint32_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(uint32_t))
 		SZ_compress_args_uint32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint32_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (uint32_t*)malloc(r23*sizeof(uint32_t));
@@ -567,9 +570,9 @@ TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1,
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -624,7 +627,7 @@ TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1,
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -730,7 +733,7 @@ TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1,
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -755,7 +758,7 @@ TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1,
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -789,22 +792,22 @@ TightDataPointStorageI* SZ_compress_uint32_3D_MDQ(uint32_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_uint32_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint32_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint32_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
@@ -814,8 +817,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(uint32_t))
 		SZ_compress_args_uint32_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -823,35 +826,35 @@ TightDataPointStorageI* SZ_compress_uint32_4D_MDQ(uint32_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint32_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint32_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (uint32_t*)malloc(r34*sizeof(uint32_t));
 	P1 = (uint32_t*)malloc(r34*sizeof(uint32_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint32_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -1097,21 +1100,21 @@ TightDataPointStorageI* SZ_compress_uint32_4D_MDQ(uint32_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT32);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_uint32_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_uint32_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint32_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint32_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1128,8 +1131,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_uint32_withinRange(unsigned char** newByteData, uint32_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*4);
@@ -1137,28 +1140,28 @@ void SZ_compress_args_uint32_withinRange(unsigned char** newByteData, uint32_t *
 	//tdps->exactByteSize = 4;
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 4;
-	
+
 	uint32_t value = oriData[0];
 	int32ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(uint32_t)+SZ_SIZE_TYPE; //8==3+1+4(uint32_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_uint32_wRngeNoGzip(unsigned char** newByteData, uint32_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint32_wRngeNoGzip(unsigned char** newByteData, uint32_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	uint32_t minValue = computeRangeSize_int(oriData, SZ_UINT32, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_uint32_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1186,12 +1189,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_uint32(unsigned char** newByteData, uint32_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint32(unsigned char** newByteData, uint32_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1203,8 +1206,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	uint32_t minValue = (uint32_t)computeRangeSize_int(oriData, SZ_UINT32, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1260,9 +1263,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the uint32_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_uint64.c b/sz/src/sz_uint64.c
index 7d2eca84..a9437b78 100644
--- a/sz/src/sz_uint64.c
+++ b/sz/src/sz_uint64.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength,
 			pred_err = llabs(pred_value - (int64_t)(oriData[i]));
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_uint64_1D(uint64_t *oriData, size_t dataLength,
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_uint64_2D(uint64_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_uint64_2D(uint64_t *oriData, size_t r1, size_t r
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_uint64_2D(uint64_t *oriData, size_t r1, size_t r
 }
 
 unsigned int optimize_intervals_uint64_3D(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_uint64_3D(uint64_t *oriData, size_t r1, size_t r
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - (int64_t)(oriData[index]));
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_uint64_3D(uint64_t *oriData, size_t r1, size_t r
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,36 +234,36 @@ TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t data
 		quantization_intervals = optimize_intervals_uint64_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	uint64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressUInt64Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressUInt64Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 //		if(i==2869438)
@@ -268,7 +271,7 @@ TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t data
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -284,23 +287,23 @@ TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t data
 			}
 /*			if(type[i]==0)
 				printf("err:type[%d]=0\n", i);*/
-			listAdd_int(last3CmprsData, pred);					
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressUInt64Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT64);
 
 //sdi:Debug
@@ -308,23 +311,23 @@ TightDataPointStorageI* SZ_compress_uint64_1D_MDQ(uint64_t *oriData, size_t data
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_uint64_StoreOriData(uint64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_uint64_StoreOriData(uint64_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(uint64_t);	
+	int intSize=sizeof(uint64_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -333,14 +336,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -348,11 +351,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			int64ToBytes_bigEndian(p, oriData[i]);
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_uint64_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint64_t *oriData, 
+void SZ_compress_args_uint64_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint64_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, uint64_t valueRangeSize, uint64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint64_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -367,35 +370,35 @@ TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint64_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (uint64_t*)malloc(r2*sizeof(uint64_t));
 	memset(P0, 0, r2*sizeof(uint64_t));
 	P1 = (uint64_t*)malloc(r2*sizeof(uint64_t));
 	memset(P1, 0, r2*sizeof(uint64_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	uint64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -448,7 +451,7 @@ TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1,
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -469,7 +472,7 @@ TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1,
 			compressUInt64Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -500,32 +503,32 @@ TightDataPointStorageI* SZ_compress_uint64_2D_MDQ(uint64_t *oriData, size_t r1,
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_uint64_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint64_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, uint64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint64_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -535,30 +538,30 @@ int64_t valueRangeSize, uint64_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(uint64_t))
 		SZ_compress_args_uint64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, uint64_t valueRangeSize, uint64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint64_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (uint64_t*)malloc(r23*sizeof(uint64_t));
@@ -567,9 +570,9 @@ TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1,
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -624,7 +627,7 @@ TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1,
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred1D);
 
@@ -730,7 +733,7 @@ TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1,
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = (int64_t)(spaceFillingValue[index]) - (int64_t)(pred2D);
 
@@ -755,7 +758,7 @@ TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1,
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -789,22 +792,22 @@ TightDataPointStorageI* SZ_compress_uint64_3D_MDQ(uint64_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_uint64_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint64_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 uint64_t valueRangeSize, uint64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint64_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
@@ -814,8 +817,8 @@ uint64_t valueRangeSize, uint64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(uint64_t))
 		SZ_compress_args_uint64_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -823,35 +826,35 @@ TightDataPointStorageI* SZ_compress_uint64_4D_MDQ(uint64_t *oriData, size_t r1,
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint64_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue;
 	int64_t diff = 0.0;
 	double itvNum = 0;
 	uint64_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (uint64_t*)malloc(r34*sizeof(uint64_t));
 	P1 = (uint64_t*)malloc(r34*sizeof(uint64_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint64_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -1097,21 +1100,21 @@ TightDataPointStorageI* SZ_compress_uint64_4D_MDQ(uint64_t *oriData, size_t r1,
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT64);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_uint64_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_uint64_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint64_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, uint64_t valueRangeSize, uint64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint64_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1128,8 +1131,8 @@ size_t *outSize, uint64_t valueRangeSize, uint64_t minValue)
 void SZ_compress_args_uint64_withinRange(unsigned char** newByteData, uint64_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char)*8);
@@ -1137,28 +1140,28 @@ void SZ_compress_args_uint64_withinRange(unsigned char** newByteData, uint64_t *
 	//tdps->exactByteSize = 8;
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 8;
-	
+
 	uint64_t value = oriData[0];
 	int64ToBytes_bigEndian(tdps->exactDataBytes, value);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(uint64_t)+SZ_SIZE_TYPE; //8==3+1+4(uint64_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_uint64_wRngeNoGzip(unsigned char** newByteData, uint64_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint64_wRngeNoGzip(unsigned char** newByteData, uint64_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	uint64_t minValue = computeRangeSize_int(oriData, SZ_UINT64, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_uint64_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1186,12 +1189,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_uint64(unsigned char** newByteData, uint64_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint64(unsigned char** newByteData, uint64_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1203,8 +1206,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	uint64_t minValue = (uint64_t)computeRangeSize_int(oriData, SZ_UINT64, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1260,9 +1263,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the uint64_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/sz/src/sz_uint8.c b/sz/src/sz_uint8.c
index 6865564d..2e0387a7 100644
--- a/sz/src/sz_uint8.c
+++ b/sz/src/sz_uint8.c
@@ -8,10 +8,13 @@
  */
 
 
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
 #include <math.h>
 #include "sz.h"
 #include "CompressElement.h"
@@ -24,7 +27,7 @@
 #include "utility.h"
 
 unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, double realPrecision)
-{	
+{
 	size_t i = 0, radiusIndex;
 	int64_t pred_value = 0, pred_err;
 	size_t *intervals = (size_t*)malloc(confparams_cpr->maxRangeRadius*sizeof(size_t));
@@ -39,7 +42,7 @@ unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, do
 			pred_err = llabs(pred_value - oriData[i]);
 			radiusIndex = (uint64_t)((pred_err/realPrecision+1)/2);
 			if(radiusIndex>=confparams_cpr->maxRangeRadius)
-				radiusIndex = confparams_cpr->maxRangeRadius - 1;			
+				radiusIndex = confparams_cpr->maxRangeRadius - 1;
 			intervals[radiusIndex]++;
 		}
 	}
@@ -54,20 +57,20 @@ unsigned int optimize_intervals_uint8_1D(uint8_t *oriData, size_t dataLength, do
 	}
 	if(i>=confparams_cpr->maxRangeRadius)
 		i = confparams_cpr->maxRangeRadius-1;
-		
+
 	unsigned int accIntervals = 2*(i+1);
 	unsigned int powerOf2 = roundUpToPowerOf2(accIntervals);
-	
+
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("accIntervals=%d, powerOf2=%d\n", accIntervals, powerOf2);
 	return powerOf2;
 }
 
 unsigned int optimize_intervals_uint8_2D(uint8_t *oriData, size_t r1, size_t r2, double realPrecision)
-{	
+{
 	size_t i,j, index;
 	size_t radiusIndex;
 	int64_t pred_value = 0, pred_err;
@@ -87,7 +90,7 @@ unsigned int optimize_intervals_uint8_2D(uint8_t *oriData, size_t r1, size_t r2,
 				if(radiusIndex>=confparams_cpr->maxRangeRadius)
 					radiusIndex = confparams_cpr->maxRangeRadius - 1;
 				intervals[radiusIndex]++;
-			}			
+			}
 		}
 	}
 	//compute the appropriate number
@@ -113,7 +116,7 @@ unsigned int optimize_intervals_uint8_2D(uint8_t *oriData, size_t r1, size_t r2,
 }
 
 unsigned int optimize_intervals_uint8_3D(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision)
-{	
+{
 	size_t i,j,k, index;
 	size_t radiusIndex;
 	size_t r23=r2*r3;
@@ -126,11 +129,11 @@ unsigned int optimize_intervals_uint8_3D(uint8_t *oriData, size_t r1, size_t r2,
 		for(j=1;j<r2;j++)
 		{
 			for(k=1;k<r3;k++)
-			{			
+			{
 				if((i+j+k)%confparams_cpr->sampleDistance==0)
 				{
 					index = i*r23+j*r3+k;
-					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23] 
+					pred_value = oriData[index-1] + oriData[index-r3] + oriData[index-r23]
 					- oriData[index-1-r23] - oriData[index-r3-1] - oriData[index-r3-r23] + oriData[index-r3-r23-1];
 					pred_err = llabs(pred_value - oriData[index]);
 					radiusIndex = (pred_err/realPrecision+1)/2;
@@ -160,7 +163,7 @@ unsigned int optimize_intervals_uint8_3D(uint8_t *oriData, size_t r1, size_t r2,
 
 	if(powerOf2<32)
 		powerOf2 = 32;
-	
+
 	free(intervals);
 	//printf("targetCount=%d, sum=%d, totalSampleSize=%d, ratio=%f, accIntervals=%d, powerOf2=%d\n", targetCount, sum, totalSampleSize, (double)sum/(double)totalSampleSize, accIntervals, powerOf2);
 	return powerOf2;
@@ -231,42 +234,42 @@ TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLe
 		quantization_intervals = optimize_intervals_uint8_1D(oriData, dataLength, realPrecision);
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	updateQuantizationInfo(quantization_intervals);	
+	updateQuantizationInfo(quantization_intervals);
 	size_t i;
 
 	int* type = (int*) malloc(dataLength*sizeof(int));
-		
+
 	uint8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
 	new_DBA(&exactDataByteArray, DynArrayInitLen);
-		
+
 	int64_t last3CmprsData[3] = {0,0,0};
-				
-	//add the first data	
+
+	//add the first data
 	type[0] = 0;
 	compressUInt8Value(spaceFillingValue[0], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[0]);
-		
+
 	type[1] = 0;
 	compressUInt8Value(spaceFillingValue[1], minValue, byteSize, bytes);
 	memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 	listAdd_int(last3CmprsData, spaceFillingValue[1]);
-	//printf("%.30G\n",last3CmprsData[0]);	
-	
+	//printf("%.30G\n",last3CmprsData[0]);
+
 	int state;
 	double checkRadius = (exe_params->intvCapacity-1)*realPrecision;
 	int64_t curData;
 	int64_t pred, predAbsErr;
 	double interval = 2*realPrecision;
-	
+
 	for(i=2;i<dataLength;i++)
 	{
 		curData = spaceFillingValue[i];
 		//pred = 2*last3CmprsData[0] - last3CmprsData[1];
 		pred = last3CmprsData[0];
-		predAbsErr = llabs(curData - pred);	
+		predAbsErr = llabs(curData - pred);
 		if(predAbsErr<checkRadius)
 		{
 			state = (predAbsErr/realPrecision+1)/2;
@@ -281,24 +284,24 @@ TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLe
 				pred = pred - state*interval;
 			}
 			if(pred>SZ_UINT8_MAX) pred = SZ_UINT8_MAX;
-			if(pred<SZ_UINT8_MIN) pred = SZ_UINT8_MIN;			
-			listAdd_int(last3CmprsData, pred);					
+			if(pred<SZ_UINT8_MIN) pred = SZ_UINT8_MIN;
+			listAdd_int(last3CmprsData, pred);
 			continue;
 		}
-		
-		//unpredictable data processing		
+
+		//unpredictable data processing
 		type[i] = 0;
 		compressUInt8Value(curData, minValue, byteSize, bytes);
 		memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		listAdd_int(last3CmprsData, curData);
 	}//end of for
-		
+
 	size_t exactDataNum = exactDataByteArray->size / byteSize;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT8);
 
 //sdi:Debug
@@ -306,23 +309,23 @@ TightDataPointStorageI* SZ_compress_uint8_1D_MDQ(uint8_t *oriData, size_t dataLe
 	for(i=0;i<dataLength;i++)
 		if(type[i]==0) sum++;
 	printf("opt_quantizations=%d, exactDataNum=%d, sum=%d\n",quantization_intervals, exactDataNum, sum);*/
-	
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
+
 	return tdps;
 }
 
-void SZ_compress_args_uint8_StoreOriData(uint8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps, 
+void SZ_compress_args_uint8_StoreOriData(uint8_t* oriData, size_t dataLength, TightDataPointStorageI* tdps,
 unsigned char** newByteData, size_t *outSize)
 {
-	int intSize=sizeof(uint8_t);	
+	int intSize=sizeof(uint8_t);
 	size_t k = 0, i;
 	tdps->isLossless = 1;
 	size_t totalByteLength = 3 + MetaDataByteLength + exe_params->SZ_SIZE_TYPE + 1 + intSize*dataLength;
 	*newByteData = (unsigned char*)malloc(totalByteLength);
-	
+
 	unsigned char dsLengthBytes[8];
 	for (i = 0; i < 3; i++)//3
 		(*newByteData)[k++] = versionNumber[i];
@@ -331,14 +334,14 @@ unsigned char** newByteData, size_t *outSize)
 		(*newByteData)[k++] = 16; //00010000
 	else
 		(*newByteData)[k++] = 80;	//01010000: 01000000 indicates the SZ_SIZE_TYPE=8
-	
+
 	convertSZParamsToBytes(confparams_cpr, &((*newByteData)[k]));
-	k = k + MetaDataByteLength;		
-	
-	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8	
+	k = k + MetaDataByteLength;
+
+	sizeToBytes(dsLengthBytes,dataLength); //SZ_SIZE_TYPE: 4 or 8
 	for (i = 0; i < exe_params->SZ_SIZE_TYPE; i++)
 		(*newByteData)[k++] = dsLengthBytes[i];
-		
+
 	if(sysEndianType==BIG_ENDIAN_SYSTEM)
 		memcpy((*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE, oriData, dataLength*intSize);
 	else
@@ -346,11 +349,11 @@ unsigned char** newByteData, size_t *outSize)
 		unsigned char* p = (*newByteData)+4+MetaDataByteLength+exe_params->SZ_SIZE_TYPE;
 		for(i=0;i<dataLength;i++,p+=intSize)
 			*p = oriData[i];
-	}	
+	}
 	*outSize = totalByteLength;
 }
 
-void SZ_compress_args_uint8_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint8_t *oriData, 
+void SZ_compress_args_uint8_NoCkRngeNoGzip_1D(unsigned char** newByteData, uint8_t *oriData,
 size_t dataLength, double realPrecision, size_t *outSize, int64_t valueRangeSize, uint8_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint8_1D_MDQ(oriData, dataLength, realPrecision, valueRangeSize, minValue);
@@ -365,35 +368,35 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint8_2D(oriData, r1, r2, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j; 
+	size_t i,j;
 	int64_t pred1D, pred2D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2;	
-	
+
+	size_t dataLength = r1*r2;
+
 	P0 = (uint8_t*)malloc(r2*sizeof(uint8_t));
 	memset(P0, 0, r2*sizeof(uint8_t));
 	P1 = (uint8_t*)malloc(r2*sizeof(uint8_t));
 	memset(P1, 0, r2*sizeof(uint8_t));
-		
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 	//type[dataLength]=0;
-		
+
 	uint8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	curValue = P1[0] = spaceFillingValue[0];
@@ -444,7 +447,7 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 			else if(tmp < SZ_UINT8_MIN)
 				P1[j] = SZ_UINT8_MIN;
 			else
-				P1[j] = SZ_UINT8_MAX;			
+				P1[j] = SZ_UINT8_MAX;
 		}
 		else
 		{
@@ -458,7 +461,7 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 	/* Process Row-1 --> Row-r1-1 */
 	size_t index;
 	for (i = 1; i < r1; i++)
-	{	
+	{
 		/* Process row-i data 0 */
 		index = i*r2;
 		pred1D = P1[0];
@@ -476,7 +479,7 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 			else if(tmp < SZ_UINT8_MIN)
 				P0[0] = SZ_UINT8_MIN;
 			else
-				P0[0] = SZ_UINT8_MAX;			
+				P0[0] = SZ_UINT8_MAX;
 		}
 		else
 		{
@@ -485,7 +488,7 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 			compressUInt8Value(curValue, minValue, byteSize, bytes);
 			memcpyDBA_Data(exactDataByteArray, bytes, byteSize);
 		}
-									
+
 		/* Process row-i data 1 --> r2-1*/
 		for (j = 1; j < r2; j++)
 		{
@@ -506,7 +509,7 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P0[j] = SZ_UINT8_MIN;
 				else
-					P0[j] = SZ_UINT8_MAX;						
+					P0[j] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -522,32 +525,32 @@ TightDataPointStorageI* SZ_compress_uint8_2D_MDQ(uint8_t *oriData, size_t r1, si
 		P1 = P0;
 		P0 = Pt;
 	}
-	
+
 	if(r2!=1)
 		free(P0);
-	free(P1);			
-	
+	free(P1);
+
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 /**
- * 
+ *
  * Note: @r1 is high dimension
- * 		 @r2 is low dimension 
+ * 		 @r2 is low dimension
  * */
-void SZ_compress_args_uint8_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint8_NoCkRngeNoGzip_2D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, uint8_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint8_2D_MDQ(oriData, r1, r2, realPrecision, valueRangeSize, minValue);
@@ -557,30 +560,30 @@ int64_t valueRangeSize, uint8_t minValue)
 	size_t dataLength = r1*r2;
 	if(*outSize>dataLength*sizeof(uint8_t))
 		SZ_compress_args_uint8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, int64_t valueRangeSize, int64_t minValue)
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint8_3D(oriData, r1, r2, r3, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3;		
+
+	size_t dataLength = r1*r2*r3;
 
 	size_t r23 = r2*r3;
 	P0 = (uint8_t*)malloc(r23*sizeof(uint8_t));
@@ -589,9 +592,9 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	type[0] = 0;
 	P1[0] = spaceFillingValue[0];
@@ -614,7 +617,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 		else if(tmp < SZ_UINT8_MIN)
 			P1[1] = SZ_UINT8_MIN;
 		else
-			P1[1] = SZ_UINT8_MAX;		
+			P1[1] = SZ_UINT8_MAX;
 	}
 	else
 	{
@@ -642,7 +645,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 			else if(tmp < SZ_UINT8_MIN)
 				P1[j] = SZ_UINT8_MIN;
 			else
-				P1[j] = SZ_UINT8_MAX;			
+				P1[j] = SZ_UINT8_MAX;
 		}
 		else
 		{
@@ -658,7 +661,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 	for (i = 1; i < r2; i++)
 	{
 		/* Process row-i data 0 */
-		index = i*r3;	
+		index = i*r3;
 		pred1D = P1[index-r3];
 		diff = spaceFillingValue[index] - pred1D;
 
@@ -674,7 +677,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 			else if(tmp < SZ_UINT8_MIN)
 				P1[index] = SZ_UINT8_MIN;
 			else
-				P1[index] = SZ_UINT8_MAX;			
+				P1[index] = SZ_UINT8_MAX;
 		}
 		else
 		{
@@ -704,7 +707,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P1[index] = SZ_UINT8_MIN;
 				else
-					P1[index] = SZ_UINT8_MAX;				
+					P1[index] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -769,7 +772,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P0[j] = SZ_UINT8_MIN;
 				else
-					P0[j] = SZ_UINT8_MAX;				
+					P0[j] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -786,7 +789,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 		{
 			/* Process Row-i data 0 */
 			index = k*r23 + i*r3;
-			index2D = i*r3;		
+			index2D = i*r3;
 			pred2D = P0[index2D-r3] + P1[index2D] - P1[index2D-r3];
 			diff = spaceFillingValue[index] - pred2D;
 
@@ -817,7 +820,7 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 			{
 //				if(k==63&&i==43&&j==27)
 //					printf("i=%d\n", i);
-				//index = k*r2*r3 + i*r3 + j;			
+				//index = k*r2*r3 + i*r3 + j;
 				index ++;
 				index2D = i*r3 + j;
 				pred3D = P0[index2D-1] + P0[index2D-r3]+ P1[index2D] - P0[index2D-r3-1] - P1[index2D-r3] - P1[index2D-1] + P1[index2D-r3-1];
@@ -857,22 +860,22 @@ TightDataPointStorageI* SZ_compress_uint8_3D_MDQ(uint8_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
 
-void SZ_compress_args_uint8_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize, 
+void SZ_compress_args_uint8_NoCkRngeNoGzip_3D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, double realPrecision, size_t *outSize,
 int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint8_3D_MDQ(oriData, r1, r2, r3, realPrecision, valueRangeSize, minValue);
@@ -882,8 +885,8 @@ int64_t valueRangeSize, int64_t minValue)
 	size_t dataLength = r1*r2*r3;
 	if(*outSize>dataLength*sizeof(uint8_t))
 		SZ_compress_args_uint8_StoreOriData(oriData, dataLength, tdps, newByteData, outSize);
-	
-	free_TightDataPointStorageI(tdps);	
+
+	free_TightDataPointStorageI(tdps);
 }
 
 
@@ -891,35 +894,35 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 {
 	unsigned char bytes[8] = {0,0,0,0,0,0,0,0};
 	int byteSize = computeByteSizePerIntValue(valueRangeSize);
-	
+
 	unsigned int quantization_intervals;
 	if(exe_params->optQuantMode==1)
 	{
 		quantization_intervals = optimize_intervals_uint8_4D(oriData, r1, r2, r3, r4, realPrecision);
 		updateQuantizationInfo(quantization_intervals);
-	}	
+	}
 	else
 		quantization_intervals = exe_params->intvCapacity;
-	size_t i,j,k; 
+	size_t i,j,k;
 	int64_t pred1D, pred2D, pred3D, curValue, tmp;
 	int diff = 0.0;
 	double itvNum = 0;
 	uint8_t *P0, *P1;
-		
-	size_t dataLength = r1*r2*r3*r4;		
+
+	size_t dataLength = r1*r2*r3*r4;
 
 	size_t r234 = r2*r3*r4;
 	size_t r34 = r3*r4;
 
 	P0 = (uint8_t*)malloc(r34*sizeof(uint8_t));
 	P1 = (uint8_t*)malloc(r34*sizeof(uint8_t));
-	
+
 	int* type = (int*) malloc(dataLength*sizeof(int));
 
 	uint8_t* spaceFillingValue = oriData; //
-	
+
 	DynamicByteArray *exactDataByteArray;
-	new_DBA(&exactDataByteArray, DynArrayInitLen);	
+	new_DBA(&exactDataByteArray, DynArrayInitLen);
 
 	size_t l;
 	for (l = 0; l < r1; l++)
@@ -954,7 +957,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 			else if(tmp < SZ_UINT8_MIN)
 				P1[index2D] = SZ_UINT8_MIN;
 			else
-				P1[index2D] = SZ_UINT8_MAX;			
+				P1[index2D] = SZ_UINT8_MAX;
 		}
 		else
 		{
@@ -986,7 +989,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P1[index2D] = SZ_UINT8_MIN;
 				else
-					P1[index2D] = SZ_UINT8_MAX;					
+					P1[index2D] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -1020,7 +1023,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P1[index2D] = SZ_UINT8_MIN;
 				else
-					P1[index2D] = SZ_UINT8_MAX;					
+					P1[index2D] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -1053,7 +1056,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 					else if(tmp < SZ_UINT8_MIN)
 						P1[index2D] = SZ_UINT8_MIN;
 					else
-						P1[index2D] = SZ_UINT8_MAX;						
+						P1[index2D] = SZ_UINT8_MAX;
 				}
 				else
 				{
@@ -1090,7 +1093,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 				else if(tmp < SZ_UINT8_MIN)
 					P0[index2D] = SZ_UINT8_MIN;
 				else
-					P0[index2D] = SZ_UINT8_MAX;					
+					P0[index2D] = SZ_UINT8_MAX;
 			}
 			else
 			{
@@ -1122,7 +1125,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 					else if(tmp < SZ_UINT8_MIN)
 						P0[index2D] = SZ_UINT8_MIN;
 					else
-						P0[index2D] = SZ_UINT8_MAX;						
+						P0[index2D] = SZ_UINT8_MAX;
 				}
 				else
 				{
@@ -1156,7 +1159,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 					else if(tmp < SZ_UINT8_MIN)
 						P0[index2D] = SZ_UINT8_MIN;
 					else
-						P0[index2D] = SZ_UINT8_MAX;						
+						P0[index2D] = SZ_UINT8_MAX;
 				}
 				else
 				{
@@ -1189,7 +1192,7 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 						else if(tmp < SZ_UINT8_MIN)
 							P0[index2D] = SZ_UINT8_MIN;
 						else
-							P0[index2D] = SZ_UINT8_MAX;							
+							P0[index2D] = SZ_UINT8_MAX;
 					}
 					else
 					{
@@ -1213,21 +1216,21 @@ TightDataPointStorageI* SZ_compress_uint8_4D_MDQ(uint8_t *oriData, size_t r1, si
 	free(P1);
 
 	size_t exactDataNum = exactDataByteArray->size;
-	
-	TightDataPointStorageI* tdps;	
-			
-	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize, 
-			type, exactDataByteArray->array, exactDataByteArray->size,  
+
+	TightDataPointStorageI* tdps;
+
+	new_TightDataPointStorageI(&tdps, dataLength, exactDataNum, byteSize,
+			type, exactDataByteArray->array, exactDataByteArray->size,
 			realPrecision, minValue, quantization_intervals, SZ_UINT8);
-			
+
 	//free memory
-	free(type);	
+	free(type);
 	free(exactDataByteArray); //exactDataByteArray->array has been released in free_TightDataPointStorageF(tdps);
-	
-	return tdps;	
+
+	return tdps;
 }
 
-void SZ_compress_args_uint8_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision, 
+void SZ_compress_args_uint8_NoCkRngeNoGzip_4D(unsigned char** newByteData, uint8_t *oriData, size_t r1, size_t r2, size_t r3, size_t r4, double realPrecision,
 size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 {
 	TightDataPointStorageI* tdps = SZ_compress_uint8_4D_MDQ(oriData, r1, r2, r3, r4, realPrecision, valueRangeSize, minValue);
@@ -1244,8 +1247,8 @@ size_t *outSize, int64_t valueRangeSize, int64_t minValue)
 void SZ_compress_args_uint8_withinRange(unsigned char** newByteData, uint8_t *oriData, size_t dataLength, size_t *outSize)
 {
 	TightDataPointStorageI* tdps = (TightDataPointStorageI*) malloc(sizeof(TightDataPointStorageI));
-	tdps->typeArray = NULL;	
-	
+	tdps->typeArray = NULL;
+
 	tdps->allSameData = 1;
 	tdps->dataSeriesLength = dataLength;
 	tdps->exactDataBytes = (unsigned char*)malloc(sizeof(unsigned char));
@@ -1253,29 +1256,29 @@ void SZ_compress_args_uint8_withinRange(unsigned char** newByteData, uint8_t *or
 	//tdps->exactByteSize = 4;
 	tdps->exactDataNum = 1;
 	tdps->exactDataBytes_size = 1;
-	
+
 	uint8_t value = oriData[0];
 	//intToBytes_bigEndian(tdps->exactDataBytes, value);
 	memcpy(tdps->exactDataBytes, &value, 1);
-	
+
 	size_t tmpOutSize;
 	convertTDPStoFlatBytes_int(tdps, newByteData, &tmpOutSize);
 
 	*outSize = tmpOutSize;//3+1+sizeof(uint8_t)+SZ_SIZE_TYPE; //8==3+1+4(uint8_size)
-	free_TightDataPointStorageI(tdps);	
+	free_TightDataPointStorageI(tdps);
 }
 
-int SZ_compress_args_uint8_wRngeNoGzip(unsigned char** newByteData, uint8_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint8_wRngeNoGzip(unsigned char** newByteData, uint8_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	int status = SZ_SCES;
 	size_t dataLength = computeDataLength(r5,r4,r3,r2,r1);
 	int64_t valueRangeSize = 0;
-	
+
 	uint8_t minValue = computeRangeSize_int(oriData, SZ_UINT8, dataLength, &valueRangeSize);
 	double realPrecision = getRealPrecision_int(valueRangeSize, errBoundMode, absErr_Bound, relBoundRatio, &status);
-		
+
 	if(valueRangeSize <= realPrecision)
 	{
 		SZ_compress_args_uint8_withinRange(newByteData, oriData, dataLength, outSize);
@@ -1303,12 +1306,12 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	return status;
 }
 
-int SZ_compress_args_uint8(unsigned char** newByteData, uint8_t *oriData, 
-size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize, 
+int SZ_compress_args_uint8(unsigned char** newByteData, uint8_t *oriData,
+size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *outSize,
 int errBoundMode, double absErr_Bound, double relBoundRatio)
 {
 	confparams_cpr->errorBoundMode = errBoundMode;
-	
+
 	if(errBoundMode>=PW_REL)
 	{
 		printf("Error: Current SZ version doesn't support integer data compression with point-wise relative error bound being based on pwrType=AVG\n");
@@ -1320,8 +1323,8 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 	int64_t valueRangeSize = 0;
 
 	uint8_t minValue = (uint8_t)computeRangeSize_int(oriData, SZ_UINT8, dataLength, &valueRangeSize);
-	double realPrecision = 0; 
-	
+	double realPrecision = 0;
+
 	if(confparams_cpr->errorBoundMode==PSNR)
 	{
 		confparams_cpr->errorBoundMode = ABS;
@@ -1377,9 +1380,9 @@ int errBoundMode, double absErr_Bound, double relBoundRatio)
 		else
 		{
 			printf("Error: Wrong setting of confparams_cpr->szMode in the uint8_t compression.\n");
-			status = SZ_MERR; //mode error			
+			status = SZ_MERR; //mode error
 		}
 	}
-	
+
 	return status;
 }
diff --git a/zlib/CMakeLists.txt b/zlib/CMakeLists.txt
index 14a0ead6..305a1e2e 100644
--- a/zlib/CMakeLists.txt
+++ b/zlib/CMakeLists.txt
@@ -1,4 +1,10 @@
-add_library(ZLIB SHARED 
+# Library type follows BUILD_SHARED_LIBS: STATIC unless it is switched on.
+set (BUILD_EXT_LIBS_TYPE "STATIC")
+if (BUILD_SHARED_LIBS)
+  set (BUILD_EXT_LIBS_TYPE "SHARED")
+endif ()
+
+add_library(ZLIB ${BUILD_EXT_LIBS_TYPE} 
   ./gzclose.c
   ./uncompr.c
   ./trees.c
@@ -20,6 +26,25 @@ target_include_directories(ZLIB
   PUBLIC 
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
   )
+# Choose the on-disk base name of the ZLIB library.
+# A static build with a non-MinGW Windows toolchain gets a "lib" prefix;
+# every other combination keeps the plain target name.
+if (BUILD_EXT_LIBS_TYPE STREQUAL "SHARED")
+  set (LIB_RELEASE_NAME "ZLIB")
+elseif (WIN32 AND NOT MINGW)
+  set (LIB_RELEASE_NAME "libZLIB")
+else ()
+  set (LIB_RELEASE_NAME "ZLIB")
+endif ()
+# OUTPUT_NAME_DEBUG is left unset, so the Debug configuration uses
+# OUTPUT_NAME; CMake appends the target's DEBUG_POSTFIX (initialized from
+# CMAKE_DEBUG_POSTFIX) to that name itself.
+set_target_properties (ZLIB PROPERTIES
+    OUTPUT_NAME                "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_RELEASE        "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_MINSIZEREL     "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_RELWITHDEBINFO "${LIB_RELEASE_NAME}"
+  )
 
 install(TARGETS ZLIB EXPORT ZLIBConfig
   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE
diff --git a/zlib/zconf.h b/zlib/zconf.h
index 77398c11..5e1d68a0 100644
--- a/zlib/zconf.h
+++ b/zlib/zconf.h
@@ -431,11 +431,11 @@ typedef uLong FAR uLongf;
    typedef unsigned long z_crc_t;
 #endif
 
-#if 1    /* was set to #if 1 by ./configure */
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
 #  define Z_HAVE_UNISTD_H
 #endif
 
-#if 1    /* was set to #if 1 by ./configure */
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
 #  define Z_HAVE_STDARG_H
 #endif
 
diff --git a/zstd/CMakeLists.txt b/zstd/CMakeLists.txt
index 8a28529c..9a6bc395 100644
--- a/zstd/CMakeLists.txt
+++ b/zstd/CMakeLists.txt
@@ -1,4 +1,10 @@
-add_library(zstd SHARED 
+# Library type follows BUILD_SHARED_LIBS: STATIC unless it is switched on.
+set (BUILD_EXT_LIBS_TYPE "STATIC")
+if (BUILD_SHARED_LIBS)
+  set (BUILD_EXT_LIBS_TYPE "SHARED")
+endif ()
+
+add_library(zstd ${BUILD_EXT_LIBS_TYPE} 
   ./common/entropy_common.c
   ./common/pool.c
   ./common/threading.c
@@ -46,6 +52,25 @@ target_include_directories(zstd
     ${CMAKE_CURRENT_SOURCE_DIR}/dll
     ${CMAKE_CURRENT_SOURCE_DIR}/legacy
   )
+# Choose the on-disk base name of the zstd library.
+# A static build with a non-MinGW Windows toolchain gets a "lib" prefix;
+# every other combination keeps the plain target name.
+if (BUILD_EXT_LIBS_TYPE STREQUAL "SHARED")
+  set (LIB_RELEASE_NAME "zstd")
+elseif (WIN32 AND NOT MINGW)
+  set (LIB_RELEASE_NAME "libzstd")
+else ()
+  set (LIB_RELEASE_NAME "zstd")
+endif ()
+# OUTPUT_NAME_DEBUG is left unset, so the Debug configuration uses
+# OUTPUT_NAME; CMake appends the target's DEBUG_POSTFIX (initialized from
+# CMAKE_DEBUG_POSTFIX) to that name itself.
+set_target_properties (zstd PROPERTIES
+    OUTPUT_NAME                "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_RELEASE        "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_MINSIZEREL     "${LIB_RELEASE_NAME}"
+    OUTPUT_NAME_RELWITHDEBINFO "${LIB_RELEASE_NAME}"
+  )
 
 
 install(TARGETS zstd EXPORT ZSTDConfig