From b95350e85116702e5cfe20a391dced3492f92f6f Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 09:12:11 +0000 Subject: [PATCH 01/24] Add PixelTimeSeries class --- carst/libdhdt.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 73fa685..bdfb75a 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -293,6 +293,50 @@ def onclick(event): figi.show() return onclick +class PixelTimeSeries(object): + """ + Store all measurements of a single pixel in the reference DEM. + Always an n-by-3 array: + Column 1: date + Column 2: value + Column 3: uncertainty + """ + def __init__(self, data=np.ndarray([0, 3])): + data = np.array(data) if type(data) is not np.ndarray else data + self.verify_record(data) + self.data = data + + def __repr__(self): + return 'PixelTimeSeries({})'.format(self.data) + + @staticmethod + def verify_record(record): + """ + Verify if the input record meets the format requirements. + """ + if record.ndim == 1 and record.size == 3: + pass + elif record.ndim == 2 and record.shape[1] == 3: + pass + else: + raise ValueError("Inconsistent input record. Must be an n-by-3 array.") + + def add_record(self, record): + """ + Add one or more records to the time series. + """ + record = np.array(record) if type(record) is not np.ndarray else record + self.verify_record(record) + self.data = np.vstack((self.data, record)) + + def get_date(self): + return self.data[:, 0] + + def get_value(self): + return self.data[:, 1] + + def get_uncertainty(self): + return self.data[:, 2] class DemPile(object): From c709b33700a29e3154f8afe247d6b002c24145f4 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 14:13:10 +0000 Subject: [PATCH 02/24] Optimize data structure: PixelTimeSeries --- .gitignore | 3 +- carst/libdhdt.py | 353 +++++++++++++++++++++++++---------------------- 2 files changed, 187 insertions(+), 169 deletions(-) diff --git a/.gitignore b/.gitignore index 5b1c7de..3f567ea 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,5 @@ examples/dhdt/Demo_DEMs/HookerFJL_10m_dhdt* examples/dhdt/Demo_DEMs/refgeo_10m_TSpickle.p examples/dhdt/tmp.tif other_process/log_getUncertaintyDEM_grdtrack_output.xyz -Utilities/unused/PXData/* \ No newline at end of file +Utilities/unused/PXData/* +*.p diff --git a/carst/libdhdt.py b/carst/libdhdt.py index bdfb75a..ad72951 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -301,8 +301,9 @@ class PixelTimeSeries(object): Column 2: value Column 3: uncertainty """ - def __init__(self, data=np.ndarray([0, 3])): - data = np.array(data) if type(data) is not np.ndarray else data + def __init__(self, data=[]): + data = np.ndarray([0, 3]) if not data else data # [] --> np.ndarray([0, 3]) + data = np.array(data) if type(data) is not np.ndarray else data # [1, 2, 3] --> np.array([1, 2, 3]) self.verify_record(data) self.data = data @@ -341,173 +342,189 @@ def get_uncertainty(self): class DemPile(object): - """ - New class in replace of TimeSeriesDEM. It doesn't use nparray for avoiding huge memory consumption. - Instead, it uses a novel method for stacking all DEMs and saves them as a dict array (which is what - is stored in the intermediate pickle file. - """ + """ + New class in replace of TimeSeriesDEM. It doesn't use nparray for avoiding huge memory consumption. + Instead, it uses a novel method for stacking all DEMs and saves them as a dict array (which is what + is stored in the intermediate pickle file. 
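+    Each pixel's stack is a list of [day offset from refdate, elevation, uncertainty] records,
+    e.g. (hypothetical numbers) [[730, 125.3, 4.0], [1095, 121.1, 4.0]] for a pixel seen by two DEMs.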
+ """ - def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): - self.picklepath = picklepath - self.dhdtprefix = dhdtprefix - self.ts = None - self.dems = [] - self.refdate = None - if refdate is not None: - self.refdate = datetime.strptime(refdate, '%Y-%m-%d') - self.refgeo = refgeo - self.refgeomask = None - if refgeo is not None: - self.refgeomask = refgeo.ReadAsArray().astype(bool) - self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} - self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} - self.evmd_threshold = 6 - - def AddDEM(self, dems): - # ==== Add DEM object list ==== - if type(dems) is list: - self.dems = self.dems + dems - elif type(dems) is SingleRaster: - self.dems.append(dems) - else: - raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") - - def SortByDate(self): - # ==== sort DEMs by date (ascending order) ==== - dem_dates = [i.date for i in self.dems] - dateidx = np.argsort(dem_dates) - self.dems = [self.dems[i] for i in dateidx] - - def SetRefGeo(self, refgeo): - # ==== Prepare the reference geometry ==== - if type(refgeo) is str: - self.refgeo = SingleRaster(refgeo) - elif type(refgeo) is SingleRaster: - self.refgeo = refgeo - else: - raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") - self.refgeomask = self.refgeo.ReadAsArray().astype(bool) - - def SetRefDate(self, datestr): - self.refdate = datetime.strptime(datestr, '%Y-%m-%d') - - def SetMaskParam(self, ini): - if 'max_uncertainty' in ini.settings: - self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) - if 'min_time_span' in ini.settings: - self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) - - def SetEVMDThreshold(self, ini): - if 'evmd_threshold' in ini.regression: - self.evmd_threshold = float(ini.regression['evmd_threshold']) - - def InitTS(self): - # ==== Prepare the reference geometry ==== - refgeo_Ysize = self.refgeo.GetRasterYSize() - refgeo_Xsize = self.refgeo.GetRasterXSize() - self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] - print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) - - def ReadConfig(self, ini): - self.picklepath = ini.result['picklefile'] - self.dhdtprefix = ini.result['dhdt_prefix'] - self.AddDEM(ini.GetDEM()) - self.SortByDate() - self.SetRefGeo(ini.refgeometry['gtiff']) - self.SetRefDate(ini.settings['refdate']) - self.SetMaskParam(ini) - self.SetEVMDThreshold(ini) - - @timeit - def PileUp(self): - # ==== Start to read every DEM and save it to our final array ==== - for i in range(len(self.dems)): - print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) )) - if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: - datedelta = self.dems[i].date - self.refdate - # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') - znew = Resample_Array2(self.dems[i], self.refgeo) - znew_mask = np.logical_and(znew > 0, self.refgeomask) - fill_idx = np.where(znew_mask) - for m,n in zip(fill_idx[0], fill_idx[1]): - self.ts[m][n]['date'] += [datedelta.days] - self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty] - self.ts[m][n]['value'] += [znew[m, n]] - else: - print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty allowed ({}).".format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) - - def DumpPickle(self): - 
pickle.dump(self.ts, open(self.picklepath, "wb")) - - def LoadPickle(self): - self.ts = pickle.load( open(self.picklepath, "rb") ) - - @timeit - def Polyfit(self): - # ==== Create final array ==== - self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 - # ==== Weighted regression ==== - print('m total: ', len(self.ts)) - for m in range(len(self.ts)): - # if m < 600: - # continue - if m % 100 == 0: - print(m) - for n in range(len(self.ts[0])): - # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) - # if len(self.ts[m][n]['date']) >= 3: - date = np.array(self.ts[m][n]['date']) - uncertainty = np.array(self.ts[m][n]['uncertainty']) - value = np.array(self.ts[m][n]['value']) - # pin_value = pin_dem_array[m ,n] - # pin_date = pin_dem_date_array[m, n] - # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) - # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) - - # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] - # for i in reversed(range(len(slope_ref))): - # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): - # _ = date.pop(i) - # _ = uncertainty.pop(i) - # _ = value.pop(i) - # self.fitdata['count'][m, n] = len(date) - - # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 - # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) - # if residual > 100: - # print(date, value, uncertainty) - self.fitdata['slope'][m, n] = slope - self.fitdata['slope_err'][m, n] = slope_err - self.fitdata['residual'][m, n] = residual - self.fitdata['count'][m, n] = count - # else: - # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) - # elif (date[-1] - date[0] > 0): - # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 - # self.fitdata['count'][~self.refgeomask] = -9999 - - def Fitdata2File(self): - # ==== Write to file ==== - dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') - dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') - dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') - dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') - dhdt_dem.Array2Raster(self.fitdata['slope'], self.refgeo) - dhdt_error.Array2Raster(self.fitdata['slope_err'], self.refgeo) - dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) - dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) - - def ShowDhdtTifs(self): - dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') - dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') - dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') - dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') - return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): + self.picklepath = picklepath + self.dhdtprefix = dhdtprefix + self.ts = None + self.dems = [] + self.refdate = None + if refdate is not None: + self.refdate = datetime.strptime(refdate, '%Y-%m-%d') + self.refgeo = refgeo + self.refgeomask = None + if refgeo is not None: 
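+            # nonzero pixels of refgeo define the processing mask; zero pixels are skipped later.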
+ self.refgeomask = refgeo.ReadAsArray().astype(bool) + self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} + self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} + self.evmd_threshold = 6 + + def AddDEM(self, dems): + # ==== Add DEM object list ==== + if type(dems) is list: + self.dems = self.dems + dems + elif type(dems) is SingleRaster: + self.dems.append(dems) + else: + raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") + + def SortByDate(self): + # ==== sort DEMs by date (ascending order) ==== + dem_dates = [i.date for i in self.dems] + dateidx = np.argsort(dem_dates) + self.dems = [self.dems[i] for i in dateidx] + + def SetRefGeo(self, refgeo): + # ==== Prepare the reference geometry ==== + if type(refgeo) is str: + self.refgeo = SingleRaster(refgeo) + elif type(refgeo) is SingleRaster: + self.refgeo = refgeo + else: + raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") + self.refgeomask = self.refgeo.ReadAsArray().astype(bool) + + def SetRefDate(self, datestr): + self.refdate = datetime.strptime(datestr, '%Y-%m-%d') + + def SetMaskParam(self, ini): + if 'max_uncertainty' in ini.settings: + self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) + if 'min_time_span' in ini.settings: + self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) + + def SetEVMDThreshold(self, ini): + if 'evmd_threshold' in ini.regression: + self.evmd_threshold = float(ini.regression['evmd_threshold']) + + def InitTS(self): + # ==== Prepare the reference geometry ==== + refgeo_Ysize = self.refgeo.GetRasterYSize() + refgeo_Xsize = self.refgeo.GetRasterXSize() + self.ts = np.empty([refgeo_Ysize, refgeo_Xsize], dtype=object) + # for j in range(self.ts.shape[0]): + # for i in range(self.ts.shape[1]): + # self.ts[j, i] = PixelTimeSeries() + # self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] + # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] + print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) + + def ReadConfig(self, ini): + self.picklepath = ini.result['picklefile'] + self.dhdtprefix = ini.result['dhdt_prefix'] + self.AddDEM(ini.GetDEM()) + self.SortByDate() + self.SetRefGeo(ini.refgeometry['gtiff']) + self.SetRefDate(ini.settings['refdate']) + self.SetMaskParam(ini) + self.SetEVMDThreshold(ini) + + @timeit + def PileUp(self): + # ==== Start to read every DEM and save it to our final array ==== + ts = [[ [] for n in range(self.ts.shape[1])] for m in range(self.ts.shape[0])] + for i in range(len(self.dems)): + print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) )) + if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: + datedelta = self.dems[i].date - self.refdate + # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') + znew = Resample_Array2(self.dems[i], self.refgeo) + znew_mask = np.logical_and(znew > 0, self.refgeomask) + fill_idx = np.where(znew_mask) + for m,n in zip(fill_idx[0], fill_idx[1]): + record = [datedelta.days, znew[m, n], self.dems[i].uncertainty] + # self.ts[m, n].add_record(record) + ts[m][n] += [record] + # self.ts[m][n]['date'] += [datedelta.days] + # self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty] + # self.ts[m][n]['value'] += [znew[m, n]] + else: + print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty 
allowed ({})." + .format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) + + # After the content of ts is all populated, we move the data to self.ts as an array of PixelTimeSeries. + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + self.ts[m, n] = PixelTimeSeries(ts[m][n]) + + def DumpPickle(self): + pickle.dump(self.ts, open(self.picklepath, "wb")) + + def LoadPickle(self): + self.ts = pickle.load( open(self.picklepath, "rb") ) + + @timeit + def Polyfit(self): + # ==== Create final array ==== + self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 + # ==== Weighted regression ==== + print('m total: ', len(self.ts)) + for m in range(len(self.ts)): + if m % 100 == 0: + print(m) + for n in range(len(self.ts[0])): + # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) + # if len(self.ts[m][n]['date']) >= 3: + date = self.ts[m, n].get_date() + # date = np.array(self.ts[m][n]['date']) + uncertainty = self.ts[m, n].get_uncertainty() + # uncertainty = np.array(self.ts[m][n]['uncertainty']) + value = self.ts[m, n].get_value() + # value = np.array(self.ts[m][n]['value']) + # pin_value = pin_dem_array[m ,n] + # pin_date = pin_dem_date_array[m, n] + # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) + # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) + + # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] + # for i in reversed(range(len(slope_ref))): + # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): + # _ = date.pop(i) + # _ = uncertainty.pop(i) + # _ = value.pop(i) + # self.fitdata['count'][m, n] = len(date) + + # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 + # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) + # if residual > 100: + # print(date, value, uncertainty) + self.fitdata['slope'][m, n] = slope + self.fitdata['slope_err'][m, n] = slope_err + self.fitdata['residual'][m, n] = residual + self.fitdata['count'][m, n] = count + # else: + # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) + # elif (date[-1] - date[0] > 0): + # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 + # self.fitdata['count'][~self.refgeomask] = -9999 + + def Fitdata2File(self): + # ==== Write to file ==== + dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') + dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') + dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') + dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') + dhdt_dem.Array2Raster(self.fitdata['slope'], self.refgeo) + dhdt_error.Array2Raster(self.fitdata['slope_err'], self.refgeo) + dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) + dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) + + def ShowDhdtTifs(self): + dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') + dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') + dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') + dhdt_count = 
SingleRaster(self.dhdtprefix + '_dhdt_count.tif') + return dhdt_dem, dhdt_error, dhdt_res, dhdt_count From 0ae6ca3d3c9695c7a87edc0f522c9d18884d0aec Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 16:34:42 +0000 Subject: [PATCH 03/24] New viz and ConfParam ini-zation --- bin/dhdt.py | 55 +++++++++++++------------ carst/libdhdt.py | 102 +++++++++++++++++++++++++++++------------------ 2 files changed, 93 insertions(+), 64 deletions(-) diff --git a/bin/dhdt.py b/bin/dhdt.py index d8b9599..9d0e5a3 100755 --- a/bin/dhdt.py +++ b/bin/dhdt.py @@ -30,46 +30,49 @@ # ==== Read ini file ==== inipath = args.config_file -ini = ConfParams(inipath) -ini.ReadParam() -ini.VerifyParam() +# ini = ConfParams(inipath) +# ini.ReadParam() +# ini.VerifyParam() # ==== Create a DemPile object and load the config file into the object ==== a = DemPile() -a.ReadConfig(ini) +# a.ReadConfig(ini) +a.ReadConfig(inipath) # ==== Run main processes ==== if args.step is None: - a.InitTS() - a.PileUp() - a.DumpPickle() - a.Polyfit() - a.Fitdata2File() + a.InitTS() + a.PileUp() + a.DumpPickle() + a.Polyfit() + a.Fitdata2File() elif args.step == 'stack': - a.InitTS() - a.PileUp() - a.DumpPickle() + a.InitTS() + a.PileUp() + a.DumpPickle() elif args.step == 'dhdt': - a.LoadPickle() - a.Polyfit() - a.Fitdata2File() + a.LoadPickle() + a.Polyfit() + a.Fitdata2File() elif args.step == 'viewts': - a.LoadPickle() - data = a.ts - dhdt_raster, _, _, _ = a.ShowDhdtTifs() - img = dhdt_raster.ReadAsArray() + a.LoadPickle() + a.viz() + plt.show() +# data = a.ts +# dhdt_raster, _, _, _ = a.ShowDhdtTifs() +# img = dhdt_raster.ReadAsArray() + +# fig, ax = plt.subplots() +# img[img < -9000] = np.nan +# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) +# onclick = onclick_wrapper(data, fig, ax) - fig, ax = plt.subplots() - img[img < -9000] = np.nan - ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) - onclick = onclick_wrapper(data, fig, ax) +# cid = fig.canvas.mpl_connect('button_press_event', onclick) - cid = fig.canvas.mpl_connect('button_press_event', onclick) - plt.show() else: - print('Wrong Way!') + print('Wrong Way!') # ==== Codes for test ==== diff --git a/carst/libdhdt.py b/carst/libdhdt.py index ad72951..f59397f 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -2,22 +2,23 @@ # Func: Resample_Array(UtilRaster.SingleRaster, UtilRaster.SingleRaster) # used for dhdt # by Whyjay Zheng, Jul 27 2016 -# last edit: Jun 22 2017 +# last edit: Apr 17 2023 import numpy as np from numpy.linalg import inv import os import sys try: - import gdal + import gdal except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal # sometimes gdal is part of osgeo modules from datetime import datetime from shapely.geometry import Polygon from scipy.interpolate import interp2d from scipy.interpolate import NearestNDInterpolator from scipy.interpolate import griddata import gc +from carst import ConfParams from carst.libraster import SingleRaster import pickle import matplotlib.pyplot as plt @@ -257,41 +258,10 @@ def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2) # return slope, slope_err, residual -def onclick_wrapper(data, fig, ax): - def onclick(event): - print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % - ('double' if event.dblclick else 'single', event.button, - event.x, event.y, event.xdata, event.ydata)) - col = int(event.xdata) - row = int(event.ydata) - xx = np.array(data[row][col]['date']) - yy = 
np.array(data[row][col]['value']) - ye = np.array(data[row][col]['uncertainty']) - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) - SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) - SSRes = np.sum((y_good - y_goodest) ** 2) - SST = np.sum((y_good - np.mean(y_good)) ** 2) - Rsquared = 1 - SSRes / SST - ax.plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') - fig.canvas.draw() - figi = plt.figure() - axi = plt.gca() - axi.errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') - np.set_printoptions(precision=3) - np.set_printoptions(suppress=True) - xye = np.vstack((xx,yy,ye)).T - print(xye[xye[:,1].argsort()]) - # print(xx) - # print(yy) - # print(ye) - axi.plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) - axi.plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) - axi.text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) - axi.set_xlabel('days, from 2015-01-01') - axi.set_ylabel('height (m)') - # plt.plot(xx, yy) - figi.show() - return onclick + + + + class PixelTimeSeries(object): """ @@ -415,6 +385,10 @@ def InitTS(self): print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) def ReadConfig(self, ini): + if type(ini) is str: + ini = ConfParams(ini) + ini.ReadParam() + ini.VerifyParam() self.picklepath = ini.result['picklefile'] self.dhdtprefix = ini.result['dhdt_prefix'] self.AddDEM(ini.GetDEM()) @@ -525,6 +499,58 @@ def ShowDhdtTifs(self): dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + + def viz(self): + dhdt_raster, _, _, _ = self.ShowDhdtTifs() + img = dhdt_raster.ReadAsArray() + img[img < -9000] = np.nan # might need to change + + fig, axs = plt.subplots(2, 1, figsize=(8,8)) # figsize might need to change + first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6) # minmax might need to change + + onclick = onclick_wrapper(self.ts, axs) + cid = fig.canvas.mpl_connect('button_press_event', onclick) + +def onclick_wrapper(data, axs): + def onclick_ipynb(event): + """ + Callback function for mouse click + """ + print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % + ('double' if event.dblclick else 'single', event.button, + event.x, event.y, event.xdata, event.ydata)) + col = int(event.xdata) + row = int(event.ydata) + xx = data[row, col].get_date() + yy = data[row, col].get_value() + ye = data[row, col].get_uncertainty() + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) + SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) + SSRes = np.sum((y_good - y_goodest) ** 2) + SST = np.sum((y_good - np.mean(y_good)) ** 2) + Rsquared = 1 - SSRes / SST + axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') + axs[1].cla() + axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') + np.set_printoptions(precision=3) + np.set_printoptions(suppress=True) + xye = np.vstack((xx,yy,ye)).T + print(xye[xye[:,1].argsort()]) + axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) + axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) + axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) + axs[1].set_xlabel('days, from xxxx-01-01') + axs[1].set_ylabel('height (m)') + return 
onclick_ipynb + + +# fig, ax = plt.subplots() +# img[img < -9000] = np.nan +# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) +# onclick = onclick_wrapper(data, fig, ax) + +# cid = fig.canvas.mpl_connect('button_press_event', onclick) +# cid = fig.canvas.mpl_connect('button_press_event', onclick2) From 2409a88fc7d1d4a44faae986ad59e9ed6d01c7ce Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Mon, 17 Apr 2023 13:49:04 +0000 Subject: [PATCH 04/24] Add mosaic function; update live check (onclick) --- carst/libdhdt.py | 83 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index f59397f..20d3d8c 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -118,20 +118,22 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 def EVMD(y, threshold=6): - validated_value = None - if y.size <= 1: - exitstate = y.size - else: - exitstate = 1 - for i in range(y.size - 1): - comparison = y[i+1:] - y[i] - if any(abs(comparison) < threshold): - validated_value = y[i] - exitstate = -10 - break - else: - exitstate += 1 - return exitstate, validated_value + validated_value = None + validated_value_idx = None + if y.size <= 1: + exitstate = y.size + else: + exitstate = 1 + for i in range(y.size - 1): + comparison = y[i+1:] - y[i] + if any(abs(comparison) < threshold): + validated_value_idx = i + validated_value = y[i] + exitstate = -10 + break + else: + exitstate += 1 + return exitstate, validated_value, validated_value_idx def EVMD_idx(y, validated_value, threshold=6): validated_range = [validated_value - threshold, validated_value + threshold] @@ -145,7 +147,7 @@ def EVMD_idx(y, validated_value, threshold=6): def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. 
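 # The weighted fit below solves y = a + b*x with weights w = 1/ye:
 #   G = [x, 1], Gw = diag(w) @ G, yw = diag(w) @ y, p = inv(Gw.T @ Gw) @ Gw.T @ yw,
 # and the slope is converted from m/day to m/yr by p[0] * 365.25.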
- exitstate, validated_value = EVMD(y, threshold=evmd_threshold) + exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) if exitstate >= 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size return slope, slope_err, resid, count @@ -331,6 +333,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): if refgeo is not None: self.refgeomask = refgeo.ReadAsArray().astype(bool) self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} + self.mosaic = {'value': [], 'date': [], 'uncertainty': []} self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} self.evmd_threshold = 6 @@ -500,6 +503,41 @@ def ShowDhdtTifs(self): dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + def form_mosaic(self, order='ascending'): + """ + order options: + ascending: early elevations will be populated first + descending: late elevations will be populated first + """ + # ==== Create mosaicked array ==== + self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float) + self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float) + self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float) + for m in range(self.ts.shape[0]): + if m % 100 == 0: + print(m) + for n in range(self.ts.shape[1]): + date = self.ts[m, n].get_date() + uncertainty = self.ts[m, n].get_uncertainty() + value = self.ts[m, n].get_value() + if order == 'descending': + date = np.flip(date) + uncertainty = np.flip(uncertainty) + value = np.flip(value) + elif order != 'ascending': + raise ValueError('order must be "ascending" or "descending".') + exitstate, validated_value, validated_value_idx = EVMD(value, threshold=self.evmd_threshold) + if exitstate < 0: + self.mosaic['value'][m, n] = validated_value + self.mosaic['date'][m, n] = date[validated_value_idx] + self.mosaic['uncertainty'][m, n] = uncertainty[validated_value_idx] + mosaic_value = SingleRaster('{}_mosaic-{}_value.tif'.format(self.dhdtprefix, order)) + mosaic_date = SingleRaster('{}_mosaic-{}_date.tif'.format(self.dhdtprefix, order)) + mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order)) + mosaic_value.Array2Raster(self.mosaic['value'], self.refgeo) + mosaic_date.Array2Raster(self.mosaic['date'], self.refgeo) + mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo) + def viz(self): dhdt_raster, _, _, _ = self.ShowDhdtTifs() img = dhdt_raster.ReadAsArray() @@ -508,10 +546,10 @@ def viz(self): fig, axs = plt.subplots(2, 1, figsize=(8,8)) # figsize might need to change first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6) # minmax might need to change - onclick = onclick_wrapper(self.ts, axs) + onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold) cid = fig.canvas.mpl_connect('button_press_event', onclick) -def onclick_wrapper(data, axs): +def onclick_wrapper(data, axs, evmd_threshold): def onclick_ipynb(event): """ Callback function for mouse click @@ -524,7 +562,7 @@ def onclick_ipynb(event): xx = data[row, col].get_date() yy = data[row, col].get_value() ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) SSRes = np.sum((y_good 
- y_goodest) ** 2) SST = np.sum((y_good - np.mean(y_good)) ** 2) @@ -544,15 +582,6 @@ def onclick_ipynb(event): return onclick_ipynb -# fig, ax = plt.subplots() -# img[img < -9000] = np.nan -# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) -# onclick = onclick_wrapper(data, fig, ax) - -# cid = fig.canvas.mpl_connect('button_press_event', onclick) -# cid = fig.canvas.mpl_connect('button_press_event', onclick2) - - class TimeSeriesDEM(np.ndarray): From 36c62655a7de2b64109eaef61bf41c350616f4c6 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 19 Apr 2023 04:06:48 +0000 Subject: [PATCH 05/24] Convert tabs to spaces: EVMD_idx and wlr_corefun --- carst/libdhdt.py | 104 ++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 20d3d8c..d3dbe61 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -136,59 +136,61 @@ def EVMD(y, threshold=6): return exitstate, validated_value, validated_value_idx def EVMD_idx(y, validated_value, threshold=6): - validated_range = [validated_value - threshold, validated_value + threshold] - idx = np.zeros_like(y, dtype=bool) - for i in range(y.size): - if validated_range[0] <= y[i] <= validated_range[1]: - idx[i] = True - tmp = np.append(validated_range, [y[i] - threshold, y[i] + threshold]) - validated_range = [min(tmp), max(tmp)] - return idx + validated_range = [validated_value - threshold, validated_value + threshold] + idx = np.zeros_like(y, dtype=bool) + for i in range(y.size): + if validated_range[0] <= y[i] <= validated_range[1]: + idx[i] = True + tmp = np.append(validated_range, [y[i] - threshold, y[i] + threshold]) + validated_range = [min(tmp), max(tmp)] + return idx def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): - # wlr = weighted linear regression. - exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - if exitstate >= 0 or (max(x) - min(x)) < 1: - slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - return slope, slope_err, resid, count - else: - # if x.size == 3: - # print(x, y, ye) - idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) - if sum(idx) >= 3: - x = x[idx] - y = y[idx] - ye = ye[idx] - w = [1 / k for k in ye] - G = np.vstack([x, np.ones(x.size)]).T # defines the model (y = a + bx) - W = np.diag(w) - Gw = W @ G - yw = W @ y.T # assuming y is a 1-by-N array - cov_m = inv(Gw.T @ Gw) # covariance matrix - p = inv(Gw.T @ Gw) @ Gw.T @ yw # model coefficients - H = G @ inv(Gw.T @ Gw) @ Gw.T @ W # projection matrix - h = np.diag(H) # leverage - y_est = np.polyval(p, x) # the estimate of y - ri2 = (y - y_est) ** 2 - resid = np.sum(ri2) # sum of squared error - error = np.sqrt(cov_m[0, 0]) - p_num = 2 # how many coefficients we want (in this case, 2 comes from y = a + bx) - mse = resid / (x.size - p_num) # mean squared error - slope = p[0] * 365.25 - slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - count = x.size - # if resid > 100000: - # print(x,y,ye,cookd,goodpoint) - if detailed: - return slope, slope_err, resid, count, x, y, y_est - else: - return slope, slope_err, resid, count - else: - slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - if detailed: - return slope, slope_err, resid, count, x, y, y - else: - return slope, slope_err, resid, count + # wlr = weighted linear regression. 
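+    # EVMD returns exitstate == -10 when at least two elevations agree within the
+    # threshold; any exitstate >= 0 means no value could be validated for this pixel.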
+ exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) + if exitstate >= 0 or (max(x) - min(x)) < 1: + slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size + return slope, slope_err, resid, count + else: + # if x.size == 3: + # print(x, y, ye) + idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) + if sum(idx) >= 3: + x = x[idx] + y = y[idx] + ye = ye[idx] + w = [1 / k for k in ye] + G = np.vstack([x, np.ones(x.size)]).T # defines the model (y = a + bx) + W = np.diag(w) + Gw = W @ G + yw = W @ y.T # assuming y is a 1-by-N array + cov_m = inv(Gw.T @ Gw) # covariance matrix + p = inv(Gw.T @ Gw) @ Gw.T @ yw # model coefficients + H = G @ inv(Gw.T @ Gw) @ Gw.T @ W # projection matrix + h = np.diag(H) # leverage + y_est = np.polyval(p, x) # the estimate of y + ri2 = (y - y_est) ** 2 + resid = np.sum(ri2) # sum of squared error + error = np.sqrt(cov_m[0, 0]) + p_num = 2 # how many coefficients we want (in this case, 2 comes from y = a + bx) + mse = resid / (x.size - p_num) # mean squared error + slope = p[0] * 365.25 + slope_err = np.sqrt(cov_m[0, 0]) * 365.25 + count = x.size + # if resid > 100000: + # print(x,y,ye,cookd,goodpoint) + if detailed: + return slope, slope_err, resid, count, x, y, y_est + else: + return slope, slope_err, resid, count + else: + slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size + if detailed: + x = x[idx] + y = y[idx] + return slope, slope_err, resid, count, x, y, y + else: + return slope, slope_err, resid, count # ============ Using Cook's Distance ============ From f53c8d4dc90d08989bc6d052dca196b786581f59 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 19 Apr 2023 14:44:32 +0000 Subject: [PATCH 06/24] Test DBSCAN as EVMD --- carst/libdhdt.py | 172 ++++++++++++++++++++++++++++++++--------------- requirements.txt | 1 + 2 files changed, 120 insertions(+), 53 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index d3dbe61..ed5bb22 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -22,6 +22,7 @@ from carst.libraster import SingleRaster import pickle import matplotlib.pyplot as plt +from sklearn.cluster import DBSCAN def timeit(func): def time_wrapper(*args, **kwargs): @@ -117,22 +118,66 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): else: return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 -def EVMD(y, threshold=6): - validated_value = None - validated_value_idx = None - if y.size <= 1: - exitstate = y.size + +def EVMD_DBSCAN(x, y, eps=6, min_samples=4): + """ + See EVMD & EVMD_idx. + """ + # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration. + x2d = np.column_stack((x / 365, y)) + if x2d.shape[0] >= 2: + db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) + verified_y_labels = db.labels_ >= 0 + # verified_y_labels_idx = np.where(verified_y_labels) + if any(verified_y_labels): + exitstate = 1 + else: + exitstate = -1 else: - exitstate = 1 - for i in range(y.size - 1): - comparison = y[i+1:] - y[i] - if any(abs(comparison) < threshold): - validated_value_idx = i - validated_value = y[i] - exitstate = -10 - break - else: - exitstate += 1 + exitstate = -1 + verified_y_labels = np.full_like(y, False) + return exitstate, verified_y_labels + + +def EVMD(y, x=None, threshold=6, method='legacy'): + """ + Elevation Verification from Multiple DEMs. + method: 'DBSCAN' or 'legacy' + """ + if method == 'DBSCAN': + # x, assuming the temporal axis (unit=days), is scaled by 100 for proper eps consideration. 
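+        # DBSCAN assigns each (time, elevation) sample to a cluster; noise samples
+        # get the label -1, so labels >= 0 mark elevations corroborated by their neighbors.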
+        x2d = np.column_stack((x / 100, y))
+        # min_samples is fixed at 3 (i.e., three DEMs together can be considered as a cluster!)
+        min_samples = 3
+        db = DBSCAN(eps=threshold, min_samples=min_samples).fit(x2d)
+        verified_y_labels = db.labels_ >= 0
+        verified_y_labels_idx = np.where(verified_y_labels)
+        if any(verified_y_labels):
+            exitstate = -10
+            validated_value = y[verified_y_labels_idx[0]]
+            validated_value_idx = verified_y_labels_idx[0]
+        else:
+            exitstate = 1
+            validated_value = None
+            validated_value_idx = None
+    elif method == 'legacy':
+        validated_value = None
+        validated_value_idx = None
+        if y.size <= 1:
+            exitstate = y.size
+        else:
+            exitstate = 1
+            for i in range(y.size - 1):
+                comparison = y[i+1:] - y[i]
+                if any(abs(comparison) < threshold):
+                    validated_value_idx = i
+                    validated_value = y[i]
+                    exitstate = -10
+                    break
+                else:
+                    exitstate += 1
+    else:
+        raise ValueError('method must be "DBSCAN" or "legacy".')
     return exitstate, validated_value, validated_value_idx

 def EVMD_idx(y, validated_value, threshold=6):
@@ -147,14 +192,16 @@ def EVMD_idx(y, validated_value, threshold=6):

 def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False):
     # wlr = weighted linear regression.
-    exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
-    if exitstate >= 0 or (max(x) - min(x)) < 1:
+    # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
+    exitstate, idx = EVMD_DBSCAN(x, y, eps=evmd_threshold)
+    # if exitstate >= 0 or (max(x) - min(x)) < 1:
+    if exitstate < 0 or (max(x) - min(x)) < 1:
         slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size
         return slope, slope_err, resid, count
     else:
         # if x.size == 3:
         #     print(x, y, ye)
-        idx = EVMD_idx(y, validated_value, threshold=evmd_threshold)
+        # idx = EVMD_idx(y, validated_value, threshold=evmd_threshold)
         if sum(idx) >= 3:
             x = x[idx]
             y = y[idx]
@@ -505,30 +552,46 @@ def ShowDhdtTifs(self):
         dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif')
         dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif')
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count

-    def form_mosaic(self, order='ascending'):
+    @timeit
+    def form_mosaic(self, order='ascending', method='DBSCAN'):
         """
         order options:
             ascending: early elevations will be populated first
             descending: late elevations will be populated first
+        method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
         self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float)
         self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float)
         self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float)
         for m in range(self.ts.shape[0]):
             if m % 100 == 0:
                 print(m)
             for n in range(self.ts.shape[1]):
                 date = self.ts[m, n].get_date()
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
+                if method == 'DBSCAN':
+                    exitstate, verified_y_labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    if exitstate >= 0:
+                        verified_y_labels_idx = np.where(verified_y_labels)[0]
+                        if order == 'descending':
+                            idx = verified_y_labels_idx[-1]
+                        elif order == 'ascending':
+                            idx = verified_y_labels_idx[0]
+                        else:
+                            raise ValueError('order must be "ascending" or "descending".')
+                        self.mosaic['value'][m, n] = value[idx]
+                        self.mosaic['date'][m, n] = 
date[idx] + self.mosaic['uncertainty'][m, n] = uncertainty[idx] + elif method == 'legacy': + if order == 'descending': + date = np.flip(date) + uncertainty = np.flip(uncertainty) + value = np.flip(value) + elif order != 'ascending': + raise ValueError('order must be "ascending" or "descending".') + exitstate, validated_value, validated_value_idx = EVMD(value, threshold=self.evmd_threshold) + if exitstate < 0: + self.mosaic['value'][m, n] = validated_value + self.mosaic['date'][m, n] = date[validated_value_idx] + self.mosaic['uncertainty'][m, n] = uncertainty[validated_value_idx] + else: + raise ValueError('method must be "DBSCAN" or "legacy".') mosaic_value = SingleRaster('{}_mosaic-{}_value.tif'.format(self.dhdtprefix, order)) mosaic_date = SingleRaster('{}_mosaic-{}_date.tif'.format(self.dhdtprefix, order)) mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order)) @@ -558,29 +623,30 @@ def onclick_ipynb(event): """ print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % ('double' if event.dblclick else 'single', event.button, - event.x, event.y, event.xdata, event.ydata)) - col = int(event.xdata) - row = int(event.ydata) - xx = data[row, col].get_date() - yy = data[row, col].get_value() - ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) - SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) - SSRes = np.sum((y_good - y_goodest) ** 2) - SST = np.sum((y_good - np.mean(y_good)) ** 2) - Rsquared = 1 - SSRes / SST - axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') - axs[1].cla() - axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') - np.set_printoptions(precision=3) - np.set_printoptions(suppress=True) - xye = np.vstack((xx,yy,ye)).T - print(xye[xye[:,1].argsort()]) - axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) - axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) - axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) - axs[1].set_xlabel('days, from xxxx-01-01') - axs[1].set_ylabel('height (m)') + event.x, event.y, event.xdata, event.ydata)) # only works for non-Notebook backend? 
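+        # Respond only to clicks on the map panel (axs[0]); clicks on the
+        # time-series panel (axs[1]) are ignored.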
+ if event.inaxes is axs[0]: + col = int(event.xdata) + row = int(event.ydata) + xx = data[row, col].get_date() + yy = data[row, col].get_value() + ye = data[row, col].get_uncertainty() + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) + SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) + SSRes = np.sum((y_good - y_goodest) ** 2) + SST = np.sum((y_good - np.mean(y_good)) ** 2) + Rsquared = 1 - SSRes / SST + axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') + axs[1].cla() + axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') + np.set_printoptions(precision=3) + np.set_printoptions(suppress=True) + xye = np.vstack((xx,yy,ye)).T + print(xye[xye[:,1].argsort()]) + axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) + axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) + # axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) + axs[1].set_xlabel('data[{}, {}] (days, from xxxx-01-01)'.format(row, col)) + axs[1].set_ylabel('height (m)') return onclick_ipynb diff --git a/requirements.txt b/requirements.txt index aa7eaef..61a89a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ rasterio geopandas matplotlib scikit-image +scikit-learn \ No newline at end of file From fd308eed788faeefd691e167a32059b80fb6994b Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Fri, 21 Apr 2023 06:14:45 +0000 Subject: [PATCH 07/24] Dealing with the removed DODS driver --- carst/libdhdt.py | 24 ++++++++++++------------ carst/libraster.py | 14 ++++++++++---- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index ed5bb22..38439c9 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -37,20 +37,20 @@ def time_wrapper(*args, **kwargs): def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster + """ + resample orig_dem using the extent and spacing provided by resamp_ref_dem + orig_dem: class UtilRaster.SingleRaster object + resamp_ref_dem: class UtilRaster.SingleRaster object + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - This one uses gdal.Warp API. - """ + This one uses gdal.Warp API. 
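+    Example (a hypothetical call; both rasters must be in the same CRS):
+        arr = Resample_Array2(dem, refgeo, resampleAlg='bilinear')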
+ """ - ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) - out_ds = gdal.Warp('tmp.tif', ds, options=opts) - return out_ds.GetRasterBand(1).ReadAsArray() + ds = gdal.Open(orig_dem.fpath) + ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) + out_ds = gdal.Warp('tmp.tif', ds, options=opts) + return out_ds.GetRasterBand(1).ReadAsArray() def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): diff --git a/carst/libraster.py b/carst/libraster.py index a2eb824..2ff0c4b 100644 --- a/carst/libraster.py +++ b/carst/libraster.py @@ -13,11 +13,13 @@ ##### Make URL file to work: deactive the DODS driver # https://github.com/mapbox/rasterio/issues/1000 # https://trac.osgeo.org/gdal/ticket/2696 +# Now using the first method since DODS driver was removed in gdal 3.5. +# https://github.com/OSGeo/gdal/issues/5173 ### method #1 # gdal.GetDriverByName('DODS').Deregister() ### method #2 -os.environ["GDAL_SKIP"] = 'DODS' +# os.environ["GDAL_SKIP"] = 'DODS' ##################################################### @@ -26,12 +28,16 @@ import numpy as np from datetime import datetime try: - import gdal -except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal +except ImportError: + import gdal # this was used until GDAL 3.1 from osgeo import osr # we assume the fpath is the file with .tif or .TIF suffix. +try: + gdal.GetDriverByName('DODS').Deregister() +except AttributeError: # In Case that DODS driver does not exist (for GDAL >= 3.5) + pass def timeit(func): def time_wrapper(*args, **kwargs): From 40aad9ab4299599fc3c70bb84abb036363038556 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Fri, 21 Apr 2023 11:23:12 +0000 Subject: [PATCH 08/24] Update resample_array for COG efficiency --- carst/libdhdt.py | 67 ++- carst/libraster.py | 990 +++++++++++++++++++++++---------------------- 2 files changed, 553 insertions(+), 504 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 38439c9..47685dd 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -35,6 +35,35 @@ def time_wrapper(*args, **kwargs): return dec_func return time_wrapper +def resample_array(source, reference, method='bilinear', destination=None): + """ + resample the source raster using the extent and spacing provided by the reference raster. Two rasters must be in the same CRS. + + source: class UtilRaster.SingleRaster object + reference: class UtilRaster.SingleRaster object + destination: str, filename for the output to be written. If None, a temporary file (/vismem/) will be used. + + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster. + + For better COG processing efficiency, we rasterio to check the if the source and the reference intersect each other. + For fasting resampling, we use GDAL and it C library. 
+ """ + + s_ulx, s_uly, s_lrx, s_lry = source.get_extent() + source_extent = Polygon([(s_ulx, s_uly), (s_lrx, s_uly), (s_lrx, s_lry), (s_ulx, s_lry)]) + ulx, uly, lrx, lry = reference.get_extent() + reference_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) + if source_extent.intersects(reference_extent): + ds = gdal.Open(source.fpath) + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=reference.get_x_res(), yRes=reference.get_y_res(), resampleAlg=method) + if not destination: + out_ds = gdal.Warp('/vsimem/resampled.tif', ds, options=opts) + else: + out_ds = gdal.Warp(destination, ds, options=opts) + return out_ds.GetRasterBand(1).ReadAsArray() + else: + return np.full((reference.get_y_size(), reference.get_x_size()), reference.get_nodata()) + def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): """ @@ -47,8 +76,8 @@ def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): """ ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.get_x_res(), yRes=resamp_ref_dem.get_y_res(), resampleAlg=resampleAlg) out_ds = gdal.Warp('tmp.tif', ds, options=opts) return out_ds.GetRasterBand(1).ReadAsArray() @@ -67,33 +96,33 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): resamp_method: 'linear', 'cubic', 'quintic', 'nearest' """ - o_ulx, o_uly, o_lrx, o_lry = orig_dem.GetExtent() + o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) if orig_dem_extent.intersects(resamp_ref_dem_extent): - x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.GetRasterXSize()) - y = np.linspace(o_uly, o_lry - o_yres, orig_dem.GetRasterYSize()) + x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) + y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) z = orig_dem.ReadAsArray() - # z[z == orig_dem.GetNoDataValue()] = np.nan # this line probably needs improvement + # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement if resamp_method == 'nearest': print('resampling method = nearest') xx, yy = np.meshgrid(x, y) points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) values = np.reshape(z, z.size) fun = NearestNDInterpolator(points, values) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) xxnew, yynew = np.meshgrid(xnew, ynew) znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) elif resamp_method == 'linear': - nan_value = orig_dem.GetNoDataValue() + nan_value = orig_dem.get_nodata() print('resampling method = interp2d - ' + resamp_method) fun = interp2d(x, y, z, kind=resamp_method, 
bounds_error=False, copy=False, fill_value=nan_value) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) zmask = np.flipud(fun2(xnew, ynew)) @@ -105,12 +134,12 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): xx = xx.ravel() yy = yy.ravel() zz = z.ravel() - realdata_pos = zz != orig_dem.GetNoDataValue() + realdata_pos = zz != orig_dem.get_nodata() xx = xx[realdata_pos] yy = yy[realdata_pos] zz = zz[realdata_pos] - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') del z gc.collect() @@ -426,8 +455,8 @@ def SetEVMDThreshold(self, ini): def InitTS(self): # ==== Prepare the reference geometry ==== - refgeo_Ysize = self.refgeo.GetRasterYSize() - refgeo_Xsize = self.refgeo.GetRasterXSize() + refgeo_Ysize = self.refgeo.get_y_size() + refgeo_Xsize = self.refgeo.get_x_size() self.ts = np.empty([refgeo_Ysize, refgeo_Xsize], dtype=object) # for j in range(self.ts.shape[0]): # for i in range(self.ts.shape[1]): @@ -682,7 +711,7 @@ def __new__(cls, dem=None, array=None, date=None, uncertainty=None): if dem is not None: # retrieve band 1 array, and then replace NoDataValue by np.nan dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.GetNoDataValue()] = np.nan + dem_array[dem_array == dem.get_nodata()] = np.nan obj = np.asarray(dem_array).view(cls) obj.date = [dem.date] obj.uncertainty = [dem.uncertainty] @@ -718,7 +747,7 @@ def AddDEM(self, dem): self.uncertainty.append(dem.uncertainty) # Add the first band, and then replace NoDataValue by np.nan dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.GetNoDataValue()] = np.nan + dem_array[dem_array == dem.get_nodata()] = np.nan return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) def Date2DayDelta(self): diff --git a/carst/libraster.py b/carst/libraster.py index 2ff0c4b..b06fe4e 100644 --- a/carst/libraster.py +++ b/carst/libraster.py @@ -6,6 +6,7 @@ # last edit: Doc 12 2016 (Change the self.uncertainty in __init__ and add ReadGeolocPoint method) # last edit: Sep 11 2018 (added a new class: RasterVelos) # last edit: Mar 01 2021 (enable URL support) +# last edit: Apr 21 2023 (using rasterio for fast COG processing without changing the input URL) import sys import os @@ -32,6 +33,7 @@ except ImportError: import gdal # this was used until GDAL 3.1 from osgeo import osr +import rasterio # we assume the fpath is the file with .tif or .TIF suffix. try: @@ -52,480 +54,498 @@ def time_wrapper(*args, **kwargs): class SingleRaster: - """ - DEM object. Provide operations like "Unify" (gdalwarp) and "GetPointsFromXYZ" (grdtrack). 
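    Example (hypothetical file and metadata):
        dem = SingleRaster('dem_20150305.tif', date='2015-03-05', uncertainty=4.0)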
- Future improvement: detect I/O error, like this: - - if not os.path.exists(image1_path): - print("\n***** WARNING: \"" + image1_path + "\" not found, make sure full path is provided, skipping corresponding pair...\n"); - continue; - """ - - def __init__(self, fpath, date=None, uncertainty=None): - self.fpath = fpath - self.uncertainty = float(uncertainty) if uncertainty is not None else None - if type(date) is str: - if len(date) == 10: - self.date = datetime.strptime(date, '%Y-%m-%d') - elif len(date) == 8: - self.date = datetime.strptime(date, '%Y%m%d') - elif len(date) == 7: - self.date = datetime.strptime(date, '%y%b%d') # YYMMMDD - else: - raise ValueError("This format of date string is not currently supported." +\ - "Please use YYYY-MM-DD, YYYYMMDD, or YYMMMDD.") - elif type(date) is datetime: - self.date = date - elif date is None: - self.date = None - else: - raise TypeError('The date format is not currently supported.') - self.iscepointer = None - - def SetPath(self, fpath): - self.fpath = fpath - - def GetProjection(self): - ds = gdal.Open(self.fpath) - return ds.GetProjection() - - def GetGeoTransform(self): - """ returns [ulx, xres, xskew, uly, yskew, yres] """ - ds = gdal.Open(self.fpath) - return ds.GetGeoTransform() - - def GetXRes(self): - ds = gdal.Open(self.fpath) - geot = ds.GetGeoTransform() - return geot[1] - - def GetYRes(self): - ds = gdal.Open(self.fpath) - geot = ds.GetGeoTransform() - return geot[-1] - - def GetNoDataValue(self, band=1): - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.GetNoDataValue() - - def SetNoDataValue(self, nodata_val, band=1): - ds = gdal.Open(self.fpath, gdal.GA_Update) - dsband = ds.GetRasterBand(band) - dsband.SetNoDataValue(nodata_val) - dsband.FlushCache() - ds = dsband = None - - def GetProj4(self): - - """ - return projection as Proj.4 string. - """ - - ds = gdal.Open(self.fpath) - wkt_text = ds.GetProjection() - srs = osr.SpatialReference() - srs.ImportFromWkt(wkt_text) - return srs.ExportToProj4() - - def GetRasterXSize(self): - """ This is 'samples' of a image """ - ds = gdal.Open(self.fpath) - return ds.RasterXSize - - def GetRasterYSize(self): - """ This is 'lines' of a image """ - ds = gdal.Open(self.fpath) - return ds.RasterYSize - - def GetExtent(self): - - """ - return extent: ul_x, ul_y, lr_x, lr_y. ul = upper left; lr = lower right. - """ - - ds = gdal.Open(self.fpath) - ulx, xres, xskew, uly, yskew, yres = ds.GetGeoTransform() - lrx = ulx + (ds.RasterXSize * xres) - lry = uly + (ds.RasterYSize * yres) - return ulx, uly, lrx, lry - - def GetDataType(self, band=1): - - """ - return DataType: http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4 - """ - - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.DataType - - - def Unify(self, params): - - """ Use gdalwarp to clip, reproject, and resample a DEM using a given set of params (which can 'unify' all DEMs). 
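    A params example (hypothetical values):
        {'t_srs': 'EPSG:3413', 'tr': '10 10', 'te': '400000 -1300000 500000 -1200000', 'of': 'GTiff'}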
""" - - print('Calling Gdalwarp...') - fpath_warped = self.fpath[:-4] + '_warped' + self.fpath[-4:] - if 'output_dir' in params: - newpath = '/'.join([params['output_dir'], fpath_warped.split('/')[-1]]) - else: - # for pixel tracking (temporarily) - newfolder = 'test_folder' - if not os.path.exists(newfolder): - os.makedirs(newfolder) - newpath = '/'.join([newfolder, fpath_warped.split('/')[-1]]) - newpath = newpath.replace(newpath.split('.')[-1], 'img') - gdalwarp_cmd = 'gdalwarp -t_srs ' + params['t_srs'] + ' -tr ' + params['tr'] + ' -te ' + params['te'] - if 'of' in params: - gdalwarp_cmd += ' -of ' + params['of'] - if 'ot' in params: - gdalwarp_cmd += ' -ot ' + params['ot'] - gdalwarp_cmd += ' -overwrite ' + self.fpath + ' ' + newpath - print(gdalwarp_cmd) - retcode = subprocess.call(gdalwarp_cmd, shell=True) - if retcode != 0: - print('Gdalwarp failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - self.fpath = newpath - - def ReadGeolocPoint(self, x, y, band=1): - - """ - It's almost the same as gdallocationinfo -geoloc srcfile x y - Read a point in the georeferencing system of the raster, and then return the pixel value at that point. - Returns NaN if (x, y) is not within the extent of the raster. - """ - - ulx, uly, lrx, lry = self.GetExtent() - ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() - # print('xbound:', ulx, lrx, 'ybound:', uly, lry) - # if y > 220000: - # print(x, y, 'xbound:', ulx, lrx, 'ybound:', uly, lry) - nodatval = self.GetNoDataValue(band=band) - if nodatval is None: - nodatval = 0 - - if (ulx <= x < lrx) & (uly >= y > lry): - ds = gdal.Open(self.fpath) - px = int((x - ulx) / xres) # x pixel coor - py = int((y - uly) / yres) # y pixel coor - dsband = ds.GetRasterBand(band) - pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) - pixel_val = float(pixel_val) - # print('pixel_val: {}'.format(pixel_val)) - # print('nodata: {}'.format(self.GetNoDataValue(band=band))) - # print('abs: {}'.format(abs(pixel_val - self.GetNoDataValue(band=band)))) - if abs(pixel_val - nodatval) < 1: - pixel_val = np.nan - return pixel_val - else: - return np.nan - - def ReadGeolocPoints(self, x, y, band=1): - - """ - Batch routine for running ReadGeolocPoint many times. - x & y: 1-D array showing the (x, y) of many points. - Returns NaN if (x, y) is not within the extent of the raster. - """ - - ulx, uly, lrx, lry = self.GetExtent() - ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() - - x_in = np.logical_and(ulx <= x, x < lrx) - y_in = np.logical_and(uly >= y, y > lry) - xy_in = np.logical_and(x_in, y_in) - - z = np.empty_like(x) - z[:] = np.nan - idx = np.where(xy_in) - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - - nodatval = self.GetNoDataValue(band=band) - if nodatval is None: - nodatval = 0 - - for i in idx[0]: - # print(x[i]) - px = int((x[i] - ulx) / xres) # x pixel coor - py = int((y[i] - uly) / yres) # y pixel coor - pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) - pixel_val = float(pixel_val) - if abs(pixel_val - nodatval) < 1: - pixel_val = np.nan - z[i] = pixel_val - - return z - - - def ReadAsArray(self, band=1): - - """ The default will return the first band. """ - - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.ReadAsArray() - - def Array2Raster(self, array, refdem): - - """ - This is to write array to raster. Be cautious overwritting the old one! 
- refdem (reference DEM) can be either a SingleRaster object or a gdal.Dataset object. - This method will use the projection and the geotransform values from the refdem for the new geotiff file. - """ - - driver = gdal.GetDriverByName('GTiff') - # Saved in gdal 32-bit float geotiff format - out_raster = driver.Create(self.fpath, array.shape[1], array.shape[0], 1, gdal.GDT_Float32) - out_raster.SetGeoTransform( refdem.GetGeoTransform() ) - out_raster.SetProjection( refdem.GetProjection() ) - # Write data to band 1 (becuase this is a brand new file) - # Of course, set nan to NoDataValue - nodatavalue = refdem.GetNoDataValue() if refdem.GetNoDataValue() is not None else -9999.0 - array[np.isnan(array)] = nodatavalue - out_raster.GetRasterBand(1).SetNoDataValue( nodatavalue ) - out_raster.GetRasterBand(1).WriteArray(array) - # Save to file - out_raster.FlushCache() - - def XYZArray2Raster(self, array, projection=''): - - """ - This is to write an xyzfile-like array to raster. Be cautious overwritting the old one! - projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. - This method will use the geotransform values from the xyzfile-like array itself; i.e. - ul coordinate is (min(x), max(y)) and the spacing is unique(diff(unique(x))) and unique(diff(unique(y))). - The nodata value is not set. - """ - - driver = gdal.GetDriverByName('GTiff') - # Saved in gdal 32-bit float geotiff format - xaxis = np.unique(array[:,0]) - yaxis = np.unique(array[:,1]) - out_raster = driver.Create(self.fpath, len(xaxis), len(yaxis), 1, gdal.GDT_Float32) - # here I rounded the number to the 6th decimal to get the spacing, since a small error (around 1e-10) - # would generate when reading the ampcor-off file. - xspacing = np.unique(np.diff(xaxis).round(decimals=7)) - yspacing = np.unique(np.diff(yaxis).round(decimals=7)) - # print(xspacing) - # print(yspacing) - if len(xspacing) == 1 and len(yspacing) == 1: - # assuming no rotation in the Affine transform - new_geotransform = (min(xaxis), xspacing[0], 0, max(yaxis), 0, -yspacing[0]) - else: - raise TypeError('There is someting wrong in XYZ array format. Check the consistency of the x- or y-spacing.') - out_raster.SetGeoTransform( new_geotransform ) - out_raster.SetProjection(projection) - # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) - out_raster.GetRasterBand(1).WriteArray(np.reshape(array[:, 2], (len(yaxis), len(xaxis)))) - out_raster.FlushCache() - - def XYZ2Raster(self, xyzfilename, projection=''): - - """ - This is to write an xyzfile (ascii, sorted) to raster. Be cautious overwritting the old one! - projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. - This method will use the geotransform values from the xyzfile itself. (that is, the first data is at "ul" position.) - The nodata value is not set. - """ - - driver = gdal.GetDriverByName('GTiff') - src_ds = gdal.Open(xyzfilename) - out_raster = driver.CreateCopy(self.fpath, src_ds, 0 ) - out_raster.SetProjection(projection) - out_raster.SetGeoTransform( src_ds.GetGeoTransform() ) - # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) - out_raster.FlushCache() - - - def GetPointsFromXYZ(self, xyzfilename): - - """ - Get points from a xyzfile, using grdtrack. Return the output .xyz file. - Currently the output .xyz file is fixed as 'log_getUncertaintyDEM_grdtrack_output.xyz' - and will be overwritten by later commands. 
- """ - - print('Calling Grdtrack...') - newpath = 'log_getUncertaintyDEM_grdtrack_output.xyz' - grdtrack_cmd = 'grdtrack ' + xyzfilename +\ - ' -G' + self.fpath +\ - ' > ' + newpath - print(grdtrack_cmd) - retcode = subprocess.call(grdtrack_cmd, shell=True) - if retcode != 0: - print('Grdtrack failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - return newpath - - def AmpcorPrep(self): - - """ - Prepare to be used in ampcor. - Ampcor package in ISCE uses a special file pointer for accessing geotiff data. - Therefore, we need ISCE module "Image" for this purpose. - """ - - import isce - from isceobj.Image.Image import Image - - # ==== need a vrt file - # >= Python 3.4 - from pathlib import Path - vrtpath = Path(self.fpath + '.vrt') - if not vrtpath.is_file(): - print('Calling gdalbuildvrt...') - if self.fpath.startswith('http'): - gdalbuildvrt_cmd = 'gdalbuildvrt ' + os.path.basename(self.fpath) + '.vrt ' + self.fpath - else: - gdalbuildvrt_cmd = 'gdalbuildvrt ' + self.fpath + '.vrt ' + self.fpath - print(gdalbuildvrt_cmd) - retcode = subprocess.call(gdalbuildvrt_cmd, shell=True) - if retcode != 0: - print('gdalbuildvrt failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - # ==================== - - obj = Image() - obj.setFilename(self.fpath) - obj.setWidth(self.GetRasterXSize()) # gdalinfo, first number - if self.GetDataType() <= 3: - obj.setDataType('SHORT') - elif 4 <= self.GetDataType() <= 5: - obj.setDataType('LONG') - elif self.GetDataType() == 6: - obj.setDataType('FLOAT') - elif self.GetDataType() == 7: - obj.setDataType('DOUBLE') # SHORT, LONG, FLOAT, DOUBLE, etc - else: - obj.setDataType('CFLOAT') # not totally right, may be wrong - # obj.setBands(1) # "self" requires a single band - # obj.setAccessMode('read') - obj.setCaster('read', 'FLOAT') # fixed as float - obj.createImage() - self.iscepointer = obj - - def ClippedByPolygon(self, polygon_shapefile): - - """ - return all pixel values within a given polygon shapefile. - according to - https://gis.stackexchange.com/questions/260304/extract-raster-values-within-shapefile-with-pygeoprocessing-or-gdal - """ - # from rasterio import logging - # log = logging.getLogger() - # log.setLevel(logging.ERROR) - - import rasterio - from rasterio.mask import mask - import geopandas as gpd - from shapely.geometry import mapping - - shapefile = gpd.read_file(polygon_shapefile) - geoms = shapefile.geometry.values - # geometry = geoms[0] # shapely geometry - # geoms = [mapping(geoms[0])] # transform to GeJSON format - geoms = [mapping(geoms[i]) for i in range(len(geoms))] - with rasterio.open(self.fpath) as src: - out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0) - # The out_image result is a Numpy masked array - # no_data = src.nodata - # if no_data is None: - # no_data = 0.0 - nodata = -9999.0 - # print(out_image) - # print(out_image.data.shape) - # print(out_image.data) - try: - clipped_data = out_image.data[0] - except NotImplementedError: - clipped_data = out_image[0] - # PROBABLY HAVE TO CHANGE TO out_image[0] HERE - # extract the valid values - # and return them as a numpy 1-D array - # return np.extract(clipped_data != nodata, clipped_data) - return clipped_data - - def GaussianHighPass(self, sigma=3, truncate=1.0): - - """ - Gaussian High Pass filter. Default sigma = 3. 
- """ - - from scipy.ndimage import gaussian_filter - data = self.ReadAsArray() - data = data.astype(float) - if self.GetNoDataValue() is not None: - data[data == self.GetNoDataValue()] = np.nan - else: - data[data == 0] = np.nan # LS-8 case - lowpass = gaussian_filter(data, sigma, truncate=truncate) - highpass = data - lowpass - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - hp_raster_path = tmp.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' - else: - hp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' - hp_raster = SingleRaster(hp_raster_path) - hp_raster.Array2Raster(highpass, self) - self.SetPath(hp_raster_path) - - def GaussianLowPass(self, sigma=1): - - """ - Gaussian Low Pass filter. Default sigma = 1. - deal with nodata values. - https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python - """ - - from scipy.ndimage import gaussian_filter - data = self.ReadAsArray() - # data[data == self.GetNoDataValue()] = np.nan - # lowpass = gaussian_filter(data.astype(float), sigma, truncate=truncate) - nodata_pos = data == self.GetNoDataValue() - data[nodata_pos] = 0 - w = np.ones_like(data) - w[nodata_pos] = 0 - vv = gaussian_filter(data.astype(float), sigma) - ww = gaussian_filter(w.astype(float), sigma) - ww[ww == 0] = np.finfo(float).eps - lowpass = vv / ww - lowpass[nodata_pos] = self.GetNoDataValue() - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - lp_raster_path = tmp.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' - else: - lp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' - lp_raster = SingleRaster(lp_raster_path) - lp_raster.Array2Raster(lowpass, self) - self.SetPath(lp_raster_path) - - def CannyEdge(self, sigma=3): - - """ - Canny Edge filter. Default sigma = 3. - deal with nodata values. - https://scikit-image.org/docs/dev/auto_examples/edges/plot_canny.html - """ - - from skimage import feature - data = self.ReadAsArray() - nodata_pos = data == self.GetNoDataValue() - data[nodata_pos] = 0 - - edges = feature.canny(data, sigma=sigma) - edges[nodata_pos] = self.GetNoDataValue() - - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - canny_raster_path = tmp.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' - else: - canny_raster_path = self.fpath.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' - canny_raster = SingleRaster(canny_raster_path) - canny_raster.Array2Raster(edges, self) - self.SetPath(canny_raster_path) + """ + DEM object. Provide operations like "Unify" (gdalwarp) and "GetPointsFromXYZ" (grdtrack). + Future improvement: detect I/O error + """ + + def __init__(self, fpath, date=None, uncertainty=None): + self.fpath = fpath + self.uncertainty = float(uncertainty) if uncertainty is not None else None + self.set_date(date) + self.iscepointer = None + + def set_date(self, date): + if type(date) is str: + if len(date) == 10: + self.date = datetime.strptime(date, '%Y-%m-%d') + elif len(date) == 8: + self.date = datetime.strptime(date, '%Y%m%d') + elif len(date) == 7: + self.date = datetime.strptime(date, '%y%b%d') # YYMMMDD + else: + raise ValueError("This format of date string is not currently supported." 
+\ + "Please use YYYY-MM-DD, YYYYMMDD, or YYMMMDD.") + elif type(date) is datetime: + self.date = date + elif date is None: + self.date = None + else: + raise TypeError('The date format is not currently supported.') + + def set_path(self, fpath): + self.fpath = fpath + + def GetProjection(self): + """ + Still using Gdal. + """ + ds = gdal.Open(self.fpath) + return ds.GetProjection() + + def GetGeoTransform(self): + """ + returns [ulx, xres, xskew, uly, yskew, yres] + Still using Gdal. + """ + ds = gdal.Open(self.fpath) + return ds.GetGeoTransform() + + def get_x_res(self): + with rasterio.open(self.fpath) as src: + transform = src.transform + return transform.a + + + def get_y_res(self): + with rasterio.open(self.fpath) as src: + transform = src.transform + return transform.e + + def get_nodata(self): + with rasterio.open(self.fpath) as src: + return src.nodata + + def SetNoDataValue(self, nodata_val, band=1): + """ + Still using Gdal. + """ + ds = gdal.Open(self.fpath, gdal.GA_Update) + dsband = ds.GetRasterBand(band) + dsband.SetNoDataValue(nodata_val) + dsband.FlushCache() + ds = dsband = None + + def GetProj4(self): + + """ + return projection as Proj.4 string. + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + wkt_text = ds.GetProjection() + srs = osr.SpatialReference() + srs.ImportFromWkt(wkt_text) + return srs.ExportToProj4() + + def get_x_size(self): + """ This is 'samples' of a image """ + with rasterio.open(self.fpath) as src: + return src.width + + def get_y_size(self): + """ This is 'lines' of a image """ + with rasterio.open(self.fpath) as src: + return src.height + + def get_extent(self): + """ + return extent: ul_x, ul_y, lr_x, lr_y. ul = upper left; lr = lower right. + """ + with rasterio.open(self.fpath) as src: + ulx, lry, lrx, uly = src.bounds + return ulx, uly, lrx, lry + + def GetDataType(self, band=1): + + """ + return DataType: http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4 + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + return dsband.DataType + + + def Unify(self, params): + + """ Use gdalwarp to clip, reproject, and resample a DEM using a given set of params (which can 'unify' all DEMs). """ + + print('Calling Gdalwarp...') + fpath_warped = self.fpath[:-4] + '_warped' + self.fpath[-4:] + if 'output_dir' in params: + newpath = '/'.join([params['output_dir'], fpath_warped.split('/')[-1]]) + else: + # for pixel tracking (temporarily) + newfolder = 'test_folder' + if not os.path.exists(newfolder): + os.makedirs(newfolder) + newpath = '/'.join([newfolder, fpath_warped.split('/')[-1]]) + newpath = newpath.replace(newpath.split('.')[-1], 'img') + gdalwarp_cmd = 'gdalwarp -t_srs ' + params['t_srs'] + ' -tr ' + params['tr'] + ' -te ' + params['te'] + if 'of' in params: + gdalwarp_cmd += ' -of ' + params['of'] + if 'ot' in params: + gdalwarp_cmd += ' -ot ' + params['ot'] + gdalwarp_cmd += ' -overwrite ' + self.fpath + ' ' + newpath + print(gdalwarp_cmd) + retcode = subprocess.call(gdalwarp_cmd, shell=True) + if retcode != 0: + print('Gdalwarp failed. Please check if all the input parameters are properly set.') + sys.exit(retcode) + self.fpath = newpath + + def ReadGeolocPoint(self, x, y, band=1): + + """ + It's almost the same as gdallocationinfo -geoloc srcfile x y + Read a point in the georeferencing system of the raster, and then return the pixel value at that point. + Returns NaN if (x, y) is not within the extent of the raster. + Still using Gdal. 
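+
+        A sketch of the intended call (the file name and coordinates are
+        made up for illustration; they are not part of this patch):
+
+            dem = SingleRaster('dem_utm.tif', date='2015-07-01')
+            z = dem.ReadGeolocPoint(432100.0, 7654300.0)   # x, y in the raster's CRS
+            # z is the band-1 value there, or np.nan outside the extent
+            # (or where the pixel matches the nodata value)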
+ """ + + ulx, uly, lrx, lry = self.get_extent() + ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() + # print('xbound:', ulx, lrx, 'ybound:', uly, lry) + # if y > 220000: + # print(x, y, 'xbound:', ulx, lrx, 'ybound:', uly, lry) + nodatval = self.get_nodata() + if nodatval is None: + nodatval = 0 + + if (ulx <= x < lrx) & (uly >= y > lry): + ds = gdal.Open(self.fpath) + px = int((x - ulx) / xres) # x pixel coor + py = int((y - uly) / yres) # y pixel coor + dsband = ds.GetRasterBand(band) + pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) + pixel_val = float(pixel_val) + # print('pixel_val: {}'.format(pixel_val)) + # print('nodata: {}'.format(self.get_nodata())) + # print('abs: {}'.format(abs(pixel_val - self.get_nodata()))) + if abs(pixel_val - nodatval) < 1: + pixel_val = np.nan + return pixel_val + else: + return np.nan + + def ReadGeolocPoints(self, x, y, band=1): + + """ + Batch routine for running ReadGeolocPoint many times. + x & y: 1-D array showing the (x, y) of many points. + Returns NaN if (x, y) is not within the extent of the raster. + Still using Gdal. + """ + + ulx, uly, lrx, lry = self.get_extent() + ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() + + x_in = np.logical_and(ulx <= x, x < lrx) + y_in = np.logical_and(uly >= y, y > lry) + xy_in = np.logical_and(x_in, y_in) + + z = np.empty_like(x) + z[:] = np.nan + idx = np.where(xy_in) + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + + nodatval = self.get_nodata() + if nodatval is None: + nodatval = 0 + + for i in idx[0]: + # print(x[i]) + px = int((x[i] - ulx) / xres) # x pixel coor + py = int((y[i] - uly) / yres) # y pixel coor + pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) + pixel_val = float(pixel_val) + if abs(pixel_val - nodatval) < 1: + pixel_val = np.nan + z[i] = pixel_val + + return z + + + def ReadAsArray(self, band=1): + + """ The default will return the first band. + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + return dsband.ReadAsArray() + + def Array2Raster(self, array, refdem): + + """ + This is to write array to raster. Be cautious overwritting the old one! + refdem (reference DEM) can be either a SingleRaster object or a gdal.Dataset object. + This method will use the projection and the geotransform values from the refdem for the new geotiff file. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + # Saved in gdal 32-bit float geotiff format + out_raster = driver.Create(self.fpath, array.shape[1], array.shape[0], 1, gdal.GDT_Float32) + out_raster.SetGeoTransform( refdem.GetGeoTransform() ) + out_raster.SetProjection( refdem.GetProjection() ) + # Write data to band 1 (becuase this is a brand new file) + # Of course, set nan to NoDataValue + nodatavalue = refdem.get_nodata() if refdem.get_nodata() is not None else -9999.0 + array[np.isnan(array)] = nodatavalue + out_raster.GetRasterBand(1).SetNoDataValue( nodatavalue ) + out_raster.GetRasterBand(1).WriteArray(array) + # Save to file + out_raster.FlushCache() + + def XYZArray2Raster(self, array, projection=''): + + """ + This is to write an xyzfile-like array to raster. Be cautious overwritting the old one! + projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. + This method will use the geotransform values from the xyzfile-like array itself; i.e. 
+ ul coordinate is (min(x), max(y)) and the spacing is unique(diff(unique(x))) and unique(diff(unique(y))). + The nodata value is not set. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + # Saved in gdal 32-bit float geotiff format + xaxis = np.unique(array[:,0]) + yaxis = np.unique(array[:,1]) + out_raster = driver.Create(self.fpath, len(xaxis), len(yaxis), 1, gdal.GDT_Float32) + # here I rounded the number to the 6th decimal to get the spacing, since a small error (around 1e-10) + # would generate when reading the ampcor-off file. + xspacing = np.unique(np.diff(xaxis).round(decimals=7)) + yspacing = np.unique(np.diff(yaxis).round(decimals=7)) + # print(xspacing) + # print(yspacing) + if len(xspacing) == 1 and len(yspacing) == 1: + # assuming no rotation in the Affine transform + new_geotransform = (min(xaxis), xspacing[0], 0, max(yaxis), 0, -yspacing[0]) + else: + raise TypeError('There is someting wrong in XYZ array format. Check the consistency of the x- or y-spacing.') + out_raster.SetGeoTransform( new_geotransform ) + out_raster.SetProjection(projection) + # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) + out_raster.GetRasterBand(1).WriteArray(np.reshape(array[:, 2], (len(yaxis), len(xaxis)))) + out_raster.FlushCache() + + def XYZ2Raster(self, xyzfilename, projection=''): + + """ + This is to write an xyzfile (ascii, sorted) to raster. Be cautious overwritting the old one! + projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. + This method will use the geotransform values from the xyzfile itself. (that is, the first data is at "ul" position.) + The nodata value is not set. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + src_ds = gdal.Open(xyzfilename) + out_raster = driver.CreateCopy(self.fpath, src_ds, 0 ) + out_raster.SetProjection(projection) + out_raster.SetGeoTransform( src_ds.GetGeoTransform() ) + # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) + out_raster.FlushCache() + + + def GetPointsFromXYZ(self, xyzfilename): + + """ + Get points from a xyzfile, using grdtrack. Return the output .xyz file. + Currently the output .xyz file is fixed as 'log_getUncertaintyDEM_grdtrack_output.xyz' + and will be overwritten by later commands. + Using Gdal. + """ + + print('Calling Grdtrack...') + newpath = 'log_getUncertaintyDEM_grdtrack_output.xyz' + grdtrack_cmd = 'grdtrack ' + xyzfilename +\ + ' -G' + self.fpath +\ + ' > ' + newpath + print(grdtrack_cmd) + retcode = subprocess.call(grdtrack_cmd, shell=True) + if retcode != 0: + print('Grdtrack failed. Please check if all the input parameters are properly set.') + sys.exit(retcode) + return newpath + + def AmpcorPrep(self): + + """ + Prepare to be used in ampcor. + Ampcor package in ISCE uses a special file pointer for accessing geotiff data. + Therefore, we need ISCE module "Image" for this purpose. + Using Gdal. + """ + + import isce + from isceobj.Image.Image import Image + + # ==== need a vrt file + # >= Python 3.4 + from pathlib import Path + vrtpath = Path(self.fpath + '.vrt') + if not vrtpath.is_file(): + print('Calling gdalbuildvrt...') + if self.fpath.startswith('http'): + gdalbuildvrt_cmd = 'gdalbuildvrt ' + os.path.basename(self.fpath) + '.vrt ' + self.fpath + else: + gdalbuildvrt_cmd = 'gdalbuildvrt ' + self.fpath + '.vrt ' + self.fpath + print(gdalbuildvrt_cmd) + retcode = subprocess.call(gdalbuildvrt_cmd, shell=True) + if retcode != 0: + print('gdalbuildvrt failed. 
Please check if all the input parameters are properly set.') + sys.exit(retcode) + # ==================== + + obj = Image() + obj.setFilename(self.fpath) + obj.setWidth(self.get_x_size()) # gdalinfo, first number + if self.GetDataType() <= 3: + obj.setDataType('SHORT') + elif 4 <= self.GetDataType() <= 5: + obj.setDataType('LONG') + elif self.GetDataType() == 6: + obj.setDataType('FLOAT') + elif self.GetDataType() == 7: + obj.setDataType('DOUBLE') # SHORT, LONG, FLOAT, DOUBLE, etc + else: + obj.setDataType('CFLOAT') # not totally right, may be wrong + # obj.setBands(1) # "self" requires a single band + # obj.setAccessMode('read') + obj.setCaster('read', 'FLOAT') # fixed as float + obj.createImage() + self.iscepointer = obj + + def ClippedByPolygon(self, polygon_shapefile): + + """ + return all pixel values within a given polygon shapefile. + according to + https://gis.stackexchange.com/questions/260304/extract-raster-values-within-shapefile-with-pygeoprocessing-or-gdal + """ + # from rasterio import logging + # log = logging.getLogger() + # log.setLevel(logging.ERROR) + + import rasterio + from rasterio.mask import mask + import geopandas as gpd + from shapely.geometry import mapping + + shapefile = gpd.read_file(polygon_shapefile) + geoms = shapefile.geometry.values + # geometry = geoms[0] # shapely geometry + # geoms = [mapping(geoms[0])] # transform to GeJSON format + geoms = [mapping(geoms[i]) for i in range(len(geoms))] + with rasterio.open(self.fpath) as src: + out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0) + # The out_image result is a Numpy masked array + # no_data = src.nodata + # if no_data is None: + # no_data = 0.0 + nodata = -9999.0 + # print(out_image) + # print(out_image.data.shape) + # print(out_image.data) + try: + clipped_data = out_image.data[0] + except NotImplementedError: + clipped_data = out_image[0] + # PROBABLY HAVE TO CHANGE TO out_image[0] HERE + # extract the valid values + # and return them as a numpy 1-D array + # return np.extract(clipped_data != nodata, clipped_data) + return clipped_data + + def GaussianHighPass(self, sigma=3, truncate=1.0): + + """ + Gaussian High Pass filter. Default sigma = 3. + Using Gdal. + """ + + from scipy.ndimage import gaussian_filter + data = self.ReadAsArray() + data = data.astype(float) + if self.get_nodata() is not None: + data[data == self.get_nodata()] = np.nan + else: + data[data == 0] = np.nan # LS-8 case + lowpass = gaussian_filter(data, sigma, truncate=truncate) + highpass = data - lowpass + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + hp_raster_path = tmp.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' + else: + hp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' + hp_raster = SingleRaster(hp_raster_path) + hp_raster.Array2Raster(highpass, self) + self.set_path(hp_raster_path) + + def GaussianLowPass(self, sigma=1): + + """ + Gaussian Low Pass filter. Default sigma = 1. + deal with nodata values. + https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python + Using Gdal. 
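+
+        The nodata handling below is in effect a normalized convolution: the
+        data (with nodata zeroed) and a 0/1 validity mask are filtered with
+        the same kernel and then divided, so pixels near gaps are not pulled
+        toward zero. A minimal standalone sketch of the same idea, with
+        made-up values:
+
+            import numpy as np
+            from scipy.ndimage import gaussian_filter
+            data = np.array([[1., np.nan], [1., 1.]])
+            w = np.where(np.isnan(data), 0., 1.)
+            vv = gaussian_filter(np.nan_to_num(data), sigma=1)
+            ww = gaussian_filter(w, sigma=1)
+            lowpass = vv / np.maximum(ww, np.finfo(float).eps)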
+ """ + + from scipy.ndimage import gaussian_filter + data = self.ReadAsArray() + # data[data == self.get_nodata()] = np.nan + # lowpass = gaussian_filter(data.astype(float), sigma, truncate=truncate) + nodata_pos = data == self.get_nodata() + data[nodata_pos] = 0 + w = np.ones_like(data) + w[nodata_pos] = 0 + vv = gaussian_filter(data.astype(float), sigma) + ww = gaussian_filter(w.astype(float), sigma) + ww[ww == 0] = np.finfo(float).eps + lowpass = vv / ww + lowpass[nodata_pos] = self.get_nodata() + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + lp_raster_path = tmp.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' + else: + lp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' + lp_raster = SingleRaster(lp_raster_path) + lp_raster.Array2Raster(lowpass, self) + self.set_path(lp_raster_path) + + def CannyEdge(self, sigma=3): + + """ + Canny Edge filter. Default sigma = 3. + deal with nodata values. + https://scikit-image.org/docs/dev/auto_examples/edges/plot_canny.html + Using Gdal. + """ + + from skimage import feature + data = self.ReadAsArray() + nodata_pos = data == self.get_nodata() + data[nodata_pos] = 0 + + edges = feature.canny(data, sigma=sigma) + edges[nodata_pos] = self.get_nodata() + + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + canny_raster_path = tmp.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' + else: + canny_raster_path = self.fpath.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' + canny_raster = SingleRaster(canny_raster_path) + canny_raster.Array2Raster(edges, self) + self.set_path(canny_raster_path) @@ -666,7 +686,7 @@ def VeloCorrection(self, vx_zarray, vy_zarray, output_raster_prefix): vx_zarray and vy_zarray are ZArray objects. 
''' - nodata_val = self.mag.GetNoDataValue() + nodata_val = self.mag.get_nodata() nodata_pos = self.mag.ReadAsArray() == nodata_val vx_val_corrected = self.vx.ReadAsArray() - vx_zarray.MAD_median @@ -729,21 +749,21 @@ def VeloCorrection(self, vx_zarray, vy_zarray, output_raster_prefix): def SNR_CutNoise(self, snr_threshold=5): bad_pts = self.snr.ReadAsArray() <= snr_threshold mag_val = self.mag.ReadAsArray() - mag_val[bad_pts] = self.mag.GetNoDataValue() + mag_val[bad_pts] = self.mag.get_nodata() raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '_SNT.tif') raster_mag_cutnoise.Array2Raster(mag_val, self.mag) self.SetMag(raster_mag_cutnoise) def Gaussian_CutNoise(self, sigma=1): mag_val = self.mag.ReadAsArray() - mag_val_ok = Gussian_noise_remover(mag_val, sigma=sigma, nodata_val=self.mag.GetNoDataValue()) + mag_val_ok = Gussian_noise_remover(mag_val, sigma=sigma, nodata_val=self.mag.get_nodata()) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-GAU.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) def MorphoOpen_CutNoise(self, iterations=1): mag_val = self.mag.ReadAsArray() - mag_val_ok = MorphoOpen_noise_remover(mag_val, nodata_val=self.mag.GetNoDataValue(), iterations=1) + mag_val_ok = MorphoOpen_noise_remover(mag_val, nodata_val=self.mag.get_nodata(), iterations=1) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-MOR.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) @@ -751,7 +771,7 @@ def MorphoOpen_CutNoise(self, iterations=1): def SmallObjects_CutNoise(self, min_size=17): from skimage.morphology import remove_small_objects mag_val = self.mag.ReadAsArray() - mag_val_ok = SmallObjects_noise_remover(mag_val, nodata_val=self.mag.GetNoDataValue(), min_size=min_size) + mag_val_ok = SmallObjects_noise_remover(mag_val, nodata_val=self.mag.get_nodata(), min_size=min_size) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-RSO.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) @@ -766,29 +786,29 @@ def Fahnestock_CutNoise(self): def MaskAllRasters(self): mag_val = self.mag.ReadAsArray() - nodata = mag_val == self.mag.GetNoDataValue() + nodata = mag_val == self.mag.get_nodata() vx_val = self.vx.ReadAsArray() - vx_val[nodata] = self.vx.GetNoDataValue() + vx_val[nodata] = self.vx.get_nodata() raster_vx_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'vx')) raster_vx_cutnoise.Array2Raster(vx_val, self.vx) self.SetVx(raster_vx_cutnoise) vy_val = self.vy.ReadAsArray() - vy_val[nodata] = self.vy.GetNoDataValue() + vy_val[nodata] = self.vy.get_nodata() raster_vy_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'vy')) raster_vy_cutnoise.Array2Raster(vy_val, self.vy) self.SetVy(raster_vy_cutnoise) errx_val = self.errx.ReadAsArray() - errx_val[nodata] = self.errx.GetNoDataValue() + errx_val[nodata] = self.errx.get_nodata() raster_errx_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'errx')) raster_errx_cutnoise.Array2Raster(errx_val, self.errx) self.SetErrx(raster_errx_cutnoise) erry_val = self.erry.ReadAsArray() - erry_val[nodata] = self.erry.GetNoDataValue() + erry_val[nodata] = self.erry.get_nodata() raster_erry_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'erry')) raster_erry_cutnoise.Array2Raster(erry_val, self.erry) self.SetErry(raster_erry_cutnoise) errmag_val = self.errmag.ReadAsArray() - errmag_val[nodata] = self.errmag.GetNoDataValue() + 
errmag_val[nodata] = self.errmag.get_nodata() raster_errmag_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'errmag')) raster_errmag_cutnoise.Array2Raster(errmag_val, self.errmag) self.SetErrmag(raster_errmag_cutnoise) From 9637faa070054757bf21b4e4560dfaed0efa9020 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 01:06:41 +0000 Subject: [PATCH 09/24] Organize libdhdt --- carst/libdhdt.py | 320 ++------------------------------- extra/unused/Resample_Array.py | 67 +++++++ extra/unused/TimeSeriesDEM.py | 137 ++++++++++++++ extra/unused/wlr.py | 70 ++++++++ 4 files changed, 290 insertions(+), 304 deletions(-) create mode 100644 extra/unused/Resample_Array.py create mode 100644 extra/unused/TimeSeriesDEM.py create mode 100644 extra/unused/wlr.py diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 47685dd..1fb8337 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -1,23 +1,22 @@ -# Class: TimeSeriesDEM(np.ndarray) -# Func: Resample_Array(UtilRaster.SingleRaster, UtilRaster.SingleRaster) -# used for dhdt +# Class: PixelTimeSeries, DemPile +# Func: timeit, resample_array, EVMD group, wlr_corefun, onclick_ipynb # by Whyjay Zheng, Jul 27 2016 # last edit: Apr 17 2023 import numpy as np from numpy.linalg import inv import os -import sys +# import sys try: - import gdal -except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal +except ImportError: + import gdal # this was used until GDAL 3.1 from datetime import datetime from shapely.geometry import Polygon -from scipy.interpolate import interp2d -from scipy.interpolate import NearestNDInterpolator -from scipy.interpolate import griddata -import gc +# from scipy.interpolate import interp2d +# from scipy.interpolate import NearestNDInterpolator +# from scipy.interpolate import griddata +# import gc from carst import ConfParams from carst.libraster import SingleRaster import pickle @@ -37,6 +36,7 @@ def time_wrapper(*args, **kwargs): def resample_array(source, reference, method='bilinear', destination=None): """ + latest version. resample the source raster using the extent and spacing provided by the reference raster. Two rasters must be in the same CRS. source: class UtilRaster.SingleRaster object @@ -46,7 +46,7 @@ def resample_array(source, reference, method='bilinear', destination=None): returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster. For better COG processing efficiency, we rasterio to check the if the source and the reference intersect each other. - For fasting resampling, we use GDAL and it C library. + For fasting resampling, we use GDAL and its C library. """ s_ulx, s_uly, s_lrx, s_lry = source.get_extent() @@ -64,93 +64,12 @@ def resample_array(source, reference, method='bilinear', destination=None): else: return np.full((reference.get_y_size(), reference.get_x_size()), reference.get_nodata()) -def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): - - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - - This one uses gdal.Warp API. 
- """ - - ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.get_extent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.get_x_res(), yRes=resamp_ref_dem.get_y_res(), resampleAlg=resampleAlg) - out_ds = gdal.Warp('tmp.tif', ds, options=opts) - return out_ds.GetRasterBand(1).ReadAsArray() - - -def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): - - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - - Uses linear interpolation because it best represent flat ice surface. - -9999.0 (default nan in a Geotiff) is used to fill area outside the extent of orig_dem. - - resamp_method: 'linear', 'cubic', 'quintic', 'nearest' - - """ - o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() - o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() - orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) - ulx, uly, lrx, lry = resamp_ref_dem.get_extent() - ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() - resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) - if orig_dem_extent.intersects(resamp_ref_dem_extent): - x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) - y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) - z = orig_dem.ReadAsArray() - # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement - if resamp_method == 'nearest': - print('resampling method = nearest') - xx, yy = np.meshgrid(x, y) - points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) - values = np.reshape(z, z.size) - fun = NearestNDInterpolator(points, values) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - xxnew, yynew = np.meshgrid(xnew, ynew) - znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) - elif resamp_method == 'linear': - nan_value = orig_dem.get_nodata() - print('resampling method = interp2d - ' + resamp_method) - fun = interp2d(x, y, z, kind=resamp_method, bounds_error=False, copy=False, fill_value=nan_value) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. - fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) - zmask = np.flipud(fun2(xnew, ynew)) - znew[zmask <= 0.999] = nan_value - else: - # This is deprecated... - print('resampling method = griddata - ' + resamp_method) - xx, yy = np.meshgrid(x, y) - xx = xx.ravel() - yy = yy.ravel() - zz = z.ravel() - realdata_pos = zz != orig_dem.get_nodata() - xx = xx[realdata_pos] - yy = yy[realdata_pos] - zz = zz[realdata_pos] - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') - del z - gc.collect() - return znew - else: - return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 def EVMD_DBSCAN(x, y, eps=6, min_samples=4): """ See EVMD & EVMD_idx. 
+ Recommended to use this inreplace of EVMD and EVMD_idx. """ # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration. x2d = np.column_stack((x / 365, y)) @@ -269,80 +188,6 @@ def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): return slope, slope_err, resid, count - # ============ Using Cook's Distance ============ - # cookd = ri2 / (p_num * mse) * (h / (1 - h) ** 2) # Cook's Distance - # goodpoint = cookd < 4 / x.size # Bollen, Kenneth A.; Jackman, Robert W. (1990) (see wikipedia) - # # xnew = x[goodpoint] - # # ynew = y[goodpoint] - # # yenew = ye[goodpoint] - # # wnew = [1 / k for k in yenew] - # if np.all(goodpoint): - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # count = x.size - # if resid > 100000: - # print(x,y,ye,cookd,goodpoint) - # return slope, slope_err, resid, count - # else: - # xnew = x[goodpoint] - # ynew = y[goodpoint] - # yenew = ye[goodpoint] - # return wlr_corefun(xnew, ynew, yenew) - - # =========== Old function =========== - # if xnew.size > 4: - # p, c = np.polyfit(xnew, ynew, 1, w=wnew, cov=True) - # _, res, _, _, _ = np.polyfit(xnew, ynew, 1, w=wnew, full=True) - # residual = np.sum((np.polyval(p, xnew) - ynew) ** 2) - # cov_m = c * (len(w) - 4) / res - # error = np.sqrt(cov_m[0, 0]) - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # else: - # error = -9999.0 - # slope = -9999.0 - # slope_err = -9999.0 - - # case = 0 - # w = [1 / k for k in ye] - # if len(x) == 4: - # case = 1 - # # This is to avoid dividing by zero when N = 4 and to give us a better error estimate - # x = np.append(x, x[-1]) - # y = np.append(y, y[-1]) - # ye = np.append(ye, ye[-1]) - # w = np.append(w, sys.float_info.epsilon) - # elif len(x) == 3: - # case = 2 - # # This is to avoid negative Cd^2 when N = 3 and to give us a better error estimate - # x = np.append(x, [x[-1], x[-1]]) - # y = np.append(y, [y[-1], y[-1]]) - # ye = np.append(ye, [ye[-1], ye[-1]]) - # w = np.append(w, [sys.float_info.epsilon, sys.float_info.epsilon]) - # p, c = np.polyfit(x, y, 1, w=w, cov=True) - # _, res, _, _, _ = np.polyfit(x, y, 1, w=w, full=True) - # # where c is the covariance matrix of p -> c[0, 0] is the variance estimate of the slope. - # # what we want is ({G_w}^T G_w)^{-1}, which is equal to c * (N - m - 2) / res - # cov_m = c * (len(w) - 4) / res - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # # slope_error_arr[m, n] = np.sqrt(c[0, 0]) * 365.25 - # # ./point_TS_ver2-2_linreg.py:^^^^^^^^: RuntimeWarning: invalid value encountered in sqrt - # # /data/whyj/Software/anaconda3/lib/python3.5/site-packages/numpy/lib/polynomial.py:606: RuntimeWarning: divide by zero encountered in true_divide - # # fac = resids / (len(x) - order - 2.0) - # if case == 0: - # residual = np.sum((np.polyval(p, x) - y) ** 2) - # elif case == 1: - # residual = np.sum((np.polyval(p, x[:-1]) - y[:-1]) ** 2) - # elif case == 2: - # residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2) - # return slope, slope_err, residual - - - - - - class PixelTimeSeries(object): """ Store all measurements of a single pixel in the reference DEM. @@ -398,7 +243,7 @@ class DemPile(object): is stored in the intermediate pickle file. 
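 
 	A rough sketch of the intended workflow (file names and dates are
 	illustrative only):
 
 	    pile = DemPile(picklepath='ts.p', refgeo=refgeo, refdate='2015-01-01')
 	    pile.AddDEM(dems)   # dems: a list of SingleRaster objects
 	    pile.PileUp()       # resample every DEM onto refgeo and stack it
 	    # each pixel then carries its own (date, value, uncertainty) series,
 	    # later dumped to picklepath as the intermediate pickle file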
""" - def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): + def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None, evmd_threshold=6): self.picklepath = picklepath self.dhdtprefix = dhdtprefix self.ts = None @@ -413,7 +258,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} self.mosaic = {'value': [], 'date': [], 'uncertainty': []} self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} - self.evmd_threshold = 6 + self.evmd_threshold = evmd_threshold def AddDEM(self, dems): # ==== Add DEM object list ==== @@ -488,7 +333,7 @@ def PileUp(self): if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: datedelta = self.dems[i].date - self.refdate # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') - znew = Resample_Array2(self.dems[i], self.refgeo) + znew = resample_array(self.dems[i], self.refgeo) znew_mask = np.logical_and(znew > 0, self.refgeomask) fill_idx = np.where(znew_mask) for m,n in zip(fill_idx[0], fill_idx[1]): @@ -680,137 +525,4 @@ def onclick_ipynb(event): -class TimeSeriesDEM(np.ndarray): - - """ - DEPRECATED IN DHDT v1.0. - - This class can include many DEMs (in UtilRaster.SingleRaster object) and then create a 3-D matrix. - each DEM is aligned along z-axis so that TimeSeriesDEM[m, n, :] would be the time series at pixel (m, n). - """ - - def __new__(cls, dem=None, array=None, date=None, uncertainty=None): - - """ - You need to provide a UtilRaster.SingleRaster object, or provide array, date, and uncertainty separately. - TimeSeriesDEM is a n-m-k ndarray matrix, which n-m is the dem dimension, k is the count of dems. - You need to make sure all input dems have the same size (pixel number). - TimeSeriesDEM.date: a list of datetime object, which the length is k. - TimeSeriesDEM.uncertainty: a list of uncertainty for each DEM. The length is also k. - - example: - tsdem = TimeSeriesDEM(dem=foo) ---> Add single DEM. the method "AddDEM" also does the trick after tsdem is created. - tsdem = TimeSeriesDEM(array=bar, date=bar2, uncertainty=bar3) ---> Add single DEMs or multiple DEMs. - Note that bar.shape[2] == len(bar2) == len(bar3) - - Refered to - http://docs.scipy.org/doc/numpy/user/basics.subclassing.html - http://stackoverflow.com/questions/27910013/how-can-a-class-that-inherits-from-a-numpy-array-change-its-own-values - """ - - if dem is not None: - # retrieve band 1 array, and then replace NoDataValue by np.nan - dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.get_nodata()] = np.nan - obj = np.asarray(dem_array).view(cls) - obj.date = [dem.date] - obj.uncertainty = [dem.uncertainty] - elif all([arg is not None for arg in [array, date, uncertainty]]): - obj = np.asarray(array).view(cls) - obj.date = date if type(date) is list else [date] - obj.uncertainty = uncertainty if type(uncertainty) is list else [uncertainty] - else: - raise ValueError('You need to either set "dem" or set "array, date and uncertainty".') - obj.daydelta = None - obj.weight = None - return obj - - def __array_finalize__(self, obj): - - """ See TimeSeriesDEM.__new__ for comments """ - - if obj is None: return - self.date = getattr(obj, 'date', None) - self.uncertainty = getattr(obj, 'uncertainty', None) - self.daydelta = getattr(obj, 'daydelta', None) - self.weight = getattr(obj, 'weight', None) - - - def AddDEM(self, dem): - - """ - Add a new DEM to the DEM time series. 
- dem is a UtilRaster.SingleRaster object. - """ - - self.date.append(dem.date) - self.uncertainty.append(dem.uncertainty) - # Add the first band, and then replace NoDataValue by np.nan - dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.get_nodata()] = np.nan - return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) - - def Date2DayDelta(self): - - """ - Make self.daydelta from [self.date - min(self.date)] - """ - - t = np.array(self.date) - min(self.date) - self.daydelta = np.array([i.days for i in t]) - - def SetWeight(self): - - """ - Weight is set to 1/sigma^2 - """ - - self.weight = 1.0 / np.array(self.uncertainty) ** 2 - - def Polyfit(self, min_count=5, min_time_span=365, min_year=2000, max_year=2016): - - """ - Note that x and w are all 1-d array like, with the same length of the third dimension of the reg_array. - w is the weight, which is often set to the inverse of data covariance matrix, C_d^-1 - Here w must have the same length of x. - """ - - reg_size = list(self.shape) - pixel_count = reg_size[0] * reg_size[1] - - y = self.reshape(pixel_count, reg_size[2]).T - slope = np.zeros(pixel_count) - slope_err = np.zeros(pixel_count) - intercept = np.zeros(pixel_count) - intercept_err = np.zeros(pixel_count) - for i in range(y.shape[1]): - if i % 10000 == 0: - print("processing " + str(i) + " pixels out of " + str(y.shape[1]) + " pixels") - px_y = y[:, i] - valid_idx = ~np.isnan(px_y) - # judge if a pixel is able to do regression using the given arguments - minlim_idx = np.array(self.date) > datetime(min_year, 1, 1) - maxlim_idx = np.array(self.date) < datetime(max_year, 1, 1) - valid_idx = valid_idx & minlim_idx & maxlim_idx - if sum(valid_idx) < min_count: - slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan - elif max(self.daydelta[valid_idx]) - min(self.daydelta[valid_idx]) < min_time_span: - slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan - # begin the polyfits - else: - px_y = px_y[valid_idx] - px_x = self.daydelta[valid_idx] - px_w = self.weight[valid_idx] - # This method minimizes sum(w * (y_i - y^hat_i) ^ 2) - # Here we set w=np.sqrt(px_w) becuase np.polyfit minimizes sum( (w * (y_i - y^hat_i)) ^ 2) by default. - # Covariance is estimated from multivariate t-distribution. - # Comparing to the v0.1 version, this new method is slightly more conservative - # because it considers the case of small d.o.f. - p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True) - slope[i] = p[0] - slope_err[i] = np.sqrt(c[0, 0]) - intercept[i] = p[1] - intercept_err[i] = np.sqrt(c[1, 1]) - - return slope.reshape(reg_size[:-1]), intercept.reshape(reg_size[:-1]), slope_err.reshape(reg_size[:-1]), intercept_err.reshape(reg_size[:-1]) diff --git a/extra/unused/Resample_Array.py b/extra/unused/Resample_Array.py new file mode 100644 index 0000000..06bf972 --- /dev/null +++ b/extra/unused/Resample_Array.py @@ -0,0 +1,67 @@ +# Old code to resample array. +# A newer version is in libdhdt.py. + +def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): + + """ + resample orig_dem using the extent and spacing provided by resamp_ref_dem + orig_dem: class UtilRaster.SingleRaster object + resamp_ref_dem: class UtilRaster.SingleRaster object + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster + + Uses linear interpolation because it best represent flat ice surface. + -9999.0 (default nan in a Geotiff) is used to fill area outside the extent of orig_dem. 
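+    For example (raster objects illustrative):
+        znew = Resample_Array(dem, refgeo, resamp_method='linear')
+        # znew is on refgeo's grid; cells outside dem's extent are -9999.0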
+ + resamp_method: 'linear', 'cubic', 'quintic', 'nearest' + + """ + o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() + o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() + orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() + ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() + resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) + if orig_dem_extent.intersects(resamp_ref_dem_extent): + x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) + y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) + z = orig_dem.ReadAsArray() + # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement + if resamp_method == 'nearest': + print('resampling method = nearest') + xx, yy = np.meshgrid(x, y) + points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) + values = np.reshape(z, z.size) + fun = NearestNDInterpolator(points, values) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + xxnew, yynew = np.meshgrid(xnew, ynew) + znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) + elif resamp_method == 'linear': + nan_value = orig_dem.get_nodata() + print('resampling method = interp2d - ' + resamp_method) + fun = interp2d(x, y, z, kind=resamp_method, bounds_error=False, copy=False, fill_value=nan_value) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. + fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) + zmask = np.flipud(fun2(xnew, ynew)) + znew[zmask <= 0.999] = nan_value + else: + # This is deprecated... + print('resampling method = griddata - ' + resamp_method) + xx, yy = np.meshgrid(x, y) + xx = xx.ravel() + yy = yy.ravel() + zz = z.ravel() + realdata_pos = zz != orig_dem.get_nodata() + xx = xx[realdata_pos] + yy = yy[realdata_pos] + zz = zz[realdata_pos] + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') + del z + gc.collect() + return znew + else: + return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 \ No newline at end of file diff --git a/extra/unused/TimeSeriesDEM.py b/extra/unused/TimeSeriesDEM.py new file mode 100644 index 0000000..d9ab888 --- /dev/null +++ b/extra/unused/TimeSeriesDEM.py @@ -0,0 +1,137 @@ +# TimeSeriesDEM is the predecessor of DemPile. +# It is a subclass of np.ndarray, but when the reference area is much bigger than the extent of each DEM, +# This method will take huge unnecessary memory space. + +class TimeSeriesDEM(np.ndarray): + + """ + DEPRECATED IN DHDT v1.0. + + This class can include many DEMs (in UtilRaster.SingleRaster object) and then create a 3-D matrix. + each DEM is aligned along z-axis so that TimeSeriesDEM[m, n, :] would be the time series at pixel (m, n). + """ + + def __new__(cls, dem=None, array=None, date=None, uncertainty=None): + + """ + You need to provide a UtilRaster.SingleRaster object, or provide array, date, and uncertainty separately. 
+ TimeSeriesDEM is a n-m-k ndarray matrix, which n-m is the dem dimension, k is the count of dems. + You need to make sure all input dems have the same size (pixel number). + TimeSeriesDEM.date: a list of datetime object, which the length is k. + TimeSeriesDEM.uncertainty: a list of uncertainty for each DEM. The length is also k. + + example: + tsdem = TimeSeriesDEM(dem=foo) ---> Add single DEM. the method "AddDEM" also does the trick after tsdem is created. + tsdem = TimeSeriesDEM(array=bar, date=bar2, uncertainty=bar3) ---> Add single DEMs or multiple DEMs. + Note that bar.shape[2] == len(bar2) == len(bar3) + + Refered to + http://docs.scipy.org/doc/numpy/user/basics.subclassing.html + http://stackoverflow.com/questions/27910013/how-can-a-class-that-inherits-from-a-numpy-array-change-its-own-values + """ + + if dem is not None: + # retrieve band 1 array, and then replace NoDataValue by np.nan + dem_array = dem.ReadAsArray() + dem_array[dem_array == dem.get_nodata()] = np.nan + obj = np.asarray(dem_array).view(cls) + obj.date = [dem.date] + obj.uncertainty = [dem.uncertainty] + elif all([arg is not None for arg in [array, date, uncertainty]]): + obj = np.asarray(array).view(cls) + obj.date = date if type(date) is list else [date] + obj.uncertainty = uncertainty if type(uncertainty) is list else [uncertainty] + else: + raise ValueError('You need to either set "dem" or set "array, date and uncertainty".') + obj.daydelta = None + obj.weight = None + return obj + + def __array_finalize__(self, obj): + + """ See TimeSeriesDEM.__new__ for comments """ + + if obj is None: return + self.date = getattr(obj, 'date', None) + self.uncertainty = getattr(obj, 'uncertainty', None) + self.daydelta = getattr(obj, 'daydelta', None) + self.weight = getattr(obj, 'weight', None) + + + def AddDEM(self, dem): + + """ + Add a new DEM to the DEM time series. + dem is a UtilRaster.SingleRaster object. + """ + + self.date.append(dem.date) + self.uncertainty.append(dem.uncertainty) + # Add the first band, and then replace NoDataValue by np.nan + dem_array = dem.ReadAsArray() + dem_array[dem_array == dem.get_nodata()] = np.nan + return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) + + def Date2DayDelta(self): + + """ + Make self.daydelta from [self.date - min(self.date)] + """ + + t = np.array(self.date) - min(self.date) + self.daydelta = np.array([i.days for i in t]) + + def SetWeight(self): + + """ + Weight is set to 1/sigma^2 + """ + + self.weight = 1.0 / np.array(self.uncertainty) ** 2 + + def Polyfit(self, min_count=5, min_time_span=365, min_year=2000, max_year=2016): + + """ + Note that x and w are all 1-d array like, with the same length of the third dimension of the reg_array. + w is the weight, which is often set to the inverse of data covariance matrix, C_d^-1 + Here w must have the same length of x. 
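+
+    Since np.polyfit minimizes sum((w_i * (y_i - yhat_i))**2), the body
+    below passes w=np.sqrt(px_w) with px_w = 1/sigma**2, which recovers
+    the usual weighted least squares. A sketch of the core step:
+
+        p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True)
+        slope, slope_err = p[0], np.sqrt(c[0, 0])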
+ """ + + reg_size = list(self.shape) + pixel_count = reg_size[0] * reg_size[1] + + y = self.reshape(pixel_count, reg_size[2]).T + slope = np.zeros(pixel_count) + slope_err = np.zeros(pixel_count) + intercept = np.zeros(pixel_count) + intercept_err = np.zeros(pixel_count) + for i in range(y.shape[1]): + if i % 10000 == 0: + print("processing " + str(i) + " pixels out of " + str(y.shape[1]) + " pixels") + px_y = y[:, i] + valid_idx = ~np.isnan(px_y) + # judge if a pixel is able to do regression using the given arguments + minlim_idx = np.array(self.date) > datetime(min_year, 1, 1) + maxlim_idx = np.array(self.date) < datetime(max_year, 1, 1) + valid_idx = valid_idx & minlim_idx & maxlim_idx + if sum(valid_idx) < min_count: + slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan + elif max(self.daydelta[valid_idx]) - min(self.daydelta[valid_idx]) < min_time_span: + slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan + # begin the polyfits + else: + px_y = px_y[valid_idx] + px_x = self.daydelta[valid_idx] + px_w = self.weight[valid_idx] + # This method minimizes sum(w * (y_i - y^hat_i) ^ 2) + # Here we set w=np.sqrt(px_w) becuase np.polyfit minimizes sum( (w * (y_i - y^hat_i)) ^ 2) by default. + # Covariance is estimated from multivariate t-distribution. + # Comparing to the v0.1 version, this new method is slightly more conservative + # because it considers the case of small d.o.f. + p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True) + slope[i] = p[0] + slope_err[i] = np.sqrt(c[0, 0]) + intercept[i] = p[1] + intercept_err[i] = np.sqrt(c[1, 1]) + + return slope.reshape(reg_size[:-1]), intercept.reshape(reg_size[:-1]), slope_err.reshape(reg_size[:-1]), intercept_err.reshape(reg_size[:-1]) diff --git a/extra/unused/wlr.py b/extra/unused/wlr.py new file mode 100644 index 0000000..eab937a --- /dev/null +++ b/extra/unused/wlr.py @@ -0,0 +1,70 @@ +# This script archives old code for wlr_corefun in libdhdt.py. + +# ============ Using Cook's Distance ============ + # cookd = ri2 / (p_num * mse) * (h / (1 - h) ** 2) # Cook's Distance + # goodpoint = cookd < 4 / x.size # Bollen, Kenneth A.; Jackman, Robert W. 
(1990) (see wikipedia)
+    # # xnew = x[goodpoint]
+    # # ynew = y[goodpoint]
+    # # yenew = ye[goodpoint]
+    # # wnew = [1 / k for k in yenew]
+    # if np.all(goodpoint):
+    #     slope = p[0] * 365.25
+    #     slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    #     count = x.size
+    #     if resid > 100000:
+    #         print(x,y,ye,cookd,goodpoint)
+    #     return slope, slope_err, resid, count
+    # else:
+    #     xnew = x[goodpoint]
+    #     ynew = y[goodpoint]
+    #     yenew = ye[goodpoint]
+    #     return wlr_corefun(xnew, ynew, yenew)
+
+    # =========== Old function ===========
+    # if xnew.size > 4:
+    #     p, c = np.polyfit(xnew, ynew, 1, w=wnew, cov=True)
+    #     _, res, _, _, _ = np.polyfit(xnew, ynew, 1, w=wnew, full=True)
+    #     residual = np.sum((np.polyval(p, xnew) - ynew) ** 2)
+    #     cov_m = c * (len(w) - 4) / res
+    #     error = np.sqrt(cov_m[0, 0])
+    #     slope = p[0] * 365.25
+    #     slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    # else:
+    #     error = -9999.0
+    #     slope = -9999.0
+    #     slope_err = -9999.0
+
+    # case = 0
+    # w = [1 / k for k in ye]
+    # if len(x) == 4:
+    #     case = 1
+    #     # This is to avoid dividing by zero when N = 4 and to give us a better error estimate
+    #     x = np.append(x, x[-1])
+    #     y = np.append(y, y[-1])
+    #     ye = np.append(ye, ye[-1])
+    #     w = np.append(w, sys.float_info.epsilon)
+    # elif len(x) == 3:
+    #     case = 2
+    #     # This is to avoid negative Cd^2 when N = 3 and to give us a better error estimate
+    #     x = np.append(x, [x[-1], x[-1]])
+    #     y = np.append(y, [y[-1], y[-1]])
+    #     ye = np.append(ye, [ye[-1], ye[-1]])
+    #     w = np.append(w, [sys.float_info.epsilon, sys.float_info.epsilon])
+    # p, c = np.polyfit(x, y, 1, w=w, cov=True)
+    # _, res, _, _, _ = np.polyfit(x, y, 1, w=w, full=True)
+    # # where c is the covariance matrix of p -> c[0, 0] is the variance estimate of the slope.
+    # # what we want is ({G_w}^T G_w)^{-1}, which is equal to c * (N - m - 2) / res
+    # cov_m = c * (len(w) - 4) / res
+    # slope = p[0] * 365.25
+    # slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    # # slope_error_arr[m, n] = np.sqrt(c[0, 0]) * 365.25
+    # # ./point_TS_ver2-2_linreg.py:^^^^^^^^: RuntimeWarning: invalid value encountered in sqrt
+    # # /data/whyj/Software/anaconda3/lib/python3.5/site-packages/numpy/lib/polynomial.py:606: RuntimeWarning: divide by zero encountered in true_divide
+    # #   fac = resids / (len(x) - order - 2.0)
+    # if case == 0:
+    #     residual = np.sum((np.polyval(p, x) - y) ** 2)
+    # elif case == 1:
+    #     residual = np.sum((np.polyval(p, x[:-1]) - y[:-1]) ** 2)
+    # elif case == 2:
+    #     residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2)
+    # return slope, slope_err, residual
\ No newline at end of file

From f73a1fc14203f748042050aefd22cc1de2d2f2f7 Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 03:23:45 +0000
Subject: [PATCH 10/24] Add do_evmd

---
 carst/libdhdt.py | 79 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 8 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 1fb8337..1349ecc 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -70,12 +70,18 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4):
     """
     See EVMD & EVMD_idx.
     Recommended to use this in place of EVMD and EVMD_idx.
+    verified_y_labels:
+        -1: outliers
+        0: 1st cluster
+        1: 2nd cluster
+        ...
     """
     # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration.
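     # With the day counts rescaled to years and y in elevation units, eps acts as a
     # single Euclidean radius in the mixed (time, elevation) space, so one threshold
     # screens both axes at once.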
x2d = np.column_stack((x / 365, y)) if x2d.shape[0] >= 2: db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) - verified_y_labels = db.labels_ >= 0 + # verified_y_labels = db.labels_ >= 0 + verified_y_labels = db.labels_ # verified_y_labels_idx = np.where(verified_y_labels) if any(verified_y_labels): exitstate = 1 @@ -83,7 +89,8 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4): exitstate = -1 else: exitstate = -1 - verified_y_labels = np.full_like(y, False) + # verified_y_labels = np.full_like(y, False) + verified_y_labels = np.full_like(y, -1) return exitstate, verified_y_labels @@ -141,7 +148,8 @@ def EVMD_idx(y, validated_value, threshold=6): def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - exitstate, idx = EVMD_DBSCAN(x, y, eps=evmd_threshold) + exitstate, labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + idx = labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size @@ -195,12 +203,14 @@ class PixelTimeSeries(object): Column 1: date Column 2: value Column 3: uncertainty + EVMD labels are also stored in this object. """ def __init__(self, data=[]): data = np.ndarray([0, 3]) if not data else data # [] --> np.ndarray([0, 3]) data = np.array(data) if type(data) is not np.ndarray else data # [1, 2, 3] --> np.array([1, 2, 3]) self.verify_record(data) self.data = data + self.evmd_labels = None def __repr__(self): return 'PixelTimeSeries({})'.format(self.data) @@ -216,7 +226,7 @@ def verify_record(record): pass else: raise ValueError("Inconsistent input record. Must be an n-by-3 array.") - + def add_record(self, record): """ Add one or more records to the time series. @@ -225,6 +235,20 @@ def add_record(self, record): self.verify_record(record) self.data = np.vstack((self.data, record)) + def verify_evmd_labels(self, labels): + """ + Verify if the input EVMD labels meet the format requirements. + """ + if labels.size == self.data.shape[0]: + pass + else: + raise ValueError("Inconsistent EVMD label input. 
Must have n labels, where n equals the number of data points.")
+
+    def add_evmd_labels(self, labels):
+        labels = np.array(labels) if type(labels) is not np.ndarray else labels
+        self.verify_evmd_labels(labels)
+        self.evmd_labels = labels
+
     def get_date(self):
         return self.data[:, 0]
 
@@ -309,7 +333,7 @@ def InitTS(self):
         # self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)]
         # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)]
         print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask)))
-
+
     def ReadConfig(self, ini):
         if type(ini) is str:
             ini = ConfParams(ini)
@@ -446,9 +470,9 @@ def form_mosaic(self, order='ascending', method='DBSCAN'):
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
                 if method == 'DBSCAN':
-                    exitstate, verified_y_labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
                     if exitstate >= 0:
-                        verified_y_labels_idx = np.where(verified_y_labels)[0]
+                        verified_y_labels_idx = np.where(labels >= 0)[0]
                         if order == 'descending':
                             idx = verified_y_labels_idx[-1]
                         elif order == 'ascending':
@@ -477,7 +501,46 @@ def form_mosaic(self, order='ascending', method='DBSCAN'):
         mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order))
         mosaic_value.Array2Raster(self.mosaic['value'], self.refgeo)
         mosaic_date.Array2Raster(self.mosaic['date'], self.refgeo)
-        mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
+        mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
+
+    @timeit
+    def do_evmd(self, parallel=True):
+        if parallel:
+            import dask
+            from dask.diagnostics import ProgressBar
+            def batch(seq):
+                sub_results = []
+                for x in seq:
+                    date = x.get_date()
+                    value = x.get_value()
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    sub_results.append(labels)
+                return sub_results
+
+            batches = []
+            for m in range(self.ts.shape[0]):
+                result_batch = dask.delayed(batch)(self.ts[m, :])
+                batches.append(result_batch)
+
+            with ProgressBar():
+                results = dask.compute(batches)
+
+            for m in range(self.ts.shape[0]):
+                for n in range(self.ts.shape[1]):
+                    self.ts[m, n].add_evmd_labels(results[0][m][n])
+        else:
+            for m in range(self.ts.shape[0]):
+                self.display_progress(m, self.ts.shape[0])
+                for n in range(self.ts.shape[1]):
+                    date = self.ts[m, n].get_date()
+                    value = self.ts[m, n].get_value()
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    self.ts[m, n].add_evmd_labels(labels)
+
+    @staticmethod
+    def display_progress(m, total):
+        if m % 100 == 0:
+            print(f'{m}/{total} lines processed')
 
     def viz(self):
         dhdt_raster, _, _, _ = self.ShowDhdtTifs()

From c43795cbb32641b4982883400bc08522aacfed2d Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 04:06:24 +0000
Subject: [PATCH 11/24] Parallelize Polyfit

---
 carst/libdhdt.py | 155 +++++++++++++++++++++++++++--------------------
 1 file changed, 90 insertions(+), 65 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 1349ecc..5bcb4ff 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -78,7 +78,7 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4):
     """
     # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration.
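     # The size guard below (x2d.shape[0] >= min_samples) reflects that DBSCAN cannot
     # form a cluster from fewer than min_samples points; shorter records exit early
     # with exitstate = -1 and every label set to -1.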
x2d = np.column_stack((x / 365, y)) - if x2d.shape[0] >= 2: + if x2d.shape[0] >= min_samples: db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) # verified_y_labels = db.labels_ >= 0 verified_y_labels = db.labels_ @@ -145,11 +145,15 @@ def EVMD_idx(y, validated_value, threshold=6): validated_range = [min(tmp), max(tmp)] return idx -def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): +def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - exitstate, labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) - idx = labels >= 0 + if evmd_labels is None: + exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + else: + exitstate = 1 if any(evmd_labels >= 0) else -1 + + idx = evmd_labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size @@ -215,6 +219,15 @@ def __init__(self, data=[]): def __repr__(self): return 'PixelTimeSeries({})'.format(self.data) + def get_date(self): + return self.data[:, 0] + + def get_value(self): + return self.data[:, 1] + + def get_uncertainty(self): + return self.data[:, 2] + @staticmethod def verify_record(record): """ @@ -249,14 +262,20 @@ def add_evmd_labels(self, labels): self.verify_evmd_labels(labels) self.evmd_labels = labels - def get_date(self): - return self.data[:, 0] - - def get_value(self): - return self.data[:, 1] + def do_evmd(self, eps=6): + date = self.get_date() + value = self.get_value() + exitstate, labels = EVMD_DBSCAN(date, value, eps=eps) + return exitstate, labels - def get_uncertainty(self): - return self.data[:, 2] + def do_wlr(self, evmd_threshold=6): + date = self.get_date() + value = self.get_value() + uncertainty = self.get_uncertainty() + evmd_labels = self.evmd_labels + single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold) + # single_results is (slope, slope_err, residual, count) + return single_results class DemPile(object): @@ -381,56 +400,65 @@ def DumpPickle(self): def LoadPickle(self): self.ts = pickle.load( open(self.picklepath, "rb") ) - + + def init_fitdata(self): + # ==== Create final array ==== + self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata()) + @timeit - def Polyfit(self): + def Polyfit(self, parallel=False): # ==== Create final array ==== - self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 + self.init_fitdata() # ==== Weighted regression ==== - print('m total: ', len(self.ts)) - for m in range(len(self.ts)): - if m % 100 == 0: - print(m) - for n in range(len(self.ts[0])): - # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) - # if len(self.ts[m][n]['date']) >= 3: - date = self.ts[m, n].get_date() - # date = np.array(self.ts[m][n]['date']) - uncertainty = self.ts[m, n].get_uncertainty() - # uncertainty = np.array(self.ts[m][n]['uncertainty']) - value = self.ts[m, n].get_value() - # value = np.array(self.ts[m][n]['value']) 
- # pin_value = pin_dem_array[m ,n] - # pin_date = pin_dem_date_array[m, n] - # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) - # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) - - # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] - # for i in reversed(range(len(slope_ref))): - # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): - # _ = date.pop(i) - # _ = uncertainty.pop(i) - # _ = value.pop(i) - # self.fitdata['count'][m, n] = len(date) - - # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 - # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) - # if residual > 100: - # print(date, value, uncertainty) - self.fitdata['slope'][m, n] = slope - self.fitdata['slope_err'][m, n] = slope_err - self.fitdata['residual'][m, n] = residual - self.fitdata['count'][m, n] = count - # else: - # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) - # elif (date[-1] - date[0] > 0): - # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 - # self.fitdata['count'][~self.refgeomask] = -9999 + if parallel: + print('sds') + import dask + from dask.diagnostics import ProgressBar + def batch(seq): + sub_results = [] + for x in seq: + date = x.get_date() + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + single_results = x.do_wlr(evmd_threshold=self.evmd_threshold) + # single_results is (slope, slope_err, residual, count) + sub_results.append(single_results) + return sub_results + + batches = [] + for m in range(self.ts.shape[0]): + result_batch = dask.delayed(batch)(self.ts[m, :]) + batches.append(result_batch) + + with ProgressBar(): + results = dask.compute(batches) + + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + self.fitdata['slope'][m, n] = results[0][m][n][0] + self.fitdata['slope_err'][m, n] = results[0][m][n][1] + self.fitdata['residual'][m, n] = results[0][m][n][2] + self.fitdata['count'][m, n] = results[0][m][n][3] + + else: + for m in range(self.ts.shape[0]): + self.display_progress(m, self.ts.shape[0]) + for n in range(self.ts.shape[1]): + date = self.ts[m, n].get_date() + # uncertainty = self.ts[m, n].get_uncertainty() + # value = self.ts[m, n].get_value() + # evmd_labels = self.ts[m, n].evmd_labels + + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + slope, slope_err, residual, count = self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold) + # if residual > 100: + # print(date, value, uncertainty) + self.fitdata['slope'][m, n] = slope + self.fitdata['slope_err'][m, n] = slope_err + self.fitdata['residual'][m, n] = residual + self.fitdata['count'][m, n] = count def Fitdata2File(self): # ==== Write to file ==== @@ -504,16 +532,14 @@ def form_mosaic(self, order='ascending', method='DBSCAN'): mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo) @timeit - def do_evmd(self, parallel=True): + def do_evmd(self, parallel=False): if parallel: import dask from dask.diagnostics import ProgressBar def batch(seq): sub_results = [] for x in seq: - date = x.get_date() - value = x.get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold) + exitstate, labels = 
x.do_evmd(eps=self.evmd_threshold) sub_results.append(labels) return sub_results @@ -532,10 +558,9 @@ def batch(seq): for m in range(self.ts.shape[0]): self.display_progress(m, self.ts.shape[0]) for n in range(self.ts.shape[1]): - date = self.ts[m, n].get_date() - value = self.ts[m, n].get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold) + exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold) self.ts[m, n].add_evmd_labels(labels) + @staticmethod def display_progress(m, total): From b5c8430479ba9eb02e86880a0f2acd1bbf6cd482 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 06:09:52 +0000 Subject: [PATCH 12/24] Change variable names for PEP8 (dhdt only for now) --- bin/dhdt.py | 30 ++++++++++++--------------- carst/libdhdt.py | 54 ++++++++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/bin/dhdt.py b/bin/dhdt.py index 9d0e5a3..5e0336b 100755 --- a/bin/dhdt.py +++ b/bin/dhdt.py @@ -30,34 +30,30 @@ # ==== Read ini file ==== inipath = args.config_file -# ini = ConfParams(inipath) -# ini.ReadParam() -# ini.VerifyParam() # ==== Create a DemPile object and load the config file into the object ==== a = DemPile() -# a.ReadConfig(ini) -a.ReadConfig(inipath) +a.read_config(inipath) # ==== Run main processes ==== if args.step is None: - a.InitTS() - a.PileUp() - a.DumpPickle() - a.Polyfit() - a.Fitdata2File() + a.init_ts() + a.pileup() + a.dump_pickle() + a.polyfit() + a.fitdata2file() elif args.step == 'stack': - a.InitTS() - a.PileUp() - a.DumpPickle() + a.init_ts() + a.pileup() + a.dump_pickle() elif args.step == 'dhdt': - a.LoadPickle() - a.Polyfit() - a.Fitdata2File() + a.load_pickle() + a.polyfit() + a.fitdata2file() elif args.step == 'viewts': - a.LoadPickle() + a.load_pickle() a.viz() plt.show() # data = a.ts diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 5bcb4ff..17fb0fc 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -13,10 +13,6 @@ import gdal # this was used until GDAL 3.1 from datetime import datetime from shapely.geometry import Polygon -# from scipy.interpolate import interp2d -# from scipy.interpolate import NearestNDInterpolator -# from scipy.interpolate import griddata -# import gc from carst import ConfParams from carst.libraster import SingleRaster import pickle @@ -303,7 +299,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None, self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} self.evmd_threshold = evmd_threshold - def AddDEM(self, dems): + def add_dem(self, dems): # ==== Add DEM object list ==== if type(dems) is list: self.dems = self.dems + dems @@ -312,13 +308,13 @@ def AddDEM(self, dems): else: raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") - def SortByDate(self): + def sort_by_date(self): # ==== sort DEMs by date (ascending order) ==== dem_dates = [i.date for i in self.dems] dateidx = np.argsort(dem_dates) self.dems = [self.dems[i] for i in dateidx] - def SetRefGeo(self, refgeo): + def set_refgeo(self, refgeo): # ==== Prepare the reference geometry ==== if type(refgeo) is str: self.refgeo = SingleRaster(refgeo) @@ -328,20 +324,20 @@ def SetRefGeo(self, refgeo): raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") self.refgeomask = self.refgeo.ReadAsArray().astype(bool) - def SetRefDate(self, datestr): + def set_refdate(self, datestr): self.refdate = datetime.strptime(datestr, '%Y-%m-%d') - def 
SetMaskParam(self, ini): + def set_mask_params(self, ini): if 'max_uncertainty' in ini.settings: self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) if 'min_time_span' in ini.settings: self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) - def SetEVMDThreshold(self, ini): + def set_evmd_threshold(self, ini): if 'evmd_threshold' in ini.regression: self.evmd_threshold = float(ini.regression['evmd_threshold']) - def InitTS(self): + def init_ts(self): # ==== Prepare the reference geometry ==== refgeo_Ysize = self.refgeo.get_y_size() refgeo_Xsize = self.refgeo.get_x_size() @@ -353,22 +349,22 @@ def InitTS(self): # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) - def ReadConfig(self, ini): + def read_config(self, ini): if type(ini) is str: ini = ConfParams(ini) ini.ReadParam() ini.VerifyParam() self.picklepath = ini.result['picklefile'] self.dhdtprefix = ini.result['dhdt_prefix'] - self.AddDEM(ini.GetDEM()) - self.SortByDate() - self.SetRefGeo(ini.refgeometry['gtiff']) - self.SetRefDate(ini.settings['refdate']) - self.SetMaskParam(ini) - self.SetEVMDThreshold(ini) + self.add_dem(ini.GetDEM()) + self.sort_by_date() + self.set_refgeo(ini.refgeometry['gtiff']) + self.set_refdate(ini.settings['refdate']) + self.set_mask_params(ini) + self.set_evmd_threshold(ini) @timeit - def PileUp(self): + def pileup(self): # ==== Start to read every DEM and save it to our final array ==== ts = [[ [] for n in range(self.ts.shape[1])] for m in range(self.ts.shape[0])] for i in range(len(self.dems)): @@ -395,21 +391,21 @@ def PileUp(self): for n in range(self.ts.shape[1]): self.ts[m, n] = PixelTimeSeries(ts[m][n]) - def DumpPickle(self): + def dump_pickle(self): pickle.dump(self.ts, open(self.picklepath, "wb")) - def LoadPickle(self): + def load_pickle(self): self.ts = pickle.load( open(self.picklepath, "rb") ) def init_fitdata(self): # ==== Create final array ==== - self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) @timeit - def Polyfit(self, parallel=False): + def polyfit(self, parallel=False): # ==== Create final array ==== self.init_fitdata() # ==== Weighted regression ==== @@ -460,7 +456,7 @@ def batch(seq): self.fitdata['residual'][m, n] = residual self.fitdata['count'][m, n] = count - def Fitdata2File(self): + def fitdata2file(self): # ==== Write to file ==== dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') @@ -471,7 +467,7 @@ def Fitdata2File(self): dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) - def ShowDhdtTifs(self): + def show_dhdt_tifs(self): dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') dhdt_res = SingleRaster(self.dhdtprefix + 
'_dhdt_residual.tif')
@@ -568,7 +564,7 @@ def display_progress(m, total):
             print(f'{m}/{total} lines processed')
 
     def viz(self):
-        dhdt_raster, _, _, _ = self.ShowDhdtTifs()
+        dhdt_raster, _, _, _ = self.show_dhdt_tifs()
         img = dhdt_raster.ReadAsArray()
         img[img < -9000] = np.nan  # might need to change

From f044c5561f5a7460c7658b49feaa3c414161ffdc Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 08:02:52 +0000
Subject: [PATCH 13/24] Handling COG IO error

---
 carst/libdhdt.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 17fb0fc..8316555 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -18,6 +18,7 @@
 import pickle
 import matplotlib.pyplot as plt
 from sklearn.cluster import DBSCAN
+from rasterio.errors import RasterioIOError
 
 def timeit(func):
     def time_wrapper(*args, **kwargs):
@@ -371,17 +372,17 @@ def pileup(self):
             print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) ))
             if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']:
                 datedelta = self.dems[i].date - self.refdate
-                # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear')
-                znew = resample_array(self.dems[i], self.refgeo)
+                try:
+                    znew = resample_array(self.dems[i], self.refgeo)
+                except RasterioIOError as inst:    # report and skip any DEM that fails to open (e.g., a bad URL)
+                    print(inst)
+                    continue
                 znew_mask = np.logical_and(znew > 0, self.refgeomask)
                 fill_idx = np.where(znew_mask)
                 for m,n in zip(fill_idx[0], fill_idx[1]):
                     record = [datedelta.days, znew[m, n], self.dems[i].uncertainty]
-                    # self.ts[m, n].add_record(record)
                     ts[m][n] += [record]
-                    # self.ts[m][n]['date'] += [datedelta.days]
-                    # self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty]
-                    # self.ts[m][n]['value'] += [znew[m, n]]
+
             else:
                 print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty allowed ({})."
.format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) From c704e3ff1a826da956ee3f6fa418b970134ce915 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 14:39:36 +0000 Subject: [PATCH 14/24] Fix parallel bug for distributed scheduler --- carst/libdhdt.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 8316555..e6b4fff 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -411,22 +411,23 @@ def polyfit(self, parallel=False): self.init_fitdata() # ==== Weighted regression ==== if parallel: - print('sds') import dask from dask.diagnostics import ProgressBar - def batch(seq): + def batch(seq, min_time_span, evmd_threshold, nodata): sub_results = [] for x in seq: date = x.get_date() - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - single_results = x.do_wlr(evmd_threshold=self.evmd_threshold) + if date.size >= 2 and date[-1] - date[0] > min_time_span: + single_results = x.do_wlr(evmd_threshold=evmd_threshold) # single_results is (slope, slope_err, residual, count) + else: + single_results = (nodata, nodata, nodata, nodata) sub_results.append(single_results) return sub_results batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :]) + result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) batches.append(result_batch) with ProgressBar(): @@ -533,16 +534,16 @@ def do_evmd(self, parallel=False): if parallel: import dask from dask.diagnostics import ProgressBar - def batch(seq): + def batch(seq, evmd_threshold): sub_results = [] for x in seq: - exitstate, labels = x.do_evmd(eps=self.evmd_threshold) + exitstate, labels = x.do_evmd(eps=evmd_threshold) sub_results.append(labels) return sub_results batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :]) + result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold) batches.append(result_batch) with ProgressBar(): From b7672c1550df0f71292a159daa6e224993db1234 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 23 Apr 2023 01:07:21 +0000 Subject: [PATCH 15/24] Update form mosaic --- carst/libdhdt.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e6b4fff..6c5a3be 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -1,7 +1,7 @@ # Class: PixelTimeSeries, DemPile # Func: timeit, resample_array, EVMD group, wlr_corefun, onclick_ipynb # by Whyjay Zheng, Jul 27 2016 -# last edit: Apr 17 2023 +# last edit: Apr 23 2023 import numpy as np from numpy.linalg import inv @@ -80,7 +80,7 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4): # verified_y_labels = db.labels_ >= 0 verified_y_labels = db.labels_ # verified_y_labels_idx = np.where(verified_y_labels) - if any(verified_y_labels): + if any(verified_y_labels >= 0): exitstate = 1 else: exitstate = -1 @@ -405,6 +405,11 @@ def init_fitdata(self): self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + def init_mosaic(self): + self.mosaic['value'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.mosaic['date'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + @timeit def 
polyfit(self, parallel=False):
         # ==== Create final array ====
@@ -477,7 +482,7 @@ def show_dhdt_tifs(self):
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count
 
     @timeit
-    def form_mosaic(self, order='ascending', method='DBSCAN'):
+    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
         """
         order options:
             ascending: early elevations will be populated first
             descending: recent elevations will be populated first
         method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
-        self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float)
-        self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float)
-        self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float)
+        self.init_mosaic()
+        if method == 'DBSCAN' and self.ts[0, 0].evmd_labels is None:
+            print('No EVMD labels detected. Run do_evmd first.')
+            self.do_evmd(parallel=parallel)
         for m in range(self.ts.shape[0]):
-            if m % 100 == 0:
-                print(m)
+            self.display_progress(m, self.ts.shape[0])
             for n in range(self.ts.shape[1]):
                 date = self.ts[m, n].get_date()
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
                 if method == 'DBSCAN':
-                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
-                    if exitstate >= 0:
+                    labels = self.ts[m, n].evmd_labels
+                    if any(labels >= 0):
                         verified_y_labels_idx = np.where(labels >= 0)[0]
                         if order == 'descending':
                             idx = verified_y_labels_idx[-1]
@@ -565,13 +570,18 @@ def display_progress(m, total):
         if m % 100 == 0:
             print(f'{m}/{total} lines processed')
 
-    def viz(self):
+    def viz(self, figsize=(8,8), clim=(-6, 6)):
         dhdt_raster, _, _, _ = self.show_dhdt_tifs()
+        try:
+            nodata = dhdt_raster.get_nodata()
+        except RasterioIOError:
+            print(f'No such file: {dhdt_raster.fname} -- We recommend running polyfit first.')
+            print('TBD')
         img = dhdt_raster.ReadAsArray()
-        img[img < -9000] = np.nan  # might need to change
+        img[img == nodata] = np.nan
 
-        fig, axs = plt.subplots(2, 1, figsize=(8,8))  # figsize might need to change
-        first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6)  # minmax might need to change
+        fig, axs = plt.subplots(2, 1, figsize=figsize)
+        first_img = axs[0].imshow(img, cmap='RdBu', vmin=clim[0], vmax=clim[1])
         onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold)
         cid = fig.canvas.mpl_connect('button_press_event', onclick)

From 881a6b7343e6fd860e1e975abcc8c66d2cebf5be Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Mon, 24 Apr 2023 09:46:45 +0000
Subject: [PATCH 16/24] Add adjustable chunk size for parallel (evmd and polyfit only)

---
 carst/libdhdt.py | 111 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 26 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 6c5a3be..57bdd16 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -411,7 +411,7 @@ def init_mosaic(self):
         self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float)
 
     @timeit
-    def polyfit(self, parallel=False):
+    def polyfit(self, parallel=False, chunksize=(1000, 1000)):
         # ==== Create final array ====
         self.init_fitdata()
         # ==== Weighted regression ====
@@ -430,20 +430,52 @@ def batch(seq, min_time_span, evmd_threshold, nodata):
                     sub_results.append(single_results)
                 return sub_results
 
-            batches = []
-            for m in range(self.ts.shape[0]):
-                result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
-                batches.append(result_batch)
+            if self.ts.shape[0] <= chunksize[0] and self.ts.shape[1] <= 
chunksize[1]:
+                ### Single chunk
+                batches = []
+                for m in range(self.ts.shape[0]):
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
+                    batches.append(result_batch)
+
+                with ProgressBar():
+                    results = dask.compute(batches)
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        self.fitdata['slope'][m, n] = results[0][m][n][0]
+                        self.fitdata['slope_err'][m, n] = results[0][m][n][1]
+                        self.fitdata['residual'][m, n] = results[0][m][n][2]
+                        self.fitdata['count'][m, n] = results[0][m][n][3]
+
+            else:
+                ### Multiple chunks
+                msize = chunksize[0]
+                nsize = chunksize[1]
+                m_nodes = np.arange(0, self.ts.shape[0], msize)
+                n_nodes = np.arange(0, self.ts.shape[1], nsize)
+                super_results = []
+                for super_m in range(m_nodes.size):
+                    batches = []
+                    ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :]
+                    for m in range(ts_slice.shape[0]):
+                        for n in range(n_nodes.size):
+                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
+                            batches.append(result_batch)
+
+                    with ProgressBar():
+                        results = dask.compute(batches)
+                    super_results.append(results[0])
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        idx1 = m // msize
+                        idx2 = n_nodes.size * (m % msize) + n // nsize
+                        idx3 = n % nsize
+                        self.fitdata['slope'][m, n] = super_results[idx1][idx2][idx3][0]
+                        self.fitdata['slope_err'][m, n] = super_results[idx1][idx2][idx3][1]
+                        self.fitdata['residual'][m, n] = super_results[idx1][idx2][idx3][2]
+                        self.fitdata['count'][m, n] = super_results[idx1][idx2][idx3][3]
 
-            with ProgressBar():
-                results = dask.compute(batches)
-
-            for m in range(self.ts.shape[0]):
-                for n in range(self.ts.shape[1]):
-                    self.fitdata['slope'][m, n] = results[0][m][n][0]
-                    self.fitdata['slope_err'][m, n] = results[0][m][n][1]
-                    self.fitdata['residual'][m, n] = results[0][m][n][2]
-                    self.fitdata['count'][m, n] = results[0][m][n][3]
 
         else:
             for m in range(self.ts.shape[0]):
@@ -535,7 +567,7 @@ def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
         mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
 
     @timeit
-    def do_evmd(self, parallel=False):
+    def do_evmd(self, parallel=False, chunksize=(1000, 1000)):
         if parallel:
             import dask
             from dask.diagnostics import ProgressBar
@@ -546,17 +578,44 @@ def batch(seq, evmd_threshold):
                     sub_results.append(labels)
                 return sub_results
 
-            batches = []
-            for m in range(self.ts.shape[0]):
-                result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
-                batches.append(result_batch)
-
-            with ProgressBar():
-                results = dask.compute(batches)
-
-            for m in range(self.ts.shape[0]):
-                for n in range(self.ts.shape[1]):
-                    self.ts[m, n].add_evmd_labels(results[0][m][n])
+            if self.ts.shape[0] <= chunksize[0] and self.ts.shape[1] <= chunksize[1]:
+                ### Single chunk
+                batches = []
+                for m in range(self.ts.shape[0]):
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
+                    batches.append(result_batch)
+
+                with ProgressBar():
+                    results = dask.compute(batches)
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        self.ts[m, n].add_evmd_labels(results[0][m][n])
+            else:
+                ### Multiple chunks
+                msize = chunksize[0]
+                nsize = chunksize[1]
+                m_nodes = np.arange(0, self.ts.shape[0], msize)
+                n_nodes = np.arange(0, self.ts.shape[1], nsize)
+                super_results = []
+                for super_m in range(m_nodes.size):
+                    batches = []
+                    ts_slice 
= self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] + for m in range(ts_slice.shape[0]): + for n in range(n_nodes.size): + result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold) + batches.append(result_batch) + + with ProgressBar(): + results = dask.compute(batches) + super_results.append(results[0]) + + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + idx1 = m // msize + idx2 = n_nodes.size * (m % msize) + n // nsize + idx3 = n % nsize + self.ts[m, n].add_evmd_labels(super_results[idx1][idx2][idx3]) else: for m in range(self.ts.shape[0]): self.display_progress(m, self.ts.shape[0]) From ce646e1f9420a84357cd4fc60f9e6f14470be09f Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 25 Apr 2023 07:31:40 +0000 Subject: [PATCH 17/24] Add progress information for chunked parallel processes --- carst/libdhdt.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 57bdd16..e2a5c8b 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -455,6 +455,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): n_nodes = np.arange(0, self.ts.shape[1], nsize) super_results = [] for super_m in range(m_nodes.size): + self.display_progress(m_nodes[super_m], self.ts.shape[0]) batches = [] ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): @@ -599,6 +600,7 @@ def batch(seq, evmd_threshold): n_nodes = np.arange(0, self.ts.shape[1], nsize) super_results = [] for super_m in range(m_nodes.size): + self.display_progress(m_nodes[super_m], self.ts.shape[0]) batches = [] ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): From f8848531f8c0bd84040a81efd9b8bd9bea65a674 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 25 Apr 2023 09:28:19 +0000 Subject: [PATCH 18/24] Add min_samples argument --- carst/libdhdt.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e2a5c8b..95febc4 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -142,11 +142,11 @@ def EVMD_idx(y, validated_value, threshold=6): validated_range = [min(tmp), max(tmp)] return idx -def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False): +def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, min_samples=4): # wlr = weighted linear regression. 
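     # evmd_labels holds the DBSCAN cluster label of every sample; entries >= 0
     # survived the outlier screening and feed the weighted fit (see idx below).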
# exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) if evmd_labels is None: - exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold, min_samples=min_samples) else: exitstate = 1 if any(evmd_labels >= 0) else -1 @@ -259,18 +259,18 @@ def add_evmd_labels(self, labels): self.verify_evmd_labels(labels) self.evmd_labels = labels - def do_evmd(self, eps=6): + def do_evmd(self, eps=6, min_samples=4): date = self.get_date() value = self.get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=eps) + exitstate, labels = EVMD_DBSCAN(date, value, eps=eps, min_samples=min_samples) return exitstate, labels - def do_wlr(self, evmd_threshold=6): + def do_wlr(self, evmd_threshold=6, min_samples=4): date = self.get_date() value = self.get_value() uncertainty = self.get_uncertainty() evmd_labels = self.evmd_labels - single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold) + single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold, min_samples=min_samples) # single_results is (slope, slope_err, residual, count) return single_results @@ -411,19 +411,19 @@ def init_mosaic(self): self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) @timeit - def polyfit(self, parallel=False, chunksize=(1000, 1000)): + def polyfit(self, parallel=False, chunksize=(1000, 1000), min_samples=4): # ==== Create final array ==== self.init_fitdata() # ==== Weighted regression ==== if parallel: import dask from dask.diagnostics import ProgressBar - def batch(seq, min_time_span, evmd_threshold, nodata): + def batch(seq, min_time_span, evmd_threshold, nodata, min_samples): sub_results = [] for x in seq: date = x.get_date() if date.size >= 2 and date[-1] - date[0] > min_time_span: - single_results = x.do_wlr(evmd_threshold=evmd_threshold) + single_results = x.do_wlr(evmd_threshold=evmd_threshold, min_samples=min_samples) # single_results is (slope, slope_err, residual, count) else: single_results = (nodata, nodata, nodata, nodata) @@ -434,7 +434,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): ### Single chunk batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) + result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata(), min_samples) batches.append(result_batch) with ProgressBar(): @@ -460,7 +460,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): for n in range(n_nodes.size): - result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) + result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata(), min_samples) batches.append(result_batch) with ProgressBar(): @@ -488,7 +488,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): # evmd_labels = self.ts[m, n].evmd_labels if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold) + slope, slope_err, residual, count = 
self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold, min_samples=min_samples)
                         # if residual > 100:
                         #     print(date, value, uncertainty)
                         self.fitdata['slope'][m, n] = slope
                         self.fitdata['slope_err'][m, n] = slope_err
                         self.fitdata['residual'][m, n] = residual
                         self.fitdata['count'][m, n] = count
@@ -515,7 +515,7 @@ def show_dhdt_tifs(self):
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count
 
     @timeit
-    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
+    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False, min_samples=4):
         """
         order options:
             ascending: early elevations will be populated first
             descending: recent elevations will be populated first
         method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
         self.init_mosaic()
         if method == 'DBSCAN' and self.ts[0, 0].evmd_labels is None:
             print('No EVMD labels detected. Run do_evmd first.')
-            self.do_evmd(parallel=parallel)
+            self.do_evmd(parallel=parallel, min_samples=min_samples)
         for m in range(self.ts.shape[0]):
             self.display_progress(m, self.ts.shape[0])
             for n in range(self.ts.shape[1]):
@@ -568,14 +568,14 @@ def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False, min_samples=4):
         mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
 
     @timeit
-    def do_evmd(self, parallel=False, chunksize=(1000, 1000)):
+    def do_evmd(self, parallel=False, chunksize=(1000, 1000), min_samples=4):
         if parallel:
             import dask
             from dask.diagnostics import ProgressBar
-            def batch(seq, evmd_threshold):
+            def batch(seq, evmd_threshold, min_samples):
                 sub_results = []
                 for x in seq:
-                    exitstate, labels = x.do_evmd(eps=evmd_threshold)
+                    exitstate, labels = x.do_evmd(eps=evmd_threshold, min_samples=min_samples)
                     sub_results.append(labels)
                 return sub_results
 
@@ -583,7 +583,7 @@ def batch(seq, evmd_threshold):
                 ### Single chunk
                 batches = []
                 for m in range(self.ts.shape[0]):
-                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold, min_samples)
                     batches.append(result_batch)
 
                 with ProgressBar():
@@ -605,7 +605,7 @@ def batch(seq, evmd_threshold):
                     ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :]
                     for m in range(ts_slice.shape[0]):
                         for n in range(n_nodes.size):
-                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold)
+                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold, min_samples)
                             batches.append(result_batch)
 
                 with ProgressBar():
@@ -622,7 +622,7 @@ def batch(seq, evmd_threshold):
             for m in range(self.ts.shape[0]):
                 self.display_progress(m, self.ts.shape[0])
                 for n in range(self.ts.shape[1]):
-                    exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold)
+                    exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold, min_samples=min_samples)
                     self.ts[m, n].add_evmd_labels(labels)
 

From ce630cfcbe23bc29b2694877b3e47e7ac250fecf Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Wed, 26 Apr 2023 05:08:23 +0000
Subject: [PATCH 19/24] Add min_samples argument to onclick

---
 carst/libdhdt.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 95febc4..e5dcfeb 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -631,7 +631,7 @@ def display_progress(m, total):
         if m % 100 == 0:
             print(f'{m}/{total} lines processed')
 
-    def viz(self, figsize=(8,8), clim=(-6, 6)):
+    def viz(self, figsize=(8,8), clim=(-6, 6), min_samples=4):
         dhdt_raster, _, _, _ = self.show_dhdt_tifs()
         try:
             nodata = dhdt_raster.get_nodata()
         except RasterioIOError:
             print(f'No such file: {dhdt_raster.fname} -- We recommend running polyfit first.')
             print('TBD')
         img = dhdt_raster.ReadAsArray()
         img[img == nodata] = np.nan
 
         fig, axs = plt.subplots(2, 1, figsize=figsize)
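         # axs[0] shows the dh/dt map; axs[1] is left for the time series of the
         # clicked pixel, drawn by the onclick callback registered below.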
first_img = axs[0].imshow(img, cmap='RdBu', vmin=clim[0], vmax=clim[1]) - onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold) + onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold, min_samples=min_samples) cid = fig.canvas.mpl_connect('button_press_event', onclick) -def onclick_wrapper(data, axs, evmd_threshold): +def onclick_wrapper(data, axs, evmd_threshold, min_samples=4): def onclick_ipynb(event): """ Callback function for mouse click @@ -661,7 +661,7 @@ def onclick_ipynb(event): xx = data[row, col].get_date() yy = data[row, col].get_value() ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, min_samples=min_samples, detailed=True) SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) SSRes = np.sum((y_good - y_goodest) ** 2) SST = np.sum((y_good - np.mean(y_good)) ** 2) From ce0630b2459415ed94640b545f5d8a62af0ee5ab Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 26 Apr 2023 05:35:44 +0000 Subject: [PATCH 20/24] Fix onclick behavior when EVMD returns nothing --- carst/libdhdt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e5dcfeb..c0b6d6d 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -154,7 +154,10 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - return slope, slope_err, resid, count + if detailed: + return slope, slope_err, resid, count, np.array([]), np.array([]), np.array([]) + else: + return slope, slope_err, resid, count else: # if x.size == 3: # print(x, y, ye) From 171eaf61787d32d4a27b85e3144374633c2b5db8 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 2 May 2023 01:57:12 +0000 Subject: [PATCH 21/24] WLR hotfix for singular matrix issue --- carst/libdhdt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index c0b6d6d..4d94a0e 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -152,7 +152,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi idx = evmd_labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: - if exitstate < 0 or (max(x) - min(x)) < 1: + if exitstate < 0: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size if detailed: return slope, slope_err, resid, count, np.array([]), np.array([]), np.array([]) @@ -162,7 +162,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if x.size == 3: # print(x, y, ye) # idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) - if sum(idx) >= 3: + if sum(idx) >= 3 or (max(x[idx]) - min(x[idx])) < 1: x = x[idx] y = y[idx] ye = ye[idx] From aa081faafcbcdf6eef375634747d50a1e32d881e Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 2 May 2023 02:15:02 +0000 Subject: [PATCH 22/24] WLR issue hotfix 2 --- carst/libdhdt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 4d94a0e..ce58790 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -162,7 +162,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if x.size == 3: # print(x, y, ye) # idx = EVMD_idx(y, 
validated_value, threshold=evmd_threshold) - if sum(idx) >= 3 or (max(x[idx]) - min(x[idx])) < 1: + if sum(idx) >= 3 and (max(x[idx]) - min(x[idx])) > 1: x = x[idx] y = y[idx] ye = ye[idx] From 018a6c4da0907398f97a9239bebf7eaa92dfa52a Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 20 May 2023 03:19:00 +0000 Subject: [PATCH 23/24] libconfig Tab --> Space --- carst/libconfig.py | 338 ++++++++++++++++++++++----------------------- carst/libdhdt.py | 1 + 2 files changed, 170 insertions(+), 169 deletions(-) diff --git a/carst/libconfig.py b/carst/libconfig.py index 3cfd761..2839294 100644 --- a/carst/libconfig.py +++ b/carst/libconfig.py @@ -90,175 +90,175 @@ def Write2File(self): class ConfParams: - """ - Read variables in a configuration file. The file should have the specified structure like CARST/dhdt/defaults.ini - """ - - def __init__(self, fpath=None): - self.fpath = fpath - # self.gdalwarp = {} - # self.demlist = {} - # self.regression = {} - # self.result = {} - - def ReadParam(self): - - """ - Read parameters and save them as self.xxxx - example: if there is a section called [gdalwarp] - and there is an option named tr = 30 30 - then self.gdalwarp['tr'] = '30 30' - """ - - if self.fpath is not None: - config = ConfigParser.RawConfigParser() - config.read(self.fpath) - - for section in config.sections(): - section_contents = {} - for item in config.items(section): - section_contents[item[0]] = item[1] - setattr(self, section, section_contents) - - else: - print('Warning: No ini file is given. Nothing will run.') - - def VerifyParam(self): - - """ - Verify params and modify them to proper types. - """ - - if hasattr(self, 'regression'): - for key in self.regression: - self.regression[key] = int(self.regression[key]) - if hasattr(self, 'gdalwarp'): - if 'output_dir' in self.gdalwarp: - if not os.path.exists(self.gdalwarp['output_dir']): - os.makedirs(self.gdalwarp['output_dir']) # create gdalwarp output folder - if hasattr(self, 'splitampcor'): - for key in self.splitampcor: - self.splitampcor[key] = int(self.splitampcor[key]) - if hasattr(self, 'parallel'): - if 'gnu_parallel' in self.parallel: - s = self.parallel['gnu_parallel'].lower() - self.parallel['gnu_parallel'] = s in ['true', 't', 'yes', 'y', '1'] - if hasattr(self, 'pxsettings'): - for key in self.pxsettings: - if not self.pxsettings[key]: - # empty string - self.pxsettings[key] = None - else: - self.pxsettings[key] = int(self.pxsettings[key]) - if 'size_across' not in self.pxsettings: - self.pxsettings['size_across'] = None - if 'size_down' not in self.pxsettings: - self.pxsettings['size_down'] = None - if 'gaussian_hp' in self.pxsettings: - self.pxsettings['gaussian_hp'] = bool(int(self.pxsettings['gaussian_hp'])) - else: - self.pxsettings['gaussian_hp'] = True - if 'gaussian_hp_sigma' in self.pxsettings: - self.pxsettings['gaussian_hp_sigma'] = float(self.pxsettings['gaussian_hp_sigma']) - else: - self.pxsettings['gaussian_hp_sigma'] = 3.0 - - if hasattr(self, 'outputcontrol'): - if 'datepair_prefix' in self.outputcontrol: - if self.outputcontrol['datepair_prefix'] in ['false', 'f', 'no', 'n', '0']: - self.outputcontrol['if_generate_xyztext'] = False - else: - self.outputcontrol['datepair_prefix'] = bool(int(self.outputcontrol['datepair_prefix'])) - atime = datetime.strptime(self.imagepair['image1_date'], '%Y-%m-%d') - btime = datetime.strptime(self.imagepair['image2_date'], '%Y-%m-%d') - self.outputcontrol['label_datepair'] = atime.strftime('%Y%m%d') + '-' + btime.strftime('%Y%m%d' + '_') - if 'output_folder' not in 
self.outputcontrol: - self.outputcontrol['output_folder'] = '.' - if hasattr(self, 'rawoutput'): - if 'if_generate_xyztext' in self.rawoutput: - self.rawoutput['if_generate_xyztext'] = bool(int(self.rawoutput['if_generate_xyztext'])) - else: - self.rawoutput['if_generate_xyztext'] = False - if 'if_generate_ampofftxt' in self.rawoutput: - self.rawoutput['if_generate_ampofftxt'] = bool(int(self.rawoutput['if_generate_ampofftxt'])) - else: - self.rawoutput['if_generate_ampofftxt'] = False - if 'label_ampcor' in self.rawoutput: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_ampcor']) - else: - self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_ampcor']) - if 'label_geotiff' in self.rawoutput: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_geotiff']) - else: - self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_geotiff']) - if hasattr(self, 'velocorrection'): - if 'label_bedrock_histogram' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_bedrock_histogram']) - else: - self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_bedrock_histogram']) - if 'label_geotiff' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_geotiff']) - else: - self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_geotiff']) - if 'label_logfile' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_logfile']) - else: - self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_logfile']) - if 'refvelo_outlier_sigma' in self.velocorrection: - self.velocorrection['refvelo_outlier_sigma'] = float(self.velocorrection['refvelo_outlier_sigma']) - else: - self.velocorrection['refvelo_outlier_sigma'] = 3.0 - if hasattr(self, 'noiseremoval'): - for key in self.noiseremoval: - self.noiseremoval[key] = float(self.noiseremoval[key]) - - - """ - # ======== Check if PAIRS_DIR, METADATA_DIR, and PAIRS exist ======== - if not os.path.exists(PAIRS_DIR): - print("\n***** ERROR: Pair directory specified (\"" + PAIRS_DIR + "\") not found, make sure full path is provided, exiting...\n"); - sys.exit(1) - - if not os.path.exists(METADATA_DIR): - print("\n***** ERROR: Metadata directory specified (\"" + METADATA_DIR + "\") not found, make sure full path is provided, exiting...\n"); - sys.exit(1) - - if not os.path.exists(PAIRS): - print("\n***** ERROR: Pair list 
\"" + PAIRS + "\" not found, make sure full path is provided, exiting...\n"); - # =================================================================== - """ - - - def GetDEM(self): - - """ - Get DEMs from "csvfile" field. Return a list of SingleRaster objects. - """ - - if 'csvfile' in self.demlist: - csv = CsvTable(self.demlist['csvfile']) - return csv.GetDEM() - else: - print('Warning: No DEM-list file is given. Nothing will run.') - return [] - - def GetImgPair(self, delimiter=','): - - """ - Get ImgPair from the contents of this csv file - """ - - if 'pairs_list' in self.io: - csv = CsvTable(self.io['pairs_list']) - return csv.GetImgPair(delimiter=delimiter) - else: - print('Warning: No Img-list file is given. Nothing will run.') - return [] + """ + Read variables in a configuration file. The file should have the specified structure like CARST/dhdt/defaults.ini + """ + + def __init__(self, fpath=None): + self.fpath = fpath + # self.gdalwarp = {} + # self.demlist = {} + # self.regression = {} + # self.result = {} + + def ReadParam(self): + + """ + Read parameters and save them as self.xxxx + example: if there is a section called [gdalwarp] + and there is an option named tr = 30 30 + then self.gdalwarp['tr'] = '30 30' + """ + + if self.fpath is not None: + config = ConfigParser.RawConfigParser() + config.read(self.fpath) + + for section in config.sections(): + section_contents = {} + for item in config.items(section): + section_contents[item[0]] = item[1] + setattr(self, section, section_contents) + + else: + print('Warning: No ini file is given. Nothing will run.') + + def VerifyParam(self): + + """ + Verify params and modify them to proper types. + """ + + if hasattr(self, 'regression'): + for key in self.regression: + self.regression[key] = int(self.regression[key]) + if hasattr(self, 'gdalwarp'): + if 'output_dir' in self.gdalwarp: + if not os.path.exists(self.gdalwarp['output_dir']): + os.makedirs(self.gdalwarp['output_dir']) # create gdalwarp output folder + if hasattr(self, 'splitampcor'): + for key in self.splitampcor: + self.splitampcor[key] = int(self.splitampcor[key]) + if hasattr(self, 'parallel'): + if 'gnu_parallel' in self.parallel: + s = self.parallel['gnu_parallel'].lower() + self.parallel['gnu_parallel'] = s in ['true', 't', 'yes', 'y', '1'] + if hasattr(self, 'pxsettings'): + for key in self.pxsettings: + if not self.pxsettings[key]: + # empty string + self.pxsettings[key] = None + else: + self.pxsettings[key] = int(self.pxsettings[key]) + if 'size_across' not in self.pxsettings: + self.pxsettings['size_across'] = None + if 'size_down' not in self.pxsettings: + self.pxsettings['size_down'] = None + if 'gaussian_hp' in self.pxsettings: + self.pxsettings['gaussian_hp'] = bool(int(self.pxsettings['gaussian_hp'])) + else: + self.pxsettings['gaussian_hp'] = True + if 'gaussian_hp_sigma' in self.pxsettings: + self.pxsettings['gaussian_hp_sigma'] = float(self.pxsettings['gaussian_hp_sigma']) + else: + self.pxsettings['gaussian_hp_sigma'] = 3.0 + + if hasattr(self, 'outputcontrol'): + if 'datepair_prefix' in self.outputcontrol: + if self.outputcontrol['datepair_prefix'] in ['false', 'f', 'no', 'n', '0']: + self.outputcontrol['if_generate_xyztext'] = False + else: + self.outputcontrol['datepair_prefix'] = bool(int(self.outputcontrol['datepair_prefix'])) + atime = datetime.strptime(self.imagepair['image1_date'], '%Y-%m-%d') + btime = datetime.strptime(self.imagepair['image2_date'], '%Y-%m-%d') + self.outputcontrol['label_datepair'] = atime.strftime('%Y%m%d') + '-' + 
			if 'output_folder' not in self.outputcontrol:
				self.outputcontrol['output_folder'] = '.'
		if hasattr(self, 'rawoutput'):
			if 'if_generate_xyztext' in self.rawoutput:
				self.rawoutput['if_generate_xyztext'] = bool(int(self.rawoutput['if_generate_xyztext']))
			else:
				self.rawoutput['if_generate_xyztext'] = False
			if 'if_generate_ampofftxt' in self.rawoutput:
				self.rawoutput['if_generate_ampofftxt'] = bool(int(self.rawoutput['if_generate_ampofftxt']))
			else:
				self.rawoutput['if_generate_ampofftxt'] = False
			if 'label_ampcor' in self.rawoutput:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_ampcor'])
				else:
					self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_ampcor'])
			if 'label_geotiff' in self.rawoutput:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_geotiff'])
				else:
					self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_geotiff'])
		if hasattr(self, 'velocorrection'):
			if 'label_bedrock_histogram' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_bedrock_histogram'])
				else:
					self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_bedrock_histogram'])
			if 'label_geotiff' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_geotiff'])
				else:
					self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_geotiff'])
			if 'label_logfile' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_logfile'])
				else:
					self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_logfile'])
			if 'refvelo_outlier_sigma' in self.velocorrection:
				self.velocorrection['refvelo_outlier_sigma'] = float(self.velocorrection['refvelo_outlier_sigma'])
			else:
				self.velocorrection['refvelo_outlier_sigma'] = 3.0
		if hasattr(self, 'noiseremoval'):
			for key in self.noiseremoval:
				self.noiseremoval[key] = float(self.noiseremoval[key])


	"""
	# ======== Check if PAIRS_DIR, METADATA_DIR, and PAIRS exist ========
	if not os.path.exists(PAIRS_DIR):
		print("\n***** ERROR: Pair directory specified (\"" + PAIRS_DIR + "\") not found, make sure full path is provided, exiting...\n");
		sys.exit(1)

	if not os.path.exists(METADATA_DIR):
		print("\n***** ERROR: Metadata directory specified (\"" + METADATA_DIR + "\") not found, make sure full path is provided, exiting...\n");
		sys.exit(1)

	if not os.path.exists(PAIRS):
		print("\n***** ERROR: Pair list \"" + PAIRS + "\" not found, make sure full path is provided, exiting...\n");
	# ===================================================================
	"""
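	# Minimal usage sketch (the class line is not shown in this hunk; `ConfParams`
	# below is assumed from how the config object is used elsewhere in CARST, and
	# the ini path is illustrative):
	#
	#   ini = ConfParams('defaults.ini')
	#   ini.ReadParam()      # [gdalwarp] tr = 30 30  -->  ini.gdalwarp['tr'] == '30 30'
	#   ini.VerifyParam()    # casts strings to int/float/bool and resolves output paths
	#   dems = ini.GetDEM()  # list of SingleRaster objects read from the demlist csvfile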

	def GetDEM(self):

		"""
		Get DEMs from the "csvfile" field. Return a list of SingleRaster objects.
		"""

		if 'csvfile' in self.demlist:
			csv = CsvTable(self.demlist['csvfile'])
			return csv.GetDEM()
		else:
			print('Warning: No DEM-list file is given. Nothing will run.')
			return [

	def GetImgPair(self, delimiter=','):

		"""
		Get image pairs from the contents of this CSV file.
		"""

		if 'pairs_list' in self.io:
			csv = CsvTable(self.io['pairs_list'])
			return csv.GetImgPair(delimiter=delimiter)
		else:
			print('Warning: No Img-list file is given. Nothing will run.')
			return []


class LS8MTL:

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index ce58790..c2d3316 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -145,6 +145,7 @@ def EVMD_idx(y, validated_value, threshold=6):
 def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, min_samples=4):
     # wlr = weighted linear regression.
     # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
+    # WARNING: `count` does not carry the same meaning in the two branches of the following if-else statement.
     if evmd_labels is None:
         exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold, min_samples=min_samples)
     else:

From 7e404c947f2fd19b393a7aa3fd7c8c947621acb8 Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 20 May 2023 04:26:42 +0000
Subject: [PATCH 24/24] Hotfix

---
 carst/libconfig.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/carst/libconfig.py b/carst/libconfig.py
index 2839294..2a25b96 100644
--- a/carst/libconfig.py
+++ b/carst/libconfig.py
@@ -245,7 +245,7 @@ def GetDEM(self):
 			return csv.GetDEM()
 		else:
 			print('Warning: No DEM-list file is given. Nothing will run.')
-			return [
+			return []
 
 	def GetImgPair(self, delimiter=','):
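# Note on the hotfix above: the unclosed bracket in `return [` is a syntax error,
# so before this patch the whole module failed at import time; with `return []`,
# GetDEM() instead returns an empty list when no csvfile is given, matching the
# printed warning that nothing will run. A minimal check (sketch; `ConfParams`
# is assumed from how the config class is used elsewhere in CARST):
#
#   from carst.libconfig import ConfParams   # import succeeds only after the hotfix
#   assert ConfParams(None).fpath is None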