From b95350e85116702e5cfe20a391dced3492f92f6f Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 09:12:11 +0000 Subject: [PATCH 01/24] Add PixelTimeSeries class --- carst/libdhdt.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 73fa685..bdfb75a 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -293,6 +293,50 @@ def onclick(event): figi.show() return onclick +class PixelTimeSeries(object): + """ + Store all measurements of a single pixel in the reference DEM. + Always an n-by-3 array: + Column 1: date + Column 2: value + Column 3: uncertainty + """ + def __init__(self, data=np.ndarray([0, 3])): + data = np.array(data) if type(data) is not np.ndarray else data + self.verify_record(data) + self.data = data + + def __repr__(self): + return 'PixelTimeSeries({})'.format(self.data) + + @staticmethod + def verify_record(record): + """ + Verify if the input record meets the format requirements. + """ + if record.ndim == 1 and record.size == 3: + pass + elif record.ndim == 2 and record.shape[1] == 3: + pass + else: + raise ValueError("Inconsistent input record. Must be an n-by-3 array.") + + def add_record(self, record): + """ + Add one or more records to the time series. + """ + record = np.array(record) if type(record) is not np.ndarray else record + self.verify_record(record) + self.data = np.vstack((self.data, record)) + + def get_date(self): + return self.data[:, 0] + + def get_value(self): + return self.data[:, 1] + + def get_uncertainty(self): + return self.data[:, 2] class DemPile(object): From c709b33700a29e3154f8afe247d6b002c24145f4 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 14:13:10 +0000 Subject: [PATCH 02/24] Optimize data structure: PixelTimeSeries --- .gitignore | 3 +- carst/libdhdt.py | 353 +++++++++++++++++++++++++---------------------- 2 files changed, 187 insertions(+), 169 deletions(-) diff --git a/.gitignore b/.gitignore index 5b1c7de..3f567ea 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,5 @@ examples/dhdt/Demo_DEMs/HookerFJL_10m_dhdt* examples/dhdt/Demo_DEMs/refgeo_10m_TSpickle.p examples/dhdt/tmp.tif other_process/log_getUncertaintyDEM_grdtrack_output.xyz -Utilities/unused/PXData/* \ No newline at end of file +Utilities/unused/PXData/* +*.p diff --git a/carst/libdhdt.py b/carst/libdhdt.py index bdfb75a..ad72951 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -301,8 +301,9 @@ class PixelTimeSeries(object): Column 2: value Column 3: uncertainty """ - def __init__(self, data=np.ndarray([0, 3])): - data = np.array(data) if type(data) is not np.ndarray else data + def __init__(self, data=[]): + data = np.ndarray([0, 3]) if not data else data # [] --> np.ndarray([0, 3]) + data = np.array(data) if type(data) is not np.ndarray else data # [1, 2, 3] --> np.array([1, 2, 3]) self.verify_record(data) self.data = data @@ -341,173 +342,189 @@ def get_uncertainty(self): class DemPile(object): - """ - New class in replace of TimeSeriesDEM. It doesn't use nparray for avoiding huge memory consumption. - Instead, it uses a novel method for stacking all DEMs and saves them as a dict array (which is what - is stored in the intermediate pickle file. - """ + """ + New class in replace of TimeSeriesDEM. It doesn't use nparray for avoiding huge memory consumption. + Instead, it uses a novel method for stacking all DEMs and saves them as a dict array (which is what + is stored in the intermediate pickle file. 
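+    Each pixel's stack is a list of [day offset from refdate, elevation, uncertainty] records,
+    e.g. (hypothetical numbers) [[730, 125.3, 4.0], [1095, 121.1, 4.0]] for a pixel seen by two DEMs.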
+ """ - def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): - self.picklepath = picklepath - self.dhdtprefix = dhdtprefix - self.ts = None - self.dems = [] - self.refdate = None - if refdate is not None: - self.refdate = datetime.strptime(refdate, '%Y-%m-%d') - self.refgeo = refgeo - self.refgeomask = None - if refgeo is not None: - self.refgeomask = refgeo.ReadAsArray().astype(bool) - self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} - self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} - self.evmd_threshold = 6 - - def AddDEM(self, dems): - # ==== Add DEM object list ==== - if type(dems) is list: - self.dems = self.dems + dems - elif type(dems) is SingleRaster: - self.dems.append(dems) - else: - raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") - - def SortByDate(self): - # ==== sort DEMs by date (ascending order) ==== - dem_dates = [i.date for i in self.dems] - dateidx = np.argsort(dem_dates) - self.dems = [self.dems[i] for i in dateidx] - - def SetRefGeo(self, refgeo): - # ==== Prepare the reference geometry ==== - if type(refgeo) is str: - self.refgeo = SingleRaster(refgeo) - elif type(refgeo) is SingleRaster: - self.refgeo = refgeo - else: - raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") - self.refgeomask = self.refgeo.ReadAsArray().astype(bool) - - def SetRefDate(self, datestr): - self.refdate = datetime.strptime(datestr, '%Y-%m-%d') - - def SetMaskParam(self, ini): - if 'max_uncertainty' in ini.settings: - self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) - if 'min_time_span' in ini.settings: - self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) - - def SetEVMDThreshold(self, ini): - if 'evmd_threshold' in ini.regression: - self.evmd_threshold = float(ini.regression['evmd_threshold']) - - def InitTS(self): - # ==== Prepare the reference geometry ==== - refgeo_Ysize = self.refgeo.GetRasterYSize() - refgeo_Xsize = self.refgeo.GetRasterXSize() - self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] - print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) - - def ReadConfig(self, ini): - self.picklepath = ini.result['picklefile'] - self.dhdtprefix = ini.result['dhdt_prefix'] - self.AddDEM(ini.GetDEM()) - self.SortByDate() - self.SetRefGeo(ini.refgeometry['gtiff']) - self.SetRefDate(ini.settings['refdate']) - self.SetMaskParam(ini) - self.SetEVMDThreshold(ini) - - @timeit - def PileUp(self): - # ==== Start to read every DEM and save it to our final array ==== - for i in range(len(self.dems)): - print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) )) - if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: - datedelta = self.dems[i].date - self.refdate - # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') - znew = Resample_Array2(self.dems[i], self.refgeo) - znew_mask = np.logical_and(znew > 0, self.refgeomask) - fill_idx = np.where(znew_mask) - for m,n in zip(fill_idx[0], fill_idx[1]): - self.ts[m][n]['date'] += [datedelta.days] - self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty] - self.ts[m][n]['value'] += [znew[m, n]] - else: - print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty allowed ({}).".format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) - - def DumpPickle(self): - 
pickle.dump(self.ts, open(self.picklepath, "wb")) - - def LoadPickle(self): - self.ts = pickle.load( open(self.picklepath, "rb") ) - - @timeit - def Polyfit(self): - # ==== Create final array ==== - self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 - # ==== Weighted regression ==== - print('m total: ', len(self.ts)) - for m in range(len(self.ts)): - # if m < 600: - # continue - if m % 100 == 0: - print(m) - for n in range(len(self.ts[0])): - # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) - # if len(self.ts[m][n]['date']) >= 3: - date = np.array(self.ts[m][n]['date']) - uncertainty = np.array(self.ts[m][n]['uncertainty']) - value = np.array(self.ts[m][n]['value']) - # pin_value = pin_dem_array[m ,n] - # pin_date = pin_dem_date_array[m, n] - # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) - # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) - - # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] - # for i in reversed(range(len(slope_ref))): - # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): - # _ = date.pop(i) - # _ = uncertainty.pop(i) - # _ = value.pop(i) - # self.fitdata['count'][m, n] = len(date) - - # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 - # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) - # if residual > 100: - # print(date, value, uncertainty) - self.fitdata['slope'][m, n] = slope - self.fitdata['slope_err'][m, n] = slope_err - self.fitdata['residual'][m, n] = residual - self.fitdata['count'][m, n] = count - # else: - # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) - # elif (date[-1] - date[0] > 0): - # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 - # self.fitdata['count'][~self.refgeomask] = -9999 - - def Fitdata2File(self): - # ==== Write to file ==== - dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') - dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') - dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') - dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') - dhdt_dem.Array2Raster(self.fitdata['slope'], self.refgeo) - dhdt_error.Array2Raster(self.fitdata['slope_err'], self.refgeo) - dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) - dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) - - def ShowDhdtTifs(self): - dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') - dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') - dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') - dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') - return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): + self.picklepath = picklepath + self.dhdtprefix = dhdtprefix + self.ts = None + self.dems = [] + self.refdate = None + if refdate is not None: + self.refdate = datetime.strptime(refdate, '%Y-%m-%d') + self.refgeo = refgeo + self.refgeomask = None + if refgeo is not None: 
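+            # nonzero pixels of refgeo define the processing mask; zero pixels are skipped later.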
+ self.refgeomask = refgeo.ReadAsArray().astype(bool) + self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} + self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} + self.evmd_threshold = 6 + + def AddDEM(self, dems): + # ==== Add DEM object list ==== + if type(dems) is list: + self.dems = self.dems + dems + elif type(dems) is SingleRaster: + self.dems.append(dems) + else: + raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") + + def SortByDate(self): + # ==== sort DEMs by date (ascending order) ==== + dem_dates = [i.date for i in self.dems] + dateidx = np.argsort(dem_dates) + self.dems = [self.dems[i] for i in dateidx] + + def SetRefGeo(self, refgeo): + # ==== Prepare the reference geometry ==== + if type(refgeo) is str: + self.refgeo = SingleRaster(refgeo) + elif type(refgeo) is SingleRaster: + self.refgeo = refgeo + else: + raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") + self.refgeomask = self.refgeo.ReadAsArray().astype(bool) + + def SetRefDate(self, datestr): + self.refdate = datetime.strptime(datestr, '%Y-%m-%d') + + def SetMaskParam(self, ini): + if 'max_uncertainty' in ini.settings: + self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) + if 'min_time_span' in ini.settings: + self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) + + def SetEVMDThreshold(self, ini): + if 'evmd_threshold' in ini.regression: + self.evmd_threshold = float(ini.regression['evmd_threshold']) + + def InitTS(self): + # ==== Prepare the reference geometry ==== + refgeo_Ysize = self.refgeo.GetRasterYSize() + refgeo_Xsize = self.refgeo.GetRasterXSize() + self.ts = np.empty([refgeo_Ysize, refgeo_Xsize], dtype=object) + # for j in range(self.ts.shape[0]): + # for i in range(self.ts.shape[1]): + # self.ts[j, i] = PixelTimeSeries() + # self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] + # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] + print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) + + def ReadConfig(self, ini): + self.picklepath = ini.result['picklefile'] + self.dhdtprefix = ini.result['dhdt_prefix'] + self.AddDEM(ini.GetDEM()) + self.SortByDate() + self.SetRefGeo(ini.refgeometry['gtiff']) + self.SetRefDate(ini.settings['refdate']) + self.SetMaskParam(ini) + self.SetEVMDThreshold(ini) + + @timeit + def PileUp(self): + # ==== Start to read every DEM and save it to our final array ==== + ts = [[ [] for n in range(self.ts.shape[1])] for m in range(self.ts.shape[0])] + for i in range(len(self.dems)): + print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) )) + if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: + datedelta = self.dems[i].date - self.refdate + # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') + znew = Resample_Array2(self.dems[i], self.refgeo) + znew_mask = np.logical_and(znew > 0, self.refgeomask) + fill_idx = np.where(znew_mask) + for m,n in zip(fill_idx[0], fill_idx[1]): + record = [datedelta.days, znew[m, n], self.dems[i].uncertainty] + # self.ts[m, n].add_record(record) + ts[m][n] += [record] + # self.ts[m][n]['date'] += [datedelta.days] + # self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty] + # self.ts[m][n]['value'] += [znew[m, n]] + else: + print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty 
allowed ({})." + .format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) + + # After the content of ts is all populated, we move the data to self.ts as an array of PixelTimeSeries. + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + self.ts[m, n] = PixelTimeSeries(ts[m][n]) + + def DumpPickle(self): + pickle.dump(self.ts, open(self.picklepath, "wb")) + + def LoadPickle(self): + self.ts = pickle.load( open(self.picklepath, "rb") ) + + @timeit + def Polyfit(self): + # ==== Create final array ==== + self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 + self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 + # ==== Weighted regression ==== + print('m total: ', len(self.ts)) + for m in range(len(self.ts)): + if m % 100 == 0: + print(m) + for n in range(len(self.ts[0])): + # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) + # if len(self.ts[m][n]['date']) >= 3: + date = self.ts[m, n].get_date() + # date = np.array(self.ts[m][n]['date']) + uncertainty = self.ts[m, n].get_uncertainty() + # uncertainty = np.array(self.ts[m][n]['uncertainty']) + value = self.ts[m, n].get_value() + # value = np.array(self.ts[m][n]['value']) + # pin_value = pin_dem_array[m ,n] + # pin_date = pin_dem_date_array[m, n] + # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) + # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) + + # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] + # for i in reversed(range(len(slope_ref))): + # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): + # _ = date.pop(i) + # _ = uncertainty.pop(i) + # _ = value.pop(i) + # self.fitdata['count'][m, n] = len(date) + + # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 + # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) + # if residual > 100: + # print(date, value, uncertainty) + self.fitdata['slope'][m, n] = slope + self.fitdata['slope_err'][m, n] = slope_err + self.fitdata['residual'][m, n] = residual + self.fitdata['count'][m, n] = count + # else: + # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) + # elif (date[-1] - date[0] > 0): + # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 + # self.fitdata['count'][~self.refgeomask] = -9999 + + def Fitdata2File(self): + # ==== Write to file ==== + dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') + dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') + dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') + dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') + dhdt_dem.Array2Raster(self.fitdata['slope'], self.refgeo) + dhdt_error.Array2Raster(self.fitdata['slope_err'], self.refgeo) + dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) + dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) + + def ShowDhdtTifs(self): + dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') + dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') + dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') + dhdt_count = 
SingleRaster(self.dhdtprefix + '_dhdt_count.tif') + return dhdt_dem, dhdt_error, dhdt_res, dhdt_count From 0ae6ca3d3c9695c7a87edc0f522c9d18884d0aec Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 16 Apr 2023 16:34:42 +0000 Subject: [PATCH 03/24] New viz and ConfParam ini-zation --- bin/dhdt.py | 55 +++++++++++++------------ carst/libdhdt.py | 102 +++++++++++++++++++++++++++++------------------ 2 files changed, 93 insertions(+), 64 deletions(-) diff --git a/bin/dhdt.py b/bin/dhdt.py index d8b9599..9d0e5a3 100755 --- a/bin/dhdt.py +++ b/bin/dhdt.py @@ -30,46 +30,49 @@ # ==== Read ini file ==== inipath = args.config_file -ini = ConfParams(inipath) -ini.ReadParam() -ini.VerifyParam() +# ini = ConfParams(inipath) +# ini.ReadParam() +# ini.VerifyParam() # ==== Create a DemPile object and load the config file into the object ==== a = DemPile() -a.ReadConfig(ini) +# a.ReadConfig(ini) +a.ReadConfig(inipath) # ==== Run main processes ==== if args.step is None: - a.InitTS() - a.PileUp() - a.DumpPickle() - a.Polyfit() - a.Fitdata2File() + a.InitTS() + a.PileUp() + a.DumpPickle() + a.Polyfit() + a.Fitdata2File() elif args.step == 'stack': - a.InitTS() - a.PileUp() - a.DumpPickle() + a.InitTS() + a.PileUp() + a.DumpPickle() elif args.step == 'dhdt': - a.LoadPickle() - a.Polyfit() - a.Fitdata2File() + a.LoadPickle() + a.Polyfit() + a.Fitdata2File() elif args.step == 'viewts': - a.LoadPickle() - data = a.ts - dhdt_raster, _, _, _ = a.ShowDhdtTifs() - img = dhdt_raster.ReadAsArray() + a.LoadPickle() + a.viz() + plt.show() +# data = a.ts +# dhdt_raster, _, _, _ = a.ShowDhdtTifs() +# img = dhdt_raster.ReadAsArray() + +# fig, ax = plt.subplots() +# img[img < -9000] = np.nan +# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) +# onclick = onclick_wrapper(data, fig, ax) - fig, ax = plt.subplots() - img[img < -9000] = np.nan - ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) - onclick = onclick_wrapper(data, fig, ax) +# cid = fig.canvas.mpl_connect('button_press_event', onclick) - cid = fig.canvas.mpl_connect('button_press_event', onclick) - plt.show() else: - print('Wrong Way!') + print('Wrong Way!') # ==== Codes for test ==== diff --git a/carst/libdhdt.py b/carst/libdhdt.py index ad72951..f59397f 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -2,22 +2,23 @@ # Func: Resample_Array(UtilRaster.SingleRaster, UtilRaster.SingleRaster) # used for dhdt # by Whyjay Zheng, Jul 27 2016 -# last edit: Jun 22 2017 +# last edit: Apr 17 2023 import numpy as np from numpy.linalg import inv import os import sys try: - import gdal + import gdal except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal # sometimes gdal is part of osgeo modules from datetime import datetime from shapely.geometry import Polygon from scipy.interpolate import interp2d from scipy.interpolate import NearestNDInterpolator from scipy.interpolate import griddata import gc +from carst import ConfParams from carst.libraster import SingleRaster import pickle import matplotlib.pyplot as plt @@ -257,41 +258,10 @@ def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2) # return slope, slope_err, residual -def onclick_wrapper(data, fig, ax): - def onclick(event): - print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % - ('double' if event.dblclick else 'single', event.button, - event.x, event.y, event.xdata, event.ydata)) - col = int(event.xdata) - row = int(event.ydata) - xx = np.array(data[row][col]['date']) - yy = 
np.array(data[row][col]['value']) - ye = np.array(data[row][col]['uncertainty']) - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) - SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) - SSRes = np.sum((y_good - y_goodest) ** 2) - SST = np.sum((y_good - np.mean(y_good)) ** 2) - Rsquared = 1 - SSRes / SST - ax.plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') - fig.canvas.draw() - figi = plt.figure() - axi = plt.gca() - axi.errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') - np.set_printoptions(precision=3) - np.set_printoptions(suppress=True) - xye = np.vstack((xx,yy,ye)).T - print(xye[xye[:,1].argsort()]) - # print(xx) - # print(yy) - # print(ye) - axi.plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) - axi.plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) - axi.text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) - axi.set_xlabel('days, from 2015-01-01') - axi.set_ylabel('height (m)') - # plt.plot(xx, yy) - figi.show() - return onclick + + + + class PixelTimeSeries(object): """ @@ -415,6 +385,10 @@ def InitTS(self): print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) def ReadConfig(self, ini): + if type(ini) is str: + ini = ConfParams(ini) + ini.ReadParam() + ini.VerifyParam() self.picklepath = ini.result['picklefile'] self.dhdtprefix = ini.result['dhdt_prefix'] self.AddDEM(ini.GetDEM()) @@ -525,6 +499,58 @@ def ShowDhdtTifs(self): dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif') dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + + def viz(self): + dhdt_raster, _, _, _ = self.ShowDhdtTifs() + img = dhdt_raster.ReadAsArray() + img[img < -9000] = np.nan # might need to change + + fig, axs = plt.subplots(2, 1, figsize=(8,8)) # figsize might need to change + first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6) # minmax might need to change + + onclick = onclick_wrapper(self.ts, axs) + cid = fig.canvas.mpl_connect('button_press_event', onclick) + +def onclick_wrapper(data, axs): + def onclick_ipynb(event): + """ + Callback function for mouse click + """ + print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % + ('double' if event.dblclick else 'single', event.button, + event.x, event.y, event.xdata, event.ydata)) + col = int(event.xdata) + row = int(event.ydata) + xx = data[row, col].get_date() + yy = data[row, col].get_value() + ye = data[row, col].get_uncertainty() + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) + SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) + SSRes = np.sum((y_good - y_goodest) ** 2) + SST = np.sum((y_good - np.mean(y_good)) ** 2) + Rsquared = 1 - SSRes / SST + axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') + axs[1].cla() + axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') + np.set_printoptions(precision=3) + np.set_printoptions(suppress=True) + xye = np.vstack((xx,yy,ye)).T + print(xye[xye[:,1].argsort()]) + axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) + axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) + axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) + axs[1].set_xlabel('days, from xxxx-01-01') + axs[1].set_ylabel('height (m)') + return 
onclick_ipynb + + +# fig, ax = plt.subplots() +# img[img < -9000] = np.nan +# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) +# onclick = onclick_wrapper(data, fig, ax) + +# cid = fig.canvas.mpl_connect('button_press_event', onclick) +# cid = fig.canvas.mpl_connect('button_press_event', onclick2) From 2409a88fc7d1d4a44faae986ad59e9ed6d01c7ce Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Mon, 17 Apr 2023 13:49:04 +0000 Subject: [PATCH 04/24] Add mosaic function; update live check (onclick) --- carst/libdhdt.py | 83 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index f59397f..20d3d8c 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -118,20 +118,22 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 def EVMD(y, threshold=6): - validated_value = None - if y.size <= 1: - exitstate = y.size - else: - exitstate = 1 - for i in range(y.size - 1): - comparison = y[i+1:] - y[i] - if any(abs(comparison) < threshold): - validated_value = y[i] - exitstate = -10 - break - else: - exitstate += 1 - return exitstate, validated_value + validated_value = None + validated_value_idx = None + if y.size <= 1: + exitstate = y.size + else: + exitstate = 1 + for i in range(y.size - 1): + comparison = y[i+1:] - y[i] + if any(abs(comparison) < threshold): + validated_value_idx = i + validated_value = y[i] + exitstate = -10 + break + else: + exitstate += 1 + return exitstate, validated_value, validated_value_idx def EVMD_idx(y, validated_value, threshold=6): validated_range = [validated_value - threshold, validated_value + threshold] @@ -145,7 +147,7 @@ def EVMD_idx(y, validated_value, threshold=6): def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. 
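 # The weighted fit below solves y = a + b*x with weights w = 1/ye:
 #   G = [x, 1], Gw = diag(w) @ G, yw = diag(w) @ y, p = inv(Gw.T @ Gw) @ Gw.T @ yw,
 # and the slope is converted from m/day to m/yr by p[0] * 365.25.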
- exitstate, validated_value = EVMD(y, threshold=evmd_threshold) + exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) if exitstate >= 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size return slope, slope_err, resid, count @@ -331,6 +333,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): if refgeo is not None: self.refgeomask = refgeo.ReadAsArray().astype(bool) self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} + self.mosaic = {'value': [], 'date': [], 'uncertainty': []} self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} self.evmd_threshold = 6 @@ -500,6 +503,41 @@ def ShowDhdtTifs(self): dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif') return dhdt_dem, dhdt_error, dhdt_res, dhdt_count + def form_mosaic(self, order='ascending'): + """ + order options: + ascending: early elevations will be populated first + descending: late elevations will be populated first + """ + # ==== Create mosaicked array ==== + self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float) + self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float) + self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float) + for m in range(self.ts.shape[0]): + if m % 100 == 0: + print(m) + for n in range(self.ts.shape[1]): + date = self.ts[m, n].get_date() + uncertainty = self.ts[m, n].get_uncertainty() + value = self.ts[m, n].get_value() + if order == 'descending': + date = np.flip(date) + uncertainty = np.flip(uncertainty) + value = np.flip(value) + elif order != 'ascending': + raise ValueError('order must be "ascending" or "descending".') + exitstate, validated_value, validated_value_idx = EVMD(value, threshold=self.evmd_threshold) + if exitstate < 0: + self.mosaic['value'][m, n] = validated_value + self.mosaic['date'][m, n] = date[validated_value_idx] + self.mosaic['uncertainty'][m, n] = uncertainty[validated_value_idx] + mosaic_value = SingleRaster('{}_mosaic-{}_value.tif'.format(self.dhdtprefix, order)) + mosaic_date = SingleRaster('{}_mosaic-{}_date.tif'.format(self.dhdtprefix, order)) + mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order)) + mosaic_value.Array2Raster(self.mosaic['value'], self.refgeo) + mosaic_date.Array2Raster(self.mosaic['date'], self.refgeo) + mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo) + def viz(self): dhdt_raster, _, _, _ = self.ShowDhdtTifs() img = dhdt_raster.ReadAsArray() @@ -508,10 +546,10 @@ def viz(self): fig, axs = plt.subplots(2, 1, figsize=(8,8)) # figsize might need to change first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6) # minmax might need to change - onclick = onclick_wrapper(self.ts, axs) + onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold) cid = fig.canvas.mpl_connect('button_press_event', onclick) -def onclick_wrapper(data, axs): +def onclick_wrapper(data, axs, evmd_threshold): def onclick_ipynb(event): """ Callback function for mouse click @@ -524,7 +562,7 @@ def onclick_ipynb(event): xx = data[row, col].get_date() yy = data[row, col].get_value() ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=80, detailed=True) + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) SSRes = np.sum((y_good 
- y_goodest) ** 2) SST = np.sum((y_good - np.mean(y_good)) ** 2) @@ -544,15 +582,6 @@ def onclick_ipynb(event): return onclick_ipynb -# fig, ax = plt.subplots() -# img[img < -9000] = np.nan -# ax.imshow(img, cmap='RdBu', vmin=-6, vmax=6) -# onclick = onclick_wrapper(data, fig, ax) - -# cid = fig.canvas.mpl_connect('button_press_event', onclick) -# cid = fig.canvas.mpl_connect('button_press_event', onclick2) - - class TimeSeriesDEM(np.ndarray): From 36c62655a7de2b64109eaef61bf41c350616f4c6 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 19 Apr 2023 04:06:48 +0000 Subject: [PATCH 05/24] Convert tabs to spaces: EVMD_idx and wlr_corefun --- carst/libdhdt.py | 104 ++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 20d3d8c..d3dbe61 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -136,59 +136,61 @@ def EVMD(y, threshold=6): return exitstate, validated_value, validated_value_idx def EVMD_idx(y, validated_value, threshold=6): - validated_range = [validated_value - threshold, validated_value + threshold] - idx = np.zeros_like(y, dtype=bool) - for i in range(y.size): - if validated_range[0] <= y[i] <= validated_range[1]: - idx[i] = True - tmp = np.append(validated_range, [y[i] - threshold, y[i] + threshold]) - validated_range = [min(tmp), max(tmp)] - return idx + validated_range = [validated_value - threshold, validated_value + threshold] + idx = np.zeros_like(y, dtype=bool) + for i in range(y.size): + if validated_range[0] <= y[i] <= validated_range[1]: + idx[i] = True + tmp = np.append(validated_range, [y[i] - threshold, y[i] + threshold]) + validated_range = [min(tmp), max(tmp)] + return idx def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): - # wlr = weighted linear regression. - exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - if exitstate >= 0 or (max(x) - min(x)) < 1: - slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - return slope, slope_err, resid, count - else: - # if x.size == 3: - # print(x, y, ye) - idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) - if sum(idx) >= 3: - x = x[idx] - y = y[idx] - ye = ye[idx] - w = [1 / k for k in ye] - G = np.vstack([x, np.ones(x.size)]).T # defines the model (y = a + bx) - W = np.diag(w) - Gw = W @ G - yw = W @ y.T # assuming y is a 1-by-N array - cov_m = inv(Gw.T @ Gw) # covariance matrix - p = inv(Gw.T @ Gw) @ Gw.T @ yw # model coefficients - H = G @ inv(Gw.T @ Gw) @ Gw.T @ W # projection matrix - h = np.diag(H) # leverage - y_est = np.polyval(p, x) # the estimate of y - ri2 = (y - y_est) ** 2 - resid = np.sum(ri2) # sum of squared error - error = np.sqrt(cov_m[0, 0]) - p_num = 2 # how many coefficients we want (in this case, 2 comes from y = a + bx) - mse = resid / (x.size - p_num) # mean squared error - slope = p[0] * 365.25 - slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - count = x.size - # if resid > 100000: - # print(x,y,ye,cookd,goodpoint) - if detailed: - return slope, slope_err, resid, count, x, y, y_est - else: - return slope, slope_err, resid, count - else: - slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - if detailed: - return slope, slope_err, resid, count, x, y, y - else: - return slope, slope_err, resid, count + # wlr = weighted linear regression. 
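+    # EVMD returns exitstate == -10 when at least two elevations agree within the
+    # threshold; any exitstate >= 0 means no value could be validated for this pixel.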
+ exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) + if exitstate >= 0 or (max(x) - min(x)) < 1: + slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size + return slope, slope_err, resid, count + else: + # if x.size == 3: + # print(x, y, ye) + idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) + if sum(idx) >= 3: + x = x[idx] + y = y[idx] + ye = ye[idx] + w = [1 / k for k in ye] + G = np.vstack([x, np.ones(x.size)]).T # defines the model (y = a + bx) + W = np.diag(w) + Gw = W @ G + yw = W @ y.T # assuming y is a 1-by-N array + cov_m = inv(Gw.T @ Gw) # covariance matrix + p = inv(Gw.T @ Gw) @ Gw.T @ yw # model coefficients + H = G @ inv(Gw.T @ Gw) @ Gw.T @ W # projection matrix + h = np.diag(H) # leverage + y_est = np.polyval(p, x) # the estimate of y + ri2 = (y - y_est) ** 2 + resid = np.sum(ri2) # sum of squared error + error = np.sqrt(cov_m[0, 0]) + p_num = 2 # how many coefficients we want (in this case, 2 comes from y = a + bx) + mse = resid / (x.size - p_num) # mean squared error + slope = p[0] * 365.25 + slope_err = np.sqrt(cov_m[0, 0]) * 365.25 + count = x.size + # if resid > 100000: + # print(x,y,ye,cookd,goodpoint) + if detailed: + return slope, slope_err, resid, count, x, y, y_est + else: + return slope, slope_err, resid, count + else: + slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size + if detailed: + x = x[idx] + y = y[idx] + return slope, slope_err, resid, count, x, y, y + else: + return slope, slope_err, resid, count # ============ Using Cook's Distance ============ From f53c8d4dc90d08989bc6d052dca196b786581f59 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 19 Apr 2023 14:44:32 +0000 Subject: [PATCH 06/24] Test DBSCAN as EVMD --- carst/libdhdt.py | 172 ++++++++++++++++++++++++++++++++--------------- requirements.txt | 1 + 2 files changed, 120 insertions(+), 53 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index d3dbe61..ed5bb22 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -22,6 +22,7 @@ from carst.libraster import SingleRaster import pickle import matplotlib.pyplot as plt +from sklearn.cluster import DBSCAN def timeit(func): def time_wrapper(*args, **kwargs): @@ -117,22 +118,66 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): else: return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 -def EVMD(y, threshold=6): - validated_value = None - validated_value_idx = None - if y.size <= 1: - exitstate = y.size + +def EVMD_DBSCAN(x, y, eps=6, min_samples=4): + """ + See EVMD & EVMD_idx. + """ + # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration. + x2d = np.column_stack((x / 365, y)) + if x2d.shape[0] >= 2: + db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) + verified_y_labels = db.labels_ >= 0 + # verified_y_labels_idx = np.where(verified_y_labels) + if any(verified_y_labels): + exitstate = 1 + else: + exitstate = -1 else: - exitstate = 1 - for i in range(y.size - 1): - comparison = y[i+1:] - y[i] - if any(abs(comparison) < threshold): - validated_value_idx = i - validated_value = y[i] - exitstate = -10 - break - else: - exitstate += 1 + exitstate = -1 + verified_y_labels = np.full_like(y, False) + return exitstate, verified_y_labels + + +def EVMD(y, x=None, threshold=6, method='legacy'): + """ + Elevation Verification from Multiple DEMs. + method: 'DBSCAN' or 'legacy' + """ + if method == 'DBSCAN': + # x, assuming the temporal axis (unit=days), is scaled by 100 for proper eps consideration. 
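+        # DBSCAN assigns each (time, elevation) sample to a cluster; noise samples
+        # get the label -1, so labels >= 0 mark elevations corroborated by their neighbors.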
+        x2d = np.column_stack((x / 100, y))
+        # min_samples is fixed at 3 (i.e., three DEMs together can be considered as a cluster!)
+        min_samples = 3
+        db = DBSCAN(eps=threshold, min_samples=min_samples).fit(x2d)
+        verified_y_labels = db.labels_ >= 0
+        verified_y_labels_idx = np.where(verified_y_labels)
+        if any(verified_y_labels):
+            exitstate = -10
+            validated_value = y[verified_y_labels_idx[0]]
+            validated_value_idx = verified_y_labels_idx[0]
+        else:
+            exitstate = 1
+            validated_value = None
+            validated_value_idx = None
+    elif method == 'legacy':
+        validated_value = None
+        validated_value_idx = None
+        if y.size <= 1:
+            exitstate = y.size
+        else:
+            exitstate = 1
+            for i in range(y.size - 1):
+                comparison = y[i+1:] - y[i]
+                if any(abs(comparison) < threshold):
+                    validated_value_idx = i
+                    validated_value = y[i]
+                    exitstate = -10
+                    break
+                else:
+                    exitstate += 1
+    else:
+        raise ValueError('method must be "DBSCAN" or "legacy".')
     return exitstate, validated_value, validated_value_idx

 def EVMD_idx(y, validated_value, threshold=6):
@@ -147,14 +192,16 @@ def EVMD_idx(y, validated_value, threshold=6):

 def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False):
     # wlr = weighted linear regression.
-    exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
-    if exitstate >= 0 or (max(x) - min(x)) < 1:
+    # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
+    exitstate, idx = EVMD_DBSCAN(x, y, eps=evmd_threshold)
+    # if exitstate >= 0 or (max(x) - min(x)) < 1:
+    if exitstate < 0 or (max(x) - min(x)) < 1:
         slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size
         return slope, slope_err, resid, count
     else:
         # if x.size == 3:
         #     print(x, y, ye)
-        idx = EVMD_idx(y, validated_value, threshold=evmd_threshold)
+        # idx = EVMD_idx(y, validated_value, threshold=evmd_threshold)
         if sum(idx) >= 3:
             x = x[idx]
             y = y[idx]
@@ -505,30 +552,46 @@ def ShowDhdtTifs(self):
         dhdt_res = SingleRaster(self.dhdtprefix + '_dhdt_residual.tif')
         dhdt_count = SingleRaster(self.dhdtprefix + '_dhdt_count.tif')
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count

-    def form_mosaic(self, order='ascending'):
+    @timeit
+    def form_mosaic(self, order='ascending', method='DBSCAN'):
         """
         order options:
             ascending: early elevations will be populated first
             descending: late elevations will be populated first
+        method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
         self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float)
         self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float)
         self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float)
         for m in range(self.ts.shape[0]):
             if m % 100 == 0:
                 print(m)
             for n in range(self.ts.shape[1]):
                 date = self.ts[m, n].get_date()
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
+                if method == 'DBSCAN':
+                    exitstate, verified_y_labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    if exitstate >= 0:
+                        verified_y_labels_idx = np.where(verified_y_labels)[0]
+                        if order == 'descending':
+                            idx = verified_y_labels_idx[-1]
+                        elif order == 'ascending':
+                            idx = verified_y_labels_idx[0]
+                        else:
+                            raise ValueError('order must be "ascending" or "descending".')
+                        self.mosaic['value'][m, n] = value[idx]
+                        self.mosaic['date'][m, n] = 
date[idx] + self.mosaic['uncertainty'][m, n] = uncertainty[idx] + elif method == 'legacy': + if order == 'descending': + date = np.flip(date) + uncertainty = np.flip(uncertainty) + value = np.flip(value) + elif order != 'ascending': + raise ValueError('order must be "ascending" or "descending".') + exitstate, validated_value, validated_value_idx = EVMD(value, threshold=self.evmd_threshold) + if exitstate < 0: + self.mosaic['value'][m, n] = validated_value + self.mosaic['date'][m, n] = date[validated_value_idx] + self.mosaic['uncertainty'][m, n] = uncertainty[validated_value_idx] + else: + raise ValueError('method must be "DBSCAN" or "legacy".') mosaic_value = SingleRaster('{}_mosaic-{}_value.tif'.format(self.dhdtprefix, order)) mosaic_date = SingleRaster('{}_mosaic-{}_date.tif'.format(self.dhdtprefix, order)) mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order)) @@ -558,29 +623,30 @@ def onclick_ipynb(event): """ print('%s click: button=%d, x=%d, y=%d, xdata=%f, ydata=%f' % ('double' if event.dblclick else 'single', event.button, - event.x, event.y, event.xdata, event.ydata)) - col = int(event.xdata) - row = int(event.ydata) - xx = data[row, col].get_date() - yy = data[row, col].get_value() - ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) - SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) - SSRes = np.sum((y_good - y_goodest) ** 2) - SST = np.sum((y_good - np.mean(y_good)) ** 2) - Rsquared = 1 - SSRes / SST - axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') - axs[1].cla() - axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') - np.set_printoptions(precision=3) - np.set_printoptions(suppress=True) - xye = np.vstack((xx,yy,ye)).T - print(xye[xye[:,1].argsort()]) - axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) - axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) - axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) - axs[1].set_xlabel('days, from xxxx-01-01') - axs[1].set_ylabel('height (m)') + event.x, event.y, event.xdata, event.ydata)) # only works for non-Notebook backend? 
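+        # Respond only to clicks on the map panel (axs[0]); clicks on the
+        # time-series panel (axs[1]) are ignored.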
+ if event.inaxes is axs[0]: + col = int(event.xdata) + row = int(event.ydata) + xx = data[row, col].get_date() + yy = data[row, col].get_value() + ye = data[row, col].get_uncertainty() + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) + SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) + SSRes = np.sum((y_good - y_goodest) ** 2) + SST = np.sum((y_good - np.mean(y_good)) ** 2) + Rsquared = 1 - SSRes / SST + axs[0].plot(event.xdata, event.ydata, '.', markersize=10, markeredgewidth=1, markeredgecolor='k', color='xkcd:green') + axs[1].cla() + axs[1].errorbar(xx, yy, yerr=ye * 2, linewidth=2, fmt='ko') + np.set_printoptions(precision=3) + np.set_printoptions(suppress=True) + xye = np.vstack((xx,yy,ye)).T + print(xye[xye[:,1].argsort()]) + axs[1].plot(x_good, y_goodest, color='g', linewidth=2, zorder=20) + axs[1].plot(x_good, y_good, '.', color='r', markersize=8, zorder=30) + # axs[1].text(0.1, 0.1, 'R^2 = {:.4f}'.format(Rsquared), transform=ax.transAxes) + axs[1].set_xlabel('data[{}, {}] (days, from xxxx-01-01)'.format(row, col)) + axs[1].set_ylabel('height (m)') return onclick_ipynb diff --git a/requirements.txt b/requirements.txt index aa7eaef..61a89a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ rasterio geopandas matplotlib scikit-image +scikit-learn \ No newline at end of file From fd308eed788faeefd691e167a32059b80fb6994b Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Fri, 21 Apr 2023 06:14:45 +0000 Subject: [PATCH 07/24] Dealing with the removed DODS driver --- carst/libdhdt.py | 24 ++++++++++++------------ carst/libraster.py | 14 ++++++++++---- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index ed5bb22..38439c9 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -37,20 +37,20 @@ def time_wrapper(*args, **kwargs): def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster + """ + resample orig_dem using the extent and spacing provided by resamp_ref_dem + orig_dem: class UtilRaster.SingleRaster object + resamp_ref_dem: class UtilRaster.SingleRaster object + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - This one uses gdal.Warp API. - """ + This one uses gdal.Warp API. 
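+    Example (a hypothetical call; both rasters must be in the same CRS):
+        arr = Resample_Array2(dem, refgeo, resampleAlg='bilinear')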
+ """ - ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) - out_ds = gdal.Warp('tmp.tif', ds, options=opts) - return out_ds.GetRasterBand(1).ReadAsArray() + ds = gdal.Open(orig_dem.fpath) + ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) + out_ds = gdal.Warp('tmp.tif', ds, options=opts) + return out_ds.GetRasterBand(1).ReadAsArray() def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): diff --git a/carst/libraster.py b/carst/libraster.py index a2eb824..2ff0c4b 100644 --- a/carst/libraster.py +++ b/carst/libraster.py @@ -13,11 +13,13 @@ ##### Make URL file to work: deactive the DODS driver # https://github.com/mapbox/rasterio/issues/1000 # https://trac.osgeo.org/gdal/ticket/2696 +# Now using the first method since DODS driver was removed in gdal 3.5. +# https://github.com/OSGeo/gdal/issues/5173 ### method #1 # gdal.GetDriverByName('DODS').Deregister() ### method #2 -os.environ["GDAL_SKIP"] = 'DODS' +# os.environ["GDAL_SKIP"] = 'DODS' ##################################################### @@ -26,12 +28,16 @@ import numpy as np from datetime import datetime try: - import gdal -except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal +except ImportError: + import gdal # this was used until GDAL 3.1 from osgeo import osr # we assume the fpath is the file with .tif or .TIF suffix. +try: + gdal.GetDriverByName('DODS').Deregister() +except AttributeError: # In Case that DODS driver does not exist (for GDAL >= 3.5) + pass def timeit(func): def time_wrapper(*args, **kwargs): From 40aad9ab4299599fc3c70bb84abb036363038556 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Fri, 21 Apr 2023 11:23:12 +0000 Subject: [PATCH 08/24] Update resample_array for COG efficiency --- carst/libdhdt.py | 67 ++- carst/libraster.py | 990 +++++++++++++++++++++++---------------------- 2 files changed, 553 insertions(+), 504 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 38439c9..47685dd 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -35,6 +35,35 @@ def time_wrapper(*args, **kwargs): return dec_func return time_wrapper +def resample_array(source, reference, method='bilinear', destination=None): + """ + resample the source raster using the extent and spacing provided by the reference raster. Two rasters must be in the same CRS. + + source: class UtilRaster.SingleRaster object + reference: class UtilRaster.SingleRaster object + destination: str, filename for the output to be written. If None, a temporary file (/vismem/) will be used. + + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster. + + For better COG processing efficiency, we rasterio to check the if the source and the reference intersect each other. + For fasting resampling, we use GDAL and it C library. 
+ """ + + s_ulx, s_uly, s_lrx, s_lry = source.get_extent() + source_extent = Polygon([(s_ulx, s_uly), (s_lrx, s_uly), (s_lrx, s_lry), (s_ulx, s_lry)]) + ulx, uly, lrx, lry = reference.get_extent() + reference_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) + if source_extent.intersects(reference_extent): + ds = gdal.Open(source.fpath) + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=reference.get_x_res(), yRes=reference.get_y_res(), resampleAlg=method) + if not destination: + out_ds = gdal.Warp('/vsimem/resampled.tif', ds, options=opts) + else: + out_ds = gdal.Warp(destination, ds, options=opts) + return out_ds.GetRasterBand(1).ReadAsArray() + else: + return np.full((reference.get_y_size(), reference.get_x_size()), reference.get_nodata()) + def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): """ @@ -47,8 +76,8 @@ def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): """ ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.GetXRes(), yRes=resamp_ref_dem.GetYRes(), resampleAlg=resampleAlg) + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() + opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.get_x_res(), yRes=resamp_ref_dem.get_y_res(), resampleAlg=resampleAlg) out_ds = gdal.Warp('tmp.tif', ds, options=opts) return out_ds.GetRasterBand(1).ReadAsArray() @@ -67,33 +96,33 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): resamp_method: 'linear', 'cubic', 'quintic', 'nearest' """ - o_ulx, o_uly, o_lrx, o_lry = orig_dem.GetExtent() + o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) - ulx, uly, lrx, lry = resamp_ref_dem.GetExtent() + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) if orig_dem_extent.intersects(resamp_ref_dem_extent): - x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.GetRasterXSize()) - y = np.linspace(o_uly, o_lry - o_yres, orig_dem.GetRasterYSize()) + x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) + y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) z = orig_dem.ReadAsArray() - # z[z == orig_dem.GetNoDataValue()] = np.nan # this line probably needs improvement + # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement if resamp_method == 'nearest': print('resampling method = nearest') xx, yy = np.meshgrid(x, y) points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) values = np.reshape(z, z.size) fun = NearestNDInterpolator(points, values) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) xxnew, yynew = np.meshgrid(xnew, ynew) znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) elif resamp_method == 'linear': - nan_value = orig_dem.GetNoDataValue() + nan_value = orig_dem.get_nodata() print('resampling method = interp2d - ' + resamp_method) fun = interp2d(x, y, z, kind=resamp_method, 
bounds_error=False, copy=False, fill_value=nan_value) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) zmask = np.flipud(fun2(xnew, ynew)) @@ -105,12 +134,12 @@ def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): xx = xx.ravel() yy = yy.ravel() zz = z.ravel() - realdata_pos = zz != orig_dem.GetNoDataValue() + realdata_pos = zz != orig_dem.get_nodata() xx = xx[realdata_pos] yy = yy[realdata_pos] zz = zz[realdata_pos] - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.GetRasterXSize()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.GetRasterYSize()) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') del z gc.collect() @@ -426,8 +455,8 @@ def SetEVMDThreshold(self, ini): def InitTS(self): # ==== Prepare the reference geometry ==== - refgeo_Ysize = self.refgeo.GetRasterYSize() - refgeo_Xsize = self.refgeo.GetRasterXSize() + refgeo_Ysize = self.refgeo.get_y_size() + refgeo_Xsize = self.refgeo.get_x_size() self.ts = np.empty([refgeo_Ysize, refgeo_Xsize], dtype=object) # for j in range(self.ts.shape[0]): # for i in range(self.ts.shape[1]): @@ -682,7 +711,7 @@ def __new__(cls, dem=None, array=None, date=None, uncertainty=None): if dem is not None: # retrieve band 1 array, and then replace NoDataValue by np.nan dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.GetNoDataValue()] = np.nan + dem_array[dem_array == dem.get_nodata()] = np.nan obj = np.asarray(dem_array).view(cls) obj.date = [dem.date] obj.uncertainty = [dem.uncertainty] @@ -718,7 +747,7 @@ def AddDEM(self, dem): self.uncertainty.append(dem.uncertainty) # Add the first band, and then replace NoDataValue by np.nan dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.GetNoDataValue()] = np.nan + dem_array[dem_array == dem.get_nodata()] = np.nan return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) def Date2DayDelta(self): diff --git a/carst/libraster.py b/carst/libraster.py index 2ff0c4b..b06fe4e 100644 --- a/carst/libraster.py +++ b/carst/libraster.py @@ -6,6 +6,7 @@ # last edit: Doc 12 2016 (Change the self.uncertainty in __init__ and add ReadGeolocPoint method) # last edit: Sep 11 2018 (added a new class: RasterVelos) # last edit: Mar 01 2021 (enable URL support) +# last edit: Apr 21 2023 (using rasterio for fast COG processing without changing the input URL) import sys import os @@ -32,6 +33,7 @@ except ImportError: import gdal # this was used until GDAL 3.1 from osgeo import osr +import rasterio # we assume the fpath is the file with .tif or .TIF suffix. try: @@ -52,480 +54,498 @@ def time_wrapper(*args, **kwargs): class SingleRaster: - """ - DEM object. Provide operations like "Unify" (gdalwarp) and "GetPointsFromXYZ" (grdtrack). 
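    Example (hypothetical file and metadata):
        dem = SingleRaster('dem_20150305.tif', date='2015-03-05', uncertainty=4.0)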
- Future improvement: detect I/O error, like this: - - if not os.path.exists(image1_path): - print("\n***** WARNING: \"" + image1_path + "\" not found, make sure full path is provided, skipping corresponding pair...\n"); - continue; - """ - - def __init__(self, fpath, date=None, uncertainty=None): - self.fpath = fpath - self.uncertainty = float(uncertainty) if uncertainty is not None else None - if type(date) is str: - if len(date) == 10: - self.date = datetime.strptime(date, '%Y-%m-%d') - elif len(date) == 8: - self.date = datetime.strptime(date, '%Y%m%d') - elif len(date) == 7: - self.date = datetime.strptime(date, '%y%b%d') # YYMMMDD - else: - raise ValueError("This format of date string is not currently supported." +\ - "Please use YYYY-MM-DD, YYYYMMDD, or YYMMMDD.") - elif type(date) is datetime: - self.date = date - elif date is None: - self.date = None - else: - raise TypeError('The date format is not currently supported.') - self.iscepointer = None - - def SetPath(self, fpath): - self.fpath = fpath - - def GetProjection(self): - ds = gdal.Open(self.fpath) - return ds.GetProjection() - - def GetGeoTransform(self): - """ returns [ulx, xres, xskew, uly, yskew, yres] """ - ds = gdal.Open(self.fpath) - return ds.GetGeoTransform() - - def GetXRes(self): - ds = gdal.Open(self.fpath) - geot = ds.GetGeoTransform() - return geot[1] - - def GetYRes(self): - ds = gdal.Open(self.fpath) - geot = ds.GetGeoTransform() - return geot[-1] - - def GetNoDataValue(self, band=1): - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.GetNoDataValue() - - def SetNoDataValue(self, nodata_val, band=1): - ds = gdal.Open(self.fpath, gdal.GA_Update) - dsband = ds.GetRasterBand(band) - dsband.SetNoDataValue(nodata_val) - dsband.FlushCache() - ds = dsband = None - - def GetProj4(self): - - """ - return projection as Proj.4 string. - """ - - ds = gdal.Open(self.fpath) - wkt_text = ds.GetProjection() - srs = osr.SpatialReference() - srs.ImportFromWkt(wkt_text) - return srs.ExportToProj4() - - def GetRasterXSize(self): - """ This is 'samples' of a image """ - ds = gdal.Open(self.fpath) - return ds.RasterXSize - - def GetRasterYSize(self): - """ This is 'lines' of a image """ - ds = gdal.Open(self.fpath) - return ds.RasterYSize - - def GetExtent(self): - - """ - return extent: ul_x, ul_y, lr_x, lr_y. ul = upper left; lr = lower right. - """ - - ds = gdal.Open(self.fpath) - ulx, xres, xskew, uly, yskew, yres = ds.GetGeoTransform() - lrx = ulx + (ds.RasterXSize * xres) - lry = uly + (ds.RasterYSize * yres) - return ulx, uly, lrx, lry - - def GetDataType(self, band=1): - - """ - return DataType: http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4 - """ - - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.DataType - - - def Unify(self, params): - - """ Use gdalwarp to clip, reproject, and resample a DEM using a given set of params (which can 'unify' all DEMs). 
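    A params example (hypothetical values):
        {'t_srs': 'EPSG:3413', 'tr': '10 10', 'te': '400000 -1300000 500000 -1200000', 'of': 'GTiff'}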
""" - - print('Calling Gdalwarp...') - fpath_warped = self.fpath[:-4] + '_warped' + self.fpath[-4:] - if 'output_dir' in params: - newpath = '/'.join([params['output_dir'], fpath_warped.split('/')[-1]]) - else: - # for pixel tracking (temporarily) - newfolder = 'test_folder' - if not os.path.exists(newfolder): - os.makedirs(newfolder) - newpath = '/'.join([newfolder, fpath_warped.split('/')[-1]]) - newpath = newpath.replace(newpath.split('.')[-1], 'img') - gdalwarp_cmd = 'gdalwarp -t_srs ' + params['t_srs'] + ' -tr ' + params['tr'] + ' -te ' + params['te'] - if 'of' in params: - gdalwarp_cmd += ' -of ' + params['of'] - if 'ot' in params: - gdalwarp_cmd += ' -ot ' + params['ot'] - gdalwarp_cmd += ' -overwrite ' + self.fpath + ' ' + newpath - print(gdalwarp_cmd) - retcode = subprocess.call(gdalwarp_cmd, shell=True) - if retcode != 0: - print('Gdalwarp failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - self.fpath = newpath - - def ReadGeolocPoint(self, x, y, band=1): - - """ - It's almost the same as gdallocationinfo -geoloc srcfile x y - Read a point in the georeferencing system of the raster, and then return the pixel value at that point. - Returns NaN if (x, y) is not within the extent of the raster. - """ - - ulx, uly, lrx, lry = self.GetExtent() - ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() - # print('xbound:', ulx, lrx, 'ybound:', uly, lry) - # if y > 220000: - # print(x, y, 'xbound:', ulx, lrx, 'ybound:', uly, lry) - nodatval = self.GetNoDataValue(band=band) - if nodatval is None: - nodatval = 0 - - if (ulx <= x < lrx) & (uly >= y > lry): - ds = gdal.Open(self.fpath) - px = int((x - ulx) / xres) # x pixel coor - py = int((y - uly) / yres) # y pixel coor - dsband = ds.GetRasterBand(band) - pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) - pixel_val = float(pixel_val) - # print('pixel_val: {}'.format(pixel_val)) - # print('nodata: {}'.format(self.GetNoDataValue(band=band))) - # print('abs: {}'.format(abs(pixel_val - self.GetNoDataValue(band=band)))) - if abs(pixel_val - nodatval) < 1: - pixel_val = np.nan - return pixel_val - else: - return np.nan - - def ReadGeolocPoints(self, x, y, band=1): - - """ - Batch routine for running ReadGeolocPoint many times. - x & y: 1-D array showing the (x, y) of many points. - Returns NaN if (x, y) is not within the extent of the raster. - """ - - ulx, uly, lrx, lry = self.GetExtent() - ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() - - x_in = np.logical_and(ulx <= x, x < lrx) - y_in = np.logical_and(uly >= y, y > lry) - xy_in = np.logical_and(x_in, y_in) - - z = np.empty_like(x) - z[:] = np.nan - idx = np.where(xy_in) - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - - nodatval = self.GetNoDataValue(band=band) - if nodatval is None: - nodatval = 0 - - for i in idx[0]: - # print(x[i]) - px = int((x[i] - ulx) / xres) # x pixel coor - py = int((y[i] - uly) / yres) # y pixel coor - pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) - pixel_val = float(pixel_val) - if abs(pixel_val - nodatval) < 1: - pixel_val = np.nan - z[i] = pixel_val - - return z - - - def ReadAsArray(self, band=1): - - """ The default will return the first band. """ - - ds = gdal.Open(self.fpath) - dsband = ds.GetRasterBand(band) - return dsband.ReadAsArray() - - def Array2Raster(self, array, refdem): - - """ - This is to write array to raster. Be cautious overwritting the old one! 
- refdem (reference DEM) can be either a SingleRaster object or a gdal.Dataset object. - This method will use the projection and the geotransform values from the refdem for the new geotiff file. - """ - - driver = gdal.GetDriverByName('GTiff') - # Saved in gdal 32-bit float geotiff format - out_raster = driver.Create(self.fpath, array.shape[1], array.shape[0], 1, gdal.GDT_Float32) - out_raster.SetGeoTransform( refdem.GetGeoTransform() ) - out_raster.SetProjection( refdem.GetProjection() ) - # Write data to band 1 (becuase this is a brand new file) - # Of course, set nan to NoDataValue - nodatavalue = refdem.GetNoDataValue() if refdem.GetNoDataValue() is not None else -9999.0 - array[np.isnan(array)] = nodatavalue - out_raster.GetRasterBand(1).SetNoDataValue( nodatavalue ) - out_raster.GetRasterBand(1).WriteArray(array) - # Save to file - out_raster.FlushCache() - - def XYZArray2Raster(self, array, projection=''): - - """ - This is to write an xyzfile-like array to raster. Be cautious overwritting the old one! - projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. - This method will use the geotransform values from the xyzfile-like array itself; i.e. - ul coordinate is (min(x), max(y)) and the spacing is unique(diff(unique(x))) and unique(diff(unique(y))). - The nodata value is not set. - """ - - driver = gdal.GetDriverByName('GTiff') - # Saved in gdal 32-bit float geotiff format - xaxis = np.unique(array[:,0]) - yaxis = np.unique(array[:,1]) - out_raster = driver.Create(self.fpath, len(xaxis), len(yaxis), 1, gdal.GDT_Float32) - # here I rounded the number to the 6th decimal to get the spacing, since a small error (around 1e-10) - # would generate when reading the ampcor-off file. - xspacing = np.unique(np.diff(xaxis).round(decimals=7)) - yspacing = np.unique(np.diff(yaxis).round(decimals=7)) - # print(xspacing) - # print(yspacing) - if len(xspacing) == 1 and len(yspacing) == 1: - # assuming no rotation in the Affine transform - new_geotransform = (min(xaxis), xspacing[0], 0, max(yaxis), 0, -yspacing[0]) - else: - raise TypeError('There is someting wrong in XYZ array format. Check the consistency of the x- or y-spacing.') - out_raster.SetGeoTransform( new_geotransform ) - out_raster.SetProjection(projection) - # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) - out_raster.GetRasterBand(1).WriteArray(np.reshape(array[:, 2], (len(yaxis), len(xaxis)))) - out_raster.FlushCache() - - def XYZ2Raster(self, xyzfilename, projection=''): - - """ - This is to write an xyzfile (ascii, sorted) to raster. Be cautious overwritting the old one! - projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. - This method will use the geotransform values from the xyzfile itself. (that is, the first data is at "ul" position.) - The nodata value is not set. - """ - - driver = gdal.GetDriverByName('GTiff') - src_ds = gdal.Open(xyzfilename) - out_raster = driver.CreateCopy(self.fpath, src_ds, 0 ) - out_raster.SetProjection(projection) - out_raster.SetGeoTransform( src_ds.GetGeoTransform() ) - # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) - out_raster.FlushCache() - - - def GetPointsFromXYZ(self, xyzfilename): - - """ - Get points from a xyzfile, using grdtrack. Return the output .xyz file. - Currently the output .xyz file is fixed as 'log_getUncertaintyDEM_grdtrack_output.xyz' - and will be overwritten by later commands. 
- """ - - print('Calling Grdtrack...') - newpath = 'log_getUncertaintyDEM_grdtrack_output.xyz' - grdtrack_cmd = 'grdtrack ' + xyzfilename +\ - ' -G' + self.fpath +\ - ' > ' + newpath - print(grdtrack_cmd) - retcode = subprocess.call(grdtrack_cmd, shell=True) - if retcode != 0: - print('Grdtrack failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - return newpath - - def AmpcorPrep(self): - - """ - Prepare to be used in ampcor. - Ampcor package in ISCE uses a special file pointer for accessing geotiff data. - Therefore, we need ISCE module "Image" for this purpose. - """ - - import isce - from isceobj.Image.Image import Image - - # ==== need a vrt file - # >= Python 3.4 - from pathlib import Path - vrtpath = Path(self.fpath + '.vrt') - if not vrtpath.is_file(): - print('Calling gdalbuildvrt...') - if self.fpath.startswith('http'): - gdalbuildvrt_cmd = 'gdalbuildvrt ' + os.path.basename(self.fpath) + '.vrt ' + self.fpath - else: - gdalbuildvrt_cmd = 'gdalbuildvrt ' + self.fpath + '.vrt ' + self.fpath - print(gdalbuildvrt_cmd) - retcode = subprocess.call(gdalbuildvrt_cmd, shell=True) - if retcode != 0: - print('gdalbuildvrt failed. Please check if all the input parameters are properly set.') - sys.exit(retcode) - # ==================== - - obj = Image() - obj.setFilename(self.fpath) - obj.setWidth(self.GetRasterXSize()) # gdalinfo, first number - if self.GetDataType() <= 3: - obj.setDataType('SHORT') - elif 4 <= self.GetDataType() <= 5: - obj.setDataType('LONG') - elif self.GetDataType() == 6: - obj.setDataType('FLOAT') - elif self.GetDataType() == 7: - obj.setDataType('DOUBLE') # SHORT, LONG, FLOAT, DOUBLE, etc - else: - obj.setDataType('CFLOAT') # not totally right, may be wrong - # obj.setBands(1) # "self" requires a single band - # obj.setAccessMode('read') - obj.setCaster('read', 'FLOAT') # fixed as float - obj.createImage() - self.iscepointer = obj - - def ClippedByPolygon(self, polygon_shapefile): - - """ - return all pixel values within a given polygon shapefile. - according to - https://gis.stackexchange.com/questions/260304/extract-raster-values-within-shapefile-with-pygeoprocessing-or-gdal - """ - # from rasterio import logging - # log = logging.getLogger() - # log.setLevel(logging.ERROR) - - import rasterio - from rasterio.mask import mask - import geopandas as gpd - from shapely.geometry import mapping - - shapefile = gpd.read_file(polygon_shapefile) - geoms = shapefile.geometry.values - # geometry = geoms[0] # shapely geometry - # geoms = [mapping(geoms[0])] # transform to GeJSON format - geoms = [mapping(geoms[i]) for i in range(len(geoms))] - with rasterio.open(self.fpath) as src: - out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0) - # The out_image result is a Numpy masked array - # no_data = src.nodata - # if no_data is None: - # no_data = 0.0 - nodata = -9999.0 - # print(out_image) - # print(out_image.data.shape) - # print(out_image.data) - try: - clipped_data = out_image.data[0] - except NotImplementedError: - clipped_data = out_image[0] - # PROBABLY HAVE TO CHANGE TO out_image[0] HERE - # extract the valid values - # and return them as a numpy 1-D array - # return np.extract(clipped_data != nodata, clipped_data) - return clipped_data - - def GaussianHighPass(self, sigma=3, truncate=1.0): - - """ - Gaussian High Pass filter. Default sigma = 3. 
- """ - - from scipy.ndimage import gaussian_filter - data = self.ReadAsArray() - data = data.astype(float) - if self.GetNoDataValue() is not None: - data[data == self.GetNoDataValue()] = np.nan - else: - data[data == 0] = np.nan # LS-8 case - lowpass = gaussian_filter(data, sigma, truncate=truncate) - highpass = data - lowpass - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - hp_raster_path = tmp.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' - else: - hp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' - hp_raster = SingleRaster(hp_raster_path) - hp_raster.Array2Raster(highpass, self) - self.SetPath(hp_raster_path) - - def GaussianLowPass(self, sigma=1): - - """ - Gaussian Low Pass filter. Default sigma = 1. - deal with nodata values. - https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python - """ - - from scipy.ndimage import gaussian_filter - data = self.ReadAsArray() - # data[data == self.GetNoDataValue()] = np.nan - # lowpass = gaussian_filter(data.astype(float), sigma, truncate=truncate) - nodata_pos = data == self.GetNoDataValue() - data[nodata_pos] = 0 - w = np.ones_like(data) - w[nodata_pos] = 0 - vv = gaussian_filter(data.astype(float), sigma) - ww = gaussian_filter(w.astype(float), sigma) - ww[ww == 0] = np.finfo(float).eps - lowpass = vv / ww - lowpass[nodata_pos] = self.GetNoDataValue() - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - lp_raster_path = tmp.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' - else: - lp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' - lp_raster = SingleRaster(lp_raster_path) - lp_raster.Array2Raster(lowpass, self) - self.SetPath(lp_raster_path) - - def CannyEdge(self, sigma=3): - - """ - Canny Edge filter. Default sigma = 3. - deal with nodata values. - https://scikit-image.org/docs/dev/auto_examples/edges/plot_canny.html - """ - - from skimage import feature - data = self.ReadAsArray() - nodata_pos = data == self.GetNoDataValue() - data[nodata_pos] = 0 - - edges = feature.canny(data, sigma=sigma) - edges[nodata_pos] = self.GetNoDataValue() - - if self.fpath.startswith('http://') or self.fpath.startswith('https://'): - tmp = os.path.basename(self.fpath) - canny_raster_path = tmp.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' - else: - canny_raster_path = self.fpath.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' - canny_raster = SingleRaster(canny_raster_path) - canny_raster.Array2Raster(edges, self) - self.SetPath(canny_raster_path) + """ + DEM object. Provide operations like "Unify" (gdalwarp) and "GetPointsFromXYZ" (grdtrack). + Future improvement: detect I/O error + """ + + def __init__(self, fpath, date=None, uncertainty=None): + self.fpath = fpath + self.uncertainty = float(uncertainty) if uncertainty is not None else None + self.set_date(date) + self.iscepointer = None + + def set_date(self, date): + if type(date) is str: + if len(date) == 10: + self.date = datetime.strptime(date, '%Y-%m-%d') + elif len(date) == 8: + self.date = datetime.strptime(date, '%Y%m%d') + elif len(date) == 7: + self.date = datetime.strptime(date, '%y%b%d') # YYMMMDD + else: + raise ValueError("This format of date string is not currently supported." 
+\ + "Please use YYYY-MM-DD, YYYYMMDD, or YYMMMDD.") + elif type(date) is datetime: + self.date = date + elif date is None: + self.date = None + else: + raise TypeError('The date format is not currently supported.') + + def set_path(self, fpath): + self.fpath = fpath + + def GetProjection(self): + """ + Still using Gdal. + """ + ds = gdal.Open(self.fpath) + return ds.GetProjection() + + def GetGeoTransform(self): + """ + returns [ulx, xres, xskew, uly, yskew, yres] + Still using Gdal. + """ + ds = gdal.Open(self.fpath) + return ds.GetGeoTransform() + + def get_x_res(self): + with rasterio.open(self.fpath) as src: + transform = src.transform + return transform.a + + + def get_y_res(self): + with rasterio.open(self.fpath) as src: + transform = src.transform + return transform.e + + def get_nodata(self): + with rasterio.open(self.fpath) as src: + return src.nodata + + def SetNoDataValue(self, nodata_val, band=1): + """ + Still using Gdal. + """ + ds = gdal.Open(self.fpath, gdal.GA_Update) + dsband = ds.GetRasterBand(band) + dsband.SetNoDataValue(nodata_val) + dsband.FlushCache() + ds = dsband = None + + def GetProj4(self): + + """ + return projection as Proj.4 string. + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + wkt_text = ds.GetProjection() + srs = osr.SpatialReference() + srs.ImportFromWkt(wkt_text) + return srs.ExportToProj4() + + def get_x_size(self): + """ This is 'samples' of a image """ + with rasterio.open(self.fpath) as src: + return src.width + + def get_y_size(self): + """ This is 'lines' of a image """ + with rasterio.open(self.fpath) as src: + return src.height + + def get_extent(self): + """ + return extent: ul_x, ul_y, lr_x, lr_y. ul = upper left; lr = lower right. + """ + with rasterio.open(self.fpath) as src: + ulx, lry, lrx, uly = src.bounds + return ulx, uly, lrx, lry + + def GetDataType(self, band=1): + + """ + return DataType: http://www.gdal.org/gdal_8h.html#a22e22ce0a55036a96f652765793fb7a4 + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + return dsband.DataType + + + def Unify(self, params): + + """ Use gdalwarp to clip, reproject, and resample a DEM using a given set of params (which can 'unify' all DEMs). """ + + print('Calling Gdalwarp...') + fpath_warped = self.fpath[:-4] + '_warped' + self.fpath[-4:] + if 'output_dir' in params: + newpath = '/'.join([params['output_dir'], fpath_warped.split('/')[-1]]) + else: + # for pixel tracking (temporarily) + newfolder = 'test_folder' + if not os.path.exists(newfolder): + os.makedirs(newfolder) + newpath = '/'.join([newfolder, fpath_warped.split('/')[-1]]) + newpath = newpath.replace(newpath.split('.')[-1], 'img') + gdalwarp_cmd = 'gdalwarp -t_srs ' + params['t_srs'] + ' -tr ' + params['tr'] + ' -te ' + params['te'] + if 'of' in params: + gdalwarp_cmd += ' -of ' + params['of'] + if 'ot' in params: + gdalwarp_cmd += ' -ot ' + params['ot'] + gdalwarp_cmd += ' -overwrite ' + self.fpath + ' ' + newpath + print(gdalwarp_cmd) + retcode = subprocess.call(gdalwarp_cmd, shell=True) + if retcode != 0: + print('Gdalwarp failed. Please check if all the input parameters are properly set.') + sys.exit(retcode) + self.fpath = newpath + + def ReadGeolocPoint(self, x, y, band=1): + + """ + It's almost the same as gdallocationinfo -geoloc srcfile x y + Read a point in the georeferencing system of the raster, and then return the pixel value at that point. + Returns NaN if (x, y) is not within the extent of the raster. + Still using Gdal. 
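+
+        A sketch of the intended call (the file name and coordinates are
+        made up for illustration; they are not part of this patch):
+
+            dem = SingleRaster('dem_utm.tif', date='2015-07-01')
+            z = dem.ReadGeolocPoint(432100.0, 7654300.0)   # x, y in the raster's CRS
+            # z is the band-1 value there, or np.nan outside the extent
+            # (or where the pixel matches the nodata value)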
+ """ + + ulx, uly, lrx, lry = self.get_extent() + ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() + # print('xbound:', ulx, lrx, 'ybound:', uly, lry) + # if y > 220000: + # print(x, y, 'xbound:', ulx, lrx, 'ybound:', uly, lry) + nodatval = self.get_nodata() + if nodatval is None: + nodatval = 0 + + if (ulx <= x < lrx) & (uly >= y > lry): + ds = gdal.Open(self.fpath) + px = int((x - ulx) / xres) # x pixel coor + py = int((y - uly) / yres) # y pixel coor + dsband = ds.GetRasterBand(band) + pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) + pixel_val = float(pixel_val) + # print('pixel_val: {}'.format(pixel_val)) + # print('nodata: {}'.format(self.get_nodata())) + # print('abs: {}'.format(abs(pixel_val - self.get_nodata()))) + if abs(pixel_val - nodatval) < 1: + pixel_val = np.nan + return pixel_val + else: + return np.nan + + def ReadGeolocPoints(self, x, y, band=1): + + """ + Batch routine for running ReadGeolocPoint many times. + x & y: 1-D array showing the (x, y) of many points. + Returns NaN if (x, y) is not within the extent of the raster. + Still using Gdal. + """ + + ulx, uly, lrx, lry = self.get_extent() + ulx, xres, xskew, uly, yskew, yres = self.GetGeoTransform() + + x_in = np.logical_and(ulx <= x, x < lrx) + y_in = np.logical_and(uly >= y, y > lry) + xy_in = np.logical_and(x_in, y_in) + + z = np.empty_like(x) + z[:] = np.nan + idx = np.where(xy_in) + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + + nodatval = self.get_nodata() + if nodatval is None: + nodatval = 0 + + for i in idx[0]: + # print(x[i]) + px = int((x[i] - ulx) / xres) # x pixel coor + py = int((y[i] - uly) / yres) # y pixel coor + pixel_val = dsband.ReadAsArray(px, py, 1, 1) # it numpy.array with shape of (1, 1) + pixel_val = float(pixel_val) + if abs(pixel_val - nodatval) < 1: + pixel_val = np.nan + z[i] = pixel_val + + return z + + + def ReadAsArray(self, band=1): + + """ The default will return the first band. + Still using Gdal. + """ + + ds = gdal.Open(self.fpath) + dsband = ds.GetRasterBand(band) + return dsband.ReadAsArray() + + def Array2Raster(self, array, refdem): + + """ + This is to write array to raster. Be cautious overwritting the old one! + refdem (reference DEM) can be either a SingleRaster object or a gdal.Dataset object. + This method will use the projection and the geotransform values from the refdem for the new geotiff file. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + # Saved in gdal 32-bit float geotiff format + out_raster = driver.Create(self.fpath, array.shape[1], array.shape[0], 1, gdal.GDT_Float32) + out_raster.SetGeoTransform( refdem.GetGeoTransform() ) + out_raster.SetProjection( refdem.GetProjection() ) + # Write data to band 1 (becuase this is a brand new file) + # Of course, set nan to NoDataValue + nodatavalue = refdem.get_nodata() if refdem.get_nodata() is not None else -9999.0 + array[np.isnan(array)] = nodatavalue + out_raster.GetRasterBand(1).SetNoDataValue( nodatavalue ) + out_raster.GetRasterBand(1).WriteArray(array) + # Save to file + out_raster.FlushCache() + + def XYZArray2Raster(self, array, projection=''): + + """ + This is to write an xyzfile-like array to raster. Be cautious overwritting the old one! + projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. + This method will use the geotransform values from the xyzfile-like array itself; i.e. 
+ ul coordinate is (min(x), max(y)) and the spacing is unique(diff(unique(x))) and unique(diff(unique(y))). + The nodata value is not set. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + # Saved in gdal 32-bit float geotiff format + xaxis = np.unique(array[:,0]) + yaxis = np.unique(array[:,1]) + out_raster = driver.Create(self.fpath, len(xaxis), len(yaxis), 1, gdal.GDT_Float32) + # here I rounded the number to the 6th decimal to get the spacing, since a small error (around 1e-10) + # would generate when reading the ampcor-off file. + xspacing = np.unique(np.diff(xaxis).round(decimals=7)) + yspacing = np.unique(np.diff(yaxis).round(decimals=7)) + # print(xspacing) + # print(yspacing) + if len(xspacing) == 1 and len(yspacing) == 1: + # assuming no rotation in the Affine transform + new_geotransform = (min(xaxis), xspacing[0], 0, max(yaxis), 0, -yspacing[0]) + else: + raise TypeError('There is someting wrong in XYZ array format. Check the consistency of the x- or y-spacing.') + out_raster.SetGeoTransform( new_geotransform ) + out_raster.SetProjection(projection) + # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) + out_raster.GetRasterBand(1).WriteArray(np.reshape(array[:, 2], (len(yaxis), len(xaxis)))) + out_raster.FlushCache() + + def XYZ2Raster(self, xyzfilename, projection=''): + + """ + This is to write an xyzfile (ascii, sorted) to raster. Be cautious overwritting the old one! + projection is in wkt string. it can be given by the GetProjection() from a SingleRaster or a gdal.Dataset object. + This method will use the geotransform values from the xyzfile itself. (that is, the first data is at "ul" position.) + The nodata value is not set. + Using Gdal. + """ + + driver = gdal.GetDriverByName('GTiff') + src_ds = gdal.Open(xyzfilename) + out_raster = driver.CreateCopy(self.fpath, src_ds, 0 ) + out_raster.SetProjection(projection) + out_raster.SetGeoTransform( src_ds.GetGeoTransform() ) + # out_raster.GetRasterBand(1).SetNoDataValue(nodatavalue) + out_raster.FlushCache() + + + def GetPointsFromXYZ(self, xyzfilename): + + """ + Get points from a xyzfile, using grdtrack. Return the output .xyz file. + Currently the output .xyz file is fixed as 'log_getUncertaintyDEM_grdtrack_output.xyz' + and will be overwritten by later commands. + Using Gdal. + """ + + print('Calling Grdtrack...') + newpath = 'log_getUncertaintyDEM_grdtrack_output.xyz' + grdtrack_cmd = 'grdtrack ' + xyzfilename +\ + ' -G' + self.fpath +\ + ' > ' + newpath + print(grdtrack_cmd) + retcode = subprocess.call(grdtrack_cmd, shell=True) + if retcode != 0: + print('Grdtrack failed. Please check if all the input parameters are properly set.') + sys.exit(retcode) + return newpath + + def AmpcorPrep(self): + + """ + Prepare to be used in ampcor. + Ampcor package in ISCE uses a special file pointer for accessing geotiff data. + Therefore, we need ISCE module "Image" for this purpose. + Using Gdal. + """ + + import isce + from isceobj.Image.Image import Image + + # ==== need a vrt file + # >= Python 3.4 + from pathlib import Path + vrtpath = Path(self.fpath + '.vrt') + if not vrtpath.is_file(): + print('Calling gdalbuildvrt...') + if self.fpath.startswith('http'): + gdalbuildvrt_cmd = 'gdalbuildvrt ' + os.path.basename(self.fpath) + '.vrt ' + self.fpath + else: + gdalbuildvrt_cmd = 'gdalbuildvrt ' + self.fpath + '.vrt ' + self.fpath + print(gdalbuildvrt_cmd) + retcode = subprocess.call(gdalbuildvrt_cmd, shell=True) + if retcode != 0: + print('gdalbuildvrt failed. 
Please check if all the input parameters are properly set.') + sys.exit(retcode) + # ==================== + + obj = Image() + obj.setFilename(self.fpath) + obj.setWidth(self.get_x_size()) # gdalinfo, first number + if self.GetDataType() <= 3: + obj.setDataType('SHORT') + elif 4 <= self.GetDataType() <= 5: + obj.setDataType('LONG') + elif self.GetDataType() == 6: + obj.setDataType('FLOAT') + elif self.GetDataType() == 7: + obj.setDataType('DOUBLE') # SHORT, LONG, FLOAT, DOUBLE, etc + else: + obj.setDataType('CFLOAT') # not totally right, may be wrong + # obj.setBands(1) # "self" requires a single band + # obj.setAccessMode('read') + obj.setCaster('read', 'FLOAT') # fixed as float + obj.createImage() + self.iscepointer = obj + + def ClippedByPolygon(self, polygon_shapefile): + + """ + return all pixel values within a given polygon shapefile. + according to + https://gis.stackexchange.com/questions/260304/extract-raster-values-within-shapefile-with-pygeoprocessing-or-gdal + """ + # from rasterio import logging + # log = logging.getLogger() + # log.setLevel(logging.ERROR) + + import rasterio + from rasterio.mask import mask + import geopandas as gpd + from shapely.geometry import mapping + + shapefile = gpd.read_file(polygon_shapefile) + geoms = shapefile.geometry.values + # geometry = geoms[0] # shapely geometry + # geoms = [mapping(geoms[0])] # transform to GeJSON format + geoms = [mapping(geoms[i]) for i in range(len(geoms))] + with rasterio.open(self.fpath) as src: + out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0) + # The out_image result is a Numpy masked array + # no_data = src.nodata + # if no_data is None: + # no_data = 0.0 + nodata = -9999.0 + # print(out_image) + # print(out_image.data.shape) + # print(out_image.data) + try: + clipped_data = out_image.data[0] + except NotImplementedError: + clipped_data = out_image[0] + # PROBABLY HAVE TO CHANGE TO out_image[0] HERE + # extract the valid values + # and return them as a numpy 1-D array + # return np.extract(clipped_data != nodata, clipped_data) + return clipped_data + + def GaussianHighPass(self, sigma=3, truncate=1.0): + + """ + Gaussian High Pass filter. Default sigma = 3. + Using Gdal. + """ + + from scipy.ndimage import gaussian_filter + data = self.ReadAsArray() + data = data.astype(float) + if self.get_nodata() is not None: + data[data == self.get_nodata()] = np.nan + else: + data[data == 0] = np.nan # LS-8 case + lowpass = gaussian_filter(data, sigma, truncate=truncate) + highpass = data - lowpass + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + hp_raster_path = tmp.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' + else: + hp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GHP-' + str(sigma) + 'sig.tif' + hp_raster = SingleRaster(hp_raster_path) + hp_raster.Array2Raster(highpass, self) + self.set_path(hp_raster_path) + + def GaussianLowPass(self, sigma=1): + + """ + Gaussian Low Pass filter. Default sigma = 1. + deal with nodata values. + https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python + Using Gdal. 
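+
+        The nodata handling below is in effect a normalized convolution: the
+        data (with nodata zeroed) and a 0/1 validity mask are filtered with
+        the same kernel and then divided, so pixels near gaps are not pulled
+        toward zero. A minimal standalone sketch of the same idea, with
+        made-up values:
+
+            import numpy as np
+            from scipy.ndimage import gaussian_filter
+            data = np.array([[1., np.nan], [1., 1.]])
+            w = np.where(np.isnan(data), 0., 1.)
+            vv = gaussian_filter(np.nan_to_num(data), sigma=1)
+            ww = gaussian_filter(w, sigma=1)
+            lowpass = vv / np.maximum(ww, np.finfo(float).eps)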
+ """ + + from scipy.ndimage import gaussian_filter + data = self.ReadAsArray() + # data[data == self.get_nodata()] = np.nan + # lowpass = gaussian_filter(data.astype(float), sigma, truncate=truncate) + nodata_pos = data == self.get_nodata() + data[nodata_pos] = 0 + w = np.ones_like(data) + w[nodata_pos] = 0 + vv = gaussian_filter(data.astype(float), sigma) + ww = gaussian_filter(w.astype(float), sigma) + ww[ww == 0] = np.finfo(float).eps + lowpass = vv / ww + lowpass[nodata_pos] = self.get_nodata() + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + lp_raster_path = tmp.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' + else: + lp_raster_path = self.fpath.rsplit('.', 1)[0] + '_GLP-' + str(sigma) + 'sig.tif' + lp_raster = SingleRaster(lp_raster_path) + lp_raster.Array2Raster(lowpass, self) + self.set_path(lp_raster_path) + + def CannyEdge(self, sigma=3): + + """ + Canny Edge filter. Default sigma = 3. + deal with nodata values. + https://scikit-image.org/docs/dev/auto_examples/edges/plot_canny.html + Using Gdal. + """ + + from skimage import feature + data = self.ReadAsArray() + nodata_pos = data == self.get_nodata() + data[nodata_pos] = 0 + + edges = feature.canny(data, sigma=sigma) + edges[nodata_pos] = self.get_nodata() + + if self.fpath.startswith('http://') or self.fpath.startswith('https://'): + tmp = os.path.basename(self.fpath) + canny_raster_path = tmp.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' + else: + canny_raster_path = self.fpath.rsplit('.', 1)[0] + '_Canny-' + str(sigma) + 'sig.tif' + canny_raster = SingleRaster(canny_raster_path) + canny_raster.Array2Raster(edges, self) + self.set_path(canny_raster_path) @@ -666,7 +686,7 @@ def VeloCorrection(self, vx_zarray, vy_zarray, output_raster_prefix): vx_zarray and vy_zarray are ZArray objects. 
''' - nodata_val = self.mag.GetNoDataValue() + nodata_val = self.mag.get_nodata() nodata_pos = self.mag.ReadAsArray() == nodata_val vx_val_corrected = self.vx.ReadAsArray() - vx_zarray.MAD_median @@ -729,21 +749,21 @@ def VeloCorrection(self, vx_zarray, vy_zarray, output_raster_prefix): def SNR_CutNoise(self, snr_threshold=5): bad_pts = self.snr.ReadAsArray() <= snr_threshold mag_val = self.mag.ReadAsArray() - mag_val[bad_pts] = self.mag.GetNoDataValue() + mag_val[bad_pts] = self.mag.get_nodata() raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '_SNT.tif') raster_mag_cutnoise.Array2Raster(mag_val, self.mag) self.SetMag(raster_mag_cutnoise) def Gaussian_CutNoise(self, sigma=1): mag_val = self.mag.ReadAsArray() - mag_val_ok = Gussian_noise_remover(mag_val, sigma=sigma, nodata_val=self.mag.GetNoDataValue()) + mag_val_ok = Gussian_noise_remover(mag_val, sigma=sigma, nodata_val=self.mag.get_nodata()) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-GAU.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) def MorphoOpen_CutNoise(self, iterations=1): mag_val = self.mag.ReadAsArray() - mag_val_ok = MorphoOpen_noise_remover(mag_val, nodata_val=self.mag.GetNoDataValue(), iterations=1) + mag_val_ok = MorphoOpen_noise_remover(mag_val, nodata_val=self.mag.get_nodata(), iterations=1) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-MOR.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) @@ -751,7 +771,7 @@ def MorphoOpen_CutNoise(self, iterations=1): def SmallObjects_CutNoise(self, min_size=17): from skimage.morphology import remove_small_objects mag_val = self.mag.ReadAsArray() - mag_val_ok = SmallObjects_noise_remover(mag_val, nodata_val=self.mag.GetNoDataValue(), min_size=min_size) + mag_val_ok = SmallObjects_noise_remover(mag_val, nodata_val=self.mag.get_nodata(), min_size=min_size) raster_mag_cutnoise = SingleRaster(self.mag.fpath.rsplit('.', 1)[0] + '-RSO.tif') raster_mag_cutnoise.Array2Raster(mag_val_ok, self.mag) self.SetMag(raster_mag_cutnoise) @@ -766,29 +786,29 @@ def Fahnestock_CutNoise(self): def MaskAllRasters(self): mag_val = self.mag.ReadAsArray() - nodata = mag_val == self.mag.GetNoDataValue() + nodata = mag_val == self.mag.get_nodata() vx_val = self.vx.ReadAsArray() - vx_val[nodata] = self.vx.GetNoDataValue() + vx_val[nodata] = self.vx.get_nodata() raster_vx_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'vx')) raster_vx_cutnoise.Array2Raster(vx_val, self.vx) self.SetVx(raster_vx_cutnoise) vy_val = self.vy.ReadAsArray() - vy_val[nodata] = self.vy.GetNoDataValue() + vy_val[nodata] = self.vy.get_nodata() raster_vy_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'vy')) raster_vy_cutnoise.Array2Raster(vy_val, self.vy) self.SetVy(raster_vy_cutnoise) errx_val = self.errx.ReadAsArray() - errx_val[nodata] = self.errx.GetNoDataValue() + errx_val[nodata] = self.errx.get_nodata() raster_errx_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'errx')) raster_errx_cutnoise.Array2Raster(errx_val, self.errx) self.SetErrx(raster_errx_cutnoise) erry_val = self.erry.ReadAsArray() - erry_val[nodata] = self.erry.GetNoDataValue() + erry_val[nodata] = self.erry.get_nodata() raster_erry_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'erry')) raster_erry_cutnoise.Array2Raster(erry_val, self.erry) self.SetErry(raster_erry_cutnoise) errmag_val = self.errmag.ReadAsArray() - errmag_val[nodata] = self.errmag.GetNoDataValue() + 
errmag_val[nodata] = self.errmag.get_nodata() raster_errmag_cutnoise = SingleRaster(self.mag.fpath.replace('mag', 'errmag')) raster_errmag_cutnoise.Array2Raster(errmag_val, self.errmag) self.SetErrmag(raster_errmag_cutnoise) From 9637faa070054757bf21b4e4560dfaed0efa9020 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 01:06:41 +0000 Subject: [PATCH 09/24] Organize libdhdt --- carst/libdhdt.py | 320 ++------------------------------- extra/unused/Resample_Array.py | 67 +++++++ extra/unused/TimeSeriesDEM.py | 137 ++++++++++++++ extra/unused/wlr.py | 70 ++++++++ 4 files changed, 290 insertions(+), 304 deletions(-) create mode 100644 extra/unused/Resample_Array.py create mode 100644 extra/unused/TimeSeriesDEM.py create mode 100644 extra/unused/wlr.py diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 47685dd..1fb8337 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -1,23 +1,22 @@ -# Class: TimeSeriesDEM(np.ndarray) -# Func: Resample_Array(UtilRaster.SingleRaster, UtilRaster.SingleRaster) -# used for dhdt +# Class: PixelTimeSeries, DemPile +# Func: timeit, resample_array, EVMD group, wlr_corefun, onclick_ipynb # by Whyjay Zheng, Jul 27 2016 # last edit: Apr 17 2023 import numpy as np from numpy.linalg import inv import os -import sys +# import sys try: - import gdal -except: - from osgeo import gdal # sometimes gdal is part of osgeo modules + from osgeo import gdal +except ImportError: + import gdal # this was used until GDAL 3.1 from datetime import datetime from shapely.geometry import Polygon -from scipy.interpolate import interp2d -from scipy.interpolate import NearestNDInterpolator -from scipy.interpolate import griddata -import gc +# from scipy.interpolate import interp2d +# from scipy.interpolate import NearestNDInterpolator +# from scipy.interpolate import griddata +# import gc from carst import ConfParams from carst.libraster import SingleRaster import pickle @@ -37,6 +36,7 @@ def time_wrapper(*args, **kwargs): def resample_array(source, reference, method='bilinear', destination=None): """ + latest version. resample the source raster using the extent and spacing provided by the reference raster. Two rasters must be in the same CRS. source: class UtilRaster.SingleRaster object @@ -46,7 +46,7 @@ def resample_array(source, reference, method='bilinear', destination=None): returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster. For better COG processing efficiency, we rasterio to check the if the source and the reference intersect each other. - For fasting resampling, we use GDAL and it C library. + For fasting resampling, we use GDAL and its C library. """ s_ulx, s_uly, s_lrx, s_lry = source.get_extent() @@ -64,93 +64,12 @@ def resample_array(source, reference, method='bilinear', destination=None): else: return np.full((reference.get_y_size(), reference.get_x_size()), reference.get_nodata()) -def Resample_Array2(orig_dem, resamp_ref_dem, resampleAlg='bilinear'): - - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - - This one uses gdal.Warp API. 
- """ - - ds = gdal.Open(orig_dem.fpath) - ulx, uly, lrx, lry = resamp_ref_dem.get_extent() - opts = gdal.WarpOptions(outputBounds=(ulx, lry, lrx, uly), xRes=resamp_ref_dem.get_x_res(), yRes=resamp_ref_dem.get_y_res(), resampleAlg=resampleAlg) - out_ds = gdal.Warp('tmp.tif', ds, options=opts) - return out_ds.GetRasterBand(1).ReadAsArray() - - -def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): - - """ - resample orig_dem using the extent and spacing provided by resamp_ref_dem - orig_dem: class UtilRaster.SingleRaster object - resamp_ref_dem: class UtilRaster.SingleRaster object - returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster - - Uses linear interpolation because it best represent flat ice surface. - -9999.0 (default nan in a Geotiff) is used to fill area outside the extent of orig_dem. - - resamp_method: 'linear', 'cubic', 'quintic', 'nearest' - - """ - o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() - o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() - orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) - ulx, uly, lrx, lry = resamp_ref_dem.get_extent() - ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() - resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) - if orig_dem_extent.intersects(resamp_ref_dem_extent): - x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) - y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) - z = orig_dem.ReadAsArray() - # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement - if resamp_method == 'nearest': - print('resampling method = nearest') - xx, yy = np.meshgrid(x, y) - points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) - values = np.reshape(z, z.size) - fun = NearestNDInterpolator(points, values) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - xxnew, yynew = np.meshgrid(xnew, ynew) - znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) - elif resamp_method == 'linear': - nan_value = orig_dem.get_nodata() - print('resampling method = interp2d - ' + resamp_method) - fun = interp2d(x, y, z, kind=resamp_method, bounds_error=False, copy=False, fill_value=nan_value) - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. - fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) - zmask = np.flipud(fun2(xnew, ynew)) - znew[zmask <= 0.999] = nan_value - else: - # This is deprecated... - print('resampling method = griddata - ' + resamp_method) - xx, yy = np.meshgrid(x, y) - xx = xx.ravel() - yy = yy.ravel() - zz = z.ravel() - realdata_pos = zz != orig_dem.get_nodata() - xx = xx[realdata_pos] - yy = yy[realdata_pos] - zz = zz[realdata_pos] - xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) - ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) - znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') - del z - gc.collect() - return znew - else: - return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 def EVMD_DBSCAN(x, y, eps=6, min_samples=4): """ See EVMD & EVMD_idx. 
+ Recommended to use this inreplace of EVMD and EVMD_idx. """ # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration. x2d = np.column_stack((x / 365, y)) @@ -269,80 +188,6 @@ def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): return slope, slope_err, resid, count - # ============ Using Cook's Distance ============ - # cookd = ri2 / (p_num * mse) * (h / (1 - h) ** 2) # Cook's Distance - # goodpoint = cookd < 4 / x.size # Bollen, Kenneth A.; Jackman, Robert W. (1990) (see wikipedia) - # # xnew = x[goodpoint] - # # ynew = y[goodpoint] - # # yenew = ye[goodpoint] - # # wnew = [1 / k for k in yenew] - # if np.all(goodpoint): - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # count = x.size - # if resid > 100000: - # print(x,y,ye,cookd,goodpoint) - # return slope, slope_err, resid, count - # else: - # xnew = x[goodpoint] - # ynew = y[goodpoint] - # yenew = ye[goodpoint] - # return wlr_corefun(xnew, ynew, yenew) - - # =========== Old function =========== - # if xnew.size > 4: - # p, c = np.polyfit(xnew, ynew, 1, w=wnew, cov=True) - # _, res, _, _, _ = np.polyfit(xnew, ynew, 1, w=wnew, full=True) - # residual = np.sum((np.polyval(p, xnew) - ynew) ** 2) - # cov_m = c * (len(w) - 4) / res - # error = np.sqrt(cov_m[0, 0]) - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # else: - # error = -9999.0 - # slope = -9999.0 - # slope_err = -9999.0 - - # case = 0 - # w = [1 / k for k in ye] - # if len(x) == 4: - # case = 1 - # # This is to avoid dividing by zero when N = 4 and to give us a better error estimate - # x = np.append(x, x[-1]) - # y = np.append(y, y[-1]) - # ye = np.append(ye, ye[-1]) - # w = np.append(w, sys.float_info.epsilon) - # elif len(x) == 3: - # case = 2 - # # This is to avoid negative Cd^2 when N = 3 and to give us a better error estimate - # x = np.append(x, [x[-1], x[-1]]) - # y = np.append(y, [y[-1], y[-1]]) - # ye = np.append(ye, [ye[-1], ye[-1]]) - # w = np.append(w, [sys.float_info.epsilon, sys.float_info.epsilon]) - # p, c = np.polyfit(x, y, 1, w=w, cov=True) - # _, res, _, _, _ = np.polyfit(x, y, 1, w=w, full=True) - # # where c is the covariance matrix of p -> c[0, 0] is the variance estimate of the slope. - # # what we want is ({G_w}^T G_w)^{-1}, which is equal to c * (N - m - 2) / res - # cov_m = c * (len(w) - 4) / res - # slope = p[0] * 365.25 - # slope_err = np.sqrt(cov_m[0, 0]) * 365.25 - # # slope_error_arr[m, n] = np.sqrt(c[0, 0]) * 365.25 - # # ./point_TS_ver2-2_linreg.py:^^^^^^^^: RuntimeWarning: invalid value encountered in sqrt - # # /data/whyj/Software/anaconda3/lib/python3.5/site-packages/numpy/lib/polynomial.py:606: RuntimeWarning: divide by zero encountered in true_divide - # # fac = resids / (len(x) - order - 2.0) - # if case == 0: - # residual = np.sum((np.polyval(p, x) - y) ** 2) - # elif case == 1: - # residual = np.sum((np.polyval(p, x[:-1]) - y[:-1]) ** 2) - # elif case == 2: - # residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2) - # return slope, slope_err, residual - - - - - - class PixelTimeSeries(object): """ Store all measurements of a single pixel in the reference DEM. @@ -398,7 +243,7 @@ class DemPile(object): is stored in the intermediate pickle file. 
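 
 	A rough sketch of the intended workflow (file names and dates are
 	illustrative only):
 
 	    pile = DemPile(picklepath='ts.p', refgeo=refgeo, refdate='2015-01-01')
 	    pile.AddDEM(dems)   # dems: a list of SingleRaster objects
 	    pile.PileUp()       # resample every DEM onto refgeo and stack it
 	    # each pixel then carries its own (date, value, uncertainty) series,
 	    # later dumped to picklepath as the intermediate pickle file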
""" - def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): + def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None, evmd_threshold=6): self.picklepath = picklepath self.dhdtprefix = dhdtprefix self.ts = None @@ -413,7 +258,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None): self.fitdata = {'slope': [], 'slope_err': [], 'residual': [], 'count': []} self.mosaic = {'value': [], 'date': [], 'uncertainty': []} self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} - self.evmd_threshold = 6 + self.evmd_threshold = evmd_threshold def AddDEM(self, dems): # ==== Add DEM object list ==== @@ -488,7 +333,7 @@ def PileUp(self): if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']: datedelta = self.dems[i].date - self.refdate # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear') - znew = Resample_Array2(self.dems[i], self.refgeo) + znew = resample_array(self.dems[i], self.refgeo) znew_mask = np.logical_and(znew > 0, self.refgeomask) fill_idx = np.where(znew_mask) for m,n in zip(fill_idx[0], fill_idx[1]): @@ -680,137 +525,4 @@ def onclick_ipynb(event): -class TimeSeriesDEM(np.ndarray): - - """ - DEPRECATED IN DHDT v1.0. - - This class can include many DEMs (in UtilRaster.SingleRaster object) and then create a 3-D matrix. - each DEM is aligned along z-axis so that TimeSeriesDEM[m, n, :] would be the time series at pixel (m, n). - """ - - def __new__(cls, dem=None, array=None, date=None, uncertainty=None): - - """ - You need to provide a UtilRaster.SingleRaster object, or provide array, date, and uncertainty separately. - TimeSeriesDEM is a n-m-k ndarray matrix, which n-m is the dem dimension, k is the count of dems. - You need to make sure all input dems have the same size (pixel number). - TimeSeriesDEM.date: a list of datetime object, which the length is k. - TimeSeriesDEM.uncertainty: a list of uncertainty for each DEM. The length is also k. - - example: - tsdem = TimeSeriesDEM(dem=foo) ---> Add single DEM. the method "AddDEM" also does the trick after tsdem is created. - tsdem = TimeSeriesDEM(array=bar, date=bar2, uncertainty=bar3) ---> Add single DEMs or multiple DEMs. - Note that bar.shape[2] == len(bar2) == len(bar3) - - Refered to - http://docs.scipy.org/doc/numpy/user/basics.subclassing.html - http://stackoverflow.com/questions/27910013/how-can-a-class-that-inherits-from-a-numpy-array-change-its-own-values - """ - - if dem is not None: - # retrieve band 1 array, and then replace NoDataValue by np.nan - dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.get_nodata()] = np.nan - obj = np.asarray(dem_array).view(cls) - obj.date = [dem.date] - obj.uncertainty = [dem.uncertainty] - elif all([arg is not None for arg in [array, date, uncertainty]]): - obj = np.asarray(array).view(cls) - obj.date = date if type(date) is list else [date] - obj.uncertainty = uncertainty if type(uncertainty) is list else [uncertainty] - else: - raise ValueError('You need to either set "dem" or set "array, date and uncertainty".') - obj.daydelta = None - obj.weight = None - return obj - - def __array_finalize__(self, obj): - - """ See TimeSeriesDEM.__new__ for comments """ - - if obj is None: return - self.date = getattr(obj, 'date', None) - self.uncertainty = getattr(obj, 'uncertainty', None) - self.daydelta = getattr(obj, 'daydelta', None) - self.weight = getattr(obj, 'weight', None) - - - def AddDEM(self, dem): - - """ - Add a new DEM to the DEM time series. 
- dem is a UtilRaster.SingleRaster object. - """ - - self.date.append(dem.date) - self.uncertainty.append(dem.uncertainty) - # Add the first band, and then replace NoDataValue by np.nan - dem_array = dem.ReadAsArray() - dem_array[dem_array == dem.get_nodata()] = np.nan - return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) - - def Date2DayDelta(self): - - """ - Make self.daydelta from [self.date - min(self.date)] - """ - - t = np.array(self.date) - min(self.date) - self.daydelta = np.array([i.days for i in t]) - - def SetWeight(self): - - """ - Weight is set to 1/sigma^2 - """ - - self.weight = 1.0 / np.array(self.uncertainty) ** 2 - - def Polyfit(self, min_count=5, min_time_span=365, min_year=2000, max_year=2016): - - """ - Note that x and w are all 1-d array like, with the same length of the third dimension of the reg_array. - w is the weight, which is often set to the inverse of data covariance matrix, C_d^-1 - Here w must have the same length of x. - """ - - reg_size = list(self.shape) - pixel_count = reg_size[0] * reg_size[1] - - y = self.reshape(pixel_count, reg_size[2]).T - slope = np.zeros(pixel_count) - slope_err = np.zeros(pixel_count) - intercept = np.zeros(pixel_count) - intercept_err = np.zeros(pixel_count) - for i in range(y.shape[1]): - if i % 10000 == 0: - print("processing " + str(i) + " pixels out of " + str(y.shape[1]) + " pixels") - px_y = y[:, i] - valid_idx = ~np.isnan(px_y) - # judge if a pixel is able to do regression using the given arguments - minlim_idx = np.array(self.date) > datetime(min_year, 1, 1) - maxlim_idx = np.array(self.date) < datetime(max_year, 1, 1) - valid_idx = valid_idx & minlim_idx & maxlim_idx - if sum(valid_idx) < min_count: - slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan - elif max(self.daydelta[valid_idx]) - min(self.daydelta[valid_idx]) < min_time_span: - slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan - # begin the polyfits - else: - px_y = px_y[valid_idx] - px_x = self.daydelta[valid_idx] - px_w = self.weight[valid_idx] - # This method minimizes sum(w * (y_i - y^hat_i) ^ 2) - # Here we set w=np.sqrt(px_w) becuase np.polyfit minimizes sum( (w * (y_i - y^hat_i)) ^ 2) by default. - # Covariance is estimated from multivariate t-distribution. - # Comparing to the v0.1 version, this new method is slightly more conservative - # because it considers the case of small d.o.f. - p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True) - slope[i] = p[0] - slope_err[i] = np.sqrt(c[0, 0]) - intercept[i] = p[1] - intercept_err[i] = np.sqrt(c[1, 1]) - - return slope.reshape(reg_size[:-1]), intercept.reshape(reg_size[:-1]), slope_err.reshape(reg_size[:-1]), intercept_err.reshape(reg_size[:-1]) diff --git a/extra/unused/Resample_Array.py b/extra/unused/Resample_Array.py new file mode 100644 index 0000000..06bf972 --- /dev/null +++ b/extra/unused/Resample_Array.py @@ -0,0 +1,67 @@ +# Old code to resample array. +# A newer version is in libdhdt.py. + +def Resample_Array(orig_dem, resamp_ref_dem, resamp_method='linear'): + + """ + resample orig_dem using the extent and spacing provided by resamp_ref_dem + orig_dem: class UtilRaster.SingleRaster object + resamp_ref_dem: class UtilRaster.SingleRaster object + returns: an numpy array, which you can use the methods in UtilRaster to trasform it into a raster + + Uses linear interpolation because it best represent flat ice surface. + -9999.0 (default nan in a Geotiff) is used to fill area outside the extent of orig_dem. 
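+    For example (raster objects illustrative):
+        znew = Resample_Array(dem, refgeo, resamp_method='linear')
+        # znew is on refgeo's grid; cells outside dem's extent are -9999.0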
+ + resamp_method: 'linear', 'cubic', 'quintic', 'nearest' + + """ + o_ulx, o_uly, o_lrx, o_lry = orig_dem.get_extent() + o_ulx, o_xres, o_xskew, o_uly, o_yskew, o_yres = orig_dem.GetGeoTransform() + orig_dem_extent = Polygon([(o_ulx, o_uly), (o_lrx, o_uly), (o_lrx, o_lry), (o_ulx, o_lry)]) + ulx, uly, lrx, lry = resamp_ref_dem.get_extent() + ulx, xres, xskew, uly, yskew, yres = resamp_ref_dem.GetGeoTransform() + resamp_ref_dem_extent = Polygon([(ulx, uly), (lrx, uly), (lrx, lry), (ulx, lry)]) + if orig_dem_extent.intersects(resamp_ref_dem_extent): + x = np.linspace(o_ulx, o_lrx - o_xres, orig_dem.get_x_size()) + y = np.linspace(o_uly, o_lry - o_yres, orig_dem.get_y_size()) + z = orig_dem.ReadAsArray() + # z[z == orig_dem.get_nodata()] = np.nan # this line probably needs improvement + if resamp_method == 'nearest': + print('resampling method = nearest') + xx, yy = np.meshgrid(x, y) + points = np.stack((np.reshape(xx, xx.size), np.reshape(yy, yy.size)), axis=-1) + values = np.reshape(z, z.size) + fun = NearestNDInterpolator(points, values) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + xxnew, yynew = np.meshgrid(xnew, ynew) + znew = fun(xxnew, yynew) # if the image is big, this may take a long time (much longer than linear approach) + elif resamp_method == 'linear': + nan_value = orig_dem.get_nodata() + print('resampling method = interp2d - ' + resamp_method) + fun = interp2d(x, y, z, kind=resamp_method, bounds_error=False, copy=False, fill_value=nan_value) + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + znew = np.flipud(fun(xnew, ynew)) # I don't know why, but it seems f(xnew, ynew) is upside down. + fun2 = interp2d(x, y, z != nan_value, kind=resamp_method, bounds_error=False, copy=False, fill_value=0) + zmask = np.flipud(fun2(xnew, ynew)) + znew[zmask <= 0.999] = nan_value + else: + # This is deprecated... + print('resampling method = griddata - ' + resamp_method) + xx, yy = np.meshgrid(x, y) + xx = xx.ravel() + yy = yy.ravel() + zz = z.ravel() + realdata_pos = zz != orig_dem.get_nodata() + xx = xx[realdata_pos] + yy = yy[realdata_pos] + zz = zz[realdata_pos] + xnew = np.linspace(ulx, lrx - xres, resamp_ref_dem.get_x_size()) + ynew = np.linspace(uly, lry - yres, resamp_ref_dem.get_y_size()) + znew = griddata((xx, yy), zz, (xnew[None,:], ynew[:,None]), method='linear') + del z + gc.collect() + return znew + else: + return np.ones_like(resamp_ref_dem.ReadAsArray()) * -9999.0 \ No newline at end of file diff --git a/extra/unused/TimeSeriesDEM.py b/extra/unused/TimeSeriesDEM.py new file mode 100644 index 0000000..d9ab888 --- /dev/null +++ b/extra/unused/TimeSeriesDEM.py @@ -0,0 +1,137 @@ +# TimeSeriesDEM is the predecessor of DemPile. +# It is a subclass of np.ndarray, but when the reference area is much bigger than the extent of each DEM, +# This method will take huge unnecessary memory space. + +class TimeSeriesDEM(np.ndarray): + + """ + DEPRECATED IN DHDT v1.0. + + This class can include many DEMs (in UtilRaster.SingleRaster object) and then create a 3-D matrix. + each DEM is aligned along z-axis so that TimeSeriesDEM[m, n, :] would be the time series at pixel (m, n). + """ + + def __new__(cls, dem=None, array=None, date=None, uncertainty=None): + + """ + You need to provide a UtilRaster.SingleRaster object, or provide array, date, and uncertainty separately. 
+ TimeSeriesDEM is a n-m-k ndarray matrix, which n-m is the dem dimension, k is the count of dems. + You need to make sure all input dems have the same size (pixel number). + TimeSeriesDEM.date: a list of datetime object, which the length is k. + TimeSeriesDEM.uncertainty: a list of uncertainty for each DEM. The length is also k. + + example: + tsdem = TimeSeriesDEM(dem=foo) ---> Add single DEM. the method "AddDEM" also does the trick after tsdem is created. + tsdem = TimeSeriesDEM(array=bar, date=bar2, uncertainty=bar3) ---> Add single DEMs or multiple DEMs. + Note that bar.shape[2] == len(bar2) == len(bar3) + + Refered to + http://docs.scipy.org/doc/numpy/user/basics.subclassing.html + http://stackoverflow.com/questions/27910013/how-can-a-class-that-inherits-from-a-numpy-array-change-its-own-values + """ + + if dem is not None: + # retrieve band 1 array, and then replace NoDataValue by np.nan + dem_array = dem.ReadAsArray() + dem_array[dem_array == dem.get_nodata()] = np.nan + obj = np.asarray(dem_array).view(cls) + obj.date = [dem.date] + obj.uncertainty = [dem.uncertainty] + elif all([arg is not None for arg in [array, date, uncertainty]]): + obj = np.asarray(array).view(cls) + obj.date = date if type(date) is list else [date] + obj.uncertainty = uncertainty if type(uncertainty) is list else [uncertainty] + else: + raise ValueError('You need to either set "dem" or set "array, date and uncertainty".') + obj.daydelta = None + obj.weight = None + return obj + + def __array_finalize__(self, obj): + + """ See TimeSeriesDEM.__new__ for comments """ + + if obj is None: return + self.date = getattr(obj, 'date', None) + self.uncertainty = getattr(obj, 'uncertainty', None) + self.daydelta = getattr(obj, 'daydelta', None) + self.weight = getattr(obj, 'weight', None) + + + def AddDEM(self, dem): + + """ + Add a new DEM to the DEM time series. + dem is a UtilRaster.SingleRaster object. + """ + + self.date.append(dem.date) + self.uncertainty.append(dem.uncertainty) + # Add the first band, and then replace NoDataValue by np.nan + dem_array = dem.ReadAsArray() + dem_array[dem_array == dem.get_nodata()] = np.nan + return TimeSeriesDEM(array=np.dstack([self, dem_array]), date=self.date, uncertainty=self.uncertainty) + + def Date2DayDelta(self): + + """ + Make self.daydelta from [self.date - min(self.date)] + """ + + t = np.array(self.date) - min(self.date) + self.daydelta = np.array([i.days for i in t]) + + def SetWeight(self): + + """ + Weight is set to 1/sigma^2 + """ + + self.weight = 1.0 / np.array(self.uncertainty) ** 2 + + def Polyfit(self, min_count=5, min_time_span=365, min_year=2000, max_year=2016): + + """ + Note that x and w are all 1-d array like, with the same length of the third dimension of the reg_array. + w is the weight, which is often set to the inverse of data covariance matrix, C_d^-1 + Here w must have the same length of x. 
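+
+    Since np.polyfit minimizes sum((w_i * (y_i - yhat_i))**2), the body
+    below passes w=np.sqrt(px_w) with px_w = 1/sigma**2, which recovers
+    the usual weighted least squares. A sketch of the core step:
+
+        p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True)
+        slope, slope_err = p[0], np.sqrt(c[0, 0])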
+ """ + + reg_size = list(self.shape) + pixel_count = reg_size[0] * reg_size[1] + + y = self.reshape(pixel_count, reg_size[2]).T + slope = np.zeros(pixel_count) + slope_err = np.zeros(pixel_count) + intercept = np.zeros(pixel_count) + intercept_err = np.zeros(pixel_count) + for i in range(y.shape[1]): + if i % 10000 == 0: + print("processing " + str(i) + " pixels out of " + str(y.shape[1]) + " pixels") + px_y = y[:, i] + valid_idx = ~np.isnan(px_y) + # judge if a pixel is able to do regression using the given arguments + minlim_idx = np.array(self.date) > datetime(min_year, 1, 1) + maxlim_idx = np.array(self.date) < datetime(max_year, 1, 1) + valid_idx = valid_idx & minlim_idx & maxlim_idx + if sum(valid_idx) < min_count: + slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan + elif max(self.daydelta[valid_idx]) - min(self.daydelta[valid_idx]) < min_time_span: + slope[i] = slope_err[i] = intercept[i] = intercept_err[i] = np.nan + # begin the polyfits + else: + px_y = px_y[valid_idx] + px_x = self.daydelta[valid_idx] + px_w = self.weight[valid_idx] + # This method minimizes sum(w * (y_i - y^hat_i) ^ 2) + # Here we set w=np.sqrt(px_w) becuase np.polyfit minimizes sum( (w * (y_i - y^hat_i)) ^ 2) by default. + # Covariance is estimated from multivariate t-distribution. + # Comparing to the v0.1 version, this new method is slightly more conservative + # because it considers the case of small d.o.f. + p, c = np.polyfit(px_x, px_y, 1, w=np.sqrt(px_w), cov=True) + slope[i] = p[0] + slope_err[i] = np.sqrt(c[0, 0]) + intercept[i] = p[1] + intercept_err[i] = np.sqrt(c[1, 1]) + + return slope.reshape(reg_size[:-1]), intercept.reshape(reg_size[:-1]), slope_err.reshape(reg_size[:-1]), intercept_err.reshape(reg_size[:-1]) diff --git a/extra/unused/wlr.py b/extra/unused/wlr.py new file mode 100644 index 0000000..eab937a --- /dev/null +++ b/extra/unused/wlr.py @@ -0,0 +1,70 @@ +# This script archives old code for wlr_corefun in libdhdt.py. + +# ============ Using Cook's Distance ============ + # cookd = ri2 / (p_num * mse) * (h / (1 - h) ** 2) # Cook's Distance + # goodpoint = cookd < 4 / x.size # Bollen, Kenneth A.; Jackman, Robert W. 
(1990) (see wikipedia)
+    # # xnew = x[goodpoint]
+    # # ynew = y[goodpoint]
+    # # yenew = ye[goodpoint]
+    # # wnew = [1 / k for k in yenew]
+    # if np.all(goodpoint):
+    #     slope = p[0] * 365.25
+    #     slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    #     count = x.size
+    #     if resid > 100000:
+    #         print(x,y,ye,cookd,goodpoint)
+    #     return slope, slope_err, resid, count
+    # else:
+    #     xnew = x[goodpoint]
+    #     ynew = y[goodpoint]
+    #     yenew = ye[goodpoint]
+    #     return wlr_corefun(xnew, ynew, yenew)
+
+    # =========== Old function ===========
+    # if xnew.size > 4:
+    #     p, c = np.polyfit(xnew, ynew, 1, w=wnew, cov=True)
+    #     _, res, _, _, _ = np.polyfit(xnew, ynew, 1, w=wnew, full=True)
+    #     residual = np.sum((np.polyval(p, xnew) - ynew) ** 2)
+    #     cov_m = c * (len(w) - 4) / res
+    #     error = np.sqrt(cov_m[0, 0])
+    #     slope = p[0] * 365.25
+    #     slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    # else:
+    #     error = -9999.0
+    #     slope = -9999.0
+    #     slope_err = -9999.0
+
+    # case = 0
+    # w = [1 / k for k in ye]
+    # if len(x) == 4:
+    #     case = 1
+    #     # This is to avoid dividing by zero when N = 4 and to give us a better error estimate
+    #     x = np.append(x, x[-1])
+    #     y = np.append(y, y[-1])
+    #     ye = np.append(ye, ye[-1])
+    #     w = np.append(w, sys.float_info.epsilon)
+    # elif len(x) == 3:
+    #     case = 2
+    #     # This is to avoid negative Cd^2 when N = 3 and to give us a better error estimate
+    #     x = np.append(x, [x[-1], x[-1]])
+    #     y = np.append(y, [y[-1], y[-1]])
+    #     ye = np.append(ye, [ye[-1], ye[-1]])
+    #     w = np.append(w, [sys.float_info.epsilon, sys.float_info.epsilon])
+    # p, c = np.polyfit(x, y, 1, w=w, cov=True)
+    # _, res, _, _, _ = np.polyfit(x, y, 1, w=w, full=True)
+    # # where c is the covariance matrix of p -> c[0, 0] is the variance estimate of the slope.
+    # # what we want is ({G_w}^T G_w)^{-1}, which is equal to c * (N - m - 2) / res
+    # cov_m = c * (len(w) - 4) / res
+    # slope = p[0] * 365.25
+    # slope_err = np.sqrt(cov_m[0, 0]) * 365.25
+    # # slope_error_arr[m, n] = np.sqrt(c[0, 0]) * 365.25
+    # # ./point_TS_ver2-2_linreg.py:^^^^^^^^: RuntimeWarning: invalid value encountered in sqrt
+    # # /data/whyj/Software/anaconda3/lib/python3.5/site-packages/numpy/lib/polynomial.py:606: RuntimeWarning: divide by zero encountered in true_divide
+    # #   fac = resids / (len(x) - order - 2.0)
+    # if case == 0:
+    #     residual = np.sum((np.polyval(p, x) - y) ** 2)
+    # elif case == 1:
+    #     residual = np.sum((np.polyval(p, x[:-1]) - y[:-1]) ** 2)
+    # elif case == 2:
+    #     residual = np.sum((np.polyval(p, x[:-2]) - y[:-2]) ** 2)
+    # return slope, slope_err, residual
\ No newline at end of file

From f73a1fc14203f748042050aefd22cc1de2d2f2f7 Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 03:23:45 +0000
Subject: [PATCH 10/24] Add do_evmd

---
 carst/libdhdt.py | 79 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 8 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 1fb8337..1349ecc 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -70,12 +70,18 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4):
     """
     See EVMD & EVMD_idx.
     Recommended to use this in place of EVMD and EVMD_idx.
+    verified_y_labels:
+        -1: outliers
+        0: 1st cluster
+        1: 2nd cluster
+        ...
     """
     # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration.
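     # With the day counts rescaled to years and y in elevation units, eps acts as a
     # single Euclidean radius in the mixed (time, elevation) space, so one threshold
     # screens both axes at once.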
x2d = np.column_stack((x / 365, y)) if x2d.shape[0] >= 2: db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) - verified_y_labels = db.labels_ >= 0 + # verified_y_labels = db.labels_ >= 0 + verified_y_labels = db.labels_ # verified_y_labels_idx = np.where(verified_y_labels) if any(verified_y_labels): exitstate = 1 @@ -83,7 +89,8 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4): exitstate = -1 else: exitstate = -1 - verified_y_labels = np.full_like(y, False) + # verified_y_labels = np.full_like(y, False) + verified_y_labels = np.full_like(y, -1) return exitstate, verified_y_labels @@ -141,7 +148,8 @@ def EVMD_idx(y, validated_value, threshold=6): def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - exitstate, idx = EVMD_DBSCAN(x, y, eps=evmd_threshold) + exitstate, labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + idx = labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size @@ -195,12 +203,14 @@ class PixelTimeSeries(object): Column 1: date Column 2: value Column 3: uncertainty + EVMD labels are also stored in this object. """ def __init__(self, data=[]): data = np.ndarray([0, 3]) if not data else data # [] --> np.ndarray([0, 3]) data = np.array(data) if type(data) is not np.ndarray else data # [1, 2, 3] --> np.array([1, 2, 3]) self.verify_record(data) self.data = data + self.evmd_labels = None def __repr__(self): return 'PixelTimeSeries({})'.format(self.data) @@ -216,7 +226,7 @@ def verify_record(record): pass else: raise ValueError("Inconsistent input record. Must be an n-by-3 array.") - + def add_record(self, record): """ Add one or more records to the time series. @@ -225,6 +235,20 @@ def add_record(self, record): self.verify_record(record) self.data = np.vstack((self.data, record)) + def verify_evmd_labels(self, labels): + """ + Verify if the input EVMD labels meet the format requirements. + """ + if labels.size == self.data.shape[0]: + pass + else: + raise ValueError("Inconsistent EVMD label input. 
Must have n labels, where n equals the number of data points.")
+
+    def add_evmd_labels(self, labels):
+        labels = np.array(labels) if type(labels) is not np.ndarray else labels
+        self.verify_evmd_labels(labels)
+        self.evmd_labels = labels
+
     def get_date(self):
         return self.data[:, 0]
 
@@ -309,7 +333,7 @@ def InitTS(self):
         # self.ts = [[{'date': [], 'uncertainty': [], 'value': []} for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)]
         # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)]
         print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask)))
-
+
     def ReadConfig(self, ini):
         if type(ini) is str:
             ini = ConfParams(ini)
@@ -446,9 +470,9 @@ def form_mosaic(self, order='ascending', method='DBSCAN'):
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
                 if method == 'DBSCAN':
-                    exitstate, verified_y_labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
                     if exitstate >= 0:
-                        verified_y_labels_idx = np.where(verified_y_labels)[0]
+                        verified_y_labels_idx = np.where(labels >= 0)[0]
                         if order == 'descending':
                             idx = verified_y_labels_idx[-1]
                         elif order == 'ascending':
@@ -477,7 +501,46 @@ def form_mosaic(self, order='ascending', method='DBSCAN'):
         mosaic_uncertainty = SingleRaster('{}_mosaic-{}_uncertainty.tif'.format(self.dhdtprefix, order))
         mosaic_value.Array2Raster(self.mosaic['value'], self.refgeo)
         mosaic_date.Array2Raster(self.mosaic['date'], self.refgeo)
-        mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
+        mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
+
+    @timeit
+    def do_evmd(self, parallel=True):
+        if parallel:
+            import dask
+            from dask.diagnostics import ProgressBar
+            def batch(seq):
+                sub_results = []
+                for x in seq:
+                    date = x.get_date()
+                    value = x.get_value()
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    sub_results.append(labels)
+                return sub_results
+
+            batches = []
+            for m in range(self.ts.shape[0]):
+                result_batch = dask.delayed(batch)(self.ts[m, :])
+                batches.append(result_batch)
+
+            with ProgressBar():
+                results = dask.compute(batches)
+
+            for m in range(self.ts.shape[0]):
+                for n in range(self.ts.shape[1]):
+                    self.ts[m, n].add_evmd_labels(results[0][m][n])
+        else:
+            for m in range(self.ts.shape[0]):
+                self.display_progress(m, self.ts.shape[0])
+                for n in range(self.ts.shape[1]):
+                    date = self.ts[m, n].get_date()
+                    value = self.ts[m, n].get_value()
+                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
+                    self.ts[m, n].add_evmd_labels(labels)
+
+    @staticmethod
+    def display_progress(m, total):
+        if m % 100 == 0:
+            print(f'{m}/{total} lines processed')
 
     def viz(self):
         dhdt_raster, _, _, _ = self.ShowDhdtTifs()

From c43795cbb32641b4982883400bc08522aacfed2d Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 04:06:24 +0000
Subject: [PATCH 11/24] Parallelize Polyfit

---
 carst/libdhdt.py | 155 +++++++++++++++++++++++++++--------------------
 1 file changed, 90 insertions(+), 65 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 1349ecc..5bcb4ff 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -78,7 +78,7 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4):
     """
     # x, assuming the temporal axis (unit=days), is scaled by 365 for proper eps consideration.
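     # The size guard below (x2d.shape[0] >= min_samples) reflects that DBSCAN cannot
     # form a cluster from fewer than min_samples points; shorter records exit early
     # with exitstate = -1 and every label set to -1.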
x2d = np.column_stack((x / 365, y)) - if x2d.shape[0] >= 2: + if x2d.shape[0] >= min_samples: db = DBSCAN(eps=eps, min_samples=min_samples).fit(x2d) # verified_y_labels = db.labels_ >= 0 verified_y_labels = db.labels_ @@ -145,11 +145,15 @@ def EVMD_idx(y, validated_value, threshold=6): validated_range = [min(tmp), max(tmp)] return idx -def wlr_corefun(x, y, ye, evmd_threshold=6, detailed=False): +def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False): # wlr = weighted linear regression. # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) - exitstate, labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) - idx = labels >= 0 + if evmd_labels is None: + exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + else: + exitstate = 1 if any(evmd_labels >= 0) else -1 + + idx = evmd_labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size @@ -215,6 +219,15 @@ def __init__(self, data=[]): def __repr__(self): return 'PixelTimeSeries({})'.format(self.data) + def get_date(self): + return self.data[:, 0] + + def get_value(self): + return self.data[:, 1] + + def get_uncertainty(self): + return self.data[:, 2] + @staticmethod def verify_record(record): """ @@ -249,14 +262,20 @@ def add_evmd_labels(self, labels): self.verify_evmd_labels(labels) self.evmd_labels = labels - def get_date(self): - return self.data[:, 0] - - def get_value(self): - return self.data[:, 1] + def do_evmd(self, eps=6): + date = self.get_date() + value = self.get_value() + exitstate, labels = EVMD_DBSCAN(date, value, eps=eps) + return exitstate, labels - def get_uncertainty(self): - return self.data[:, 2] + def do_wlr(self, evmd_threshold=6): + date = self.get_date() + value = self.get_value() + uncertainty = self.get_uncertainty() + evmd_labels = self.evmd_labels + single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold) + # single_results is (slope, slope_err, residual, count) + return single_results class DemPile(object): @@ -381,56 +400,65 @@ def DumpPickle(self): def LoadPickle(self): self.ts = pickle.load( open(self.picklepath, "rb") ) - + + def init_fitdata(self): + # ==== Create final array ==== + self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata()) + @timeit - def Polyfit(self): + def Polyfit(self, parallel=False): # ==== Create final array ==== - self.fitdata['slope'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['slope_err'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['residual'] = np.ones_like(self.ts, dtype=float) * -9999 - self.fitdata['count'] = np.ones_like(self.ts, dtype=float) * -9999 + self.init_fitdata() # ==== Weighted regression ==== - print('m total: ', len(self.ts)) - for m in range(len(self.ts)): - if m % 100 == 0: - print(m) - for n in range(len(self.ts[0])): - # self.fitdata['count'][m, n] = len(self.ts[m][n]['date']) - # if len(self.ts[m][n]['date']) >= 3: - date = self.ts[m, n].get_date() - # date = np.array(self.ts[m][n]['date']) - uncertainty = self.ts[m, n].get_uncertainty() - # uncertainty = np.array(self.ts[m][n]['uncertainty']) - value = self.ts[m, n].get_value() - # value = np.array(self.ts[m][n]['value']) 
- # pin_value = pin_dem_array[m ,n] - # pin_date = pin_dem_date_array[m, n] - # date, uncertainty, value = filter_by_slope(date, uncertainty, value, pin_date, pin_value) - # date, uncertainty, value = filter_by_redundancy(date, uncertainty, value) - - # slope_ref = [(value[i] - pin_value) / float(date[i] - pin_date) * 365.25 for i in range(len(value))] - # for i in reversed(range(len(slope_ref))): - # if (slope_ref[i] > dhdt_limit_upper) or (slope_ref[i] < dhdt_limit_lower): - # _ = date.pop(i) - # _ = uncertainty.pop(i) - # _ = value.pop(i) - # self.fitdata['count'][m, n] = len(date) - - # Whyjay: May 10, 2018: cancelled the min date span (date[-1] - date[0] > 0), previously > 200 - # if (len(np.unique(date)) >= 3) and (date[-1] - date[0] > 0): - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = wlr_corefun(date, value, uncertainty, self.evmd_threshold) - # if residual > 100: - # print(date, value, uncertainty) - self.fitdata['slope'][m, n] = slope - self.fitdata['slope_err'][m, n] = slope_err - self.fitdata['residual'][m, n] = residual - self.fitdata['count'][m, n] = count - # else: - # self.fitdata['count'] = len(ref_dem_TS[m][n]['date']) - # elif (date[-1] - date[0] > 0): - # slope_arr[m, n] = (value[1] - value[0]) / float(date[1] - date[0]) * 365.25 - # self.fitdata['count'][~self.refgeomask] = -9999 + if parallel: + print('sds') + import dask + from dask.diagnostics import ProgressBar + def batch(seq): + sub_results = [] + for x in seq: + date = x.get_date() + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + single_results = x.do_wlr(evmd_threshold=self.evmd_threshold) + # single_results is (slope, slope_err, residual, count) + sub_results.append(single_results) + return sub_results + + batches = [] + for m in range(self.ts.shape[0]): + result_batch = dask.delayed(batch)(self.ts[m, :]) + batches.append(result_batch) + + with ProgressBar(): + results = dask.compute(batches) + + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + self.fitdata['slope'][m, n] = results[0][m][n][0] + self.fitdata['slope_err'][m, n] = results[0][m][n][1] + self.fitdata['residual'][m, n] = results[0][m][n][2] + self.fitdata['count'][m, n] = results[0][m][n][3] + + else: + for m in range(self.ts.shape[0]): + self.display_progress(m, self.ts.shape[0]) + for n in range(self.ts.shape[1]): + date = self.ts[m, n].get_date() + # uncertainty = self.ts[m, n].get_uncertainty() + # value = self.ts[m, n].get_value() + # evmd_labels = self.ts[m, n].evmd_labels + + if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: + slope, slope_err, residual, count = self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold) + # if residual > 100: + # print(date, value, uncertainty) + self.fitdata['slope'][m, n] = slope + self.fitdata['slope_err'][m, n] = slope_err + self.fitdata['residual'][m, n] = residual + self.fitdata['count'][m, n] = count def Fitdata2File(self): # ==== Write to file ==== @@ -504,16 +532,14 @@ def form_mosaic(self, order='ascending', method='DBSCAN'): mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo) @timeit - def do_evmd(self, parallel=True): + def do_evmd(self, parallel=False): if parallel: import dask from dask.diagnostics import ProgressBar def batch(seq): sub_results = [] for x in seq: - date = x.get_date() - value = x.get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold) + exitstate, labels = 
x.do_evmd(eps=self.evmd_threshold) sub_results.append(labels) return sub_results @@ -532,10 +558,9 @@ def batch(seq): for m in range(self.ts.shape[0]): self.display_progress(m, self.ts.shape[0]) for n in range(self.ts.shape[1]): - date = self.ts[m, n].get_date() - value = self.ts[m, n].get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold) + exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold) self.ts[m, n].add_evmd_labels(labels) + @staticmethod def display_progress(m, total): From b5c8430479ba9eb02e86880a0f2acd1bbf6cd482 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 06:09:52 +0000 Subject: [PATCH 12/24] Change variable names for PEP8 (dhdt only for now) --- bin/dhdt.py | 30 ++++++++++++--------------- carst/libdhdt.py | 54 ++++++++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/bin/dhdt.py b/bin/dhdt.py index 9d0e5a3..5e0336b 100755 --- a/bin/dhdt.py +++ b/bin/dhdt.py @@ -30,34 +30,30 @@ # ==== Read ini file ==== inipath = args.config_file -# ini = ConfParams(inipath) -# ini.ReadParam() -# ini.VerifyParam() # ==== Create a DemPile object and load the config file into the object ==== a = DemPile() -# a.ReadConfig(ini) -a.ReadConfig(inipath) +a.read_config(inipath) # ==== Run main processes ==== if args.step is None: - a.InitTS() - a.PileUp() - a.DumpPickle() - a.Polyfit() - a.Fitdata2File() + a.init_ts() + a.pileup() + a.dump_pickle() + a.polyfit() + a.fitdata2file() elif args.step == 'stack': - a.InitTS() - a.PileUp() - a.DumpPickle() + a.init_ts() + a.pileup() + a.dump_pickle() elif args.step == 'dhdt': - a.LoadPickle() - a.Polyfit() - a.Fitdata2File() + a.load_pickle() + a.polyfit() + a.fitdata2file() elif args.step == 'viewts': - a.LoadPickle() + a.load_pickle() a.viz() plt.show() # data = a.ts diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 5bcb4ff..17fb0fc 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -13,10 +13,6 @@ import gdal # this was used until GDAL 3.1 from datetime import datetime from shapely.geometry import Polygon -# from scipy.interpolate import interp2d -# from scipy.interpolate import NearestNDInterpolator -# from scipy.interpolate import griddata -# import gc from carst import ConfParams from carst.libraster import SingleRaster import pickle @@ -303,7 +299,7 @@ def __init__(self, picklepath=None, refgeo=None, refdate=None, dhdtprefix=None, self.maskparam = {'max_uncertainty': 9999, 'min_time_span': 0} self.evmd_threshold = evmd_threshold - def AddDEM(self, dems): + def add_dem(self, dems): # ==== Add DEM object list ==== if type(dems) is list: self.dems = self.dems + dems @@ -312,13 +308,13 @@ def AddDEM(self, dems): else: raise ValueError("This DEM type is neither a SingleRaster object nor a list of SingleRaster objects.") - def SortByDate(self): + def sort_by_date(self): # ==== sort DEMs by date (ascending order) ==== dem_dates = [i.date for i in self.dems] dateidx = np.argsort(dem_dates) self.dems = [self.dems[i] for i in dateidx] - def SetRefGeo(self, refgeo): + def set_refgeo(self, refgeo): # ==== Prepare the reference geometry ==== if type(refgeo) is str: self.refgeo = SingleRaster(refgeo) @@ -328,20 +324,20 @@ def SetRefGeo(self, refgeo): raise ValueError("This refgeo must be either a SingleRaster object or a path to a geotiff file.") self.refgeomask = self.refgeo.ReadAsArray().astype(bool) - def SetRefDate(self, datestr): + def set_refdate(self, datestr): self.refdate = datetime.strptime(datestr, '%Y-%m-%d') - def 
SetMaskParam(self, ini): + def set_mask_params(self, ini): if 'max_uncertainty' in ini.settings: self.maskparam['max_uncertainty'] = float(ini.settings['max_uncertainty']) if 'min_time_span' in ini.settings: self.maskparam['min_time_span'] = float(ini.settings['min_time_span']) - def SetEVMDThreshold(self, ini): + def set_evmd_threshold(self, ini): if 'evmd_threshold' in ini.regression: self.evmd_threshold = float(ini.regression['evmd_threshold']) - def InitTS(self): + def init_ts(self): # ==== Prepare the reference geometry ==== refgeo_Ysize = self.refgeo.get_y_size() refgeo_Xsize = self.refgeo.get_x_size() @@ -353,22 +349,22 @@ def InitTS(self): # self.ts = [[ [] for i in range(refgeo_Xsize)] for j in range(refgeo_Ysize)] print('total number of pixels to be processed: {}'.format(np.sum(self.refgeomask))) - def ReadConfig(self, ini): + def read_config(self, ini): if type(ini) is str: ini = ConfParams(ini) ini.ReadParam() ini.VerifyParam() self.picklepath = ini.result['picklefile'] self.dhdtprefix = ini.result['dhdt_prefix'] - self.AddDEM(ini.GetDEM()) - self.SortByDate() - self.SetRefGeo(ini.refgeometry['gtiff']) - self.SetRefDate(ini.settings['refdate']) - self.SetMaskParam(ini) - self.SetEVMDThreshold(ini) + self.add_dem(ini.GetDEM()) + self.sort_by_date() + self.set_refgeo(ini.refgeometry['gtiff']) + self.set_refdate(ini.settings['refdate']) + self.set_mask_params(ini) + self.set_evmd_threshold(ini) @timeit - def PileUp(self): + def pileup(self): # ==== Start to read every DEM and save it to our final array ==== ts = [[ [] for n in range(self.ts.shape[1])] for m in range(self.ts.shape[0])] for i in range(len(self.dems)): @@ -395,21 +391,21 @@ def PileUp(self): for n in range(self.ts.shape[1]): self.ts[m, n] = PixelTimeSeries(ts[m][n]) - def DumpPickle(self): + def dump_pickle(self): pickle.dump(self.ts, open(self.picklepath, "wb")) - def LoadPickle(self): + def load_pickle(self): self.ts = pickle.load( open(self.picklepath, "rb") ) def init_fitdata(self): # ==== Create final array ==== - self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata()) - self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata()) + self.fitdata['slope'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['slope_err'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) @timeit - def Polyfit(self, parallel=False): + def polyfit(self, parallel=False): # ==== Create final array ==== self.init_fitdata() # ==== Weighted regression ==== @@ -460,7 +456,7 @@ def batch(seq): self.fitdata['residual'][m, n] = residual self.fitdata['count'][m, n] = count - def Fitdata2File(self): + def fitdata2file(self): # ==== Write to file ==== dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') @@ -471,7 +467,7 @@ def Fitdata2File(self): dhdt_res.Array2Raster(self.fitdata['residual'], self.refgeo) dhdt_count.Array2Raster(self.fitdata['count'], self.refgeo) - def ShowDhdtTifs(self): + def show_dhdt_tifs(self): dhdt_dem = SingleRaster(self.dhdtprefix + '_dhdt.tif') dhdt_error = SingleRaster(self.dhdtprefix + '_dhdt_error.tif') dhdt_res = SingleRaster(self.dhdtprefix + 
'_dhdt_residual.tif')
@@ -568,7 +564,7 @@ def display_progress(m, total):
             print(f'{m}/{total} lines processed')
 
     def viz(self):
-        dhdt_raster, _, _, _ = self.ShowDhdtTifs()
+        dhdt_raster, _, _, _ = self.show_dhdt_tifs()
         img = dhdt_raster.ReadAsArray()
         img[img < -9000] = np.nan  # might need to change

From f044c5561f5a7460c7658b49feaa3c414161ffdc Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 22 Apr 2023 08:02:52 +0000
Subject: [PATCH 13/24] Handling COG IO error

---
 carst/libdhdt.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 17fb0fc..8316555 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -18,6 +18,7 @@
 import pickle
 import matplotlib.pyplot as plt
 from sklearn.cluster import DBSCAN
+from rasterio.errors import RasterioIOError
 
 def timeit(func):
     def time_wrapper(*args, **kwargs):
@@ -371,17 +372,17 @@ def pileup(self):
             print('{}) {}'.format(i + 1, os.path.basename(self.dems[i].fpath) ))
             if self.dems[i].uncertainty <= self.maskparam['max_uncertainty']:
                 datedelta = self.dems[i].date - self.refdate
-                # znew = Resample_Array(self.dems[i], self.refgeo, resamp_method='linear')
-                znew = resample_array(self.dems[i], self.refgeo)
+                try:
+                    znew = resample_array(self.dems[i], self.refgeo)
+                except RasterioIOError as inst:    # report and skip any DEM that fails to open (e.g., a bad URL)
+                    print(inst)
+                    continue
                 znew_mask = np.logical_and(znew > 0, self.refgeomask)
                 fill_idx = np.where(znew_mask)
                 for m,n in zip(fill_idx[0], fill_idx[1]):
                     record = [datedelta.days, znew[m, n], self.dems[i].uncertainty]
-                    # self.ts[m, n].add_record(record)
                     ts[m][n] += [record]
-                    # self.ts[m][n]['date'] += [datedelta.days]
-                    # self.ts[m][n]['uncertainty'] += [self.dems[i].uncertainty]
-                    # self.ts[m][n]['value'] += [znew[m, n]]
+
             else:
                 print("This one won't be piled up because its uncertainty ({}) exceeds the maximum uncertainty allowed ({})."
.format(self.dems[i].uncertainty, self.maskparam['max_uncertainty'])) From c704e3ff1a826da956ee3f6fa418b970134ce915 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 22 Apr 2023 14:39:36 +0000 Subject: [PATCH 14/24] Fix parallel bug for distributed scheduler --- carst/libdhdt.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 8316555..e6b4fff 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -411,22 +411,23 @@ def polyfit(self, parallel=False): self.init_fitdata() # ==== Weighted regression ==== if parallel: - print('sds') import dask from dask.diagnostics import ProgressBar - def batch(seq): + def batch(seq, min_time_span, evmd_threshold, nodata): sub_results = [] for x in seq: date = x.get_date() - if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - single_results = x.do_wlr(evmd_threshold=self.evmd_threshold) + if date.size >= 2 and date[-1] - date[0] > min_time_span: + single_results = x.do_wlr(evmd_threshold=evmd_threshold) # single_results is (slope, slope_err, residual, count) + else: + single_results = (nodata, nodata, nodata, nodata) sub_results.append(single_results) return sub_results batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :]) + result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) batches.append(result_batch) with ProgressBar(): @@ -533,16 +534,16 @@ def do_evmd(self, parallel=False): if parallel: import dask from dask.diagnostics import ProgressBar - def batch(seq): + def batch(seq, evmd_threshold): sub_results = [] for x in seq: - exitstate, labels = x.do_evmd(eps=self.evmd_threshold) + exitstate, labels = x.do_evmd(eps=evmd_threshold) sub_results.append(labels) return sub_results batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :]) + result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold) batches.append(result_batch) with ProgressBar(): From b7672c1550df0f71292a159daa6e224993db1234 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sun, 23 Apr 2023 01:07:21 +0000 Subject: [PATCH 15/24] Update form mosaic --- carst/libdhdt.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e6b4fff..6c5a3be 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -1,7 +1,7 @@ # Class: PixelTimeSeries, DemPile # Func: timeit, resample_array, EVMD group, wlr_corefun, onclick_ipynb # by Whyjay Zheng, Jul 27 2016 -# last edit: Apr 17 2023 +# last edit: Apr 23 2023 import numpy as np from numpy.linalg import inv @@ -80,7 +80,7 @@ def EVMD_DBSCAN(x, y, eps=6, min_samples=4): # verified_y_labels = db.labels_ >= 0 verified_y_labels = db.labels_ # verified_y_labels_idx = np.where(verified_y_labels) - if any(verified_y_labels): + if any(verified_y_labels >= 0): exitstate = 1 else: exitstate = -1 @@ -405,6 +405,11 @@ def init_fitdata(self): self.fitdata['residual'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) self.fitdata['count'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + def init_mosaic(self): + self.mosaic['value'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.mosaic['date'] = np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) + @timeit def 
polyfit(self, parallel=False):
         # ==== Create final array ====
@@ -477,7 +482,7 @@ def show_dhdt_tifs(self):
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count
 
     @timeit
-    def form_mosaic(self, order='ascending', method='DBSCAN'):
+    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
         """
         order options:
             ascending: early elevations will be populated first
             descending: recent elevations will be populated first
         method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
-        self.mosaic['value'] = np.full_like(self.ts, np.nan, dtype=float)
-        self.mosaic['date'] = np.full_like(self.ts, np.nan, dtype=float)
-        self.mosaic['uncertainty'] = np.full_like(self.ts, np.nan, dtype=float)
+        self.init_mosaic()
+        if method == 'DBSCAN' and self.ts[0, 0].evmd_labels is None:
+            print('No EVMD labels detected. Run do_evmd first.')
+            self.do_evmd(parallel=parallel)
         for m in range(self.ts.shape[0]):
-            if m % 100 == 0:
-                print(m)
+            self.display_progress(m, self.ts.shape[0])
             for n in range(self.ts.shape[1]):
                 date = self.ts[m, n].get_date()
                 uncertainty = self.ts[m, n].get_uncertainty()
                 value = self.ts[m, n].get_value()
                 if method == 'DBSCAN':
-                    exitstate, labels = EVMD_DBSCAN(date, value, eps=self.evmd_threshold)
-                    if exitstate >= 0:
+                    labels = self.ts[m, n].evmd_labels
+                    if any(labels >= 0):
                         verified_y_labels_idx = np.where(labels >= 0)[0]
                         if order == 'descending':
                             idx = verified_y_labels_idx[-1]
@@ -565,13 +570,18 @@ def display_progress(m, total):
         if m % 100 == 0:
             print(f'{m}/{total} lines processed')
 
-    def viz(self):
+    def viz(self, figsize=(8,8), clim=(-6, 6)):
         dhdt_raster, _, _, _ = self.show_dhdt_tifs()
+        try:
+            nodata = dhdt_raster.get_nodata()
+        except RasterioIOError:
+            print(f'No such file: {dhdt_raster.fname} -- We recommend running polyfit first.')
+            print('TBD')
         img = dhdt_raster.ReadAsArray()
-        img[img < -9000] = np.nan  # might need to change
+        img[img == nodata] = np.nan
 
-        fig, axs = plt.subplots(2, 1, figsize=(8,8))  # figsize might need to change
-        first_img = axs[0].imshow(img, cmap='RdBu', vmin=-6, vmax=6)  # minmax might need to change
+        fig, axs = plt.subplots(2, 1, figsize=figsize)
+        first_img = axs[0].imshow(img, cmap='RdBu', vmin=clim[0], vmax=clim[1])
         onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold)
         cid = fig.canvas.mpl_connect('button_press_event', onclick)

From 881a6b7343e6fd860e1e975abcc8c66d2cebf5be Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Mon, 24 Apr 2023 09:46:45 +0000
Subject: [PATCH 16/24] Add adjustable chunk size for parallel (evmd and polyfit only)

---
 carst/libdhdt.py | 111 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 26 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 6c5a3be..57bdd16 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -411,7 +411,7 @@ def init_mosaic(self):
         self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float)
 
     @timeit
-    def polyfit(self, parallel=False):
+    def polyfit(self, parallel=False, chunksize=(1000, 1000)):
         # ==== Create final array ====
         self.init_fitdata()
         # ==== Weighted regression ====
@@ -430,20 +430,52 @@ def batch(seq, min_time_span, evmd_threshold, nodata):
                     sub_results.append(single_results)
                 return sub_results
 
-            batches = []
-            for m in range(self.ts.shape[0]):
-                result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
-                batches.append(result_batch)
+            if self.ts.shape[0] <= chunksize[0] and self.ts.shape[1] <= 
chunksize[1]:
+                ### Single chunk
+                batches = []
+                for m in range(self.ts.shape[0]):
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
+                    batches.append(result_batch)
+
+                with ProgressBar():
+                    results = dask.compute(batches)
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        self.fitdata['slope'][m, n] = results[0][m][n][0]
+                        self.fitdata['slope_err'][m, n] = results[0][m][n][1]
+                        self.fitdata['residual'][m, n] = results[0][m][n][2]
+                        self.fitdata['count'][m, n] = results[0][m][n][3]
+
+            else:
+                ### Multiple chunks
+                msize = chunksize[0]
+                nsize = chunksize[1]
+                m_nodes = np.arange(0, self.ts.shape[0], msize)
+                n_nodes = np.arange(0, self.ts.shape[1], nsize)
+                super_results = []
+                for super_m in range(m_nodes.size):
+                    batches = []
+                    ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :]
+                    for m in range(ts_slice.shape[0]):
+                        for n in range(n_nodes.size):
+                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata())
+                            batches.append(result_batch)
+
+                    with ProgressBar():
+                        results = dask.compute(batches)
+                    super_results.append(results[0])
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        idx1 = m // msize
+                        idx2 = n_nodes.size * (m % msize) + n // nsize
+                        idx3 = n % nsize
+                        self.fitdata['slope'][m, n] = super_results[idx1][idx2][idx3][0]
+                        self.fitdata['slope_err'][m, n] = super_results[idx1][idx2][idx3][1]
+                        self.fitdata['residual'][m, n] = super_results[idx1][idx2][idx3][2]
+                        self.fitdata['count'][m, n] = super_results[idx1][idx2][idx3][3]
 
-            with ProgressBar():
-                results = dask.compute(batches)
-
-            for m in range(self.ts.shape[0]):
-                for n in range(self.ts.shape[1]):
-                    self.fitdata['slope'][m, n] = results[0][m][n][0]
-                    self.fitdata['slope_err'][m, n] = results[0][m][n][1]
-                    self.fitdata['residual'][m, n] = results[0][m][n][2]
-                    self.fitdata['count'][m, n] = results[0][m][n][3]
 
         else:
             for m in range(self.ts.shape[0]):
@@ -535,7 +567,7 @@ def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
         mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
 
     @timeit
-    def do_evmd(self, parallel=False):
+    def do_evmd(self, parallel=False, chunksize=(1000, 1000)):
         if parallel:
             import dask
             from dask.diagnostics import ProgressBar
@@ -546,17 +578,44 @@ def batch(seq, evmd_threshold):
                     sub_results.append(labels)
                 return sub_results
 
-            batches = []
-            for m in range(self.ts.shape[0]):
-                result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
-                batches.append(result_batch)
-
-            with ProgressBar():
-                results = dask.compute(batches)
-
-            for m in range(self.ts.shape[0]):
-                for n in range(self.ts.shape[1]):
-                    self.ts[m, n].add_evmd_labels(results[0][m][n])
+            if self.ts.shape[0] <= chunksize[0] and self.ts.shape[1] <= chunksize[1]:
+                ### Single chunk
+                batches = []
+                for m in range(self.ts.shape[0]):
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
+                    batches.append(result_batch)
+
+                with ProgressBar():
+                    results = dask.compute(batches)
+
+                for m in range(self.ts.shape[0]):
+                    for n in range(self.ts.shape[1]):
+                        self.ts[m, n].add_evmd_labels(results[0][m][n])
+            else:
+                ### Multiple chunks
+                msize = chunksize[0]
+                nsize = chunksize[1]
+                m_nodes = np.arange(0, self.ts.shape[0], msize)
+                n_nodes = np.arange(0, self.ts.shape[1], nsize)
+                super_results = []
+                for super_m in range(m_nodes.size):
+                    batches = []
+                    ts_slice 
= self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] + for m in range(ts_slice.shape[0]): + for n in range(n_nodes.size): + result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold) + batches.append(result_batch) + + with ProgressBar(): + results = dask.compute(batches) + super_results.append(results[0]) + + for m in range(self.ts.shape[0]): + for n in range(self.ts.shape[1]): + idx1 = m // msize + idx2 = n_nodes.size * (m % msize) + n // nsize + idx3 = n % nsize + self.ts[m, n].add_evmd_labels(super_results[idx1][idx2][idx3]) else: for m in range(self.ts.shape[0]): self.display_progress(m, self.ts.shape[0]) From ce646e1f9420a84357cd4fc60f9e6f14470be09f Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 25 Apr 2023 07:31:40 +0000 Subject: [PATCH 17/24] Add progress information for chunked parallel processes --- carst/libdhdt.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 57bdd16..e2a5c8b 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -455,6 +455,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): n_nodes = np.arange(0, self.ts.shape[1], nsize) super_results = [] for super_m in range(m_nodes.size): + self.display_progress(m_nodes[super_m], self.ts.shape[0]) batches = [] ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): @@ -599,6 +600,7 @@ def batch(seq, evmd_threshold): n_nodes = np.arange(0, self.ts.shape[1], nsize) super_results = [] for super_m in range(m_nodes.size): + self.display_progress(m_nodes[super_m], self.ts.shape[0]) batches = [] ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): From f8848531f8c0bd84040a81efd9b8bd9bea65a674 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 25 Apr 2023 09:28:19 +0000 Subject: [PATCH 18/24] Add min_samples argument --- carst/libdhdt.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e2a5c8b..95febc4 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -142,11 +142,11 @@ def EVMD_idx(y, validated_value, threshold=6): validated_range = [min(tmp), max(tmp)] return idx -def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False): +def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, min_samples=4): # wlr = weighted linear regression. 
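     # evmd_labels holds the DBSCAN cluster label of every sample; entries >= 0
     # survived the outlier screening and feed the weighted fit (see idx below).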
# exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold) if evmd_labels is None: - exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold) + exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold, min_samples=min_samples) else: exitstate = 1 if any(evmd_labels >= 0) else -1 @@ -259,18 +259,18 @@ def add_evmd_labels(self, labels): self.verify_evmd_labels(labels) self.evmd_labels = labels - def do_evmd(self, eps=6): + def do_evmd(self, eps=6, min_samples=4): date = self.get_date() value = self.get_value() - exitstate, labels = EVMD_DBSCAN(date, value, eps=eps) + exitstate, labels = EVMD_DBSCAN(date, value, eps=eps, min_samples=min_samples) return exitstate, labels - def do_wlr(self, evmd_threshold=6): + def do_wlr(self, evmd_threshold=6, min_samples=4): date = self.get_date() value = self.get_value() uncertainty = self.get_uncertainty() evmd_labels = self.evmd_labels - single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold) + single_results = wlr_corefun(date, value, uncertainty, evmd_labels=evmd_labels, evmd_threshold=evmd_threshold, min_samples=min_samples) # single_results is (slope, slope_err, residual, count) return single_results @@ -411,19 +411,19 @@ def init_mosaic(self): self.mosaic['uncertainty']= np.full_like(self.ts, self.refgeo.get_nodata(), dtype=float) @timeit - def polyfit(self, parallel=False, chunksize=(1000, 1000)): + def polyfit(self, parallel=False, chunksize=(1000, 1000), min_samples=4): # ==== Create final array ==== self.init_fitdata() # ==== Weighted regression ==== if parallel: import dask from dask.diagnostics import ProgressBar - def batch(seq, min_time_span, evmd_threshold, nodata): + def batch(seq, min_time_span, evmd_threshold, nodata, min_samples): sub_results = [] for x in seq: date = x.get_date() if date.size >= 2 and date[-1] - date[0] > min_time_span: - single_results = x.do_wlr(evmd_threshold=evmd_threshold) + single_results = x.do_wlr(evmd_threshold=evmd_threshold, min_samples=min_samples) # single_results is (slope, slope_err, residual, count) else: single_results = (nodata, nodata, nodata, nodata) @@ -434,7 +434,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): ### Single chunk batches = [] for m in range(self.ts.shape[0]): - result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) + result_batch = dask.delayed(batch)(self.ts[m, :], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata(), min_samples) batches.append(result_batch) with ProgressBar(): @@ -460,7 +460,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :] for m in range(ts_slice.shape[0]): for n in range(n_nodes.size): - result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata()) + result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.maskparam['min_time_span'], self.evmd_threshold, self.refgeo.get_nodata(), min_samples) batches.append(result_batch) with ProgressBar(): @@ -488,7 +488,7 @@ def batch(seq, min_time_span, evmd_threshold, nodata): # evmd_labels = self.ts[m, n].evmd_labels if date.size >= 2 and date[-1] - date[0] > self.maskparam['min_time_span']: - slope, slope_err, residual, count = self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold) + slope, slope_err, residual, count = 
self.ts[m, n].do_wlr(evmd_threshold=self.evmd_threshold, min_samples=min_samples)
                         # if residual > 100:
                         #     print(date, value, uncertainty)
                         self.fitdata['slope'][m, n] = slope
                         self.fitdata['slope_err'][m, n] = slope_err
                         self.fitdata['residual'][m, n] = residual
                         self.fitdata['count'][m, n] = count
@@ -515,7 +515,7 @@ def show_dhdt_tifs(self):
         return dhdt_dem, dhdt_error, dhdt_res, dhdt_count
 
     @timeit
-    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False):
+    def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False, min_samples=4):
         """
         order options:
             ascending: early elevations will be populated first
             descending: recent elevations will be populated first
         method: 'DBSCAN' or 'legacy'
         """
         # ==== Create mosaicked array ====
         self.init_mosaic()
         if method == 'DBSCAN' and self.ts[0, 0].evmd_labels is None:
             print('No EVMD labels detected. Run do_evmd first.')
-            self.do_evmd(parallel=parallel)
+            self.do_evmd(parallel=parallel, min_samples=min_samples)
         for m in range(self.ts.shape[0]):
             self.display_progress(m, self.ts.shape[0])
             for n in range(self.ts.shape[1]):
@@ -568,14 +568,14 @@ def form_mosaic(self, order='ascending', method='DBSCAN', parallel=False, min_samples=4):
         mosaic_uncertainty.Array2Raster(self.mosaic['uncertainty'], self.refgeo)
 
     @timeit
-    def do_evmd(self, parallel=False, chunksize=(1000, 1000)):
+    def do_evmd(self, parallel=False, chunksize=(1000, 1000), min_samples=4):
         if parallel:
             import dask
             from dask.diagnostics import ProgressBar
-            def batch(seq, evmd_threshold):
+            def batch(seq, evmd_threshold, min_samples):
                 sub_results = []
                 for x in seq:
-                    exitstate, labels = x.do_evmd(eps=evmd_threshold)
+                    exitstate, labels = x.do_evmd(eps=evmd_threshold, min_samples=min_samples)
                     sub_results.append(labels)
                 return sub_results
 
@@ -583,7 +583,7 @@ def batch(seq, evmd_threshold):
                 ### Single chunk
                 batches = []
                 for m in range(self.ts.shape[0]):
-                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold)
+                    result_batch = dask.delayed(batch)(self.ts[m, :], self.evmd_threshold, min_samples)
                     batches.append(result_batch)
 
                 with ProgressBar():
@@ -605,7 +605,7 @@ def batch(seq, evmd_threshold):
                     ts_slice = self.ts[m_nodes[super_m]:m_nodes[super_m]+msize, :]
                     for m in range(ts_slice.shape[0]):
                         for n in range(n_nodes.size):
-                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold)
+                            result_batch = dask.delayed(batch)(ts_slice[m, n_nodes[n]:n_nodes[n]+nsize ], self.evmd_threshold, min_samples)
                             batches.append(result_batch)
 
                 with ProgressBar():
@@ -622,7 +622,7 @@ def batch(seq, evmd_threshold):
             for m in range(self.ts.shape[0]):
                 self.display_progress(m, self.ts.shape[0])
                 for n in range(self.ts.shape[1]):
-                    exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold)
+                    exitstate, labels = self.ts[m, n].do_evmd(eps=self.evmd_threshold, min_samples=min_samples)
                     self.ts[m, n].add_evmd_labels(labels)
 

From ce630cfcbe23bc29b2694877b3e47e7ac250fecf Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Wed, 26 Apr 2023 05:08:23 +0000
Subject: [PATCH 19/24] Add min_samples argument to onclick

---
 carst/libdhdt.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index 95febc4..e5dcfeb 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -631,7 +631,7 @@ def display_progress(m, total):
         if m % 100 == 0:
             print(f'{m}/{total} lines processed')
 
-    def viz(self, figsize=(8,8), clim=(-6, 6)):
+    def viz(self, figsize=(8,8), clim=(-6, 6), min_samples=4):
         dhdt_raster, _, _, _ = self.show_dhdt_tifs()
         try:
             nodata = dhdt_raster.get_nodata()
         except RasterioIOError:
             print(f'No such file: {dhdt_raster.fname} -- We recommend running polyfit first.')
             print('TBD')
         img = dhdt_raster.ReadAsArray()
         img[img == nodata] = np.nan
 
         fig, axs = plt.subplots(2, 1, figsize=figsize)
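         # axs[0] shows the dh/dt map; axs[1] is left for the time series of the
         # clicked pixel, drawn by the onclick callback registered below.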
first_img = axs[0].imshow(img, cmap='RdBu', vmin=clim[0], vmax=clim[1]) - onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold) + onclick = onclick_wrapper(self.ts, axs, self.evmd_threshold, min_samples=min_samples) cid = fig.canvas.mpl_connect('button_press_event', onclick) -def onclick_wrapper(data, axs, evmd_threshold): +def onclick_wrapper(data, axs, evmd_threshold, min_samples=4): def onclick_ipynb(event): """ Callback function for mouse click @@ -661,7 +661,7 @@ def onclick_ipynb(event): xx = data[row, col].get_date() yy = data[row, col].get_value() ye = data[row, col].get_uncertainty() - slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, detailed=True) + slope, slope_err, residual, count, x_good, y_good, y_goodest = wlr_corefun(xx, yy, ye, evmd_threshold=evmd_threshold, min_samples=min_samples, detailed=True) SSReg = np.sum((y_goodest - np.mean(y_good)) ** 2) SSRes = np.sum((y_good - y_goodest) ** 2) SST = np.sum((y_good - np.mean(y_good)) ** 2) From ce0630b2459415ed94640b545f5d8a62af0ee5ab Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Wed, 26 Apr 2023 05:35:44 +0000 Subject: [PATCH 20/24] Fix onclick behavior when EVMD returns nothing --- carst/libdhdt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index e5dcfeb..c0b6d6d 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -154,7 +154,10 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if exitstate >= 0 or (max(x) - min(x)) < 1: if exitstate < 0 or (max(x) - min(x)) < 1: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size - return slope, slope_err, resid, count + if detailed: + return slope, slope_err, resid, count, np.array([]), np.array([]), np.array([]) + else: + return slope, slope_err, resid, count else: # if x.size == 3: # print(x, y, ye) From 171eaf61787d32d4a27b85e3144374633c2b5db8 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 2 May 2023 01:57:12 +0000 Subject: [PATCH 21/24] WLR hotfix for singular matrix issue --- carst/libdhdt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index c0b6d6d..4d94a0e 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -152,7 +152,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi idx = evmd_labels >= 0 # if exitstate >= 0 or (max(x) - min(x)) < 1: - if exitstate < 0 or (max(x) - min(x)) < 1: + if exitstate < 0: slope, slope_err, resid, count = -9999.0, -9999.0, -9999.0, x.size if detailed: return slope, slope_err, resid, count, np.array([]), np.array([]), np.array([]) @@ -162,7 +162,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if x.size == 3: # print(x, y, ye) # idx = EVMD_idx(y, validated_value, threshold=evmd_threshold) - if sum(idx) >= 3: + if sum(idx) >= 3 or (max(x[idx]) - min(x[idx])) < 1: x = x[idx] y = y[idx] ye = ye[idx] From aa081faafcbcdf6eef375634747d50a1e32d881e Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Tue, 2 May 2023 02:15:02 +0000 Subject: [PATCH 22/24] WLR issue hotfix 2 --- carst/libdhdt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/carst/libdhdt.py b/carst/libdhdt.py index 4d94a0e..ce58790 100644 --- a/carst/libdhdt.py +++ b/carst/libdhdt.py @@ -162,7 +162,7 @@ def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, mi # if x.size == 3: # print(x, y, ye) # idx = EVMD_idx(y, 
validated_value, threshold=evmd_threshold) - if sum(idx) >= 3 or (max(x[idx]) - min(x[idx])) < 1: + if sum(idx) >= 3 and (max(x[idx]) - min(x[idx])) > 1: x = x[idx] y = y[idx] ye = ye[idx] From 018a6c4da0907398f97a9239bebf7eaa92dfa52a Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Sat, 20 May 2023 03:19:00 +0000 Subject: [PATCH 23/24] libconfig Tab --> Space --- carst/libconfig.py | 338 ++++++++++++++++++++++----------------------- carst/libdhdt.py | 1 + 2 files changed, 170 insertions(+), 169 deletions(-) diff --git a/carst/libconfig.py b/carst/libconfig.py index 3cfd761..2839294 100644 --- a/carst/libconfig.py +++ b/carst/libconfig.py @@ -90,175 +90,175 @@ def Write2File(self): class ConfParams: - """ - Read variables in a configuration file. The file should have the specified structure like CARST/dhdt/defaults.ini - """ - - def __init__(self, fpath=None): - self.fpath = fpath - # self.gdalwarp = {} - # self.demlist = {} - # self.regression = {} - # self.result = {} - - def ReadParam(self): - - """ - Read parameters and save them as self.xxxx - example: if there is a section called [gdalwarp] - and there is an option named tr = 30 30 - then self.gdalwarp['tr'] = '30 30' - """ - - if self.fpath is not None: - config = ConfigParser.RawConfigParser() - config.read(self.fpath) - - for section in config.sections(): - section_contents = {} - for item in config.items(section): - section_contents[item[0]] = item[1] - setattr(self, section, section_contents) - - else: - print('Warning: No ini file is given. Nothing will run.') - - def VerifyParam(self): - - """ - Verify params and modify them to proper types. - """ - - if hasattr(self, 'regression'): - for key in self.regression: - self.regression[key] = int(self.regression[key]) - if hasattr(self, 'gdalwarp'): - if 'output_dir' in self.gdalwarp: - if not os.path.exists(self.gdalwarp['output_dir']): - os.makedirs(self.gdalwarp['output_dir']) # create gdalwarp output folder - if hasattr(self, 'splitampcor'): - for key in self.splitampcor: - self.splitampcor[key] = int(self.splitampcor[key]) - if hasattr(self, 'parallel'): - if 'gnu_parallel' in self.parallel: - s = self.parallel['gnu_parallel'].lower() - self.parallel['gnu_parallel'] = s in ['true', 't', 'yes', 'y', '1'] - if hasattr(self, 'pxsettings'): - for key in self.pxsettings: - if not self.pxsettings[key]: - # empty string - self.pxsettings[key] = None - else: - self.pxsettings[key] = int(self.pxsettings[key]) - if 'size_across' not in self.pxsettings: - self.pxsettings['size_across'] = None - if 'size_down' not in self.pxsettings: - self.pxsettings['size_down'] = None - if 'gaussian_hp' in self.pxsettings: - self.pxsettings['gaussian_hp'] = bool(int(self.pxsettings['gaussian_hp'])) - else: - self.pxsettings['gaussian_hp'] = True - if 'gaussian_hp_sigma' in self.pxsettings: - self.pxsettings['gaussian_hp_sigma'] = float(self.pxsettings['gaussian_hp_sigma']) - else: - self.pxsettings['gaussian_hp_sigma'] = 3.0 - - if hasattr(self, 'outputcontrol'): - if 'datepair_prefix' in self.outputcontrol: - if self.outputcontrol['datepair_prefix'] in ['false', 'f', 'no', 'n', '0']: - self.outputcontrol['if_generate_xyztext'] = False - else: - self.outputcontrol['datepair_prefix'] = bool(int(self.outputcontrol['datepair_prefix'])) - atime = datetime.strptime(self.imagepair['image1_date'], '%Y-%m-%d') - btime = datetime.strptime(self.imagepair['image2_date'], '%Y-%m-%d') - self.outputcontrol['label_datepair'] = atime.strftime('%Y%m%d') + '-' + btime.strftime('%Y%m%d' + '_') - if 'output_folder' not in 
self.outputcontrol: - self.outputcontrol['output_folder'] = '.' - if hasattr(self, 'rawoutput'): - if 'if_generate_xyztext' in self.rawoutput: - self.rawoutput['if_generate_xyztext'] = bool(int(self.rawoutput['if_generate_xyztext'])) - else: - self.rawoutput['if_generate_xyztext'] = False - if 'if_generate_ampofftxt' in self.rawoutput: - self.rawoutput['if_generate_ampofftxt'] = bool(int(self.rawoutput['if_generate_ampofftxt'])) - else: - self.rawoutput['if_generate_ampofftxt'] = False - if 'label_ampcor' in self.rawoutput: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_ampcor']) - else: - self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_ampcor']) - if 'label_geotiff' in self.rawoutput: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_geotiff']) - else: - self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_geotiff']) - if hasattr(self, 'velocorrection'): - if 'label_bedrock_histogram' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_bedrock_histogram']) - else: - self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_bedrock_histogram']) - if 'label_geotiff' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_geotiff']) - else: - self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_geotiff']) - if 'label_logfile' in self.velocorrection: - if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']: - self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_logfile']) - else: - self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_logfile']) - if 'refvelo_outlier_sigma' in self.velocorrection: - self.velocorrection['refvelo_outlier_sigma'] = float(self.velocorrection['refvelo_outlier_sigma']) - else: - self.velocorrection['refvelo_outlier_sigma'] = 3.0 - if hasattr(self, 'noiseremoval'): - for key in self.noiseremoval: - self.noiseremoval[key] = float(self.noiseremoval[key]) - - - """ - # ======== Check if PAIRS_DIR, METADATA_DIR, and PAIRS exist ======== - if not os.path.exists(PAIRS_DIR): - print("\n***** ERROR: Pair directory specified (\"" + PAIRS_DIR + "\") not found, make sure full path is provided, exiting...\n"); - sys.exit(1) - - if not os.path.exists(METADATA_DIR): - print("\n***** ERROR: Metadata directory specified (\"" + METADATA_DIR + "\") not found, make sure full path is provided, exiting...\n"); - sys.exit(1) - - if not os.path.exists(PAIRS): - print("\n***** ERROR: Pair list 
\"" + PAIRS + "\" not found, make sure full path is provided, exiting...\n"); - # =================================================================== - """ - - - def GetDEM(self): - - """ - Get DEMs from "csvfile" field. Return a list of SingleRaster objects. - """ - - if 'csvfile' in self.demlist: - csv = CsvTable(self.demlist['csvfile']) - return csv.GetDEM() - else: - print('Warning: No DEM-list file is given. Nothing will run.') - return [] - - def GetImgPair(self, delimiter=','): - - """ - Get ImgPair from the contents of this csv file - """ - - if 'pairs_list' in self.io: - csv = CsvTable(self.io['pairs_list']) - return csv.GetImgPair(delimiter=delimiter) - else: - print('Warning: No Img-list file is given. Nothing will run.') - return [] + """ + Read variables in a configuration file. The file should have the specified structure like CARST/dhdt/defaults.ini + """ + + def __init__(self, fpath=None): + self.fpath = fpath + # self.gdalwarp = {} + # self.demlist = {} + # self.regression = {} + # self.result = {} + + def ReadParam(self): + + """ + Read parameters and save them as self.xxxx + example: if there is a section called [gdalwarp] + and there is an option named tr = 30 30 + then self.gdalwarp['tr'] = '30 30' + """ + + if self.fpath is not None: + config = ConfigParser.RawConfigParser() + config.read(self.fpath) + + for section in config.sections(): + section_contents = {} + for item in config.items(section): + section_contents[item[0]] = item[1] + setattr(self, section, section_contents) + + else: + print('Warning: No ini file is given. Nothing will run.') + + def VerifyParam(self): + + """ + Verify params and modify them to proper types. + """ + + if hasattr(self, 'regression'): + for key in self.regression: + self.regression[key] = int(self.regression[key]) + if hasattr(self, 'gdalwarp'): + if 'output_dir' in self.gdalwarp: + if not os.path.exists(self.gdalwarp['output_dir']): + os.makedirs(self.gdalwarp['output_dir']) # create gdalwarp output folder + if hasattr(self, 'splitampcor'): + for key in self.splitampcor: + self.splitampcor[key] = int(self.splitampcor[key]) + if hasattr(self, 'parallel'): + if 'gnu_parallel' in self.parallel: + s = self.parallel['gnu_parallel'].lower() + self.parallel['gnu_parallel'] = s in ['true', 't', 'yes', 'y', '1'] + if hasattr(self, 'pxsettings'): + for key in self.pxsettings: + if not self.pxsettings[key]: + # empty string + self.pxsettings[key] = None + else: + self.pxsettings[key] = int(self.pxsettings[key]) + if 'size_across' not in self.pxsettings: + self.pxsettings['size_across'] = None + if 'size_down' not in self.pxsettings: + self.pxsettings['size_down'] = None + if 'gaussian_hp' in self.pxsettings: + self.pxsettings['gaussian_hp'] = bool(int(self.pxsettings['gaussian_hp'])) + else: + self.pxsettings['gaussian_hp'] = True + if 'gaussian_hp_sigma' in self.pxsettings: + self.pxsettings['gaussian_hp_sigma'] = float(self.pxsettings['gaussian_hp_sigma']) + else: + self.pxsettings['gaussian_hp_sigma'] = 3.0 + + if hasattr(self, 'outputcontrol'): + if 'datepair_prefix' in self.outputcontrol: + if self.outputcontrol['datepair_prefix'] in ['false', 'f', 'no', 'n', '0']: + self.outputcontrol['if_generate_xyztext'] = False + else: + self.outputcontrol['datepair_prefix'] = bool(int(self.outputcontrol['datepair_prefix'])) + atime = datetime.strptime(self.imagepair['image1_date'], '%Y-%m-%d') + btime = datetime.strptime(self.imagepair['image2_date'], '%Y-%m-%d') + self.outputcontrol['label_datepair'] = atime.strftime('%Y%m%d') + '-' + 
			if 'output_folder' not in self.outputcontrol:
				self.outputcontrol['output_folder'] = '.'
		if hasattr(self, 'rawoutput'):
			if 'if_generate_xyztext' in self.rawoutput:
				self.rawoutput['if_generate_xyztext'] = bool(int(self.rawoutput['if_generate_xyztext']))
			else:
				self.rawoutput['if_generate_xyztext'] = False
			if 'if_generate_ampofftxt' in self.rawoutput:
				self.rawoutput['if_generate_ampofftxt'] = bool(int(self.rawoutput['if_generate_ampofftxt']))
			else:
				self.rawoutput['if_generate_ampofftxt'] = False
			if 'label_ampcor' in self.rawoutput:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_ampcor'])
				else:
					self.rawoutput['label_ampcor'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_ampcor'])
			if 'label_geotiff' in self.rawoutput:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.rawoutput['label_geotiff'])
				else:
					self.rawoutput['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.rawoutput['label_geotiff'])
		if hasattr(self, 'velocorrection'):
			if 'label_bedrock_histogram' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_bedrock_histogram'])
				else:
					self.velocorrection['label_bedrock_histogram'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_bedrock_histogram'])
			if 'label_geotiff' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_geotiff'])
				else:
					self.velocorrection['label_geotiff'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_geotiff'])
			if 'label_logfile' in self.velocorrection:
				if self.outputcontrol['datepair_prefix'] not in ['false', 'f', 'no', 'n', '0']:
					self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.outputcontrol['label_datepair'] + self.velocorrection['label_logfile'])
				else:
					self.velocorrection['label_logfile'] = os.path.join(self.outputcontrol['output_folder'], self.velocorrection['label_logfile'])
			if 'refvelo_outlier_sigma' in self.velocorrection:
				self.velocorrection['refvelo_outlier_sigma'] = float(self.velocorrection['refvelo_outlier_sigma'])
			else:
				self.velocorrection['refvelo_outlier_sigma'] = 3.0
		if hasattr(self, 'noiseremoval'):
			for key in self.noiseremoval:
				self.noiseremoval[key] = float(self.noiseremoval[key])


	"""
	# ======== Check if PAIRS_DIR, METADATA_DIR, and PAIRS exist ========
	if not os.path.exists(PAIRS_DIR):
		print("\n***** ERROR: Pair directory specified (\"" + PAIRS_DIR + "\") not found, make sure full path is provided, exiting...\n");
		sys.exit(1)

	if not os.path.exists(METADATA_DIR):
		print("\n***** ERROR: Metadata directory specified (\"" + METADATA_DIR + "\") not found, make sure full path is provided, exiting...\n");
		sys.exit(1)

	if not os.path.exists(PAIRS):
		print("\n***** ERROR: Pair list \"" + PAIRS + "\" not found, make sure full path is provided, exiting...\n");
	# ===================================================================
	"""
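	# Minimal usage sketch (the class line is not shown in this hunk; `ConfParams`
	# below is assumed from how the config object is used elsewhere in CARST, and
	# the ini path is illustrative):
	#
	#   ini = ConfParams('defaults.ini')
	#   ini.ReadParam()      # [gdalwarp] tr = 30 30  -->  ini.gdalwarp['tr'] == '30 30'
	#   ini.VerifyParam()    # casts strings to int/float/bool and resolves output paths
	#   dems = ini.GetDEM()  # list of SingleRaster objects read from the demlist csvfile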

	def GetDEM(self):

		"""
		Get DEMs from the "csvfile" field. Return a list of SingleRaster objects.
		"""

		if 'csvfile' in self.demlist:
			csv = CsvTable(self.demlist['csvfile'])
			return csv.GetDEM()
		else:
			print('Warning: No DEM-list file is given. Nothing will run.')
			return [

	def GetImgPair(self, delimiter=','):

		"""
		Get image pairs from the contents of this CSV file.
		"""

		if 'pairs_list' in self.io:
			csv = CsvTable(self.io['pairs_list'])
			return csv.GetImgPair(delimiter=delimiter)
		else:
			print('Warning: No Img-list file is given. Nothing will run.')
			return []


class LS8MTL:

diff --git a/carst/libdhdt.py b/carst/libdhdt.py
index ce58790..c2d3316 100644
--- a/carst/libdhdt.py
+++ b/carst/libdhdt.py
@@ -145,6 +145,7 @@ def EVMD_idx(y, validated_value, threshold=6):
 def wlr_corefun(x, y, ye, evmd_labels=None, evmd_threshold=6, detailed=False, min_samples=4):
     # wlr = weighted linear regression.
     # exitstate, validated_value, validated_value_idx = EVMD(y, threshold=evmd_threshold)
+    # WARNING: `count` does not carry the same meaning in the two branches of the following if-else statement.
     if evmd_labels is None:
         exitstate, evmd_labels = EVMD_DBSCAN(x, y, eps=evmd_threshold, min_samples=min_samples)
     else:

From 7e404c947f2fd19b393a7aa3fd7c8c947621acb8 Mon Sep 17 00:00:00 2001
From: Whyjay Zheng
Date: Sat, 20 May 2023 04:26:42 +0000
Subject: [PATCH 24/24] Hotfix

---
 carst/libconfig.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/carst/libconfig.py b/carst/libconfig.py
index 2839294..2a25b96 100644
--- a/carst/libconfig.py
+++ b/carst/libconfig.py
@@ -245,7 +245,7 @@ def GetDEM(self):
 			return csv.GetDEM()
 		else:
 			print('Warning: No DEM-list file is given. Nothing will run.')
-			return [
+			return []
 
 	def GetImgPair(self, delimiter=','):
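# Note on the hotfix above: the unclosed bracket in `return [` is a syntax error,
# so before this patch the whole module failed at import time; with `return []`,
# GetDEM() instead returns an empty list when no csvfile is given, matching the
# printed warning that nothing will run. A minimal check (sketch; `ConfParams`
# is assumed from how the config class is used elsewhere in CARST):
#
#   from carst.libconfig import ConfParams   # import succeeds only after the hotfix
#   assert ConfParams(None).fpath is None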