
Commit 7744dcc: Updates

alekfal committed Nov 29, 2023
1 parent e1a4428
Showing 9 changed files with 89 additions and 77 deletions.
28 changes: 3 additions & 25 deletions .gitlab-ci.yml
@@ -1,49 +1,28 @@
# This file is a template, and might need editing before it works on your project.
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Python.gitlab-ci.yml

# Official language image. Look for the different tagged releases at:
# https://hub.docker.com/r/library/python/tags/
image: python:latest

# Change pip's cache directory to be inside the project directory since we can
# only cache local items.
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"

# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/topics/caching/
#
# If you want to also cache the installed packages, you have to install
# them in a virtualenv and cache it as well.
cache:
paths:
- .cache/pip
- venv/

before_script:
- python --version # For debugging
- python --version ; pip --version # For debugging
- pip install virtualenv
- virtualenv venv
- source venv/bin/activate

test:
script:
- python setup.py test
- pip install tox flake8 # you can also use tox
- tox -e py36,flake8

run:
script:
- python setup.py bdist_wheel
# an alternative approach is to install and run:
- pip install dist/*
- pip install .
# run the command here
artifacts:
paths:
- dist/*.whl
- build/*

pages:
script:
@@ -57,4 +36,3 @@ pages:
- public
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH

3 changes: 1 addition & 2 deletions .readthedocs.yaml
@@ -24,5 +24,4 @@ sphinx:

python:
install:
- requirements: docs/requirements-docs.txt
system_packages: true
- requirements: docs/requirements-docs.txt
6 changes: 5 additions & 1 deletion cropmaps/clipper.py
@@ -17,7 +17,7 @@
class Clipper():

@staticmethod
def clipByMask(image, shapefile, store = None, band = None, new = None, resize = False, method = None, ext = 'tif', verbose = False):
def clipByMask(image, shapefile, store = None, band = None, new = None, resize = False, method = None, ext = 'tif', verbose = False, compress = False):
"""Mask image based on a shapefile mask.
Args:
@@ -158,6 +158,8 @@ def clipByMask(image, shapefile, store = None, band = None, new = None, resize =
"driver": "GTiff",
"nodata": nodata
})
if compress:
metadata.update({"compress": "lzw"})

with rasterio.open(out_tif, 'w', **metadata) as dst:
dst.write(reproj_array)
@@ -183,6 +185,8 @@ def clipByMask(image, shapefile, store = None, band = None, new = None, resize =
"width": out_image.shape[2],
"transform": out_transform,
"nodata": nodata})
if compress:
out_meta.update({"compress": "lzw"})

with rasterio.open(out_tif, "w", **out_meta) as output_image:
output_image.write(out_image)
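
A minimal usage sketch for the new compress flag in clipByMask (the input paths are hypothetical; the signature and the LZW metadata update come from the diff above):

from cropmaps.clipper import Clipper

# Hypothetical inputs: any GeoTIFF plus a shapefile covering the AOI.
# With compress=True the output metadata is updated with {"compress": "lzw"},
# so the clipped GeoTIFF is written LZW-compressed.
Clipper.clipByMask("B04_10m.tif", "aoi.shp", store="./clipped", compress=True)
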
7 changes: 5 additions & 2 deletions cropmaps/cube.py
@@ -37,7 +37,7 @@ def generate_cube_paths(eodata:sentimeseries, bands:list, mask:str = None)->list

return paths

def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.dtype, nodata:float = -9999, gap_fill:bool = True, harmonize:bool = True, alpha:float = 0.0001, beta:float = 0., force_new:bool = False)->Tuple[List[str], Dict]:
def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.dtype, nodata:float = -9999, gap_fill:bool = True, harmonize:bool = True, alpha:float = 0.0001, beta:float = 0., force_new:bool = False, compress = False)->Tuple[List[str], Dict]:
"""Stack satellite images (FROM DIFFERENT FILES) as timeseries cube, without loading them in memory.
If there is a datetime field in the filename, sort=True can be enabled to sort cube layers by date, ascending.
Also, if sort=True, dates are written to a .txt file saved with the same output name as the cube.
@@ -78,7 +78,10 @@ def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.d
'count': len(listOfPaths),
'driver':'GTiff',
'nodata': nodata})


if compress:
metadata.update({"compress": "lzw"})

# New filename.
cubeName = os.path.join(searchPath, str(newFilename) + '.tif')
# Stack products as timeseries cube.
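
A sketch of calling make_cube with the new flag (the band paths and output name are hypothetical; the signature is taken from the diff above):

import numpy as np
from cropmaps.cube import make_cube

# Hypothetical per-date band paths; with compress=True the cube's GTiff
# metadata is updated with {"compress": "lzw"} before writing.
paths = ["T34SEH_20230101_B04_10m.tif", "T34SEH_20230111_B04_10m.tif"]
layers, meta = make_cube(paths, searchPath="./cube", newFilename="B04_timeseries",
                         dtype=np.float32, nodata=-9999, compress=True)
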
56 changes: 28 additions & 28 deletions cropmaps/get_creodias.py
@@ -44,6 +44,31 @@
}
}

def get_data_DIAS(area:str, start_date:str, end_date:str, platform:str = "Sentinel2", **kwargs):
"""Query the Copernicus Data Space Ecosystem (CDSE) OpenSearch service for available products.
- For Sentinel-2 catalog attributes: https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/describe.xml
Args:
area (str): WKT geometry of the AOI.
start_date (str): Start date in YYYYMMDD format.
end_date (str): End date in YYYYMMDD format.
platform (str, optional): Platform name (like Sentinel2 or Sentinel3). Defaults to "Sentinel2".
Returns:
list: List of available products in CreoDIAS based on the query.
"""
data = []
results = query.query(
platform,
geometry=area,
start_date=datetime(int(start_date[:4]), int(start_date[4:6]), int(start_date[6:])),
end_date=datetime(int(end_date[:4]), int(end_date[4:6]), int(end_date[6:])),
**kwargs)
for key in results:
data.append(results[key]["properties"]["productIdentifier"])

return data


def eodata_path_creator(data:pd.DataFrame):
"""Convert a DataFrame with the response from APIHUB to CreoDIAS paths. Works with Sentinel-1, 2, 3 (all instruments).
Function builds paths as follows:
@@ -81,8 +106,7 @@ def eodata_path_creator(data:pd.DataFrame):
creodias_paths.append(path)

return creodias_paths



def check_L2(data:pd.DataFrame):
"""Cleans API response dataframe from Sentinel-2 L1C data.
@@ -107,32 +131,8 @@ def check_L2(data:pd.DataFrame):

return data

def get_data_DIAS(area:str, start_date:str, end_date:str, platform:str = "Sentinel2", **kwargs):
"""Query the Copernicus Data Space Ecosystem (CDSE) OpenSearch service for available products.
- For Sentinel-2 catalog attributes: https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/describe.xml
Args:
area (str): WKT geometry of the AOI.
start_date (str): Start date in YYYYMMDD format.
end_date (str): Start date in YYYYMMDD format.
platform (str, optional): Platform name (like Sentinel2 or Sentinel3). Defaults to "Sentinel2".
Returns:
list: List of available products in CreoDIAS based on the query.
"""
data = []
results = query.query(
platform,
geometry=area,
start_date=datetime(int(start_date[:4]), int(start_date[4:6]), int(start_date[6:])),
end_date=datetime(int(end_date[:4]), int(end_date[4:6]), int(end_date[6:])),
**kwargs)
for key in results:
data.append(results[key]["properties"]["productIdentifier"])

return data

def get_data(area:str, start_date:str, end_date:str, username:str, password:str, platform:str = "Sentinel-2", **kwargs):
"""Get data information from ESA APIHUB.
def _get_data(area:str, start_date:str, end_date:str, username:str, password:str, platform:str = "Sentinel-2", **kwargs):
"""Get data information from ESA APIHUB. DEPRECATED.
Args:
area (str): Path to geometry file (geojson)
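
A sketch of querying with the relocated get_data_DIAS (the WKT polygon is hypothetical; the argument order and YYYYMMDD date format follow the docstring above):

from cropmaps.get_creodias import get_data_DIAS

# Hypothetical AOI in WKT; returns a list of CreoDIAS product identifiers.
aoi = "POLYGON ((23.5 38.0, 23.9 38.0, 23.9 38.3, 23.5 38.3, 23.5 38.0))"
products = get_data_DIAS(aoi, "20230101", "20230131", platform="Sentinel2")
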
35 changes: 23 additions & 12 deletions cropmaps/models.py
@@ -88,10 +88,11 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:
Returns:
pd.DataFrame: Filled confusion matrix
"""
rowColIdx = list(labels) + ['All']
labels = [str(l) for l in labels]
rowColIdx = labels + ['All']
# Useful when rows are missing
notAlabel = [val for val in list(ct.index) if val not in rowColIdx]
if len(ct.index) < len(rowColIdx) or len(notAlabel)>0:
if len(ct.index) < len(rowColIdx) or len(notAlabel) > 0:
for label in labels:
if label not in ct.index:
ct.loc[label] = [0] * ct.shape[1]
@@ -109,13 +110,11 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:

# Add producer accuracy
pa = [round(ct.loc[rvcd][rvcd] / ct.loc[rvcd]['All'] *100, 2) for rvcd in labels]
pa = [0 if math.isnan(x) else x for x in pa]
pa.append(round(np.nanmean(pa), 2)) # total PA
ct['PA'] = pa

# Add User acc
ua = [round(ct.loc[rvcd][rvcd] / ct.loc['All'][rvcd] * 100, 2) for rvcd in labels]
ua = [0 if math.isnan(x) else x for x in ua]
ua.append(round(np.nanmean(ua), 2)) # total UA

# Overall acc
@@ -128,7 +127,6 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:
f1 = [round(
(2*ct.loc[rvcd]['PA']*ct.loc['UA'][rvcd]) / (ct.loc[rvcd]['PA']+ct.loc['UA'][rvcd]),
2) for rvcd in labels]
f1 = [0 if math.isnan(x) else x for x in f1]
f1.append(f"avg_f1 {round(np.nanmean(f1), 2)}") # total f1

# test another calculation of f1
@@ -154,8 +152,15 @@ def importance(band_desc:list, feature_importance:np.ndarray, store_to:str = Non
for b, imp in zip(band_desc, feature_importance):
band = b.split('_')[2]
date = b.split('_')[1]
bands_importance = bands_importance._append([{band:imp}], ignore_index=True)
dates_importance = dates_importance._append([{date:imp}], ignore_index=True)
try:
bands_importance = bands_importance._append([{band:imp}], ignore_index=True)
except AttributeError:
bands_importance = bands_importance.append([{band:imp}], ignore_index=True)
try:
dates_importance = dates_importance._append([{date:imp}], ignore_index=True)
except AttributeError:
dates_importance = dates_importance.append([{date:imp}], ignore_index=True)


bands_importance = bands_importance.apply(lambda x: pd.Series(x.dropna().values))
dates_importance = dates_importance.apply(lambda x: pd.Series(x.dropna().values))
@@ -181,7 +186,7 @@ def random_forest_train(cube_path:str, gt_fpath:str, results_to:str, test_size:f
results_to (str): Path to store results
test_size (float, optional): Test sample size. Defaults to 0.33
gridsearch (bool, optional): Hyperparameter Tuning using GridSearchCV. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV. Defaults to False.
parameters(dict, optional): SVM parameters. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier.
parameters (dict, optional): RF parameters. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier.
Returns:
RandomForestClassifier: The model
"""
@@ -282,7 +287,11 @@ def random_forest_train(cube_path:str, gt_fpath:str, results_to:str, test_size:f
logging.info(f'OOB prediction of accuracy is: {round(rf.oob_score_ * 100, 2)}%')

# Cross-tabulate predictions
cm = pd.crosstab(df['truth'], df['predict'], margins=True)
truth = df['truth'].astype(int)
predict = df['predict'].astype(int)
cm = pd.crosstab(truth, predict, margins=True)
cm.columns = cm.columns.astype(str).tolist()
cm.index = cm.index.astype(str).tolist()
cm = fill_confMatrix(cm, labels)
cm.to_csv(os.path.join(results_to, "Confusion_Matrix.csv"))

@@ -493,10 +502,12 @@ def svm_train(cube_path:str, gt_fpath:str, results_to:str, test_size:float = 0.3
X_test = None
gc.collect()

# Cross-tabulate predictions
cm = pd.crosstab(df['truth'], df['predict'], margins=True)
truth = df['truth'].astype(int)
predict = df['predict'].astype(int)
cm = pd.crosstab(truth, predict, margins=True)
cm.columns = cm.columns.astype(str).tolist()
cm.index = cm.index.astype(str).tolist()
cm = fill_confMatrix(cm, labels)
cm.to_csv(os.path.join(results_to, "Confusion_Matrix.csv"))

logging.info("Done.")

Expand Down
27 changes: 22 additions & 5 deletions cropmaps/sentinels.py
@@ -270,7 +270,7 @@ def upsample(self, band = None, store = None, new = None, subregion = None, meth
raise ValueError("Arguments 'band' must be provided!")

@staticmethod
def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "output.tif", resampling:rasterio.warp.Resampling = Resampling.nearest) -> None:
def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "output.tif", resampling:rasterio.warp.Resampling = Resampling.nearest, compress = False) -> None:
"""Reprojects/Resamples an image to a base image.
Args:
image (str): Path to input file to reproject/resample
@@ -292,6 +292,8 @@ def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "outpu
"width": dst_width,
"height": dst_height,
})
if compress:
metadata.update({"compress": "lzw"})
if to_file:
with rasterio.open(outfile, "w", **metadata) as dst:
# iterate through bands and write using reproject function
@@ -344,7 +346,7 @@ def _cloud_mask(mask:rasterio.io.DatasetReader)->np.array:

return mask_array

def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = None, resolution:str = None, new:str = "CLOUDMASK")->None:
def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = None, resolution:str = None, new:str = "CLOUDMASK", compress = False)->None:
"""Apply default SCL mask to S2 images.
Args:
@@ -431,7 +433,8 @@ def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = No
array = src.read(1)
array[mask==0] = nodata
meta.update({"nodata": nodata})

if compress:
meta.update({"compress": "lzw"})
with rasterio.open(os.path.join(path, new_name), "w", **meta) as dest:
dest.write(array, 1)

@@ -511,13 +514,15 @@ def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = No
array = src.read(1)
array[mask==0] = nodata
meta.update({"nodata": nodata})

if compress:
meta.update({"compress": "lzw"})

with rasterio.open(os.path.join(path, new_name), "w", **meta) as dest:
dest.write(array, 1)

getattr(self, band)[str(res)][subregion] = os.path.join(path, new_name)

def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
def calcVI(self, index, store = None, subregion = None, verbose:bool = False, compress = False):
"""Calculates a selected vegetation index (NDVI, NDBI, NDWI).
Args:
index (str): Vegetation index to be calculated and saved. Currently only NDVI, NDBI, NDWI are supported
@@ -568,6 +573,9 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = self.datapath_10
metadata = red.meta.copy()
metadata.update({"driver": driver, "dtype": ndvi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})

self.writeResults(path, new_name, ndvi_array, metadata)
# Setting NDVI attribute to S2 image
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(self.datapath_10, new_name)}})
@@ -604,6 +612,9 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = store
metadata = red.meta.copy()
metadata.update({"driver": driver, "dtype": ndvi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})

self.writeResults(path, new_name, ndvi_array, metadata)
# Setting NDVI attribute to S2 image
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(path, new_name)}})
@@ -643,6 +654,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = self.datapath_10
metadata = green.meta.copy()
metadata.update({"driver": driver, "dtype": ndwi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(path, new_name, ndwi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(self.datapath_10, new_name)}})
else:
@@ -678,6 +691,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
ndwi_array[green_array == green.meta["nodata"]] = -9999.
metadata = green.meta.copy()
metadata.update({"driver": driver, "dtype": ndwi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(store, new_name, ndwi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(store, new_name)}})

@@ -752,6 +767,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
ndbi_array[swir_array == swir.meta["nodata"]] = -9999.
metadata = swir.meta.copy()
metadata.update({"driver": driver, "dtype": ndbi_array.dtype, "nodata": -9999})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(store, new_name, ndbi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(store, new_name)}})

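
The same compress flag now threads through reproj_match, apply_cloud_mask, and calcVI. A hedged sketch, assuming s2 is an instance of the Sentinel-2 image class defined in cropmaps/sentinels.py (the instance name and store paths are hypothetical; the method names and keywords come from the diff above):

# Compute NDVI and apply the SCL cloud mask, writing LZW-compressed GeoTIFFs.
s2.calcVI("NDVI", store="./indices", compress=True)
s2.apply_cloud_mask(band="NDVI", store="./masked", compress=True)
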
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -25,7 +25,7 @@
author = 'Alekos Falagas'

# The full version, including alpha/beta/rc tags
release = '0.0.1beta'
release = '0.0.1a0'


# -- General configuration ---------------------------------------------------