
Commit 7744dcc: Updates

alekfal committed Nov 29, 2023
1 parent e1a4428
Showing 9 changed files with 89 additions and 77 deletions.
28 changes: 3 additions & 25 deletions .gitlab-ci.yml
@@ -1,49 +1,28 @@
# This file is a template, and might need editing before it works on your project.
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Python.gitlab-ci.yml

# Official language image. Look for the different tagged releases at:
# https://hub.docker.com/r/library/python/tags/
image: python:latest

# Change pip's cache directory to be inside the project directory since we can
# only cache local items.
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"

# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/topics/caching/
#
# If you want to also cache the installed packages, you have to install
# them in a virtualenv and cache it as well.
cache:
paths:
- .cache/pip
- venv/

before_script:
- python --version # For debugging
- python --version ; pip --version # For debugging
- pip install virtualenv
- virtualenv venv
- source venv/bin/activate

test:
script:
- python setup.py test
- pip install tox flake8 # you can also use tox
- tox -e py36,flake8

run:
script:
- python setup.py bdist_wheel
# an alternative approach is to install and run:
- pip install dist/*
- pip install .
# run the command here
artifacts:
paths:
- dist/*.whl
- build/*

pages:
script:
@@ -57,4 +36,3 @@ pages:
- public
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH

3 changes: 1 addition & 2 deletions .readthedocs.yaml
@@ -24,5 +24,4 @@ sphinx:

python:
install:
- requirements: docs/requirements-docs.txt
system_packages: true
- requirements: docs/requirements-docs.txt
6 changes: 5 additions & 1 deletion cropmaps/clipper.py
@@ -17,7 +17,7 @@
class Clipper():

@staticmethod
def clipByMask(image, shapefile, store = None, band = None, new = None, resize = False, method = None, ext = 'tif', verbose = False):
def clipByMask(image, shapefile, store = None, band = None, new = None, resize = False, method = None, ext = 'tif', verbose = False, compress = False):
"""Mask image based on a shapefile mask.
Args:
@@ -158,6 +158,8 @@ def clipByMask(image, shapefile, store = None, band = None, new = None, resize =
"driver": "GTiff",
"nodata": nodata
})
if compress:
metadata.update({"compress": "lzw"})

with rasterio.open(out_tif, 'w', **metadata) as dst:
dst.write(reproj_array)
@@ -183,6 +185,8 @@ def clipByMask(image, shapefile, store = None, band = None, new = None, resize =
"width": out_image.shape[2],
"transform": out_transform,
"nodata": nodata})
if compress:
out_meta.update({"compress": "lzw"})

with rasterio.open(out_tif, "w", **out_meta) as output_image:
output_image.write(out_image)
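
A minimal usage sketch for the new compress flag in clipByMask (the input paths are hypothetical; the signature and the LZW metadata update come from the diff above):

from cropmaps.clipper import Clipper

# Hypothetical inputs: any GeoTIFF plus a shapefile covering the AOI.
# With compress=True the output metadata is updated with {"compress": "lzw"},
# so the clipped GeoTIFF is written LZW-compressed.
Clipper.clipByMask("B04_10m.tif", "aoi.shp", store="./clipped", compress=True)
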
7 changes: 5 additions & 2 deletions cropmaps/cube.py
@@ -37,7 +37,7 @@ def generate_cube_paths(eodata:sentimeseries, bands:list, mask:str = None)->list

return paths

def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.dtype, nodata:float = -9999, gap_fill:bool = True, harmonize:bool = True, alpha:float = 0.0001, beta:float = 0., force_new:bool = False)->Tuple[List[str], Dict]:
def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.dtype, nodata:float = -9999, gap_fill:bool = True, harmonize:bool = True, alpha:float = 0.0001, beta:float = 0., force_new:bool = False, compress = False)->Tuple[List[str], Dict]:
"""Stack satellite images (FROM DIFFERENT FILES) as timeseries cube, without loading them in memory.
If there is a datetime field in the filename, sort=True can be enabled to sort cube layers by date, ascending.
Also, if sort=True, dates are written to a .txt file saved with the same output name as the cube.
@@ -78,7 +78,10 @@ def make_cube(listOfPaths:List[str], searchPath:str, newFilename:str, dtype:np.d
'count': len(listOfPaths),
'driver':'GTiff',
'nodata': nodata})


if compress:
metadata.update({"compress": "lzw"})

# New filename.
cubeName = os.path.join(searchPath, str(newFilename) + '.tif')
# Stack products as timeseries cube.
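
A sketch of calling make_cube with the new flag (the band paths and output name are hypothetical; the signature is taken from the diff above):

import numpy as np
from cropmaps.cube import make_cube

# Hypothetical per-date band paths; with compress=True the cube's GTiff
# metadata is updated with {"compress": "lzw"} before writing.
paths = ["T34SEH_20230101_B04_10m.tif", "T34SEH_20230111_B04_10m.tif"]
layers, meta = make_cube(paths, searchPath="./cube", newFilename="B04_timeseries",
                         dtype=np.float32, nodata=-9999, compress=True)
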
56 changes: 28 additions & 28 deletions cropmaps/get_creodias.py
@@ -44,6 +44,31 @@
}
}

def get_data_DIAS(area:str, start_date:str, end_date:str, platform:str = "Sentinel2", **kwargs):
"""Query the Copernicus Data Space Ecosystem (CDSE) OpenSearch service for available products.
- For Sentinel-2 catalog attributes: https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/describe.xml
Args:
area (str): WKT geometry of the AOI.
start_date (str): Start date in YYYYMMDD format.
end_date (str): End date in YYYYMMDD format.
platform (str, optional): Platform name (like Sentinel2 or Sentinel3). Defaults to "Sentinel2".
Returns:
list: List of available products in CreoDIAS based on the query.
"""
data = []
results = query.query(
platform,
geometry=area,
start_date=datetime(int(start_date[:4]), int(start_date[4:6]), int(start_date[6:])),
end_date=datetime(int(end_date[:4]), int(end_date[4:6]), int(end_date[6:])),
**kwargs)
for key in results:
data.append(results[key]["properties"]["productIdentifier"])

return data


def eodata_path_creator(data:pd.DataFrame):
"""Convert a DataFrame with the response from APIHUB to CreoDIAS paths. Works with Sentinel-1, 2, 3 (all instruments).
Function builds paths as follows:
@@ -81,8 +106,7 @@ def eodata_path_creator(data:pd.DataFrame):
creodias_paths.append(path)

return creodias_paths



def check_L2(data:pd.DataFrame):
"""Cleans API response dataframe from Sentinel-2 L1C data.
@@ -107,32 +131,8 @@ def check_L2(data:pd.DataFrame):

return data

def get_data_DIAS(area:str, start_date:str, end_date:str, platform:str = "Sentinel2", **kwargs):
"""Query the Copernicus Data Space Ecosystem (CDSE) OpenSearch service for available products.
- For Sentinel-2 catalog attributes: https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/describe.xml
Args:
area (str): WKT geometry of the AOI.
start_date (str): Start date in YYYYMMDD format.
end_date (str): Start date in YYYYMMDD format.
platform (str, optional): Platform name (like Sentinel2 or Sentinel3). Defaults to "Sentinel2".
Returns:
list: List of available products in CreoDIAS based on the query.
"""
data = []
results = query.query(
platform,
geometry=area,
start_date=datetime(int(start_date[:4]), int(start_date[4:6]), int(start_date[6:])),
end_date=datetime(int(end_date[:4]), int(end_date[4:6]), int(end_date[6:])),
**kwargs)
for key in results:
data.append(results[key]["properties"]["productIdentifier"])

return data

def get_data(area:str, start_date:str, end_date:str, username:str, password:str, platform:str = "Sentinel-2", **kwargs):
"""Get data information from ESA APIHUB.
def _get_data(area:str, start_date:str, end_date:str, username:str, password:str, platform:str = "Sentinel-2", **kwargs):
"""Get data information from ESA APIHUB. DEPRECATED.
Args:
area (str): Path to geometry file (geojson)
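
A sketch of querying with the relocated get_data_DIAS (the WKT polygon is hypothetical; the argument order and YYYYMMDD date format follow the docstring above):

from cropmaps.get_creodias import get_data_DIAS

# Hypothetical AOI in WKT; returns a list of CreoDIAS product identifiers.
aoi = "POLYGON ((23.5 38.0, 23.9 38.0, 23.9 38.3, 23.5 38.3, 23.5 38.0))"
products = get_data_DIAS(aoi, "20230101", "20230131", platform="Sentinel2")
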
35 changes: 23 additions & 12 deletions cropmaps/models.py
@@ -88,10 +88,11 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:
Returns:
pd.DataFrame: Filled confusion matrix
"""
rowColIdx = list(labels) + ['All']
labels = [str(l) for l in labels]
rowColIdx = labels + ['All']
# Useful when rows are missing
notAlabel = [val for val in list(ct.index) if val not in rowColIdx]
if len(ct.index) < len(rowColIdx) or len(notAlabel)>0:
if len(ct.index) < len(rowColIdx) or len(notAlabel) > 0:
for label in labels:
if label not in ct.index:
ct.loc[label] = [0] * ct.shape[1]
@@ -109,13 +110,11 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:

# Add producer accuracy
pa = [round(ct.loc[rvcd][rvcd] / ct.loc[rvcd]['All'] *100, 2) for rvcd in labels]
pa = [0 if math.isnan(x) else x for x in pa]
pa.append(round(np.nanmean(pa), 2)) # total PA
ct['PA'] = pa

# Add User acc
ua = [round(ct.loc[rvcd][rvcd] / ct.loc['All'][rvcd] * 100, 2) for rvcd in labels]
ua = [0 if math.isnan(x) else x for x in ua]
ua.append(round(np.nanmean(ua), 2)) # total UA

# Overall acc
@@ -128,7 +127,6 @@ def fill_confMatrix(ct:pd.DataFrame, labels:List)->pd.DataFrame:
f1 = [round(
(2*ct.loc[rvcd]['PA']*ct.loc['UA'][rvcd]) / (ct.loc[rvcd]['PA']+ct.loc['UA'][rvcd]),
2) for rvcd in labels]
f1 = [0 if math.isnan(x) else x for x in f1]
f1.append(f"avg_f1 {round(np.nanmean(f1), 2)}") # total f1

# test another calculation of f1
@@ -154,8 +152,15 @@ def importance(band_desc:list, feature_importance:np.ndarray, store_to:str = Non
for b, imp in zip(band_desc, feature_importance):
band = b.split('_')[2]
date = b.split('_')[1]
bands_importance = bands_importance._append([{band:imp}], ignore_index=True)
dates_importance = dates_importance._append([{date:imp}], ignore_index=True)
try:
bands_importance = bands_importance._append([{band:imp}], ignore_index=True)
except AttributeError:
bands_importance = bands_importance.append([{band:imp}], ignore_index=True)
try:
dates_importance = dates_importance._append([{date:imp}], ignore_index=True)
except AttributeError:
dates_importance = dates_importance.append([{date:imp}], ignore_index=True)


bands_importance = bands_importance.apply(lambda x: pd.Series(x.dropna().values))
dates_importance = dates_importance.apply(lambda x: pd.Series(x.dropna().values))
@@ -181,7 +186,7 @@ def random_forest_train(cube_path:str, gt_fpath:str, results_to:str, test_size:f
results_to (str): Path to store results
test_size (float, optional): Test sample size. Defaults to 0.33
gridsearch (bool, optional): Hyperparameter Tuning using GridSearchCV. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV. Defaults to False.
parameters(dict, optional): SVM parameters. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier.
parameters (dict, optional): RF parameters. Check here: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html#sklearn.ensemble.RandomForestClassifier.
Returns:
RandomForestClassifier: The model
"""
@@ -282,7 +287,11 @@ def random_forest_train(cube_path:str, gt_fpath:str, results_to:str, test_size:f
logging.info(f'OOB prediction of accuracy is: {round(rf.oob_score_ * 100, 2)}%')

# Cross-tabulate predictions
cm = pd.crosstab(df['truth'], df['predict'], margins=True)
truth = df['truth'].astype(int)
predict = df['predict'].astype(int)
cm = pd.crosstab(truth, predict, margins=True)
cm.columns = cm.columns.astype(str).tolist()
cm.index = cm.index.astype(str).tolist()
cm = fill_confMatrix(cm, labels)
cm.to_csv(os.path.join(results_to, "Confusion_Matrix.csv"))

@@ -493,10 +502,12 @@ def svm_train(cube_path:str, gt_fpath:str, results_to:str, test_size:float = 0.3
X_test = None
gc.collect()

# Cross-tabulate predictions
cm = pd.crosstab(df['truth'], df['predict'], margins=True)
truth = df['truth'].astype(int)
predict = df['predict'].astype(int)
cm = pd.crosstab(truth, predict, margins=True)
cm.columns = cm.columns.astype(str).tolist()
cm.index = cm.index.astype(str).tolist()
cm = fill_confMatrix(cm, labels)
cm.to_csv(os.path.join(results_to, "Confusion_Matrix.csv"))

logging.info("Done.")

Expand Down
27 changes: 22 additions & 5 deletions cropmaps/sentinels.py
@@ -270,7 +270,7 @@ def upsample(self, band = None, store = None, new = None, subregion = None, meth
raise ValueError("Arguments 'band' must be provided!")

@staticmethod
def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "output.tif", resampling:rasterio.warp.Resampling = Resampling.nearest) -> None:
def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "output.tif", resampling:rasterio.warp.Resampling = Resampling.nearest, compress = False) -> None:
"""Reprojects/Resamples an image to a base image.
Args:
image (str): Path to input file to reproject/resample
@@ -292,6 +292,8 @@ def reproj_match(image:str, base:str, to_file:bool = False, outfile:str = "outpu
"width": dst_width,
"height": dst_height,
})
if compress:
metadata.update({"compress": "lzw"})
if to_file:
with rasterio.open(outfile, "w", **metadata) as dst:
# iterate through bands and write using reproject function
@@ -344,7 +346,7 @@ def _cloud_mask(mask:rasterio.io.DatasetReader)->np.array:

return mask_array

def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = None, resolution:str = None, new:str = "CLOUDMASK")->None:
def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = None, resolution:str = None, new:str = "CLOUDMASK", compress = False)->None:
"""Apply default SCL mask to S2 images.
Args:
@@ -431,7 +433,8 @@ def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = No
array = src.read(1)
array[mask==0] = nodata
meta.update({"nodata": nodata})

if compress:
meta.update({"compress": "lzw"})
with rasterio.open(os.path.join(path, new_name), "w", **meta) as dest:
dest.write(array, 1)

@@ -511,13 +514,15 @@ def apply_cloud_mask(self, band:str = None, store:str = None, subregion:str = No
array = src.read(1)
array[mask==0] = nodata
meta.update({"nodata": nodata})

if compress:
meta.update({"compress": "lzw"})

with rasterio.open(os.path.join(path, new_name), "w", **meta) as dest:
dest.write(array, 1)

getattr(self, band)[str(res)][subregion] = os.path.join(path, new_name)

def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
def calcVI(self, index, store = None, subregion = None, verbose:bool = False, compress = False):
"""Calculates a selected vegetation index (NDVI, NDBI, NDWI).
Args:
index (str): Vegetation index to be calculated and saved. Currently only NDVI, NDBI, NDWI are supported
@@ -568,6 +573,9 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = self.datapath_10
metadata = red.meta.copy()
metadata.update({"driver": driver, "dtype": ndvi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})

self.writeResults(path, new_name, ndvi_array, metadata)
# Setting NDVI attribute to S2 image
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(self.datapath_10, new_name)}})
@@ -604,6 +612,9 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = store
metadata = red.meta.copy()
metadata.update({"driver": driver, "dtype": ndvi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})

self.writeResults(path, new_name, ndvi_array, metadata)
# Setting NDVI attribute to S2 image
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(path, new_name)}})
@@ -643,6 +654,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
path = self.datapath_10
metadata = green.meta.copy()
metadata.update({"driver": driver, "dtype": ndwi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(path, new_name, ndwi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(self.datapath_10, new_name)}})
else:
@@ -678,6 +691,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
ndwi_array[green_array == green.meta["nodata"]] = -9999.
metadata = green.meta.copy()
metadata.update({"driver": driver, "dtype": ndwi_array.dtype, "nodata": -9999.})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(store, new_name, ndwi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(store, new_name)}})

@@ -752,6 +767,8 @@ def calcVI(self, index, store = None, subregion = None, verbose:bool = False):
ndbi_array[swir_array == swir.meta["nodata"]] = -9999.
metadata = swir.meta.copy()
metadata.update({"driver": driver, "dtype": ndbi_array.dtype, "nodata": -9999})
if compress:
metadata.update({"compress": "lzw"})
self.writeResults(store, new_name, ndbi_array, metadata)
setattr(self, '{}'.format(index), {self.setResolution(index): {region : os.path.join(store, new_name)}})

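
The same compress flag now threads through reproj_match, apply_cloud_mask, and calcVI. A hedged sketch, assuming s2 is an instance of the Sentinel-2 image class defined in cropmaps/sentinels.py (the instance name and store paths are hypothetical; the method names and keywords come from the diff above):

# Compute NDVI and apply the SCL cloud mask, writing LZW-compressed GeoTIFFs.
s2.calcVI("NDVI", store="./indices", compress=True)
s2.apply_cloud_mask(band="NDVI", store="./masked", compress=True)
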
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -25,7 +25,7 @@
author = 'Alekos Falagas'

# The full version, including alpha/beta/rc tags
release = '0.0.1beta'
release = '0.0.1a0'


# -- General configuration ---------------------------------------------------