diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..6f516a0e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +tests +.github +.git +.pytest_cache +.vscode +__pycache__ +*.md +docs/* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 02daa4ad..df16b3ff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,46 +1,38 @@ -FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 +FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 ARG CONDA_PYTHON_VERSION=3 ARG CONDA_DIR=/opt/conda ARG USERNAME=gdl_user ARG USERID=1000 ARG GIT_TAG=develop - +ENV PATH=$CONDA_DIR/bin:$PATH # RNCAN certificate; uncomment (with right .cer name) if you are building behind a FW -#COPY NRCan-RootCA.cer /usr/local/share/ca-certificates/cert.crt -#RUN chmod 644 /usr/local/share/ca-certificates/cert.crt && update-ca-certificates +# COPY NRCan-RootCA.cer /usr/local/share/ca-certificates/cert.crt +# RUN chmod 644 /usr/local/share/ca-certificates/cert.crt && update-ca-certificates RUN apt-get update \ && apt-get install -y --no-install-recommends git wget unzip bzip2 build-essential sudo \ - && apt-key del 7fa2af80 \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ - && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004-keyring.gpg \ - && sudo mv cuda-ubuntu2004-keyring.gpg /usr/share/keyrings/cuda-archive-keyring.gpg \ - && rm -f cuda-keyring_1.0-1_all.deb && rm -f /etc/apt/sources.list.d/cuda.list - -# Install Mamba directly -ENV PATH $CONDA_DIR/bin:$PATH -RUN wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh -O /tmp/mamba.sh && \ - /bin/bash /tmp/mamba.sh -b -p $CONDA_DIR && \ - rm -rf /tmp/* && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV LD_LIBRARY_PATH $CONDA_DIR/lib:$LD_LIBRARY_PATH - -# Create the user -RUN useradd --create-home -s /bin/bash 
--no-user-group -u $USERID $USERNAME && \ - chown $USERNAME $CONDA_DIR -R && \ - adduser $USERNAME sudo && \ - echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers - + && wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh -O /tmp/mamba.sh \ + && /bin/bash /tmp/mamba.sh -b -p $CONDA_DIR \ + && rm -rf /tmp/* \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && useradd --create-home -s /bin/bash --no-user-group -u $USERID $USERNAME \ + && chown $USERNAME $CONDA_DIR -R \ + && adduser $USERNAME sudo \ + && echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +ENV LD_LIBRARY_PATH=$CONDA_DIR/lib:$LD_LIBRARY_PATH USER $USERNAME -WORKDIR /home/$USERNAME/ - -RUN cd /home/$USERNAME && git clone --depth 1 "https://github.com/NRCan/geo-deep-learning.git" --branch $GIT_TAG -RUN conda config --set ssl_verify no -RUN mamba env create -f /home/$USERNAME/geo-deep-learning/environment.yml - -ENV PATH $CONDA_DIR/envs/geo_deep_env/bin:$PATH -RUN echo "source activate geo_deep_env" > ~/.bashrc \ No newline at end of file +WORKDIR /usr/app + +COPY environment.yml /usr/app +RUN cd /home/$USERNAME && \ + conda config --set ssl_verify no && \ + mamba env create -f /usr/app/environment.yml && \ + mamba clean --all \ + && pip uninstall -y pip + +COPY . 
/usr/app/geo-deep-learning +ENV PATH=$CONDA_DIR/envs/geo_ml_env/bin:$PATH +RUN echo "source activate geo_ml_env" > ~/.bashrc \ No newline at end of file diff --git a/config/inference/default_binary.yaml b/config/inference/default_binary.yaml index 9241d871..f3d07efa 100644 --- a/config/inference/default_binary.yaml +++ b/config/inference/default_binary.yaml @@ -5,21 +5,18 @@ inference: input_stac_item: # alternatively, use a path or url to stac item directly model_path: ${general.save_weights_dir}/ output_path: - checkpoint_dir: # (string, optional): directory in which to save the object if url - batch_size: 8 - chunk_size: # if empty, will be calculated automatically from max_pix_per_mb_gpu - # Maximum number of pixels each Mb of GPU Ram to allow. E.g. if GPU has 1000 Mb of Ram and this parameter is set to - # 10, chunk_size will be set to sqrt(1000 * 10) = 100. - max_pix_per_mb_gpu: 25 + patch_size: 1024 + workers: 0 prep_data_only: False - override_model_params: False - save_heatmap: True # saves a heatmap to {output_dir}/{output_name}_heatmap.tif heatmap_threshold: 0.3 - + flip: False + rotate: True + num_classes: 2 + # GPU parameters gpu: ${training.num_gpus} max_used_perc: ${training.max_used_perc} # If GPU's usage exceeds this percentage, it will be ignored max_used_ram: ${training.max_used_ram} # If RAM usage of detected GPU exceeds this percentage, it will be ignored # Post-processing - mask_to_vector: False # if True, a polygonized version of the inference (.gpkg) will be created with rasterio tools \ No newline at end of file + ras2vec: False # if True, a polygonized version of the inference (.gpkg) will be created with rasterio tools \ No newline at end of file diff --git a/config/inference/default_multiclass.yaml b/config/inference/default_multiclass.yaml index 7dacbcf8..060bf61b 100644 --- a/config/inference/default_multiclass.yaml +++ b/config/inference/default_multiclass.yaml @@ -5,16 +5,13 @@ inference: input_stac_item: # alternatively, use a path 
or url to stac item directly model_path: ${general.save_weights_dir}/ output_path: - checkpoint_dir: # (string, optional): directory in which to save the object if url - batch_size: 8 - chunk_size: # if empty, will be calculated automatically from max_pix_per_mb_gpu - # Maximum number of pixels each Mb of GPU Ram to allow. E.g. if GPU has 1000 Mb of Ram and this parameter is set to - # 10, chunk_size will be set to sqrt(1000 * 10) = 100. - max_pix_per_mb_gpu: 25 + patch_size: 1024 + workers: 0 prep_data_only: False - override_model_params: False - save_heatmap: True # saves a heatmap to {output_dir}/{output_name}_heatmap.tif heatmap_threshold: 0.3 + flip: False + rotate: True + num_classes: 5 # GPU parameters gpu: ${training.num_gpus} @@ -22,4 +19,4 @@ inference: max_used_ram: ${training.max_used_ram} # If RAM usage of detected GPU exceeds this percentage, it will be ignored # Post-processing - mask_to_vector: False # if True, a polygonized version of the inference (.gpkg) will be created with rasterio tools \ No newline at end of file + ras2vec: False # if True, a polygonized version of the inference (.gpkg) will be created with rasterio tools \ No newline at end of file diff --git a/docs/source/mode.rst b/docs/source/mode.rst index 7fab5980..06fe28e0 100755 --- a/docs/source/mode.rst +++ b/docs/source/mode.rst @@ -181,11 +181,9 @@ will be found in :ref:`configurationdefaultparam` under ``inference`` and this c .. literalinclude:: ../../../config/inference/default_binary.yaml :language: yaml -- ``raw_data_csv`` (str) - Path to the images csv. - ``root_dir`` (str) Directory where outputs and downloads will be written by default, - if ``checkpoint_dir`` or ``output_path`` are omitted. + if ``output_path`` is omitted. - ``raw_data_csv`` (str) Points to a csv containing paths to imagery for inference. If a ground truth is present in 2nd column, it will be ignored. 
@@ -193,28 +191,31 @@ will be found in :ref:`configurationdefaultparam` under ``inference`` and this c A path or url to :ref:`stac item ` directly. See stac item example for `Spacenet test data `_, also contained in `test data `_. -- ``state_dict_path`` (str) +- ``model_path`` (str) Path to checkpoint containing trained weights for a given neural network architecture. - ``output_path`` (str, optional) Complete path including parent directories and full name with extension where output inference should be saved. By default ``root_dir/{aoi.aoi_id}_pred.tif`` (see :ref:`AOI documentation `), the ``output_path`` parameter should only be used if a single inference is being performed. Otherwise, it is recommended to set the root_dir and use the default output name. -- ``checkpoint_dir`` (str) - Directory in which to save the checkpoint file if url. -- ``chunk_size`` (int) - Size of chunk (in pixels) to read use for inference iterations over input imagery. The input patch will - be square, therefore set at ``512`` it will generate 512 x 512 patches. -- ``max_pix_per_mb_gpu`` (int) - If chunk_size is omitted, this defines a "*maximum number of pixels per MB of GPU Ram*" that should be - considered. E.g. if GPU has 1000 Mb of Ram and this parameter is set to 10, chunk_size will be set to - ``sqrt(1000 * 10) = 100``. By defaults it's set to 25. Since this feature is based on a rule-of-thumb - and assumes some prior empirical testing. WIP. +- ``patch_size`` (int) + Size of patch (in pixels) to read and use for inference iterations over input imagery. The input patch will + be square; therefore, if set to ``512``, it will generate 512 x 512 patches. +- ``workers`` (int) + Number of workers used by the geo-inference library. Default is ``0``, which means the number of cores available on the + host, minus 1. - ``prep_data_only`` (bool) If True, the inference script will exit after preparation of input data. 
If checkpoint path is url, then the checkpoint will be download, if imagery points to urls, it will be downloaded and if input model expects imagery with :ref:`histogram equalization `, this enhancement is applied and equalized images save to disk. +- ``heatmap_threshold`` (float) + Prediction probability threshold (fraction of 1) to use. Default is ``0.3``. +- ``flip`` (bool) + If True, perform horizontal and vertical flips during inference. +- ``rotate`` (bool) + If True, perform 90-degree rotation at inference. + - ``gpu`` (int) Number of gpus to use at inference. - ``max_used_perc`` (int) @@ -224,7 +225,7 @@ will be found in :ref:`configurationdefaultparam` under ``inference`` and this c - ``max_used_ram`` (int) If RAM usage of detected GPU exceeds this percentage, it will be ignored. - ``ras2vec`` (bool) - If True, a polygonized version of the inference ``.gpkg`` will be created with rasterio tools. + If True, a polygonized version of the inference ``.geojson`` will be created with rasterio tools. .. 
note:: diff --git a/environment.yml b/environment.yml index 28fdb323..7e24fe54 100644 --- a/environment.yml +++ b/environment.yml @@ -1,16 +1,27 @@ -name: geo_deep_env +name: geo_ml_env channels: + - pytorch + - nvidia - conda-forge dependencies: - - python==3.11.5 + - coverage>=6.3.1 + - geopandas>=0.14.4 + - hydra-core>=1.2.0 - pip - - gdal - - pystac>=0.3.0 + - pynvml>=11.0 + - pystac>=1.10.1 - pytest>=7.1 + - python>=3.11 + - pytorch>=2.3 + - pytorch-cuda>=12.1 + - rich>=11.1 - ruamel_yaml>=0.15 - scikit-image>=0.18 + - torchgeo>=0.5.2 + - torchvision>=0.13 - pip: - - geo-inference>=2.0.7 + - git+https://github.com/NRCan/geo-inference.git - hydra-colorlog>=1.1.0 - hydra-optuna-sweeper>=1.1.0 + - ttach>=0.0.3 - mlflow>=1.2 # causes env solving to hang if not with pip \ No newline at end of file diff --git a/inference_segmentation.py b/inference_segmentation.py index 23aa1160..00b05ff7 100644 --- a/inference_segmentation.py +++ b/inference_segmentation.py @@ -1,15 +1,16 @@ import csv -from math import sqrt +import rasterio + from tqdm import tqdm +from shutil import move from pathlib import Path from numbers import Number from tempfile import mkstemp from omegaconf import DictConfig from typing import Dict, Sequence, Union -from dataset.stacitem import SingleBandItemEO - from utils.aoiutils import aois_from_csv +from dataset.stacitem import SingleBandItemEO from utils.logger import get_logger, set_tracker from geo_inference.geo_inference import GeoInference from utils.utils import get_device_ids, get_key_def, set_device @@ -24,24 +25,6 @@ def stac_input_to_temp_csv(input_stac_item: Union[str, Path]) -> Path: csv.writer(fh).writerow([str(input_stac_item), None, "inference", Path(input_stac_item).stem]) return Path(stac_temp_csv) -def calc_inference_chunk_size(gpu_devices_dict: dict, max_pix_per_mb_gpu: int = 200, default: int = 512) -> int: - """ - Calculate maximum chunk_size that could fit on GPU during inference based on thumb rule with hardcoded - "pixels per 
MB of GPU RAM" as threshold. Threshold based on inference with a large model (Deeplabv3_resnet101) - :param gpu_devices_dict: dictionary containing info on GPU devices as returned by lst_device_ids (utils.py) - :param max_pix_per_mb_gpu: Maximum number of pixels that can fit on each MB of GPU (better to underestimate) - :return: returns a downgraded evaluation batch size if the original batch size is considered too high - """ - if not gpu_devices_dict: - return default - # get max ram for smallest gpu - smallest_gpu_ram = min(gpu_info['max_ram'] for _, gpu_info in gpu_devices_dict.items()) - # rule of thumb to determine max chunk size based on approximate max pixels a gpu can handle during inference - max_chunk_size = sqrt(max_pix_per_mb_gpu * smallest_gpu_ram) - max_chunk_size_rd = int(max_chunk_size - (max_chunk_size % 256)) # round to the closest multiple of 256 - logging.info(f'Data will be split into chunks of {max_chunk_size_rd} if chunk_size is not specified.') - return max_chunk_size_rd - def main(params:Union[DictConfig, Dict]): @@ -51,9 +34,10 @@ def main(params:Union[DictConfig, Dict]): params['inference'], to_path=True, validate_path_exists=True, - wildcard='*.pt') - mask_to_vector = get_key_def('mask_to_vector', params['inference'], default=False, expected_type=bool) + wildcard='*pt') + prep_data_only = get_key_def('prep_data_only', params['inference'], default=False, expected_type=bool) + # Set the device num_devices = get_key_def('gpu', params['inference'], default=0, expected_type=(int, bool)) if num_devices > 1: @@ -64,18 +48,15 @@ def main(params:Union[DictConfig, Dict]): raise ValueError(f'\nMax used ram parameter should be a percentage. 
Got {max_used_ram}.') max_used_perc = get_key_def('max_used_perc', params['inference'], default=25, expected_type=int) gpu_devices_dict = get_device_ids(num_devices, max_used_ram_perc=max_used_ram, max_used_perc=max_used_perc) - max_pix_per_mb_gpu = get_key_def('max_pix_per_mb_gpu', params['inference'], default=25, expected_type=int) - auto_chunk_size = calc_inference_chunk_size(gpu_devices_dict=gpu_devices_dict, - max_pix_per_mb_gpu=max_pix_per_mb_gpu, default=512) - - - chunk_size = get_key_def('chunk_size', params['inference'], default=auto_chunk_size, expected_type=int) - batch_size = get_key_def('batch_size', params['inference'], default=8, expected_type=int) + patch_size = get_key_def('patch_size', params['inference'], default=1024, expected_type=int) + workers = get_key_def('workers', params['inference'], default=0, expected_type=int) + prediction_threshold = get_key_def('prediction_threshold', params['inference'], default=0.3, expected_type=float) device = set_device(gpu_devices_dict=gpu_devices_dict) # Dataset params bands_requested = get_key_def('bands', params['dataset'], default=[1, 2, 3], expected_type=Sequence) + classes_dict = get_key_def('classes_dict', params['dataset'], expected_type=DictConfig) download_data = get_key_def('download_data', params['inference'], default=False, expected_type=bool) data_dir = get_key_def('raw_data_dir', params['dataset'], default="data", to_path=True, validate_path_exists=True) clahe_clip_limit = get_key_def('clahe_clip_limit', params['tiling'], expected_type=Number, default=0) @@ -83,6 +64,11 @@ def main(params:Union[DictConfig, Dict]): validate_path_exists=True) input_stac_item = get_key_def('input_stac_item', params['inference'], expected_type=str, to_path=True, validate_path_exists=True) + num_classes = get_key_def('num_classes', params['inference'], expected_type=int, default=5) + vectorize = get_key_def('ras2vec', params['inference'], expected_type=bool, default=False) + transform_flip = get_key_def('flip', 
params['inference'], expected_type=bool, default=False) + transform_rotate = get_key_def('rotate', params['inference'], expected_type=bool, default=False) + transforms = True if transform_flip or transform_rotate else False if raw_data_csv and input_stac_item: raise ValueError(f"Input imagery should be either a csv of stac item. Got inputs from both \"raw_data_csv\" " @@ -109,6 +95,10 @@ def main(params:Union[DictConfig, Dict]): data_dir=data_dir, equalize_clahe_clip_limit=clahe_clip_limit, ) + + if prep_data_only: + logging.info(f"[prep_data_only mode] Data preparation for inference is complete. Exiting...") + exit() # Create the inference object device_str = "gpu" if device.type == 'cuda' else "cpu" @@ -116,15 +106,30 @@ def main(params:Union[DictConfig, Dict]): geo_inference = GeoInference(model=str(model_path), work_dir=str(working_folder), - batch_size=batch_size, - mask_to_vec=mask_to_vector, + mask_to_vec=vectorize, device=device_str, gpu_id=gpu_index, + num_classes=num_classes, + prediction_threshold=prediction_threshold, + transformers=transforms, + transformer_flip=transform_flip, + transformer_rotate=transform_rotate, ) # LOOP THROUGH LIST OF INPUT IMAGES for aoi in tqdm(list_aois, desc='Inferring from images', position=0, leave=True): logging.info(f'\nReading image: {aoi.aoi_id}') - raster = aoi.raster - geo_inference(raster, tiff_name=aoi.aoi_id, patch_size=chunk_size) - \ No newline at end of file + input_path = str(aoi.raster.name) + mask_name = geo_inference(input_path, patch_size=patch_size, workers=workers) + mask_path = working_folder / mask_name + + # update metadata info and rename mask tif. 
+ if classes_dict is not None: + meta_data_dict = {"checkpoint": str(model_path), + "classes_dict": classes_dict} + with rasterio.open(mask_path, 'r+') as raster: + raster.update_tags(**meta_data_dict) + output_path = get_key_def('output_path', params['inference'], expected_type=str, to_path=True, + default=mask_path) + move(mask_path, output_path) + logging.info(f"finished inferring image: {aoi.aoi_id} ") \ No newline at end of file diff --git a/tests/CI/test_gh_actions_ci.py b/tests/CI/test_gh_actions_ci.py index b572077c..7210631f 100644 --- a/tests/CI/test_gh_actions_ci.py +++ b/tests/CI/test_gh_actions_ci.py @@ -12,9 +12,9 @@ class Test_GH_Actions(object): def test_ci(self) -> None: data_dir = "data" Path(data_dir).mkdir(exist_ok=True, parents=True) - extract_archive(src="tests/data/spacenet.zip") - extract_archive(src="tests/data/new_brunswick_aerial.zip") - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") with initialize(config_path="../../config", job_name="test_ci"): cfg = compose(config_name="gdl_config_template") diff --git a/tests/dataset/test_aoi.py b/tests/dataset/test_aoi.py index 1c84b8d4..8d3ffa0e 100644 --- a/tests/dataset/test_aoi.py +++ b/tests/dataset/test_aoi.py @@ -19,7 +19,7 @@ class Test_AOI(object): def test_multiband_input(self): """Tests reading a multiband raster as input""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split']) @@ -44,7 +44,7 @@ def bands_request(self, request: SubRequest) -> List: def test_multiband_input_band_selection(self, bands_request: List) -> None: """Tests reading a multiband raster as input 
with band selection""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split'], raster_bands_request=bands_request) @@ -58,7 +58,7 @@ def test_multiband_input_band_selection(self, bands_request: List) -> None: def test_multiband_input_band_selection_from_letters(self): """Tests error when selecting bands from a multiband raster using letters, not integers""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] bands_request = ["R", "G"] @@ -68,7 +68,7 @@ def test_multiband_input_band_selection_from_letters(self): def test_multiband_input_band_selection_too_many(self): """Tests error when selecting too many bands from a multiband raster""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] bands_request = [1, 2, 3, 4, 5] @@ -78,7 +78,7 @@ def test_multiband_input_band_selection_too_many(self): def test_singleband_input(self): """Tests reading a singleband raster as input with ${dataset.bands} pattern""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband_ci.csv") bands = ['R', 'G', 'B'] row = next(iter(data)) @@ -89,7 +89,7 @@ def test_singleband_input(self): def test_stac_input(self): """Tests singleband raster referenced by stac item as input""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-stac_ci.csv") bands = ['red', 'green', 'blue'] row = 
next(iter(data)) @@ -106,7 +106,7 @@ def test_stac_input(self): def test_stac_url_input(self): """Tests download of singleband raster as url path referenced by a stac item""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband-url_ci.csv") row = next(iter(data)) aoi = AOI( @@ -123,7 +123,7 @@ def test_stac_url_input(self): def test_missing_label(self): """Tests error when provided label file is missing""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['gpkg'] = "missing_file.gpkg" @@ -132,15 +132,15 @@ def test_missing_label(self): def test_no_label(self): """Test when no label are provided. Should pass for inference. """ - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") csv_path = "tests/inference/inference_segmentation_multiclass_no_label.csv" aois = aois_from_csv(csv_path=csv_path, bands_requested=[1, 2, 3]) assert aois[0].label is None def test_parse_input_raster(self) -> None: """Tests parsing for three accepted patterns to reference input raster data with band selection""" - extract_archive(src="tests/data/spacenet.zip") - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") raster_raw = { "tests/data/spacenet/SpaceNet_AOI_2_Las_Vegas-056155973080_01_P001-WV03.json": [ "red", "green", "blue"], @@ -153,7 +153,7 @@ def test_parse_input_raster(self) -> None: def test_corrupt_raster(self) -> None: """Tests error when reading a corrupt file""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = 
read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['tif'] = "tests/data/massachusetts_buildings_kaggle/corrupt_file.tif" @@ -163,7 +163,7 @@ def test_corrupt_raster(self) -> None: def test_image_only(self) -> None: """Tests AOI creation with image only, ie no label""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) aoi = AOI(raster=row['tif'], label=None) @@ -172,7 +172,7 @@ def test_image_only(self) -> None: def test_filter_gdf_by_attribute(self): """Tests filtering features from a vector file according to an attribute field and value""" - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") data = read_csv("tests/tiling/tiling_segmentation_multiclass_ci.csv") iterator = iter(data) row = next(iterator) @@ -188,7 +188,7 @@ def test_filter_gdf_by_attribute(self): def test_missing_raster(self) -> None: """Tests error when pointing to missing raster""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['tif'] = "missing_raster.tif" @@ -198,7 +198,7 @@ def test_missing_raster(self) -> None: def test_wrong_split(self) -> None: """Tests error when setting a wrong split, ie not 'trn', 'tst' or 'inference'""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['split'] = "missing_split" @@ -208,7 +208,7 @@ def test_wrong_split(self) -> None: def test_download_data(self) -> None: """Tests download data""" - extract_archive(src="tests/data/spacenet.zip") + 
extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['tif'] = "http://datacube-stage-data-public.s3.ca-central-1.amazonaws.com/store/imagery/optical/" \ @@ -221,7 +221,7 @@ def test_download_data(self) -> None: def test_no_intersection(self) -> None: """Tests error testing no intersection between raster and label""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) row['gpkg'] = "tests/data/new_brunswick_aerial/BakerLake_2017_clipped.gpkg" @@ -231,7 +231,7 @@ def test_no_intersection(self) -> None: def test_write_multiband_from_single_band(self) -> None: """Tests the 'write_multiband' method""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband_ci.csv") row = data[0] aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split'], raster_bands_request=['R', 'G', 'B'], @@ -243,7 +243,7 @@ def test_write_multiband_from_single_band(self) -> None: def test_write_multiband_from_single_band_url(self) -> None: """Tests the 'write_multiband' method with singleband raster as URL""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband-url_ci.csv") row = next(iter(data)) aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split'], raster_bands_request=['R', 'G', 'B'], @@ -256,7 +256,7 @@ def test_write_multiband_from_single_band_url(self) -> None: def test_write_multiband_not_applicable(self) -> None: """Tests the skipping of 'write_multiband' method""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = 
read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = next(iter(data)) aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split'], raster_bands_request=[1, 2, 3], @@ -266,7 +266,7 @@ def test_write_multiband_not_applicable(self) -> None: def test_download_true_not_url(self) -> None: """Tests AOI creation if download_data set to True, but not necessary (local image)""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband_ci.csv") row = next(iter(data)) aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split'], download_data=True, @@ -275,7 +275,7 @@ def test_download_true_not_url(self) -> None: def test_raster_stats_from_stac(self) -> None: """Tests the calculation of statistics of raster data as stac item from an AOI instance""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-stac_ci.csv") bands_request = ['red', 'green', 'blue'] expected_stats = { @@ -297,7 +297,7 @@ def test_raster_stats_from_stac(self) -> None: def test_raster_stats_not_stac(self) -> None: """Tests the calculation of statistics of local multiband raster data from an AOI instance""" - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") data = read_csv("tests/tiling/tiling_segmentation_multiclass_ci.csv") expected_stats = { 'band_0': {'statistics': {'minimum': 11, 'maximum': 254, 'mean': 159.36075617930456, 'median': 165.0, @@ -318,7 +318,7 @@ def test_raster_stats_not_stac(self) -> None: def test_to_dict(self): """Test the 'to_dict()' method on an AOI instance""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-stac_ci.csv") bands = ['red', 'green', 
'blue'] row = next(iter(data)) @@ -368,7 +368,7 @@ def test_name_raster(self) -> None: def test_is_low_contrast(self): """Test raster contrast (high | low)""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] aoi = AOI( @@ -392,7 +392,7 @@ def test_is_low_contrast(self): def test_equalize_hist_raster(self): """Test equalize input raster with CLAHE transform""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] aoi = AOI( @@ -411,7 +411,7 @@ def test_equalize_hist_raster(self): def test_equalize_hist_raster_per_band(self): """Test equalize input raster per band with CLAHE transform""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") row = data[0] aoi = AOI( diff --git a/tests/dataset/test_datasets.py b/tests/dataset/test_datasets.py index 66f65faa..708cd728 100644 --- a/tests/dataset/test_datasets.py +++ b/tests/dataset/test_datasets.py @@ -54,7 +54,7 @@ class TestDRDataset: "tests/data/massachusetts_buildings_kaggle/23429155_15_uint8_clipped.tif"] ) def raster_dataset(self, request: SubRequest) -> DatasetReader: - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") image = request.param dr_ds = rasterio.open(image) return dr_ds @@ -95,7 +95,7 @@ def test_init(self): "tests/data/massachusetts_buildings_kaggle/23429155_15.gpkg"] ) def vector_dataset(self, request: SubRequest) -> GDLVectorDataset: - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") fp = request.param 
vec_ds = GDLVectorDataset(fp) return vec_ds @@ -158,7 +158,7 @@ class TestIntersectionCustomDatasets: ) ) def dataset(self, request: SubRequest) -> List: - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") image = request.param[0] dr_ds = rasterio.open(image) raster_ds = DRDataset(dr_ds) diff --git a/tests/dataset/test_stacitem.py b/tests/dataset/test_stacitem.py index a92b5c08..9b1b3b8a 100644 --- a/tests/dataset/test_stacitem.py +++ b/tests/dataset/test_stacitem.py @@ -9,7 +9,7 @@ class Test_SingleBandItemEO(object): def test_stac_input_missing_band(self): """Tests error when requesting non-existing singleband input rasters from stac item""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-stac_ci.csv") row = next(iter(data)) with pytest.raises(ValueError): @@ -18,8 +18,8 @@ def test_stac_input_missing_band(self): def test_stac_input_empty_band_request(self): """Tests error when band selection is required (stac item) but missing""" - extract_archive(src="tests/data/spacenet.zip") - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") stac_item_path = "tests/data/spacenet/SpaceNet_AOI_2_Las_Vegas-056155973080_01_P001-WV03.json" with pytest.raises(ValueError): item = SingleBandItemEO(item=pystac.Item.from_file(stac_item_path), diff --git a/tests/test_tiling_segmentation.py b/tests/test_tiling_segmentation.py index f6bdf5ce..eca5939f 100644 --- a/tests/test_tiling_segmentation.py +++ b/tests/test_tiling_segmentation.py @@ -19,7 +19,7 @@ def test_outputted_chips(self): data_dir = f"data/patches" proj = f"tiling_output_test" Path(data_dir).mkdir(exist_ok=True, parents=True) - 
extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") cfg = { "general": {"project_name": proj}, "debug": True, @@ -49,7 +49,7 @@ def test_outputted_chips(self): def test_min_annotated_percent_filter(self): """Tests the minimum annotated percent filter""" - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") data = read_csv("tests/tiling/tiling_segmentation_multiclass_ci.csv") iterator = iter(data) row = next(iterator) @@ -60,8 +60,8 @@ def test_val_percent(self): """Tests the trn/val sorting to ensure the result is close enough to requested val_percent""" data_dir = f"data/patches" Path(data_dir).mkdir(exist_ok=True, parents=True) - extract_archive(src="tests/data/spacenet.zip") - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") proj_prefix = "test_val_percent" datasets = {"binary-multiband", "multiclass"} results = [] @@ -106,8 +106,8 @@ def test_annot_percent(self): """Tests the minimum annotated percentage to assert ground truth patches with mostly background are rejected""" data_dir = f"data/patches" Path(data_dir).mkdir(exist_ok=True, parents=True) - extract_archive(src="tests/data/spacenet.zip") - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") proj_prefix = "test_annot_percent" datasets = {"binary-multiband", "multiclass"} results = [] @@ -151,7 +151,7 @@ def test_annot_percent(self): def test_tiling_segmentation_parallel(self): data_dir = "data/patches" Path(data_dir).mkdir(exist_ok=True, parents=True) - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") proj 
= "test_parallel" cfg = { "general": {"project_name": proj}, @@ -190,7 +190,7 @@ def test_tiling_inference(self): """Tests tiling of imagery only for inference""" data_dir = "data/patches" Path(data_dir).mkdir(exist_ok=True, parents=True) - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") project_name = "test_inference" cfg = { "general": {"project_name": project_name}, diff --git a/tests/test_verify_segmentation.py b/tests/test_verify_segmentation.py index 815f528f..78921648 100644 --- a/tests/test_verify_segmentation.py +++ b/tests/test_verify_segmentation.py @@ -11,7 +11,7 @@ class TestVerify(object): def test_verify_per_aoi(self): """Test stats outputs from an AOI""" - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") data = read_csv("tests/tiling/tiling_segmentation_multiclass_ci.csv") aoi = AOI(raster=data[0]['tif'], label=data[0]['gpkg'], split=data[0]['split']) aoi_dict, error = verify_per_aoi( diff --git a/tests/tiling/test_tiling.py b/tests/tiling/test_tiling.py index df6db5ac..28ccb8b0 100644 --- a/tests/tiling/test_tiling.py +++ b/tests/tiling/test_tiling.py @@ -17,7 +17,7 @@ class TestTiler(object): def test_tiling_per_aoi(self): - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") img = "tests/data/massachusetts_buildings_kaggle/22978945_15_uint8_clipped.tif" gt = "tests/data/massachusetts_buildings_kaggle/22978945_15.gpkg" my_aoi = AOI(raster=img, raster_bands_request=[1, 2, 3], label=gt, split='trn') @@ -40,7 +40,7 @@ def test_tiling_per_aoi(self): def test_passes_min_annot(self): """Tests annotated percent calculation""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") img = 
"tests/data/spacenet/SN7_global_monthly_2020_01_mosaic_L15-0331E-1257N_1327_3160_13_uint8_clipped.tif" gt = "tests/data/spacenet/SN7_global_monthly_2020_01_mosaic_L15-0331E-1257N_1327_3160_13_uint8_clipped.gpkg" my_aoi = AOI(raster=img, raster_bands_request=[1, 2, 3], label=gt, split='trn') @@ -62,7 +62,7 @@ def test_passes_min_annot(self): def test_burn_gt_patch(self): """Tests burning a label while using the filter for attribute field and values""" - extract_archive(src="tests/data/new_brunswick_aerial.zip") + extract_archive(from_path="tests/data/new_brunswick_aerial.zip") img = "tests/data/new_brunswick_aerial/23322E759967N_clipped_1m_1of2.tif" gt = "tests/data/new_brunswick_aerial/BakerLake_2017_clipped.gpkg" my_aoi = AOI( @@ -178,7 +178,7 @@ def test__parse_torchgeo_batch(self): def test__define_output_name(self): """ Test _define_output_name method of the Tiler class """ - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") img = "tests/data/massachusetts_buildings_kaggle/22978945_15_uint8_clipped.tif" gt = "tests/data/massachusetts_buildings_kaggle/22978945_15.gpkg" my_aoi = AOI(raster=img, raster_bands_request=[1, 2, 3], label=gt, split='trn') @@ -199,7 +199,7 @@ def test__save_vec_mem_tile(self): """ Test _save_vec_mem_tile method of the Tiler class """ """ Test _define_output_name method of the Tiler class """ try: - extract_archive(src="tests/data/massachusetts_buildings_kaggle_patch.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle_patch.zip") except FileNotFoundError: pass gt = "tests/data/massachusetts_buildings_kaggle_patch/massachusetts_buildings_kaggle_patch.gpkg" @@ -261,7 +261,7 @@ def test__save_vec_mem_tile(self): def test_tiling_per_aoi_append_mode(self): """Tests tiling's append mode""" - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + 
extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") img = "tests/data/massachusetts_buildings_kaggle/22978945_15_uint8_clipped.tif" gt = "tests/data/massachusetts_buildings_kaggle/22978945_15.gpkg" my_aoi = AOI(raster=img, raster_bands_request=[1, 2, 3], label=gt, split='trn') diff --git a/tests/utils/test_geoutils.py b/tests/utils/test_geoutils.py index 520a684b..a27cf544 100644 --- a/tests/utils/test_geoutils.py +++ b/tests/utils/test_geoutils.py @@ -19,7 +19,7 @@ class TestGeoutils(object): def test_multiband_vrt_from_single_band(self) -> None: """Tests the 'stack_singlebands_vrt' utility""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-singleband_ci.csv") row = data[0] bands_request = ['R', 'G', 'B'] @@ -38,7 +38,7 @@ def test_create_new_raster_from_base_shape(self) -> None: Tests error in 'create_new_raster_from_base' geo-utility if output array dimensions is not consistant with input raster """ - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") ref_raster = Path(data[0]['tif']) out_raster = ref_raster.parent / f"{ref_raster.stem}_copy.tif" @@ -54,7 +54,7 @@ def bands_request(self, request: SubRequest) -> List: def test_create_new_raster_from_base_bands(self, bands_request) -> None: """Tests the 'create_new_raster_from_base' geo-utility for different output bands number""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") ref_raster = Path(data[0]['tif']) out_raster = ref_raster.parent / f"{ref_raster.stem}_copy.tif" @@ -64,7 +64,7 @@ def test_create_new_raster_from_base_bands(self, bands_request) -> None: def test_create_new_raster_from_base_2d_out_array(self) -> None: 
"""Tests the 'create_new_raster_from_base' geo-utility for a 2D output array""" - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") data = read_csv("tests/tiling/tiling_segmentation_binary-multiband_ci.csv") ref_raster = Path(data[0]['tif']) out_raster = ref_raster.parent / f"{ref_raster.stem}_copy.tif" @@ -90,8 +90,8 @@ def test_bounds_iou(self) -> None: def test_empty_geopackage_overlap(self): """ Tests calculation of overlap of raster relative to an empty geopackage """ - extract_archive(src="tests/data/buil_AB11-WV02-20100926-1.zip") - extract_archive(src="tests/data/massachusetts_buildings_kaggle.zip") + extract_archive(from_path="tests/data/buil_AB11-WV02-20100926-1.zip") + extract_archive(from_path="tests/data/massachusetts_buildings_kaggle.zip") raster_file = "tests/data/massachusetts_buildings_kaggle/22978945_15_uint8_clipped.tif" raster = rasterio.open(raster_file) label_gdf = gpd.read_file('tests/data/buil_AB11-WV02-20100926-1.gpkg') diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index e0d05a4b..4c50275a 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -21,14 +21,14 @@ class TestUtils(unittest.TestCase): def test_wrong_seperation(self) -> None: - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") with pytest.raises(TypeError): data = read_csv("tests/tiling/point_virgule.csv") ##for row in data: ##aoi = AOI(raster=row['tif'], label=row['gpkg'], split=row['split']) def test_with_header_in_csv(self) -> None: - extract_archive(src="tests/data/spacenet.zip") + extract_archive(from_path="tests/data/spacenet.zip") with pytest.raises(ValueError): data = read_csv("tests/tiling/header.csv") ##for row in data: diff --git a/utils/script_model.py b/utils/script_model.py index fd8a43ac..bee43287 100644 --- a/utils/script_model.py +++ b/utils/script_model.py @@ -32,8 +32,8 @@ def forward(self, input): shape = input.shape B, 
C = shape[0], shape[1] input = (self.max_val - self.min_val) * (input - self.min) / (self.max -self.min) + self.min_val - input = (input.view(B, C, -1) - self.mean) / self.std - input = input.view(shape) + input = (input.reshape(B, C, -1) - self.mean) / self.std + input = input.reshape(shape) output = self.model_scripted(input.to(self.device)) if self.from_logits: if self.num_classes == 1: