diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..8b7acad
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,11 @@
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5e20edf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,110 @@
+# Byte-compiled / optimized / DLL files
+# C extensions
+# Distribution / packaging
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+# Installer logs
+# Unit test / coverage reports
+# Translations
+# Django stuff:
+# Flask stuff:
+# Scrapy stuff:
+# Sphinx documentation
+# PyBuilder
+# Jupyter Notebook
+# pyenv
+# celery beat schedule file
+# SageMath parsed files
+# Environments
+# Spyder project settings
+# Rope project settings
+# mkdocs documentation
+# mypy
diff --git a/Dockerfile-pytorch14-37 b/Dockerfile-pytorch14-37
new file mode 100644
index 0000000..bdca3dd
--- /dev/null
+++ b/Dockerfile-pytorch14-37
@@ -0,0 +1,26 @@
+# Base image: official Python 3.7.
+FROM python:3.7
+# System package installed before the Python requirements (presumably a native dependency of one of them; confirm against requirements_docker_pytorch14.txt).
+RUN apt-get update && apt-get install -y libgeos-dev
+# Project directory inside the image; it is also the working directory for all following instructions.
+RUN mkdir /xView2
+WORKDIR /xView2
+# The requirements file is added and installed before the remaining sources are added.
+ADD requirements_docker_pytorch14.txt /xView2
+RUN pip3 install -r requirements_docker_pytorch14.txt
+ADD . /xView2
+# Create /input and /output (presumably the mount points for images and predictions; confirm with the run command).
+RUN mkdir -p /input
+RUN mkdir -p /output
+# https://github.com/pytorch/pytorch/issues/27971
+# set environment variables
+# Prevents Python from writing pyc files to disc
+# Prevents Python from buffering stdout and stderr
+# Exec-form entrypoint: arguments passed to `docker run` are appended to this command.
+ENTRYPOINT ["python3", "docker_submission_37.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..766613e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# 3rd place solution for xView2 Damage Assessment Challenge
+Eugene Khvedchenya, February 2020
+This repository contains the source code for my solution to the [xView2 challenge](https://xview2.com). My solution scored second (0.803) on the public leaderboard and third (0.807) on the private hold-out dataset.
+# Approach in a nutshell
+- Ensemble of semantic segmentation models.
+- Trained with weighted CE to address class imbalance.
+- Heavy augmentations to prevent over-fitting and increase robustness to misalignment of pre- and post- images.
+- Shared encoder for pre- and post- images. Extracted features are concatenated and sent to the decoder.
+- Bunch of encoders (ResNets, Densenets, EfficientNets) and two decoders: Unet and FPN.
+- One round of pseudolabeling.
+- Ensemble using weighted averaging. Weights optimized for every model on corresponding validation data.
+# Training
+- Install dependencies from `requirements.txt`
+- Follow `train.sh`
\ No newline at end of file
diff --git a/black.toml b/black.toml
new file mode 100644
index 0000000..d6600cc
--- /dev/null
+++ b/black.toml
@@ -0,0 +1,25 @@
+# Example configuration for Black.
+# NOTE: you have to use single-quoted strings in TOML for regular expressions.
+# It's the equivalent of r-strings in Python. Multiline strings are treated as
+# verbose regular expressions by Black. Use [ ] to denote a significant space
+# character.
+line-length = 119
+target-version = ['py35', 'py36', 'py37', 'py38']
+include = '\.pyi?$'
+exclude = '''
+ \.eggs
+ | \.git
+ | \.hg
+ | \.mypy_cache
+ | \.tox
+ | \.venv
+ | _build
+ | buck-out
+ | build
+ | dist
diff --git a/build_push_docker_37.cmd b/build_push_docker_37.cmd
new file mode 100644
index 0000000..eb1c37a
--- /dev/null
+++ b/build_push_docker_37.cmd
@@ -0,0 +1,5 @@
+REM Build a date stamp from fixed character offsets of the DATE variable (4 chars at offset 10, then 2 at offset 4, then 2 at offset 7).
+REM NOTE(review): this yields YYYYMMDD only if DATE is formatted like "Ddd MM/DD/YYYY" - confirm for the build machine's locale.
+set mydate=%date:~10,4%%date:~4,2%%date:~7,2%
+REM Build the image from Dockerfile-pytorch14-37 using the current directory as build context.
+docker build -t xview2:37_pytorch14 -f Dockerfile-pytorch14-37 .
+REM Add a date-stamped tag under the ekhvedchenya/xview2 repository.
+docker tag xview2:37_pytorch14 ekhvedchenya/xview2:37_pytorch14_%mydate%
+REM START launches the push as a separate process, so this script does not wait for it to finish.
+START docker push ekhvedchenya/xview2:37_pytorch14_%mydate%
\ No newline at end of file
diff --git a/convert_crops.py b/convert_crops.py
new file mode 100644
index 0000000..0eb726e
--- /dev/null
+++ b/convert_crops.py
@@ -0,0 +1,116 @@
+import argparse
+import os
+import cv2
+from skimage.measure import label
+from tqdm import tqdm
+import pandas as pd
+from pytorch_toolbelt.utils import fs
+import numpy as np
+from xview.dataset import make_dual_dataframe, read_image
+from xview.utils.inference_image_output import create_inference_image, open_json, create_instance_image
+from PIL import Image
+def bbox1(img):
+ a = np.where(img != 0)
+ bbox = np.min(a[0]), np.max(a[0]) + 1, np.min(a[1]), np.max(a[1]) + 1
+ return bbox
+def convert_dir(df: pd.DataFrame, dir) -> pd.DataFrame:
+ crops_dir = os.path.join(dir, "crops")
+ os.makedirs(crops_dir, exist_ok=True)
+ building_crops = []
+ global_crop_index = 0
+ for i, row in tqdm(df.iterrows(), total=len(df)):
+ image_fname_pre = read_image(os.path.join(dir, row["image_fname_pre"]))
+ image_fname_post = read_image(os.path.join(dir, row["image_fname_post"]))
+ mask_fname_post = row["mask_fname_post"]
+ json_fname_post = fs.change_extension(mask_fname_post.replace("masks", "labels"), ".json")
+ inference_data = open_json(os.path.join(dir, json_fname_post))
+ instance_image, labels = create_instance_image(inference_data)
+ for label_index, damage_label in zip(range(1, instance_image.max() + 1), labels):
+ try:
+ instance_mask = instance_image == label_index
+ rmin, rmax, cmin, cmax = bbox1(instance_mask)
+ max_size = max(rmax - rmin, cmax - cmin)
+ if max_size < 16:
+ print(
+ "Skipping crop since it's too small",
+ fs.id_from_fname(mask_fname_post),
+ "label_index",
+ label_index,
+ "min_size",
+ max_size
+ )
+ continue
+ rpadding = (rmax - rmin) // 4
+ cpadding = (cmax - cmin) // 4
+ pre_crop = image_fname_pre[
+ max(0, rmin - rpadding) : rmax + rpadding, max(0, cmin - cpadding) : cmax + cpadding
+ ]
+ post_crop = image_fname_post[
+ max(0, rmin - rpadding) : rmax + rpadding, max(0, cmin - cpadding) : cmax + cpadding
+ ]
+ image_id_pre = row["image_id_pre"]
+ image_id_post = row["image_id_post"]
+ pre_crop_fname = f"{global_crop_index:06}_{image_id_pre}.png"
+ post_crop_fname = f"{global_crop_index:06}_{image_id_post}.png"
+ global_crop_index += 1
+ cv2.imwrite(os.path.join(crops_dir, pre_crop_fname), pre_crop)
+ cv2.imwrite(os.path.join(crops_dir, post_crop_fname), post_crop)
+ building_crops.append(
+ {
+ "pre_crop_fname": pre_crop_fname,
+ "post_crop": post_crop_fname,
+ "label": damage_label,
+ "event_name": row["event_name_post"],
+ "fold": row["fold_post"],
+ "rmin": rmin,
+ "rmax": rmax,
+ "cmin": cmin,
+ "cmax": cmax,
+ "max_size": max_size,
+ "rpadding": rpadding,
+ "cpadding": cpadding
+ }
+ )
+ except Exception as e:
+ print(e)
+ print(mask_fname_post)
+ df = pd.DataFrame.from_records(building_crops)
+ return df
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2")
+ args = parser.parse_args()
+ data_dir = args.data_dir
+ df = pd.read_csv(os.path.join(data_dir, "train_folds.csv"))
+ df = make_dual_dataframe(df)
+ df_crops = convert_dir(df, data_dir)
+ df_crops.to_csv(os.path.join(data_dir, "train_crops.csv"), index=None)
+if __name__ == "__main__":
+ main()
diff --git a/convert_masks.py b/convert_masks.py
new file mode 100644
index 0000000..0dc7b26
--- /dev/null
+++ b/convert_masks.py
@@ -0,0 +1,80 @@
+import argparse
+import os
+import pandas as pd
+from pytorch_toolbelt.utils import fs
+from skimage.measure import label
+from tqdm import tqdm
+from xview.utils.inference_image_output import create_inference_image
+def convert_dir(dir, folder):
+ jsons_dir = os.path.join(dir, "labels")
+ masks_dir = os.path.join(dir, "masks")
+ os.makedirs(masks_dir, exist_ok=True)
+ jsons = [fname for fname in fs.find_in_dir(jsons_dir) if fname.endswith(".json")]
+ items = []
+ for json_fname in tqdm(jsons):
+ mask_fname = os.path.join(masks_dir, fs.id_from_fname(json_fname) + ".png")
+ mask = create_inference_image(json_fname, mask_fname)
+ non_damaged_mask = mask == 1
+ light = mask == 2
+ medium = mask == 3
+ destroyed = mask == 4
+ non_damaged_pixels = non_damaged_mask.sum()
+ light_pixels = light.sum()
+ medium_pixels = medium.sum()
+ destroyed_pixels = destroyed.sum()
+ # guatemala-volcano_00000000_post_disaster
+ event_name, sample_id, event_type, disaster = fs.id_from_fname(json_fname).split("_")
+ assert disaster == "disaster"
+ image_id = fs.id_from_fname(json_fname)
+ items.append(
+ {
+ "image_fname": os.path.join(folder, "images", image_id + ".png"),
+ "mask_fname": os.path.join(folder, "masks", image_id + ".png"),
+ "folder": folder,
+ "image_id": image_id,
+ "event_name": event_name,
+ "sample_id": sample_id,
+ "event_type": event_type,
+ "non_damaged_pixels": non_damaged_pixels,
+ "light_damaged_pixels": light_pixels,
+ "medium_damaged_pixels": medium_pixels,
+ "destroyed_pixels": destroyed_pixels,
+ "non_damaged_buildings": label(non_damaged_mask, return_num=True)[1],
+ "light_damaged_buildings": label(light, return_num=True)[1],
+ "medium_damaged_buildings": label(medium, return_num=True)[1],
+ "destroyed_buildings": label(destroyed, return_num=True)[1],
+ }
+ )
+ return items
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2")
+ args = parser.parse_args()
+ data_dir = args.data_dir
+ train_dir = os.path.join(data_dir, "train")
+ tier3_dir = os.path.join(data_dir, "tier3")
+ items = []
+ items += convert_dir(train_dir, folder="train")
+ items += convert_dir(tier3_dir, folder="tier3")
+ df = pd.DataFrame.from_records(items)
+ df.to_csv(os.path.join(data_dir, "train.csv"), index=None)
+if __name__ == "__main__":
+ main()
diff --git a/docker_submission_37.py b/docker_submission_37.py
new file mode 100644
index 0000000..dab1e16
--- /dev/null
+++ b/docker_submission_37.py
@@ -0,0 +1,266 @@
+import argparse
+import os
+import time
+import albumentations as A
+import cv2
+import numpy as np
+import pandas as pd
+import torch
+from pytorch_toolbelt.utils import fs
+from pytorch_toolbelt.utils.torch_utils import tensor_from_rgb_image, to_numpy
+from xview.dataset import OUTPUT_MASK_KEY, read_image
+from xview.inference import model_from_checkpoint, ApplyWeights, MultiscaleTTA, HFlipTTA, Ensembler
+from xview.postprocessing import make_predictions_naive
+from xview.utils.inference_image_output import colorize_mask
+def weighted_model(checkpoint_fname: str, weights, activation: str):
+ model, info = model_from_checkpoint(
+ fs.auto_file(checkpoint_fname, where="ensemble"), activation_after=activation, report=False, classifiers=False
+ )
+ model = ApplyWeights(model, weights)
+ return model, info
+def main():
+ start = time.time()
+ torch.set_num_threads(1)
+ torch.set_num_interop_threads(1)
+ parser = argparse.ArgumentParser()
+ parser.add_argument("pre_image", type=str)
+ parser.add_argument("post_image", type=str)
+ parser.add_argument("loc_image", type=str)
+ parser.add_argument("dmg_image", type=str)
+ parser.add_argument("--raw", action="store_true")
+ parser.add_argument("--color-mask", action="store_true")
+ parser.add_argument("--gpu", action="store_true")
+ args = parser.parse_args()
+ pre_image = args.pre_image
+ post_image = args.post_image
+ localization_fname = args.loc_image
+ damage_fname = args.dmg_image
+ save_raw = args.raw
+ color_mask = args.color_mask
+ use_gpu = args.gpu
+ size = 1024
+ postprocess = "naive"
+ image_size = size, size
+ print("pre_image ", pre_image)
+ print("post_image ", post_image)
+ print("loc_image ", localization_fname)
+ print("dmg_image ", damage_fname)
+ print("Size ", image_size)
+ print("Postprocess ", postprocess)
+ print("Colorize ", color_mask)
+ raw_predictions_file = fs.change_extension(damage_fname, ".npy")
+ print("raw_predictions_file", raw_predictions_file)
+ print(*torch.__config__.show().split("\n"), sep="\n")
+ if not os.path.isdir(os.path.dirname(localization_fname)):
+ print("Output directory does not exists", localization_fname)
+ return -1
+ if not os.access(os.path.dirname(localization_fname), os.W_OK):
+ print("Output directory does not have write access", localization_fname)
+ return -2
+ if not os.path.isdir(os.path.dirname(damage_fname)):
+ print("Output directory does not exists", damage_fname)
+ return -1
+ if not os.access(os.path.dirname(damage_fname), os.W_OK):
+ print("Output directory does not have write access", damage_fname)
+ return -2
+ fold_0_models_dict = [
+ # (
+ # "Dec15_21_41_resnet101_fpncatv2_256_512_fold0_fp16_crops.pth",
+ # [0.45136154, 1.4482629, 1.42098208, 0.6839698, 0.96800456],
+ # ),
+ # (
+ # "Dec16_08_26_resnet34_unet_v2_512_fold0_fp16_crops.pth",
+ # [0.92919105, 1.03831743, 1.03017048, 0.98257118, 1.0241164],
+ # ),
+ # (
+ # "Dec21_21_54_densenet161_deeplab256_512_fold0_fp16_crops.pth",
+ # [0.48157651, 1.02084685, 1.36264406, 1.03175205, 1.11758873],
+ # ),
+ # 0.762814651939279 0.854002889559006 0.7237339786736817 [0.9186602573598759, 0.5420118318644089, 0.7123870673168781, 0.8405837378060299] coeffs [0.51244243 1.42747062 1.23648384 0.90290896 0.88912514]
+ (
+ "Dec30_15_34_resnet34_unet_v2_512_fold0_fp16_pseudo_crops.pth",
+ [0.51244243, 1.42747062, 1.23648384, 0.90290896, 0.88912514],
+ ),
+ # 0.7673669954814148 0.8582940771677703 0.7283982461872626 [0.919932857782992, 0.5413880912001547, 0.731840942842999, 0.8396640419159087] coeffs [0.50847073 1.15392272 1.2059733 1.1340391 1.03196719]
+ (
+ "Dec30_15_34_resnet101_fpncatv2_256_512_fold0_fp16_pseudo_crops.pth",
+ [0.50847073, 1.15392272, 1.2059733, 1.1340391, 1.03196719],
+ ),
+ ]
+ fold_1_models_dict = [
+ # (
+ # "Dec16_18_59_densenet201_fpncatv2_256_512_fold1_fp16_crops.pth",
+ # [0.64202075, 1.04641224, 1.23015655, 1.03203408, 1.12505602],
+ # ),
+ # (
+ # "Dec17_01_52_resnet34_unet_v2_512_fold1_fp16_crops.pth",
+ # [0.69605759, 0.89963168, 0.9232137, 0.92938775, 0.94460875],
+ # ),
+ (
+ "Dec22_22_24_seresnext50_unet_v2_512_fold1_fp16_crops.pth",
+ [0.54324459, 1.76890163, 1.20782899, 0.85128004, 0.83100698],
+ ),
+ (
+ "Dec31_02_09_resnet34_unet_v2_512_fold1_fp16_pseudo_crops.pth",
+ # Maybe suboptimal
+ [0.48269921, 1.22874469, 1.38328066, 0.96695393, 0.91348539],
+ ),
+ (
+ "Dec31_03_55_densenet201_fpncatv2_256_512_fold1_fp16_pseudo_crops.pth",
+ [0.48804137, 1.14809462, 1.24851827, 1.11798428, 1.00790482]
+ )
+ ]
+ fold_2_models_dict = [
+ # (
+ # "Dec17_19_19_resnet34_unet_v2_512_fold2_fp16_crops.pth",
+ # [0.65977938, 1.50252452, 0.97098732, 0.74048182, 1.08712367],
+ # ),
+ # 0.7674290884579319 0.8107652756500724 0.7488564368041575 [0.9228529822124596, 0.5900700454049471, 0.736806959757804, 0.8292099253270483] coeffs [0.34641084 1.63486251 1.14186036 0.86668715 1.12193125]
+ (
+ "Dec17_19_12_inceptionv4_fpncatv2_256_512_fold2_fp16_crops.pth",
+ [0.34641084, 1.63486251, 1.14186036, 0.86668715, 1.12193125],
+ ),
+ # 0.7683650436367244 0.8543981047493 0.7314937317313349 [0.9248137307721042, 0.5642011151253543, 0.7081016179096937, 0.831720163492164] coeffs [0.51277498 1.4475809 0.8296623 0.97868596 1.34180805]
+ (
+ "Dec27_14_08_densenet169_unet_v2_512_fold2_fp16_crops.pth",
+ [0.55429115, 1.34944309, 1.1087044, 0.89542089, 1.17257541],
+ ),
+ (
+ "Dec31_12_45_resnet34_unet_v2_512_fold2_fp16_pseudo_crops.pth",
+ # Copied from Dec17_19_19_resnet34_unet_v2_512_fold2_fp16_crops
+ [0.65977938, 1.50252452, 0.97098732, 0.74048182, 1.08712367],
+ )
+ ]
+ fold_3_models_dict = [
+ (
+ "Dec15_23_24_resnet34_unet_v2_512_fold3_crops.pth",
+ [0.84090623, 1.02953555, 1.2526516, 0.9298182, 0.94053529],
+ ),
+ # (
+ # "Dec18_12_49_resnet34_unet_v2_512_fold3_fp16_crops.pth",
+ # [0.55555375, 1.18287119, 1.10997173, 0.85927596, 1.18145368],
+ # ),
+ # (
+ # "Dec19_14_59_efficientb4_fpncatv2_256_512_fold3_fp16_crops.pth",
+ # [0.59338243, 1.17347438, 1.186104, 1.06860638, 1.03041829],
+ # ),
+ (
+ "Dec21_11_50_seresnext50_unet_v2_512_fold3_fp16_crops.pth",
+ [0.43108046, 1.30222898, 1.09660616, 0.94958969, 1.07063753],
+ ),
+ (
+ "Dec31_18_17_efficientb4_fpncatv2_256_512_fold3_fp16_pseudo_crops.pth",
+ # Copied from Dec19_14_59_efficientb4_fpncatv2_256_512_fold3_fp16_crops
+ [0.59338243, 1.17347438, 1.186104, 1.06860638, 1.03041829]
+ )
+ ]
+ fold_4_models_dict = [
+ (
+ "Dec19_06_18_resnet34_unet_v2_512_fold4_fp16_crops.pth",
+ [0.83915734, 1.02560309, 0.77639015, 1.17487775, 1.05632771],
+ ),
+ (
+ "Dec27_14_37_resnet101_unet_v2_512_fold4_fp16_crops.pth",
+ [0.57414314, 1.19599486, 1.05561912, 0.98815567, 1.2274592],
+ ),
+ ]
+ infos = []
+ resize = A.Resize(1024, 1024)
+ normalize = A.Normalize(mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225))
+ transform = A.Compose([resize, normalize])
+ # Very dumb way but it matches 1:1 with inferencing
+ pre, post = read_image(pre_image), read_image(post_image)
+ image = np.dstack([pre, post])
+ image = transform(image=image)["image"]
+ pre_image = image[..., 0:3]
+ post_image = image[..., 3:6]
+ models = []
+ for models_dict in [
+ fold_0_models_dict,
+ fold_1_models_dict,
+ fold_2_models_dict,
+ fold_3_models_dict,
+ fold_4_models_dict,
+ ]:
+ for checkpoint, weights in models_dict:
+ model, info = weighted_model(checkpoint, weights, activation="model")
+ models.append(model)
+ infos.append(info)
+ model = Ensembler(models, outputs=[OUTPUT_MASK_KEY])
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ model = model.eval()
+ df = pd.DataFrame.from_records(infos)
+ pd.set_option("display.max_rows", None)
+ pd.set_option("display.max_columns", None)
+ pd.set_option("display.width", None)
+ pd.set_option("display.max_colwidth", -1)
+ print(df)
+ print("score ", df["score"].mean(), df["score"].std())
+ print("localization ", df["localization"].mean(), df["localization"].std())
+ print("damage ", df["damage"].mean(), df["damage"].std())
+ input_image = tensor_from_rgb_image(np.dstack([pre_image, post_image])).unsqueeze(0)
+ if use_gpu:
+ print("Using GPU for inference")
+ input_image = input_image.cuda()
+ model = model.cuda()
+ output = model(input_image)
+ masks = output[OUTPUT_MASK_KEY]
+ predictions = to_numpy(masks.squeeze(0)).astype(np.float32)
+ if save_raw:
+ np.save(raw_predictions_file, predictions)
+ localization_image, damage_image = make_predictions_naive(predictions)
+ if color_mask:
+ localization_image = colorize_mask(localization_image)
+ localization_image.save(localization_fname)
+ damage_image = colorize_mask(damage_image)
+ damage_image.save(damage_fname)
+ else:
+ cv2.imwrite(localization_fname, localization_image)
+ cv2.imwrite(damage_fname, damage_image)
+ print("Saved output to ", localization_fname, damage_fname)
+ done = time.time()
+ elapsed = done - start
+ print("Inference time", elapsed, "(s)")
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/evaluate_postprocessing.py b/evaluate_postprocessing.py
new file mode 100644
index 0000000..c779db0
--- /dev/null
+++ b/evaluate_postprocessing.py
@@ -0,0 +1,102 @@
+import argparse
+import os
+from collections import defaultdict
+from functools import partial
+from multiprocessing.pool import Pool
+import cv2
+from tqdm import tqdm
+from xview.dataset import read_mask
+from xview.metric import CompetitionMetricCallback
+from xview.postprocessing import make_predictions_dominant, make_predictions_naive, make_predictions_floodfill
+from pytorch_toolbelt.utils import fs
+import pandas as pd
+from datetime import datetime
+import numpy as np
+def _compute_fn(args, postprocessing_fn):
+ xi, yi = args
+ dmg_pred = np.load(xi)
+ dmg_true = read_mask(yi)
+ loc_pred, dmg_pred = postprocessing_fn(dmg_pred)
+ if loc_pred.shape[0] != 1024:
+ loc_pred = cv2.resize(loc_pred, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST)
+ dmg_pred = cv2.resize(dmg_pred, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST)
+ row = CompetitionMetricCallback.get_row_pair(loc_pred, dmg_pred, dmg_true, dmg_true)
+ return row
+def optimize_postprocessing(y_pred_filenames, y_true_filenames, workers: int, postprocessing_fn):
+ input = list(zip(y_pred_filenames, y_true_filenames))
+ all_rows = []
+ process = partial(_compute_fn, postprocessing_fn=postprocessing_fn)
+ with Pool(workers) as wp:
+ for row in tqdm(wp.imap_unordered(process, input, chunksize=8), total=len(input)):
+ all_rows.append(row)
+ return CompetitionMetricCallback.compute_metrics(all_rows)
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("predictions", nargs="+")
+ parser.add_argument("-w", "--workers", type=int, default=0, help="")
+ parser.add_argument("-dd", "--data-dir", type=str, default="data", help="Data directory")
+ args = parser.parse_args()
+ targets = fs.find_in_dir(os.path.join(args.data_dir, "tier3", "masks")) + fs.find_in_dir(
+ os.path.join(args.data_dir, "train", "masks")
+ )
+ targets_post = dict((fs.id_from_fname(fname), fname) for fname in targets if "_post_" in fname)
+ df = defaultdict(list)
+ postprocessings = {
+ "naive": make_predictions_naive,
+ "dominant": make_predictions_dominant,
+ "floodfill": make_predictions_floodfill,
+ }
+ for predictions_dir in args.predictions:
+ try:
+ prediction_files = fs.find_in_dir(predictions_dir)
+ prediction_files_post = dict(
+ (fs.id_from_fname(fname), fname) for fname in prediction_files if "_post_" in fname
+ )
+ y_true_filenames = [targets_post[image_id_post] for image_id_post in prediction_files_post.keys()]
+ y_pred_filenames = [prediction_files_post[image_id_post] for image_id_post in prediction_files_post.keys()]
+ for name, fn in postprocessings.items():
+ score, localization_f1, damage_f1, damage_f1s = optimize_postprocessing(
+ y_pred_filenames, y_true_filenames, postprocessing_fn=fn, workers=args.workers
+ )
+ print(name, score)
+ df["samples"].append(len(y_pred_filenames))
+ df["predictions_dir"].append(predictions_dir)
+ df["postprocessing"].append(name)
+ df["score"].append(score)
+ df["localization_f1"].append(localization_f1)
+ df["damage_f1"].append(damage_f1)
+ except Exception as e:
+ print("Failed to process", predictions_dir, e)
+ df = pd.DataFrame.from_dict(df)
+ print(df)
+ current_time = datetime.now().strftime("%b%d_%H_%M")
+ df.to_csv(f"postprocessing_eval_{current_time}.csv", index=None)
+if __name__ == "__main__":
+ main()
diff --git a/finetune.py b/finetune.py
new file mode 100644
index 0000000..e8e9713
--- /dev/null
+++ b/finetune.py
@@ -0,0 +1,517 @@
+from __future__ import absolute_import
+import argparse
+import collections
+import json
+import os
+from datetime import datetime
+from catalyst.dl import SupervisedRunner, OptimizerCallback, SchedulerCallback
+from catalyst.dl.callbacks import CriterionAggregatorCallback, AccuracyCallback
+from catalyst.utils import load_checkpoint, unpack_checkpoint
+from pytorch_toolbelt.utils import fs, torch_utils
+from pytorch_toolbelt.utils.catalyst import ShowPolarBatchesCallback, ConfusionMatrixCallback
+from pytorch_toolbelt.utils.random import set_manual_seed
+from pytorch_toolbelt.utils.torch_utils import count_parameters, transfer_weights, get_optimizable_parameters
+from torch import nn
+from torch.optim.lr_scheduler import CyclicLR
+from torch.utils.data import DataLoader
+from xview.dataset import (
+ get_datasets,
+ get_pseudolabeling_dataset,
+from xview.metric import CompetitionMetricCallback
+from xview.models import get_model
+from xview.optim import get_optimizer
+from xview.scheduler import get_scheduler
+from xview.train_utils import clean_checkpoint, report_checkpoint, get_criterion_callback
+from xview.visualization import draw_predictions
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-acc", "--accumulation-steps", type=int, default=1, help="Number of batches to process")
+ parser.add_argument("--seed", type=int, default=42, help="Random seed")
+ parser.add_argument("-v", "--verbose", action="store_true")
+ parser.add_argument("--fast", action="store_true")
+ parser.add_argument(
+ "-dd", "--data-dir", type=str, required=True, help="Data directory for INRIA sattelite dataset"
+ )
+ parser.add_argument("-m", "--model", type=str, default="resnet34_fpncat128", help="")
+ parser.add_argument("-b", "--batch-size", type=int, default=8, help="Batch Size during training, e.g. -b 64")
+ parser.add_argument("-e", "--epochs", type=int, default=100, help="Epoch to run")
+ # parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
+ # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
+ # parser.add_argument('-ft', '--fine-tune', action='store_true')
+ parser.add_argument("-lr", "--learning-rate", type=float, default=1e-3, help="Initial learning rate")
+ parser.add_argument(
+ "--disaster-type-loss",
+ type=str,
+ default=None, # [["ce", 1.0]],
+ action="append",
+ nargs="+",
+ help="Criterion for classifying disaster type",
+ )
+ parser.add_argument(
+ "--damage-type-loss",
+ type=str,
+ default=None, # [["bce", 1.0]],
+ action="append",
+ nargs="+",
+ help="Criterion for classifying presence of building with particular damage type",
+ )
+ parser.add_argument("-l", "--criterion", type=str, default=None, action="append", nargs="+", help="Criterion")
+ parser.add_argument(
+ "--mask4", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 4"
+ )
+ parser.add_argument(
+ "--mask8", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 8"
+ )
+ parser.add_argument(
+ "--mask16", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 16"
+ )
+ parser.add_argument(
+ "--mask32", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 32"
+ )
+ parser.add_argument("--embedding", type=str, default=None)
+ parser.add_argument("-o", "--optimizer", default="RAdam", help="Name of the optimizer")
+ parser.add_argument(
+ "-c", "--checkpoint", type=str, default=None, help="Checkpoint filename to use as initial model weights"
+ )
+ parser.add_argument("-w", "--workers", default=8, type=int, help="Num workers")
+ parser.add_argument("-a", "--augmentations", default="safe", type=str, help="Level of image augmentations")
+ parser.add_argument("--transfer", default=None, type=str, help="")
+ parser.add_argument("--fp16", action="store_true")
+ parser.add_argument("--size", default=512, type=int)
+ parser.add_argument("--fold", default=0, type=int)
+ parser.add_argument("-s", "--scheduler", default="multistep", type=str, help="")
+ parser.add_argument("-x", "--experiment", default=None, type=str, help="")
+ parser.add_argument("-d", "--dropout", default=0.0, type=float, help="Dropout before head layer")
+ parser.add_argument("-pl", "--pseudolabeling", type=str, required=True)
+ parser.add_argument("-wd", "--weight-decay", default=0, type=float, help="L2 weight decay")
+ parser.add_argument("--show", action="store_true")
+ parser.add_argument("--dsv", action="store_true")
+ parser.add_argument("--balance", action="store_true")
+ parser.add_argument("--only-buildings", action="store_true")
+ parser.add_argument("--freeze-bn", action="store_true")
+ parser.add_argument("--crops", action="store_true", help="Train on random crops")
+ parser.add_argument("--post-transform", action="store_true")
+ args = parser.parse_args()
+ set_manual_seed(args.seed)
+ data_dir = args.data_dir
+ num_workers = args.workers
+ num_epochs = args.epochs
+ learning_rate = args.learning_rate
+ model_name = args.model
+ optimizer_name = args.optimizer
+ image_size = args.size, args.size
+ fast = args.fast
+ augmentations = args.augmentations
+ fp16 = args.fp16
+ scheduler_name = args.scheduler
+ experiment = args.experiment
+ dropout = args.dropout
+ segmentation_losses = args.criterion
+ verbose = args.verbose
+ show = args.show
+ accumulation_steps = args.accumulation_steps
+ weight_decay = args.weight_decay
+ fold = args.fold
+ balance = args.balance
+ only_buildings = args.only_buildings
+ freeze_bn = args.freeze_bn
+ train_on_crops = args.crops
+ enable_post_image_transform = args.post_transform
+ disaster_type_loss = args.disaster_type_loss
+ train_batch_size = args.batch_size
+ embedding_criterion = args.embedding
+ damage_type_loss = args.damage_type_loss
+ pseudolabels_dir = args.pseudolabeling
+ # Compute batch size for validaion
+ if train_on_crops:
+ valid_batch_size = max(1, (train_batch_size * (image_size[0] * image_size[1])) // (1024 ** 2))
+ else:
+ valid_batch_size = train_batch_size
+ run_train = num_epochs > 0
+ model: nn.Module = get_model(model_name, dropout=dropout).cuda()
+ if args.transfer:
+ transfer_checkpoint = fs.auto_file(args.transfer)
+ print("Transfering weights from model checkpoint", transfer_checkpoint)
+ checkpoint = load_checkpoint(transfer_checkpoint)
+ pretrained_dict = checkpoint["model_state_dict"]
+ transfer_weights(model, pretrained_dict)
+ if args.checkpoint:
+ checkpoint = load_checkpoint(fs.auto_file(args.checkpoint))
+ unpack_checkpoint(checkpoint, model=model)
+ print("Loaded model weights from:", args.checkpoint)
+ report_checkpoint(checkpoint)
+ if freeze_bn:
+ torch_utils.freeze_bn(model)
+ print("Freezing bn params")
+ runner = SupervisedRunner(input_key=INPUT_IMAGE_KEY, output_key=None)
+ main_metric = "weighted_f1"
+ cmd_args = vars(args)
+ current_time = datetime.now().strftime("%b%d_%H_%M")
+ checkpoint_prefix = f"{current_time}_{args.model}_{args.size}_fold{fold}"
+ if fp16:
+ checkpoint_prefix += "_fp16"
+ if fast:
+ checkpoint_prefix += "_fast"
+ if pseudolabels_dir:
+ checkpoint_prefix += "_pseudo"
+ if train_on_crops:
+ checkpoint_prefix += "_crops"
+ if experiment is not None:
+ checkpoint_prefix = experiment
+ log_dir = os.path.join("runs", checkpoint_prefix)
+ os.makedirs(log_dir, exist_ok=False)
+ config_fname = os.path.join(log_dir, f"{checkpoint_prefix}.json")
+ with open(config_fname, "w") as f:
+ train_session_args = vars(args)
+ f.write(json.dumps(train_session_args, indent=2))
+ default_callbacks = [
+ CompetitionMetricCallback(input_key=INPUT_MASK_KEY, output_key=OUTPUT_MASK_KEY, prefix="weighted_f1"),
+ ConfusionMatrixCallback(
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_KEY,
+ class_names=["land", "no_damage", "minor_damage", "major_damage", "destroyed"],
+ ignore_index=UNLABELED_SAMPLE,
+ ),
+ ]
+ if show:
+ default_callbacks += [
+ ShowPolarBatchesCallback(draw_predictions, metric=main_metric + "_batch", minimize=False)
+ ]
+ train_ds, valid_ds, train_sampler = get_datasets(
+ data_dir=data_dir,
+ image_size=image_size,
+ augmentation=augmentations,
+ fast=fast,
+ fold=fold,
+ balance=balance,
+ only_buildings=only_buildings,
+ train_on_crops=train_on_crops,
+ crops_multiplication_factor=1,
+ enable_post_image_transform=enable_post_image_transform,
+ )
+ if run_train:
+ loaders = collections.OrderedDict()
+ callbacks = default_callbacks.copy()
+ criterions_dict = {}
+ losses = []
+ unlabeled_train = get_pseudolabeling_dataset(
+ data_dir,
+ include_masks=True,
+ image_size=image_size,
+ augmentation="medium_nmd",
+ train_on_crops=train_on_crops,
+ enable_post_image_transform=enable_post_image_transform,
+ pseudolabels_dir=pseudolabels_dir,
+ )
+ train_ds = train_ds + unlabeled_train
+ print("Using online pseudolabeling with ", len(unlabeled_train), "samples")
+ loaders["train"] = DataLoader(
+ train_ds,
+ batch_size=train_batch_size,
+ num_workers=num_workers,
+ pin_memory=True,
+ drop_last=True,
+ shuffle=True,
+ )
+ loaders["valid"] = DataLoader(valid_ds, batch_size=valid_batch_size, num_workers=num_workers, pin_memory=True)
+ # Create losses
+ for criterion in segmentation_losses:
+ if isinstance(criterion, (list, tuple)) and len(criterion) == 2:
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion[0], 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="segmentation",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(INPUT_MASK_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask4 is not None:
+ for criterion in args.mask4:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask4",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_4_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_4_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask8 is not None:
+ for criterion in args.mask8:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask8",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_8_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_8_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask16 is not None:
+ for criterion in args.mask16:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask16",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_16_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_16_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask32 is not None:
+ for criterion in args.mask32:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask32",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_32_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_32_KEY, "Using loss", loss_name, loss_weight)
+ if disaster_type_loss is not None:
+ callbacks += [
+ ConfusionMatrixCallback(
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ class_names=DISASTER_TYPES,
+ prefix=f"{DISASTER_TYPE_KEY}/confusion_matrix",
+ ),
+ AccuracyCallback(
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ prefix=f"{DISASTER_TYPE_KEY}/accuracy",
+ activation="Softmax",
+ ),
+ ]
+ for criterion in disaster_type_loss:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(DISASTER_TYPE_KEY, "Using loss", loss_name, loss_weight)
+ if damage_type_loss is not None:
+ callbacks += [
+ # MultilabelConfusionMatrixCallback(
+ # input_key=DAMAGE_TYPE_KEY,
+ # output_key=DAMAGE_TYPE_KEY,
+ # class_names=DAMAGE_TYPES,
+ # prefix=f"{DAMAGE_TYPE_KEY}/confusion_matrix",
+ # ),
+ AccuracyCallback(
+ input_key=DAMAGE_TYPE_KEY,
+ output_key=DAMAGE_TYPE_KEY,
+ prefix=f"{DAMAGE_TYPE_KEY}/accuracy",
+ activation="Sigmoid",
+ threshold=0.5,
+ )
+ ]
+ for criterion in damage_type_loss:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ input_key=DAMAGE_TYPE_KEY,
+ output_key=DAMAGE_TYPE_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(DAMAGE_TYPE_KEY, "Using loss", loss_name, loss_weight)
+ if embedding_criterion is not None:
+ cd, criterion, criterion_name = get_criterion_callback(
+ embedding_criterion,
+ prefix="embedding",
+ input_key=INPUT_MASK_KEY,
+ loss_weight=1.0,
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_EMBEDDING_KEY, "Using loss", embedding_criterion)
+ callbacks += [
+ CriterionAggregatorCallback(prefix="loss", loss_keys=losses),
+ OptimizerCallback(accumulation_steps=accumulation_steps, decouple_weight_decay=False),
+ ]
+ optimizer = get_optimizer(
+ optimizer_name, get_optimizable_parameters(model), learning_rate, weight_decay=weight_decay
+ )
+ scheduler = get_scheduler(
+ scheduler_name, optimizer, lr=learning_rate, num_epochs=num_epochs, batches_in_epoch=len(loaders["train"])
+ )
+ if isinstance(scheduler, CyclicLR):
+ callbacks += [SchedulerCallback(mode="batch")]
+ print("Train session :", checkpoint_prefix)
+ print(" FP16 mode :", fp16)
+ print(" Fast mode :", args.fast)
+ print(" Epochs :", num_epochs)
+ print(" Workers :", num_workers)
+ print(" Data dir :", data_dir)
+ print(" Log dir :", log_dir)
+ print("Data ")
+ print(" Augmentations :", augmentations)
+ print(" Train size :", len(loaders["train"]), len(train_ds))
+ print(" Valid size :", len(loaders["valid"]), len(valid_ds))
+ print(" Image size :", image_size)
+ print(" Train on crops :", train_on_crops)
+ print(" Balance :", balance)
+ print(" Buildings only :", only_buildings)
+ print(" Post transform :", enable_post_image_transform)
+ print(" Pseudolabels :", pseudolabels_dir)
+ print("Model :", model_name)
+ print(" Parameters :", count_parameters(model))
+ print(" Dropout :", dropout)
+ print("Optimizer :", optimizer_name)
+ print(" Learning rate :", learning_rate)
+ print(" Weight decay :", weight_decay)
+ print(" Scheduler :", scheduler_name)
+ print(" Batch sizes :", train_batch_size, valid_batch_size)
+ print(" Criterion :", segmentation_losses)
+ print(" Damage type :", damage_type_loss)
+ print(" Disaster type :", disaster_type_loss)
+ print(" Embedding :", embedding_criterion)
+ # model training
+ runner.train(
+ fp16=fp16,
+ model=model,
+ criterion=criterions_dict,
+ optimizer=optimizer,
+ scheduler=scheduler,
+ callbacks=callbacks,
+ loaders=loaders,
+ logdir=os.path.join(log_dir, "opl"),
+ num_epochs=num_epochs,
+ verbose=verbose,
+ main_metric=main_metric,
+ minimize_metric=False,
+ checkpoint_data={"cmd_args": cmd_args},
+ )
+ # Training is finished. Let's run predictions using best checkpoint weights
+ best_checkpoint = os.path.join(log_dir, "main", "checkpoints", "best.pth")
+ model_checkpoint = os.path.join(log_dir, "main", "checkpoints", f"{checkpoint_prefix}.pth")
+ clean_checkpoint(best_checkpoint, model_checkpoint)
+ del optimizer, loaders
+# Run main() only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+ main()
diff --git a/fit_predict.py b/fit_predict.py
new file mode 100644
index 0000000..0307e52
--- /dev/null
+++ b/fit_predict.py
@@ -0,0 +1,614 @@
+from __future__ import absolute_import
+import argparse
+import collections
+import gc
+import json
+import os
+from datetime import datetime
+import torch
+from catalyst.dl import SupervisedRunner, OptimizerCallback, SchedulerCallback
+from catalyst.dl.callbacks import CriterionAggregatorCallback, AccuracyCallback
+from catalyst.utils import load_checkpoint, unpack_checkpoint
+from pytorch_toolbelt.optimization.functional import get_lr_decay_parameters
+from pytorch_toolbelt.utils import fs, torch_utils
+from pytorch_toolbelt.utils.catalyst import ShowPolarBatchesCallback, ConfusionMatrixCallback
+from pytorch_toolbelt.utils.random import set_manual_seed
+from pytorch_toolbelt.utils.torch_utils import count_parameters, transfer_weights, get_optimizable_parameters
+from torch import nn
+from torch.optim.lr_scheduler import CyclicLR
+from torch.utils.data import DataLoader
+from xview.dataset import (
+ get_datasets,
+ get_pseudolabeling_dataset,
+from xview.metric import CompetitionMetricCallback
+from xview.models import get_model
+from xview.optim import get_optimizer
+from xview.pseudo import CEOnlinePseudolabelingCallback2d
+from xview.scheduler import get_scheduler
+from xview.train_utils import clean_checkpoint, report_checkpoint, get_criterion_callback
+from xview.visualization import draw_predictions
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-acc", "--accumulation-steps", type=int, default=1, help="Number of batches to process")
+ parser.add_argument("--seed", type=int, default=42, help="Random seed")
+ parser.add_argument("-v", "--verbose", action="store_true")
+ parser.add_argument("--fast", action="store_true")
+ parser.add_argument(
+ "-dd", "--data-dir", type=str, required=True, help="Data directory for INRIA sattelite dataset"
+ )
+ parser.add_argument("-m", "--model", type=str, default="resnet34_fpncat128", help="")
+ parser.add_argument("-b", "--batch-size", type=int, default=8, help="Batch Size during training, e.g. -b 64")
+ parser.add_argument("-e", "--epochs", type=int, default=100, help="Epoch to run")
+ # parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
+ # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
+ # parser.add_argument('-ft', '--fine-tune', action='store_true')
+ parser.add_argument("-lr", "--learning-rate", type=float, default=1e-3, help="Initial learning rate")
+ parser.add_argument(
+ "--disaster-type-loss",
+ type=str,
+ default=None, # [["ce", 1.0]],
+ action="append",
+ nargs="+",
+ help="Criterion for classifying disaster type",
+ )
+ parser.add_argument(
+ "--damage-type-loss",
+ type=str,
+ default=None, # [["bce", 1.0]],
+ action="append",
+ nargs="+",
+ help="Criterion for classifying presence of building with particular damage type",
+ )
+ parser.add_argument("-l", "--criterion", type=str, default=None, action="append", nargs="+", help="Criterion")
+ parser.add_argument("--mask4", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 4")
+ parser.add_argument("--mask8", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 8")
+ parser.add_argument("--mask16", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 16")
+ parser.add_argument("--mask32", type=str, default=None, action="append", nargs="+", help="Criterion for mask with stride 32")
+ parser.add_argument("--embedding", type=str, default=None)
+ parser.add_argument("-o", "--optimizer", default="RAdam", help="Name of the optimizer")
+ parser.add_argument(
+ "-c", "--checkpoint", type=str, default=None, help="Checkpoint filename to use as initial model weights"
+ )
+ parser.add_argument("-w", "--workers", default=8, type=int, help="Num workers")
+ parser.add_argument("-a", "--augmentations", default="safe", type=str, help="Level of image augmentations")
+ parser.add_argument("--transfer", default=None, type=str, help="")
+ parser.add_argument("--fp16", action="store_true")
+ parser.add_argument("--size", default=512, type=int)
+ parser.add_argument("--fold", default=0, type=int)
+ parser.add_argument("-s", "--scheduler", default="multistep", type=str, help="")
+ parser.add_argument("-x", "--experiment", default=None, type=str, help="")
+ parser.add_argument("-d", "--dropout", default=0.0, type=float, help="Dropout before head layer")
+ parser.add_argument("--opl", action="store_true")
+ parser.add_argument(
+ "--warmup", default=0, type=int, help="Number of warmup epochs with reduced LR on encoder parameters"
+ )
+ parser.add_argument("-wd", "--weight-decay", default=0, type=float, help="L2 weight decay")
+ parser.add_argument("--show", action="store_true")
+ parser.add_argument("--dsv", action="store_true")
+ parser.add_argument("--balance", action="store_true")
+ parser.add_argument("--only-buildings", action="store_true")
+ parser.add_argument("--freeze-bn", action="store_true")
+ parser.add_argument("--crops", action="store_true", help="Train on random crops")
+ parser.add_argument("--post-transform", action="store_true")
+ args = parser.parse_args()
+ set_manual_seed(args.seed)
+ data_dir = args.data_dir
+ num_workers = args.workers
+ num_epochs = args.epochs
+ learning_rate = args.learning_rate
+ model_name = args.model
+ optimizer_name = args.optimizer
+ image_size = args.size, args.size
+ fast = args.fast
+ augmentations = args.augmentations
+ fp16 = args.fp16
+ scheduler_name = args.scheduler
+ experiment = args.experiment
+ dropout = args.dropout
+ online_pseudolabeling = args.opl
+ segmentation_losses = args.criterion
+ verbose = args.verbose
+ warmup = args.warmup
+ show = args.show
+ accumulation_steps = args.accumulation_steps
+ weight_decay = args.weight_decay
+ fold = args.fold
+ balance = args.balance
+ only_buildings = args.only_buildings
+ freeze_bn = args.freeze_bn
+ train_on_crops = args.crops
+ enable_post_image_transform = args.post_transform
+ disaster_type_loss = args.disaster_type_loss
+ train_batch_size = args.batch_size
+ embedding_criterion = args.embedding
+ damage_type_loss = args.damage_type_loss
+ # Compute batch size for validaion
+ if train_on_crops:
+ valid_batch_size = max(1, (train_batch_size * (image_size[0] * image_size[1])) // (1024 ** 2))
+ else:
+ valid_batch_size = train_batch_size
+ run_train = num_epochs > 0
+ model: nn.Module = get_model(model_name, dropout=dropout).cuda()
+ if args.transfer:
+ transfer_checkpoint = fs.auto_file(args.transfer)
+ print("Transfering weights from model checkpoint", transfer_checkpoint)
+ checkpoint = load_checkpoint(transfer_checkpoint)
+ pretrained_dict = checkpoint["model_state_dict"]
+ transfer_weights(model, pretrained_dict)
+ if args.checkpoint:
+ checkpoint = load_checkpoint(fs.auto_file(args.checkpoint))
+ unpack_checkpoint(checkpoint, model=model)
+ print("Loaded model weights from:", args.checkpoint)
+ report_checkpoint(checkpoint)
+ if freeze_bn:
+ torch_utils.freeze_bn(model)
+ print("Freezing bn params")
+ runner = SupervisedRunner(input_key=INPUT_IMAGE_KEY, output_key=None)
+ main_metric = "weighted_f1"
+ cmd_args = vars(args)
+ current_time = datetime.now().strftime("%b%d_%H_%M")
+ checkpoint_prefix = f"{current_time}_{args.model}_{args.size}_fold{fold}"
+ if fp16:
+ checkpoint_prefix += "_fp16"
+ if fast:
+ checkpoint_prefix += "_fast"
+ if online_pseudolabeling:
+ checkpoint_prefix += "_opl"
+ if train_on_crops:
+ checkpoint_prefix += "_crops"
+ if experiment is not None:
+ checkpoint_prefix = experiment
+ log_dir = os.path.join("runs", checkpoint_prefix)
+ os.makedirs(log_dir, exist_ok=False)
+ config_fname = os.path.join(log_dir, f"{checkpoint_prefix}.json")
+ with open(config_fname, "w") as f:
+ train_session_args = vars(args)
+ f.write(json.dumps(train_session_args, indent=2))
+ default_callbacks = [
+ CompetitionMetricCallback(input_key=INPUT_MASK_KEY, output_key=OUTPUT_MASK_KEY, prefix="weighted_f1"),
+ ConfusionMatrixCallback(
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_KEY,
+ class_names=["land", "no_damage", "minor_damage", "major_damage", "destroyed"],
+ ignore_index=UNLABELED_SAMPLE,
+ ),
+ ]
+ if show:
+ default_callbacks += [
+ ShowPolarBatchesCallback(draw_predictions, metric=main_metric + "_batch", minimize=False)
+ ]
+ train_ds, valid_ds, train_sampler = get_datasets(
+ data_dir=data_dir,
+ image_size=image_size,
+ augmentation=augmentations,
+ fast=fast,
+ fold=fold,
+ balance=balance,
+ only_buildings=only_buildings,
+ train_on_crops=train_on_crops,
+ enable_post_image_transform=enable_post_image_transform,
+ )
+ # Pretrain/warmup
+ if warmup:
+ callbacks = default_callbacks.copy()
+ criterions_dict = {}
+ losses = []
+ for criterion in segmentation_losses:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name, input_key=INPUT_MASK_KEY, output_key=OUTPUT_MASK_KEY, loss_weight=float(loss_weight)
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print("Using loss", loss_name, loss_weight)
+ if args.mask4 is not None:
+ for criterion in args.mask4:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name, input_key=INPUT_MASK_KEY, output_key=OUTPUT_MASK_4_KEY, loss_weight=float(loss_weight)
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print("Using loss", loss_name, loss_weight)
+ callbacks += [
+ CriterionAggregatorCallback(prefix="loss", loss_keys=losses),
+ OptimizerCallback(accumulation_steps=accumulation_steps, decouple_weight_decay=False),
+ ]
+ parameters = get_lr_decay_parameters(model.named_parameters(), learning_rate, {"encoder": 0.1})
+ optimizer = get_optimizer("RAdam", parameters, learning_rate=learning_rate * 0.1)
+ loaders = collections.OrderedDict()
+ loaders["train"] = DataLoader(
+ train_ds,
+ batch_size=train_batch_size,
+ num_workers=num_workers,
+ pin_memory=True,
+ drop_last=True,
+ shuffle=train_sampler is None,
+ sampler=train_sampler,
+ )
+ loaders["valid"] = DataLoader(valid_ds, batch_size=valid_batch_size, num_workers=num_workers, pin_memory=True)
+ runner.train(
+ fp16=fp16,
+ model=model,
+ criterion=criterions_dict,
+ optimizer=optimizer,
+ scheduler=None,
+ callbacks=callbacks,
+ loaders=loaders,
+ logdir=os.path.join(log_dir, "warmup"),
+ num_epochs=warmup,
+ verbose=verbose,
+ main_metric=main_metric,
+ minimize_metric=False,
+ checkpoint_data={"cmd_args": cmd_args},
+ )
+ del optimizer, loaders
+ best_checkpoint = os.path.join(log_dir, "warmup", "checkpoints", "best.pth")
+ model_checkpoint = os.path.join(log_dir, "warmup", "checkpoints", f"{checkpoint_prefix}_warmup.pth")
+ clean_checkpoint(best_checkpoint, model_checkpoint)
+ torch.cuda.empty_cache()
+ gc.collect()
+ if run_train:
+ loaders = collections.OrderedDict()
+ callbacks = default_callbacks.copy()
+ criterions_dict = {}
+ losses = []
+ if online_pseudolabeling:
+ unlabeled_label = get_pseudolabeling_dataset(
+ data_dir, include_masks=False, image_size=image_size, augmentation=None
+ )
+ unlabeled_train = get_pseudolabeling_dataset(
+ data_dir,
+ include_masks=True,
+ image_size=image_size,
+ augmentation=augmentations,
+ train_on_crops=train_on_crops,
+ enable_post_image_transform=enable_post_image_transform,
+ )
+ loaders["label"] = DataLoader(
+ unlabeled_label, batch_size=valid_batch_size, num_workers=num_workers, pin_memory=True
+ )
+ train_ds = train_ds + unlabeled_train
+ train_sampler = None
+ callbacks += [
+ CEOnlinePseudolabelingCallback2d(
+ unlabeled_train,
+ pseudolabel_loader="label",
+ prob_threshold=0.75,
+ output_key=OUTPUT_MASK_KEY,
+ unlabeled_class=UNLABELED_SAMPLE,
+ label_frequency=5,
+ )
+ ]
+ print("Using online pseudolabeling with ", len(unlabeled_label), "samples")
+ loaders["train"] = DataLoader(
+ train_ds,
+ batch_size=train_batch_size,
+ num_workers=num_workers,
+ pin_memory=True,
+ drop_last=True,
+ shuffle=train_sampler is None,
+ sampler=train_sampler,
+ )
+ loaders["valid"] = DataLoader(valid_ds, batch_size=valid_batch_size, num_workers=num_workers, pin_memory=True)
+ # Create losses
+ for criterion in segmentation_losses:
+ if isinstance(criterion, (list, tuple)) and len(criterion) == 2:
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion[0], 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="segmentation",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(INPUT_MASK_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask4 is not None:
+ for criterion in args.mask4:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask4",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_4_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_4_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask8 is not None:
+ for criterion in args.mask8:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask8",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_8_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_8_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask16 is not None:
+ for criterion in args.mask16:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask16",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_16_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_16_KEY, "Using loss", loss_name, loss_weight)
+ if args.mask32 is not None:
+ for criterion in args.mask32:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ prefix="mask32",
+ input_key=INPUT_MASK_KEY,
+ output_key=OUTPUT_MASK_32_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_MASK_32_KEY, "Using loss", loss_name, loss_weight)
+ if disaster_type_loss is not None:
+ callbacks += [
+ ConfusionMatrixCallback(
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ class_names=DISASTER_TYPES,
+ prefix=f"{DISASTER_TYPE_KEY}/confusion_matrix",
+ ),
+ AccuracyCallback(
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ prefix=f"{DISASTER_TYPE_KEY}/accuracy",
+ activation="Softmax",
+ ),
+ ]
+ for criterion in disaster_type_loss:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ input_key=DISASTER_TYPE_KEY,
+ output_key=DISASTER_TYPE_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(DISASTER_TYPE_KEY, "Using loss", loss_name, loss_weight)
+ if damage_type_loss is not None:
+ callbacks += [
+ # MultilabelConfusionMatrixCallback(
+ # input_key=DAMAGE_TYPE_KEY,
+ # output_key=DAMAGE_TYPE_KEY,
+ # class_names=DAMAGE_TYPES,
+ # prefix=f"{DAMAGE_TYPE_KEY}/confusion_matrix",
+ # ),
+ AccuracyCallback(
+ input_key=DAMAGE_TYPE_KEY,
+ output_key=DAMAGE_TYPE_KEY,
+ prefix=f"{DAMAGE_TYPE_KEY}/accuracy",
+ activation="Sigmoid",
+ threshold=0.5,
+ )
+ ]
+ for criterion in damage_type_loss:
+ if isinstance(criterion, (list, tuple)):
+ loss_name, loss_weight = criterion
+ else:
+ loss_name, loss_weight = criterion, 1.0
+ cd, criterion, criterion_name = get_criterion_callback(
+ loss_name,
+ input_key=DAMAGE_TYPE_KEY,
+ output_key=DAMAGE_TYPE_KEY,
+ loss_weight=float(loss_weight),
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(DAMAGE_TYPE_KEY, "Using loss", loss_name, loss_weight)
+ if embedding_criterion is not None:
+ cd, criterion, criterion_name = get_criterion_callback(
+ embedding_criterion,
+ prefix="embedding",
+ input_key=INPUT_MASK_KEY,
+ loss_weight=1.0,
+ )
+ criterions_dict.update(cd)
+ callbacks.append(criterion)
+ losses.append(criterion_name)
+ print(OUTPUT_EMBEDDING_KEY, "Using loss", embedding_criterion)
+ callbacks += [
+ CriterionAggregatorCallback(prefix="loss", loss_keys=losses),
+ OptimizerCallback(accumulation_steps=accumulation_steps, decouple_weight_decay=False),
+ ]
+ optimizer = get_optimizer(
+ optimizer_name, get_optimizable_parameters(model), learning_rate, weight_decay=weight_decay
+ )
+ scheduler = get_scheduler(
+ scheduler_name, optimizer, lr=learning_rate, num_epochs=num_epochs, batches_in_epoch=len(loaders["train"])
+ )
+ if isinstance(scheduler, CyclicLR):
+ callbacks += [SchedulerCallback(mode="batch")]
+ print("Train session :", checkpoint_prefix)
+ print(" FP16 mode :", fp16)
+ print(" Fast mode :", args.fast)
+ print(" Epochs :", num_epochs)
+ print(" Workers :", num_workers)
+ print(" Data dir :", data_dir)
+ print(" Log dir :", log_dir)
+ print("Data ")
+ print(" Augmentations :", augmentations)
+ print(" Train size :", len(loaders["train"]), len(train_ds))
+ print(" Valid size :", len(loaders["valid"]), len(valid_ds))
+ print(" Image size :", image_size)
+ print(" Train on crops :", train_on_crops)
+ print(" Balance :", balance)
+ print(" Buildings only :", only_buildings)
+ print(" Post transform :", enable_post_image_transform)
+ print("Model :", model_name)
+ print(" Parameters :", count_parameters(model))
+ print(" Dropout :", dropout)
+ print("Optimizer :", optimizer_name)
+ print(" Learning rate :", learning_rate)
+ print(" Weight decay :", weight_decay)
+ print(" Scheduler :", scheduler_name)
+ print(" Batch sizes :", train_batch_size, valid_batch_size)
+ print(" Criterion :", segmentation_losses)
+ print(" Damage type :", damage_type_loss)
+ print(" Disaster type :", disaster_type_loss)
+ print(" Embedding :", embedding_criterion)
+ # model training
+ runner.train(
+ fp16=fp16,
+ model=model,
+ criterion=criterions_dict,
+ optimizer=optimizer,
+ scheduler=scheduler,
+ callbacks=callbacks,
+ loaders=loaders,
+ logdir=os.path.join(log_dir, "main"),
+ num_epochs=num_epochs,
+ verbose=verbose,
+ main_metric=main_metric,
+ minimize_metric=False,
+ checkpoint_data={"cmd_args": vars(args)},
+ )
+ # Training is finished. Let's run predictions using best checkpoint weights
+ best_checkpoint = os.path.join(log_dir, "main", "checkpoints", "best.pth")
+ model_checkpoint = os.path.join(log_dir, "main", "checkpoints", f"{checkpoint_prefix}.pth")
+ clean_checkpoint(best_checkpoint, model_checkpoint)
+ del optimizer, loaders
+# Start training only when fit_predict.py is executed as a script, not when it is imported.
+if __name__ == "__main__":
+ main()
diff --git a/make_folds.py b/make_folds.py
new file mode 100644
index 0000000..9a19b14
--- /dev/null
+++ b/make_folds.py
@@ -0,0 +1,53 @@
+import argparse
+import os
+import pandas as pd
+from sklearn.model_selection import StratifiedKFold
+from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
+from sklearn.preprocessing import LabelEncoder
+import numpy as np
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2")
+ args = parser.parse_args()
+ data_dir = args.data_dir
+ df = pd.read_csv(os.path.join(data_dir, "train.csv"))
+ df = df.sort_values(by="sample_id")
+ df["fold"] = -1
+ df_pre = df[df["event_type"] == "pre"].copy()
+ df_post = df[df["event_type"] == "post"].copy()
+ # Use only post samples to split data
+ # destroyed_buildings,destroyed_pixels,event_name,event_type,light_damaged_buildings,light_damaged_pixels,medium_damaged_buildings,medium_damaged_pixels,non_damaged_buildings,non_damaged_pixels,sample_id
+ y = np.column_stack(
+ [
+ df_post["non_damaged_buildings"].values > 0,
+ df_post["light_damaged_buildings"].values > 0,
+ df_post["medium_damaged_buildings"].values > 0,
+ df_post["destroyed_buildings"].values > 0,
+ LabelEncoder().fit_transform(df_post["event_name"].tolist()),
+ ]
+ )
+ mskf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+ folds = np.ones(len(y), dtype=int) * -1
+ for fold, (train_index, test_index) in enumerate(mskf.split(df_post, y)):
+ folds[test_index] = fold
+ df_pre["fold"] = folds
+ df_post["fold"] = folds
+ df = pd.concat((df_pre, df_post))
+ df.to_csv(os.path.join(data_dir, "train_folds.csv"), index=None)
+# Generate the fold assignment only when make_folds.py is executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/optimize_softmax.py b/optimize_softmax.py
new file mode 100644
index 0000000..3ce9f74
--- /dev/null
+++ b/optimize_softmax.py
@@ -0,0 +1,79 @@
+import argparse
+import os
+from collections import defaultdict
+import numpy as np
+from xview.rounder import OptimizedRounder
+from pytorch_toolbelt.utils import fs
+import pandas as pd
+from datetime import datetime
+def main():
+ """Score each checkpoint's out-of-fold predictions before and after fitting OptimizedRounder coefficients, and save the results to a CSV file."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("checkpoints", nargs="+")
+ parser.add_argument("-w", "--workers", type=int, default=1, help="")
+ parser.add_argument("-dd", "--data-dir", type=str, default="data", help="Data directory")
+ parser.add_argument("-a", "--activation", type=str, default="pre", help="")
+ args = parser.parse_args()
+ # Ground-truth mask files are gathered from both <data_dir>/tier3/masks and <data_dir>/train/masks.
+ targets = fs.find_in_dir(os.path.join(args.data_dir, "tier3", "masks")) + fs.find_in_dir(
+ os.path.join(args.data_dir, "train", "masks")
+ )
+ # Keep only files whose name contains "_post_", keyed by fs.id_from_fname(fname).
+ targets_post = dict((fs.id_from_fname(fname), fname) for fname in targets if "_post_" in fname)
+ # Column name -> list of values, one entry appended per checkpoint.
+ df = defaultdict(list)
+ # Timestamp that becomes part of the output CSV file name.
+ current_time = datetime.now().strftime("%b%d_%H_%M")
+ print("Checkpoints ", args.checkpoints)
+ print("Activation ", args.activation)
+ for model_checkpoint in args.checkpoints:
+ model_checkpoint = fs.auto_file(model_checkpoint)
+ # Predictions are read from "<checkpoint id>_oof_predictions" in the checkpoint's own directory.
+ predictions_dir = os.path.join(
+ os.path.dirname(model_checkpoint), fs.id_from_fname(model_checkpoint) + "_oof_predictions"
+ )
+ prediction_files = fs.find_in_dir(predictions_dir)
+ prediction_files_post = dict(
+ (fs.id_from_fname(fname), fname) for fname in prediction_files if "_post_" in fname
+ )
+ # Pair targets and predictions by id; a prediction id missing from targets_post raises KeyError.
+ y_true_filenames = [targets_post[image_id_post] for image_id_post in prediction_files_post.keys()]
+ y_pred_filenames = [prediction_files_post[image_id_post] for image_id_post in prediction_files_post.keys()]
+ # The --activation string is forwarded as apply_softmax.
+ rounder = OptimizedRounder(workers=args.workers, apply_softmax=args.activation)
+ # Baseline ("raw") scores: predict with a coefficient vector of five ones.
+ raw_score, raw_localization_f1, raw_damage_f1, raw_damage_f1s = rounder.predict(
+ y_pred_filenames, y_true_filenames, np.array([1, 1, 1, 1, 1], dtype=np.float32)
+ )
+ # Fit the coefficients on the same files, then score again with the fitted values.
+ rounder.fit(y_pred_filenames, y_true_filenames)
+ score, localization_f1, damage_f1, damage_f1s = rounder.predict(
+ y_pred_filenames, y_true_filenames, rounder.coefficients()
+ )
+ print(rounder.coefficients())
+ # raw_damage_f1s and damage_f1s are returned by predict() but not recorded below.
+ df["checkpoint"].append(fs.id_from_fname(model_checkpoint))
+ df["coefficients"].append(rounder.coefficients())
+ df["samples"].append(len(y_true_filenames))
+ df["raw_score"].append(raw_score)
+ df["raw_localization"].append(raw_localization_f1)
+ df["raw_damage"].append(raw_damage_f1)
+ df["opt_score"].append(score)
+ df["opt_localization"].append(localization_f1)
+ df["opt_damage"].append(damage_f1)
+ # Write the accumulated results to optimized_weights_<timestamp>.csv in the working directory.
+ # NOTE(review): indentation is not visible in this diff, so it is unclear whether the file is
+ # rewritten after every checkpoint or written once after the loop — confirm.
+ dataframe = pd.DataFrame.from_dict(df)
+ dataframe.to_csv(f"optimized_weights_{current_time}.csv", index=None)
+ print(df)
+# Run the coefficient optimisation only when optimize_softmax.py is executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..e8331ed
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,130 @@
+import argparse
+import os
+from collections import defaultdict
+from datetime import datetime
+import torch
+import pandas as pd
+from pytorch_toolbelt.utils import fs
+from xview.dataset import get_test_dataset, OUTPUT_MASK_KEY
+from xview.inference import Ensembler, model_from_checkpoint, run_inference_on_dataset, ApplySoftmaxTo, MultiscaleTTA, \
+def main():
+ """Command-line entry point: load checkpoints, build the inference model and run it on the test dataset."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("models", nargs="+")
+ parser.add_argument("-o", "--output-dir", type=str)
+ parser.add_argument("--fast", action="store_true")
+ parser.add_argument("--tta", type=str, default=None)
+ parser.add_argument("-b", "--batch-size", type=int, default=1, help="Batch Size during training, e.g. -b 64")
+ parser.add_argument("-w", "--workers", type=int, default=0, help="")
+ parser.add_argument("-dd", "--data-dir", type=str, default="data", help="Data directory")
+ parser.add_argument("-p", "--postprocessing", type=str, default="dominant")
+ parser.add_argument("--size", default=1024, type=int)
+ parser.add_argument("--activation", default="model", type=str)
+ parser.add_argument("--weights", default=None, type=float, nargs="+")
+ parser.add_argument("--fp16", action="store_true")
+ parser.add_argument("--align", action="store_true")
+ args = parser.parse_args()
+ workers = args.workers
+ data_dir = args.data_dir
+ fast = args.fast
+ tta = args.tta
+ image_size = args.size, args.size
+ model_checkpoints = args.models
+ batch_size = args.batch_size
+ activation_after = args.activation
+ fp16 = args.fp16
+ align = args.align
+ postprocessing=args.postprocessing
+ weights = args.weights
+ # --weights is optional, but when given it must contain exactly 5 values.
+ assert weights is None or len(weights) == 5
+ current_time = datetime.now().strftime("%b%d_%H_%M")
+ # Single checkpoint and no --output-dir: name the directory after the checkpoint, suffixed by the weighting/TTA options.
+ if args.output_dir is None and len(model_checkpoints) == 1:
+ output_dir = os.path.join(
+ os.path.dirname(model_checkpoints[0]), fs.id_from_fname(model_checkpoints[0]) + "_test_predictions"
+ )
+ if weights is not None:
+ output_dir += "_weighted"
+ if tta is not None:
+ output_dir += f"_{tta}"
+ else:
+ # Otherwise use --output-dir, falling back to a timestamped directory name.
+ output_dir = args.output_dir or f"output_dir_{current_time}"
+ print("Size", image_size)
+ print("Output dir", output_dir)
+ print("Postproc ", postprocessing)
+ # Load models
+ models = []
+ infos = []
+ for model_checkpoint in model_checkpoints:
+ try:
+ model, info = model_from_checkpoint(
+ fs.auto_file(model_checkpoint), tta=None, activation_after=activation_after, report=False
+ )
+ models.append(model)
+ infos.append(info)
+ except Exception as e:
+ # Report the error and the failing checkpoint, then abort the run.
+ print(e)
+ print(model_checkpoint)
+ return
+ # Tabulate the info records returned alongside each model and print mean/std of their metrics.
+ df = pd.DataFrame.from_records(infos)
+ print(df)
+ print("score ", df["score"].mean(), df["score"].std())
+ print("localization ", df["localization"].mean(), df["localization"].std())
+ print("damage ", df["damage"].mean(), df["damage"].std())
+ # NOTE(review): indentation is flattened in this view. The ensemble/TTA/activation wrappers below appear to apply
+ # only when several models were loaded, while a single model is used exactly as returned by model_from_checkpoint
+ # (so --tta would have no effect on it even though output_dir is suffixed with it) - confirm against the original file.
+ if len(models) > 1:
+ model = Ensembler(models, [OUTPUT_MASK_KEY])
+ if activation_after == "ensemble":
+ model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+ print("Applying activation after ensemble")
+ # An unrecognised --tta value matches none of the branches below, so no TTA wrapper is added.
+ if tta == "multiscale":
+ print(f"Using {tta}")
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ # NOTE(review): HFlipTTA and D4TTA are not in the visible part of the xview.inference import - presumably on its continuation lines; verify.
+ if tta == "flip":
+ print(f"Using {tta}")
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ if tta == "flipscale":
+ print(f"Using {tta}")
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ if tta == "multiscale_d4":
+ print(f"Using {tta}")
+ model = D4TTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ if activation_after == "tta":
+ model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+ else:
+ model = models[0]
+ test_ds = get_test_dataset(data_dir=data_dir, image_size=image_size, fast=fast, align_post=align)
+ run_inference_on_dataset(
+ model=model,
+ dataset=test_ds,
+ output_dir=output_dir,
+ batch_size=batch_size,
+ workers=workers,
+ weights=weights,
+ fp16=fp16,
+ postprocessing=postprocessing,
+ )
+if __name__ == "__main__":
+ main()
diff --git a/predict_37_weighted.py b/predict_37_weighted.py
new file mode 100644
index 0000000..40874c7
--- /dev/null
+++ b/predict_37_weighted.py
@@ -0,0 +1,239 @@
+import argparse
+import pandas as pd
+import torch
+from pytorch_toolbelt.utils import fs
+from xview.dataset import OUTPUT_MASK_KEY, get_test_dataset
+from xview.inference import (
+ model_from_checkpoint,
+ ApplyWeights,
+ Ensembler,
+ ApplySoftmaxTo,
+ MultiscaleTTA,
+ HFlipTTA,
+ D4TTA,
+ run_inference_on_dataset,
+def weighted_model(checkpoint_fname: str, weights, activation: str):
+ model, info = model_from_checkpoint(fs.auto_file(checkpoint_fname, where="models"), activation_after=activation, report=False)
+ model = ApplyWeights(model, weights)
+ return model, info
+def main():
+ """Command-line entry point: ensemble a fixed list of weighted checkpoints, optionally add TTA, and run inference on the test dataset."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-o", "--output-dir", type=str, default="models/predict_37_weighted")
+ parser.add_argument("--tta", type=str, default=None)
+ parser.add_argument("-b", "--batch-size", type=int, default=1, help="Batch Size during training, e.g. -b 64")
+ parser.add_argument("-w", "--workers", type=int, default=0, help="")
+ parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2", help="Data directory")
+ parser.add_argument("-p", "--postprocessing", type=str, default=None)
+ parser.add_argument("--size", default=1024, type=int)
+ parser.add_argument("--activation", default="model", type=str)
+ parser.add_argument("--fp16", action="store_true")
+ args = parser.parse_args()
+ workers = args.workers
+ data_dir = args.data_dir
+ tta = args.tta
+ image_size = args.size, args.size
+ batch_size = args.batch_size
+ activation_after = args.activation
+ fp16 = args.fp16
+ postprocessing = args.postprocessing
+ output_dir = args.output_dir
+ print("Size ", image_size)
+ print("Output dir", output_dir)
+ print("Postproc ", postprocessing)
+ # Per-fold lists (despite the *_dict names) of (checkpoint file name, 5 weights passed to ApplyWeights).
+ # Commented-out entries are not loaded.
+ fold_0_models_dict = [
+ # (
+ # "Dec15_21_41_resnet101_fpncatv2_256_512_fold0_fp16_crops.pth",
+ # [0.45136154, 1.4482629, 1.42098208, 0.6839698, 0.96800456],
+ # ),
+ # (
+ # "Dec16_08_26_resnet34_unet_v2_512_fold0_fp16_crops.pth",
+ # [0.92919105, 1.03831743, 1.03017048, 0.98257118, 1.0241164],
+ # ),
+ # (
+ # "Dec21_21_54_densenet161_deeplab256_512_fold0_fp16_crops.pth",
+ # [0.48157651, 1.02084685, 1.36264406, 1.03175205, 1.11758873],
+ # ),
+ # 0.762814651939279 0.854002889559006 0.7237339786736817 [0.9186602573598759, 0.5420118318644089, 0.7123870673168781, 0.8405837378060299] coeffs [0.51244243 1.42747062 1.23648384 0.90290896 0.88912514]
+ (
+ "Dec30_15_34_resnet34_unet_v2_512_fold0_fp16_pseudo_crops.pth",
+ [0.51244243, 1.42747062, 1.23648384, 0.90290896, 0.88912514],
+ ),
+ # 0.7673669954814148 0.8582940771677703 0.7283982461872626 [0.919932857782992, 0.5413880912001547, 0.731840942842999, 0.8396640419159087] coeffs [0.50847073 1.15392272 1.2059733 1.1340391 1.03196719]
+ (
+ "Dec30_15_34_resnet101_fpncatv2_256_512_fold0_fp16_pseudo_crops.pth",
+ [0.50847073, 1.15392272, 1.2059733, 1.1340391, 1.03196719],
+ ),
+ ]
+ fold_1_models_dict = [
+ # (
+ # "Dec16_18_59_densenet201_fpncatv2_256_512_fold1_fp16_crops.pth",
+ # [0.64202075, 1.04641224, 1.23015655, 1.03203408, 1.12505602],
+ # ),
+ # (
+ # "Dec17_01_52_resnet34_unet_v2_512_fold1_fp16_crops.pth",
+ # [0.69605759, 0.89963168, 0.9232137, 0.92938775, 0.94460875],
+ # ),
+ (
+ "Dec22_22_24_seresnext50_unet_v2_512_fold1_fp16_crops.pth",
+ [0.54324459, 1.76890163, 1.20782899, 0.85128004, 0.83100698],
+ ),
+ (
+ "Dec31_02_09_resnet34_unet_v2_512_fold1_fp16_pseudo_crops.pth",
+ # Maybe suboptimal
+ [0.48269921, 1.22874469, 1.38328066, 0.96695393, 0.91348539],
+ ),
+ (
+ "Dec31_03_55_densenet201_fpncatv2_256_512_fold1_fp16_pseudo_crops.pth",
+ [0.48804137, 1.14809462, 1.24851827, 1.11798428, 1.00790482]
+ )
+ ]
+ fold_2_models_dict = [
+ # (
+ # "Dec17_19_19_resnet34_unet_v2_512_fold2_fp16_crops.pth",
+ # [0.65977938, 1.50252452, 0.97098732, 0.74048182, 1.08712367],
+ # ),
+ # 0.7674290884579319 0.8107652756500724 0.7488564368041575 [0.9228529822124596, 0.5900700454049471, 0.736806959757804, 0.8292099253270483] coeffs [0.34641084 1.63486251 1.14186036 0.86668715 1.12193125]
+ (
+ "Dec17_19_12_inceptionv4_fpncatv2_256_512_fold2_fp16_crops.pth",
+ [0.34641084, 1.63486251, 1.14186036, 0.86668715, 1.12193125],
+ ),
+ # 0.7683650436367244 0.8543981047493 0.7314937317313349 [0.9248137307721042, 0.5642011151253543, 0.7081016179096937, 0.831720163492164] coeffs [0.51277498 1.4475809 0.8296623 0.97868596 1.34180805]
+ # NOTE(review): the coefficients quoted in the comment above differ from the list used below - confirm which set is intended.
+ (
+ "Dec27_14_08_densenet169_unet_v2_512_fold2_fp16_crops.pth",
+ [0.55429115, 1.34944309, 1.1087044, 0.89542089, 1.17257541],
+ ),
+ (
+ "Dec31_12_45_resnet34_unet_v2_512_fold2_fp16_pseudo_crops.pth",
+ # Copied from Dec17_19_19_resnet34_unet_v2_512_fold2_fp16_crops
+ [0.65977938, 1.50252452, 0.97098732, 0.74048182, 1.08712367],
+ )
+ ]
+ fold_3_models_dict = [
+ (
+ "Dec15_23_24_resnet34_unet_v2_512_fold3_crops.pth",
+ [0.84090623, 1.02953555, 1.2526516, 0.9298182, 0.94053529],
+ ),
+ # (
+ # "Dec18_12_49_resnet34_unet_v2_512_fold3_fp16_crops.pth",
+ # [0.55555375, 1.18287119, 1.10997173, 0.85927596, 1.18145368],
+ # ),
+ # (
+ # "Dec19_14_59_efficientb4_fpncatv2_256_512_fold3_fp16_crops.pth",
+ # [0.59338243, 1.17347438, 1.186104, 1.06860638, 1.03041829],
+ # ),
+ (
+ "Dec21_11_50_seresnext50_unet_v2_512_fold3_fp16_crops.pth",
+ [0.43108046, 1.30222898, 1.09660616, 0.94958969, 1.07063753],
+ ),
+ (
+ "Dec31_18_17_efficientb4_fpncatv2_256_512_fold3_fp16_pseudo_crops.pth",
+ # Copied from Dec19_14_59_efficientb4_fpncatv2_256_512_fold3_fp16_crops
+ [0.59338243, 1.17347438, 1.186104, 1.06860638, 1.03041829]
+ )
+ ]
+ fold_4_models_dict = [
+ (
+ "Dec19_06_18_resnet34_unet_v2_512_fold4_fp16_crops.pth",
+ [0.83915734, 1.02560309, 0.77639015, 1.17487775, 1.05632771],
+ ),
+ (
+ "Dec27_14_37_resnet101_unet_v2_512_fold4_fp16_crops.pth",
+ [0.57414314, 1.19599486, 1.05561912, 0.98815567, 1.2274592],
+ ),
+ ]
+ infos = []
+ models = []
+ # Load every listed checkpoint wrapped in its weights and collect the accompanying info records.
+ for models_dict in [
+ fold_0_models_dict,
+ fold_1_models_dict,
+ fold_2_models_dict,
+ fold_3_models_dict,
+ fold_4_models_dict,
+ ]:
+ for checkpoint, weights in models_dict:
+ model, info = weighted_model(checkpoint, weights, activation_after)
+ models.append(model)
+ infos.append(info)
+ # Combine all weighted models into a single ensemble.
+ model = Ensembler(models, outputs=[OUTPUT_MASK_KEY])
+ df = pd.DataFrame.from_records(infos)
+ pd.set_option("display.max_rows", None)
+ pd.set_option("display.max_columns", None)
+ pd.set_option("display.width", None)
+ # NOTE(review): recent pandas releases reject a negative max_colwidth and expect None for "no limit" - confirm against the pinned pandas version.
+ pd.set_option("display.max_colwidth", -1)
+ print(df)
+ print("score ", df["score"].mean(), df["score"].std())
+ print("localization ", df["localization"].mean(), df["localization"].std())
+ print("damage ", df["damage"].mean(), df["damage"].std())
+ if activation_after == "ensemble":
+ model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+ print("Applying activation after ensemble")
+ # An unrecognised --tta value matches none of the branches below, so no TTA wrapper is added
+ # (its name is still appended to output_dir further down).
+ if tta == "multiscale":
+ print(f"Using {tta}")
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ if tta == "flip":
+ print(f"Using {tta}")
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ if tta == "flipscale":
+ print(f"Using {tta}")
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ if tta == "flipscale2":
+ print(f"Using {tta}")
+ model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-256, -128, +128, +256], average=True)
+ if tta == "multiscale_d4":
+ print(f"Using {tta}")
+ model = D4TTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+ model = MultiscaleTTA(model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-128, +128], average=True)
+ # Record the requested TTA mode in the output directory name.
+ if tta is not None:
+ output_dir += "_" + tta
+ if activation_after == "tta":
+ model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+ print("Applying activation after TTA")
+ test_ds = get_test_dataset(data_dir=data_dir, image_size=image_size)
+ run_inference_on_dataset(
+ model=model,
+ dataset=test_ds,
+ output_dir=output_dir,
+ batch_size=batch_size,
+ workers=workers,
+ fp16=fp16,
+ postprocessing=postprocessing,
+ save_pseudolabels=False,
+ cpu=False
+ )
+if __name__ == "__main__":
+ main()
diff --git a/predict_oof.py b/predict_oof.py
new file mode 100644
index 0000000..c2677b7
--- /dev/null
+++ b/predict_oof.py
@@ -0,0 +1,106 @@
+import argparse
+import os
+import torch
+from pytorch_toolbelt.utils import fs
+from xview.dataset import get_datasets
+from xview.inference import model_from_checkpoint, run_inference_on_dataset_oof
+import numpy as np
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("model", type=str, nargs="+")
+ parser.add_argument("-o", "--output-dir", type=str, default=None)
+ parser.add_argument("--fast", action="store_true")
+ parser.add_argument("--tta", type=str, default=None)
+ parser.add_argument("-b", "--batch-size", type=int, default=1, help="Batch Size during training, e.g. -b 64")
+ parser.add_argument("-w", "--workers", type=int, default=0, help="")
+ parser.add_argument("-dd", "--data-dir", type=str, default="data", help="Data directory")
+ parser.add_argument("--size", default=1024, type=int)
+ parser.add_argument("--fold", default=None, type=int)
+ parser.add_argument("--no-save", action="store_true")
+ parser.add_argument("--fp16", action="store_true")
+ parser.add_argument("--activation", default="model", type=str)
+ parser.add_argument("--align", action="store_true")
+ args = parser.parse_args()
+ fp16 = args.fp16
+ activation = args.activation
+ average_score = []
+ average_dmg = []
+ average_loc = []
+ for model_checkpoint in args.model:
+ model_checkpoint = fs.auto_file(model_checkpoint)
+ checkpoint = torch.load(model_checkpoint)
+ print("Model :", model_checkpoint)
+ print(
+ "Metrics :",
+ checkpoint["epoch_metrics"]["valid"]["weighted_f1"],
+ checkpoint["epoch_metrics"]["valid"]["weighted_f1/localization_f1"],
+ checkpoint["epoch_metrics"]["valid"]["weighted_f1/damage_f1"],
+ )
+ workers = args.workers
+ data_dir = args.data_dir
+ fast = args.fast
+ tta = args.tta
+ no_save = args.no_save
+ image_size = args.size or checkpoint["checkpoint_data"]["cmd_args"]["size"]
+ batch_size = args.batch_size or checkpoint["checkpoint_data"]["cmd_args"]["batch_size"]
+ fold = args.fold or checkpoint["checkpoint_data"]["cmd_args"]["fold"]
+ align = args.align
+ print("Image size :", image_size)
+ print("Fold :", fold)
+ print("Align :", align)
+ print("Workers :", workers)
+ print("Save :", not no_save)
+ output_dir = None
+ if not no_save:
+ output_dir = args.output_dir or os.path.join(
+ os.path.dirname(model_checkpoint), fs.id_from_fname(model_checkpoint) + "_oof_predictions"
+ )
+ print("Output dir :", output_dir)
+ # Load models
+ model, info = model_from_checkpoint(model_checkpoint, tta=tta, activation_after=None, report=False)
+ print(info)
+ _, valid_ds, _ = get_datasets(data_dir=data_dir, image_size=(image_size, image_size), fast=fast, fold=fold, align_post=align)
+ score, localization_f1, damage_f1, damage_f1s = run_inference_on_dataset_oof(
+ model=model,
+ dataset=valid_ds,
+ output_dir=output_dir,
+ batch_size=batch_size,
+ workers=workers,
+ save=not no_save,
+ fp16=fp16
+ )
+ average_score.append(score)
+ average_dmg.append(damage_f1)
+ average_loc.append(localization_f1)
+ print("Score :", score)
+ print("Localization :", localization_f1)
+ print("Damage :", damage_f1)
+ print("Per class :", damage_f1s)
+ print()
+ print("Average")
+ if len(average_score) > 1:
+ print("Score :", np.mean(average_score), np.std(average_score))
+ print("Localization :", np.mean(average_loc), np.std(average_loc))
+ print("Damage :", np.mean(average_dmg), np.std(average_dmg))
+if __name__ == "__main__":
+ torch.backends.cudnn.benchmark = True
+ main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..727e0b1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
\ No newline at end of file
diff --git a/requirements_docker_pytorch14.txt b/requirements_docker_pytorch14.txt
new file mode 100644
index 0000000..1c81c6d
--- /dev/null
+++ b/requirements_docker_pytorch14.txt
@@ -0,0 +1,17 @@
+-f https://download.pytorch.org/whl/cpu/torch_stable.html
\ No newline at end of file
diff --git a/run_tensorboard.cmd b/run_tensorboard.cmd
new file mode 100644
index 0000000..184373d
--- /dev/null
+++ b/run_tensorboard.cmd
@@ -0,0 +1,3 @@
+@call c:\Anaconda3\Scripts\activate.bat tb
+REM --host takes an address argument and had none. 0.0.0.0 (listen on all interfaces) is an assumed value - confirm, or use localhost to keep TensorBoard local.
+tensorboard --logdir runs --host 0.0.0.0 --port 5555
\ No newline at end of file
diff --git a/run_tensorboard.sh b/run_tensorboard.sh
new file mode 100644
index 0000000..c717e76
--- /dev/null
+++ b/run_tensorboard.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+tensorboard --logdir runs --host --port 5555
\ No newline at end of file
diff --git a/run_tensorboard_3389.sh b/run_tensorboard_3389.sh
new file mode 100644
index 0000000..290aee4
--- /dev/null
+++ b/run_tensorboard_3389.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+tensorboard --logdir runs --host --port 3389
\ No newline at end of file
diff --git a/test_docker_pytorch14_37.cmd b/test_docker_pytorch14_37.cmd
new file mode 100644
index 0000000..4f84579
--- /dev/null
+++ b/test_docker_pytorch14_37.cmd
@@ -0,0 +1,26 @@
+REM Build the image from Dockerfile-pytorch14-37 and tag it for the ekhvedchenya/xview2 repository.
+docker build -t xview2:37_pytorch14 -f Dockerfile-pytorch14-37 .
+docker tag xview2:37_pytorch14 ekhvedchenya/xview2:37_pytorch14
+REM Run the container on four pre/post test image pairs, each limited to 7g of memory and one CPU.
+REM Host directories are bind-mounted as /input and /output.
+docker run --rm --memory=7g --memory-swap=7g --memory-swappiness=0 --kernel-memory=7g --cpus=1^
+ -v j:\xview2\test\images:/input^
+ -v j:\xview2\test_predictions:/output^
+ ekhvedchenya/xview2:37_pytorch14^
+ /input/test_pre_00000.png /input/test_post_00000.png /output/test_localization_00000_pytorch14_v37.png /output/test_damage_00000_pytorch14_v37.png --color-mask --raw
+docker run --rm --memory=7g --memory-swap=7g --memory-swappiness=0 --kernel-memory=7g --cpus=1^
+ -v j:\xview2\test\images:/input^
+ -v j:\xview2\test_predictions:/output^
+ ekhvedchenya/xview2:37_pytorch14^
+ /input/test_pre_00284.png /input/test_post_00284.png /output/test_localization_00284_pytorch14_v37.png /output/test_damage_00284_pytorch14_v37.png --color-mask --raw
+docker run --rm --memory=7g --memory-swap=7g --memory-swappiness=0 --kernel-memory=7g --cpus=1^
+ -v j:\xview2\test\images:/input^
+ -v j:\xview2\test_predictions:/output^
+ ekhvedchenya/xview2:37_pytorch14^
+ /input/test_pre_00033.png /input/test_post_00033.png /output/test_localization_00033_pytorch14_v37.png /output/test_damage_00033_pytorch14_v37.png --color-mask --raw
+docker run --rm --memory=7g --memory-swap=7g --memory-swappiness=0 --kernel-memory=7g --cpus=1^
+ -v j:\xview2\test\images:/input^
+ -v j:\xview2\test_predictions:/output^
+ ekhvedchenya/xview2:37_pytorch14^
+ /input/test_pre_00096.png /input/test_post_00096.png /output/test_localization_00096_pytorch14_v37.png /output/test_damage_00096_pytorch14_v37.png --color-mask --raw
diff --git a/tests/guatemala-volcano_00000000_post_disaster.png b/tests/guatemala-volcano_00000000_post_disaster.png
new file mode 100644
index 0000000..e067211
Binary files /dev/null and b/tests/guatemala-volcano_00000000_post_disaster.png differ
diff --git a/tests/hurricane-florence_00000115_post_disaster.png b/tests/hurricane-florence_00000115_post_disaster.png
new file mode 100644
index 0000000..80e8b45
Binary files /dev/null and b/tests/hurricane-florence_00000115_post_disaster.png differ
diff --git a/tests/hurricane-florence_00000475_post_disaster.png b/tests/hurricane-florence_00000475_post_disaster.png
new file mode 100644
index 0000000..db9a9e8
Binary files /dev/null and b/tests/hurricane-florence_00000475_post_disaster.png differ
diff --git a/tests/post.png b/tests/post.png
new file mode 100644
index 0000000..c933a53
Binary files /dev/null and b/tests/post.png differ
diff --git a/tests/pre.png b/tests/pre.png
new file mode 100644
index 0000000..2b7aef8
Binary files /dev/null and b/tests/pre.png differ
diff --git a/tests/test_damage_00121_prediction.png b/tests/test_damage_00121_prediction.png
new file mode 100644
index 0000000..0cd8252
Binary files /dev/null and b/tests/test_damage_00121_prediction.png differ
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
new file mode 100644
index 0000000..e9165cd
--- /dev/null
+++ b/tests/test_dataset.py
@@ -0,0 +1,126 @@
+import cv2
+from xview.augmentations import old_post_transform_augs, light_post_image_transform, medium_post_transform_augs
+from xview.dataset import make_dual_dataframe
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+def test_dataset():
+ df = pd.read_csv("../train_folds.csv")
+ train_df = make_dual_dataframe(df)
+ non_damaged_buildings = train_df["non_damaged_buildings_post"].values
+ light_damaged_buildings = train_df["light_damaged_buildings_post"].values
+ medium_damaged_buildings = train_df["medium_damaged_buildings_post"].values
+ destroyed_buildings = train_df["destroyed_buildings_post"].values
+ non_damaged_pixels = train_df["non_damaged_pixels_post"].values
+ light_damaged_pixels = train_df["light_damaged_pixels_post"].values
+ medium_damaged_pixels = train_df["medium_damaged_pixels_post"].values
+ destroyed_pixels = train_df["destroyed_pixels_post"].values
+ print(
+ non_damaged_buildings.sum(),
+ light_damaged_buildings.sum(),
+ medium_damaged_buildings.sum(),
+ destroyed_buildings.sum(),
+ )
+ print(
+ (1024 * 1024) * len(train_df)
+ - non_damaged_pixels.sum()
+ - light_damaged_pixels.sum()
+ - medium_damaged_pixels.sum()
+ - destroyed_pixels.sum(),
+ non_damaged_pixels.sum(),
+ light_damaged_pixels.sum(),
+ medium_damaged_pixels.sum(),
+ destroyed_pixels.sum(),
+ )
+def test_post_transform():
+ image = cv2.imread("guatemala-volcano_00000000_post_disaster.png")
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ post_transform = old_post_transform_augs()
+ image_acc = image.astype(np.long)
+ n = 1000
+ for i in range(n):
+ image_t = post_transform(image=image)["image"]
+ image_acc += image_t
+ image_acc = (image_acc * (1. / n)).astype(np.uint8)
+ plt.figure()
+ plt.imshow(image)
+ plt.show()
+ plt.figure()
+ plt.imshow(image_acc)
+ plt.show()
+def test_light_post_image_transform():
+ image = cv2.imread("guatemala-volcano_00000000_post_disaster.png")
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ post_transform = light_post_image_transform()
+ image_acc = image.astype(np.long)
+ n = 1000
+ for i in range(n):
+ image_t = post_transform(image=image)["image"]
+ image_acc += image_t
+ image_acc = (image_acc * (1. / n)).astype(np.uint8)
+ plt.figure()
+ plt.imshow(image)
+ plt.show()
+ plt.figure()
+ plt.imshow(image_acc)
+ plt.show()
+def test_medium_post_transform_augs():
+ image = cv2.imread("guatemala-volcano_00000000_post_disaster.png")
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+ plt.figure()
+ plt.imshow(image)
+ plt.show()
+ post_transform = medium_post_transform_augs()
+ k = 10
+ for i in range(k):
+ image_t = post_transform(image=image)["image"]
+ plt.figure()
+ plt.imshow(image_t)
+ plt.show()
+ image_acc = image.astype(np.long)
+ n = 100
+ for i in range(n):
+ image_t = post_transform(image=image)["image"]
+ image_acc += image_t
+ image_acc = (image_acc * (1. / n)).astype(np.uint8)
+ plt.figure()
+ plt.imshow(image_acc)
+ plt.show()
diff --git a/tests/test_load_mask.py b/tests/test_load_mask.py
new file mode 100644
index 0000000..f6f8914
--- /dev/null
+++ b/tests/test_load_mask.py
@@ -0,0 +1,43 @@
+import cv2
+import numpy as np
+from PIL import Image
+import matplotlib.pyplot as plt
+from xview.dataset import read_mask
+from xview.utils.inference_image_output import resize_mask_one_hot
+def test_load_paletted():
+ fname = "d:\\datasets\\xview2\\train\\masks\\hurricane-harvey_00000402_post_disaster.png"
+ a = cv2.imread(fname)
+ b = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)
+ c = cv2.imread(fname, cv2.IMREAD_ANYCOLOR)
+ d = cv2.imread(fname, cv2.IMREAD_ANYCOLOR)
+ e = cv2.imread(fname, cv2.IMREAD_ANYDEPTH)
+ f = cv2.imread(fname, cv2.IMREAD_UNCHANGED)
+ g = np.array(Image.open(fname))
+ print(a.shape, np.unique(a))
+ print(b.shape, np.unique(b))
+ print(c.shape, np.unique(c))
+ print(d.shape, np.unique(d))
+ print(e.shape, np.unique(e))
+ print(f.shape, np.unique(f))
+ print(g.shape, np.unique(g))
+def test_mask_resize():
+ fname = "d:\\datasets\\xview2\\train\\masks\\hurricane-harvey_00000402_post_disaster.png"
+ mask = read_mask(fname)
+ mask2 = cv2.resize(mask, (512,512), interpolation=cv2.INTER_NEAREST)
+ mask3 = resize_mask_one_hot(mask, (512,512))
+ cv2.imshow("Original", mask * 255)
+ cv2.imshow("Nearest", mask2 * 255)
+ cv2.imshow("Smart", mask3 * 255)
+ cv2.waitKey(-1)
diff --git a/tests/test_localization_00121_prediction.png b/tests/test_localization_00121_prediction.png
new file mode 100644
index 0000000..04a2fcb
Binary files /dev/null and b/tests/test_localization_00121_prediction.png differ
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..dd5760d
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,157 @@
+import torch
+from pytorch_toolbelt.utils.torch_utils import count_parameters
+from torch import nn
+from xview.dataset import OUTPUT_MASK_KEY
+from xview.losses import ArcFaceLoss2d, OHEMCrossEntropyLoss
+from xview.models.deeplab import resnet34_deeplab128
+from xview.models.fpn_v2 import (
+ resnet101_fpncatv2_256,
+ densenet201_fpncatv2_256,
+ efficientb4_fpncatv2_256,
+ inceptionv4_fpncatv2_256,
+from xview.models.hrnet_arc import hrnet18_arc
+from xview.models.segcaps import SegCaps
+from xview.models.unet import resnet18_unet32
+from xview.models.unetv2 import inceptionv4_unet_v2, resnet101_unet_v2
+def test_ohem_ce():
+ x = torch.randn((8, 5, 128, 128)).cuda()
+ y = torch.randint(0, 5, (8, 128, 128)).long().cuda()
+ loss = OHEMCrossEntropyLoss()
+ l = loss(x, y)
+ print(l)
+def test_conv_transpose():
+ x = torch.randn((1, 32, 128, 128)).cuda()
+ module = nn.ConvTranspose2d(32, 5, kernel_size=8, stride=4, padding=2).cuda()
+ y = module(x)
+ print(y.size())
+def test_hrnet18_arc():
+ x = torch.randn((1, 6, 256, 256))
+ net = hrnet18_arc().eval()
+ out = net(x)
+ tgt = torch.randint(0, 5, (1, 256, 256)).long()
+ criterion = ArcFaceLoss2d()
+ loss = criterion(out[OUTPUT_MASK_KEY], tgt)
+ print(out)
+def test_resnet18_unet():
+ x = torch.randn((1, 6, 256, 256))
+ net = resnet18_unet32().eval()
+ print(count_parameters(net))
+ out = net(x)
+ print(out)
+def test_resnet34_deeplab128():
+ x = torch.randn((1, 6, 512, 512))
+ net = resnet34_deeplab128().eval()
+ print(count_parameters(net))
+ out = net(x)
+ print(out)
+def test_seg_caps():
+ net = SegCaps(num_classes=5)
+ print(count_parameters(net))
+ x = torch.randn((4, 3, 256, 256))
+ y = net(x)
+ print(y.size())
+def test_selim_unet():
+ from xview.models.selim.unet import DensenetUnet
+ d = DensenetUnet(5, backbone_arch="densenet121")
+ d.eval()
+ import numpy as np
+ with torch.no_grad():
+ images = torch.from_numpy(np.zeros((16, 3, 256, 256), dtype="float32"))
+ i = d(images)
+ print(i.shape)
+ print(d)
+def test_inception_unet_like_selim():
+ d = inceptionv4_unet_v2().cuda().eval()
+ print(count_parameters(d))
+ print(d.decoder.decoder_features)
+ print(d.decoder.bottlenecks)
+ print(d.decoder.decoder_stages)
+ images = torch.rand(4, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
+def test_inception_unet_like_selim():
+ d = resnet101_unet_v2().cuda().eval()
+ print(count_parameters(d))
+ print(d.decoder.decoder_features)
+ print(d.decoder.bottlenecks)
+ print(d.decoder.decoder_stages)
+ images = torch.rand(4, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
+def test_resnet101_fpncatv2_256():
+ d = resnet101_fpncatv2_256().cuda().eval()
+ print(count_parameters(d))
+ images = torch.rand(2, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
+def test_densenet201_fpncatv2_256():
+ d = densenet201_fpncatv2_256().cuda().eval()
+ print(count_parameters(d))
+ images = torch.rand(4, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
+def test_inceptionv4_fpncatv2_256():
+ d = inceptionv4_fpncatv2_256().cuda().eval()
+ print(count_parameters(d))
+ images = torch.rand(2, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
+def test_efficientb4_fpncatv2_256():
+ d = efficientb4_fpncatv2_256().cuda().eval()
+ print(count_parameters(d))
+ images = torch.rand(4, 6, 512, 512).cuda()
+ i = d(images)
+ print(i[OUTPUT_MASK_KEY].size())
diff --git a/tests/test_post_00121.png b/tests/test_post_00121.png
new file mode 100644
index 0000000..4c8550f
Binary files /dev/null and b/tests/test_post_00121.png differ
diff --git a/tests/test_postprocessing.py b/tests/test_postprocessing.py
new file mode 100644
index 0000000..c304e8f
--- /dev/null
+++ b/tests/test_postprocessing.py
@@ -0,0 +1,63 @@
+import cv2
+import numpy as np
+from xview.dataset import read_mask
+import matplotlib.pyplot as plt
+from xview.postprocessing import make_predictions_floodfill, make_predictions_dominant_v2
+from xview.utils.inference_image_output import make_rgb_image
+import pytest
+@pytest.mark.parametrize(["actual", "expected"], [
+ ("hurricane-florence_00000115_post_disaster.npy", "hurricane-florence_00000115_post_disaster.png"),
+ ("hurricane-florence_00000475_post_disaster.npy", "hurricane-florence_00000475_post_disaster.png"),
+def test_watershed(actual, expected):
+ dmg = np.load(actual)
+ dmg_true = read_mask(expected)
+ loc_cls, dmg_cls = make_predictions_dominant_v2(dmg)
+ plt.figure()
+ plt.imshow(make_rgb_image(dmg_true))
+ plt.show()
+ plt.figure()
+ plt.imshow(make_rgb_image(np.argmax(dmg, axis=0)))
+ plt.show()
+ plt.figure()
+ plt.imshow(make_rgb_image(loc_cls))
+ plt.show()
+ plt.figure()
+ plt.imshow(make_rgb_image(dmg_cls))
+ plt.show()
+def test_watershed_with_image():
+ dmg = read_mask("test_damage_00121_prediction.png")
+ loc = read_mask("test_localization_00121_prediction.png")
+ img = cv2.imread("test_post_00121.png")
+ # Fix mask
+ dmg[loc == 0] = 0
+ seed = dmg.copy()
+ seed[loc == 0] = 0
+ markers = cv2.watershed(img, seed.astype(int))
+ markers[markers == 0] = 1
+ plt.figure()
+ plt.imshow(dmg)
+ plt.show()
+ plt.figure()
+ plt.imshow(loc)
+ plt.show()
+ plt.figure()
+ plt.imshow(markers)
+ plt.show()
diff --git a/tests/test_registration.py b/tests/test_registration.py
new file mode 100644
index 0000000..2b3f4e1
--- /dev/null
+++ b/tests/test_registration.py
@@ -0,0 +1,105 @@
+import pytest
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+from xview.alignment import align_post_image_pyramid
+def test_ecc():
+ pre = cv2.imread("d:\\datasets\\xview2\\train\\images\\guatemala-volcano_00000002_pre_disaster.png")
+ post = cv2.imread("d:\\datasets\\xview2\\train\\images\\guatemala-volcano_00000002_post_disaster.png")
+ warpMatrix = np.zeros((3, 3), dtype=np.float32)
+ warpMatrix[0, 0] = warpMatrix[1, 1] = warpMatrix[2, 2] = 1.0
+ stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, 0.0001)
+ retval = False
+ try:
+ retval, warpMatrix = cv2.findTransformECC(
+ cv2.cvtColor(pre, cv2.COLOR_RGB2GRAY),
+ cv2.cvtColor(post, cv2.COLOR_RGB2GRAY),
+ warpMatrix,
+ stop_criteria,
+ None,
+ 5,
+ )
+ post_warped = cv2.warpPerspective(post, warpMatrix, dsize=(1024, 1024), flags=cv2.WARP_INVERSE_MAP)
+ except:
+ retval = False
+ post_warped = post.copy()
+ plt.figure()
+ plt.imshow(pre)
+ plt.show()
+ plt.figure()
+ plt.imshow(post)
+ plt.show()
+ plt.figure()
+ plt.imshow(post_warped)
+ plt.show()
+def test_ecc_pyramid():
+ pre = cv2.imread("c:\\datasets\\xview2\\train\\images\\guatemala-volcano_00000001_pre_disaster.png")
+ post = cv2.imread("c:\\datasets\\xview2\\train\\images\\guatemala-volcano_00000001_post_disaster.png")
+ post_warped = align_post_image_pyramid(pre, post)
+ plt.figure()
+ plt.imshow(pre)
+ plt.show()
+ plt.figure()
+ plt.imshow(post)
+ plt.show()
+ plt.figure()
+ plt.imshow(post_warped)
+ plt.show()
+def test_ecc_simple():
+ pre = cv2.imread("pre.png")
+ post = cv2.imread("post.png")
+ warpMatrix = np.zeros((3, 3), dtype=np.float32)
+ warpMatrix[0, 0] = warpMatrix[1, 1] = warpMatrix[2, 2] = 1.0
+ stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 0.0001)
+ retval = False
+ try:
+ retval, warpMatrix = cv2.findTransformECC(
+ cv2.cvtColor(pre, cv2.COLOR_RGB2GRAY),
+ cv2.cvtColor(post, cv2.COLOR_RGB2GRAY),
+ warpMatrix,
+ stop_criteria,
+ None,
+ 5,
+ )
+ post_warped = cv2.warpPerspective(post, warpMatrix, dsize=(256, 256), flags=cv2.WARP_INVERSE_MAP)
+ except:
+ retval = False
+ post_warped = post.copy()
+ plt.figure()
+ plt.imshow(pre)
+ plt.show()
+ plt.figure()
+ plt.imshow(post)
+ plt.show()
+ plt.figure()
+ plt.imshow(post_warped)
+ plt.show()
diff --git a/train.csv b/train.csv
new file mode 100644
index 0000000..6c971e8
--- /dev/null
+++ b/train.csv
@@ -0,0 +1,18337 @@
diff --git a/train.sh b/train.sh
new file mode 100644
index 0000000..c1f25fc
--- /dev/null
+++ b/train.sh
@@ -0,0 +1,99 @@
# Step 0
# Stop at the first failing command: later stages reference the checkpoints
# and predictions produced by the earlier ones.
set -e
# Fail early with a clear message when the dataset location is not configured.
: "${XVIEW2_DATA_DIR:?XVIEW2_DATA_DIR must be set to the dataset directory}"
python convert_masks.py -dd "$XVIEW2_DATA_DIR"
# Train base models (no pseudolabeling, no optimized weights)
# Batch size tuned to fit into p3.8xlarge instance or 4x1080Ti
# Estimated training time ~1 week
# NOTE(review): the --criterion values are not quoted, so the shell removes the
# inner quotes and splits each value at its spaces before fit_predict.py sees
# it -- confirm this matches what the argument parser expects.
# Step 1
python fit_predict.py --seed 330 -dd "$XVIEW2_DATA_DIR" -x fold0_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 0 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 332 -dd "$XVIEW2_DATA_DIR" -x fold0_resnet101_fpncatv2_256 --model resnet101_fpncatv2_256 --batch-size 48 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 0 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 13 -dd "$XVIEW2_DATA_DIR" -x fold1_seresnext50_unet_v2 --model seresnext50_unet_v2 --batch-size 32 --epochs 150 --learning-rate 0.001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 331 -dd "$XVIEW2_DATA_DIR" -x fold1_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 1331 -dd "$XVIEW2_DATA_DIR" -x fold1_densenet201_fpncatv2_256 --model densenet201_fpncatv2_256 --batch-size 32 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 333 -dd "$XVIEW2_DATA_DIR" -x fold2_inceptionv4_fpncatv2_256 --model inceptionv4_fpncatv2_256 --batch-size 48 --epochs 150 --learning-rate 0.001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler poly -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 303 -dd "$XVIEW2_DATA_DIR" -x fold2_densenet169_unet_v2 --model densenet169_unet_v2 --batch-size 32 --epochs 150 --learning-rate 0.001 --criterion [['ohem_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 332 -dd "$XVIEW2_DATA_DIR" -x fold2_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 50 -dd "$XVIEW2_DATA_DIR" -x fold3_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 16 --epochs 150 --learning-rate 0.0003 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler cos -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 3 -dd "$XVIEW2_DATA_DIR" -x fold3_seresnext50_unet_v2 --model seresnext50_unet_v2 --batch-size 32 --epochs 150 --learning-rate 0.001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 3334 -dd "$XVIEW2_DATA_DIR" -x fold3_efficientb4_fpncatv2_256 --model efficientb4_fpncatv2_256 --batch-size 32 --epochs 150 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 334 -dd "$XVIEW2_DATA_DIR" -x fold4_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 150 --learning-rate 0.001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 4 --scheduler cos -wd 1e-05 --only-buildings True --crops True --post-transform True
python fit_predict.py --seed 133 -dd "$XVIEW2_DATA_DIR" -x fold4_resnet101_unet_v2 --model resnet101_unet_v2 --batch-size 40 --epochs 150 --learning-rate 0.001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 4 --scheduler simple -wd 0.0001 --only-buildings True --crops True --post-transform True
# Run inference on the test dataset with every stage-1 checkpoint
python predict.py -dd "$XVIEW2_DATA_DIR" -tta flipscale -p naive -o stage1_predictions -b 16 --fp16 \
    fold0_resnet34_unet_v2.pth \
    fold0_resnet101_fpncatv2_256.pth \
    fold1_seresnext50_unet_v2.pth \
    fold1_resnet34_unet_v2.pth \
    fold1_densenet201_fpncatv2_256.pth \
    fold2_inceptionv4_fpncatv2_256.pth \
    fold2_densenet169_unet_v2.pth \
    fold2_resnet34_unet_v2.pth \
    fold3_resnet34_unet_v2.pth \
    fold3_seresnext50_unet_v2.pth \
    fold3_efficientb4_fpncatv2_256.pth \
    fold4_resnet34_unet_v2.pth \
    fold4_resnet101_unet_v2.pth
# Step 2
# Fine-tune using pseudo-label predictions
# Estimated training time ~3-4 days
# NOTE(review): the --criterion values are not quoted, so the shell removes the
# inner quotes and splits each value at its spaces before finetune.py sees it --
# confirm this matches what the argument parser expects.
python finetune.py --seed 330 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold0_pl_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 0 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 332 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold0_pl_resnet101_fpncatv2_256 --model resnet101_fpncatv2_256 --batch-size 48 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 0 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 13 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold1_pl_seresnext50_unet_v2 --model seresnext50_unet_v2 --batch-size 32 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 331 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold1_pl_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 1331 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold1_pl_densenet201_fpncatv2_256 --model densenet201_fpncatv2_256 --batch-size 32 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 1 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 333 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold2_pl_inceptionv4_fpncatv2_256 --model inceptionv4_fpncatv2_256 --batch-size 48 --epochs 50 --learning-rate 0.001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler poly -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 303 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold2_pl_densenet169_unet_v2 --model densenet169_unet_v2 --batch-size 32 --epochs 50 --learning-rate 0.0001 --criterion [['ohem_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 332 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold2_pl_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 2 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 50 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold3_pl_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 16 --epochs 50 --learning-rate 0.0003 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler cos -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 3 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold3_pl_seresnext50_unet_v2 --model seresnext50_unet_v2 --batch-size 32 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler simple -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 3334 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold3_pl_efficientb4_fpncatv2_256 --model efficientb4_fpncatv2_256 --batch-size 32 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 3 --scheduler cos -wd 0.0001 --only-buildings True --crops True --post-transform True
python finetune.py --seed 334 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold4_pl_resnet34_unet_v2 --model resnet34_unet_v2 --batch-size 64 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1']] -w 16 -a medium --fp16 --fold 4 --scheduler cos -wd 1e-05 --only-buildings True --crops True --post-transform True
python finetune.py --seed 133 -dd "$XVIEW2_DATA_DIR" -pl stage1_predictions_pseudolabeling -x fold4_pl_resnet101_unet_v2 --model resnet101_unet_v2 --batch-size 40 --epochs 50 --learning-rate 0.0001 --criterion [['weighted_ce', '1'], ['focal', '1']] -w 16 -a medium --fp16 --fold 4 --scheduler simple -wd 0.0001 --only-buildings True --crops True --post-transform True
# Make OOF (out-of-fold) predictions with the fine-tuned models
# Storing the raw masks in NPY format may require up to 1 TB of disk space
# NOTE(review): the script is named predict_off.py while the step is about OOF
# predictions -- confirm the file name is not a typo for an "oof" script.
python predict_off.py -dd "$XVIEW2_DATA_DIR" \
    fold0_pl_resnet34_unet_v2.pth \
    fold0_pl_resnet101_fpncatv2_256.pth \
    fold1_pl_seresnext50_unet_v2.pth \
    fold1_pl_resnet34_unet_v2.pth \
    fold1_pl_densenet201_fpncatv2_256.pth \
    fold2_pl_inceptionv4_fpncatv2_256.pth \
    fold2_pl_densenet169_unet_v2.pth \
    fold2_pl_resnet34_unet_v2.pth \
    fold3_pl_resnet34_unet_v2.pth \
    fold3_pl_seresnext50_unet_v2.pth \
    fold3_pl_efficientb4_fpncatv2_256.pth \
    fold4_pl_resnet34_unet_v2.pth \
    fold4_pl_resnet101_unet_v2.pth
# Optimize per-class weights. This produces an optimized_weights_%timestamp%.csv file
# This is a very CPU- and IO-intensive operation
# The exhaustive search for optimal weights may take up to several hours PER checkpoint.
python optimize_softmax.py -dd "$XVIEW2_DATA_DIR" \
    fold0_pl_resnet34_unet_v2.pth \
    fold0_pl_resnet101_fpncatv2_256.pth \
    fold1_pl_seresnext50_unet_v2.pth \
    fold1_pl_resnet34_unet_v2.pth \
    fold1_pl_densenet201_fpncatv2_256.pth \
    fold2_pl_inceptionv4_fpncatv2_256.pth \
    fold2_pl_densenet169_unet_v2.pth \
    fold2_pl_resnet34_unet_v2.pth \
    fold3_pl_resnet34_unet_v2.pth \
    fold3_pl_seresnext50_unet_v2.pth \
    fold3_pl_efficientb4_fpncatv2_256.pth \
    fold4_pl_resnet34_unet_v2.pth \
    fold4_pl_resnet101_unet_v2.pth
diff --git a/train_folds.csv b/train_folds.csv
new file mode 100644
index 0000000..6da020a
--- /dev/null
+++ b/train_folds.csv
@@ -0,0 +1,18337 @@
diff --git a/xview/alignment.py b/xview/alignment.py
new file mode 100644
index 0000000..4d1b170
--- /dev/null
+++ b/xview/alignment.py
@@ -0,0 +1,91 @@
+import numpy as np
+import cv2
def align_post_image(pre, post):
    """Warp ``post`` so that it lines up with ``pre`` using ECC registration.

    A homography is estimated with ``cv2.findTransformECC`` on grayscale
    versions of both images and applied to ``post``.

    :param pre: Reference (pre-disaster) 3-channel image.
    :param post: Image to align (post-disaster), 3-channel.
    :return: The aligned post image with the same width and height as
        ``post``; an unmodified copy of ``post`` when OpenCV reports an
        error (for example when ECC does not converge).
    """
    # Start the search from the identity homography.
    warp_matrix = np.eye(3, dtype=np.float32)
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, 0.0001)

    try:
        # The motion type is the 4th positional argument and has to be given
        # explicitly: a 3x3 warp matrix is only valid for MOTION_HOMOGRAPHY.
        _, warp_matrix = cv2.findTransformECC(
            cv2.cvtColor(pre, cv2.COLOR_RGB2GRAY),
            cv2.cvtColor(post, cv2.COLOR_RGB2GRAY),
            warp_matrix,
            cv2.MOTION_HOMOGRAPHY,
            stop_criteria,
            None,
            5,
        )
        # Keep the output size equal to the input size so that both the aligned
        # and the fallback result have identical shapes.
        return cv2.warpPerspective(
            post, warp_matrix, dsize=(post.shape[1], post.shape[0]), flags=cv2.WARP_INVERSE_MAP
        )
    except cv2.error:
        # Best effort: alignment is optional, so fall back to the original image.
        return post.copy()
def align_post_image_pyramid(pre, post):
    """Align ``post`` to ``pre`` with coarse-to-fine ECC registration.

    Four-level Gaussian pyramids are built for the grayscale versions of both
    images.  Starting at the coarsest level, a homography is refined with
    ``cv2.findTransformECC`` and carried over to the next finer level.

    :param pre: Reference (pre-disaster) 3-channel image.
    :param post: Image to align (post-disaster), 3-channel.
    :return: ``post`` warped by the accumulated homography, same width and
        height as ``post``.  Levels where OpenCV reports an error are
        skipped, so in the worst case the identity warp is applied.
    """
    num_levels = 4
    pre_pyrs = [cv2.cvtColor(pre, cv2.COLOR_RGB2GRAY)]
    post_pyrs = [cv2.cvtColor(post, cv2.COLOR_RGB2GRAY)]
    for _ in range(num_levels - 1):
        pre_pyrs.append(cv2.pyrDown(pre_pyrs[-1]))
        post_pyrs.append(cv2.pyrDown(post_pyrs[-1]))

    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.0001)

    # pyrDown halves the resolution, so a homography H expressed in the pixel
    # coordinates of one level becomes S * H * S^-1 on the next finer level,
    # with S = diag(2, 2, 1).
    scale_up = np.diag([2.0, 2.0, 1.0]).astype(np.float32)
    scale_down = np.diag([0.5, 0.5, 1.0]).astype(np.float32)

    # Accumulated inverse-map homography: aligned(x) = post(M * x).
    M = np.eye(3, dtype=np.float32)

    levels = zip(reversed(pre_pyrs), reversed(post_pyrs))
    for level, (pre_i, post_i) in enumerate(levels):
        if level > 0:
            M = np.dot(np.dot(scale_up, M), scale_down)

        post_i_refined = cv2.warpPerspective(
            post_i, M, dsize=(post_i.shape[1], post_i.shape[0]), flags=cv2.WARP_INVERSE_MAP
        )

        warp_matrix = np.eye(3, dtype=np.float32)
        try:
            # The motion type is the 4th positional argument; a 3x3 warp
            # matrix requires MOTION_HOMOGRAPHY.
            retval, warp_matrix = cv2.findTransformECC(
                pre_i,
                post_i_refined,
                warp_matrix,
                cv2.MOTION_HOMOGRAPHY,
                stop_criteria,
                None,
                5,
            )
        except cv2.error:
            # ECC failed on this level: keep the estimate obtained so far.
            continue

        if retval:
            # post_i_refined(W * x) == post_i(M * W * x), hence the refinement
            # is applied on the right-hand side.
            M = np.dot(M, warp_matrix)

    post_warped = cv2.warpPerspective(post, M, dsize=(post.shape[1], post.shape[0]), flags=cv2.WARP_INVERSE_MAP)
    return post_warped
diff --git a/xview/augmentations.py b/xview/augmentations.py
new file mode 100644
index 0000000..928a79c
--- /dev/null
+++ b/xview/augmentations.py
@@ -0,0 +1,221 @@
+from typing import Tuple
+import albumentations as A
+import cv2
# Names exported by ``from .augmentations import *``.
__all__ = [
    "safe_color_augmentations",
    "safe_spatial_augmentations",
    "light_color_augmentations",
    "light_spatial_augmentations",
    "light_post_image_transform",
    "medium_color_augmentations",
    "medium_spatial_augmentations",
    "medium_post_transform_augs",
    "hard_spatial_augmentations",
    "hard_color_augmentations",
    "old_light_augmentations",
    "old_post_transform_augs",
]
def safe_color_augmentations():
    """Build the "safe" color pipeline: a single brightness/contrast jitter with limits of 0.1."""
    jitter = A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, brightness_by_max=True)
    return A.Compose([jitter])
def safe_spatial_augmentations(image_size: Tuple[int, int]):
    """Build the "safe" spatial pipeline.

    Applies a small shift/scale/rotate with zero-filled borders, mask dropout
    and a transpose + 90-degree rotation pair.

    :param image_size: Accepted for signature parity with the other
        factories; not used by this function.
    :return: The composed albumentations transform.
    """
    shift_scale_rotate = A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.1,
        rotate_limit=5,
        border_mode=cv2.BORDER_CONSTANT,
        value=0,
        mask_value=0,
    )
    mask_dropout = A.MaskDropout(10)
    d4 = A.Compose([A.Transpose(), A.RandomRotate90()])
    return A.Compose([shift_scale_rotate, mask_dropout, d4])
def light_color_augmentations():
    """Build the "light" color pipeline: brightness/contrast jitter (limits 0.1) followed by random gamma."""
    jitter = A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, brightness_by_max=True)
    gamma = A.RandomGamma(gamma_limit=(90, 110))
    return A.Compose([jitter, gamma])
def light_spatial_augmentations(image_size: Tuple[int, int]):
    """Build the "light" spatial pipeline.

    :param image_size: Accepted for signature parity with the other
        factories; not used by this function.
    :return: Composition of shift/scale/rotate, transpose + 90-degree
        rotation, brightness/contrast jitter and mask dropout.
    """
    affine = A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT)
    # D4 augmentations
    d4 = A.Compose([A.Transpose(), A.RandomRotate90()])
    # Augmentations that leave the geometry untouched
    brightness_contrast = A.RandomBrightnessContrast()
    mask_dropout = A.MaskDropout(max_objects=10)
    return A.Compose([affine, d4, brightness_contrast, mask_dropout])
def old_light_augmentations(image_size: Tuple[int, int]):
    """Build the "old" light pipeline.

    :param image_size: Accepted for signature parity with the other
        factories; not used by this function.
    :return: Composition of shift/scale/rotate, transpose + 90-degree
        rotation, brightness/contrast jitter and an elastic transform with
        zero-filled borders.
    """
    affine = A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT)
    # D4 augmentations
    d4 = A.Compose([A.Transpose(), A.RandomRotate90()])
    brightness_contrast = A.RandomBrightnessContrast()
    elastic = A.ElasticTransform(border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0)
    return A.Compose([affine, d4, brightness_contrast, elastic])
def light_post_image_transform():
    """Build the "light" perturbation applied to the post image only.

    :return: ``A.OneOf`` choosing between a no-op, a pad-then-random-crop
        back to 1024x1024, and a small shift/scale/rotate.
    """
    pad_then_crop = A.Compose(
        [
            A.PadIfNeeded(1024 + 10, 1024 + 10, border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0),
            A.RandomSizedCrop((1024 - 5, 1024 + 5), 1024, 1024),
        ],
        p=0.2,
    )
    small_affine = A.ShiftScaleRotate(
        shift_limit=0.02,
        rotate_limit=3,
        scale_limit=0.02,
        border_mode=cv2.BORDER_CONSTANT,
        mask_value=0,
        value=0,
        p=0.2,
    )
    return A.OneOf([A.NoOp(), pad_then_crop, small_affine])
def old_post_transform_augs():
    """Build the "old" perturbation applied to the post image only.

    :return: ``A.OneOf`` choosing between a no-op, a pad-then-random-crop
        back to 1024x1024, and a shift/scale/rotate.
    """
    pad_then_crop = A.Compose(
        [
            A.PadIfNeeded(1024 + 20, 1024 + 20, border_mode=cv2.BORDER_CONSTANT, value=0),
            A.RandomSizedCrop((1024 - 10, 1024 + 10), 1024, 1024),
        ],
        p=0.2,
    )
    affine = A.ShiftScaleRotate(
        shift_limit=0.0625, rotate_limit=3, scale_limit=0.05, border_mode=cv2.BORDER_CONSTANT, value=0, p=0.2
    )
    return A.OneOf([A.NoOp(), pad_then_crop, affine])
def medium_post_transform_augs():
    """Build the "medium" perturbation applied to the post image only.

    :return: ``A.OneOf`` choosing between a no-op, a pad-then-random-crop
        back to 1024x1024, and a shift/scale/rotate.
    """
    pad_then_crop = A.Compose(
        [
            A.PadIfNeeded(1024 + 40, 1024 + 40, border_mode=cv2.BORDER_CONSTANT, value=0),
            A.RandomSizedCrop((1024 - 20, 1024 + 20), 1024, 1024),
        ]
    )
    affine = A.ShiftScaleRotate(
        shift_limit=0.1, rotate_limit=5, scale_limit=0.075, border_mode=cv2.BORDER_CONSTANT, value=0
    )
    return A.OneOf([A.NoOp(), pad_then_crop, affine])
def medium_color_augmentations():
    """Build the "medium" color pipeline.

    :return: Composition of brightness/contrast jitter (limits 0.2), random
        gamma and, with ``p=0.2``, one of no-op / hue-saturation-value shift /
        RGB shift.
    """
    jitter = A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, brightness_by_max=True)
    gamma = A.RandomGamma(gamma_limit=(90, 110))
    color_shift = A.OneOf(
        [
            A.NoOp(p=0.8),
            A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10),
            A.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10),
        ],
        p=0.2,
    )
    return A.Compose([jitter, gamma, color_shift])
def medium_spatial_augmentations(image_size: Tuple[int, int], no_mask_dropout=False):
    """Build the "medium" spatial pipeline.

    :param image_size: Accepted for signature parity with the other
        factories; not used by this function.
    :param no_mask_dropout: When true, the final mask-dropout step is
        replaced by a no-op.
    :return: Composition of an optional grid shuffle, shift/scale/rotate,
        transpose + 90-degree rotation, brightness/contrast jitter and the
        mask-dropout (or no-op) step.
    """
    grid_shuffle = A.OneOf(
        [
            A.NoOp(p=0.8),
            A.RandomGridShuffle(grid=(4, 4), p=0.2),
            A.RandomGridShuffle(grid=(3, 3), p=0.2),
            A.RandomGridShuffle(grid=(2, 2), p=0.2),
        ],
        p=1,
    )
    affine = A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT)
    # D4 augmentations
    d4 = A.Compose([A.Transpose(), A.RandomRotate90()])
    brightness_contrast = A.RandomBrightnessContrast()

    if no_mask_dropout:
        last_step = A.NoOp()
    else:
        last_step = A.MaskDropout(max_objects=10)

    return A.Compose([grid_shuffle, affine, d4, brightness_contrast, last_step])
def hard_color_augmentations():
    """Build the "hard" color pipeline.

    :return: Composition of brightness/contrast jitter (limits 0.3), random
        gamma, one optional noise transform, one optional color shift and
        random fog.
    """
    jitter = A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, brightness_by_max=True)
    gamma = A.RandomGamma(gamma_limit=(90, 110))
    noise = A.OneOf([A.NoOp(), A.MultiplicativeNoise(), A.GaussNoise(), A.ISONoise()])
    color_shift = A.OneOf([A.RGBShift(), A.HueSaturationValue(), A.NoOp()])
    fog = A.RandomFog(fog_coef_lower=0.05, fog_coef_upper=0.3)
    return A.Compose([jitter, gamma, noise, color_shift, fog])
def hard_spatial_augmentations(image_size: Tuple[int, int], rot_angle=45):
    """Build the "hard" spatial pipeline.

    :param image_size: Accepted for signature parity with the other
        factories; not used by this function.
    :param rot_angle: Rotation limit passed to ``A.ShiftScaleRotate``.
    :return: Composition of an optional grid shuffle, mask dropout, an
        optional shift/scale/rotate, an optional elastic or grid distortion
        and a transpose + 90-degree rotation pair.
    """
    grid_shuffle = A.OneOf(
        [
            A.NoOp(),
            A.RandomGridShuffle(grid=(4, 4)),
            A.RandomGridShuffle(grid=(3, 3)),
            A.RandomGridShuffle(grid=(2, 2)),
        ]
    )
    mask_dropout = A.MaskDropout(max_objects=10)
    affine = A.OneOf(
        [
            A.ShiftScaleRotate(
                scale_limit=0.1, rotate_limit=rot_angle, border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0
            ),
            A.NoOp(),
        ]
    )
    distortion = A.OneOf(
        [
            A.ElasticTransform(border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0),
            A.GridDistortion(border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0),
            A.NoOp(),
        ]
    )
    # D4
    d4 = A.Compose([A.Transpose(), A.RandomRotate90()])
    return A.Compose([grid_shuffle, mask_dropout, affine, distortion, d4])
diff --git a/xview/averaging_rounder.py b/xview/averaging_rounder.py
new file mode 100644
index 0000000..a637e37
--- /dev/null
+++ b/xview/averaging_rounder.py
@@ -0,0 +1,99 @@
+from functools import partial
+from multiprocessing.pool import Pool
+import cv2
+import numpy as np
+import scipy as sp
+import torch
+from pytorch_toolbelt.utils.torch_utils import to_numpy
+from tqdm import tqdm
+from xview.dataset import read_mask
+from xview.metric import CompetitionMetricCallback
+from xview.postprocessing import make_predictions_naive
def _compute_fn(args, coef_exp):
    """Score one sample after scaling its prediction by ``coef_exp``.

    :param args: ``(prediction, ground_truth_mask)`` pair.
    :param coef_exp: Weights multiplied into the float32 prediction.
    :return: The row produced by ``CompetitionMetricCallback.get_row_pair``;
        the ground-truth mask is passed as both truth arguments.
    """
    prediction, dmg_true = args
    weighted = prediction.astype(np.float32) * coef_exp
    loc_pred, dmg_pred = make_predictions_naive(weighted)
    return CompetitionMetricCallback.get_row_pair(loc_pred, dmg_pred, dmg_true, dmg_true)
class AveragingOptimizedRounder(object):
    """Search for per-channel weights that maximise the competition score.

    Predictions of several models are averaged, scaled by a weight vector
    and scored with ``CompetitionMetricCallback``; Nelder-Mead is used to
    find the weight vector with the best score.

    :param apply_softmax: ``"pre"`` applies softmax to every model's
        prediction before averaging, ``"post"`` applies it to the average;
        any other value disables softmax.
    :param workers: Number of worker processes used for scoring. ``None``
        lets ``multiprocessing`` pick the count; ``0`` or a negative value
        scores in the current process.
    """

    def __init__(self, apply_softmax, workers=0):
        self.coef_ = 0
        self.workers = workers
        self.apply_softmax = apply_softmax

    @staticmethod
    def _softmax_fp16(x):
        """Softmax over axis 0 computed in float32 and stored as float16."""
        return torch.from_numpy(x).float().softmax(dim=0).numpy().astype(np.float16)

    @torch.no_grad()
    def _prepare_data(self, X, y):
        """Load and average the predictions and read the ground-truth masks.

        :param X: One list of ``.npy`` paths per model; ``X[j][i]`` is the
            prediction of model ``j`` for sample ``i``.
        :param y: Ground-truth mask paths, one per sample.
        :return: ``(X_data, Y_data)`` lists of equal length.
        """
        X_data = []
        num_samples = len(X[0])
        for i in tqdm(range(num_samples), desc="Loading predictions"):
            x_preds = []
            for model_files in X:
                x = np.load(model_files[i])
                if self.apply_softmax == "pre":
                    x = self._softmax_fp16(x)
                x_preds.append(x)

            x = np.mean(np.stack(x_preds), axis=0)
            if self.apply_softmax == "post":
                x = self._softmax_fp16(x)
            X_data.append(x)

        Y_data = [read_mask(yi) for yi in tqdm(y, desc="Loading ground-truths")]
        assert len(X_data) == len(Y_data)
        print("Loaded data into memory")
        return X_data, Y_data

    def _map_samples(self, fn, samples):
        """Apply ``fn`` to every sample, in a process pool when workers allow it.

        ``Pool`` rejects a process count below one, so ``workers <= 0``
        (including the constructor default) runs in the current process.
        Results of the pooled path arrive in arbitrary order.
        """
        if self.workers is None or self.workers > 0:
            with Pool(self.workers) as wp:
                return list(wp.imap_unordered(fn, samples))
        return [fn(sample) for sample in samples]

    def _score(self, coef, X_data, Y_data):
        """Return ``compute_metrics`` output for the weights ``coef``."""
        # Add two trailing axes so the weights broadcast over the spatial dims.
        coef_exp = np.expand_dims(np.expand_dims(coef, -1), -1)
        proc_fn = partial(_compute_fn, coef_exp=coef_exp)
        all_rows = self._map_samples(proc_fn, zip(X_data, Y_data))
        return CompetitionMetricCallback.compute_metrics(all_rows)

    def _target_metric_loss(self, coef, X, y):
        """Objective for the optimiser: ``1 - score`` for the weights ``coef``."""
        score, localization_f1, damage_f1, damage_f1s = self._score(coef, X, y)
        print(score, localization_f1, damage_f1, damage_f1s, "coeffs", coef)
        return 1.0 - score

    def fit(self, X, y):
        """Optimise the weights on the given predictions and masks.

        :param X: One list of ``.npy`` paths per model.
        :param y: Ground-truth mask paths.
        :return: The optimised weight vector (see :meth:`coefficients`).
        """
        # Imported explicitly: ``import scipy`` alone does not guarantee that
        # the ``scipy.optimize`` submodule is loaded.
        from scipy.optimize import minimize

        X_data, Y_data = self._prepare_data(X, y)
        loss_partial = partial(self._target_metric_loss, X=X_data, y=Y_data)
        initial_coef = [1.0, 1.0, 1.0, 1.0, 1.0]
        self.coef_ = minimize(
            loss_partial, initial_coef, method="nelder-mead", options={"maxiter": 100, "xatol": 0.001}
        )
        del X_data, Y_data
        return self.coefficients()

    def predict(self, X, y, coef: np.ndarray):
        """Score the averaged predictions with the given weights.

        :return: ``(score, localization_f1, damage_f1, damage_f1s)``.
        """
        X_data, Y_data = self._prepare_data(X, y)
        score, localization_f1, damage_f1, damage_f1s = self._score(coef, X_data, Y_data)
        del X_data, Y_data
        return score, localization_f1, damage_f1, damage_f1s

    def coefficients(self):
        """Return the weight vector found by the last :meth:`fit` call."""
        return self.coef_["x"]
diff --git a/xview/dataset.py b/xview/dataset.py
new file mode 100644
index 0000000..945e669
--- /dev/null
+++ b/xview/dataset.py
@@ -0,0 +1,608 @@
+import os
+from typing import List, Optional
+import albumentations as A
+import cv2
+import numpy as np
+import pandas as pd
+import torch
+from PIL import Image
+from pytorch_toolbelt.utils import fs
+from pytorch_toolbelt.utils.catalyst import PseudolabelDatasetMixin
+from pytorch_toolbelt.utils.torch_utils import tensor_from_rgb_image
+from scipy.ndimage import binary_dilation, binary_fill_holes
+from sklearn.utils import compute_sample_weight, compute_class_weight
+from torch.utils.data import Dataset, WeightedRandomSampler, ConcatDataset
+from .alignment import align_post_image
+from .augmentations import *
+from .utils.inference_image_output import colorize_mask
+INPUT_IMAGE_KEY = "image"
+INPUT_IMAGE_PRE_KEY = "image_pre"
+INPUT_IMAGE_POST_KEY = "image_post"
+INPUT_IMAGE_ID_KEY = "image_id"
+INPUT_MASK_KEY = "mask"
+INPUT_MASK_PRE_KEY = "mask_pre"
+INPUT_MASK_POST_KEY = "mask_post"
+OUTPUT_MASK_ARC_KEY = "mask_arc"
+OUTPUT_MASK_PRE_KEY = "mask_pre"
+OUTPUT_MASK_POST_KEY = "mask_post"
+INPUT_INDEX_KEY = "index"
+DISASTER_TYPE_KEY = "disaster_type"
+DAMAGE_TYPE_KEY = "damage_type"
+# Smaller masks for deep supervision
+OUTPUT_MASK_4_KEY = "mask_4"
+OUTPUT_MASK_8_KEY = "mask_8"
+OUTPUT_MASK_16_KEY = "mask_16"
+OUTPUT_MASK_32_KEY = "mask_32"
+OUTPUT_CLASS_KEY = "classes"
+DAMAGE_TYPES = ["no_damage", "minor_damage", "major_damage", "destroyed"]
+DISASTER_TYPES = ["volcano", "fire", "tornado", "tsunami", "flooding", "earthquake", "hurricane"]
def get_disaster_class_from_fname(fname: str) -> int:
    """Map a file name to the index of its disaster type.

    :param fname: Image file name; its id is taken with ``fs.id_from_fname``.
    :return: Index into ``DISASTER_TYPES`` of the first type whose name is a
        substring of the image id, or ``None`` when no type matches.
    """
    image_id = fs.id_from_fname(fname)
    matching_indexes = (
        index for index, disaster_name in enumerate(DISASTER_TYPES) if disaster_name in image_id
    )
    return next(matching_indexes, None)
def read_image(fname):
    """Read an image with ``cv2.imread``.

    :raises FileNotFoundError: When ``cv2.imread`` returns ``None``.
    """
    image = cv2.imread(fname)
    if image is not None:
        return image
    raise FileNotFoundError(fname)
def read_mask(fname):
    """Read a mask into a numpy array, dropping a trailing single channel axis.

    PIL is used for loading because it supports paletted images.
    """
    pil_image = Image.open(fname)
    mask = np.array(pil_image)
    if mask.ndim == 3:
        mask = np.squeeze(mask, axis=-1)
    return mask
def compute_boundary_mask(mask: np.ndarray) -> np.ndarray:
    """Compute a band of pixels surrounding the non-zero regions of ``mask``.

    The foreground is dilated with a 5x5 structuring element and its holes
    are filled; the pixels gained this way are dilated once more, and the
    foreground itself is removed from the result.

    :param mask: 2D array; every non-zero value counts as foreground.
    :return: ``uint8`` array of the same shape with 1 on boundary pixels
        and 0 elsewhere.
    """
    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the same dtype.
    structure = np.ones((5, 5), dtype=bool)
    # Work on an explicit boolean view so that ``~`` is a logical negation
    # even when the mask holds integer class labels.
    foreground = np.asarray(mask).astype(bool)

    dilated = binary_fill_holes(binary_dilation(foreground, structure=structure))
    ring = dilated & ~foreground

    # Thicken the ring, then remove whatever grew back over the foreground.
    ring = binary_dilation(ring, structure=structure) & ~foreground
    return ring.astype(np.uint8)
class ImageLabelDataset(Dataset):
    """Dataset of pre/post image pairs with an optional integer label per pair.

    Each sample is a dict holding the image id, the channel-wise stack of the
    pre and post images as a tensor and, when targets are given, the label
    under ``DAMAGE_TYPE_KEY``.
    """

    def __init__(
        self,
        pre_image_filenames: List[str],
        post_image_filenames: List[str],
        targets: Optional[np.ndarray],
        spatial_transform: A.Compose,
        color_transform: A.Compose = None,
        post_image_transform=None,
        image_loader=read_image,
    ):
        assert len(pre_image_filenames) == len(post_image_filenames)

        self.pre_image_filenames = pre_image_filenames
        self.post_image_filenames = post_image_filenames
        self.targets = targets

        self.get_image = image_loader

        self.spatial_transform = spatial_transform
        self.color_transform = color_transform
        self.post_image_transform = post_image_transform

    def __len__(self):
        return len(self.pre_image_filenames)

    def __getitem__(self, index):
        pre_fname = self.pre_image_filenames[index]
        post_fname = self.post_image_filenames[index]

        pre_image = self.get_image(pre_fname)
        post_image = self.get_image(post_fname)

        # The color transform is invoked separately for each image.
        if self.color_transform is not None:
            pre_image = self.color_transform(image=pre_image)["image"]
            post_image = self.color_transform(image=post_image)["image"]

        if self.post_image_transform is not None:
            post_image = self.post_image_transform(image=post_image)["image"]

        # Pre and post images are stacked along the channel axis so the spatial
        # transform treats them as a single image.
        stacked = np.dstack([pre_image, post_image])
        augmented = self.spatial_transform(image=stacked)

        sample = {
            INPUT_IMAGE_ID_KEY: fs.id_from_fname(pre_fname),
            INPUT_IMAGE_KEY: tensor_from_rgb_image(augmented["image"]),
        }

        if self.targets is not None:
            sample[DAMAGE_TYPE_KEY] = int(self.targets[index])

        return sample
class ImageMaskDataset(Dataset, PseudolabelDatasetMixin):
    """Dataset of pre/post image pairs with a post-disaster segmentation mask.

    Each sample is a dict holding the image id, the channel-wise stack of the
    pre and post images as a tensor, the disaster type index, the mask and a
    4-element float vector telling which of the mask values 1..4 are present.
    When no mask file names are given, the mask is filled with
    ``UNLABELED_SAMPLE``.
    """

    def __init__(
        self,
        pre_image_filenames: List[str],
        post_image_filenames: List[str],
        post_mask_filenames: Optional[List[str]],
        spatial_transform: A.Compose,
        color_transform: A.Compose = None,
        post_image_transform=None,
        image_loader=read_image,
        mask_loader=read_mask,
        use_edges=False,
        align_post=False,
    ):
        assert len(pre_image_filenames) == len(post_image_filenames)

        self.pre_image_filenames = pre_image_filenames
        self.post_image_filenames = post_image_filenames
        self.post_mask_filenames = post_mask_filenames

        self.get_image = image_loader
        self.get_mask = mask_loader

        self.spatial_transform = spatial_transform
        self.color_transform = color_transform
        self.post_image_transform = post_image_transform

        self.use_edges = use_edges
        self.align_post = align_post

    def __len__(self):
        return len(self.pre_image_filenames)

    def __getitem__(self, index):
        pre_fname = self.pre_image_filenames[index]
        pre_image = self.get_image(pre_fname)
        post_image = self.get_image(self.post_image_filenames[index])

        if self.align_post:
            post_image = align_post_image(pre_image, post_image)

        # The color transform is invoked separately for each image.
        if self.color_transform is not None:
            pre_image = self.color_transform(image=pre_image)["image"]
            post_image = self.color_transform(image=post_image)["image"]

        if self.post_image_transform is not None:
            post_image = self.post_image_transform(image=post_image)["image"]

        image = np.dstack([pre_image, post_image])

        if self.post_mask_filenames is None:
            # No labels available: mark every pixel as unlabeled.
            mask = np.ones(image.shape[:2], dtype=int) * UNLABELED_SAMPLE
        else:
            mask = self.get_mask(self.post_mask_filenames[index])

        data = self.spatial_transform(image=image, mask=mask)

        sample = {
            INPUT_IMAGE_ID_KEY: fs.id_from_fname(pre_fname),
            INPUT_IMAGE_KEY: tensor_from_rgb_image(data["image"]),
            DISASTER_TYPE_KEY: get_disaster_class_from_fname(pre_fname),
        }

        if "mask" in data:
            post_mask = data["mask"]
            sample[INPUT_MASK_KEY] = torch.from_numpy(post_mask).long()
            # Presence flag for each of the mask values 1, 2, 3 and 4.
            present = [(post_mask == value).any() for value in (1, 2, 3, 4)]
            sample[DAMAGE_TYPE_KEY] = torch.tensor(present).float()

        return sample

    def set_target(self, index: int, value: np.ndarray):
        """Overwrite the mask file of sample ``index`` with ``value``.

        The array is cast to ``uint8``, passed through ``colorize_mask`` and
        saved to the sample's mask path.
        """
        mask_fname = self.post_mask_filenames[index]
        colorized = colorize_mask(value.astype(np.uint8))
        colorized.save(mask_fname)
def get_transforms(image_size, augmentation, train_on_crops, enable_post_image_transform):
    """Assemble the transforms used for training and validation.

    :param image_size: ``(height, width)`` of the network input.
    :param augmentation: Name of the augmentation preset (``"hard"``,
        ``"medium"``, ``"medium_nmd"``, ``"light"``, ``"old"`` or ``"safe"``);
        any other value leaves spatial augmentation as a no-op and disables
        the color and post-image transforms.
    :param train_on_crops: Train on random sized crops instead of resized
        full images; validation then runs without cropping or resizing.
    :param enable_post_image_transform: When false, the post-image transform
        of the preset is discarded.
    :return: ``(train_transform, train_color_augs, valid_transform,
        post_image_transform)``.
    """
    height, width = image_size[0], image_size[1]

    if train_on_crops:
        crop_height_range = (int(height * 0.8), int(height * 1.2))
        train_crop_or_resize = A.RandomSizedCrop(crop_height_range, height, width)
        valid_crop_or_resize = A.NoOp()
        print("Training on crops", train_crop_or_resize.min_max_height)
    else:
        needs_resize = height != 1024 or width != 1024
        train_crop_or_resize = A.Resize(height, width) if needs_resize else A.NoOp()
        valid_crop_or_resize = train_crop_or_resize

    # The same statistics are repeated for the pre and the post image channels.
    normalize = A.Normalize(
        mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225)
    )

    # Each preset yields (color augs, spatial augs, post-image transform).
    # The post-image transform slightly moves the post image to simulate
    # imperfect alignment of the satellite images.
    presets = {
        "hard": lambda: (
            hard_color_augmentations(),
            hard_spatial_augmentations(image_size),
            light_post_image_transform(),
        ),
        "medium": lambda: (
            medium_color_augmentations(),
            medium_spatial_augmentations(image_size),
            medium_post_transform_augs(),
        ),
        "medium_nmd": lambda: (
            medium_color_augmentations(),
            medium_spatial_augmentations(image_size, no_mask_dropout=True),
            medium_post_transform_augs(),
        ),
        "light": lambda: (
            light_color_augmentations(),
            light_spatial_augmentations(image_size),
            light_post_image_transform(),
        ),
        "old": lambda: (
            None,
            old_light_augmentations(image_size),
            old_post_transform_augs(),
        ),
        "safe": lambda: (
            safe_color_augmentations(),
            safe_spatial_augmentations(image_size),
            old_post_transform_augs(),
        ),
    }

    build_preset = presets.get(augmentation)
    if build_preset is None:
        train_color_augs, train_spatial_augs, post_image_transform = None, A.NoOp(), None
    else:
        train_color_augs, train_spatial_augs, post_image_transform = build_preset()

    train_transform = A.Compose([train_crop_or_resize, train_spatial_augs, normalize])
    valid_transform = A.Compose([valid_crop_or_resize, normalize])

    if not enable_post_image_transform:
        post_image_transform = None
    else:
        print("Enabling post-image spatial transformation")

    return train_transform, train_color_augs, valid_transform, post_image_transform
+def get_datasets(
+    data_dir: str,
+    image_size=(512, 512),
+    augmentation="safe",
+    use_edges=False,
+    sanity_check=False,
+    fast=False,
+    fold=0,
+    only_buildings=False,
+    balance=False,
+    train_on_crops=False,
+    enable_post_image_transform=False,
+    align_post=False,
+    crops_multiplication_factor=3,
+):
+    """
+    Create train and validation segmentation datasets from ``train_folds.csv``.
+    :param data_dir: Dataset root; must contain ``train_folds.csv``. File names listed there are joined to it
+    :param image_size: Forwarded to `get_transforms`
+    :param augmentation: Name of the augmentation preset, forwarded to `get_transforms`
+    :param use_edges: Forwarded to `ImageMaskDataset` for both datasets
+    :param sanity_check: If True, return a tiny in-memory set built from the first training samples
+    :param fast: If True, keep only the first 128 rows of the train and validation splits
+    :param fold: Rows whose ``fold_post`` equals this value form the validation split
+    :param only_buildings: If True, train only on rows with ``non_damaged_buildings_pre > 0``
+    :param balance: If True, build a `WeightedRandomSampler` balancing damage-class combinations
+    :param train_on_crops: Forwarded to `get_transforms`; when `balance` is off the training set is
+        additionally repeated `crops_multiplication_factor` times
+    :param enable_post_image_transform: Forwarded to `get_transforms`
+    :param align_post: Forwarded to the validation `ImageMaskDataset` only
+    :param crops_multiplication_factor: Number of repetitions of the training set when training on crops
+    :return: (trainset, validset, train_sampler); train_sampler is None unless `balance` is set
+    """
+    df = pd.read_csv(os.path.join(data_dir, "train_folds.csv"))
+    df = make_dual_dataframe(df)
+    train_transform, train_color_augs, valid_transform, post_image_transform = get_transforms(
+        image_size=image_size,
+        augmentation=augmentation,
+        train_on_crops=train_on_crops,
+        enable_post_image_transform=enable_post_image_transform,
+    )
+    train_df = df[df["fold_post"] != fold]
+    valid_df = df[df["fold_post"] == fold]
+    if only_buildings:
+        only_buildings_mask = train_df["non_damaged_buildings_pre"] > 0
+        total = len(train_df)
+        percentage = only_buildings_mask.sum() / float(total)
+        train_df = train_df[only_buildings_mask]
+        print("Using only images with buildings for training", percentage)
+    if fast:
+        train_df = train_df[:128]
+        valid_df = valid_df[:128]
+    train_img_pre = [os.path.join(data_dir, fname) for fname in train_df["image_fname_pre"]]
+    train_img_post = [os.path.join(data_dir, fname) for fname in train_df["image_fname_post"]]
+    train_mask_post = [os.path.join(data_dir, fname) for fname in train_df["mask_fname_post"]]
+    valid_img_pre = [os.path.join(data_dir, fname) for fname in valid_df["image_fname_pre"]]
+    valid_img_post = [os.path.join(data_dir, fname) for fname in valid_df["image_fname_post"]]
+    valid_mask_post = [os.path.join(data_dir, fname) for fname in valid_df["mask_fname_post"]]
+    trainset = ImageMaskDataset(
+        train_img_pre,
+        train_img_post,
+        train_mask_post,
+        use_edges=use_edges,
+        spatial_transform=train_transform,
+        color_transform=train_color_augs,
+        post_image_transform=post_image_transform,
+    )
+    validset = ImageMaskDataset(
+        valid_img_pre,
+        valid_img_post,
+        valid_mask_post,
+        use_edges=use_edges,
+        spatial_transform=valid_transform,
+        align_post=align_post,
+    )
+    train_sampler = None
+    if balance:
+        # Presence flags for each damage class in the post-disaster image
+        non_damaged_buildings = train_df["non_damaged_buildings_post"].values > 0
+        light_damaged_buildings = train_df["light_damaged_buildings_post"].values > 0
+        medium_damaged_buildings = train_df["medium_damaged_buildings_post"].values > 0
+        destroyed_buildings = train_df["destroyed_buildings_post"].values > 0
+        # Encode the combination of present classes as a 4-bit label
+        labels = (
+            non_damaged_buildings * 1
+            + light_damaged_buildings * 2
+            + medium_damaged_buildings * 4
+            + destroyed_buildings * 8
+        )
+        # Epoch length is four times the size of the rarest class
+        num_samples = 4 * min(
+            int(non_damaged_buildings.sum()),
+            int(light_damaged_buildings.sum()),
+            int(medium_damaged_buildings.sum()),
+            int(destroyed_buildings.sum()),
+        )
+        weights = compute_sample_weight("balanced", labels)
+        # Sample with replacement only when more draws are requested than rows exist
+        train_sampler = WeightedRandomSampler(weights, int(num_samples), replacement=bool(num_samples > len(train_df)))
+        print("Using balancing for training", num_samples, weights.min(), weights.mean(), weights.max())
+    elif train_on_crops:
+        # If we're training on crops, make several crops for each sample
+        trainset = ConcatDataset([trainset] * int(crops_multiplication_factor))
+    if sanity_check:
+        # Do not index past the end of a training set smaller than 32 samples
+        first_batch = [trainset[i] for i in range(min(32, len(trainset)))]
+        return first_batch * 50, first_batch, None
+    return trainset, validset, train_sampler
+def make_dual_dataframe(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Pair every pre-disaster row with a post-disaster row in one wide dataframe.
+    Rows are sorted by ``image_id``, split on ``event_type`` ("pre" / "post") and merged
+    positionally, so every overlapping column appears twice with ``_pre`` / ``_post`` suffixes.
+    NOTE(review): pairing is by position after sorting; this assumes each image has exactly
+    one "pre" and one "post" row - confirm against the CSV.
+    :param df: Dataframe with at least ``image_id`` and ``event_type`` columns
+    :return: Dataframe with one row per pre/post pair
+    """
+    df = df.sort_values(by=["image_id"])
+    df_pre = df[df["event_type"] == "pre"].copy().reset_index(drop=True)
+    df_post = df[df["event_type"] == "post"].copy().reset_index(drop=True)
+    return df_pre.merge(df_post, left_index=True, right_index=True, suffixes=("_pre", "_post"))
+def get_test_dataset(data_dir: str, image_size=(224, 224), use_edges=False, fast=False, align_post=False):
+    """
+    Create the mask-less dataset of pre/post image pairs found in ``<data_dir>/test/images``.
+    :param data_dir: Dataset root directory containing ``test/images``
+    :param image_size: Target size; passed to ``A.Resize`` as (image_size[0], image_size[1])
+    :param use_edges: Forwarded to `ImageMaskDataset`
+    :param fast: If True, keep only the first 128 image pairs
+    :param align_post: Forwarded to `ImageMaskDataset`
+    :return: `ImageMaskDataset` built without masks
+    """
+    resize = A.Resize(image_size[0], image_size[1])
+    # Six-channel statistics: the ImageNet mean/std repeated twice
+    # (presumably for the stacked pre and post images - confirm against ImageMaskDataset)
+    normalize = A.Normalize(
+        mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225)
+    )
+    valid_transform = A.Compose([resize, normalize])
+    images_dir = os.path.join(data_dir, "test", "images")
+    # Filter on the same "_post_" token that is rewritten below, so every selected
+    # file has a distinct pre-disaster counterpart.
+    test_images_post = [
+        fname for fname in fs.find_images_in_dir(images_dir) if "_post_" in fs.id_from_fname(fname)
+    ]
+    test_images_pre = [fname.replace("_post_", "_pre_") for fname in test_images_post]
+    if fast:
+        test_images_pre = test_images_pre[:128]
+        test_images_post = test_images_post[:128]
+    validset = ImageMaskDataset(
+        test_images_pre,
+        test_images_post,
+        None,
+        use_edges=use_edges,
+        spatial_transform=valid_transform,
+        align_post=align_post,
+    )
+    return validset
+def get_pseudolabeling_dataset(
+    data_dir,
+    image_size,
+    include_masks,
+    augmentation,
+    use_edges=False,
+    train_on_crops=False,
+    enable_post_image_transform=False,
+    pseudolabels_dir=None,
+):
+    """
+    Create a dataset over the test images that uses the training-time augmentations.
+    :param data_dir: Dataset root directory containing ``test/images``
+    :param image_size: Forwarded to `get_transforms`
+    :param include_masks: If True, pair each post image with a same-named file in the masks directory
+    :param augmentation: Name of the augmentation preset, forwarded to `get_transforms`
+    :param use_edges: Forwarded to `ImageMaskDataset`
+    :param train_on_crops: Forwarded to `get_transforms`
+    :param enable_post_image_transform: Forwarded to `get_transforms`
+    :param pseudolabels_dir: Directory with pseudo-label masks; defaults to ``<data_dir>/test/masks``.
+        The directory is created if it does not exist
+    :return: `ImageMaskDataset` over the test image pairs
+    """
+    # The validation transform is not needed here: pseudo-labelled samples are training data
+    train_transform, train_color_augs, _, post_image_transform = get_transforms(
+        image_size=image_size,
+        augmentation=augmentation,
+        train_on_crops=train_on_crops,
+        enable_post_image_transform=enable_post_image_transform,
+    )
+    images_dir = os.path.join(data_dir, "test", "images")
+    masks_dir = pseudolabels_dir or os.path.join(data_dir, "test", "masks")
+    os.makedirs(masks_dir, exist_ok=True)
+    test_images_post = [fname for fname in fs.find_images_in_dir(images_dir) if "_post_" in fs.id_from_fname(fname)]
+    test_images_pre = [fname.replace("_post_", "_pre_") for fname in test_images_post]
+    if include_masks:
+        test_masks_post = [os.path.join(masks_dir, os.path.basename(fname)) for fname in test_images_post]
+    else:
+        test_masks_post = None
+    dataset = ImageMaskDataset(
+        test_images_pre,
+        test_images_post,
+        test_masks_post,
+        use_edges=use_edges,
+        color_transform=train_color_augs,
+        spatial_transform=train_transform,
+        post_image_transform=post_image_transform,
+    )
+    return dataset
+def get_classification_datasets(
+    data_dir: str,
+    min_size=64,
+    image_size=(224, 224),
+    augmentation="safe",
+    sanity_check=False,
+    fast=False,
+    fold=0,
+    enable_post_image_transform=False,
+):
+    """
+    Create train and validation datasets of building crops for damage classification.
+    :param data_dir: Dataset root; must contain ``train_crops.csv`` and a ``crops`` directory
+    :param min_size: Rows whose ``max_size`` is below this value are dropped
+    :param image_size: Crops are resized (longest side) and zero-padded to this size
+    :param augmentation: "safe" or "light" enable 90-degree rotations and transposition;
+        any other value disables augmentation after printing a message
+    :param sanity_check: If True, return a tiny in-memory set built from the first training samples
+    :param fast: If True, keep only the first 128 rows of the train and validation splits
+    :param fold: Rows whose ``fold`` equals this value form the validation split
+    :param enable_post_image_transform: Accepted for signature parity; not used by this function
+    :return: (trainset, validset, train_sampler); train_sampler is always None
+    """
+    resize_op = A.Compose(
+        [
+            A.LongestMaxSize(max(image_size[0], image_size[1])),
+            A.PadIfNeeded(image_size[0], image_size[1], border_mode=cv2.BORDER_CONSTANT, value=0),
+        ]
+    )
+    normalize = A.Normalize(
+        mean=(0.485, 0.456, 0.406, 0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225, 0.229, 0.224, 0.225)
+    )
+    df = pd.read_csv(os.path.join(data_dir, "train_crops.csv"))
+    if augmentation in ("safe", "light"):
+        augment = A.Compose([A.RandomRotate90(), A.Transpose()])
+    else:
+        print("Unsupported augmentation", augmentation)
+        augment = A.NoOp()
+    train_transform = A.Compose([resize_op, augment, normalize])
+    valid_transform = A.Compose([resize_op, normalize])
+    train_sampler = None
+    df = df[df["max_size"] >= min_size]
+    train_df = df[df["fold"] != fold]
+    valid_df = df[df["fold"] == fold]
+    if fast:
+        train_df = train_df[:128]
+        valid_df = valid_df[:128]
+    train_img_pre = [os.path.join(data_dir, "crops", fname) for fname in train_df["pre_crop_fname"]]
+    train_img_post = [os.path.join(data_dir, "crops", fname) for fname in train_df["post_crop"]]
+    train_targets = np.array(train_df["label"]) - 1  # Targets in CSV starting from 1
+    valid_img_pre = [os.path.join(data_dir, "crops", fname) for fname in valid_df["pre_crop_fname"]]
+    valid_img_post = [os.path.join(data_dir, "crops", fname) for fname in valid_df["post_crop"]]
+    valid_targets = np.array(valid_df["label"]) - 1  # Targets in CSV starting from 1
+    # `classes` and `y` are keyword-only in recent scikit-learn releases;
+    # passing them by keyword works on old and new versions alike.
+    classes = np.arange(len(DAMAGE_TYPES))
+    print(
+        "Sample weights (train,val)",
+        compute_class_weight("balanced", classes=classes, y=train_targets),
+        compute_class_weight("balanced", classes=classes, y=valid_targets),
+    )
+    trainset = ImageLabelDataset(
+        train_img_pre,
+        train_img_post,
+        train_targets,
+        spatial_transform=train_transform,
+        color_transform=None,
+        post_image_transform=None,
+    )
+    validset = ImageLabelDataset(valid_img_pre, valid_img_post, valid_targets, spatial_transform=valid_transform)
+    if sanity_check:
+        # Do not index past the end of a training set smaller than 32 samples
+        first_batch = [trainset[i] for i in range(min(32, len(trainset)))]
+        return first_batch * 50, first_batch, None
+    return trainset, validset, train_sampler
diff --git a/xview/factory.py b/xview/factory.py
new file mode 100644
index 0000000..fdd87fe
--- /dev/null
+++ b/xview/factory.py
@@ -0,0 +1,146 @@
+from multiprocessing.pool import Pool
+from typing import List, Dict
+import albumentations as A
+import cv2
+import numpy as np
+import torch
+from pytorch_toolbelt.inference.tiles import CudaTileMerger, ImageSlicer
+from pytorch_toolbelt.inference.tta import TTAWrapper, fliplr_image2mask, d4_image2mask
+from pytorch_toolbelt.utils.torch_utils import tensor_from_rgb_image, to_numpy, rgb_image_from_tensor
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import Dataset, DataLoader
+from tqdm import tqdm
+class InMemoryDataset(Dataset):
+    """Dataset over a list of already-loaded sample dicts; the transform is applied on access."""
+    def __init__(self, data: List[Dict], transform: A.Compose):
+        self.transform = transform
+        self.data = data
+    def __len__(self):
+        """Return the number of stored samples."""
+        return len(self.data)
+    def __getitem__(self, item):
+        """Return the transformed sample; the dict's entries are passed as keyword arguments."""
+        sample = self.data[item]
+        return self.transform(**sample)
+def _tensor_from_rgb_image(image: np.ndarray, **kwargs):
+    """Adapter for ``A.Lambda``: convert the image to a tensor, ignoring any extra keyword arguments."""
+    tensor = tensor_from_rgb_image(image)
+    return tensor
+class PickModelOutput(nn.Module):
+    """Wrap a model whose output is a mapping and expose only the entry stored under `key`."""
+    def __init__(self, model, key):
+        super().__init__()
+        self.target_key = key
+        self.model = model
+    def forward(self, input):
+        """Run the wrapped model and return the selected output."""
+        return self.model(input)[self.target_key]
+def predict(model: nn.Module, image: np.ndarray, image_size, normalize=A.Normalize(), batch_size=1) -> np.ndarray:
+    """
+    Predict a mask for a large image by running the model on overlapping tiles.
+    The image is split into tiles of `image_size` with a step of half a tile, each tile is
+    normalized and converted to a tensor, and the per-tile predictions are merged on the GPU.
+    :param model: Model that is already on the GPU; called with a batch of tiles
+    :param image: Input image as a numpy array
+    :param image_size: (height, width) of a single tile
+    :param normalize: Albumentations transform applied to each tile before tensor conversion
+    :param batch_size: Number of tiles per forward pass
+    :return: Merged mask as a channels-last numpy array, cropped back to the original image size
+    """
+    tile_step = (image_size[0] // 2, image_size[1] // 2)
+    tile_slicer = ImageSlicer(image.shape, image_size, tile_step)
+    tile_merger = CudaTileMerger(tile_slicer.target_shape, 1, tile_slicer.weight)
+    patches = tile_slicer.split(image)
+    transform = A.Compose([normalize, A.Lambda(image=_tensor_from_rgb_image)])
+    # np.int was removed from NumPy; the builtin int selects the same default integer dtype
+    data = [
+        {"image": patch, "coords": np.array(coords, dtype=int)}
+        for (patch, coords) in zip(patches, tile_slicer.crops)
+    ]
+    loader = DataLoader(InMemoryDataset(data, transform), pin_memory=True, batch_size=batch_size)
+    # Pure inference: do not record autograd graphs for the tiles
+    with torch.no_grad():
+        for batch in loader:
+            tiles = batch["image"].cuda(non_blocking=True)
+            coords = batch["coords"]
+            mask_batch = model(tiles)
+            tile_merger.integrate_batch(mask_batch, coords)
+    mask = tile_merger.merge()
+    mask = np.moveaxis(to_numpy(mask), 0, -1)
+    mask = tile_slicer.crop_to_orignal_size(mask)
+    return mask
+def __compute_ious(args):
+    """
+    Accumulate intersection and union pixel counts for every threshold in 0..255.
+    :param args: Tuple (ground_truth_path, prediction_path) of image file names
+    :return: (intersection, union) arrays with one entry per threshold; ``union`` holds
+        ``gt.sum() + pred.sum()``, i.e. the intersection has not been subtracted yet
+    :raises FileNotFoundError: if either image cannot be read
+    """
+    thresholds = np.arange(0, 256)
+    gt_fname, pred_fname = args
+    gt = cv2.imread(gt_fname)
+    pred = cv2.imread(pred_fname)
+    # cv2.imread reports failure by returning None rather than raising
+    if gt is None:
+        raise FileNotFoundError(f"Cannot read ground-truth image {gt_fname}")
+    if pred is None:
+        raise FileNotFoundError(f"Cannot read prediction image {pred_fname}")
+    gt = gt > 0  # Make binary {0,1}
+    pred_i = np.zeros_like(gt)  # Scratch buffer reused for every threshold
+    intersection = np.zeros(len(thresholds))
+    union = np.zeros(len(thresholds))
+    gt_sum = gt.sum()
+    for index, threshold in enumerate(thresholds):
+        np.greater(pred, threshold, out=pred_i)
+        union[index] += gt_sum + pred_i.sum()
+        np.logical_and(gt, pred_i, out=pred_i)
+        intersection[index] += pred_i.sum()
+    return intersection, union
+def optimize_threshold(gt_images, pred_images):
+    """
+    Compute the dataset-wide IoU for every binarization threshold in 0..255.
+    Image pairs are processed by a pool of 32 worker processes.
+    :param gt_images: Sequence of ground-truth image file names
+    :param pred_images: Sequence of prediction image file names, aligned with `gt_images`
+    :return: (thresholds, iou) arrays of equal length
+    """
+    thresholds = np.arange(0, 256)
+    num_thresholds = len(thresholds)
+    total_intersection = np.zeros(num_thresholds)
+    total_union = np.zeros(num_thresholds)
+    with Pool(32) as pool:
+        partial_results = pool.imap_unordered(__compute_ious, zip(gt_images, pred_images))
+        for part_intersection, part_union in tqdm(partial_results, total=len(gt_images)):
+            total_intersection += part_intersection
+            total_union += part_union
+    # The accumulated "union" is |gt| + |pred|; subtract the overlap to get the true union
+    iou = total_intersection / (total_union - total_intersection)
+    return thresholds, iou
+def visualize_inria_predictions(
+    input: dict,
+    output: dict,
+    mean=(0.485, 0.456, 0.406),
+    std=(0.229, 0.224, 0.225),
+    input_image_key=INPUT_IMAGE_KEY,
+    input_mask_key=INPUT_MASK_KEY,
+    input_image_id_key=INPUT_IMAGE_ID_KEY,
+    output_mask_key=OUTPUT_MASK_KEY,
+):
+    """
+    Render one annotated overlay image per sample of a batch.
+    :param input: Batch dict holding images, target masks and image ids
+    :param output: Model output dict holding the predicted mask logits
+    :param mean: Normalization mean used to de-normalize the image tensor
+    :param std: Normalization std used to de-normalize the image tensor
+    :param input_image_key: Key of the images in `input`
+    :param input_mask_key: Key of the target masks in `input`
+    :param input_image_id_key: Key of the image ids in `input`
+    :param output_mask_key: Key of the predicted logits in `output`
+    :return: List of overlay images, each blended 50/50 with the source image and labelled with its id
+    """
+    images = []
+    for image, target, image_id, logits in zip(
+        input[input_image_key], input[input_mask_key], input[input_image_id_key], output[output_mask_key]
+    ):
+        image = rgb_image_from_tensor(image, mean, std)
+        target = to_numpy(target).squeeze(0)
+        logits = to_numpy(logits).squeeze(0)
+        overlay = np.zeros_like(image)
+        true_mask = target > 0
+        pred_mask = logits > 0
+        # NOTE(review): the colours below are written in RGB order while the image is
+        # converted to BGR before blending - confirm which channel order consumers expect.
+        overlay[true_mask & pred_mask] = np.array(
+            [0, 250, 0], dtype=overlay.dtype
+        )  # Correct predictions (Hits) painted with green
+        overlay[true_mask & ~pred_mask] = np.array([250, 0, 0], dtype=overlay.dtype)  # Misses painted with red
+        overlay[~true_mask & pred_mask] = np.array(
+            [250, 250, 0], dtype=overlay.dtype
+        )  # False alarm painted with yellow
+        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        overlay = cv2.addWeighted(image, 0.5, overlay, 0.5, 0, dtype=cv2.CV_8U)
+        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
+        images.append(overlay)
+    return images
diff --git a/xview/inference.py b/xview/inference.py
new file mode 100644
index 0000000..c9bcadc
--- /dev/null
+++ b/xview/inference.py
@@ -0,0 +1,500 @@
+import os
+import torch
+from typing import Optional, Dict, List, Tuple
+from pytorch_toolbelt.inference.tiles import CudaTileMerger, ImageSlicer
+from pytorch_toolbelt.utils import fs
+from torch.nn import functional as F
+from pytorch_toolbelt.utils.torch_utils import to_numpy
+from torch import nn
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+import pytorch_toolbelt.inference.functional as AF
+from xview.dataset import (
+from xview.metric import CompetitionMetricCallback
+from xview.models import get_model
+from xview.postprocessing import (
+ make_predictions_dominant,
+ make_predictions_naive,
+ make_predictions_floodfill,
+ make_predictions_dominant_v2,
+ make_pseudolabeling_target,
+from xview.train_utils import report_checkpoint
+from xview.utils.inference_image_output import colorize_mask
+import numpy as np
+class ApplySigmoidTo(nn.Module):
+    """Wrap a dict-returning model and replace one of its outputs with its sigmoid."""
+    def __init__(self, model, input_key="logits"):
+        super().__init__()
+        self.input_key = input_key
+        self.model = model
+    def forward(self, *input, **kwargs) -> Dict:
+        """Run the wrapped model; outputs without `input_key` are returned untouched."""
+        output = self.model(*input, **kwargs)
+        key = self.input_key
+        if key not in output:
+            return output
+        output[key] = output[key].sigmoid()
+        return output
+class ApplySoftmaxTo(nn.Module):
+    """Wrap a dict-returning model and replace one of its outputs with its softmax over dim 1."""
+    def __init__(self, model, input_key="logits"):
+        super().__init__()
+        self.input_key = input_key
+        self.model = model
+    def forward(self, *input, **kwargs) -> Dict:
+        """Run the wrapped model; outputs without `input_key` are returned untouched."""
+        output = self.model(*input, **kwargs)
+        key = self.input_key
+        if key not in output:
+            return output
+        output[key] = output[key].softmax(dim=1)
+        return output
+class HFlipTTA(nn.Module):
+    """Horizontal-flip test-time augmentation for models that return a dict of outputs."""
+    def __init__(self, model, outputs, average=True):
+        super().__init__()
+        self.average = average
+        self.outputs = outputs
+        self.model = model
+    def forward(self, image):
+        """Sum (or average) the listed outputs over the original and the flipped image."""
+        result = self.model(image)
+        flipped_result = self.model(AF.torch_fliplr(image))
+        for key in self.outputs:
+            # Flip the prediction back before accumulating it in place
+            result[key] += AF.torch_fliplr(flipped_result[key])
+        if not self.average:
+            return result
+        for key in self.outputs:
+            result[key] *= 0.5
+        return result
+class D4TTA(nn.Module):
+    """Test-time augmentation over four rotations of the image and of its transpose."""
+    def __init__(self, model, outputs, average=True):
+        super().__init__()
+        self.average = average
+        self.outputs = outputs
+        self.model = model
+    def forward(self, image):
+        """Sum (or average) the listed outputs over the eight views of `image`."""
+        result = self.model(image)
+        # (augmentation, inverse augmentation) pairs for the three non-trivial rotations
+        rotations = (
+            (AF.torch_rot90, AF.torch_rot270),
+            (AF.torch_rot180, AF.torch_rot180),
+            (AF.torch_rot270, AF.torch_rot90),
+        )
+        for forward_op, inverse_op in rotations:
+            prediction = self.model(forward_op(image))
+            for key in self.outputs:
+                result[key] += inverse_op(prediction[key])
+        transposed = AF.torch_transpose(image)
+        for forward_op, inverse_op in ((AF.torch_none, AF.torch_none),) + rotations:
+            prediction = self.model(forward_op(transposed))
+            for key in self.outputs:
+                # Undo the rotation first, then the transposition
+                result[key] += AF.torch_transpose(inverse_op(prediction[key]))
+        if not self.average:
+            return result
+        scale = 1.0 / 8.0
+        for key in self.outputs:
+            result[key] *= scale
+        return result
+class MultiscaleTTA(nn.Module):
+    """Multi-scale test-time augmentation for models that return a dict of outputs."""
+    def __init__(self, model, outputs, size_offsets: List[int], average=True):
+        super().__init__()
+        self.average = average
+        self.size_offsets = size_offsets
+        self.outputs = outputs
+        self.model = model
+    def integrate(self, outputs, input, augment, deaugment):
+        """Run the model on `augment(input)` and add the de-augmented outputs to `outputs` in place."""
+        prediction = self.model(augment(input))
+        for key in self.outputs:
+            outputs[key] += deaugment(prediction[key])
+    def forward(self, image):
+        """Sum (or average) the listed outputs over the original and every rescaled image."""
+        result = self.model(image)
+        original_size = image.size()[2:]
+        def restore(x):
+            # Bring a prediction back to the resolution of the input image
+            return F.interpolate(x, size=original_size, mode="bilinear", align_corners=False)
+        for offset in self.size_offsets:
+            scaled_size = original_size[0] + offset, original_size[1] + offset
+            def rescale(x, size=scaled_size):
+                return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
+            self.integrate(result, image, rescale, restore)
+        if not self.average:
+            return result
+        # One prediction per offset plus the one at the original scale
+        scale = 1.0 / (len(self.size_offsets) + 1)
+        for key in self.outputs:
+            result[key] *= scale
+        return result
+class Ensembler(nn.Module):
+    """Average the selected outputs of several dict-returning models."""
+    def __init__(self, models: List[nn.Module], outputs: List[str]):
+        super().__init__()
+        self.outputs = outputs
+        self.models = nn.ModuleList(models)
+    def forward(self, *input, **kwargs):
+        """Run every model on the same input and return the mean of each key listed in `outputs`."""
+        num_models = len(self.models)
+        with tqdm(total=num_models, desc="Inference") as progress:
+            accumulated = self.models[0](*input, **kwargs)
+            progress.update()
+            for index in range(1, num_models):
+                prediction = self.models[index](*input, **kwargs)
+                progress.update()
+                # Aggregate predictions into the first model's output, in place
+                for key in self.outputs:
+                    accumulated[key] += prediction[key]
+        scale = 1.0 / num_models
+        averaged = {}
+        for key in self.outputs:
+            averaged[key] = accumulated[key] * scale
+        return averaged
+class ApplyWeights(nn.Module):
+    """Multiply one output of a dict-returning model by fixed weights."""
+    def __init__(self, model, weights, output_key=OUTPUT_MASK_KEY):
+        # Non-tensor weights are reshaped to (1, C, 1, 1) so they broadcast over dim 1;
+        # a tensor is registered as given.
+        if isinstance(weights, torch.Tensor):
+            weights_tensor = weights
+        else:
+            weights_tensor = torch.tensor(weights).float().view(1, -1, 1, 1)
+        super().__init__()
+        self.output_key = output_key
+        self.model = model
+        self.register_buffer("weights", weights_tensor)
+    def forward(self, x):
+        """Run the wrapped model and scale the selected output in place."""
+        output = self.model(x)
+        scaled = output[self.output_key]
+        scaled *= self.weights
+        output[self.output_key] = scaled
+        return output
+def model_from_checkpoint(
+    model_checkpoint: str, tta: Optional[str] = None, activation_after="model", model=None, report=True, classifiers=True
+) -> Tuple[nn.Module, Dict]:
+    """
+    Load a model from a checkpoint file and wrap it with softmax and optional TTA.
+    :param model_checkpoint: Path to the checkpoint file
+    :param tta: One of "multiscale", "flip", "flipscale", "multiscale_d4"; other values add no TTA
+    :param activation_after: "model" applies softmax before the TTA wrappers, "tta" after them
+    :param model: Model name overriding the one stored in the checkpoint
+    :param report: If True, print the checkpoint path, the model name and a checkpoint report
+    :param classifiers: Forwarded to `get_model`
+    :return: (model, info) where info holds the checkpoint id, model name, fold and validation scores
+    """
+    checkpoint = torch.load(model_checkpoint, map_location="cpu")
+    cmd_args = checkpoint["checkpoint_data"]["cmd_args"]
+    valid_metrics = checkpoint["epoch_metrics"]["valid"]
+    model_name = model or cmd_args["model"]
+    score = float(valid_metrics["weighted_f1"])
+    loc = float(valid_metrics["weighted_f1/localization_f1"])
+    dmg = float(valid_metrics["weighted_f1/damage_f1"])
+    fold = int(cmd_args["fold"])
+    if report:
+        print(model_checkpoint, model_name)
+        report_checkpoint(checkpoint)
+    model = get_model(model_name, pretrained=False, classifiers=classifiers)
+    model.load_state_dict(checkpoint["model_state_dict"], strict=False)
+    # Drop the references into the checkpoint so it can be freed
+    del checkpoint, cmd_args, valid_metrics
+    if activation_after == "model":
+        model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+    if tta in ("multiscale", "flip", "flipscale", "multiscale_d4"):
+        print(f"Using {tta}")
+        if tta in ("flip", "flipscale"):
+            model = HFlipTTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+        elif tta == "multiscale_d4":
+            model = D4TTA(model, outputs=[OUTPUT_MASK_KEY], average=True)
+        if tta != "flip":
+            # Every preset except plain "flip" ends with multi-scale averaging
+            model = MultiscaleTTA(
+                model, outputs=[OUTPUT_MASK_KEY], size_offsets=[-256, -128, +128, +256], average=True
+            )
+    if activation_after == "tta":
+        model = ApplySoftmaxTo(model, OUTPUT_MASK_KEY)
+    info = {
+        "model": fs.id_from_fname(model_checkpoint),
+        "model_name": model_name,
+        "fold": fold,
+        "score": score,
+        "localization": loc,
+        "damage": dmg,
+    }
+    return model, info
+@torch.no_grad()
+def run_inference_on_dataset(
+    model,
+    dataset,
+    output_dir,
+    batch_size=1,
+    workers=0,
+    weights=None,
+    fp16=False,
+    cpu=False,
+    postprocessing="naive",
+    save_pseudolabels=True,
+):
+    """
+    Run the model over a dataset and write localization / damage prediction images.
+    Gradients are disabled for the whole call since this is pure inference.
+    :param model: Model returning a dict that contains OUTPUT_MASK_KEY
+    :param dataset: Dataset yielding dicts with INPUT_IMAGE_KEY and INPUT_IMAGE_ID_KEY
+    :param output_dir: Output path prefix; results go to ``<output_dir>_<postprocessing name>``
+        and pseudo-labels to ``<output_dir>_pseudolabeling``
+    :param batch_size: Batch size of the data loader
+    :param workers: Number of data loader workers
+    :param weights: Optional per-channel weights multiplied into the predicted masks
+    :param fp16: If True (and not `cpu`), run model, inputs and weights in half precision
+    :param cpu: If True, keep everything on the CPU
+    :param postprocessing: "naive", "dominant", "dominant2"/"dominantv2"/"dominant_v2", "floodfill",
+        or None to run all of them. Any other value runs no postprocessing
+    :param save_pseudolabels: If True, also save a pseudo-labeling target per image
+    """
+    if not cpu:
+        if fp16:
+            model = model.half()
+        model = model.cuda()
+        if torch.cuda.device_count() > 1:
+            model = nn.DataParallel(model)
+            print("Using multi-GPU inference")
+    model = model.eval()
+    if weights is not None:
+        print("Using weights", weights)
+        # Shape (1, C, 1, 1) so the weights broadcast over batch and spatial dims
+        weights = torch.tensor(weights).float().view(1, -1, 1, 1)
+        if not cpu:
+            if fp16:
+                weights = weights.half()
+            weights = weights.cuda()
+    data_loader = DataLoader(dataset, batch_size=batch_size, pin_memory=not cpu, num_workers=workers)
+    pseudolabeling_dir = os.path.join(output_dir + "_pseudolabeling")
+    os.makedirs(output_dir, exist_ok=True)
+    os.makedirs(pseudolabeling_dir, exist_ok=True)
+    postprocessings = {}
+    if postprocessing == "naive":
+        postprocessings[postprocessing] = make_predictions_naive
+    elif postprocessing == "dominant":
+        postprocessings[postprocessing] = make_predictions_dominant
+    elif postprocessing in {"dominant2", "dominantv2", "dominant_v2"}:
+        postprocessings[postprocessing] = make_predictions_dominant_v2
+    elif postprocessing == "floodfill":
+        postprocessings[postprocessing] = make_predictions_floodfill
+    elif postprocessing is None:
+        postprocessings = {
+            "naive": make_predictions_naive,
+            "dominant": make_predictions_dominant,
+            "dominantv2": make_predictions_dominant_v2,
+            "floodfill": make_predictions_floodfill,
+        }
+    for batch in tqdm(data_loader):
+        image = batch[INPUT_IMAGE_KEY]
+        if not cpu:
+            if fp16:
+                image = image.half()
+            image = image.cuda(non_blocking=True)
+        image_ids = batch[INPUT_IMAGE_ID_KEY]
+        output = model(image)
+        masks = output[OUTPUT_MASK_KEY]
+        if weights is not None:
+            masks *= weights
+        # Predictions are always written at 1024x1024
+        if masks.size(2) != 1024 or masks.size(3) != 1024:
+            masks = F.interpolate(masks, size=(1024, 1024), mode="bilinear", align_corners=False)
+        masks = to_numpy(masks).astype(np.float32)
+        for i, image_id in enumerate(image_ids):
+            # Image ids are expected to consist of exactly three "_"-separated parts
+            _, _, image_uuid = image_id.split("_")
+            # Save pseudolabeling target
+            if save_pseudolabels:
+                pseudo_mask = make_pseudolabeling_target(masks[i])
+                pseudo_mask = pseudo_mask.astype(np.uint8)
+                pseudo_mask = colorize_mask(pseudo_mask)
+                pseudo_mask.save(os.path.join(pseudolabeling_dir, f"test_post_{image_uuid}.png"))
+            for postprocessing_name, postprocessing_fn in postprocessings.items():
+                output_dir_for_postprocessing = os.path.join(output_dir + "_" + postprocessing_name)
+                os.makedirs(output_dir_for_postprocessing, exist_ok=True)
+                localization_image, damage_image = postprocessing_fn(masks[i])
+                localization_fname = os.path.join(
+                    output_dir_for_postprocessing, f"test_localization_{image_uuid}_prediction.png"
+                )
+                localization_image = colorize_mask(localization_image)
+                localization_image.save(localization_fname)
+                damage_fname = os.path.join(output_dir_for_postprocessing, f"test_damage_{image_uuid}_prediction.png")
+                damage_image = colorize_mask(damage_image)
+                damage_image.save(damage_fname)
+    del data_loader
+@torch.no_grad()
+def run_inference_on_dataset_oof(model, dataset, output_dir, batch_size=1, workers=0, save=True, fp16=False):
+    """
+    Run out-of-fold inference on the GPU, optionally save raw predictions, and compute the metric.
+    Gradients are disabled for the whole call since this is pure inference.
+    :param model: Model returning a dict that contains OUTPUT_MASK_KEY (and optionally DAMAGE_TYPE_KEY)
+    :param dataset: Dataset yielding dicts with INPUT_IMAGE_KEY, INPUT_IMAGE_ID_KEY and INPUT_MASK_KEY
+    :param output_dir: Directory for the saved ``.npy`` predictions (created when `save` is True)
+    :param batch_size: Batch size of the data loader
+    :param workers: Number of data loader workers
+    :param save: If True, save masks as float16 ``.npy`` files, plus damage-type outputs when present
+    :param fp16: If True, run model and inputs in half precision
+    :return: Result of `CompetitionMetricCallback.compute_metrics` over all processed images
+    """
+    model = model.cuda()
+    if torch.cuda.device_count() > 1:
+        model = nn.DataParallel(model)
+    model = model.eval()
+    if fp16:
+        model = model.half()
+    data_loader = DataLoader(dataset, batch_size=batch_size, pin_memory=True, num_workers=workers)
+    if save:
+        os.makedirs(output_dir, exist_ok=True)
+    allrows = []
+    for batch in tqdm(data_loader):
+        image = batch[INPUT_IMAGE_KEY]
+        if fp16:
+            image = image.half()
+        image = image.cuda(non_blocking=True)
+        image_ids = batch[INPUT_IMAGE_ID_KEY]
+        dmg_true = to_numpy(batch[INPUT_MASK_KEY]).astype(np.float32)
+        output = model(image)
+        masks = output[OUTPUT_MASK_KEY]
+        masks = to_numpy(masks)
+        for i, image_id in enumerate(image_ids):
+            damage_mask = masks[i]
+            if save:
+                damage_fname = os.path.join(output_dir, fs.change_extension(image_id.replace("_pre", "_post"), ".npy"))
+                np.save(damage_fname, damage_mask.astype(np.float16))
+            loc_pred, dmg_pred = make_predictions_naive(damage_mask)
+            # The damage ground truth is passed for both the localization and damage targets
+            row = CompetitionMetricCallback.get_row_pair(loc_pred, dmg_pred, dmg_true[i], dmg_true[i])
+            allrows.append(row)
+        if save:
+            if DAMAGE_TYPE_KEY in output:
+                damage_type = to_numpy(output[DAMAGE_TYPE_KEY].sigmoid()).astype(np.float32)
+                for i, image_id in enumerate(image_ids):
+                    damage_fname = os.path.join(
+                        output_dir, fs.change_extension(image_id.replace("_pre", "_damage_type"), ".npy")
+                    )
+                    np.save(damage_fname, damage_type[i])
+    del data_loader
+    return CompetitionMetricCallback.compute_metrics(allrows)
+@torch.no_grad()
+def run_dual_inference_on_dataset(model, dataset, output_dir, batch_size=1, workers=0):
+    """
+    Run a two-input (pre/post) model on the GPU and write localization / damage prediction images.
+    Gradients are disabled for the whole call since this is pure inference.
+    :param model: Model called as ``model(image_pre=..., image_post=...)`` and returning a dict with
+        OUTPUT_MASK_PRE_KEY and OUTPUT_MASK_POST_KEY
+    :param dataset: Dataset yielding dicts with INPUT_IMAGE_PRE_KEY, INPUT_IMAGE_POST_KEY, INPUT_IMAGE_ID_KEY
+    :param output_dir: Directory the prediction PNG files are written to
+    :param batch_size: Batch size of the data loader
+    :param workers: Number of data loader workers
+    """
+    model = model.cuda()
+    if torch.cuda.device_count() > 1:
+        model = nn.DataParallel(model)
+    model = model.eval()
+    data_loader = DataLoader(dataset, batch_size=batch_size, pin_memory=True, num_workers=workers)
+    os.makedirs(output_dir, exist_ok=True)
+    for batch in tqdm(data_loader):
+        image_pre = batch[INPUT_IMAGE_PRE_KEY].cuda(non_blocking=True)
+        image_post = batch[INPUT_IMAGE_POST_KEY].cuda(non_blocking=True)
+        image_ids = batch[INPUT_IMAGE_ID_KEY]
+        output = model(image_pre=image_pre, image_post=image_post)
+        # Predictions are always written at 1024x1024
+        masks_pre = output[OUTPUT_MASK_PRE_KEY]
+        if masks_pre.size(2) != 1024 or masks_pre.size(3) != 1024:
+            masks_pre = F.interpolate(masks_pre, size=(1024, 1024), mode="bilinear", align_corners=False)
+        masks_pre = to_numpy(masks_pre.squeeze(1)).astype(np.float32)
+        masks_post = output[OUTPUT_MASK_POST_KEY]
+        if masks_post.size(2) != 1024 or masks_post.size(3) != 1024:
+            masks_post = F.interpolate(masks_post, size=(1024, 1024), mode="bilinear", align_corners=False)
+        masks_post = to_numpy(masks_post).astype(np.float32)
+        for i, image_id in enumerate(image_ids):
+            # Image ids are expected to consist of exactly three "_"-separated parts
+            _, _, image_uuid = image_id.split("_")
+            localization_image = masks_pre[i]
+            damage_image = masks_post[i]
+            localization_fname = os.path.join(output_dir, f"test_localization_{image_uuid}_prediction.png")
+            # Binarize the localization mask at 0.5
+            localization_image = (localization_image > 0.5).astype(np.uint8)
+            localization_image = colorize_mask(localization_image)
+            localization_image.save(localization_fname)
+            damage_fname = os.path.join(output_dir, f"test_damage_{image_uuid}_prediction.png")
+            # Pick the highest-scoring channel per pixel
+            damage_image = np.argmax(damage_image, axis=0).astype(np.uint8)
+            damage_image = colorize_mask(damage_image)
+            damage_image.save(damage_fname)
+    del data_loader
+@torch.no_grad()
+def run_dual_inference_on_dataset_oof(model, dataset, output_dir, batch_size=1, workers=0):
+    """
+    Run a two-input (pre/post) model on the GPU and save the raw predictions as ``.npy`` files.
+    Gradients are disabled for the whole call since this is pure inference.
+    :param model: Model called as ``model(image_pre=..., image_post=...)`` and returning a dict with
+        OUTPUT_MASK_PRE_KEY and OUTPUT_MASK_POST_KEY
+    :param dataset: Dataset yielding dicts with INPUT_IMAGE_PRE_KEY, INPUT_IMAGE_POST_KEY, INPUT_IMAGE_ID_KEY
+    :param output_dir: Directory the ``.npy`` files are written to
+    :param batch_size: Batch size of the data loader
+    :param workers: Number of data loader workers
+    """
+    model = model.cuda()
+    if torch.cuda.device_count() > 1:
+        model = nn.DataParallel(model)
+    model = model.eval()
+    data_loader = DataLoader(dataset, batch_size=batch_size, pin_memory=True, num_workers=workers)
+    os.makedirs(output_dir, exist_ok=True)
+    for batch in tqdm(data_loader):
+        image_pre = batch[INPUT_IMAGE_PRE_KEY].cuda(non_blocking=True)
+        image_post = batch[INPUT_IMAGE_POST_KEY].cuda(non_blocking=True)
+        image_ids = batch[INPUT_IMAGE_ID_KEY]
+        output = model(image_pre=image_pre, image_post=image_post)
+        # Predictions are always stored at 1024x1024
+        masks_pre = output[OUTPUT_MASK_PRE_KEY]
+        if masks_pre.size(2) != 1024 or masks_pre.size(3) != 1024:
+            masks_pre = F.interpolate(masks_pre, size=(1024, 1024), mode="bilinear", align_corners=False)
+        masks_pre = to_numpy(masks_pre.squeeze(1)).astype(np.float32)
+        masks_post = output[OUTPUT_MASK_POST_KEY]
+        if masks_post.size(2) != 1024 or masks_post.size(3) != 1024:
+            masks_post = F.interpolate(masks_post, size=(1024, 1024), mode="bilinear", align_corners=False)
+        masks_post = to_numpy(masks_post).astype(np.float32)
+        for i, image_id in enumerate(image_ids):
+            localization_image = masks_pre[i]
+            damage_image = masks_post[i]
+            # The localization file is named after the image id itself,
+            # the damage file after the id with "_pre" replaced by "_post"
+            localization_fname = os.path.join(output_dir, fs.change_extension(image_id, ".npy"))
+            np.save(localization_fname, localization_image)
+            damage_fname = os.path.join(output_dir, fs.change_extension(image_id.replace("_pre", "_post"), ".npy"))
+            np.save(damage_fname, damage_image)
+    del data_loader
diff --git a/xview/losses.py b/xview/losses.py
new file mode 100644
index 0000000..4c4b303
--- /dev/null
+++ b/xview/losses.py
@@ -0,0 +1,231 @@
+import math
+import torch
+from pytorch_toolbelt.losses import *
+import torch.nn.functional as F
+__all__ = ["get_loss", "AdaptiveMaskLoss2d"]
+from torch import nn
+from torch.nn import Module, Parameter
+from .dataset import UNLABELED_SAMPLE
+from .ssim_loss import SSIM
+from .utils.inference_image_output import resize_mask_one_hot
+class LabelSmoothingCrossEntropy2d(Module):
+    """
+    Cross-entropy with label smoothing for dense (per-pixel) targets.
+    The loss is ``eps / num_classes * smoothing_term + (1 - eps) * nll_loss`` where the smoothing term
+    is the negated sum of log-probabilities over the class axis.
+    Pixels whose target equals ``ignore_index`` are excluded from both terms.
+    Original implementation: fast.ai
+    :param eps: Smoothing factor
+    :param reduction: "mean", "sum" or anything else for no reduction
+    :param weight: Optional per-class weights; forwarded to ``F.nll_loss`` only (not applied to the smoothing term)
+    :param ignore_index: Target value marking pixels that must not contribute to the loss
+    """
+    def __init__(self, eps: float = 0.1, reduction="mean", weight=None, ignore_index=-100):
+        super().__init__()
+        self.eps = eps
+        self.reduction = reduction
+        self.ignore_index = ignore_index
+        self.register_buffer("weight", weight)
+    def forward(self, output, target):
+        """
+        :param output: Raw logits with classes on dim 1
+        :param target: Integer class indices (``output`` shape without the class axis)
+        :return: Scalar loss for "mean"/"sum" reduction, per-pixel loss otherwise
+        """
+        num_classes = output.size(1)
+        log_preds = F.log_softmax(output, dim=1)
+        valid = target != self.ignore_index
+        # Zero-out ignored pixels so they do not pull predictions towards the uniform distribution.
+        smoothing = (-log_preds.sum(dim=1)).masked_fill(~valid, 0)
+        if self.reduction == "sum":
+            smoothing = smoothing.sum()
+        elif self.reduction == "mean":
+            # Average over valid pixels only; the clamp avoids 0/0 when everything is ignored.
+            smoothing = smoothing.sum() / valid.sum().clamp(min=1).type_as(smoothing)
+        nll = F.nll_loss(
+            log_preds, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction
+        )
+        return smoothing * self.eps / num_classes + (1 - self.eps) * nll
+class OHEMCrossEntropyLoss(nn.CrossEntropyLoss):
+    """
+    Online hard example mining CE loss
+    https://arxiv.org/pdf/1812.05802.pdf
+    For every sample all positive pixels (target > 0) are kept, together with the ``max(Cn // 4, 5, 2 * Cp)``
+    negative pixels that have the largest loss, where Cp / Cn are the positive / negative pixel counts.
+    The result is the sum of the kept losses divided by the number of kept pixels over the whole batch;
+    the ``reduction`` argument does not change that.
+    :param weight: Optional per-class weights for the cross-entropy
+    :param fraction: Stored on the instance; not used by ``forward``
+    :param ignore_index: Target value whose per-pixel loss is zeroed by ``F.cross_entropy``
+    :param reduction: Forwarded to ``nn.CrossEntropyLoss``
+    """
+    def __init__(self, weight=None, fraction=0.3, ignore_index=-100, reduction="mean"):
+        super().__init__(weight, ignore_index=ignore_index, reduction=reduction)
+        self.fraction = fraction
+    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
+        """
+        :param input: Logits with classes on dim 1
+        :param target: Integer class indices, batch on dim 0
+        :return: Scalar loss tensor
+        """
+        batch_size = input.size(0)
+        losses = F.cross_entropy(
+            input, target, weight=self.weight, ignore_index=self.ignore_index, reduction="none"
+        ).view(batch_size, -1)
+        positive_mask = (target > 0).view(batch_size, -1)
+        loss = losses.new_zeros(())
+        num_samples = 0
+        for i in range(batch_size):
+            positive_losses = losses[i, positive_mask[i]]
+            negative_losses = losses[i, ~positive_mask[i]]
+            num_positives = positive_losses.numel()
+            available_negatives = negative_losses.numel()
+            # Integer arithmetic keeps the count usable as a slice bound on every torch version.
+            num_negatives = min(max(available_negatives // 4, 5, 2 * num_positives), available_negatives)
+            # sort() returns (values, indices): take the values first, THEN keep the hardest ones.
+            hard_negative_losses = negative_losses.sort(descending=True)[0][:num_negatives]
+            loss = loss + positive_losses.sum() + hard_negative_losses.sum()
+            num_samples += num_positives + hard_negative_losses.numel()
+        # Guard against an empty selection (e.g. empty batch) instead of dividing by zero.
+        return loss / float(max(num_samples, 1))
+def get_loss(loss_name: str, ignore_index=UNLABELED_SAMPLE):
+    """
+    Build a loss module from its (case-insensitive) name.
+    :param loss_name: Name of the loss, e.g. "ce", "focal", "dice"
+    :param ignore_index: Target value to ignore, forwarded to the losses that accept it
+    :return: Loss module; the weighted / margin / SSIM variants are moved to CUDA before returning
+    :raises KeyError: When ``loss_name`` is not recognised
+    """
+    name = loss_name.lower()
+    def heavy_damage_weights():
+        # Per-class weights shared by several weighted losses below.
+        return torch.tensor([1.0, 1.0, 3.0, 3.0, 3.0])
+    if name == "bce":
+        return BCELoss(ignore_index=ignore_index)
+    if name == "ce":
+        return nn.CrossEntropyLoss(ignore_index=ignore_index)
+    if name == "ohem_ce":
+        return OHEMCrossEntropyLoss(ignore_index=ignore_index, weight=heavy_damage_weights()).cuda()
+    if name == "weighted_ce":
+        return nn.CrossEntropyLoss(ignore_index=ignore_index, weight=heavy_damage_weights()).cuda()
+    if name == "weighted2_ce":
+        return nn.CrossEntropyLoss(ignore_index=ignore_index, weight=torch.tensor([1.0, 1.0, 3.0, 2.0, 1.0])).cuda()
+    if name == "dsv_ce":
+        inner = nn.CrossEntropyLoss(ignore_index=ignore_index, weight=heavy_damage_weights())
+        return AdaptiveMaskLoss2d(inner).cuda()
+    if name in ("ce_building_only", "ce_buildings_only"):
+        # Class 0 is ignored here, i.e. predictions on "non-building" pixels are not penalised
+        return nn.CrossEntropyLoss(ignore_index=0)
+    if name == "soft_bce":
+        return SoftBCELoss(smooth_factor=0.1, ignore_index=ignore_index)
+    if name == "soft_ce":
+        return LabelSmoothingCrossEntropy2d(eps=0.1, ignore_index=ignore_index)
+    if name == "binary_focal":
+        return BinaryFocalLoss(alpha=None, gamma=2, ignore_index=ignore_index)
+    if name == "focal":
+        return FocalLoss(alpha=None, gamma=2, ignore_index=ignore_index, reduction="mean")
+    if name == "nfl":
+        return FocalLoss(alpha=None, gamma=2, ignore_index=ignore_index, normalized=True, reduction="sum")
+    if name == "dice":
+        return DiceLoss(mode="multiclass")
+    if name == "log_dice":
+        return DiceLoss(mode="multiclass", log_loss=True)
+    if name == "am-softmax":
+        return AmSoftmax2d(weight=heavy_damage_weights()).cuda()
+    if name == "arcface":
+        return ArcFaceLoss2d(ignore_index=ignore_index)
+    if name == "ssim":
+        return SSIM(5).cuda()
+    raise KeyError(loss_name)
+class AdaptiveMaskLoss2d(nn.Module):
+    """
+    Wraps a loss and rescales the integer target mask to the spatial size of the prediction.
+    The target is one-hot encoded, repeatedly halved with bilinear interpolation while it is more than
+    twice as large as the prediction, resized to the exact prediction size and converted back to class
+    indices with argmax before the wrapped loss is evaluated.
+    """
+    def __init__(self, loss):
+        super().__init__()
+        self.loss = loss
+    def forward(self, input: torch.Tensor, target: torch.Tensor):
+        """
+        :param input: Prediction with classes on dim 1 and two trailing spatial dims
+        :param target: 3-D tensor of class indices
+        :return: ``self.loss(input, resized_target)``
+        """
+        with torch.no_grad():
+            num_classes = int(input.size(1))
+            out_rows = int(input.size(2))
+            soft_target = F.one_hot(target, num_classes).permute(0, 3, 1, 2).type(input.dtype)
+            # NOTE(review): the first ratio is taken from target.size(2) (last target axis), later ones
+            # from dim 2 of the one-hot tensor; the two agree for square masks.
+            ratio = int(target.size(2)) // out_rows
+            while ratio > 2:
+                soft_target = F.interpolate(soft_target, scale_factor=0.5, mode="bilinear", align_corners=False)
+                ratio = int(soft_target.size(2)) // out_rows
+            soft_target = F.interpolate(soft_target, size=input.size()[2:], mode="bilinear", align_corners=False)
+            resized_target = soft_target.argmax(dim=1).type(target.dtype)
+        return self.loss(input, resized_target)
+class ArcFaceLoss2d(nn.modules.Module):
+    """
+    Additive angular margin (ArcFace-style) loss for dense predictions.
+    https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/78109#latest-560973
+    The returned value is ``(CE(s * margin_logits) + gamma * CE(cos_theta)) / (1 + gamma)``.
+    :param s: Scale applied to the margin-adjusted logits
+    :param m: Angular margin
+    :param gamma: Weight of the plain cross-entropy term
+    :param ignore_index: Target value ignored by the cross-entropy
+    """
+    # Lower bound for 1 - cos^2: keeps sqrt() and its gradient finite when |cos_theta| >= 1.
+    _MIN_SQUARED_SINE = 1e-7
+    def __init__(self, s=30.0, m=0.35, gamma=1, ignore_index=-100):
+        super(ArcFaceLoss2d, self).__init__()
+        self.gamma = gamma
+        self.classify_loss = nn.CrossEntropyLoss(ignore_index=ignore_index)
+        self.s = s
+        self.easy_margin = False
+        self.cos_m = float(math.cos(m))
+        self.sin_m = float(math.sin(m))
+        self.th = float(math.cos(math.pi - m))
+        self.mm = float(math.sin(math.pi - m) * m)
+    def forward(self, cos_theta: torch.Tensor, labels):
+        """
+        :param cos_theta: Per-class scores with classes on dim 1 (4-D tensor)
+        :param labels: 3-D tensor of class indices
+        :return: Scalar loss
+        """
+        num_classes = cos_theta.size(1)
+        # Without the clamp an input at or beyond +-1 yields NaN (forward) or an infinite gradient (backward).
+        sine = torch.sqrt((1.0 - torch.pow(cos_theta, 2)).clamp(min=self._MIN_SQUARED_SINE))
+        phi = (cos_theta * self.cos_m - sine * self.sin_m).type(cos_theta.dtype)
+        if self.easy_margin:
+            phi = torch.where(cos_theta > 0, phi, cos_theta)
+        else:
+            phi = torch.where(cos_theta > self.th, phi, cos_theta - self.mm)
+        # F.one_hot rejects negative / out-of-range values, so map ignored pixels to class 0 for the
+        # mask only; the cross-entropy below still receives the original labels and skips them.
+        safe_labels = labels.masked_fill(labels == self.classify_loss.ignore_index, 0)
+        one_hot = F.one_hot(safe_labels, num_classes).type(cos_theta.dtype)
+        one_hot = one_hot.permute(0, 3, 1, 2)
+        # Use the margin-adjusted value for the target class and the raw value elsewhere
+        output = (one_hot * phi) + ((1.0 - one_hot) * cos_theta)
+        output *= self.s
+        loss1 = self.classify_loss(output, labels)
+        loss2 = self.classify_loss(cos_theta, labels)
+        loss = (loss1 + self.gamma * loss2) / (1 + self.gamma)
+        return loss
+class AmSoftmax2d(Module):
+    """
+    Additive margin softmax loss for dense predictions, see https://arxiv.org/abs/1801.05599
+    The margin ``m`` is subtracted from the target-class score, all scores are multiplied by ``s`` and
+    the result is passed to a cross-entropy loss.
+    :param ignore_index: Target value ignored by the cross-entropy
+    :param weight: Optional per-class weights for the cross-entropy
+    """
+    def __init__(self, ignore_index=UNLABELED_SAMPLE, weight=None):
+        super(AmSoftmax2d, self).__init__()
+        self.m = 0.35  # additive margin recommended by the paper
+        self.s = 30.0  # see normface https://arxiv.org/abs/1704.06369
+        self.classify_loss = nn.CrossEntropyLoss(ignore_index=ignore_index, weight=weight)
+    def forward(self, cos_theta, labels):
+        """
+        :param cos_theta: Per-class scores with classes on dim 1 (4-D tensor)
+        :param labels: 3-D tensor of class indices
+        :return: Scalar loss
+        """
+        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
+        phi = cos_theta - self.m
+        num_classes = cos_theta.size(1)
+        # F.one_hot rejects negative / out-of-range values, so map ignored pixels to class 0 for the
+        # mask only; the cross-entropy below still receives the original labels and skips them.
+        safe_labels = labels.masked_fill(labels == self.classify_loss.ignore_index, 0)
+        # Cast to the score dtype so the blend below does not depend on int/float type promotion.
+        one_hot = F.one_hot(safe_labels, num_classes).type(cos_theta.dtype)
+        one_hot = one_hot.permute(0, 3, 1, 2)
+        output = (one_hot * phi) + ((1.0 - one_hot) * cos_theta)
+        output *= self.s  # scale up in order to make softmax work, first introduced in normface
+        return self.classify_loss(output, labels)
diff --git a/xview/metric.py b/xview/metric.py
new file mode 100644
index 0000000..423acf7
--- /dev/null
+++ b/xview/metric.py
@@ -0,0 +1,232 @@
+from typing import List
+import numpy as np
+import pandas as pd
+import torch
+from catalyst.dl import Callback, RunnerState, CallbackOrder
+from pytorch_toolbelt.utils.catalyst import get_tensorboard_logger
+from pytorch_toolbelt.utils.torch_utils import to_numpy
+from pytorch_toolbelt.utils.visualization import render_figure_to_tensor, plot_confusion_matrix
+from torchnet.meter import ConfusionMeter
+from .xview2_metrics import F1Recorder
+class CompetitionMetricCallback(Callback):
+    """
+    Collects per-image TP/FN/FP counts during a loader pass and reports the combined score
+    ``0.3 * localization_f1 + 0.7 * damage_f1``, where ``damage_f1`` is the harmonic mean of the
+    four per-class damage F1 values.
+    """
+    def __init__(
+        self,
+        input_key: str = "targets",
+        output_key: str = "logits",
+        image_id_key: str = "image_id",
+        prefix: str = "weighted_f1",
+    ):
+        """
+        :param input_key: input key to use for metric calculation; specifies our `y_true`.
+        :param output_key: output key to use for metric calculation; specifies our `y_pred`.
+        :param image_id_key: input key holding the image identifiers of a batch.
+        :param prefix: name under which the metric values are registered.
+        """
+        super().__init__(CallbackOrder.Metric)
+        self.prefix = prefix
+        self.output_key = output_key
+        self.input_key = input_key
+        self.image_id_key = image_id_key
+        self.all_rows = []
+    def on_loader_start(self, state):
+        # Start every loader pass with an empty accumulator
+        self.all_rows = []
+    @staticmethod
+    def extract_buildings(x: np.ndarray):
+        """ Returns a copy of x in which every positive value is replaced by 1 """
+        binary = x.copy()
+        binary[x > 0] = 1
+        return binary
+    @staticmethod
+    def compute_tp_fn_fp(pred: np.ndarray, targ: np.ndarray, c: int) -> List[int]:
+        """
+        Counts TPs, FNs and FPs between a prediction and a target for the positive class c
+        Args:
+            pred (np.ndarray): prediction
+            targ (np.ndarray): target
+            c (int): positive class
+        Returns:
+            [TP, FN, FP]
+        """
+        pred_hit = pred == c
+        targ_hit = targ == c
+        true_pos = np.logical_and(pred_hit, targ_hit).sum()
+        false_neg = np.logical_and(pred != c, targ_hit).sum()
+        false_pos = np.logical_and(pred_hit, targ != c).sum()
+        return [true_pos, false_neg, false_pos]
+    @classmethod
+    def get_row_pair(cls, lp, dp, lt, dt):
+        """
+        Builds a row of TPs, FNs, and FPs for both the localization dataframe and the damage dataframe.
+        Both rows are built together because damage is only credited where buildings are predicted.
+        Args:
+            lp: localization predictions
+            dp: damage predictions
+            lt: localization targets
+            dt: damage targets
+        Returns:
+            (localization row, damage row); the damage row holds TP, FN, FP for classes 1..4 in order
+        """
+        # Collapse all damage scores 1-4 to 1
+        lp_b = cls.extract_buildings(lp)
+        lt_b = cls.extract_buildings(lt)
+        dt_b = cls.extract_buildings(dt)
+        # Only give credit to damages where buildings are predicted
+        dp = dp * lp_b
+        # Only score damage where the target damage mask contains buildings
+        scored = dt_b == 1
+        dp, dt = dp[scored], dt[scored]
+        lrow = cls.compute_tp_fn_fp(lp_b, lt_b, 1)
+        drow = [count for damage_class in range(1, 5) for count in cls.compute_tp_fn_fp(dp, dt, damage_class)]
+        return lrow, drow
+    def on_batch_end(self, state: RunnerState):
+        image_ids = state.input[self.image_id_key]
+        outputs = to_numpy(torch.argmax(state.output[self.output_key].detach(), dim=1))
+        targets = to_numpy(state.input[self.input_key].detach())
+        # The same class map serves as localization and damage input on both sides
+        rows = [
+            self.get_row_pair(y_pred, y_pred, y_true, y_true)
+            for _, y_true, y_pred in zip(image_ids, targets, outputs)
+        ]
+        self.all_rows.extend(rows)
+        score, localization_f1, damage_f1, damage_f1s = self.compute_metrics(rows)
+        batch_prefix = self.prefix + "_batch"
+        state.metrics.add_batch_value(batch_prefix + "/localization_f1", localization_f1)
+        state.metrics.add_batch_value(batch_prefix + "/damage_f1", damage_f1)
+        state.metrics.add_batch_value(batch_prefix, score)
+    @staticmethod
+    def compute_metrics(rows):
+        """
+        Aggregates (localization row, damage row) pairs into the final scores.
+        Returns:
+            (score, localization_f1, damage_f1, list of the four per-class damage F1 values)
+        """
+        lcolumns = ["lTP", "lFN", "lFP"]
+        dcolumns = ["dTP1", "dFN1", "dFP1", "dTP2", "dFN2", "dFP2", "dTP3", "dFN3", "dFP3", "dTP4", "dFN4", "dFP4"]
+        ldf = pd.DataFrame([lrow for lrow, _ in rows], columns=lcolumns)
+        ddf = pd.DataFrame([drow for _, drow in rows], columns=dcolumns)
+        lf1r = F1Recorder(ldf["lTP"].sum(), ldf["lFP"].sum(), ldf["lFN"].sum(), "Buildings")
+        dmg2str = {
+            1: "No damage (1) ",
+            2: "Minor damage (2) ",
+            3: "Major damage (3) ",
+            4: "Destroyed (4) ",
+        }
+        df1rs = [
+            F1Recorder(ddf[f"dTP{i}"].sum(), ddf[f"dFP{i}"].sum(), ddf[f"dFN{i}"].sum(), dmg2str[i])
+            for i in range(1, 5)
+        ]
+        localization_f1 = lf1r.f1
+        damage_f1s = [recorder.f1 for recorder in df1rs]
+        # Harmonic mean; the small epsilon keeps the reciprocal finite for a zero F1
+        damage_f1 = len(damage_f1s) / sum((x + 1e-6) ** -1 for x in damage_f1s)
+        score = 0.3 * localization_f1 + 0.7 * damage_f1
+        return score, localization_f1, damage_f1, damage_f1s
+    def on_loader_end(self, state):
+        score, localization_f1, damage_f1, damage_f1s = self.compute_metrics(self.all_rows)
+        epoch_metrics = state.metrics.epoch_values[state.loader_name]
+        epoch_metrics[self.prefix + "/localization_f1"] = localization_f1
+        epoch_metrics[self.prefix + "/damage_f1"] = damage_f1
+        epoch_metrics[self.prefix] = score
+        class_names = ["no_damage", "minor_damage", "major_damage", "destroyed"]
+        for class_name, class_f1 in zip(class_names, damage_f1s):
+            epoch_metrics[self.prefix + f"/{class_name}"] = class_f1
+def default_multilabel_activation(x):
+    """Turn logits into 0/1 integer labels: 1 where sigmoid(x) exceeds 0.5, 0 elsewhere."""
+    probabilities = x.sigmoid()
+    return probabilities.gt(0.5).long()
+class MultilabelConfusionMatrixCallback(Callback):
+    """
+    Compute and log confusion matrix to Tensorboard.
+    For use with multilabel classification/segmentation: every class channel is binarised with
+    ``activation_fn`` and added to the confusion meter separately.
+    """
+    def __init__(
+        self,
+        input_key: str = "targets",
+        output_key: str = "logits",
+        prefix: str = "confusion_matrix",
+        class_names: List[str] = None,
+        num_classes: int = None,
+        ignore_index=None,
+        activation_fn=default_multilabel_activation,
+    ):
+        """
+        :param input_key: input key to use for precision calculation;
+            specifies our `y_true`.
+        :param output_key: output key to use for precision calculation;
+            specifies our `y_pred`.
+        :param prefix: tag prefix of the logged image
+        :param class_names: optional class names; when given, their count overrides ``num_classes``
+        :param num_classes: number of classes, used only when ``class_names`` is None
+        :param ignore_index: target value to exclude from the confusion matrix
+        :param activation_fn: callable that converts raw outputs to integer predictions
+        """
+        super().__init__(CallbackOrder.Metric)
+        self.prefix = prefix
+        self.class_names = class_names
+        self.num_classes = num_classes if class_names is None else len(class_names)
+        self.output_key = output_key
+        self.input_key = input_key
+        self.ignore_index = ignore_index
+        self.confusion_matrix = None
+        self.activation_fn = activation_fn
+    def on_loader_start(self, state):
+        # Fresh meter for every loader pass
+        self.confusion_matrix = ConfusionMeter(self.num_classes)
+    def on_batch_end(self, state: RunnerState):
+        outputs: torch.Tensor = state.output[self.output_key].detach().cpu()
+        outputs: torch.Tensor = self.activation_fn(outputs)
+        targets: torch.Tensor = state.input[self.input_key].detach().cpu()
+        # Flatten to [batch, elements, classes]
+        outputs = outputs.view(outputs.size(0), outputs.size(1), -1).permute(0, 2, 1).contiguous()
+        targets = targets.view(targets.size(0), targets.size(1), -1).permute(0, 2, 1).contiguous()
+        targets = targets.type_as(outputs)
+        for class_index in range(self.num_classes):
+            # Classes live on the LAST axis after the permute above; indexing the first axis would
+            # select a batch element instead (and fail whenever batch size < num_classes).
+            outputs_i = outputs[..., class_index].reshape(-1)
+            targets_i = targets[..., class_index].reshape(-1)
+            if self.ignore_index is not None:
+                mask = targets_i != self.ignore_index
+                outputs_i = outputs_i[mask]
+                targets_i = targets_i[mask]
+            self.confusion_matrix.add(predicted=outputs_i, target=targets_i)
+    def on_loader_end(self, state):
+        if self.class_names is None:
+            class_names = [str(i) for i in range(self.num_classes)]
+        else:
+            class_names = self.class_names
+        num_classes = len(class_names)
+        cm = self.confusion_matrix.value()
+        fig = plot_confusion_matrix(
+            cm,
+            figsize=(6 + num_classes // 3, 6 + num_classes // 3),
+            class_names=class_names,
+            normalize=True,
+            noshow=True,
+        )
+        fig = render_figure_to_tensor(fig)
+        logger = get_tensorboard_logger(state)
+        logger.add_image(f"{self.prefix}/epoch", fig, global_step=state.step)
diff --git a/xview/model_wrapper.py b/xview/model_wrapper.py
new file mode 100644
index 0000000..cd0e2c1
--- /dev/null
+++ b/xview/model_wrapper.py
@@ -0,0 +1,103 @@
+from typing import List, Union, Dict, Any
+import torch
+from catalyst.dl import CallbackOrder, logger, RunnerState, Callback
+from catalyst.dl.callbacks.criterion import _add_loss_to_state, CriterionCallback
+from torch import nn, Tensor
+from xview.dataset import INPUT_IMAGE_KEY
+class ModelTrainer(nn.Module):
+    """
+    Adapter module that runs the wrapped model and evaluates every loss inside ``forward``,
+    so that the losses are computed where the model runs and travel back with its output.
+    """
+    def __init__(
+        self,
+        model: nn.Module,
+        losses: List[nn.Module],
+        loss_input_keys: List[str],
+        loss_output_keys: List[str],
+        loss_key="losses",
+        model_input_key=INPUT_IMAGE_KEY,
+    ):
+        """
+        :param model: Wrapped model; called with the single input stored under ``model_input_key``
+        :param losses: List of loss functions
+        :param loss_input_keys: For each loss, the key of its target in the forward kwargs
+        :param loss_output_keys: For each loss, the key of its prediction in the model output
+        :param loss_key: Key under which the list of computed losses is stored in the model output
+        :param model_input_key: Key of the model input in the forward kwargs
+        """
+        super().__init__()
+        self.model = model
+        self.losses = nn.ModuleList(losses)
+        self.input_key = model_input_key
+        self.loss_input_keys = loss_input_keys
+        self.output_keys = loss_output_keys
+        self.loss_key = loss_key
+    def forward(self, **input):
+        """
+        :param input: Batch entries passed as keyword arguments
+        :return: The model output mapping, extended with the list of losses under ``self.loss_key``
+        """
+        model_output = self.model(input[self.input_key])
+        computed = []
+        for target_key, prediction_key, criterion in zip(self.loss_input_keys, self.output_keys, self.losses):
+            target = input[target_key]
+            prediction = model_output[prediction_key]
+            computed.append(criterion(prediction, target))
+        model_output[self.loss_key] = computed
+        return model_output
+class PassthroughCriterionCallback(CriterionCallback):
+    """
+    Reads already computed losses from ``state.output[output_key]``, reduces every entry with a sum,
+    logs each one under its name from ``loss_keys`` and registers the sum of all of them as the
+    batch loss under ``prefix``.
+    """
+    def __init__(
+        self,
+        prefix: str,
+        output_key="losses",
+        loss_keys: Union[str, List[str], Dict[str, float]] = None,
+        loss_aggregate_fn: str = "sum",
+    ) -> None:
+        """
+        Args:
+            prefix (str): key for the aggregated loss.
+            output_key (str): key of the list of losses in ``state.output``.
+            loss_keys (Union[str, List[str], Dict[str, float]]): names used to log the individual
+                losses, matched by position; a single string is treated as a one-element list.
+                When None, only the aggregated loss is logged.
+            loss_aggregate_fn (str): stored as ``loss_aggregate_name``; ``on_batch_end`` always sums.
+        Raises:
+            ValueError: if ``prefix`` is not a string.
+        """
+        super().__init__(prefix=prefix)
+        if prefix is None or not isinstance(prefix, str):
+            raise ValueError("prefix must be str")
+        self.prefix = prefix
+        if isinstance(loss_keys, str):
+            loss_keys = [loss_keys]
+        self.loss_keys = loss_keys
+        self.output_key = output_key
+        self.loss_aggregate_name = loss_aggregate_fn
+    def on_stage_start(self, state: RunnerState):
+        # Intentionally a no-op override of the parent hook
+        pass
+    def on_batch_end(self, state: RunnerState) -> None:
+        """
+        Sums the precomputed losses and adds them to the metrics
+        """
+        losses = state.output[self.output_key]
+        losses = [torch.sum(x) for x in losses]  # Sum losses from all devices
+        # loss_keys defaults to None; zip(None, ...) would raise, so skip per-loss logging then.
+        if self.loss_keys is not None:
+            for loss_name, loss in zip(self.loss_keys, losses):
+                state.metrics.add_batch_value(metrics_dict={loss_name: loss.item()})
+        loss = torch.sum(torch.stack(losses))
+        _add_loss_to_state(self.prefix, state, loss)
+        state.metrics.add_batch_value(metrics_dict={self.prefix: loss.item()})
diff --git a/xview/optim.py b/xview/optim.py
new file mode 100644
index 0000000..35b9828
--- /dev/null
+++ b/xview/optim.py
@@ -0,0 +1,172 @@
+from catalyst.contrib.optimizers import RAdam, Lamb
+from torch.optim import SGD, Adam, RMSprop, AdamW
+import math
+import torch
+from torch.optim.optimizer import Optimizer
+import numpy as np
+import torch.nn as nn
+# import torch.optim as Optimizer
+# Original source: https://github.com/shivram1987/diffGrad/blob/master/diffGrad.py
+# modifications: @lessw2020
+class DiffGrad(Optimizer):
+    r"""Implements diffGrad algorithm. It is modified from the pytorch implementation of Adam.
+    It has been proposed in `diffGrad: An Optimization Method for Convolutional Neural Networks`_.
+    Arguments:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float, optional): learning rate (default: 1e-3)
+        betas (Tuple[float, float], optional): coefficients used for computing
+            running averages of gradient and its square (default: (0.9, 0.999))
+        eps (float, optional): term added to the denominator to improve
+            numerical stability (default: 1e-8)
+        version (int, optional): variant of the diffGrad coefficient, one of 0, 1 or 2:
+            0 uses sigmoid(|g_prev - g|), 1 uses sigmoid(g_prev - g),
+            2 uses 9 * sigmoid(0.5 * |g_prev - g|) - 4 (default: 0)
+        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+    .. _diffGrad\: An Optimization Method for Convolutional Neural Networks:
+        https://arxiv.org/abs/1909.11015
+    .. _Adam\: A Method for Stochastic Optimization:
+        https://arxiv.org/abs/1412.6980
+    """
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, version=0, weight_decay=0):
+        if not 0.0 <= lr:
+            raise ValueError("Invalid learning rate: {}".format(lr))
+        if not 0.0 <= eps:
+            raise ValueError("Invalid epsilon value: {}".format(eps))
+        if not 0.0 <= betas[0] < 1.0:
+            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+        if not 0.0 <= betas[1] < 1.0:
+            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+        # Fail early: an unknown version would otherwise surface as a NameError inside step().
+        if version not in (0, 1, 2):
+            raise ValueError("Invalid version: {}".format(version))
+        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+        super().__init__(params, defaults)
+        # save version
+        self.version = version
+    def __setstate__(self, state):
+        super().__setstate__(state)
+    def step(self, closure=None):
+        """Performs a single optimization step.
+        Arguments:
+            closure (callable, optional): A closure that reevaluates the model
+                and returns the loss.
+        Returns:
+            The value returned by ``closure``, or None when no closure is given.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data
+                if grad.is_sparse:
+                    raise RuntimeError('diffGrad does not support sparse gradients, please consider SparseAdam instead')
+                state = self.state[p]
+                # State initialization
+                if len(state) == 0:
+                    state['step'] = 0
+                    # Exponential moving average of gradient values
+                    state['exp_avg'] = torch.zeros_like(p.data)
+                    # Exponential moving average of squared gradient values
+                    state['exp_avg_sq'] = torch.zeros_like(p.data)
+                    # Previous gradient
+                    state['previous_grad'] = torch.zeros_like(p.data)
+                exp_avg, exp_avg_sq, previous_grad = state['exp_avg'], state['exp_avg_sq'], state['previous_grad']
+                beta1, beta2 = group['betas']
+                state['step'] += 1
+                if group['weight_decay'] != 0:
+                    # Out-of-place so that the user-visible p.grad is left untouched
+                    grad = grad.add(p.data, alpha=group['weight_decay'])
+                # Decay the first and second moment running average coefficient
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                denom = exp_avg_sq.sqrt().add_(group['eps'])
+                bias_correction1 = 1 - beta1 ** state['step']
+                bias_correction2 = 1 - beta2 ** state['step']
+                # compute diffgrad coefficient (dfc)
+                diff = previous_grad - grad
+                if self.version == 0:
+                    dfc = 1. / (1. + torch.exp(-torch.abs(diff)))
+                elif self.version == 1:
+                    dfc = 1. / (1. + torch.exp(-diff))
+                else:
+                    # DFC2 = 9 / (1 + exp(-0.5 * |diff|)) - 4, range 0.5 .. 5
+                    dfc = 9. / (1. + torch.exp(-.5 * torch.abs(diff))) - 4
+                # Copy the values: keeping a reference to p.grad would alias the buffer that
+                # backward()/zero_grad() rewrite in place, making the next diff identically zero.
+                previous_grad.copy_(grad)
+                # update momentum with dfc
+                exp_avg1 = exp_avg * dfc
+                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
+                p.data.addcdiv_(exp_avg1, denom, value=-step_size)
+        return loss
+def get_optimizer(optimizer_name: str, parameters, learning_rate: float, weight_decay=1e-5, **kwargs):
+    """
+    Build an optimizer from its (case-insensitive) name.
+    :param optimizer_name: One of "sgd", "adam", "rms", "adamw", "radam", "lamb", "fused_lamb",
+        "fused_adam", "diffgrad"
+    :param parameters: Parameters (or parameter groups) to optimize
+    :param learning_rate: Learning rate
+    :param weight_decay: Weight decay forwarded to the optimizer
+    :param kwargs: Extra keyword arguments forwarded to the optimizer constructor
+    :return: Optimizer instance
+    :raises ValueError: When the name is not supported
+    """
+    name = optimizer_name.lower()
+    if name == "sgd":
+        return SGD(parameters, learning_rate, momentum=0.9, nesterov=True, weight_decay=weight_decay, **kwargs)
+    if name == "adam":
+        # eps=1e-5 as Jeremy suggests
+        return Adam(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)
+    if name == "rms":
+        return RMSprop(parameters, learning_rate, weight_decay=weight_decay, **kwargs)
+    if name == "adamw":
+        return AdamW(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)
+    if name == "radam":
+        # eps=1e-5 as Jeremy suggests
+        return RAdam(parameters, learning_rate, weight_decay=weight_decay, eps=1e-5, **kwargs)
+    # "ranger" and "qhadamw" are not wired in.
+    if name == "lamb":
+        return Lamb(parameters, learning_rate, weight_decay=weight_decay, **kwargs)
+    if name == "fused_lamb":
+        # apex is imported lazily so it is only required when this optimizer is requested
+        from apex.optimizers import FusedLAMB
+        return FusedLAMB(parameters, learning_rate, weight_decay=weight_decay, **kwargs)
+    if name == "fused_adam":
+        from apex.optimizers import FusedAdam
+        return FusedAdam(parameters, learning_rate, eps=1e-5, weight_decay=weight_decay, adam_w_mode=True, **kwargs)
+    if name == "diffgrad":
+        return DiffGrad(parameters, learning_rate, eps=1e-5, weight_decay=weight_decay, **kwargs)
+    raise ValueError("Unsupported optimizer name " + optimizer_name)
diff --git a/xview/postprocessing.py b/xview/postprocessing.py
new file mode 100644
index 0000000..0c6e3a9
--- /dev/null
+++ b/xview/postprocessing.py
@@ -0,0 +1,236 @@
+import cv2
+import numpy as np
+from skimage.measure import label, regionprops
+import matplotlib.pyplot as plt
+__all__ = ["make_predictions_dominant", "make_predictions_naive", "make_predictions_floodfill"]
+from skimage.morphology import remove_small_objects
+from skimage.segmentation import relabel_sequential
+from xview.dataset import UNLABELED_SAMPLE
+def make_pseudolabeling_target(damage_probs:np.ndarray, ratio_threshold=1.5):
+    """
+    Build a pseudo-label map from class scores given along axis 0.
+    A position keeps its argmax class only when the largest score divided by the second largest one
+    exceeds ``ratio_threshold``; every other position is set to UNLABELED_SAMPLE.
+    :param damage_probs: Array with classes on axis 0 (not modified)
+    :param ratio_threshold: Minimal top-1 / top-2 ratio for a confident label
+    :return: Integer array of class indices without the class axis
+    """
+    probs = damage_probs.copy()
+    labels = np.argmax(probs, axis=0)
+    # Sorting the negated scores puts the largest original value first
+    negated_descending = np.sort(-probs, axis=0)
+    top1_over_top2 = negated_descending[0] / negated_descending[1]
+    uncertain = ~(top1_over_top2 > ratio_threshold)
+    labels[uncertain] = UNLABELED_SAMPLE
+    return labels
+def make_predictions_naive(damage_probs: np.ndarray):
+    """
+    Convert class scores (classes on axis 0, channel 0 = background) into localization and damage masks.
+    A pixel is a building when the summed scores of channels 1.. exceed the background score.
+    :param damage_probs: Array with classes on axis 0 (not modified)
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; the damage mask never contains 0
+    """
+    background = damage_probs[0, ...]
+    any_building = np.sum(damage_probs[1:, ...], axis=0)
+    loc_cls = np.argmax(np.stack((background, any_building)), axis=0)
+    # With the localization decided, background must not win the damage vote on building pixels
+    masked_probs = damage_probs.copy()
+    masked_probs[0, loc_cls > 0] = 0
+    dmg_cls = np.argmax(masked_probs, axis=0)
+    # Whatever is still background gets damage type 1 (no damage)
+    dmg_cls[dmg_cls == 0] = 1
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
+def make_predictions_dominant(
+    damage_probs: np.ndarray, min_size=32, assign_dominant=True, max_building_area=2048, min_solidity=0.75
+):
+    """
+    Convert class scores into localization and damage masks, assigning one damage class per building.
+    Buildings are the connected components of the localization mask; components smaller than
+    ``min_size`` are removed. With ``assign_dominant`` every component whose area is below
+    ``max_building_area`` OR whose solidity is above ``min_solidity`` gets the most frequent damage
+    class among its pixels.
+    :param damage_probs: Array with classes on axis 0, channel 0 = background (not modified)
+    :param min_size: Minimal component size passed to ``remove_small_objects``; None disables the filter
+    :param assign_dominant: Whether to overwrite each qualifying component with its dominant class
+    :param max_building_area: Area bound of the dominant-class rule
+    :param min_solidity: Solidity bound of the dominant-class rule
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; the damage mask never contains 0
+    """
+    loc_pred = np.stack((damage_probs[0, ...], np.sum(damage_probs[1:, ...], axis=0)))
+    loc_cls = np.argmax(loc_pred, axis=0)
+    # After we have 'fixed' localization predictions, we must zero-out probabilities for damage probs
+    damage_probs = damage_probs.copy()
+    damage_probs[0, loc_cls > 0] = 0
+    dmg_cls = np.argmax(damage_probs, axis=0)
+    buildings = label(loc_cls)
+    if min_size is not None:
+        # If there are any objects at all
+        if buildings.max() > 0:
+            buildings = remove_small_objects(buildings, min_size=min_size)
+            buildings, _, _ = relabel_sequential(buildings)
+            loc_cls = buildings > 0
+            dmg_cls[~loc_cls] = 0
+    if assign_dominant:
+        classes = list(range(1, 5))
+        for region in regionprops(buildings):
+            region_label, area, solidity = region["label"], region["area"], region["solidity"]
+            region_mask = buildings == region_label
+            if area < max_building_area or solidity > min_solidity:
+                region_classes = dmg_cls[region_mask]
+                label_counts = [np.sum(region_classes == cls_index) for cls_index in classes]
+                # argmax returns the first maximum, so ties resolve to the lowest class
+                dmg_cls[region_mask] = np.argmax(label_counts) + 1
+    dmg_cls[dmg_cls == 0] = 1  # Fill remaining with damage type 1 (no damage)
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
+def make_predictions_most_severe(damage_probs: np.ndarray, min_size=32, assign_severe=True):
+    """
+    Convert class scores into localization and damage masks, assigning the most severe significant
+    damage class to each building.
+    Buildings are the connected components of the localization mask; components smaller than
+    ``min_size`` are removed. With ``assign_severe`` every component with area below 2048 OR solidity
+    above 0.75 is overwritten with the highest class whose pixel count reaches
+    ``max(1, mean - 3 * std)`` of the non-zero class counts inside the component.
+    :param damage_probs: Array with classes on axis 0, channel 0 = background (not modified)
+    :param min_size: Minimal component size passed to ``remove_small_objects``; None disables the filter
+    :param assign_severe: Whether to apply the per-building class assignment
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; the damage mask never contains 0
+    """
+    loc_pred = np.stack((damage_probs[0, ...], np.sum(damage_probs[1:, ...], axis=0)))
+    loc_cls = np.argmax(loc_pred, axis=0)
+    # After we have 'fixed' localization predictions, we must zero-out probabilities for damage probs
+    damage_probs = damage_probs.copy()
+    damage_probs[0, loc_cls > 0] = 0
+    dmg_cls = np.argmax(damage_probs, axis=0)
+    buildings = label(loc_cls)
+    if min_size is not None:
+        # If there are any objects at all
+        if buildings.max() > 0:
+            buildings = remove_small_objects(buildings, min_size=min_size)
+            buildings, _, _ = relabel_sequential(buildings)
+            loc_cls = buildings > 0
+            dmg_cls[~loc_cls] = 0
+    if assign_severe:
+        classes = np.arange(1, 5)
+        for region in regionprops(buildings):
+            region_label, area, solidity = region["label"], region["area"], region["solidity"]
+            region_mask = buildings == region_label
+            if area < 2048 or solidity > 0.75:
+                region_classes = dmg_cls[region_mask]
+                label_counts = np.array([np.sum(region_classes == cls_index) for cls_index in classes])
+                present_counts = label_counts[label_counts > 0]
+                if present_counts.size == 0:
+                    # No damage votes inside this region: leave it as is instead of dropping into
+                    # a debugger / failing on the max() of an empty selection.
+                    continue
+                min_count = max(1, present_counts.mean() - 3 * present_counts.std())
+                labels = classes[label_counts >= min_count]
+                dmg_cls[region_mask] = labels.max()
+    dmg_cls[dmg_cls == 0] = 1  # Fill remaining with damage type 1 (no damage)
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
+def make_predictions_floodfill(damage_probs: np.ndarray):
+    """
+    Convert class scores into localization and damage masks, spreading the damage labels with a
+    watershed that runs on the distance transform of the non-building area.
+    :param damage_probs: Array with classes on axis 0, channel 0 = background (not modified)
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; damage values below 1 are set to 1
+    """
+    loc_pred = np.stack((damage_probs[0, ...], np.sum(damage_probs[1:, ...], axis=0)))
+    loc_cls = np.argmax(loc_pred, axis=0)
+    # After we have 'fixed' localization predictions, we must zero-out probabilities for damage probs
+    damage_probs = damage_probs.copy()
+    damage_probs[0, loc_cls > 0] = 0
+    seed = np.argmax(damage_probs, axis=0)
+    dist = cv2.distanceTransform((1 - loc_cls).astype(np.uint8), distanceType=cv2.DIST_L2, maskSize=3, dstType=cv2.CV_8U)
+    dist = np.clip(dist, a_min=0, a_max=255).astype(np.uint8)
+    # watershed expects a 3-channel 8-bit image
+    img = np.dstack([dist, dist, dist])
+    # Markers must be 32-bit integers; a plain `int` cast yields int64 on most platforms,
+    # which OpenCV cannot convert.
+    dmg_cls = cv2.watershed(img, seed.astype(np.int32))
+    if not isinstance(dmg_cls, np.ndarray):
+        dmg_cls = dmg_cls.get()
+    dmg_cls[dmg_cls < 1] = 1
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
+def make_predictions_floodfill_with_image(damage_probs: np.ndarray, image):
+    """
+    Convert class scores into localization and damage masks, spreading the damage labels with a
+    watershed that runs on the given image.
+    :param damage_probs: Array with classes on axis 0, channel 0 = background (not modified)
+    :param image: Image passed to ``cv2.watershed`` as is
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; damage values below 1 are set to 1
+    """
+    loc_pred = np.stack((damage_probs[0, ...], np.sum(damage_probs[1:, ...], axis=0)))
+    loc_cls = np.argmax(loc_pred, axis=0)
+    # After we have 'fixed' localization predictions, we must zero-out probabilities for damage probs
+    damage_probs = damage_probs.copy()
+    damage_probs[0, loc_cls > 0] = 0
+    seed = np.argmax(damage_probs, axis=0)
+    # Markers must be 32-bit integers; a plain `int` cast yields int64 on most platforms,
+    # which OpenCV cannot convert.
+    dmg_cls = cv2.watershed(image, seed.astype(np.int32))
+    if not isinstance(dmg_cls, np.ndarray):
+        dmg_cls = dmg_cls.get()
+    dmg_cls[dmg_cls < 1] = 1
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
+def make_predictions_dominant_v2(
+    damage_probs: np.ndarray, min_size=32, assign_dominant=True, max_building_area=4096, min_solidity=0.9
+):
+    """
+    Dominant-class postprocessing with a stricter rule: a building is overwritten with its most
+    frequent damage class only when its area is below ``max_building_area`` AND its solidity is
+    above ``min_solidity``.
+    :param damage_probs: Array with classes on axis 0, channel 0 = background (not modified)
+    :param min_size: Minimal component size passed to ``remove_small_objects``; None disables the filter
+    :param assign_dominant: Whether to overwrite each qualifying component with its dominant class
+    :param max_building_area: Area bound of the dominant-class rule
+    :param min_solidity: Solidity bound of the dominant-class rule
+    :return: (localization mask of 0/1, damage mask) as uint8 arrays; the damage mask never contains 0
+    """
+    loc_pred = np.stack((damage_probs[0, ...], np.sum(damage_probs[1:, ...], axis=0)))
+    loc_cls = np.argmax(loc_pred, axis=0)
+    # After we have 'fixed' localization predictions, we must zero-out probabilities for damage probs
+    damage_probs = damage_probs.copy()
+    damage_probs[0, loc_cls > 0] = 0
+    dmg_cls = np.argmax(damage_probs, axis=0)
+    buildings = label(loc_cls)
+    if min_size is not None:
+        # If there are any objects at all
+        if buildings.max() > 0:
+            buildings = remove_small_objects(buildings, min_size=min_size)
+            buildings, _, _ = relabel_sequential(buildings)
+            loc_cls = buildings > 0
+            dmg_cls[~loc_cls] = 0
+    if assign_dominant:
+        classes = list(range(1, 5))
+        for region in regionprops(buildings):
+            region_label, area, solidity = region["label"], region["area"], region["solidity"]
+            region_mask = buildings == region_label
+            if area < max_building_area and solidity > min_solidity:
+                region_classes = dmg_cls[region_mask]
+                label_counts = [np.sum(region_classes == cls_index) for cls_index in classes]
+                # argmax returns the first maximum, so ties resolve to the lowest class
+                dmg_cls[region_mask] = np.argmax(label_counts) + 1
+    dmg_cls[dmg_cls == 0] = 1  # Fill remaining with damage type 1 (no damage)
+    return loc_cls.astype(np.uint8), dmg_cls.astype(np.uint8)
diff --git a/xview/pseudo.py b/xview/pseudo.py
new file mode 100644
index 0000000..c8800ce
--- /dev/null
+++ b/xview/pseudo.py
@@ -0,0 +1,93 @@
+import numpy as np
+import torch.nn.functional as F
+from catalyst.dl import Callback, CallbackOrder, RunnerState
+from pytorch_toolbelt.utils.catalyst import PseudolabelDatasetMixin
+from pytorch_toolbelt.utils.torch_utils import to_numpy
+class CEOnlinePseudolabelingCallback2d(Callback):
+ """
+ Online pseudo-labeling callback for multi-class problem.
+ While the loader named `pseudolabel_loader` is running, the model output is turned into
+ per-pixel class indices; pixels whose top probability does not exceed `prob_threshold` are
+ set to `unlabeled_class`, and the result is stored through `unlabeled_ds.set_target`.
+ >>> unlabeled_train = get_test_dataset(
+ >>> data_dir, image_size=image_size, augmentation=augmentations
+ >>> )
+ >>> unlabeled_eval = get_test_dataset(
+ >>> data_dir, image_size=image_size
+ >>> )
+ >>>
+ >>> callbacks += [
+ >>> CEOnlinePseudolabelingCallback2d(
+ >>> unlabeled_train.targets,
+ >>> pseudolabel_loader="label",
+ >>> prob_threshold=0.9)
+ >>> ]
+ >>> train_ds = train_ds + unlabeled_train
+ >>>
+ >>> loaders = collections.OrderedDict()
+ >>> loaders["train"] = DataLoader(train_ds)
+ >>> loaders["valid"] = DataLoader(valid_ds)
+ >>> loaders["label"] = DataLoader(unlabeled_eval, shuffle=False) # ! shuffle=False is important !
+ """
+ def __init__(
+ self,
+ unlabeled_ds: PseudolabelDatasetMixin,
+ pseudolabel_loader="label",
+ prob_threshold=0.9,
+ sample_index_key="index",
+ output_key="logits",
+ unlabeled_class=-100,
+ label_smoothing=0.0,
+ label_frequency=1,
+ ):
+ # The threshold must lie strictly between 0.5 and 1.0
+ assert 1.0 > prob_threshold > 0.5
+ super().__init__(CallbackOrder.Other)
+ self.unlabeled_ds = unlabeled_ds
+ self.pseudolabel_loader = pseudolabel_loader
+ self.prob_threshold = prob_threshold
+ self.sample_index_key = sample_index_key
+ self.output_key = output_key
+ self.unlabeled_class = unlabeled_class
+ # NOTE(review): label_smoothing is stored but not read anywhere in this class
+ self.label_smoothing = label_smoothing
+ self.label_frequency = label_frequency
+ # Epoch of the most recent relabeling pass; None until the first pass has finished
+ self.last_labeled_epoch = None
+ self.should_relabel = None
+ def on_epoch_start(self, state: RunnerState):
+ # Relabel on the first epoch, then whenever the distance to the last relabeled epoch
+ # is a multiple of label_frequency
+ self.should_relabel = (
+ self.last_labeled_epoch is None or (state.epoch - self.last_labeled_epoch) % self.label_frequency == 0
+ )
+ def on_epoch_end(self, state: RunnerState):
+ if self.should_relabel:
+ self.last_labeled_epoch = state.epoch
+ def get_probabilities(self, state: RunnerState):
+ # Softmax over dim 1 of the detached model output selected by output_key
+ probs = state.output[self.output_key].detach().softmax(dim=1)
+ indexes = state.input[self.sample_index_key]
+ # Targets are stored at a hard-coded 1024x1024 resolution; upsample smaller/larger outputs
+ if probs.size(2) != 1024 or probs.size(3) != 1024:
+ probs = F.interpolate(probs, size=(1024, 1024), mode="bilinear", align_corners=False)
+ return to_numpy(probs), to_numpy(indexes)
+ def on_batch_end(self, state: RunnerState):
+ # Only batches of the dedicated pseudo-label loader are processed
+ if state.loader_name != self.pseudolabel_loader:
+ return
+ if not self.should_relabel:
+ return
+ # Get predictions for batch
+ probs, indexes = self.get_probabilities(state)
+ for p, sample_index in zip(probs, indexes):
+ max_prob = np.max(p, axis=0)
+ class_index = np.argmax(p, axis=0)
+ # Pixels that are not confident enough are marked with the unlabeled class
+ confident_classes = max_prob > self.prob_threshold
+ class_index[~confident_classes] = self.unlabeled_class
+ self.unlabeled_ds.set_target(sample_index, class_index)
diff --git a/xview/rounder.py b/xview/rounder.py
new file mode 100644
index 0000000..b8b4e3f
--- /dev/null
+++ b/xview/rounder.py
@@ -0,0 +1,85 @@
+from functools import partial
+from multiprocessing.pool import Pool
+import cv2
+import numpy as np
+import scipy as sp
+import torch
+from pytorch_toolbelt.utils.torch_utils import to_numpy
+from xview.dataset import read_mask
+from xview.metric import CompetitionMetricCallback
+from xview.postprocessing import make_predictions_naive
+def _compute_fn(args, coef_exp):
+ """
+ Score one sample: scale its class probabilities by `coef_exp`, convert them to masks with
+ make_predictions_naive and build a metric row with CompetitionMetricCallback.get_row_pair.
+ :param args: Pair (xi, dmg_true) of predicted probabilities and the ground-truth mask
+ :param coef_exp: Multiplier applied to the float32 probabilities
+ :return: The row returned by CompetitionMetricCallback.get_row_pair
+ """
+ xi, dmg_true = args
+ dmg_pred = xi.astype(np.float32) * coef_exp
+ loc_pred, dmg_pred = make_predictions_naive(dmg_pred)
+ # Predictions whose first dimension is not 1024 are resized to 1024x1024 with nearest-neighbour sampling
+ if loc_pred.shape[0] != 1024:
+ loc_pred = cv2.resize(loc_pred, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST)
+ dmg_pred = cv2.resize(dmg_pred, dsize=(1024, 1024), interpolation=cv2.INTER_NEAREST)
+ # dmg_true is passed for both ground-truth arguments
+ row = CompetitionMetricCallback.get_row_pair(loc_pred, dmg_pred, dmg_true, dmg_true)
+ return row
+class OptimizedRounder(object):
+    """
+    Searches per-class multipliers for the predicted probabilities that maximize the
+    competition score, using Nelder-Mead on `1 - score`.
+    :param apply_softmax: Must equal "pre"; the stored predictions are softmaxed while loading.
+    :param workers: Number of worker processes. A falsy value (the default 0, or None) lets
+        multiprocessing pick os.cpu_count(); previously 0 was passed to Pool and raised ValueError.
+    """
+    def __init__(self, apply_softmax, workers=0):
+        # Holds the scipy optimization result after fit(); 0 means "not fitted yet"
+        self.coef_ = 0
+        self.workers = workers
+        self.apply_softmax = apply_softmax
+    def _compute_scores(self, coef, X, y):
+        """Score all (X, y) pairs with the given coefficients in a process pool."""
+        # Add two trailing axes so the coefficients multiply along the leading (class) axis
+        coef_exp = np.expand_dims(np.expand_dims(coef, -1), -1)
+        proc_fn = partial(_compute_fn, coef_exp=coef_exp)
+        # Pool(0) is invalid; None makes Pool use os.cpu_count() processes
+        processes = self.workers if self.workers else None
+        with Pool(processes) as wp:
+            all_rows = list(wp.imap_unordered(proc_fn, zip(X, y)))
+        score, localization_f1, damage_f1, damage_f1s = CompetitionMetricCallback.compute_metrics(all_rows)
+        return score, localization_f1, damage_f1, damage_f1s
+    def _target_metric_loss(self, coef, X, y):
+        """Objective for the optimizer: 1 - competition score (lower is better)."""
+        score, localization_f1, damage_f1, damage_f1s = self._compute_scores(coef, X, y)
+        print(score, localization_f1, damage_f1, damage_f1s, "coeffs", coef)
+        return 1.0 - score
+    def _prepare_data(self, X, y):
+        """Load predictions from the paths in X (np.load) and masks from the paths in y into memory."""
+        assert self.apply_softmax == "pre"
+        # Softmax over dim 0, stored as float16 to reduce the memory footprint
+        X_data = [to_numpy(torch.from_numpy(np.load(xi)).float().softmax(dim=0)).astype(np.float16) for xi in X]
+        Y_data = [read_mask(yi) for yi in y]
+        print("Loaded data into memory")
+        return X_data, Y_data
+    def fit(self, X, y):
+        """Optimize the coefficients on the given files and return the best ones found."""
+        X_data, Y_data = self._prepare_data(X, y)
+        loss_partial = partial(self._target_metric_loss, X=X_data, y=Y_data)
+        initial_coef = [0.5, 1.1, 1.1, 1.1, 1.1]
+        self.coef_ = sp.optimize.minimize(
+            loss_partial, initial_coef, method="nelder-mead", options={"maxiter": 100, "xatol": 0.001}
+        )
+        del X_data, Y_data
+        return self.coefficients()
+    def predict(self, X, y, coef: np.ndarray):
+        """Score the given files with explicit coefficients; returns (score, localization_f1, damage_f1, damage_f1s)."""
+        X_data, Y_data = self._prepare_data(X, y)
+        score, localization_f1, damage_f1, damage_f1s = self._compute_scores(coef, X_data, Y_data)
+        del X_data, Y_data
+        return score, localization_f1, damage_f1, damage_f1s
+    def coefficients(self):
+        """Return the optimized coefficient vector; only valid after fit() has been called."""
+        return self.coef_["x"]
diff --git a/xview/scheduler.py b/xview/scheduler.py
new file mode 100644
index 0000000..225198b
--- /dev/null
+++ b/xview/scheduler.py
@@ -0,0 +1,52 @@
+import torch
+from pytorch_toolbelt.optimization.lr_schedules import PolyLR
+from torch.optim.lr_scheduler import (
+ OneCycleLR,
+ ExponentialLR,
+ CyclicLR,
+ MultiStepLR,
+ CosineAnnealingLR,
+ CosineAnnealingWarmRestarts,
+def get_scheduler(scheduler_name: str, optimizer, lr, num_epochs, batches_in_epoch=None):
+ """
+ Create a learning-rate scheduler by (case-insensitive) name.
+ :param scheduler_name: One of "poly", "cos", "cosr", "1cycle"/"one_cycle", "exp", "clr", "multistep", "simple";
+ None or "none" disables scheduling
+ :param optimizer: Optimizer the scheduler is attached to
+ :param lr: Peak learning rate, used by the "1cycle"/"one_cycle" and "clr" schedules
+ :param num_epochs: Number of epochs, used to derive periods and milestones
+ :param batches_in_epoch: Batches per epoch, used by "1cycle"/"one_cycle" and "clr"
+ :return: Scheduler instance, or None when scheduling is disabled
+ :raises KeyError: If the name is not recognized
+ """
+ if scheduler_name is None or scheduler_name.lower() == "none":
+ return None
+ if scheduler_name.lower() == "poly":
+ return PolyLR(optimizer, num_epochs, gamma=0.9)
+ if scheduler_name.lower() == "cos":
+ return CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)
+ if scheduler_name.lower() == "cosr":
+ # Restart period is a quarter of the run, but never shorter than 2 epochs
+ return CosineAnnealingWarmRestarts(optimizer, T_0=max(2, num_epochs // 4), eta_min=1e-5)
+ if scheduler_name.lower() in {"1cycle", "one_cycle"}:
+ # NOTE(review): OneCycleLR is imported from torch.optim.lr_scheduler at the top of this file, whose
+ # constructor takes max_lr/total_steps rather than lr_range/num_steps/warmup_fraction/decay_fraction.
+ # Confirm which OneCycleLR implementation is intended here.
+ return OneCycleLR(
+ optimizer, lr_range=(lr, 1e-6, 1e-5), num_steps=batches_in_epoch, warmup_fraction=0.05, decay_fraction=0.1
+ )
+ if scheduler_name.lower() == "exp":
+ return ExponentialLR(optimizer, gamma=0.95)
+ if scheduler_name.lower() == "clr":
+ # NOTE(review): batches_in_epoch defaults to None, which makes the floor division below fail
+ return CyclicLR(
+ optimizer,
+ base_lr=1e-6,
+ max_lr=lr,
+ step_size_up=batches_in_epoch // 4,
+ # mode='exp_range',
+ gamma=0.99,
+ )
+ if scheduler_name.lower() == "multistep":
+ # LR is multiplied by 0.3 at 50%, 70% and 90% of training
+ return MultiStepLR(
+ optimizer, milestones=[int(num_epochs * 0.5), int(num_epochs * 0.7), int(num_epochs * 0.9)], gamma=0.3
+ )
+ if scheduler_name.lower() == "simple":
+ # LR is multiplied by 0.4 at 40% and 70% of training
+ return MultiStepLR(optimizer, milestones=[int(num_epochs * 0.4), int(num_epochs * 0.7)], gamma=0.4)
+ raise KeyError(scheduler_name)
diff --git a/xview/ssim_loss.py b/xview/ssim_loss.py
new file mode 100644
index 0000000..7ff3ca4
--- /dev/null
+++ b/xview/ssim_loss.py
@@ -0,0 +1,58 @@
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+import numpy as np
+from math import exp
+def gaussian(window_size, sigma):
+ """Return a 1-D Gaussian kernel of length `window_size`, centered at `window_size // 2` and normalized to sum to 1."""
+ gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
+ return gauss / gauss.sum()
+def create_window(window_size, channel):
+ """Build a (channel, 1, window_size, window_size) Gaussian window (sigma 1.5) for use as a grouped conv2d weight."""
+ _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
+ # The outer product of the 1-D kernel with itself gives the 2-D kernel
+ _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
+ window = _2D_window.expand(channel, 1, window_size, window_size).contiguous()
+ return window
+def _ssim(img1, img2, window, window_size, channel, size_average=True):
+ """
+ Compute an SSIM-based loss (1 - SSIM) between two image batches.
+ :param img1: First batch, filtered per channel with `window`
+ :param img2: Second batch, same layout as img1
+ :param window: Grouped-convolution weight, e.g. from create_window
+ :param window_size: Window side length; half of it is used as padding
+ :param channel: Number of channels, used as the conv2d `groups` value
+ :param size_average: If true return one scalar over the whole batch, otherwise one value per sample
+ :return: 1 - mean SSIM
+ """
+ # Local means via Gaussian filtering
+ mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
+ mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
+ mu1_sq = mu1.pow(2)
+ mu2_sq = mu2.pow(2)
+ mu1_mu2 = mu1 * mu2
+ # Local variances and covariance: E[x*y] - E[x]*E[y]
+ sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
+ sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
+ sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
+ # Small constants that keep the ratio stable when the denominators approach zero
+ C1 = 0.01 ** 2
+ C2 = 0.03 ** 2
+ ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
+ if size_average:
+ return 1 - ssim_map.mean()
+ else:
+ # Average over every non-batch dimension, one at a time
+ return 1 - ssim_map.mean(1).mean(1).mean(1)
+class SSIM(torch.nn.Module):
+ """SSIM loss between softmaxed predictions and one-hot encoded integer targets."""
+ def __init__(self, channels, window_size=11, size_average=True):
+ super(SSIM, self).__init__()
+ self.window_size = window_size
+ self.size_average = size_average
+ # Registered as a buffer so the window follows the module across devices
+ self.register_buffer("window", create_window(window_size, channels))
+ def forward(self, input: torch.Tensor, target: torch.Tensor):
+ (_, channel, _, _) = input.size()
+ input = input.softmax(dim=1)
+ # one_hot appends the class axis last; move it to dim 1 to match the input layout
+ target = F.one_hot(target, channel).permute(0, 3, 1, 2).type_as(input)
+ window = self.window.type_as(input)
+ return _ssim(input, target, window, self.window_size, channel, self.size_average)
diff --git a/xview/train_utils.py b/xview/train_utils.py
new file mode 100644
index 0000000..cbd83c2
--- /dev/null
+++ b/xview/train_utils.py
@@ -0,0 +1,63 @@
+import torch
+__all__ = ["report_checkpoint", "clean_checkpoint"]
+from catalyst.dl import CriterionCallback
+from .dataset import UNLABELED_SAMPLE
+from .losses import get_loss
+def report_checkpoint(checkpoint):
+ """Print the epoch and the train/valid epoch metrics stored in a checkpoint, leaving out LR, momentum and timer fields."""
+ print("Epoch :", checkpoint["epoch"])
+ # Example of a metrics dictionary (presumably one entry of checkpoint["epoch_metrics"]):
+ # {'mask_pre/bce': 0.011939526008819881, 'mask_post/ce': 0.039905175798535336, 'loss': 0.05184470175895215, 'jaccard': 0.6682964469961886, '_base/lr': 0.001,
+ # '_base/momentum': 0.9, '_timers/data_time': 0.2810825881448131, '_timers/model_time': 0.025946252149632927, '_timers/batch_time': 0.3070834094035581, '_timers/_fps': 121.48878000184467,
+ # 'localization_f1': 0.7123450379603988, 'damage_f1': 0.021565931686082063, 'weighted_f1': 0.22879966356837708, 'jaccard_no-damage': 0.4595737876547124, 'jaccard_minor-damage': 0.7845541293707017, 'jaccard_major-damage': 0.7821522489229876, 'jaccard_destroyed': 0.6469056220363518}
+ # Bookkeeping fields that are not model-quality metrics
+ skip_fields = [
+ "_base/lr",
+ "_base/momentum",
+ "_timers/data_time",
+ "_timers/model_time",
+ "_timers/batch_time",
+ "_timers/_fps",
+ ]
+ print(
+ "Metrics (Train):", [(k, v) for k, v, in checkpoint["epoch_metrics"]["train"].items() if k not in skip_fields]
+ )
+ print(
+ "Metrics (Valid):", [(k, v) for k, v, in checkpoint["epoch_metrics"]["valid"].items() if k not in skip_fields]
+ )
+def clean_checkpoint(src_fname, dst_fname):
+ """Load the checkpoint at `src_fname`, drop the criterion/optimizer/scheduler state dicts if present, and save it to `dst_fname`."""
+ checkpoint = torch.load(src_fname)
+ keys = ["criterion_state_dict", "optimizer_state_dict", "scheduler_state_dict"]
+ for key in keys:
+ if key in checkpoint:
+ del checkpoint[key]
+ torch.save(checkpoint, dst_fname)
+def get_criterion_callback(loss_name, input_key, output_key, prefix=None, loss_weight=1.0, ignore_index=UNLABELED_SAMPLE):
+    """
+    Create a loss and the CriterionCallback that applies it.
+    :param loss_name: Name of the loss, resolved through get_loss
+    :param input_key: Key forwarded to CriterionCallback as input_key; also the default prefix
+    :param output_key: Key forwarded to CriterionCallback as output_key
+    :param prefix: Prefix of the criterion key; defaults to `input_key` when None
+    :param loss_weight: Multiplier applied to the loss value
+    :param ignore_index: Target value forwarded to get_loss as ignore_index
+    :return: Tuple (criterions_dict, criterion_callback, criterion_callback.prefix)
+    """
+    # Resolve the default prefix BEFORE building the key. Previously the dictionary key was
+    # "None/<loss_name>" while the callback looked up "<input_key>/<loss_name>".
+    if prefix is None:
+        prefix = input_key
+    criterion_key = f"{prefix}/{loss_name}"
+    criterions_dict = {criterion_key: get_loss(loss_name, ignore_index=ignore_index)}
+    criterion_callback = CriterionCallback(
+        prefix=criterion_key,
+        input_key=input_key,
+        output_key=output_key,
+        criterion_key=criterion_key,
+        multiplier=float(loss_weight),
+    )
+    return criterions_dict, criterion_callback, criterion_callback.prefix
+def get_criterion(loss_name, prefix=None, ignore_index=UNLABELED_SAMPLE):
+ """
+ Create a loss by name and return it together with its "<prefix>/<loss_name>" key.
+ NOTE(review): with the default prefix=None the key becomes the literal "None/<loss_name>" - confirm this is intended.
+ """
+ loss = get_loss(loss_name, ignore_index=ignore_index)
+ prefix = f"{prefix}/{loss_name}"
+ return loss, prefix
diff --git a/xview/utils/combine_jsons.py b/xview/utils/combine_jsons.py
new file mode 100644
index 0000000..5271772
--- /dev/null
+++ b/xview/utils/combine_jsons.py
@@ -0,0 +1,98 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+import json
+def combine_output(pred_polygons, pred_classification, output_file):
+ """
+ Attach the predicted damage class to every predicted polygon and write the combined json.
+ :param pred_polygons: the file path to the localization inference output json
+ :param pred_classification: the file path to the classification inference output json
+ :param output_file: the file path to store the combined json file
+ """
+ # Skeleton of the json with null values
+ # Only features["xy"] is filled below; the metadata fields keep these placeholder values
+ output_json = {
+ "features": {"lng_lat": [], "xy": []},
+ "metadata": {
+ "sensor": "",
+ "provider_asset_type": "",
+ "gsd": 0,
+ "capture_date": "",
+ "off_nadir_angle": 0,
+ "pan_resolution": 0,
+ "sun_azimuth": 0,
+ "sun_elevation": 0,
+ "target_azimuth": 0,
+ "disaster": "",
+ "disaster_type": "",
+ "catalog_id": "",
+ "original_width": 0,
+ "original_height": 0,
+ "width": 0,
+ "height": 0,
+ "id": "",
+ "img_name": "",
+ },
+ }
+ # Open the classification json
+ with open(pred_classification) as labels:
+ label_json = json.load(labels)
+ # Open the localization json
+ with open(pred_polygons) as polys:
+ poly_json = json.load(polys)
+ # Match UUIDs from the two jsons and combine in output_json skeleton
+ # The classification json is indexed by each polygon's "uid" property
+ for p in poly_json["features"]["xy"]:
+ p["properties"]["subtype"] = label_json[p["properties"]["uid"]]
+ output_json["features"]["xy"].append(p)
+ # Finally save out the combined json file
+ with open(output_file, "w") as out:
+ json.dump(output_json, out)
+# Command-line entry point: --polys, --classes and --output are all required
+if __name__ == "__main__":
+ import argparse
+ # Parse command line arguments
+ parser = argparse.ArgumentParser(
+ description="""combine_jsons.py: combines the outputs of localization and classification inference into a single output json"""
+ )
+ parser.add_argument(
+ "--polys",
+ required=True,
+ metavar="/path/to/input/polygons.json",
+ help="Full path to the json from polygonize.py",
+ )
+ parser.add_argument(
+ "--classes",
+ required=True,
+ metavar="/path/to/classifications.json",
+ help="Full path to the json from tensor_inf.py",
+ )
+ parser.add_argument(
+ "--output",
+ required=True,
+ metavar="/path/to/pred.json",
+ help="Full path to save the final single output file to",
+ )
+ args = parser.parse_args()
+ # Combining the json based off the uuid assigned at the polygonize stage
+ combine_output(args.polys, args.classes, args.output)
diff --git a/xview/utils/data_finalize.sh b/xview/utils/data_finalize.sh
new file mode 100644
index 0000000..58ee049
--- /dev/null
+++ b/xview/utils/data_finalize.sh
@@ -0,0 +1,105 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+set -euo pipefail
+# Signal handler registered by the trap statement below: removes the partially built
+# "$input"/spacenet_gt directory and exits with status 99
+# NOTE(review): the script runs with `set -u`; if a signal arrives before -i has been parsed, "$input" is unset - confirm this is acceptable
+function trap_ctrlc ()
+ # perform cleanup here
+ echo "Ctrl-C or Error caught...performing clean up"
+ if [ -d "$input"/spacenet_gt ]; then
+ rm -rf "$input"/spacenet_gt
+ fi
+ exit 99
+# initialise trap to call trap_ctrlc function
+# when signal 2 (SIGINT), 9 (SIGKILL), 13 (SIGPIPE) or 3 (SIGQUIT) is received
+# NOTE(review): SIGKILL cannot be caught, so listing 9 has no effect
+trap "trap_ctrlc" 2 9 13 3
+# Print the usage text (options -i, -s and -x) to stdout
+help_message () {
+ printf "${0}: Moves files around for the spacenet model to train\n\t-i /path/to/xBD/ \n\t-s split percentage to go to train\n\t-x /path/to/xview-2/repository/\n\t(Note: this script expects mask_polygons.py to have ran first to create labels)\n\n"
+# Checking for `bc` first (users reported that wasn't installed on some systems)
+if ! [ -x "$(command -v bc)" ]; then
+ echo 'Error: bc is not installed, please install before continuing.' >&2
+ exit 98
+# Fewer than three command-line words cannot carry the required options
+if [ $# -lt 3 ]; then
+ help_message
+ exit 1
+while getopts "i:s:x:h" OPTION
+ case $OPTION in
+ h)
+ help_message
+ exit 1
+ ;;
+ i)
+ input="$OPTARG"
+ ;;
+ s)
+ split="$OPTARG"
+ ;;
+ x)
+ # NOTE(review): no statement is visible for -x here, yet "$XBDIR" is used further down - confirm it is assigned
+ ;;
+ esac
+# Get list of disasters to iterate over
+disasters=`/bin/ls -1 "$input"`
+# Making the spacenet training directory
+mkdir -p "$input"/spacenet_gt/images
+mkdir -p "$input"/spacenet_gt/labels
+mkdir -p "$input"/spacenet_gt/dataSet
+# for each disaster, copy the pre images and labels to the spacenet training directory
+# The unquoted expansions below are split on whitespace, so names containing spaces are not supported
+for disaster in $disasters; do
+ masks=`/bin/ls -1 "$input"/"$disaster"/masks`
+ for mask in $masks; do
+ cp "$input"/"$disaster"/masks/$mask "$input"/spacenet_gt/labels
+ cp "$input"/"$disaster"/images/$mask "$input"/spacenet_gt/images
+ done
+# Listing all files to do the split
+cd "$input"/spacenet_gt/dataSet/
+touch all_images.txt
+/bin/ls -1 "$input"/spacenet_gt/images > all_images.txt
+line_count=`cat all_images.txt | wc -l`
+# Number of training lines = line count * split fraction; awk drops the fractional part
+lines_to_split=$(bc -l <<< "$line_count"*"$split")
+split -l `awk -F. '{print $1}' <<< $lines_to_split` all_images.txt
+# The first two pieces written by `split` become the train and validation lists
+# NOTE(review): any further pieces (xac, ...) produced for small split fractions are left untouched - confirm
+mv ./xaa train.txt
+mv ./xab val.txt
+rm all_images.txt
+# Running the mean creation code over the images
+python "$XBDIR"/spacenet/src/features/compute_mean.py "$input"/spacenet_gt/dataSet/train.txt --root "$input"/spacenet_gt/images/ --output "$input"/spacenet_gt/dataSet/mean.npy
+echo "Done!"
diff --git a/xview/utils/inference.sh b/xview/utils/inference.sh
new file mode 100644
index 0000000..bbe6b2c
--- /dev/null
+++ b/xview/utils/inference.sh
@@ -0,0 +1,175 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+set -euo pipefail
+# Signal handler registered by the trap statement below: removes the /tmp/inference working
+# directory and exits with status 99
+function trap_ctrlc ()
+ # perform cleanup here
+ echo "Ctrl-C or Error caught...performing clean up check /tmp/inference.log"
+ if [ -d /tmp/inference ]; then
+ rm -rf /tmp/inference
+ fi
+ exit 99
+# initialise trap to call trap_ctrlc function
+# when signal 2 (SIGINT), 9 (SIGKILL), 13 (SIGPIPE) or 3 (SIGQUIT) is received
+# NOTE(review): SIGKILL cannot be caught, so listing 9 has no effect
+trap "trap_ctrlc" 2 9 13 3
+# Print the usage text describing all supported options to stdout
+help_message () {
+ printf "${0}: Runs the polygonization in inference mode\n\t-x: path to xview-2 repository\n\t-i: /full/path/to/input/pre-disaster/image.png\n\t-p: /full/path/to/input/post-disaster/image.png\n\t-o: /path/to/output.png\n\t-l: path/to/localization_weights\n\t-c: path/to/classification_weights\n\t-e /path/to/virtual/env/activate\n\t-y continue with local environment and without interactive prompt\n\n"
+# Fewer than 13 command-line words means at least one option or its value is missing
+if [ "$#" -lt 13 ]; then
+ help_message
+ exit 1
+while getopts "i:p:o:x:l:e:c:hy" OPTION
+ case $OPTION in
+ h)
+ help_message
+ exit 0
+ ;;
+ y)
+ continue_answer="y"
+ ;;
+ o)
+ output_file="$OPTARG"
+ ;;
+ x)
+ # NOTE(review): the assignment of XBDIR itself is not visible in this diff - confirm it precedes this line
+ virtual_env="$XBDIR/bin/activate"
+ ;;
+ i)
+ input="$OPTARG"
+ ;;
+ p)
+ input_post="$OPTARG"
+ ;;
+ l)
+ localization_weights="$OPTARG"
+ ;;
+ c)
+ classification_weights="$OPTARG"
+ ;;
+ e)
+ # An explicit -e replaces any virtual_env value set by -x, and vice versa (whichever comes last wins)
+ virtual_env="$OPTARG"
+ ;;
+ ?)
+ # Unknown options print the usage text but still exit with status 0
+ help_message
+ exit 0
+ ;;
+ esac
+# NOTE(review): the assignments of inference_base, LOGFILE, label_temp, input_image and disaster_post_file
+# are not visible in this diff - confirm they are defined before this point
+# Create the output directory if it doesn't exist
+mkdir -p "$inference_base"
+if ! [ -f "$LOGFILE" ]; then
+ touch "$LOGFILE"
+printf "==========\n" >> "$LOGFILE"
+echo `date +%Y%m%dT%H%M%S` >> "$LOGFILE"
+printf "\n" >> "$LOGFILE"
+mkdir -p "$label_temp"
+printf "\n"
+printf "\n"
+# Run in inference mode
+# Because of the models _have_ to be in the correct directory, they use relative paths to find the source (e.g. "../src")
+# sourcing the virtual environment packages if they exist
+# this is *necessary* or all packages must be installed globally
+if [ -f "$virtual_env" ]; then
+ source "$virtual_env"
+ # NOTE(review): the prompt below reads like the fallback for a missing virtual environment; an `else` may be missing from this view
+ if [ "$continue_answer" = "n" ]; then
+ printf "Error: cannot source virtual environment \n\tDo you have all the dependencies installed and want to continue? [Y/N]: "
+ read continue_answer
+ if [ "$continue_answer" == "N" ]; then
+ exit 2
+ fi
+ fi
+cd "$XBDIR"/spacenet/inference/
+# Quietly running the localization inference to output a json with the predicted polygons from the supplied input image
+printf "Running localization\n"
+python3 ./inference.py --input "$input" --weights "$localization_weights" --mean "$XBDIR"/weights/mean.npy --output "$label_temp"/"${input_image%.*}".json >> "$LOGFILE" 2>&1
+printf "\n" >> "$LOGFILE"
+# Classification inferences start below
+cd "$XBDIR"/model
+# Replace the pre image here with the post
+# We need to do this so the classification inference pulls the images from the post
+# Since post is where the damage occurs
+printf "Grabbing post image file for classification\n"
+mkdir -p "$inference_base"/output_polygons
+printf "Running classification\n"
+# Extracting polygons from post image
+python3 ./process_data_inference.py --input_img "$disaster_post_file" --label_path "$label_temp"/"${input_image%.*}".json --output_dir "$inference_base"/output_polygons --output_csv "$inference_base"/output.csv >> "$LOGFILE" 2>&1
+# Classifying extracted polygons
+python3 ./damage_inference.py --test_data "$inference_base"/output_polygons --test_csv "$inference_base"/output.csv --model_weights "$classification_weights" --output_json /tmp/inference/classification_inference.json >> "$LOGFILE" 2>&1
+printf "\n" >> "$LOGFILE"
+# Combining the predicted polygons with the predicted labels, based off a UUID generated during the localization inference stage
+printf "Formatting json and scoring image\n"
+python3 "$XBDIR"/utils/combine_jsons.py --polys "$label_temp"/"${input_image%.*}".json --classes /tmp/inference/classification_inference.json --output "$inference_base/inference.json" >> "$LOGFILE" 2>&1
+printf "\n" >> "$LOGFILE"
+# Transforming the inference json file to the image required for scoring
+printf "Finalizing output file"
+python3 "$XBDIR"/utils/inference_image_output.py --input "$inference_base"/inference.json --output "$output_file" >> "$LOGFILE" 2>&1
+#Cleaning up by removing the temporary working directory we created
+printf "Cleaning up\n"
+rm -rf "$inference_base"
+printf "==========\n" >> "$LOGFILE"
+printf "Done!\n"
diff --git a/xview/utils/inference_image_output.py b/xview/utils/inference_image_output.py
new file mode 100644
index 0000000..36cf785
--- /dev/null
+++ b/xview/utils/inference_image_output.py
@@ -0,0 +1,187 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+import json
+from typing import List, Tuple
+from shapely import wkt
+from shapely.geometry import Polygon
+import numpy as np
+from cv2 import fillPoly, imwrite
+from PIL import Image
+def open_json(json_file_path):
+ """
+ Load an inference json file and return its ["features"]["xy"] entry.
+ :param json_file_path: path to open inference json file
+ :returns: the json data dictionary of localized polygon and their classifications
+ """
+ with open(json_file_path) as jf:
+ json_data = json.load(jf)
+ inference_data = json_data["features"]["xy"]
+ return inference_data
+def resize_mask_one_hot(mask, size):
+ """
+ Resize a label mask by one-hot encoding it, resizing every class plane with linear
+ interpolation and taking the argmax over the class planes.
+ :param mask: Label mask with a trailing singleton axis (it is squeezed away)
+ :param size: Two-element target size; size[0] and size[1] are passed to A.Resize in that order
+ :return: Resized label mask with the dtype of the input
+ """
+ # Imported locally so the module can be used without albumentations installed
+ import albumentations as A
+ import cv2
+ mask = np.squeeze(mask,-1)
+ # One plane per label value from 0 to mask.max()
+ one_hot_mask = (np.arange(mask.max() + 1) == mask[..., None]).astype(np.float32)
+ resize_op = A.Resize(size[0], size[1], interpolation=cv2.INTER_LINEAR)
+ image_resized = resize_op(image=one_hot_mask)["image"]
+ mask = np.argmax(image_resized, axis=2).astype(mask.dtype)
+ return mask
+def create_image(inference_data) -> np.ndarray:
+ """
+ Rasterize the polygons into a 1024x1024x1 uint8 mask whose pixel values are damage classes.
+ :params inference_data: json data dictionary of localized polygon and their classifications
+ :returns: an numpy array of 8-bit grey scale image with polygons filled in according to the key provided
+ """
+ # "un-classified" is mapped to the same value as "no-damage"
+ damage_key = {"un-classified": 1, "no-damage": 1, "minor-damage": 2, "major-damage": 3, "destroyed": 4}
+ mask_img = np.zeros((1024, 1024, 1), np.uint8)
+ for poly in inference_data:
+ # Polygons without a "subtype" property are treated as "no-damage"
+ damage = poly["properties"].get("subtype", "no-damage")
+ coords = wkt.loads(poly["wkt"])
+ # Only the exterior ring is drawn; interior rings (holes) are ignored
+ poly_np = np.array(coords.exterior.coords, np.int32)
+ fillPoly(mask_img, [poly_np], damage_key[damage])
+ return mask_img
+def create_instance_image(inference_data) -> Tuple[np.ndarray, List[int]]:
+ """
+ Rasterize the polygons into a 1024x1024 instance mask and collect one damage label per polygon.
+ :params inference_data: json data dictionary of localized polygon and their classifications
+ :returns: a tuple of (16-bit mask in which polygon number i is filled with the value i + 1,
+ list of damage labels in polygon order)
+ """
+ damage_key = {"un-classified": 1, "no-damage": 1, "minor-damage": 2, "major-damage": 3, "destroyed": 4}
+ mask_img = np.zeros((1024, 1024), np.uint16)
+ damage_labels = []
+ for poly_index, poly in enumerate(inference_data):
+ # Polygons without a "subtype" property are treated as "no-damage"
+ damage = poly["properties"].get("subtype", "no-damage")
+ damage_label = damage_key[damage]
+ damage_labels.append(damage_label)
+ coords = wkt.loads(poly["wkt"])
+ poly_np = np.array(coords.exterior.coords, np.int32)
+ # Instance ids start at 1 so that 0 stays reserved for background
+ fillPoly(mask_img, [poly_np], poly_index + 1)
+ return mask_img, damage_labels
+def colorize_mask(mask, color_map=None):
+    """
+    Convert a label mask into a PIL "L" image and attach a color palette to it. So long as the
+    image is saved as a PNG, it will render visibly using the provided color map.
+    :param mask: numpy array of labels (a trailing singleton axis is squeezed away); values 0 to 4
+        are damage classes and 5 is "unlabeled"
+    :param color_map: np.ndarray or list of 3-tuples with 6 rows; None or an empty sequence
+        selects the default palette
+    :return: PIL image with the palette attached
+    """
+    if len(mask.shape) == 3:
+        mask = np.squeeze(mask, -1)
+    mask = Image.fromarray(mask, "L")
+    # `color_map or default` raised ValueError for ndarray arguments (ambiguous truth value),
+    # so the emptiness check is spelled out explicitly
+    if color_map is None or len(color_map) == 0:
+        color_map = [
+            (0, 0, 0),  # 0=background
+            (0, 255, 0),  # no damage (or just 'building' for localization) (green)
+            (255, 255, 0),  # minor damage (yellow)
+            (255, 128, 0),  # major damage (orange)
+            (255, 0, 0),  # destroyed (red)
+            (127, 127, 127),  # Unlabeled
+        ]
+    # Lists of tuples are accepted as documented; they have no .shape/.astype of their own
+    color_map = np.asarray(color_map)
+    assert color_map.shape == (6, 3)
+    mask.putpalette(color_map.astype(np.uint8))
+    return mask
+def make_rgb_image(mask):
+ """Map every label in `mask` (values 0..5) to an RGB color by indexing a fixed 6-entry color table."""
+ color_map = np.array(
+ [
+ (0, 0, 0), # 0=background
+ (0, 255, 0), # no damage (or just 'building' for localization) (green)
+ (255, 255, 0), # minor damage (yellow)
+ (255, 128, 0), # major damage (orange)
+ (255, 0, 0), # destroyed (red)
+ (127, 127, 127), # Unlabeled
+ ], dtype=np.uint8
+ )
+ # Integer-array indexing appends a trailing axis of length 3 to the mask's shape
+ mask_rgb = color_map[mask]
+ return mask_rgb
+def save_image(polygons, output_path):
+ """
+ Colorize the mask with colorize_mask and save the resulting palette image.
+ :param polygons: np array with filled in polygons from create_image()
+ :param output_path: path to save the final output inference image
+ """
+ mask = colorize_mask(polygons)
+ mask.save(output_path)
+ # Output the filled in polygons to an image file
+ # imwrite(output_path, polygons)
+def create_inference_image(json_input_path, image_output_path):
+ """
+ Read an inference json, rasterize its polygons and save the colorized mask.
+ :param json_input_path: Path to output inference json file
+ :param image_output_path: Path to save the final inference image
+ :return: The mask array produced by create_image()
+ """
+ # Getting the inference data from the localization and classification
+ inference_data = open_json(json_input_path)
+ # Filling in the polygons and readying the image format
+ polygon_array = create_image(inference_data)
+ # Saving the image to the desired location
+ save_image(polygon_array, image_output_path)
+ return polygon_array
+# Command-line entry point: --input and --output are both required
+if __name__ == "__main__":
+ import argparse
+ # Parse command line arguments
+ parser = argparse.ArgumentParser(
+ description="""inference_image_output.py: Takes the inference localization and classification final outputs in json from and outputs an image ready to be scored based off the challenge parameters"""
+ )
+ parser.add_argument(
+ "--input", required=True, metavar="/path/to/final/inference.json", help="Full path to the final inference json"
+ )
+ parser.add_argument(
+ "--output", required=True, metavar="/path/to/inference.png", help="Full path to save the image to"
+ )
+ args = parser.parse_args()
+ # Creating the scoring image
+ create_inference_image(args.input, args.output)
diff --git a/xview/utils/mask_polygons.py b/xview/utils/mask_polygons.py
new file mode 100644
index 0000000..dcd0429
--- /dev/null
+++ b/xview/utils/mask_polygons.py
@@ -0,0 +1,275 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+import json
+from os import path, walk, makedirs
+from sys import exit, stderr
+from cv2 import fillPoly, imwrite
+import numpy as np
+from shapely import wkt
+from shapely.geometry import mapping, Polygon
+from skimage.io import imread
+from tqdm import tqdm
+# import imantics
+# This removes the massive amount of scikit warnings of "low contrast images"
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning)
+def get_dimensions(file_path):
+    """
+    Read an image and report the shape of its pixel array.
+    :param file_path: The path of the file
+    :return: the array shape as a 3-tuple. Image arrays are indexed rows first, so the tuple is
+             (height, width, channels) -- the order np.zeros() expects when the mask canvases
+             are allocated from this value
+    :raises ValueError: if the image does not decode to a 3-dimensional array
+    """
+    # imread already hands back an ndarray; np.asarray avoids a second copy of the pixels
+    img = np.asarray(imread(file_path))
+    # Unpacking (rather than returning img.shape as-is) keeps the 3-dimension requirement explicit
+    height, width, channels = img.shape
+    return (height, width, channels)
+def mask_polygons_separately(size, shapes):
+    """
+    Rasterize every polygon onto its own blank canvas.
+    :param size: shape tuple handed to np.zeros() for each canvas (the value returned by get_dimensions)
+    :param shapes: a dict of uid:polygon points as built by get_feature_info
+    :returns: a dict of uid:canvas, each canvas being the result of cv2.fillPoly with the polygon filled with 255s
+    """
+    # A fresh zeroed canvas per polygon keeps the masks independent of each other
+    return {
+        uid: fillPoly(np.zeros(size, np.uint8), [points], (255, 255, 255))
+        for uid, points in shapes.items()
+    }
+def mask_polygons_together(size, shapes):
+    """
+    Rasterize all polygons onto one shared mask, blanking every pixel covered by more than one polygon.
+    :param size: shape tuple handed to np.zeros() for the canvases (the value returned by get_dimensions)
+    :param shapes: a dict of uid:polygon points as built by get_feature_info
+    :returns: A uint8 numpy array with 255 where a single polygon covers the pixel and 0 elsewhere
+    """
+    # Per-pixel count of how many polygons cover it
+    coverage = np.zeros(size, np.uint8)
+    for points in shapes.values():
+        layer = np.zeros(size, np.uint8)
+        fillPoly(layer, [points], (1, 1, 1))
+        coverage += layer
+    # Overlaps (count > 1) are squashed back to 0 together with the background;
+    # pixels counted exactly once become pure white (255)
+    return np.where(coverage == 1, np.uint8(255), np.uint8(0))
+def mask_polygons_together_with_border(size, shapes, border):
+    """
+    Same as mask_polygons_together, but every polygon is first shrunk toward its centroid so that
+    polygons lying close to each other end up separated by a gap.
+    :param size: shape tuple handed to np.zeros() for the canvases (the value returned by get_dimensions)
+    :param shapes: a dict of uid:polygon points as built by get_feature_info
+    :param border: amount added to / subtracted from each vertex coordinate to move it toward the centroid
+    :returns: A uint8 numpy array with 255 where a single shrunken polygon covers the pixel and 0 elsewhere
+    """
+    def _nudge(value, center):
+        # Move one coordinate `border` toward the centroid; leave it alone when it is level with it
+        if value < center:
+            return value + border
+        if value > center:
+            return value - border
+        return value
+    coverage = np.zeros(size, np.uint8)
+    for points in shapes.values():
+        # Each polygon stored in shapes is a np.ndarray; shapely supplies its centroid and exterior ring
+        outline = Polygon(points)
+        (center_x, center_y) = outline.centroid.coords[0]
+        shrunk = [[_nudge(x, center_x), _nudge(y, center_y)] for (x, y) in outline.exterior.coords]
+        layer = np.zeros(size, np.uint8)
+        # Back to an int32 np.ndarray before filling, so a border remains between close polygons
+        fillPoly(layer, [np.array(shrunk, np.int32)], (1, 1, 1))
+        coverage += layer
+    # Overlapping pixels and background become 0, singly covered pixels become 255
+    return np.where(coverage == 1, np.uint8(255), np.uint8(0))
+def save_masks(masks, output_path, mask_file_name):
+    """
+    Write every per-polygon mask to its own PNG, named <mask_file_name>_<uid>.png.
+    :param masks: dictionary of UID:masked polygons from mask_polygons_separately()
+    :param output_path: path to save the masks
+    :param mask_file_name: the file name prefix the masks should have
+    """
+    for uid, mask in masks.items():
+        # The polygon uid is appended so each mask lands in a distinct file
+        target = path.join(output_path, mask_file_name + "_{}.png".format(uid))
+        imwrite(target, mask)
+def save_one_mask(masks, output_path, mask_file_name):
+    """
+    Write one combined mask image to <output_path>/<mask_file_name>.png.
+    :param masks: the single mask array to write (mask_chips passes the output of
+                  mask_polygons_together or mask_polygons_together_with_border)
+    :param output_path: path to save the mask
+    :param mask_file_name: the file name the mask should have, without extension
+    """
+    imwrite(path.join(output_path, mask_file_name + ".png"), masks)
+def read_json(json_path):
+    """
+    Load a json file from disk.
+    :param json_path: path to load json from
+    :returns: the decoded json content (a python dictionary of json features for the label files read by mask_chips)
+    """
+    # The context manager closes the file handle even when decoding fails
+    with open(json_path) as json_file:
+        return json.load(json_file)
+def get_feature_info(feature):
+    """
+    Collect the polygon outline of every entry under feature["features"]["xy"].
+    :param feature: a python dictionary of json labels
+    :returns: a dict mapping each entry's properties.uid to an int32 np.ndarray of polygon points
+    """
+    polygons_by_uid = {}
+    for entry in feature["features"]["xy"]:
+        # Only the first element of the geometry's coordinates is kept
+        # (presumably the exterior ring of a Polygon -- confirm if other geometry types can occur)
+        ring = mapping(wkt.loads(entry["wkt"]))["coordinates"][0]
+        points = np.array(list(ring), np.int32)
+        polygons_by_uid[entry["properties"]["uid"]] = points
+    return polygons_by_uid
+def mask_chips(json_path, images_directory, output_directory, single_file, border):
+    """
+    Create mask image(s) for every "_pre" label json found directly under json_path.
+    :param json_path: path to find multiple json files for the chips
+    :param images_directory: path to the directory containing the images to be masked
+    :param output_directory: path to the directory where masks are to be saved
+    :param single_file: a boolean value to see if masks should be saved a single file or multiple
+    :param border: when greater than 0 (and single_file is set) polygons are shrunk by this amount before filling
+    """
+    # Only the top level of json_path is listed; keep the "_pre" entries that are json files
+    label_files = [name for name in next(walk(json_path))[2] if "_pre" in name and name.endswith("json")]
+    for label_file in tqdm(label_files, unit="poly", leave=False):
+        chip_name = path.splitext(label_file)[0]
+        # Loading the per chip json
+        chip_json = read_json(path.join(json_path, label_file))
+        # Our chips start off in life as PNGs; the image is only opened to learn its dimensions
+        chip_size = get_dimensions(path.join(images_directory, chip_name + ".png"))
+        polys = get_feature_info(chip_json)
+        # Chips without any polygon produce no mask file at all
+        if len(polys) == 0:
+            continue
+        if not single_file:
+            save_masks(mask_polygons_separately(chip_size, polys), output_directory, chip_name)
+            continue
+        if border > 0:
+            combined = mask_polygons_together_with_border(chip_size, polys, border)
+        else:
+            combined = mask_polygons_together(chip_size, polys)
+        save_one_mask(combined, output_directory, chip_name)
+# Script entry point: for every disaster directory under --input, build masks from its labels/
+# and images/ sub-directories and write them to <disaster>/masks (created when missing).
+if __name__ == "__main__":
+ import argparse
+ # Parse command line arguments
+ parser = argparse.ArgumentParser(
+ description="""mask_polygons.py: Takes in xBD dataset and masks polygons in the image (make sure you've ran chip_masks.py first)\n\n
+ WARNING: This could lead to hundreds of output images per input\n"""
+ )
+ parser.add_argument(
+ "--input", required=True, metavar="/path/to/xBD/", help='Path to parent dataset directory "xBD"'
+ )
+ parser.add_argument(
+ "--single-file",
+ action="store_true",
+ help="use to save all masked polygon instances to a single file rather than one polygon per mask file",
+ )
+ # A border of 0 (the default) leaves polygons untouched: mask_chips only shrinks them
+ # when border > 0 and --single-file is set
+ parser.add_argument(
+ "--border",
+ default=0,
+ type=int,
+ metavar="positive integer for pixel border (e.g. 1)",
+ help="Positive integer used to shrink the polygon by",
+ )
+ args = parser.parse_args()
+ # Getting the list of the disaster types under the xBD directory
+ disasters = next(walk(args.input))[1]
+ for disaster in tqdm(disasters, desc="Masking", unit="disaster"):
+ # Create the full path to the images, labels, and mask output directories
+ image_dir = path.join(args.input, disaster, "images")
+ json_dir = path.join(args.input, disaster, "labels")
+ output_dir = path.join(args.input, disaster, "masks")
+ # Distinct exit codes tell a missing images directory (2) apart from a missing labels directory (3)
+ if not path.isdir(image_dir):
+ print("Error, could not find image files in {}.\n\n".format(image_dir), file=stderr)
+ exit(2)
+ if not path.isdir(json_dir):
+ print("Error, could not find labels in {}.\n\n".format(json_dir), file=stderr)
+ exit(3)
+ if not path.isdir(output_dir):
+ makedirs(output_dir)
+ mask_chips(json_dir, image_dir, output_dir, args.single_file, args.border)
diff --git a/xview/utils/split_into_disasters.py b/xview/utils/split_into_disasters.py
new file mode 100644
index 0000000..d53c59c
--- /dev/null
+++ b/xview/utils/split_into_disasters.py
@@ -0,0 +1,80 @@
+# xView2 #
+# Copyright 2019 Carnegie Mellon University. #
+# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #
+# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #
+# and distribution. #
+# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #
+# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #
+# DM19-0988 #
+from os import walk, path, makedirs
+from shutil import copy2 as cp
+def get_files(base_dir):
+    """
+    List the pre-disaster image file names found directly under <base_dir>/images.
+    :param base_dir: dataset directory that contains an images/ sub-directory
+    :returns: list of file names (not full paths) that contain the "_pre_" marker
+    """
+    images_dir = path.join(base_dir, "images")
+    # Minimizing (halving) the list to just the pre image files; move_files derives every other
+    # name from the pre name. Matching the whole "_pre_" token -- the one move_files substitutes --
+    # keeps out unrelated names that merely contain the letters "pre".
+    return [f for f in next(walk(images_dir))[2] if "_pre_" in f]
+def move_files(files, base_dir, output_dir):
+    """
+    Copy each pre/post image pair and its label files into <output_dir>/<disaster>/images|labels.
+    Despite the name, files are copied (shutil.copy2), not moved.
+    :param files: pre-disaster image file names as returned by get_files()
+    :param base_dir: directory holding the source images/ and labels/ sub-directories
+    :param output_dir: root directory under which <disaster>/images and <disaster>/labels are created
+    """
+    for filename in files:
+        # The disaster name is everything before the first underscore of the file name
+        disaster = filename.split("_")[0]
+        images_out = path.join(output_dir, disaster, "images")
+        labels_out = path.join(output_dir, disaster, "labels")
+        # Copy the pre and post image to the images directory under the disaster name
+        makedirs(images_out, exist_ok=True)
+        post_file = filename.replace("_pre_", "_post_")
+        for image_name in (filename, post_file):
+            cp(path.join(base_dir, "images", image_name), path.join(images_out, image_name))
+        # Swap only the extension: a plain "png" -> "json" text replace would also rewrite
+        # a "png" that happens to occur elsewhere in the file name
+        pre_label_file = path.splitext(filename)[0] + ".json"
+        post_label_file = pre_label_file.replace("_pre_", "_post_")
+        # Copy the pre and post label files to the labels directory under the disaster name
+        makedirs(labels_out, exist_ok=True)
+        for label_name in (pre_label_file, post_label_file):
+            cp(path.join(base_dir, "labels", label_name), path.join(labels_out, label_name))
+if __name__ == "__main__":
+    import argparse
+    # Command line interface: a flat images/ + labels/ directory in, a per-disaster tree out
+    cli = argparse.ArgumentParser(
+        description="split_into_disasters.py: Splits files under a single directory (with images/ and labels/ into directory of disasters/images|labels for the base submission pipeline (copies files)"
+    )
+    cli.add_argument(
+        "--input",
+        required=True,
+        metavar="/path/to/dataset/train",
+        help="Full path to the train (or any other directory) with /images and /labels",
+    )
+    cli.add_argument(
+        "--output",
+        required=True,
+        metavar="/path/to/output/xBD",
+        help="Full path to the output root dataset directory, will create disaster/images|labels under this directory",
+    )
+    args = cli.parse_args()
+    # List the pre images first, then copy every image/label pair into place
+    files = get_files(args.input)
+    move_files(files, args.input, args.output)
diff --git a/xview/utils/view_polygons.ipynb b/xview/utils/view_polygons.ipynb
new file mode 100644
index 0000000..25b9098
--- /dev/null
+++ b/xview/utils/view_polygons.ipynb
@@ -0,0 +1,367 @@
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#####################################################################################################################################################################\n",
+ "# xView2 #\n",
+ "# Copyright 2019 Carnegie Mellon University. #\n",
+ "# Released under a MIT (SEI)-style license, please see LICENSE.md or contact permission@sei.cmu.edu for full terms. #\n",
+ "# [DISTRIBUTION STATEMENT A] This material has been approved for public release and unlimited distribution. Please see Copyright notice for non-US Government use #\n",
+ "# and distribution. #\n",
+ "# This Software includes and/or makes use of the following Third-Party Software subject to its own license: #\n",
+ "# 1. SpaceNet (https://github.com/motokimura/spacenet_building_detection/blob/master/LICENSE) Copyright 2017 Motoki Kimura. #\n",
+ "# DM19-0988 #\n",
+ "#####################################################################################################################################################################"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To Run this notebook, start at the first cell with the license information and click run 4 times to show \n",
+ "# the field blocks, then input the *full path* to the label, and image. Finally, click \"Create next input\", \n",
+ "# you'll then see a full sized image with labels overlaid, you will also get different color labels if the\n",
+ "# label file has damage labels under ['features']['xy'][i]['properties']['subtype'] where i is the polygon \n",
+ "# in the ['xy'] list "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "hidden": true
+ },
+ "outputs": [],
+ "source": [
+ "from ipywidgets import Layout\n",
+ "from IPython.display import Javascript, HTML\n",
+ "import ipywidgets as widgets\n",
+ "\n",
+ "def run_all(ev):\n",
+ " display(Javascript('IPython.notebook.execute_cells_below()'))\n",
+ "\n",
+ "path_to_label = widgets.Text(\n",
+ " placeholder='Label path here',\n",
+ " description='Label:',\n",
+ " disabled=False,\n",
+ " layout=Layout(width='100%')\n",
+ ")\n",
+ "path_to_image = widgets.Text(\n",
+ " placeholder='Image path here',\n",
+ " description='Image:',\n",
+ " disabled=False,\n",
+ " layout=Layout(width='100%')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "The raw code for this IPython notebook is by default hidden for easier reading.\n",
+ "To toggle on/off the raw code, click here."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "HTML('''\n",
+ "The raw code for this IPython notebook is by default hidden for easier reading.\n",
+ "To toggle on/off the raw code, click here.''')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/javascript": [
+ "IPython.notebook.execute_cells_below()"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d2be852d23c1454ca6d8b7871ecf5eb5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Text(value='/tmp/inference/inference.json', description='Label:', layout=Layout(width='100%'), placeholder='La…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a4df1b7f08c6433eaba706e160b7956b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Text(value='/Users/rthosfelt/Downloads/xBD/tuscaloosa-tornado/images/tuscaloosa-tornado_00000027_pre_disaster.…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1bcd23b318194309bcee1c297b83c4c2",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Button(description='Create next input', style=ButtonStyle())"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4df43ae86c7847c595ef8deab722b358",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Text(value='', description='Label:', layout=Layout(width='100%'), placeholder='Label path here')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6d55945ff4664d3bb3341993e0df13f7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Text(value='', description='Image:', layout=Layout(width='100%'), placeholder='Image path here')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9fa49996441e4368bb3c477207577b08",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Button(description='Create next input', style=ButtonStyle())"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(path_to_label, path_to_image)\n",
+ "button = widgets.Button(description=\"Create next input\")\n",
+ "button.on_click(run_all)\n",
+ "display(button)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json \n",
+ "from PIL import Image, ImageDraw\n",
+ "from IPython.display import display\n",
+ "from shapely import wkt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "FileNotFoundError",
+ "evalue": "[Errno 2] No such file or directory: ''",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpath_to_image_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath_to_image\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_to_label_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mimage_json_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mimage_json\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage_json_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: ''"
+ ]
+ }
+ ],
+ "source": [
+ "# Opening and loading polygons from label json \n",
+ "path_to_label_value = path_to_label.value\n",
+ "path_to_image_value = path_to_image.value\n",
+ "\n",
+ "with open(path_to_label_value, 'rb') as image_json_file:\n",
+ " image_json = json.load(image_json_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "coords = image_json['features']['xy']\n",
+ "wkt_polygons = []\n",
+ "\n",
+ "for coord in coords:\n",
+ " if 'subtype' in coord['properties']:\n",
+ " damage = coord['properties']['subtype']\n",
+ " else:\n",
+ " damage = 'no-damage'\n",
+ " wkt_polygons.append((damage, coord['wkt']))\n",
+ " \n",
+ "polygons = []\n",
+ "\n",
+ "for damage, swkt in wkt_polygons:\n",
+ " polygons.append((damage, wkt.loads(swkt)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "# Loading image\n",
+ "img = Image.open(path_to_image_value)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "draw = ImageDraw.Draw(img, 'RGBA')\n",
+ "\n",
+ "damage_dict = {\n",
+ " \"no-damage\": (0, 255, 0, 100),\n",
+ " \"minor-damage\": (0, 0, 255, 125),\n",
+ " \"major-damage\": (255, 69, 0, 125),\n",
+ " \"destroyed\": (255, 0, 0, 125),\n",
+ " \"un-classified\": (255, 255, 255, 125)\n",
+ "}\n",
+ "\n",
+ "for damage, polygon in polygons:\n",
+ " x,y = polygon.exterior.coords.xy\n",
+ " coords = list(zip(x,y))\n",
+ " draw.polygon(coords, damage_dict[damage])\n",
+ "\n",
+ "del draw\n",
+ "\n",
+ "display(img)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
diff --git a/xview/visualization.py b/xview/visualization.py
new file mode 100644
index 0000000..1f7d31e
--- /dev/null
+++ b/xview/visualization.py
@@ -0,0 +1,134 @@
+import cv2
+import torch
+from albumentations.augmentations.functional import longest_max_size
+from pytorch_toolbelt.utils.torch_utils import rgb_image_from_tensor, to_numpy
+from xview.dataset import (
+import numpy as np
+def overlay_image_and_mask(image, mask, class_colors, alpha=0.5):
+    """
+    Blend a class-colored rendering of mask over image.
+    :param image: image to draw on; it is copied first, the argument itself is not written to
+    :param mask: array of class indices used to select the pixels of each class
+    :param class_colors: one color per class index; index 0 is treated as background and skipped
+    :param alpha: weight of the painted copy in the blend, the untouched image gets 1 - alpha
+    :returns: the result of cv2.addWeighted on the painted copy and the original image
+    """
+    painted = image.copy()
+    for class_index, class_color in enumerate(class_colors):
+        # Background (index 0) keeps the original pixels
+        if class_index != 0:
+            painted[mask == class_index, :] = class_color
+    return cv2.addWeighted(painted, alpha, image, 1 - alpha, 0)
+def draw_predictions(
+    input: dict,
+    output: dict,
+    image_id_key="image_id",
+    mean=(0.485, 0.456, 0.406),
+    std=(0.229, 0.224, 0.225),
+    class_colors=[
+        (0, 0, 0),  # 0=background
+        (0, 255, 0),  # no damage (or just 'building' for localization) (green)
+        (255, 255, 0),  # minor damage (yellow)
+        (255, 128, 0),  # major damage (orange)
+        (255, 0, 0),  # destroyed (red)
+        (127, 127, 127),  # grey
+    ],
+    max_images=32,
+):
+    """
+    Render ground truth / prediction comparison images for a batch.
+    Each rendered image is a 2x2 grid: the top row shows the pre and post images with the target
+    mask overlaid, the bottom row shows the same two images with the predicted mask overlaid.
+    :param input: batch dict; the INPUT_IMAGE_KEY entry is indexed as [i, 0:3] for the pre image and
+                  [i, 3:6] for the post image, the INPUT_MASK_KEY entry holds the target mask
+    :param output: model output dict; its INPUT_MASK_KEY entry is reduced with argmax over axis 0 per sample
+    :param image_id_key: key whose entry in input determines how many samples are drawn
+    :param mean: mean passed to rgb_image_from_tensor
+    :param std: std passed to rgb_image_from_tensor
+    :param class_colors: one color per class index, index 0 (background) is never drawn;
+                         the default list is only read here, never mutated
+    :param max_images: rendering stops once this many images were produced
+    :returns: list of rendered images
+    """
+    images = []
+    num_images = len(input[image_id_key])
+    for i in range(num_images):
+        # NOTE(review): the caption id is read through INPUT_IMAGE_ID_KEY while the sample count
+        # above uses image_id_key -- confirm both are meant to name the same entry.
+        image_id = input[INPUT_IMAGE_ID_KEY][i]
+        # Channels 0-2 carry the pre image, channels 3-5 the post image
+        image_pre = rgb_image_from_tensor(input[INPUT_IMAGE_KEY][i, 0:3, ...], mean, std)
+        image_pre = cv2.cvtColor(image_pre, cv2.COLOR_RGB2BGR)
+        image_post = rgb_image_from_tensor(input[INPUT_IMAGE_KEY][i, 3:6, ...], mean, std)
+        image_post = cv2.cvtColor(image_post, cv2.COLOR_RGB2BGR)
+        # overlay_image_and_mask works on its own copy, so the clean images can be reused for both rows
+        damage_target = to_numpy(input[INPUT_MASK_KEY][i])
+        image_pre_gt = overlay_image_and_mask(image_pre, damage_target, class_colors)
+        image_post_gt = overlay_image_and_mask(image_post, damage_target, class_colors)
+        damage_predictions = to_numpy(output[INPUT_MASK_KEY][i]).argmax(axis=0)
+        image_pre = overlay_image_and_mask(image_pre, damage_predictions, class_colors)
+        image_post = overlay_image_and_mask(image_post, damage_predictions, class_colors)
+        overlay_gt = np.column_stack([image_pre_gt, image_post_gt])
+        overlay = np.column_stack([image_pre, image_post])
+        # np.vstack is the supported spelling of the deprecated np.row_stack alias
+        overlay = np.vstack([overlay_gt, overlay])
+        overlay = longest_max_size(overlay, 1024, cv2.INTER_LINEAR)
+        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
+        images.append(overlay)
+        if len(images) >= max_images:
+            break
+    return images
+def draw_predictions_dual(
+    input: dict,
+    output: dict,
+    image_id_key="image_id",
+    mean=(0.485, 0.456, 0.406),
+    std=(0.229, 0.224, 0.225),
+    class_colors=[
+        (0, 0, 0),  # 0=background
+        (0, 255, 0),  # no damage (or just 'building' for localization) (green)
+        (255, 255, 0),  # minor damage (yellow)
+        (255, 128, 0),  # major damage (orange)
+        (255, 0, 0),  # destroyed (red)
+    ],
+):
+    """
+    Render ground truth / prediction comparison images for a batch with separate pre and post inputs.
+    Each rendered image is a 2x2 grid: the top row shows the pre image with the localization target
+    and the post image with the damage target, the bottom row shows the same two images with the
+    predicted localization and damage masks overlaid.
+    :param input: batch dict read through INPUT_IMAGE_PRE_KEY, INPUT_IMAGE_POST_KEY,
+                  INPUT_MASK_PRE_KEY and INPUT_MASK_POST_KEY
+    :param output: model output dict; the OUTPUT_MASK_PRE_KEY entry is thresholded at 0.5 after a
+                   sigmoid, the OUTPUT_MASK_POST_KEY entry is reduced with argmax over axis 0
+    :param image_id_key: key of the per-sample image ids in input; also determines how many samples are drawn
+    :param mean: mean passed to rgb_image_from_tensor
+    :param std: std passed to rgb_image_from_tensor
+    :param class_colors: one color per class index, index 0 (background) is never drawn;
+                         the default list is only read here, never mutated
+    :returns: list of rendered images, one per sample
+    """
+    images = []
+    image_ids = input[image_id_key]
+    for i in range(len(image_ids)):
+        # Caption each image with its real id rather than its position in the batch
+        image_id = image_ids[i]
+        image_pre = rgb_image_from_tensor(input[INPUT_IMAGE_PRE_KEY][i], mean, std)
+        image_pre = cv2.cvtColor(image_pre, cv2.COLOR_RGB2BGR)
+        image_post = rgb_image_from_tensor(input[INPUT_IMAGE_POST_KEY][i], mean, std)
+        image_post = cv2.cvtColor(image_post, cv2.COLOR_RGB2BGR)
+        # overlay_image_and_mask works on its own copy, so the clean images can be reused for both rows
+        localization_target = to_numpy(input[INPUT_MASK_PRE_KEY][i].squeeze(0))
+        damage_target = to_numpy(input[INPUT_MASK_POST_KEY][i])
+        image_pre_gt = overlay_image_and_mask(image_pre, localization_target, class_colors)
+        image_post_gt = overlay_image_and_mask(image_post, damage_target, class_colors)
+        localization_predictions = to_numpy(output[OUTPUT_MASK_PRE_KEY][i].squeeze(0).sigmoid() > 0.5).astype(np.uint8)
+        damage_predictions = to_numpy(output[OUTPUT_MASK_POST_KEY][i]).argmax(axis=0)
+        image_pre = overlay_image_and_mask(image_pre, localization_predictions, class_colors)
+        image_post = overlay_image_and_mask(image_post, damage_predictions, class_colors)
+        overlay_gt = np.column_stack([image_pre_gt, image_post_gt])
+        overlay = np.column_stack([image_pre, image_post])
+        # np.vstack is the supported spelling of the deprecated np.row_stack alias
+        overlay = np.vstack([overlay_gt, overlay])
+        overlay = longest_max_size(overlay, 1024, cv2.INTER_LINEAR)
+        cv2.putText(overlay, str(image_id), (10, 15), cv2.FONT_HERSHEY_PLAIN, 1, (250, 250, 250))
+        images.append(overlay)
+    return images
diff --git a/xview/xview2_metrics.py b/xview/xview2_metrics.py
new file mode 100644
index 0000000..4dc3ef3
--- /dev/null
+++ b/xview/xview2_metrics.py
@@ -0,0 +1,297 @@
+# xview2 metrics
+# the total score is calculated as a weighted average of the localization f1 score (lf1) and the damage f1 score (df1)
+# score = .3 * lf1 + .7 * df1
+# the df1 is calculated by taking the harmonic mean of the 4 damage f1 scores (no damage, minor damage, major damage, and destroyed)
+# df1 = 4 / sum((f1+epsilon)**-1 for f1 in [no_damage_f1, minor_damage_f1, major_damage_f1, destroyed_f1]), where epsilon = 1e-6
+# Abbreviations used in this file:
+# l: localization
+# d: damage
+# p: prediction
+# t: target (ground truth)
+# x: usually a numpy array
+import os, json
+import numpy as np
+import pandas as pd
+from multiprocessing import Pool, cpu_count
+from pathlib import Path
+from PIL import Image
+from typing import Union, List
class PathHandler:
    """
    Resolves the four mask files that belong to one image id (localization/damage prediction and
    localization/damage target) and loads them as validated numpy arrays.
    """

    def __init__(self, pred_dir: Path, targ_dir: Path, img_id: str, test_hold: str):
        """
        Args:
            pred_dir  (Path): directory of localization and damage predictions
            targ_dir  (Path): directory of localization and damage targets
            img_id    (str) : 5 digit string of image id
            test_hold (str) : either 'test' or 'hold'. Most likely 'test' unless you have access to holdout set

        Raises:
            AssertionError: if a directory is not an existing ``Path`` or ``test_hold`` is not 'test'/'hold'
        """
        assert isinstance(pred_dir, Path), f"pred_dir should be of type Path, got {type(pred_dir)}"
        assert pred_dir.is_dir(), f"Directory '{pred_dir}' does not exist or is not a directory"

        # Report the type of targ_dir itself (the message used to print the type of pred_dir)
        assert isinstance(targ_dir, Path), f"targ_dir '{targ_dir}' should be of type Path, got {type(targ_dir)}"
        assert targ_dir.is_dir(), f"Directory '{targ_dir}' does not exist or is not a directory"

        assert test_hold in ["test", "hold"], f"test_hold '{test_hold}' was not one of 'test' or 'hold'"

        self.lp = pred_dir / f"{test_hold}_localization_{img_id}_prediction.png"  # localization prediction
        self.dp = pred_dir / f"{test_hold}_damage_{img_id}_prediction.png"  # damage prediction
        self.lt = targ_dir / f"{test_hold}_localization_{img_id}_target.png"  # localization target
        self.dt = targ_dir / f"{test_hold}_damage_{img_id}_target.png"  # damage target

        # Order matters: load_images() returns the arrays in exactly this order
        self.paths = (self.lp, self.dp, self.lt, self.dt)

    def load_and_validate_image(self, path):
        """
        Loads the image at `path` and checks that it is a valid xview2 mask.

        Args:
            path (Path): image file to load

        Returns:
            np.ndarray: uint8 array of shape (1024, 1024) holding only the values 0-4

        Raises:
            AssertionError: if the file is missing or the dtype, values or shape are not as expected
        """
        assert path.is_file(), f"file '{path}' does not exist or is not a file"
        img = np.array(Image.open(path))
        assert img.dtype == np.uint8, f"{path.name} is of wrong format {img.dtype} - should be np.uint8"
        assert set(np.unique(img)) <= {0, 1, 2, 3, 4}, f"values must ints 0-4, found {np.unique(img)}, path: {path}"
        assert img.shape == (1024, 1024), f"{path} must be a 1024x1024 image"
        return img

    def load_images(self):
        """ Returns the validated images in the order [lp, dp, lt, dt] (see self.paths) """
        return [self.load_and_validate_image(path) for path in self.paths]
class RowPairCalculator:
    """
    Groups the helpers that turn one pair of localization/damage predictions (plus their targets)
    into counts of true positives (TPs), false negatives (FNs) and false positives (FPs).
    """

    @staticmethod
    def extract_buildings(x: np.ndarray):
        """ Returns a copy of x in which every positive value is replaced by 1 (a building mask) """
        one = x.dtype.type(1)  # keep the dtype of the input, exactly like an in-place assignment on a copy
        return np.where(x > 0, one, x)

    @staticmethod
    def compute_tp_fn_fp(pred: np.ndarray, targ: np.ndarray, c: int) -> List[int]:
        """
        Counts the TPs, FNs and FPs of a prediction against a target, treating class c as positive

        Args:
            pred (np.ndarray): prediction
            targ (np.ndarray): target
            c (int): positive class

        Returns:
            list: [TP, FN, FP]
        """
        pred_hit, pred_miss = pred == c, pred != c
        targ_hit, targ_miss = targ == c, targ != c

        true_pos = np.logical_and(pred_hit, targ_hit).sum()
        false_neg = np.logical_and(pred_miss, targ_hit).sum()
        false_pos = np.logical_and(pred_hit, targ_miss).sum()
        return [true_pos, false_neg, false_pos]

    @classmethod
    def get_row_pair(cls, ph: "PathHandler"):
        """
        Produces the localization row and the damage row of TP/FN/FP counts for one image.
        Both rows come from the same function because damage is only assessed where buildings are predicted.

        Args:
            ph (PathHandler): used to load the required prediction and target images

        Returns:
            tuple: (lrow, drow) - lrow is [TP, FN, FP] for buildings, drow is the concatenation of
                   [TP, FN, FP] for the damage classes 1, 2, 3 and 4 (12 values)
        """
        lp, dp, lt, dt = ph.load_images()

        # collapse all damage scores 1-4 to 1
        lp_b = cls.extract_buildings(lp)
        lt_b = cls.extract_buildings(lt)
        dt_b = cls.extract_buildings(dt)

        # only give credit to damages where buildings are predicted
        dp = dp * lp_b

        # only score damage where there exist buildings in target damage
        scored = dt_b == 1
        dp, dt = dp[scored], dt[scored]

        lrow = cls.compute_tp_fn_fp(lp_b, lt_b, 1)
        drow = [count for damage_class in range(1, 5) for count in cls.compute_tp_fn_fp(dp, dt, damage_class)]
        return lrow, drow
class F1Recorder:
    """
    Stores the confusion counts of one class together with the precision, recall and f1 score
    derived from them.
    Background on the f1 score: https://en.wikipedia.org/wiki/F1_score
    """

    def __init__(self, TP, FP, FN, name=""):
        """
        Args:
            TP (int): true positives
            FP (int): false positives
            FN (int): false negatives
            name (str): optional name when printing
        """
        self.name = name
        self.TP = TP
        self.FP = FP
        self.FN = FN

        self.P = self.precision()
        self.R = self.recall()
        # From here on the instance attribute `f1` (a number) shadows the method of the same name
        self.f1 = self.f1()

    def __repr__(self):
        return f"{self.name} | f1: {self.f1:.4f}, precision: {self.P:.4f}, recall: {self.R:.4f}"

    def precision(self):
        """ precision = TP / (TP + FP), defined as 0 when there are no true positives """
        assert self.TP >= 0 and self.FP >= 0
        return 0 if self.TP == 0 else self.TP / (self.TP + self.FP)

    def recall(self):
        """ recall = TP / (TP + FN), defined as 0 when there are no true positives """
        assert self.TP >= 0 and self.FN >= 0
        return 0 if self.TP == 0 else self.TP / (self.TP + self.FN)

    def f1(self):
        """ harmonic mean of precision (self.P) and recall (self.R), defined as 0 when either one is 0 """
        assert 0 <= self.P <= 1 and 0 <= self.R <= 1
        p, r = self.P, self.R
        if p == 0 or r == 0:
            return 0
        return (2 * p * r) / (p + r)
class XviewMetrics:
    """
    Calculates the xview2 metrics given a directory of predictions and a directory of targets

    Directory of predictions and directory of targets must be two separate directories. These
    could be structured as followed:
        .
        ├── predictions
        │   ├── test_damage_00000_prediction.png
        │   ├── test_damage_00001_prediction.png
        │   ├── test_localization_00000_prediction.png
        │   ├── test_localization_00001_prediction.png
        │   └── ...
        └── targets
            ├── test_damage_00000_target.png
            ├── test_damage_00001_target.png
            ├── test_localization_00000_target.png
            ├── test_localization_00001_target.png
            └── ...
    """

    def __init__(self, pred_dir, targ_dir):
        """
        Loads every prediction/target pair and computes all metrics eagerly.

        Args:
            pred_dir (str or Path): directory of localization and damage predictions
            targ_dir (str or Path): directory of localization and damage targets

        Raises:
            AssertionError: if either directory does not exist
        """
        self.pred_dir, self.targ_dir = Path(pred_dir), Path(targ_dir)
        assert self.pred_dir.is_dir(), f"Could not find prediction directory: '{pred_dir}'"
        assert self.targ_dir.is_dir(), f"Could not find target directory: '{targ_dir}'"

        # Display names of the damage classes, used when printing the per-class f1 recorders
        self.dmg2str = {
            1: "No damage (1) ",
            2: "Minor damage (2) ",
            3: "Major damage (3) ",
            4: "Destroyed (4) ",
        }

        # Each step consumes the attributes set by the previous one, so the order matters
        self.get_path_handlers()
        self.get_dfs()
        self.get_lf1r()
        self.get_df1rs()

    def __repr__(self):
        lines = ["Localization:", f" {self.lf1r}", "", "Damage:"]
        lines.extend(f" {f1_recorder}" for f1_recorder in self.df1rs)
        lines.append(f" Harmonic mean dmgs | f1: {self.df1:.4f}")
        lines.extend(["", "Score:", f" Score | f1: {self.score:.4f}"])
        return "\n".join(lines).rstrip()

    @staticmethod
    def _split_target_name(path):
        """
        Splits a target filename such as 'test_damage_00001_target.png' into its underscore separated
        fields, here ('test', 'damage', '00001', 'target').
        """
        # Path.stem removes exactly the final suffix. str.rstrip(".png") is NOT a suffix removal: it strips
        # every trailing '.', 'p', 'n' or 'g' character and can therefore eat into the filename itself.
        return tuple(Path(path).stem.split("_"))

    def get_path_handlers(self):
        """
        builds one PathHandler per image id found in the target directory (self.path_handlers)

        Raises:
            ValueError: if a target filename does not consist of exactly four '_' separated fields
            AssertionError: if a target filename does not follow '<test|hold>_<localization|damage>_<id>_target.png'
        """
        self.path_handlers = []
        # sorted() makes the row order of the dataframes deterministic (glob order is arbitrary)
        for path in sorted(self.targ_dir.glob("*.png")):
            test_hold, loc_dmg, img_id, target = self._split_target_name(path)
            assert loc_dmg in [
                "localization",
                "damage",
            ], f"target filenames must have 'localization' or 'damage' in filename, got {path}"
            assert target == "target", f"{target} should equal 'target' when getting path handlers"
            # Every image id has both a localization and a damage target; registering the id for only one
            # of the two avoids duplicate handlers (localization or damage is fine here)
            if loc_dmg == "localization":
                self.path_handlers.append(PathHandler(self.pred_dir, self.targ_dir, img_id, test_hold))

    def get_dfs(self):
        """
        builds the localization dataframe (self.ldf) and damage dataframe (self.ddf) from
        path handlers (self.path_handlers)
        """
        # Images are loaded and scored in worker processes
        with Pool() as p:
            all_rows = p.map(RowPairCalculator.get_row_pair, self.path_handlers)

        lcolumns = ["lTP", "lFN", "lFP"]
        self.ldf = pd.DataFrame([lrow for lrow, drow in all_rows], columns=lcolumns)

        dcolumns = ["dTP1", "dFN1", "dFP1", "dTP2", "dFN2", "dFP2", "dTP3", "dFN3", "dFP3", "dTP4", "dFN4", "dFP4"]
        self.ddf = pd.DataFrame([drow for lrow, drow in all_rows], columns=dcolumns)

    def get_lf1r(self):
        """ localization f1 recorder """
        TP = self.ldf["lTP"].sum()
        FP = self.ldf["lFP"].sum()
        FN = self.ldf["lFN"].sum()
        self.lf1r = F1Recorder(TP, FP, FN, "Buildings")

    @property
    def lf1(self):
        """ localization f1 """
        return self.lf1r.f1

    def get_df1rs(self):
        """ damage f1 recorders """
        self.df1rs = []
        for i in range(1, 5):
            TP = self.ddf[f"dTP{i}"].sum()
            FP = self.ddf[f"dFP{i}"].sum()
            FN = self.ddf[f"dFN{i}"].sum()
            self.df1rs.append(F1Recorder(TP, FP, FN, self.dmg2str[i]))

    @property
    def df1s(self):
        """ damage f1s """
        return [F1.f1 for F1 in self.df1rs]

    @property
    def df1(self):
        """ damage f1. Computed using harmonic mean of damage f1s """
        f1s = self.df1s
        # epsilon keeps the reciprocal finite when one of the f1 scores is 0
        return len(f1s) / sum((f1 + 1e-6) ** -1 for f1 in f1s)

    @property
    def score(self):
        """ xview2 score computed as a weighted average of the localization f1 and damage f1 """
        return 0.3 * self.lf1 + 0.7 * self.df1

    @classmethod
    def compute_score(cls, pred_dir, targ_dir, out_fp):
        """
        Computes all metrics and writes them to a json file.

        Args:
            pred_dir (str): directory of localization and damage predictions
            targ_dir (str): directory of localization and damage targets
            out_fp (str): output json - folder must already exist
        """
        print(f"Calculating metrics using {cpu_count()} cpus...")

        metrics = cls(pred_dir, targ_dir)
        damage_f1s = metrics.df1s

        d = {"score": metrics.score, "damage_f1": metrics.df1, "localization_f1": metrics.lf1}
        d["damage_f1_no_damage"] = damage_f1s[0]
        d["damage_f1_minor_damage"] = damage_f1s[1]
        d["damage_f1_major_damage"] = damage_f1s[2]
        d["damage_f1_destroyed"] = damage_f1s[3]

        with open(out_fp, "w") as f:
            json.dump(d, f)
        print(f"Wrote metrics to {out_fp}")
if __name__ == "__main__":
    # Command line entry point: python-fire maps the CLI arguments onto
    # XviewMetrics.compute_score(pred_dir, targ_dir, out_fp)
    from fire import Fire

    Fire(XviewMetrics.compute_score)