Initial commit

BloodAxe · Feb 25, 2020 · f2e062e · f2e062e
commit f2e062e
Show file tree

Hide file tree

Showing 64 changed files with 44,533 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,11 @@
+runs/
+pretrain/
+models/
+models_old/
+tests/
+predictions/
+predictions_docker/
+test_images/
+tests/
+old/
+*.cmd
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,110 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.idea/
+runs/
+/xview2.zip
+models/
+models_old/
+pretrain/
diff --git a/Dockerfile-pytorch14-37 b/Dockerfile-pytorch14-37
@@ -0,0 +1,26 @@
+# Image used to run the xView2 submission script (docker_submission_37.py) on Python 3.7.
+FROM python:3.7
+
+# Native GEOS library (presumably needed by a geometry package in the requirements file - confirm).
+# The apt package lists are removed in the same layer so they do not end up in the image.
+RUN apt-get update \
+    && apt-get install -y libgeos-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# WORKDIR creates the directory when it does not exist, so no separate mkdir is needed.
+WORKDIR /xView2
+
+# Copy the requirements file on its own first so the dependency layer is reused
+# from cache as long as the requirements do not change.
+COPY requirements_docker_pytorch14.txt /xView2/
+RUN pip3 install --no-cache-dir -r requirements_docker_pytorch14.txt
+
+COPY . /xView2
+
+# Directories created for the container's input and output data.
+RUN mkdir -p /input /output
+
+# https://github.com/pytorch/pytorch/issues/27971
+ENV LRU_CACHE_CAPACITY=1
+
+# set environment variables
+# Prevents Python from writing pyc files to disc
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Prevents Python from buffering stdout and stderr
+ENV PYTHONUNBUFFERED=1
+
+ENTRYPOINT ["python3", "docker_submission_37.py"]
diff --git a/README.md b/README.md
@@ -0,0 +1,20 @@
+# 3rd place solution for xView2 Damage Assessment Challenge
+
+Eugene Khvedchenya, February 2020
+
+This repository contains the source code for my solution to the [xView2 challenge](https://xview2.com). My solution scored second (0.803) on the public leaderboard and third (0.807) on the private hold-out dataset.
+
+# Approach in a nutshell
+
+- Ensemble of semantic segmentation models. 
+- Trained with weighted CE to address class imbalance.
+- Heavy augmentations to prevent over-fitting and increase robustness to misalignment of pre- and post- images. 
+- Shared encoder for pre- and post- images. Extracted features are concatenated and sent to the decoder.
+- Bunch of encoders (ResNets, Densenets, EfficientNets) and two decoders: Unet and FPN.
+- 1 round of Pseudolabeling
+- Ensemble using weighted averaging. Weights optimized for every model on corresponding validation data.
+
+# Training
+
+- Install dependencies from `requirements.txt`
+- Follow `train.sh` 
diff --git a/black.toml b/black.toml
@@ -0,0 +1,25 @@
+# Example configuration for Black.
+
+# NOTE: you have to use single-quoted strings in TOML for regular expressions.
+# It's the equivalent of r-strings in Python.  Multiline strings are treated as
+# verbose regular expressions by Black.  Use [ ] to denote a significant space
+# character.
+
+[tool.black]
+line-length = 119
+target-version = ['py35', 'py36', 'py37', 'py38']
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+)/
+'''
diff --git a/build_push_docker_37.cmd b/build_push_docker_37.cmd
@@ -0,0 +1,5 @@
+REM Build the Python 3.7 / PyTorch 1.4 image, tag it with a date stamp and push the tagged image.
+REM NOTE(review): the substring offsets below assume the DATE variable is formatted like
+REM "Tue 02/25/2020" (giving YYYYMMDD); other regional settings produce a different stamp - confirm.
+set mydate=%date:~10,4%%date:~4,2%%date:~7,2%
+
+docker build -t xview2:37_pytorch14 -f Dockerfile-pytorch14-37 .
+docker tag xview2:37_pytorch14 ekhvedchenya/xview2:37_pytorch14_%mydate%
+REM START launches the push as a separate process, so this script does not wait for the upload.
+START docker push ekhvedchenya/xview2:37_pytorch14_%mydate%
diff --git a/convert_crops.py b/convert_crops.py
@@ -0,0 +1,116 @@
+import argparse
+import os
+
+import cv2
+from skimage.measure import label
+from tqdm import tqdm
+import pandas as pd
+
+from pytorch_toolbelt.utils import fs
+import numpy as np
+
+from xview.dataset import make_dual_dataframe, read_image
+from xview.utils.inference_image_output import create_inference_image, open_json, create_instance_image
+from PIL import Image
+
+
+def bbox1(img):
+    """Return the tight bounding box of the non-zero elements of ``img``.
+
+    Args:
+        img: Array with at least two dimensions. Only the first two axes
+            (rows, columns) are considered.
+
+    Returns:
+        Tuple ``(rmin, rmax, cmin, cmax)``. Upper bounds are exclusive, so
+        ``img[rmin:rmax, cmin:cmax]`` contains every non-zero element.
+
+    Raises:
+        ValueError: If ``img`` has no non-zero elements.
+    """
+    hits = np.where(img != 0)
+    rows, cols = hits[0], hits[1]
+    if rows.size == 0:
+        # Without this check numpy fails with an opaque "zero-size array to reduction" error.
+        raise ValueError("bbox1() requires at least one non-zero element")
+    return rows.min(), rows.max() + 1, cols.min(), cols.max() + 1
+
+
+def convert_dir(df: pd.DataFrame, dir) -> pd.DataFrame:
+    """Cut a pre/post crop pair for every labelled building and index the crops.
+
+    For each row of ``df`` the pre and post images are read and the post label JSON is located by
+    replacing ``"masks"`` with ``"labels"`` in ``mask_fname_post`` and switching the extension to
+    ``.json``. ``create_instance_image`` turns that JSON into an instance image plus a list of labels.
+    Every instance whose bounding box is at least 16 px on its longer side is cropped from both
+    images (with a margin of a quarter of the box size on each side) and saved as PNG files in
+    ``<dir>/crops``.
+
+    Errors raised while handling a single instance are printed together with the mask file name
+    and that instance is skipped; errors while loading a row's images or labels are not caught.
+
+    Args:
+        df: One row per pre/post image pair. Columns read here: ``image_fname_pre``,
+            ``image_fname_post``, ``mask_fname_post``, ``image_id_pre``, ``image_id_post``,
+            ``event_name_post`` and ``fold_post``.
+        dir: Dataset root directory; file names from ``df`` are joined onto it.
+
+    Returns:
+        DataFrame with one record per saved crop pair (file names, label, event, fold,
+        bounding box, size and padding).
+    """
+    crops_dir = os.path.join(dir, "crops")
+    os.makedirs(crops_dir, exist_ok=True)
+
+    building_crops = []
+
+    # Running counter across all images; it prefixes every crop file name.
+    global_crop_index = 0
+
+    for _, row in tqdm(df.iterrows(), total=len(df)):
+        pre_image = read_image(os.path.join(dir, row["image_fname_pre"]))
+        post_image = read_image(os.path.join(dir, row["image_fname_post"]))
+
+        mask_fname_post = row["mask_fname_post"]
+        json_fname_post = fs.change_extension(mask_fname_post.replace("masks", "labels"), ".json")
+        inference_data = open_json(os.path.join(dir, json_fname_post))
+        instance_image, labels = create_instance_image(inference_data)
+
+        # NOTE(review): assumes instance ``k`` in ``instance_image`` corresponds to ``labels[k - 1]``
+        # - confirm against ``create_instance_image``.
+        for label_index, damage_label in zip(range(1, instance_image.max() + 1), labels):
+            try:
+                rmin, rmax, cmin, cmax = bbox1(instance_image == label_index)
+
+                max_size = max(rmax - rmin, cmax - cmin)
+                if max_size < 16:
+                    print(
+                        "Skipping crop since it's too small",
+                        fs.id_from_fname(mask_fname_post),
+                        "label_index",
+                        label_index,
+                        "max_size",
+                        max_size
+                    )
+                    continue
+
+                rpadding = (rmax - rmin) // 4
+                cpadding = (cmax - cmin) // 4
+
+                # The lower bounds are clamped to 0 because a negative start would wrap around;
+                # upper bounds past the image edge are truncated by slicing.
+                crop_window = (
+                    slice(max(0, rmin - rpadding), rmax + rpadding),
+                    slice(max(0, cmin - cpadding), cmax + cpadding),
+                )
+                pre_crop = pre_image[crop_window]
+                post_crop = post_image[crop_window]
+
+                image_id_pre = row["image_id_pre"]
+                image_id_post = row["image_id_post"]
+
+                pre_crop_fname = f"{global_crop_index:06}_{image_id_pre}.png"
+                post_crop_fname = f"{global_crop_index:06}_{image_id_post}.png"
+                global_crop_index += 1
+
+                for crop_fname, crop in ((pre_crop_fname, pre_crop), (post_crop_fname, post_crop)):
+                    crop_path = os.path.join(crops_dir, crop_fname)
+                    # cv2.imwrite reports failure through its return value instead of raising;
+                    # turn it into an error so a missing file is never listed in the index.
+                    if not cv2.imwrite(crop_path, crop):
+                        raise IOError(f"Failed to write crop {crop_path}")
+
+                building_crops.append(
+                    {
+                        "pre_crop_fname": pre_crop_fname,
+                        # Key kept without the "_fname" suffix: it is the column name in the CSV.
+                        "post_crop": post_crop_fname,
+                        "label": damage_label,
+                        "event_name": row["event_name_post"],
+                        "fold": row["fold_post"],
+                        "rmin": rmin,
+                        "rmax": rmax,
+                        "cmin": cmin,
+                        "cmax": cmax,
+                        "max_size": max_size,
+                        "rpadding": rpadding,
+                        "cpadding": cpadding
+                    }
+                )
+            except Exception as e:
+                # Best effort: report the problem and carry on with the next instance.
+                print(e)
+                print(mask_fname_post)
+
+    return pd.DataFrame.from_records(building_crops)
+
+
+def main():
+    """Build the building-crops dataset for the training split.
+
+    Reads ``train_folds.csv`` from the directory given by ``-dd/--data-dir``, passes it through
+    ``make_dual_dataframe`` (presumably pairing pre/post rows - confirm in ``xview.dataset``),
+    extracts the crops with ``convert_dir`` and writes the resulting index to
+    ``train_crops.csv`` in the same directory.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2")
+    args = parser.parse_args()
+
+    data_dir = args.data_dir
+
+    df = pd.read_csv(os.path.join(data_dir, "train_folds.csv"))
+    df = make_dual_dataframe(df)
+
+    df_crops = convert_dir(df, data_dir)
+    # ``index`` is documented as a bool; False states explicitly that the row index is not written.
+    df_crops.to_csv(os.path.join(data_dir, "train_crops.csv"), index=False)
+
+
+if __name__ == "__main__":
+    main()