Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Eugene Khvedchenya authored and BloodAxe committed Feb 25, 2020
0 parents commit f2e062e
Show file tree
Hide file tree
Showing 64 changed files with 44,533 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
runs/
pretrain/
models/
models_old/
tests/
predictions/
predictions_docker/
test_images/
tests/
old/
*.cmd
110 changes: 110 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.idea/
runs/
/xview2.zip
models/
models_old/
pretrain/
26 changes: 26 additions & 0 deletions Dockerfile-pytorch14-37
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
FROM python:3.7

RUN apt-get update && apt-get install -y libgeos-dev

RUN mkdir /xView2
WORKDIR /xView2

ADD requirements_docker_pytorch14.txt /xView2
RUN pip3 install -r requirements_docker_pytorch14.txt

ADD . /xView2

RUN mkdir -p /input
RUN mkdir -p /output

# https://github.com/pytorch/pytorch/issues/27971
ENV LRU_CACHE_CAPACITY 1

# set environment variables
# Prevents Python from writing pyc files to disc
ENV PYTHONDONTWRITEBYTECODE 1

# Prevents Python from buffering stdout and stderr
ENV PYTHONUNBUFFERED 1

ENTRYPOINT ["python3", "docker_submission_37.py"]
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# 3rd place solution for xView2 Damage Assessment Challenge

Eugene Khvedchenya, February 2020

This repository contains source code for my solution to [xView2 challenge](https://xview2.com). My solution was scored second (0.803) on public LB and third (0.807) on private hold-out dataset.

# Approach in a nutshell

- Ensemble of semantic segmentation models.
- Trained with weighted CE to address class imbalance.
- Heavy augmentations to prevent over-fitting and increase robustness to misalignment of pre- and post- images.
- Shared encoder for pre- and post- images. Extracted features are concatenated and sent to the decoder.
- Bunch of encoders (ResNets, Densenets, EfficientNets) and two decoders: Unet and FPN.
- 1 round of Pseudolabeling
- Ensemble using weighted averaging. Weights optimized for every model on corresponding validation data.

# Training

- Install dependencies from `requirements.txt`
- Follow `train.sh`
25 changes: 25 additions & 0 deletions black.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Example configuration for Black.

# NOTE: you have to use single-quoted strings in TOML for regular expressions.
# It's the equivalent of r-strings in Python. Multiline strings are treated as
# verbose regular expressions by Black. Use [ ] to denote a significant space
# character.

[tool.black]
line-length = 119
target-version = ['py35', 'py36', 'py37', 'py38']
include = '\.pyi?$'
exclude = '''
/(
\.eggs
| \.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
5 changes: 5 additions & 0 deletions build_push_docker_37.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
set mydate=%date:~10,4%%date:~4,2%%date:~7,2%

docker build -t xview2:37_pytorch14 -f Dockerfile-pytorch14-37 .
docker tag xview2:37_pytorch14 ekhvedchenya/xview2:37_pytorch14_%mydate%
START docker push ekhvedchenya/xview2:37_pytorch14_%mydate%
116 changes: 116 additions & 0 deletions convert_crops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import argparse
import os

import cv2
from skimage.measure import label
from tqdm import tqdm
import pandas as pd

from pytorch_toolbelt.utils import fs
import numpy as np

from xview.dataset import make_dual_dataframe, read_image
from xview.utils.inference_image_output import create_inference_image, open_json, create_instance_image
from PIL import Image


def bbox1(img):
    """Return the tight bounding box of all non-zero pixels in a 2D array.

    Args:
        img: 2D array-like; any non-zero entry counts as foreground.

    Returns:
        Tuple ``(rmin, rmax, cmin, cmax)`` with half-open row/column
        bounds, i.e. the foreground lies in ``img[rmin:rmax, cmin:cmax]``.

    Note:
        Raises ``ValueError`` (via numpy min/max of an empty array) when
        the image contains no non-zero pixels.
    """
    rows, cols = np.where(img != 0)
    return rows.min(), rows.max() + 1, cols.min(), cols.max() + 1


def convert_dir(df: pd.DataFrame, dir) -> pd.DataFrame:
    """Cut per-building crops from paired pre-/post-disaster images.

    For every row of the paired dataframe, rasterizes the building
    instances from the post-disaster JSON labels, cuts a padded crop
    around each building from both the pre- and post-disaster image,
    writes the crops as PNGs into ``<dir>/crops``, and collects one
    record per crop.

    Args:
        df: Paired dataframe (one row per pre/post image pair) with
            columns such as ``image_fname_pre``, ``image_fname_post``,
            ``mask_fname_post``, ``image_id_pre``, ``image_id_post``,
            ``event_name_post`` and ``fold_post``.
        dir: Dataset root directory; crops are written to ``dir/crops``.
            (NOTE(review): parameter name shadows the ``dir`` builtin.)

    Returns:
        A dataframe with one record per saved crop (filenames, damage
        label, fold, bounding box and padding metadata).
    """
    crops_dir = os.path.join(dir, "crops")
    os.makedirs(crops_dir, exist_ok=True)

    building_crops = []

    # Single running counter so crop filenames are unique across all images.
    global_crop_index = 0

    for i, row in tqdm(df.iterrows(), total=len(df)):
        # Despite the names, these hold the decoded image arrays, not filenames.
        image_fname_pre = read_image(os.path.join(dir, row["image_fname_pre"]))
        image_fname_post = read_image(os.path.join(dir, row["image_fname_post"]))

        mask_fname_post = row["mask_fname_post"]
        # The JSON labels live next to the masks: masks/x.png -> labels/x.json.
        json_fname_post = fs.change_extension(mask_fname_post.replace("masks", "labels"), ".json")
        inference_data = open_json(os.path.join(dir, json_fname_post))
        # instance_image: int image where pixel value k belongs to instance k
        # (0 = background); labels: per-instance damage labels, presumably
        # aligned with instance ids 1..max — TODO confirm against
        # create_instance_image.
        instance_image, labels = create_instance_image(inference_data)

        for label_index, damage_label in zip(range(1, instance_image.max() + 1), labels):
            try:
                instance_mask = instance_image == label_index
                rmin, rmax, cmin, cmax = bbox1(instance_mask)

                # Skip buildings whose larger side is under 16 px.
                max_size = max(rmax - rmin, cmax - cmin)
                if max_size < 16:
                    # NOTE(review): the message label says "min_size" but the
                    # value printed is max_size.
                    print(
                        "Skipping crop since it's too small",
                        fs.id_from_fname(mask_fname_post),
                        "label_index",
                        label_index,
                        "min_size",
                        max_size
                    )
                    continue

                # Add 25% context padding on each side of the bounding box.
                rpadding = (rmax - rmin) // 4
                cpadding = (cmax - cmin) // 4

                # Lower bounds are clamped at 0; numpy slicing silently clamps
                # the upper bounds at the image edge.
                pre_crop = image_fname_pre[
                    max(0, rmin - rpadding) : rmax + rpadding, max(0, cmin - cpadding) : cmax + cpadding
                ]
                post_crop = image_fname_post[
                    max(0, rmin - rpadding) : rmax + rpadding, max(0, cmin - cpadding) : cmax + cpadding
                ]

                image_id_pre = row["image_id_pre"]
                image_id_post = row["image_id_post"]

                pre_crop_fname = f"{global_crop_index:06}_{image_id_pre}.png"
                post_crop_fname = f"{global_crop_index:06}_{image_id_post}.png"
                global_crop_index += 1

                cv2.imwrite(os.path.join(crops_dir, pre_crop_fname), pre_crop)
                cv2.imwrite(os.path.join(crops_dir, post_crop_fname), post_crop)

                building_crops.append(
                    {
                        # NOTE(review): key naming is inconsistent —
                        # "pre_crop_fname" vs "post_crop"; downstream readers
                        # of train_crops.csv depend on these column names, so
                        # they are left as-is.
                        "pre_crop_fname": pre_crop_fname,
                        "post_crop": post_crop_fname,
                        "label": damage_label,
                        "event_name": row["event_name_post"],
                        "fold": row["fold_post"],
                        "rmin": rmin,
                        "rmax": rmax,
                        "cmin": cmin,
                        "cmax": cmax,
                        "max_size": max_size,
                        "rpadding": rpadding,
                        "cpadding": cpadding
                    }
                )
            except Exception as e:
                # Best-effort: log and move on. This also swallows the
                # ValueError bbox1 raises for instances with no pixels
                # (e.g. fully occluded buildings) — presumably intentional.
                print(e)
                print(mask_fname_post)

    df = pd.DataFrame.from_records(building_crops)
    return df


def main():
    """Build the per-building crop dataset for damage classification.

    Reads ``train_folds.csv`` from the data directory, pairs the pre-
    and post-disaster rows, extracts padded building crops via
    :func:`convert_dir`, and writes their metadata to
    ``train_crops.csv`` in the same directory.

    Command-line arguments:
        -dd / --data-dir: dataset root (default ``c:\\datasets\\xview2``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd", "--data-dir", type=str, default="c:\\datasets\\xview2")
    args = parser.parse_args()

    data_dir = args.data_dir

    df = pd.read_csv(os.path.join(data_dir, "train_folds.csv"))
    # Pair each pre-disaster row with its post-disaster counterpart.
    df = make_dual_dataframe(df)

    df_crops = convert_dir(df, data_dir)
    # index=False (was index=None, which only worked because None is falsy):
    # the row index carries no information for this dataset.
    df_crops.to_csv(os.path.join(data_dir, "train_crops.csv"), index=False)


if __name__ == "__main__":
    main()
Loading

0 comments on commit f2e062e

Please sign in to comment.