-
Notifications
You must be signed in to change notification settings - Fork 385
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HySpecNet-11k: add new dataset (#2410)
* HySpecNet-11k: add new dataset * Add docs for data module * Shorten filename to appease Windows git * Add more tests * Even shorter * Fix mypy * Clarify benchmark task
- Loading branch information
1 parent
e9c3c2a
commit 6baa00d
Showing
22 changed files
with
440 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Test configuration: BYOLTask with a ResNet-18 backbone, fed by
# HySpecNet11kDataModule reading the test data under tests/data/hyspecnet.
model:
  class_path: BYOLTask
  init_args:
    model: 'resnet18'
    # Number of input channels the backbone is built for.
    in_channels: 202
data:
  class_path: HySpecNet11kDataModule
  init_args:
    batch_size: 2
  dict_kwargs:
    root: 'tests/data/hyspecnet'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Test configuration: MoCoTask with a ResNet-18 backbone, fed by
# HySpecNet11kDataModule reading the test data under tests/data/hyspecnet.
model:
  class_path: MoCoTask
  init_args:
    model: 'resnet18'
    # Number of input channels the backbone is built for.
    in_channels: 202
data:
  class_path: HySpecNet11kDataModule
  init_args:
    batch_size: 2
  dict_kwargs:
    root: 'tests/data/hyspecnet'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Test configuration: SimCLRTask with a ResNet-18 backbone, fed by
# HySpecNet11kDataModule reading the test data under tests/data/hyspecnet.
model:
  class_path: SimCLRTask
  init_args:
    model: 'resnet18'
    # Number of input channels the backbone is built for.
    in_channels: 202
data:
  class_path: HySpecNet11kDataModule
  init_args:
    batch_size: 2
  dict_kwargs:
    root: 'tests/data/hyspecnet'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import os | ||
import shutil | ||
|
||
import numpy as np | ||
import rasterio | ||
from rasterio import Affine | ||
from rasterio.crs import CRS | ||
|
||
# Side length, in pixels, of every generated (square) patch.
SIZE = 32
# Pixel data type of the generated rasters.
DTYPE = 'int16'

# Fixed seed so that regenerating the test data yields the same pixel values.
np.random.seed(0)

# Tile name purposefully shortened to avoid Windows git filename length limit.
tiles = ['ENMAP01_20221103T162438Z']
patches = ['Y01460273_X05670694', 'Y01460273_X06950822']

# Rasterio profile shared by every generated GeoTIFF.
profile = {
    'driver': 'GTiff',
    'dtype': DTYPE,
    'nodata': -32768.0,
    'width': SIZE,
    'height': SIZE,
    'count': 224,
    'crs': CRS.from_epsg(32618),
    'transform': Affine(30.0, 0.0, 691845.0, 0.0, -30.0, 4561935.0),
    'blockysize': 3,
    'tiled': False,
    'compress': 'deflate',
    'interleave': 'band',
}

root = 'hyspecnet-11k'
splits_dir = os.path.join(root, 'splits', 'easy')
os.makedirs(splits_dir, exist_ok=True)

# Paths (relative to the patches directory) that are listed in the split CSVs.
relative_paths = []
for tile in tiles:
    for patch in patches:
        name = f'{tile}-{patch}'
        relative_path = os.path.join(tile, name, f'{name}-DATA.npy')
        relative_paths.append(relative_path)

        # Spectral image: stored next to where the DATA.npy entry points, but
        # with the SPECTRAL_IMAGE.TIF suffix.
        data_path = os.path.join(root, 'patches', relative_path)
        os.makedirs(os.path.dirname(data_path), exist_ok=True)
        image_path = data_path.replace('DATA.npy', 'SPECTRAL_IMAGE.TIF')
        Z = np.random.randint(
            np.iinfo(DTYPE).min, np.iinfo(DTYPE).max, size=(SIZE, SIZE), dtype=DTYPE
        )
        with rasterio.open(image_path, 'w', **profile) as src:
            # The same random 2D array is written to every band (1-indexed).
            for i in range(1, profile['count'] + 1):
                src.write(Z, i)

# Split CSVs: every split lists every patch. Each file is opened in 'w' mode and
# written once, so re-running the script regenerates the CSVs instead of
# appending duplicate rows to the ones left by a previous run.
for split in ['train', 'val', 'test']:
    with open(os.path.join(splits_dir, f'{split}.csv'), 'w') as f:
        f.writelines(f'{relative_path}\n' for relative_path in relative_paths)

shutil.make_archive(f'{root}-01', 'gztar', '.', os.path.join(root, 'patches'))
shutil.make_archive(f'{root}-splits', 'gztar', '.', os.path.join(root, 'splits'))
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+491 KB
...2438Z-Y01460273_X05670694/ENMAP01_20221103T162438Z-Y01460273_X05670694-SPECTRAL_IMAGE.TIF
Binary file not shown.
Binary file added
BIN
+491 KB
...2438Z-Y01460273_X06950822/ENMAP01_20221103T162438Z-Y01460273_X06950822-SPECTRAL_IMAGE.TIF
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X05670694/ENMAP01_20221103T162438Z-Y01460273_X05670694-DATA.npy | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X06950822/ENMAP01_20221103T162438Z-Y01460273_X06950822-DATA.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X05670694/ENMAP01_20221103T162438Z-Y01460273_X05670694-DATA.npy | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X06950822/ENMAP01_20221103T162438Z-Y01460273_X06950822-DATA.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X05670694/ENMAP01_20221103T162438Z-Y01460273_X05670694-DATA.npy | ||
ENMAP01_20221103T162438Z/ENMAP01_20221103T162438Z-Y01460273_X06950822/ENMAP01_20221103T162438Z-Y01460273_X06950822-DATA.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
import glob | ||
import os | ||
import shutil | ||
from pathlib import Path | ||
|
||
import matplotlib.pyplot as plt | ||
import pytest | ||
import torch.nn as nn | ||
from pytest import MonkeyPatch | ||
from torch import Tensor | ||
|
||
from torchgeo.datasets import DatasetNotFoundError, HySpecNet11k, RGBBandsMissingError | ||
|
||
root = os.path.join('tests', 'data', 'hyspecnet') | ||
md5s = {'hyspecnet-11k-01.tar.gz': '', 'hyspecnet-11k-splits.tar.gz': ''} | ||
|
||
|
||
class TestHySpecNet11k:
    """Tests for the HySpecNet11k dataset, run against the local test data."""

    @pytest.fixture
    def dataset(self, monkeypatch: MonkeyPatch) -> HySpecNet11k:
        """Return a dataset rooted at the test data directory.

        The class attributes ``url`` and ``md5s`` are monkeypatched to the
        local test data directory and the module-level ``md5s`` mapping.
        """
        monkeypatch.setattr(HySpecNet11k, 'url', root + os.sep)
        monkeypatch.setattr(HySpecNet11k, 'md5s', md5s)
        return HySpecNet11k(root, transforms=nn.Identity())

    def test_getitem(self, dataset: HySpecNet11k) -> None:
        """Indexing returns a dict whose ``'image'`` entry is a Tensor."""
        sample = dataset[0]
        assert isinstance(sample, dict)
        assert isinstance(sample['image'], Tensor)

    def test_len(self, dataset: HySpecNet11k) -> None:
        """The test data contains exactly two samples."""
        assert len(dataset) == 2

    def test_download(self, dataset: HySpecNet11k, tmp_path: Path) -> None:
        """Instantiating with ``download=True`` in an empty directory succeeds.

        The ``dataset`` fixture is not used directly here; presumably it is
        requested so that its monkeypatches are active.
        """
        HySpecNet11k(tmp_path, download=True)

    def test_extract(self, dataset: HySpecNet11k, tmp_path: Path) -> None:
        """Instantiating in a directory holding only the archives succeeds."""
        archives = glob.iglob(os.path.join(root, '*.tar.gz'))
        for archive in archives:
            shutil.copy(archive, tmp_path)
        HySpecNet11k(tmp_path)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root without ``download=True`` raises DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            HySpecNet11k(tmp_path)

    def test_plot(self, dataset: HySpecNet11k) -> None:
        """Plotting a sample with a suptitle runs, then the figure is closed."""
        sample = dataset[0]
        dataset.plot(sample, suptitle='Test')
        plt.close()

    def test_plot_rgb(self, dataset: HySpecNet11k) -> None:
        """Plotting with ``bands=(1, 2, 3)`` raises RGBBandsMissingError."""
        subset = HySpecNet11k(root=dataset.root, bands=(1, 2, 3))
        with pytest.raises(
            RGBBandsMissingError,
            match='Dataset does not contain some of the RGB bands',
        ):
            subset.plot(subset[0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
"""HySpecNet datamodule.""" | ||
|
||
from typing import Any | ||
|
||
import torch | ||
|
||
from ..datasets import HySpecNet11k | ||
from .geo import NonGeoDataModule | ||
|
||
|
||
class HySpecNet11kDataModule(NonGeoDataModule):
    """LightningDataModule wrapping the HySpecNet11k dataset.

    .. versionadded:: 0.7
    """

    # Normalization statistics; values follow the reference preprocessing in
    # https://git.tu-berlin.de/rsim/hyspecnet-tools/-/blob/main/tif_to_npy.ipynb
    mean = torch.tensor(0)
    std = torch.tensor(10000)

    def __init__(
        self,
        batch_size: int = 64,
        num_workers: int = 0,
        **kwargs: Any,
    ) -> None:
        """Create a new HySpecNet11kDataModule.

        Args:
            batch_size: Size of each mini-batch.
            num_workers: Number of workers for parallel data loading.
            **kwargs: Additional keyword arguments passed to
                :class:`~torchgeo.datasets.HySpecNet11k`.
        """
        # The base class receives the dataset class itself, not an instance.
        super().__init__(HySpecNet11k, batch_size, num_workers, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.