diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
index b8f2137c920..c60b08f6666 100644
--- a/docs/api/datasets.rst
+++ b/docs/api/datasets.rst
@@ -354,6 +354,11 @@ MapInWild
.. autoclass:: MapInWild
+MDAS
+^^^^
+
+.. autoclass:: MDAS
+
Million-AID
^^^^^^^^^^^
diff --git a/docs/api/datasets/non_geo_datasets.csv b/docs/api/datasets/non_geo_datasets.csv
index 7d7a17a4b94..f91f6b0e967 100644
--- a/docs/api/datasets/non_geo_datasets.csv
+++ b/docs/api/datasets/non_geo_datasets.csv
@@ -29,6 +29,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
`LoveDA`_,S,Google Earth,"CC-BY-NC-SA-4.0","5,987",7,"1,024x1,024",0.3,RGB
`MapInWild`_,S,"Sentinel-1/2, ESA WorldCover, NOAA VIIRS DNB","CC-BY-4.0",1018,1,1920x1920,10--463.83,"SAR, MSI, 2020_Map, avg_rad"
+`MDAS`_,S,"Sentinel-1/2, EnMAP, HySpex","CC-BY-SA-4.0",3,20,"100x120, 300x360, 1364x1636, 10000x12000, 15000x18000",0.3--30,HSI
`Million-AID`_,C,Google Earth,-,1M,51--73,,0.5--153,RGB
`MMEarth`_,"C, S","Aster, Sentinel, ERA5","CC-BY-4.0","100K--1M",,"128x128 or 64x64",10,MSI
`NASA Marine Debris`_,OD,PlanetScope,"Apache-2.0",707,1,256x256,3,RGB
diff --git a/tests/data/mdas/Augsburg_data_4_publication.zip b/tests/data/mdas/Augsburg_data_4_publication.zip
new file mode 100644
index 00000000000..a4e00554127
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication.zip differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_DSM_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_DSM_sub_area1.tif
new file mode 100644
index 00000000000..13e7483a0be
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_DSM_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_RGB_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_RGB_sub_area1.tif
new file mode 100644
index 00000000000..400085756b3
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/3K_RGB_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_10m_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_10m_sub_area1.tif
new file mode 100644
index 00000000000..96f0de07058
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_10m_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_30m_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_30m_sub_area1.tif
new file mode 100644
index 00000000000..30d07cd51ea
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_EnMAP_30m_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_Sentinel_2_10m_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_Sentinel_2_10m_sub_area1.tif
new file mode 100644
index 00000000000..5a50122b2c9
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/EeteS_Sentinel_2_10m_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/HySpex_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/HySpex_sub_area1.tif
new file mode 100644
index 00000000000..e0984b4f7c1
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/HySpex_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_1_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_1_sub_area1.tif
new file mode 100644
index 00000000000..834990c3e20
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_1_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_2_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_2_sub_area1.tif
new file mode 100644
index 00000000000..489e96791a2
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/Sentinel_2_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_buildings_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_buildings_sub_area1.tif
new file mode 100644
index 00000000000..0faaa47d73f
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_buildings_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_landuse_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_landuse_sub_area1.tif
new file mode 100644
index 00000000000..c80350418fc
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_landuse_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_water_sub_area1.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_water_sub_area1.tif
new file mode 100644
index 00000000000..8840f67a9aa
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_1/osm_water_sub_area1.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_DSM_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_DSM_sub_area2.tif
new file mode 100644
index 00000000000..313e5f9f6f4
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_DSM_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_RGB_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_RGB_sub_area2.tif
new file mode 100644
index 00000000000..37cdaa7f26b
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/3K_RGB_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_10m_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_10m_sub_area2.tif
new file mode 100644
index 00000000000..7ac37f43f06
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_10m_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_30m_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_30m_sub_area2.tif
new file mode 100644
index 00000000000..349b28401ac
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_EnMAP_30m_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_Sentinel_2_10m_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_Sentinel_2_10m_sub_area2.tif
new file mode 100644
index 00000000000..fd0bcceda81
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/EeteS_Sentinel_2_10m_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/HySpex_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/HySpex_sub_area2.tif
new file mode 100644
index 00000000000..21e22212147
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/HySpex_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_1_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_1_sub_area2.tif
new file mode 100644
index 00000000000..a7ebb2b5b61
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_1_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_2_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_2_sub_area2.tif
new file mode 100644
index 00000000000..c4a390f88a8
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/Sentinel_2_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_buildings_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_buildings_sub_area2.tif
new file mode 100644
index 00000000000..711d872d705
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_buildings_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_landuse_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_landuse_sub_area2.tif
new file mode 100644
index 00000000000..8a3f1ca1692
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_landuse_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_water_sub_area2.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_water_sub_area2.tif
new file mode 100644
index 00000000000..9c88128b574
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_2/osm_water_sub_area2.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_DSM_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_DSM_sub_area3.tif
new file mode 100644
index 00000000000..e664307c2c4
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_DSM_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_RGB_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_RGB_sub_area3.tif
new file mode 100644
index 00000000000..aa020dde3f1
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/3K_RGB_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_10m_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_10m_sub_area3.tif
new file mode 100644
index 00000000000..8f64503512d
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_10m_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_30m_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_30m_sub_area3.tif
new file mode 100644
index 00000000000..c30d66e3dd4
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_EnMAP_30m_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_Sentinel_2_10m_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_Sentinel_2_10m_sub_area3.tif
new file mode 100644
index 00000000000..2fd45d96c7f
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/EeteS_Sentinel_2_10m_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/HySpex_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/HySpex_sub_area3.tif
new file mode 100644
index 00000000000..36b6ce50d76
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/HySpex_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_1_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_1_sub_area3.tif
new file mode 100644
index 00000000000..5cfad510e84
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_1_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_2_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_2_sub_area3.tif
new file mode 100644
index 00000000000..3e80c170bf8
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/Sentinel_2_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_buildings_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_buildings_sub_area3.tif
new file mode 100644
index 00000000000..9861e5e07ee
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_buildings_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_landuse_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_landuse_sub_area3.tif
new file mode 100644
index 00000000000..3705837b37a
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_landuse_sub_area3.tif differ
diff --git a/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_water_sub_area3.tif b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_water_sub_area3.tif
new file mode 100644
index 00000000000..4bcb146bef6
Binary files /dev/null and b/tests/data/mdas/Augsburg_data_4_publication/sub_area_3/osm_water_sub_area3.tif differ
diff --git a/tests/data/mdas/data.py b/tests/data/mdas/data.py
new file mode 100644
index 00000000000..c82b54cb89a
--- /dev/null
+++ b/tests/data/mdas/data.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import hashlib
+import os
+import shutil
+
+import numpy as np
+import rasterio
+from rasterio.crs import CRS
+from rasterio.transform import from_origin
+
+# Seed NumPy's global RNG so that re-running this script regenerates the same
+# pixel values
+np.random.seed(0)
+
+# Define the root directory, dataset name, subareas, and modalities based on mdas.py
+root_dir = '.'
+ds_root_name = 'Augsburg_data_4_publication'
+subareas = ['sub_area_1', 'sub_area_2', 'sub_area_3']
+modalities = [
+ '3K_DSM',
+ '3K_RGB',
+ 'HySpex',
+ 'EeteS_EnMAP_10m',
+ 'EeteS_EnMAP_30m',
+ 'EeteS_Sentinel_2_10m',
+ 'Sentinel_1',
+ 'Sentinel_2',
+ 'osm_buildings',
+ 'osm_landuse',
+ 'osm_water',
+]
+
+# Raw label codes written into the dummy osm_landuse rasters. The first entry
+# is the "no label" code; create_dummy_geotiff treats index 0 separately from
+# the remaining class codes.
+landuse_class_codes = [
+ -2147483647, # no label
+ 7201, # forest
+ 7202, # park
+ 7203, # residential
+ 7204, # industrial
+ 7205, # farm
+ 7206, # cemetery
+ 7207, # allotments
+ 7208, # meadow
+ 7209, # commercial
+ 7210, # nature reserve
+ 7211, # recreation ground
+ 7212, # retail
+ 7213, # military
+ 7214, # quarry
+ 7215, # orchard
+ 7217, # scrub
+ 7218, # grass
+ 7219, # heath
+]
+
+# Remove existing dummy data if it exists, so stale files never end up in the
+# regenerated archive
+dataset_path = os.path.join(root_dir, ds_root_name)
+if os.path.exists(dataset_path):
+ shutil.rmtree(dataset_path)
+
+
+def create_dummy_geotiff(
+ path: str,
+ num_bands: int = 3,
+ width: int = 32,
+ height: int = 32,
+ dtype: np.dtype = np.uint16,
+ binary: bool = False,
+ landuse: bool = False,
+) -> None:
+ """Create a dummy GeoTIFF file filled with random values.
+
+ The raster is written with CRS EPSG:32632 and a unit-pixel transform whose
+ origin is (0, 0). Values are drawn from NumPy's global RNG, so the output
+ depends on the seed and on the order in which this function is called.
+
+ Args:
+ path: Output file path; its parent directory is created if missing.
+ num_bands: Number of bands to write.
+ width: Raster width in pixels.
+ height: Raster height in pixels.
+ dtype: Data type the values are cast to and stored as.
+ binary: If True, fill the raster with random 0/1 values.
+ landuse: If True (and ``binary`` is False), fill the raster with random
+ land-use class codes, 10% of which are the "no label" code.
+ """
+ crs = CRS.from_epsg(32632)
+ transform = from_origin(0, 0, 1, 1)
+
+ if binary:
+ # Upper bound is exclusive, so values are 0 or 1
+ data = np.random.randint(0, 2, size=(num_bands, height, width)).astype(dtype)
+ elif landuse:
+ # Fixed share of "no label" pixels, the rest sampled from the real class
+ # codes; shuffling spreads the "no label" pixels over the raster
+ num_pixels = num_bands * height * width
+ no_label_ratio = 0.1
+ num_no_label = int(no_label_ratio * num_pixels)
+ num_labels = num_pixels - num_no_label
+ landuse_values = np.random.choice(landuse_class_codes[1:], size=num_labels)
+ no_label_values = np.full(num_no_label, landuse_class_codes[0], dtype=dtype)
+ combined = np.concatenate([landuse_values, no_label_values])
+ np.random.shuffle(combined)
+ data = combined.reshape((num_bands, height, width)).astype(dtype)
+ else:
+ # Generate random data for other modalities (integers in [0, 255))
+ data = np.random.randint(0, 255, size=(num_bands, height, width)).astype(dtype)
+
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+
+ with rasterio.open(
+ path,
+ 'w',
+ driver='GTiff',
+ height=height,
+ width=width,
+ count=num_bands,
+ dtype=dtype,
+ crs=crs,
+ transform=transform,
+ ) as dst:
+ dst.write(data)
+
+
+# Raster layout per modality: (band count, dtype, binary mask?, landuse mask?)
+raster_specs = {
+    'osm_buildings': (1, np.uint8, True, False),
+    'osm_water': (1, np.uint8, True, False),
+    'osm_landuse': (1, np.float64, False, True),
+    'HySpex': (368, np.int16, False, False),
+    'EeteS_EnMAP_10m': (242, np.uint16, False, False),
+    'EeteS_EnMAP_30m': (242, np.uint16, False, False),
+    'Sentinel_1': (2, np.float32, False, False),
+    'Sentinel_2': (13, np.uint16, False, False),
+    'EeteS_Sentinel_2_10m': (13, np.uint16, False, False),
+    '3K_DSM': (1, np.float32, False, False),
+    '3K_RGB': (3, np.uint8, False, False),
+}
+
+# Write one GeoTIFF per (subarea, modality) pair, subarea by subarea
+for subarea in subareas:
+    # File names drop the last underscore, mirroring MDAS._format_subarea
+    # in mdas.py: 'sub_area_1' -> 'sub_area1'
+    pieces = subarea.split('_')
+    subarea_formatted = f'{pieces[0]}_{pieces[1]}{pieces[2]}'
+
+    subarea_dir = os.path.join(root_dir, ds_root_name, subarea)
+
+    for modality in modalities:
+        spec = raster_specs.get(modality)
+        if spec is None:
+            # Modalities without a layout are skipped without writing a file
+            continue
+
+        band_count, pixel_dtype, is_binary, is_landuse = spec
+        create_dummy_geotiff(
+            os.path.join(subarea_dir, f'{modality}_{subarea_formatted}.tif'),
+            num_bands=band_count,
+            dtype=pixel_dtype,
+            binary=is_binary,
+            landuse=is_landuse,
+        )
+
+print(f'Dummy MDAS dataset created at {os.path.join(root_dir, ds_root_name)}')
+
+# Pack the dataset directory into <ds_root_name>.zip next to it; make_archive
+# appends the '.zip' suffix itself, so it is stripped from the base name
+zip_filename = f'{ds_root_name}.zip'
+zip_path = os.path.join(root_dir, zip_filename)
+archive_base, _ = os.path.splitext(zip_path)
+
+shutil.make_archive(
+    base_name=archive_base, format='zip', root_dir='.', base_dir=ds_root_name
+)
+
+
+def calculate_md5(filename: str) -> str:
+    """Return the hexadecimal MD5 digest of a file.
+
+    The file is read in 4096-byte chunks so it never has to fit in memory
+    at once.
+
+    Args:
+        filename: Path of the file to hash.
+
+    Returns:
+        the MD5 digest as a lowercase hex string
+    """
+    digest = hashlib.md5()
+    with open(filename, 'rb') as stream:
+        while block := stream.read(4096):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+# Print the MD5 of the freshly written zip archive
+checksum = calculate_md5(zip_path)
+print(f'MD5 checksum: {checksum}')
diff --git a/tests/datasets/test_mdas.py b/tests/datasets/test_mdas.py
new file mode 100644
index 00000000000..83138c84207
--- /dev/null
+++ b/tests/datasets/test_mdas.py
@@ -0,0 +1,113 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+import shutil
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import pytest
+import torch
+import torch.nn as nn
+from _pytest.fixtures import SubRequest
+from pytest import MonkeyPatch
+
+from torchgeo.datasets import MDAS, DatasetNotFoundError
+
+
+class TestMDAS:
+    """Tests for the MDAS dataset, run against the dummy archive in tests/data/mdas."""
+
+    @pytest.fixture(
+        params=[
+            {'subareas': ['sub_area_1'], 'modalities': ['HySpex']},
+            {
+                'subareas': ['sub_area_1', 'sub_area_2'],
+                'modalities': ['3K_DSM', 'HySpex', 'osm_water'],
+            },
+            {
+                'subareas': ['sub_area_2', 'sub_area_3'],
+                'modalities': [
+                    '3K_DSM',
+                    '3K_RGB',
+                    'HySpex',
+                    'EeteS_EnMAP_10m',
+                    'EeteS_EnMAP_30m',
+                    'EeteS_Sentinel_2_10m',
+                    'Sentinel_2',
+                    'Sentinel_1',
+                    'osm_buildings',
+                    'osm_landuse',
+                    'osm_water',
+                ],
+            },
+        ]
+    )
+    def dataset(
+        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
+    ) -> MDAS:
+        """Build an MDAS instance that "downloads" the local dummy archive."""
+        # Point the class at the dummy archive and its checksum instead of
+        # the real remote file
+        md5 = '99e1744ca6f19aa19a3aa23a2bbf7bef'
+        monkeypatch.setattr(MDAS, 'md5', md5)
+        url = os.path.join('tests', 'data', 'mdas', 'Augsburg_data_4_publication.zip')
+        monkeypatch.setattr(MDAS, 'url', url)
+
+        params = request.param
+
+        return MDAS(
+            root=tmp_path,
+            subareas=params['subareas'],
+            modalities=params['modalities'],
+            transforms=nn.Identity(),
+            download=True,
+            checksum=True,
+        )
+
+    def test_getitem(self, dataset: MDAS) -> None:
+        sample = dataset[0]
+        assert isinstance(sample, dict)
+
+        # OSM layers are returned as masks, everything else as images
+        for modality in dataset.modalities:
+            suffix = 'mask' if modality.startswith('osm') else 'image'
+            assert f'{modality}_{suffix}' in sample
+
+        for value in sample.values():
+            assert isinstance(value, torch.Tensor)
+
+    def test_len(self, dataset: MDAS) -> None:
+        assert len(dataset) == len(dataset.subareas)
+
+    def test_already_downloaded(self, dataset: MDAS) -> None:
+        MDAS(root=dataset.root)
+
+    def test_not_yet_extracted(self, tmp_path: Path) -> None:
+        filename = 'Augsburg_data_4_publication.zip'
+        data_dir = os.path.join('tests', 'data', 'mdas')
+        shutil.copyfile(os.path.join(data_dir, filename), tmp_path / filename)
+        MDAS(root=str(tmp_path))
+
+    def test_invalid_subarea(self) -> None:
+        with pytest.raises(AssertionError):
+            MDAS(subareas=['foo'])
+
+    def test_invalid_modality(self) -> None:
+        with pytest.raises(AssertionError):
+            MDAS(modalities=['foo'])
+
+    def test_not_downloaded(self, tmp_path: Path) -> None:
+        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
+            MDAS(tmp_path)
+
+    def test_plot(self, dataset: MDAS) -> None:
+        dataset.plot(dataset[0], suptitle='Test')
+        plt.close()
+
+    def test_plot_single_sample(self, dataset: MDAS) -> None:
+        dataset.plot(dataset[0], show_titles=False)
+        plt.close()
diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py
index f55ef3af22c..0e522c09976 100644
--- a/torchgeo/datasets/__init__.py
+++ b/torchgeo/datasets/__init__.py
@@ -85,6 +85,7 @@
from .levircd import LEVIRCD, LEVIRCDBase, LEVIRCDPlus
from .loveda import LoveDA
from .mapinwild import MapInWild
+from .mdas import MDAS
from .millionaid import MillionAID
from .mmearth import MMEarth
from .naip import NAIP
@@ -159,6 +160,7 @@
'GBIF',
'GID15',
'LEVIRCD',
+ 'MDAS',
'NAIP',
'NCCM',
'NLCD',
diff --git a/torchgeo/datasets/eurocrops.py b/torchgeo/datasets/eurocrops.py
index bac2b03552a..8832905c2d6 100644
--- a/torchgeo/datasets/eurocrops.py
+++ b/torchgeo/datasets/eurocrops.py
@@ -205,9 +205,6 @@ def get_label(self, feature: 'fiona.model.Feature') -> int:
# (Parent code is computed by replacing rightmost non-0 character with 0.)
hcat_code = feature['properties'][self.label_name]
if hcat_code is None:
- print(
- f"Feature does not contain the label '{self.label_name}'. Skip rendering."
- )
return 0
while True:
diff --git a/torchgeo/datasets/mdas.py b/torchgeo/datasets/mdas.py
new file mode 100644
index 00000000000..1ee020ab953
--- /dev/null
+++ b/torchgeo/datasets/mdas.py
@@ -0,0 +1,379 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""MDAS dataset."""
+
+import os
+from collections.abc import Callable
+from typing import Any, ClassVar
+
+import matplotlib.cm as cm
+import matplotlib.pyplot as plt
+import numpy as np
+import rasterio as rio
+import torch
+from matplotlib.colors import ListedColormap
+from matplotlib.figure import Figure
+from torch import Tensor
+
+from .errors import DatasetNotFoundError
+from .geo import NonGeoDataset
+from .utils import Path, download_and_extract_archive, extract_archive
+
+
+class MDAS(NonGeoDataset):
+ """MDAS dataset.
+
+ The `MDAS <https://essd.copernicus.org/articles/15/113/2023/>`__ multimodal dataset
+ is a comprehensive dataset for the city of Augsburg, Germany, collected on 7th May 2018.
+ It includes SAR, multispectral, hyperspectral, DSM, and GIS data,
+ providing comprehensive options for data fusion research.
+ MDAS supports applications like resolution enhancement, spectral unmixing, and land cover classification.
+
+ Dataset features:
+
+ * 3K DSM data
+ * 3K high resolution RGB images
+ * Original very high resolution HySpex airborne imagery
+ * EeteS simulated imagery with 10m GSD and EnMAP spectral bands
+ * EeteS simulated imagery with 30m GSD and EnMAP spectral bands
+ * EeteS simulated imagery with 10m GSD and Sentinel-2 spectral bands
+ * Sentinel-2 L2A product
+ * Sentinel-1 GRD product
+ * Open Street Map (OSM) labels, see the
+ `dataset paper <https://essd.copernicus.org/articles/15/113/2023/>`__ for
+ a table of the label distribution
+
+ Dataset format:
+
+ * 3K_RGB.tif (Shape: (4, 15000, 18000)px, Data Type: uint8)
+ * 3K_dsm.tif (Shape: (1, 10000, 12000)px, Data Type: float32)
+ * HySpex.tif (Shape: (368, 1364, 1636)px, Data Type: int16)
+ * EeteS_EnMAP_2dot2m.tif (Shape: (242, 1364, 1636)px, Data Type: float32)
+ * EeteS_EnMAP_10m.tif (Shape: (242, 300, 360)px, Data Type: uint16)
+ * EeteS_EnMAP_30m.tif (Shape: (242, 100, 120)px, Data Type: uint16)
+ * EeteS_Sentinel_2_10m.tif (Shape: (4, 300, 360)px, Data Type: uint16)
+ * Sentinel_2.tif (Shape: (12, 300, 360)px, Data Type: uint16)
+ * Sentinel_1.tif (Shape: (2, 300, 360)px, Data Type: float32)
+ * osm_buildings.tif (Shape: (1, 1364, 1636)px, Data Type: uint8)
+ * osm_landuse.tif (Shape: (1, 1364, 1636)px, Data Type: float64)
+ * osm_water.tif (Shape: (1, 1364, 1636)px, Data Type: float64)
+
+ If you use this dataset in your research, please cite the following paper:
+
+ * https://essd.copernicus.org/articles/15/113/2023/
+
+ .. versionadded:: 0.7
+ """
+
+ # Modality names accepted by the ``modalities`` argument; each one is also the
+ # file-name prefix of the GeoTIFF loaded for that modality
+ valid_modalities = (
+ '3K_DSM',
+ '3K_RGB',
+ 'HySpex',
+ 'EeteS_EnMAP_10m',
+ 'EeteS_EnMAP_30m',
+ 'EeteS_Sentinel_2_10m',
+ 'Sentinel_2',
+ 'Sentinel_1',
+ 'osm_buildings',
+ 'osm_landuse',
+ 'osm_water',
+ )
+ # Names of the contiguous land-use class indices produced by ``landuse_mapping``
+ landuse_class_names: ClassVar[dict[int, str]] = {
+ 0: 'no label',
+ 1: 'forest',
+ 2: 'park',
+ 3: 'residential',
+ 4: 'industrial',
+ 5: 'farm',
+ 6: 'cemetery',
+ 7: 'allotments',
+ 8: 'meadow',
+ 9: 'commercial',
+ 10: 'nature reserve',
+ 11: 'recreation ground',
+ 12: 'retail',
+ 13: 'military',
+ 14: 'quarry',
+ 15: 'orchard',
+ 16: 'scrub',
+ 17: 'grass',
+ 18: 'heath',
+ }
+
+ # Raw codes stored in the osm_landuse rasters -> contiguous class indices
+ # (see ``landuse_class_names``); code list taken from the README linked below
+ # https://github.com/zhu-xlab/augsburg_Multimodal_Data_Set_MDaS/blob/75c015022b5f688dfc44744f19bcf34bdce786c7/Augsburg_data_4_publication/entire_city/OSM_label/README#L14
+ landuse_mapping: ClassVar[dict[int, int]] = {
+ -2147483647: 0,
+ 7201: 1,
+ 7202: 2,
+ 7203: 3,
+ 7204: 4,
+ 7205: 5,
+ 7206: 6,
+ 7207: 7,
+ 7208: 8,
+ 7209: 9,
+ 7210: 10,
+ 7211: 11,
+ 7212: 12,
+ 7213: 13,
+ 7214: 14,
+ 7215: 15,
+ 7217: 16,
+ 7218: 17,
+ 7219: 18,
+ }
+
+ # Directory (relative to ``root``) that holds the per-subarea folders
+ ds_root_name = 'Augsburg_data_4_publication'
+
+ # Archive file name looked up in, and downloaded to, ``root``
+ zipfilename = f'{ds_root_name}.zip'
+
+ # Subarea names accepted by the ``subareas`` argument
+ valid_subareas = ('sub_area_1', 'sub_area_2', 'sub_area_3')
+
+ # Download location of the archive
+ url = 'https://huggingface.co/datasets/torchgeo/mdas/resolve/860226b74269f1cf1bed8ea3c03f571ae701144c/Augsburg_data_4_publication.zip'
+
+ # MD5 passed to the download helper when ``checksum`` is True
+ md5 = '7b63c26e3717cb52c6ba47d215f18d5b'
+
+ # Zero-based band indices that plot() uses as the three channels of an RGB
+ # composite for the respective sensors
+ enmap_rgb_band_idx: ClassVar[list[int]] = [43, 28, 10]
+ sentinel_2_rgb_band_idx: ClassVar[list[int]] = [3, 2, 1]
+ hyspex_rgb_band_idx: ClassVar[list[int]] = [100, 50, 10]
+
+    def __init__(
+        self,
+        root: Path = 'data',
+        subareas: list[str] = ['sub_area_1'],
+        modalities: list[str] = ['3K_RGB', 'HySpex', 'Sentinel_2'],
+        transforms: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
+        download: bool = False,
+        checksum: bool = False,
+    ) -> None:
+        """Initialize a new MDAS dataset instance.
+
+        Args:
+            root: Root directory where the dataset should be stored.
+            subareas: The subareas to load. Options are 'sub_area_1',
+                'sub_area_2', 'sub_area_3'.
+            modalities: The modalities to load. Options are '3K_DSM', '3K_RGB',
+                'HySpex', 'EeteS_EnMAP_10m', 'EeteS_EnMAP_30m',
+                'EeteS_Sentinel_2_10m', 'Sentinel_2', 'Sentinel_1',
+                'osm_buildings', 'osm_landuse', 'osm_water'.
+            transforms: A function/transform that takes in a dictionary and
+                returns a transformed version.
+            download: If True, download dataset and store it in the root directory.
+            checksum: If True, check the integrity of the dataset after download.
+
+        Raises:
+            AssertionError: If the subareas or modalities are not valid.
+            DatasetNotFoundError: If dataset is not found and *download* is False.
+        """
+        self.root = root
+        self.download = download
+        assert all(
+            sub in self.valid_subareas for sub in subareas
+        ), f'Subareas must be one of {self.valid_subareas}'
+        # Store copies: the defaults are list objects shared by every call, so
+        # keeping a reference would let a later in-place edit of
+        # ``dataset.subareas`` / ``dataset.modalities`` change the defaults
+        self.subareas = list(subareas)
+        assert all(
+            mod in self.valid_modalities for mod in modalities
+        ), f'Modalities must be one of {self.valid_modalities}'
+        self.modalities = list(modalities)
+        self.transforms = transforms
+        self.checksum = checksum
+
+        # Make sure the files are on disk (extracting/downloading if needed)
+        # before indexing them
+        self._verify()
+        self.files = self._load_files()
+
+ def __len__(self) -> int:
+ """Return the number of samples in the dataset.
+
+ ``self.files`` holds one entry per selected subarea, so this equals the
+ number of subareas passed to the constructor.
+
+ Returns:
+ the length of the dataset
+ """
+ return len(self.files)
+
+    def _load_files(self) -> list[dict[str, str]]:
+        """Collect the file paths of every selected subarea.
+
+        Paths follow the layout
+        ``<root>/<ds_root_name>/<subarea>/<modality>_<formatted subarea>.tif``.
+
+        Returns:
+            one dictionary per subarea, mapping each selected modality to the
+            path of its GeoTIFF
+        """
+        return [
+            {
+                modality: os.path.join(
+                    self.root,
+                    self.ds_root_name,
+                    subarea,
+                    f'{modality}_{self._format_subarea(subarea)}.tif',
+                )
+                for modality in self.modalities
+            }
+            for subarea in self.subareas
+        ]
+
+    def _format_subarea(self, subarea: str) -> str:
+        """Convert a subarea name into the form used in file names.
+
+        The underscore between the second and third component is dropped,
+        e.g. ``'sub_area_1'`` becomes ``'sub_area1'``.
+
+        Args:
+            subarea: The subarea string to format.
+
+        Returns:
+            formatted subarea string for files
+        """
+        pieces = subarea.split('_')
+        return f'{pieces[0]}_{pieces[1]}{pieces[2]}'
+
+    def _load_image(self, path: Path) -> Tensor:
+        """Load a raster from a given path.
+
+        For the ``osm_landuse`` layer the raw codes stored in the file are
+        replaced by the contiguous class indices of ``landuse_mapping``; codes
+        missing from the mapping become 0 ("no label").
+
+        Args:
+            path: The path to the image file
+
+        Returns:
+            the loaded image as a tensor
+        """
+        with rio.open(path) as src:
+            img = src.read()
+
+        # Widen uint16 before the tensor conversion
+        # NOTE(review): presumably because torch.from_numpy does not accept
+        # uint16 on all supported PyTorch versions -- confirm
+        if img.dtype == np.uint16:
+            img = img.astype(np.int32)
+
+        # Decide by file name only: matching against the whole path would also
+        # remap every other modality when a parent directory happens to
+        # contain 'osm_landuse'
+        if os.path.basename(path).startswith('osm_landuse'):
+            # One vectorized pass per class instead of a Python-level dict
+            # lookup per pixel; unmapped codes keep the initial 0
+            class_idx = np.zeros(img.shape, dtype=np.int64)
+            for code, index in self.landuse_mapping.items():
+                class_idx[img == code] = index
+            img = class_idx
+
+        return torch.from_numpy(img)
+
+    def __getitem__(self, idx: int) -> dict[str, Tensor]:
+        """Load every selected modality of one subarea.
+
+        Modalities whose name contains ``'osm'`` are stored under
+        ``'<modality>_mask'`` as long tensors, all others under
+        ``'<modality>_image'``.
+
+        Args:
+            idx: The index of the sample to return
+
+        Returns:
+            a dictionary containing the data of chosen modalities
+        """
+        sample: dict[str, Any] = {}
+        for modality, path in self.files[idx].items():
+            tensor = self._load_image(path)
+            if 'osm' in modality:
+                sample[f'{modality}_mask'] = tensor.long()
+            else:
+                sample[f'{modality}_image'] = tensor
+
+        if self.transforms:
+            sample = self.transforms(sample)
+
+        return sample
+
+    def _verify(self) -> None:
+        """Make sure the requested files are available locally.
+
+        Checks, in order: the extracted GeoTIFFs, then the zip archive in
+        ``root`` (which is extracted), and finally falls back to downloading.
+
+        Raises:
+            DatasetNotFoundError: If neither the files nor the archive exist
+                and *download* is False.
+        """
+        # Every requested (subarea, modality) file must already be extracted
+        expected_paths = [
+            os.path.join(
+                self.root,
+                self.ds_root_name,
+                subarea,
+                f'{modality}_{self._format_subarea(subarea)}.tif',
+            )
+            for subarea in self.subareas
+            for modality in self.modalities
+        ]
+        if all(os.path.exists(path) for path in expected_paths):
+            return
+
+        # Archive already present in root: just unpack it
+        archive_path = os.path.join(self.root, self.zipfilename)
+        if os.path.exists(archive_path):
+            self._extract()
+            return
+
+        if not self.download:
+            raise DatasetNotFoundError(self)
+
+        self._download()
+
+    def _extract(self) -> None:
+        """Unpack the zip archive found in ``root`` into ``root``."""
+        archive_path = os.path.join(self.root, self.zipfilename)
+        extract_archive(archive_path, self.root)
+
+    def _download(self) -> None:
+        """Fetch the archive from ``url`` into ``root`` and extract it.
+
+        The MD5 checksum is only handed to the download helper when
+        ``checksum`` is enabled.
+        """
+        expected_md5 = self.md5 if self.checksum else None
+        download_and_extract_archive(
+            self.url, self.root, filename=self.zipfilename, md5=expected_md5
+        )
+
+    def plot(
+        self,
+        sample: dict[str, Tensor],
+        show_titles: bool = True,
+        suptitle: str | None = None,
+    ) -> Figure:
+        """Plot a sample from the dataset.
+
+        Every entry of the sample gets its own subplot, laid out in one row.
+        Multi-band imagery is shown as a three-band composite using the
+        ``*_rgb_band_idx`` class attributes; the land-use mask is drawn with
+        one fixed color per class and a labelled colorbar.
+
+        Args:
+            sample: A sample returned by `__getitem__`.
+            show_titles: Whether to display titles on the subplots.
+            suptitle: An optional super title for the plot.
+
+        Returns:
+            a matplotlib Figure with the rendered sample
+        """
+        ncols = len(sample)
+        fig, axs = plt.subplots(1, ncols, figsize=(5 * ncols, 5))
+
+        # With a single column plt.subplots returns one Axes, not an array
+        if ncols == 1:
+            axs = [axs]
+
+        for idx, (key, data) in enumerate(sample.items()):
+            match key:
+                case '3K_RGB_image':
+                    # Only the first three bands are displayed
+                    img = data[:3].numpy().transpose(1, 2, 0) / 255.0
+                    axs[idx].imshow(img)
+                case '3K_DSM_image':
+                    img = data.numpy().squeeze(0)
+                    axs[idx].imshow(img, cmap='gray')
+                case 'EeteS_EnMAP_10m_image' | 'EeteS_EnMAP_30m_image':
+                    img = (
+                        data[self.enmap_rgb_band_idx].numpy().transpose(1, 2, 0)
+                        / 10000.0
+                    )
+                    axs[idx].imshow(img)
+                case 'EeteS_Sentinel_2_10m_image' | 'Sentinel_2_image':
+                    img = (
+                        data[self.sentinel_2_rgb_band_idx].numpy().transpose(1, 2, 0)
+                        / 10000.0
+                    )
+                    axs[idx].imshow(img)
+                case 'Sentinel_1_image':
+                    # Only the first band is shown, clipped to [0, 1]
+                    img = data[0].numpy().clip(0, 1)
+                    axs[idx].imshow(img)
+                case 'HySpex_image':
+                    img = (
+                        data[self.hyspex_rgb_band_idx].numpy().transpose(1, 2, 0)
+                        / 15000.0
+                    )
+                    axs[idx].imshow(img)
+                case 'osm_landuse_mask':
+                    img = data.numpy().squeeze(0)
+                    num_classes = len(self.landuse_class_names)
+                    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9;
+                    # pyplot.get_cmap is the supported lookup
+                    tab20 = plt.get_cmap('tab20')
+                    cmap = ListedColormap([tab20(i) for i in range(num_classes)])
+                    # Fix the color limits so that class i is always drawn
+                    # with color i and lines up with colorbar tick i, no
+                    # matter which classes occur in this particular mask
+                    im = axs[idx].imshow(
+                        img, cmap=cmap, vmin=-0.5, vmax=num_classes - 0.5
+                    )
+                    cbar = plt.colorbar(im, ax=axs[idx], ticks=range(num_classes))
+                    cbar.ax.set_yticklabels(
+                        [self.landuse_class_names[i] for i in range(num_classes)]
+                    )
+                case 'osm_buildings_mask':
+                    img = data.numpy().squeeze(0)
+                    axs[idx].imshow(img, cmap='gray')
+                case 'osm_water_mask':
+                    img = data.numpy().squeeze(0)
+                    axs[idx].imshow(img, cmap='Blues')
+
+            axs[idx].axis('off')
+            if show_titles:
+                axs[idx].set_title(key)
+
+        if suptitle:
+            plt.suptitle(suptitle)
+
+        return fig