Skip to content

Commit

Permalink
Large Index Zenodo Remote Migration (#632)
Browse files Browse the repository at this point in the history
* setting up pyproject

* added support for scms

* updated checksum for scms

* migrated billboard

* added support for salami

* Added support for Mridangam_Stroke

* Added support for Mridangam_Stroke

* Added support for dali

* Added support for mtg_jamendo_autotagging_moodtheme

* fixed black changes

* Added support for freesound_one_shot

* Added support for irmas

* Added support for medley_solos_db

* Added support for openmic2018

* Added support for good_sounds

* Added support for idmt

* Fixed index for mtg

* Fixed index for mtg

* Added support for da_tacos

* Added more support and tests from soundata

* Support for raga compmusic

* added support for classicaldb

* added support for beatport

* Added support for saraga carnatic

* added support for maestro

* added support for tinysol

* added support for baf

* support for cipi

* added support for giantsteps tempo

* added support for groove midi

* Added support for ballroom

* Added support for giantsteps key

* support for indian tonic

* added support for otmm makam

* added support for gtzan

* Fixed partial download for gtzan

* Fixes in download_utils remote

* Fixes in download_utils remote

* Fixes in conda version for ci

* updated doc string for core

* updated get_path func

* missing partial download in GTZAN

---------

Co-authored-by: Tanmay Khandelwal <[email protected]>
Co-authored-by: tanmayy24 <tk3309nyu.edu>
Co-authored-by: Genís Plaja-Roglans <[email protected]>
  • Loading branch information
3 people authored Nov 4, 2024
1 parent ba0a902 commit 9f797d2
Show file tree
Hide file tree
Showing 119 changed files with 1,334 additions and 2,673,262 deletions.
19 changes: 19 additions & 0 deletions mirdata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,25 @@ def list_datasets():
return DATASETS


def list_dataset_versions(dataset_name):
    """Describe the available versions of a dataset.

    Args:
        dataset_name (str): the dataset's name; must be one of ``DATASETS``

    Returns:
        str: a human-readable message listing the version keys found in the
            dataset module's ``INDEXES`` and the default version

    Raises:
        ValueError: if ``dataset_name`` is not in ``DATASETS``

    """
    if dataset_name not in DATASETS:
        raise ValueError("Invalid dataset {}".format(dataset_name))
    module = importlib.import_module("mirdata.datasets.{}".format(dataset_name))
    # "default" and "test" are aliases and "sample" is the reduced index used
    # for testing, so none of them is reported as a version.
    versions = [
        key for key in module.INDEXES if key not in ("default", "sample", "test")
    ]
    return "Available versions for {}: {}. Default version: {}".format(
        dataset_name,
        versions,
        module.INDEXES["default"],
    )


def initialize(dataset_name, data_home=None, version="default"):
"""Load a mirdata dataset by name
Expand Down
33 changes: 11 additions & 22 deletions mirdata/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ def __init__(
Args:
data_home (str or None): path where mirdata will look for the dataset
version (str): dataset version
name (str or None): the identifier of the dataset
track_class (mirdata.core.Track or None): a Track class
multitrack_class (mirdata.core.Multitrack or None): a Multitrack class
bibtex (str or None): dataset citation/s in bibtex format
indexes (dict or None): indexes to be downloaded
remotes (dict or None): data to be downloaded
download_info (str or None): download instructions or caveats
license_info (str or None): license of the dataset
Expand All @@ -127,7 +129,7 @@ def __init__(
self.version = version

self._index_data = indexes[self.version]
self.index_path = self._index_data.get_path(self.data_home)
self.index_path = self._index_data.get_path()

self._track_class = track_class
self._multitrack_class = multitrack_class
Expand Down Expand Up @@ -784,20 +786,17 @@ def get_mix(self):

class Index(object):
"""Class for storing information about dataset indexes.
Args:
filename (str): The index filename (not path), e.g. "example_dataset_index_1.2.json"
url (str or None): None if index is not remote, or a url to download from
checksum (str or None): None if index is not remote, or the md5 checksum of the file
partial_download (list or None): if provided, specifies a subset of Dataset.remotes
corresponding to this index to be downloaded. If None, all Dataset.remotes will
be downloaded when calling Dataset.download()
Attributes:
remote (download_utils.RemoteFileMetadata or None): None if index is not remote, or
a RemoteFileMetadata object
partial_download (list or None): a list of keys to partially download, or None
"""

def __init__(
Expand All @@ -809,12 +808,17 @@ def __init__(
):
self.filename = filename
self.remote: Optional[download_utils.RemoteFileMetadata]
self.indexes_dir = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"datasets",
"indexes",
)
if url and checksum:
self.remote = download_utils.RemoteFileMetadata(
filename=filename,
url=url,
checksum=checksum,
destination_dir="mirdata_indexes",
destination_dir=self.indexes_dir,
)
elif url or checksum:
raise ValueError(
Expand All @@ -825,24 +829,9 @@ def __init__(

self.partial_download = partial_download

def get_path(self, data_home: str) -> str:
def get_path(self) -> str:
"""Get the absolute path to the index file
Args:
data_home (str): Path where the dataset's data lives
Returns:
str: absolute path to the index file
"""
# if the index is downloaded from remote, it is in the same folder
# as the data
if self.remote:
return os.path.join(data_home, "mirdata_indexes", self.filename)
# if the index is part of mirdata locally, it is in the indexes folder
# of the repository
else:
return os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"datasets/indexes",
self.filename,
)
return os.path.join(self.indexes_dir, self.filename)
9 changes: 7 additions & 2 deletions mirdata/datasets/baf.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,13 @@

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="baf_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="baf_index_1.0.json",
url="https://zenodo.org/records/13993303/files/baf_index_1.0.json?download=1",
checksum="6bc533ab686a7c8940873e4580d93563",
),
"sample": core.Index(filename="baf_index_1.0_sample.json"),
}

REMOTES = None
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/ballroom.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,13 @@

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="ballroom_full_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="ballroom_full_index_1.0.json",
url="https://zenodo.org/records/13993346/files/ballroom_full_index_1.0.json?download=1",
checksum="ca5a5c68e59c608ae8b73b23454d5707",
),
"sample": core.Index(filename="ballroom_full_index_1.0_sample.json"),
}

REMOTES = {
Expand Down
10 changes: 8 additions & 2 deletions mirdata/datasets/beatport_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,16 @@
author = {{\'A}ngel Faraldo}
}"""


INDEXES = {
"default": "1.0.0",
"test": "1.0.0",
"1.0.0": core.Index(filename="beatport_key_index_1.0.0.json"),
"test": "sample",
"1.0.0": core.Index(
filename="beatport_key_index_1.0.0.json",
url="https://zenodo.org/records/13993022/files/beatport_key_index_1.0.0.json?download=1",
checksum="71291eec1a4791259d05fd9281c5cfbf",
),
"sample": core.Index(filename="beatport_key_index_1.0.0_sample.json"),
}

REMOTES = {
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/billboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,13 @@

INDEXES = {
"default": "2.0",
"test": "2.0",
"2.0": core.Index(filename="billboard_index_2.0.json"),
"test": "sample",
"2.0": core.Index(
filename="billboard_index_2.0.json",
url="https://zenodo.org/records/13930536/files/billboard_index_2.0.json?download=1",
checksum="cafd738016a369550af23583e58a16c8",
),
"sample": core.Index(filename="billboard_index_2.0_sample.json"),
}

REMOTES = {
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/cipi.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,13 @@

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="cipi_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="cipi_index_1.0.json",
url="https://zenodo.org/records/13993323/files/cipi_index_1.0.json?download=1",
checksum="dfc4dad2f1089049f99bfc7f4dd2595e",
),
"sample": core.Index(filename="cipi_index_1.0_sample.json"),
}

LICENSE_INFO = (
Expand Down
10 changes: 7 additions & 3 deletions mirdata/datasets/compmusic_indian_tonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,15 @@
year = {2014}
}"""


INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="compmusic_indian_tonic_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="compmusic_indian_tonic_1.0.json",
url="https://zenodo.org/records/13993293/files/compmusic_indian_tonic_1.0.json?download=1",
checksum="67b1b25169bc7e5f7e2eb279197c08cc",
),
"sample": core.Index(filename="compmusic_indian_tonic_1.0_sample.json"),
}

REMOTES = {
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/compmusic_otmm_makam.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,14 @@

INDEXES = {
"default": "dlfm2016-fix1",
"test": "dlfm2016-fix1",
"test": "sample",
"dlfm2016-fix1": core.Index(
filename="compmusic_otmm_makam_index_dlfm2016-fix1.json"
filename="compmusic_otmm_makam_index_dlfm2016-fix1.json",
url="https://zenodo.org/records/13993317/files/compmusic_otmm_makam_index_dlfm2016-fix1.json?download=1",
checksum="4400d99c243a2f2d3748631abe05c311",
),
"sample": core.Index(
filename="compmusic_otmm_makam_index_dlfm2016-fix1_sample.json"
),
}

Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/compmusic_raga.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,13 @@

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="compmusic_raga_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="compmusic_raga_index_1.0.json",
url="https://zenodo.org/records/13993003/files/compmusic_raga_index_1.0.json?download=1",
checksum="f4b2c4d19169e35e76f3f161d6325341",
),
"sample": core.Index(filename="compmusic_raga_index_1.0_sample.json"),
}

REMOTES = {
Expand Down
12 changes: 10 additions & 2 deletions mirdata/datasets/da_tacos.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,22 @@
checksum="11371910cad7012daaa81a5fe9dfa1c0",
),
}

INDEXES = {
"default": "1.1_full",
"test": "1.1_full",
"test": "sample",
"1.1_crema": core.Index(
filename="da_tacos_index_1.1_crema.json",
url="https://zenodo.org/records/13930418/files/da_tacos_index_1.1_crema.json?download=1",
checksum="fd8fb8fce9ce64016f3039ab8aefe01a",
partial_download=["benchmark_crema", "coveranalysis_crema"],
),
"1.1_full": core.Index(filename="da_tacos_index_1.1_full.json"),
"1.1_full": core.Index(
filename="da_tacos_index_1.1_full.json",
url="https://zenodo.org/records/13916461/files/da_tacos_index_1.1_full.json?download=1",
checksum="27f5ee0367d0182b06a7b8eca6dce096",
),
"sample": core.Index(filename="da_tacos_index_1.1_full_sample.json"),
}


Expand Down
11 changes: 11 additions & 0 deletions mirdata/datasets/dali.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@
"1.0": core.Index(filename="dali_index_1.0.json"),
}

INDEXES = {
"default": "1.0",
"test": "sample",
"1.0": core.Index(
filename="dali_index_1.0.json",
url="https://zenodo.org/records/13930497/files/dali_index_1.0.json?download=1",
checksum="7091b6ce623aaa8a87351819f418a4ea",
),
"sample": core.Index(filename="dali_index_1.0_sample.json"),
}

REMOTES = {
"metadata": download_utils.RemoteFileMetadata(
filename="dali_metadata.json",
Expand Down
11 changes: 8 additions & 3 deletions mirdata/datasets/egfxset.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,14 @@
"""

INDEXES = {
"default": "1",
"test": "1",
"1": core.Index(filename="egfxset_index_1.json"),
"default": "1.0",
"test": "sample",
"1.0": core.Index(
filename="egfxset_index_1.0.json",
url="https://zenodo.org/records/13930501/files/egfxset_index_1.json?download=1",
checksum="c72222f93e03fce0f6135a60aefe5312",
),
"sample": core.Index(filename="egfxset_index_1.0_sample.json"),
}

REMOTES = {
Expand Down
11 changes: 9 additions & 2 deletions mirdata/datasets/freesound_one_shot_percussive_sounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,15 @@

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="freesound_one_shot_percussive_sounds_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="freesound_one_shot_percussive_sounds_index_1.0.json",
url="https://zenodo.org/records/13930469/files/freesound_one_shot_percussive_sounds_index_1.0.json?download=1",
checksum="5992d20ef9b2a9eadff0f7324d902003",
),
"sample": core.Index(
filename="freesound_one_shot_percussive_sounds_index_1.0_sample.json"
),
}

REMOTES = {
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/giantsteps_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,13 @@

INDEXES = {
"default": "+",
"test": "+",
"+": core.Index(filename="giantsteps_key_index_+.json"),
"test": "sample",
"+": core.Index(
filename="giantsteps_key_index_+.json",
url="https://zenodo.org/records/13993357/files/giantsteps_key_index_+.json?download=1",
checksum="abce33ea617809a0d534299b00412024",
),
"sample": core.Index(filename="giantsteps_key_index_+_sample.json"),
}

REMOTES = {
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/giantsteps_tempo.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,13 @@

INDEXES = {
"default": "2.0",
"test": "2.0",
"2.0": core.Index(filename="giantsteps_tempo_index_2.0.json"),
"test": "sample",
"2.0": core.Index(
filename="giantsteps_tempo_index_2.0.json",
url="https://zenodo.org/records/13993327/files/giantsteps_tempo_index_2.0.json?download=1",
checksum="92e8db769a01def442b6bb89b700afb8",
),
"sample": core.Index(filename="giantsteps_tempo_index_2.0_sample.json"),
}

REMOTES = {
Expand Down
10 changes: 8 additions & 2 deletions mirdata/datasets/good_sounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,16 @@
year={2015},
organization={Audio Engineering Society}
}"""

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="good_sounds_index_1.0.json"),
"test": "sample",
"1.0": core.Index(
filename="good_sounds_index_1.0.json",
url="https://zenodo.org/records/13916510/files/good_sounds_index_1.0.json?download=1",
checksum="9cda4e4ab46effbdfcc2be744d593d06",
),
"sample": core.Index(filename="good_sounds_index_1.0_sample.json"),
}
REMOTES = {
"packs": download_utils.RemoteFileMetadata(
Expand Down
9 changes: 7 additions & 2 deletions mirdata/datasets/groove_midi.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,13 @@

INDEXES = {
"default": "1.0.0",
"test": "1.0.0",
"1.0.0": core.Index(filename="groove_midi_index_1.0.0.json"),
"test": "sample",
"1.0.0": core.Index(
filename="groove_midi_index_1.0.0.json",
url="https://zenodo.org/records/13993337/files/groove_midi_index_1.0.0.json?download=1",
checksum="9ee6fd1b2f3d50570fc446d4b19814a3",
),
"sample": core.Index(filename="groove_midi_index_1.0.0_sample.json"),
}

REMOTES = {
Expand Down
Loading

0 comments on commit 9f797d2

Please sign in to comment.