-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathprepare_data.py
135 lines (113 loc) · 3.89 KB
/
prepare_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from zipfile import ZipFile, is_zipfile
import wget
sys.path.append(".")
from paths import DATA_FOLDER
# The data location always comes from DATA_FOLDER; the former command-line
# override (taking the folder from sys.argv[1]) is disabled.
basepath = str(DATA_FOLDER)
print("Looking for files in", basepath, end=" \n\n")

# Zenodo record that hosts the datasets, and the URLs derived from it.
zenodo_doi = "14804380"
zenodo_record = f"https://zenodo.org/records/{zenodo_doi}"
zenodo_file_api = f"https://zenodo.org/api/records/{zenodo_doi}/files-archive"
# Base URL under which each archive is fetched by file name.
zenodo_individual_files_api = f"{zenodo_record}/files"
# Archives the script treats as required for reproducing the paper figures
# (the "minimal" download, advertised to the user as ~6GB).
minimal_files = [
    "converted_data.zip", "simulated.zip", "tgp2_tuneup.zip",
    "charge_noise.zip", "dot_tuneup_A1.zip",
]

# Every archive the script knows about; the ones not listed in
# ``minimal_files`` are optional and are skipped with a notice when absent.
zipfiles = [
    "charge_noise.zip", "converted_data.zip",
    "cut_loop_A.zip", "cut_loop_B.zip",
    "dot_tuneup_A1.zip", "injector.zip",
    "mpr_A1.zip", "mpr_A2.zip", "mpr_B1.zip",
    "qdmzm_A1.zip", "qpp.zip", "simulated.zip",
    "tgp2_tuneup.zip", "thermometry.zip",
    "trivial_A.zip", "trivial_B.zip",
]
def process_file(filename):
    """Unpack one dataset archive from the data folder, or report it missing.

    Args:
        filename: Archive file name, resolved relative to ``basepath``.

    Returns:
        None. If the path is a valid zip file its contents are extracted into
        ``basepath``; otherwise a message is printed saying whether the
        archive is required (listed in ``minimal_files``) or optional.
    """
    archive = os.path.join(basepath, filename)

    if is_zipfile(archive):
        print(f"Unpacking {archive}")
        with ZipFile(archive, "r") as bundle:
            bundle.extractall(path=basepath)
        return

    # Not a readable zip (absent or invalid): only report, never raise.
    if filename in minimal_files:
        notice = f"Cannot find {archive} required for reproducing of the paper figures"
    else:
        notice = f"Skip optional {archive}, needed for reproducing of Cq converted data"
    print(notice)
def _ask_yes_no(prompt, error_message):
    """Ask a yes/no question on stdin and return the answer as a bool.

    The prompts in this script end in ``[Y/n]``, so an empty reply selects the
    capitalised default (yes). Replies are compared case-insensitively and
    surrounding whitespace is ignored.

    Args:
        prompt: Text passed to ``input``.
        error_message: Message of the ``ValueError`` raised for any other reply.

    Returns:
        True for "Y"/"yes" or an empty reply, False for "n"/"no".

    Raises:
        ValueError: If the reply is not a recognised yes/no answer.
    """
    reply = input(prompt).strip().lower()
    if reply in ("", "y", "yes"):
        return True
    if reply in ("n", "no"):
        return False
    raise ValueError(error_message)


def _download_missing(file_names):
    """Download each archive in *file_names* that is not already in ``basepath``.

    An archive counts as present when a valid zip file of that name exists.
    Skipping those avoids re-fetching multi-gigabyte files on a re-run, and
    avoids leaving a second copy next to the existing one (wget is understood
    to add a numeric suffix when the target path already exists).

    Args:
        file_names: Iterable of archive file names to fetch from Zenodo.
    """
    for file_name in file_names:
        target = os.path.join(basepath, file_name)
        if is_zipfile(target):
            print(f"\nFound {file_name}, skipping download")
            continue
        print("\nDownloading", file_name)
        wget.download(f"{zenodo_individual_files_api}/{file_name}", out=target)


if __name__ == "__main__":
    # None means "not decided yet"; the command-line flags pre-answer both
    # interactive questions. minimal_dataset: True = minimal set, False = all.
    download, minimal_dataset = None, None
    if "--download-all" in sys.argv:
        download, minimal_dataset = True, False
    if "--download-minimal" in sys.argv:
        # Checked last, so it wins when both flags are given.
        download, minimal_dataset = True, True

    if download is None:
        download = _ask_yes_no(
            f"""To run the code in this repo and reproduce the paper figures, you
will need access to the measurement and simulation data. You can do so
by downloading it from {zenodo_record} and placing
it in a folder named data (total path should be parity-readout/data).
Do you want to download the data using this script (this will query
{zenodo_file_api})? If you have
already downloaded the data, you can skip this step.
[Y/n] """,
            "Answer to download can be either Y or n.",
        )

    if download:
        # parents=True so a missing parent of the data folder is not an error.
        Path(basepath).mkdir(parents=True, exist_ok=True)
        if minimal_dataset is None:
            minimal_dataset = _ask_yes_no(
                f"""\n\nDo you want to download the minimal datasets required reproduce the paper (~6GB)?
Answer yes (Y) to download {minimal_files} and no (n)
to download all datasets (~150Gb+).
[Y/n] """,
                "Answer to download minimal dataset can be either Y or n.",
            )
        _download_missing(minimal_files if minimal_dataset else zipfiles)

    print("\n\nExtracting files and setting up directory structure")
    with ProcessPoolExecutor() as executor:
        jobs = [(name, executor.submit(process_file, name)) for name in zipfiles]

    # Leaving the ``with`` block waits for all workers, so every future is
    # finished here. Inspect each one: an exception raised in a worker would
    # otherwise be dropped without any message.
    failures = []
    for name, job in jobs:
        error = job.exception()
        if error is not None:
            failures.append((name, error))
    for name, error in failures:
        print(f"Failed to extract {name}: {error!r}", file=sys.stderr)
    if failures:
        # All archives were attempted; signal the incomplete data set.
        sys.exit(1)