-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdcm2metadata.py
50 lines (43 loc) · 1.33 KB
/
dcm2metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
# coding: utf-8
import pydicom as dicom
import pandas as pd
from glob import glob
from pathlib import Path
from tqdm import tqdm
# adjusted from https://github.com/pydicom/pydicom/issues/319#issuecomment-282954803
def basedict(ds):
    """Turn a pydicom Dataset into a dict keyed by element names.

    Elements whose VR starts with "O" are dropped. Sequence elements
    (VR "SQ") are converted recursively into a list of dicts, one per item.

    Keys are the element name. The element tag (with ", " removed from its
    string form) is appended to the name when the element is private, when
    the name is empty, or when the name is already present in the output,
    so that elements sharing a name do not silently overwrite each other.

    Parameters
    ----------
    ds : pydicom.dataset.Dataset
        The Dataset to dictify

    Returns
    -------
    output : dict
        Mapping of key -> element value (or list of dicts for sequences).
    """
    output = dict()
    for elem in ds:
        # skip Pixel Data, CSA Series Header Info, CSA Image Header Info (potentially others)
        if elem.VR.startswith("O"):
            continue
        key = elem.name
        # The first element with a given name keeps the plain name; private,
        # unnamed or later same-named elements are disambiguated with the tag.
        if elem.tag.is_private or not key or key in output:
            key = elem.name + str(elem.tag).replace(", ", "")
        if elem.VR != 'SQ':
            output[key] = elem.value
        else:
            # The value of a sequence element is the list of its item datasets.
            output[key] = [basedict(item) for item in elem.value]
    return output
# Gather every .dcm file under the sax_* directories of each study.
files = glob("../../data/kaggle/raw/*/*/*/study/sax_*/*.dcm")
metadata = list()
for f in tqdm(files):
    # dcmread replaces the deprecated read_file alias, which newer pydicom
    # releases no longer provide.
    d = dicom.dcmread(f)
    fd = basedict(d)
    path = Path(f)
    # Record where the slice came from: file name, its sax_* directory, and
    # the directory directly above "study" (three levels up from the file).
    fd["file"] = str(path.name)
    fd["dir"] = str(path.parent.name)
    fd["pid"] = str(path.parent.parent.parent.name)
    metadata.append(fd)
# One row per DICOM file; columns are the union of keys seen across files.
metadata_df = pd.DataFrame(metadata)
# Tab-separated output; pandas infers xz compression from the ".xz" suffix.
metadata_df.to_csv("dicom_metadata.tsv.xz", sep="\t", index=False)