Initial Commit

PHAIR-Consortium · Jul 22, 2024 · bd31eb5 · bd31eb5
1 parent 4be9c5f
commit bd31eb5
Show file tree

Hide file tree

Showing 78 changed files with 4,096 additions and 0 deletions.
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/inference.iml b/.idea/inference.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
diff --git a/nnUNet/CRLM/__init__.py b/nnUNet/CRLM/__init__.py
diff --git a/nnUNet/CRLM/helpers/__init__.py b/nnUNet/CRLM/helpers/__init__.py
diff --git a/nnUNet/CRLM/helpers/eval.py b/nnUNet/CRLM/helpers/eval.py
@@ -0,0 +1,183 @@
+import pandas as pd
+import openpyxl
+import pingouin as pg
+import SimpleITK as sitk
+import os
+import numpy as np
+from scipy.stats import wilcoxon
+import nibabel as nib
+from scipy import stats
+
+
+def calculate_dice(im1_path, im2_path, label1 =1, label2=1):
+    """Return the Dice coefficient between one label in each of two images.
+
+    Args:
+        im1_path: Path of the first image, read with SimpleITK.
+        im2_path: Path of the second image, read with SimpleITK.
+        label1: Voxel value treated as foreground in the first image.
+        label2: Voxel value treated as foreground in the second image.
+
+    Returns:
+        The value reported by
+        ``LabelOverlapMeasuresImageFilter.GetDiceCoefficient`` for the two
+        binary masks.
+    """
+    first_image, second_image = sitk.ReadImage(im1_path), sitk.ReadImage(im2_path)
+
+    # Reduce each image to a 0/1 mask of the requested label.
+    first_mask = sitk.Cast(first_image == label1, sitk.sitkUInt8)
+    second_mask = sitk.Cast(second_image == label2, sitk.sitkUInt8)
+
+    measures = sitk.LabelOverlapMeasuresImageFilter()
+    measures.Execute(first_mask, second_mask)
+    return measures.GetDiceCoefficient()
+
+
+def dice_rad(base_folder):
+    """Print pairwise and averaged Dice scores between the three readers.
+
+    For every file name found in ``<base_folder>/rad1`` the same file name is
+    read from ``rad2`` and ``rad3`` and the Dice coefficient (label 1 against
+    label 1) is computed for each of the three reader pairs.
+
+    Args:
+        base_folder: Directory containing the ``rad1``, ``rad2`` and ``rad3``
+            sub-folders. A trailing path separator is optional.
+
+    Prints:
+        First line: mean and interquartile range (Q3 - Q1) of the 1-vs-2,
+        2-vs-3 and 1-vs-3 scores, in that order.
+        Second line: mean and interquartile range of the per-file average of
+        the three pairwise scores.
+    """
+    # os.path.join instead of string concatenation, so a base folder given
+    # without a trailing separator still resolves to the sub-folders.
+    folder1 = os.path.join(base_folder, 'rad1')
+    folder2 = os.path.join(base_folder, 'rad2')
+    folder3 = os.path.join(base_folder, 'rad3')
+
+    def iqr(values):
+        """Return the interquartile range (75th minus 25th percentile)."""
+        return np.percentile(values, 75) - np.percentile(values, 25)
+
+    dice_scores = []
+    dice_12 = []
+    dice_13 = []
+    dice_23 = []
+
+    # rad1 drives the iteration; rad2 and rad3 must hold identically named files.
+    for filename1 in os.listdir(folder1):
+        full_path1 = os.path.join(folder1, filename1)
+        full_path2 = os.path.join(folder2, filename1)
+        full_path3 = os.path.join(folder3, filename1)
+
+        dice12 = calculate_dice(full_path1, full_path2)
+        dice13 = calculate_dice(full_path1, full_path3)
+        dice23 = calculate_dice(full_path2, full_path3)
+
+        dice_12.append(dice12)
+        dice_13.append(dice13)
+        dice_23.append(dice23)
+
+        dice_scores.append((dice12 + dice13 + dice23) / 3.0)
+
+    print(np.mean(dice_12), iqr(dice_12),  np.mean(dice_23), iqr(dice_23),  np.mean(dice_13), iqr(dice_13))
+    print("Dice scores of radiologists:", np.mean(dice_scores), iqr(dice_scores))
+
+
+def dice_ai(base_folder):
+    """Print the Dice agreement between AI and ground-truth segmentations.
+
+    Every file name in ``<base_folder>/ai_segmentations`` is compared with the
+    identically named file in ``<base_folder>/gt_segmentations``, using label
+    13 in the AI image and label 1 in the ground-truth image.
+
+    Args:
+        base_folder: Directory containing the ``ai_segmentations`` and
+            ``gt_segmentations`` sub-folders. A trailing path separator is
+            optional.
+
+    Prints:
+        Mean Dice, interquartile range (Q3 - Q1) and number of scored files.
+    """
+    # os.path.join instead of string concatenation, so a base folder given
+    # without a trailing separator still resolves to the sub-folders.
+    folder1 = os.path.join(base_folder, 'ai_segmentations')
+    folder2 = os.path.join(base_folder, 'gt_segmentations')
+
+    dice_scores = [
+        calculate_dice(os.path.join(folder1, filename1), os.path.join(folder2, filename1), 13, 1)
+        for filename1 in os.listdir(folder1)
+    ]
+
+    Q1 = np.percentile(dice_scores, 25)
+    Q3 = np.percentile(dice_scores, 75)
+
+    print("Dice scores of AI:", np.mean(dice_scores), Q3 - Q1, len(dice_scores))
+
+
+def dice_ai_NAT(base_folder):
+    """Print AI-vs-ground-truth Dice scores split into pre- and post-NAT files.
+
+    Only ``.nii.gz`` files in ``<base_folder>/ai_segmentations`` are scored. A
+    file whose name contains ``'_0'`` is counted as pre-NAT, every other file
+    as post-NAT. Label 13 of the AI image is compared with label 1 of the
+    identically named file in ``<base_folder>/gt_segmentations``.
+
+    Args:
+        base_folder: Directory containing the ``ai_segmentations`` and
+            ``gt_segmentations`` sub-folders. A trailing path separator is
+            optional.
+
+    Prints:
+        Mean Dice, interquartile range (Q3 - Q1) and file count for each
+        group, followed by the statistic and p-value of an unequal-variance
+        (Welch) t-test between the two groups.
+    """
+    # os.path.join instead of string concatenation, so a base folder given
+    # without a trailing separator still resolves to the sub-folders.
+    folder1 = os.path.join(base_folder, 'ai_segmentations')
+    folder2 = os.path.join(base_folder, 'gt_segmentations')
+
+    dice_scores_pre = []
+    dice_scores_post = []
+
+    for filename1 in os.listdir(folder1):
+        if not filename1.endswith('.nii.gz'):
+            continue
+
+        score = calculate_dice(os.path.join(folder1, filename1), os.path.join(folder2, filename1), 13, 1)
+
+        # The '_0' marker in the file name selects the pre-NAT group.
+        if '_0' in filename1:
+            dice_scores_pre.append(score)
+        else:
+            dice_scores_post.append(score)
+
+    Q1 = np.percentile(dice_scores_pre, 25)
+    Q3 = np.percentile(dice_scores_pre, 75)
+
+    print("Dice scores pre-NAT:", np.mean(dice_scores_pre), Q3 - Q1, len(dice_scores_pre))
+
+    Q1 = np.percentile(dice_scores_post, 25)
+    Q3 = np.percentile(dice_scores_post, 75)
+
+    print("Dice scores post-NAT:", np.mean(dice_scores_post), Q3 - Q1, len(dice_scores_post))
+
+    # The test result used to be computed and then dropped; report it like the
+    # other statistics so the comparison is actually visible.
+    t_statistic, p_value = stats.ttest_ind(dice_scores_pre, dice_scores_post, equal_var=False)
+    print("Welch t-test pre- vs post-NAT:", t_statistic, p_value)
+
+
+def icc(path, group='ai'):
+    """Print the ICC3 intraclass correlation of volumes stored in a workbook.
+
+    Data rows are read from the sheet named ``Sheet1`` starting at row 2.
+    Columns 1 to 5 (zero-based) are taken as the AI, STAPLE, rad1, rad2 and
+    rad3 volumes respectively.
+
+    Args:
+        path: Path of the Excel workbook.
+        group: ``'rad'`` compares rad1, rad2 and rad3; any other value
+            compares the AI and STAPLE columns.
+    """
+    worksheet = openpyxl.load_workbook(path)['Sheet1']
+    rows = list(worksheet.iter_rows(min_row=2, values_only=True))
+
+    # All five columns are read regardless of the requested group.
+    volumes_ai, volumes_staple, volumes_rad1, volumes_rad2, volumes_rad3 = (
+        [row[column] for row in rows] for column in range(1, 6)
+    )
+
+    if group == 'rad':
+        ratings = {
+            'rad1': volumes_rad1,
+            'rad2': volumes_rad2,
+            'rad3': volumes_rad3,
+        }
+    else:
+        ratings = {
+            'ai': volumes_ai,
+            'staple': volumes_staple
+        }
+
+    # Long format: one row per (scan, rater) pair, the row index being the scan.
+    wide = pd.DataFrame(ratings).reset_index()
+    long_table = wide.melt(id_vars='index', var_name='Rater', value_name='Volume')
+    long_table = long_table.rename(columns={'index': 'Scan'})
+
+    results = pg.intraclass_corr(data=long_table, targets='Scan', raters='Rater', ratings='Volume')
+    is_icc3 = results['Type'] == 'ICC3'
+    icc_value = results.loc[is_icc3, 'ICC'].values[0]
+    print(f'ICC of {group}  is {icc_value}.')
+
+
+def extract_volume(scan_folder, segmentation_folder):
+    """Write the volume of the highest label of each segmentation to Excel.
+
+    For every ``.nii`` / ``.nii.gz`` file in ``scan_folder`` the matching
+    segmentation is looked up in ``segmentation_folder`` under the scan's name
+    up to the first ``'_0000'``, plus ``'.nii.gz'``. Scans without a
+    segmentation are skipped. The voxel count of the largest label value is
+    multiplied by the product of the scan's voxel dimensions.
+
+    Args:
+        scan_folder: Directory holding the scans.
+        segmentation_folder: Directory holding the segmentations; the result
+            is written there as ``volumes.xlsx``.
+    """
+    scans = [f for f in os.listdir(scan_folder) if f.endswith(('.nii', '.nii.gz'))]
+
+    rows = []
+    for scan in scans:
+        scan_path = os.path.join(scan_folder, scan)
+        segmentation_path = os.path.join(segmentation_folder, scan.split('_0000')[0] + '.nii.gz')
+
+        if not os.path.exists(segmentation_path):
+            print(f"Segmentation for {segmentation_path} not found. Skipping...")
+            # Skip only this scan. Returning here would drop every row
+            # collected so far and never write the workbook.
+            continue
+
+        segmentation_data = nib.load(segmentation_path).get_fdata()
+
+        # Voxel size comes from the scan header, not the segmentation header.
+        nifti_img = nib.load(scan_path)
+        voxel_dims = np.abs(nifti_img.header.get_zooms())
+        voxel_volume = np.prod(voxel_dims)
+
+        # np.unique returns sorted values, so the last entry is the highest label.
+        # NOTE(review): if the segmentation holds only one value (e.g. all 0),
+        # that value's voxel count is reported -- confirm this is intended.
+        labels, counts = np.unique(segmentation_data, return_counts=True)
+
+        rows.append({"Filename": scan, "Label": labels[-1], "Volume (mm^3)": counts[-1] * voxel_volume})
+
+    df = pd.DataFrame(rows)
+    # os.path.join so the file lands inside the folder even when the folder is
+    # given without a trailing separator.
+    df.to_excel(os.path.join(segmentation_folder, 'volumes.xlsx'), index=False)
+
+
+# Script entry point: runs each evaluation helper once. The 'your_path'
+# literals look like placeholders to be replaced with real locations before
+# running -- TODO confirm.
+if __name__ == '__main__':
+    # Dice agreement: between radiologists, AI vs. ground truth, and AI vs.
+    # ground truth split by the '_0' file-name marker.
+    dice_rad('your_path')
+    dice_ai('your_path')
+    dice_ai_NAT('your_path')
+
+    # Write the volumes workbook, then report ICC3 for the 'ai' and 'rad' groups.
+    extract_volume('your_path', 'your_path')
+    icc('your_path', 'ai')
+    icc('your_path', 'rad')
diff --git a/nnUNet/CRLM/helpers/staple.py b/nnUNet/CRLM/helpers/staple.py
@@ -0,0 +1,49 @@
+import os
+import SimpleITK as sitk
+import glob
+import numpy as np
+
+# Fuse the segmentations found for each project folder and label folder with
+# STAPLE and write one thresholded result per (project, label) pair.
+root_dir = 'your_path_to_segmentations'
+# os.path.join keeps the output folder portable; a hard-coded '\\' only forms
+# a sub-directory on Windows.
+output_dir = os.path.join(os.getcwd(), 'output')
+
+os.makedirs(output_dir, exist_ok=True)
+
+# Not referenced by the loop below; kept because it is a module-level name.
+project_folders = ['your_folders']
+label_values = [0, 1, 2]
+
+for project_folder in os.listdir('ypur_path_to_radiologists_folder'):
+    for label_value in label_values:
+        segmentations = []
+        for radiologist_folder in ['your_radiolgists_folders']:
+            image_path = os.path.join(root_dir, radiologist_folder, project_folder, str(label_value), '*.nii*')
+            segmentations.extend(glob.glob(image_path))
+        print(segmentations)
+        if not segmentations:
+            continue
+
+        segs = []
+        reference = None
+        for segmentation in segmentations:
+            # The last image read supplies the geometry of the fused output.
+            reference = sitk.ReadImage(segmentation)
+            image_array = sitk.GetArrayFromImage(reference)
+            # NOTE(review): voxels equal to 1 become background here, only
+            # values above 1 become foreground -- confirm this is intended.
+            image_array = np.where(image_array > 1, 1, 0)
+            segs.append(sitk.GetImageFromArray(image_array))
+
+        try:
+            foregroundValue = 1
+            threshold = 0.5
+            staple_segmentation_probabilities = sitk.STAPLE(segs, foregroundValue)
+            staple_segmentation_probabilities = sitk.GetArrayFromImage(staple_segmentation_probabilities)
+            staple_segmentation = (staple_segmentation_probabilities > threshold).astype(int)
+            staple_segmentation = sitk.GetImageFromArray(staple_segmentation, isVector=False)
+            # Copy origin, spacing AND direction from the reference image;
+            # setting only spacing and origin dropped the direction matrix.
+            staple_segmentation.CopyInformation(reference)
+
+            output_path = os.path.join(output_dir, project_folder + '_' + str(label_value) + '.nii.gz')
+            sitk.WriteImage(staple_segmentation, output_path)
+            print(f"STAPLE successfully ran for {project_folder}_{label_value}! Saved the final segmentation to the output folder")
+        except Exception as err:
+            # Stay best-effort across projects, but report why this one failed
+            # and let KeyboardInterrupt / SystemExit propagate.
+            print(f"STAPLE could not run for {project_folder}_{label_value}!")
+            print(f"Reason: {err}")
+
+print("Done! all projects have been STAPLED!")
diff --git a/nnUNet/main_crlm.py b/nnUNet/main_crlm.py
@@ -0,0 +1,31 @@
+import argparse
+from utils import *
+from CRLM.helpers.eval import *
+
+# Command-line entry point: rename the inputs, run the low-resolution and
+# cascade full-resolution predictions, then move the output and extract volumes.
+if __name__ == '__main__':
+    # `os` is used below but was only reachable through the wildcard imports;
+    # import it explicitly so this script does not depend on what they export.
+    import os
+
+    parser = argparse.ArgumentParser(description='CRLM segmentation')
+    parser.add_argument('--task_id', type=str, default='Task001_CRLM')
+    parser.add_argument('--nnunet_dir', type=str, default="your_folder\\nnUNet\\")
+    parser.add_argument('--input_dir', type=str, default="your_folder\\nnUNet\\prediction_input\\")
+    parser.add_argument('--output_dir', type=str, default="your_folder\\nnUNet\\prediction_output\\")
+    parser.add_argument('--data_dir', type=str, default="your_folder\\nnUNet\\CRLM\\")
+
+    args = parser.parse_args()
+
+    print("Renaming files...")
+    rename_files(args.input_dir, '_0000')
+    print("Successfully renamed files...")
+
+    print("Making lowres prediction...")
+    run_inference(args.nnunet_dir, args.input_dir, args.output_dir, args.task_id, '3d_lowres')
+    print("Successfully made lowres prediction...")
+
+    print("Making fullres prediction...")
+    run_inference(args.nnunet_dir, args.input_dir, args.output_dir, args.task_id, '3d_cascade_fullres')
+
+    print("Successfully made fullres prediction...")
+
+    print("Extracting volume...")
+    # NOTE(review): `move` is defined outside this file. If it relocates the
+    # predictions out of output_dir, extract_volume will not find them there
+    # afterwards -- confirm the intended order.
+    move(args.output_dir, os.path.join(args.data_dir, 'ai_segmentations'))
+    extract_volume(args.input_dir, args.output_dir)
+    print("Succesfully extracted volume...")