Init commit

AIM-Harvard · Sep 29, 2023 · 441fa22 · 441fa22
commit 441fa22
Show file tree

Hide file tree

Showing 31 changed files with 2,161 additions and 0 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,62 @@
+# Specify the base image for the environment
+FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
+
+# Authors of the image
+LABEL authors="[email protected]"
+
+# Remove any third-party apt sources to avoid issues with expiring keys.
+RUN rm -f /etc/apt/sources.list.d/*.list
+
+# Build-time only (ARG, not ENV): keeps apt from prompting without leaking into the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install basic system utilities and useful packages
+# Install common libraries that are needed by a number of models (e.g., nnUNet, Platipy, ...)
+# (merge these in a single RUN command to avoid creating intermediate layers)
+# apt-get is used instead of apt because apt's command-line interface is not stable for scripts.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  sudo \
+  ffmpeg \
+  libsm6 \
+  libxext6 \
+  xvfb \
+  wget \
+  curl \
+  git \
+  && rm -rf /var/lib/apt/lists/*
+
+# Extra steps for installing Python 3.7 (with the deadsnakes PPA enabled).
+# Everything runs in ONE layer so that each `apt-get update` is always paired with the
+# installs that depend on it (a cached, stale `update` layer can otherwise break the build),
+# and the package lists are removed in the same layer that created them.
+# The install commands are kept separate so that each one pulls the same set of
+# recommended packages as before (python3-pip relies on its recommends, e.g. setuptools).
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    zlib1g-dev \
+    libncurses5-dev \
+    libgdbm-dev \
+    libnss3-dev \
+    libssl-dev \
+    libsqlite3-dev \
+    libreadline-dev \
+    libffi-dev \
+    libbz2-dev \
+  && apt-get install -y software-properties-common \
+  && add-apt-repository -y ppa:deadsnakes/ppa \
+  && apt-get update \
+  && apt-get install -y python3.7 python3-pip \
+  && rm -rf /var/lib/apt/lists/*
+
+
+# Create a working directory and set it as the working directory
+# Also create directories for input and output data (mounting points) in the same RUN to avoid creating intermediate layers
+RUN mkdir -p /app/data/input_data /app/data/output_data
+WORKDIR /app
+
+# Install general utilities (specify version if necessary)
+# Always go through `python3.7 -m pip` so the packages are installed for the Python 3.7
+# interpreter, independent of which `pip3` executable happens to come first on PATH.
+RUN python3.7 -m pip install --no-cache-dir --upgrade pip \
+  && python3.7 -m pip install --no-cache-dir \
+  SimpleITK==1.2.4 \
+  h5py==2.10.0 \
+  keras==2.2.4 \
+  pandas==0.24.2 \
+  scipy==1.2.1 \
+  numpy==1.16.4 \
+  scikit-image==0.16.2 \
+  protobuf==3.20.* \
+  tensorflow-gpu==1.13.1
+
+# COPY requirements.txt requirements.txt
+# RUN python3.7 -m pip install -r requirements.txt
+
+# Set PYTHONPATH to the /app folder
+ENV PYTHONPATH="/app"
+
+# Copy over the project directory into the image
+# (done after the dependency layers so source changes do not invalidate them)
+COPY . .
+
+# Default command: list the contents of the working directory
+CMD [ "ls" ]
diff --git a/README.md b/README.md
@@ -0,0 +1,27 @@
+# Body Composition AIM
+
+This repository is intended for AIM personnel and authorized collaborators only. Don't share without written consent. 
+
+This repository provides code for training and running the body composition pipeline, which consists of two deep learning models. The first model is to localize the L3 slice from the input CT scan. The second model is to segment the localized slice into three components: muscle, subcutaneous fat  and visceral fat.
+
+### Getting Started
+
+See the documentation pages for further details:
+* [Env_setup](docs/env_setup.md) - For installing packages directly on your system
+* [Training_1](docs/train_selection.md) - For training a deep learning model for L3 slice selection
+* [Training_2](docs/train_segmentation.md) - For training a deep learning model for segmentation
+* [Inference](docs/test.md) - For running the model on new CT scans
+
+## Repository Structure
+
+This repository is structured as follows:
+
+* All the source code to run the body composition pipeline is found under the `src` folder.
+* Three example CT images, the corresponding manual segmentation masks, and optional data curation scripts are stored under the `data` folder.
+* Model weights for pre-trained/trained models are saved in `model` folder.
+
+### Reference
+
+This repository is modified after the following work under [GPL 3.0 License](LICENSE):
+
+https://github.com/CPBridge/ct_body_composition
diff --git a/docs/env_setup.md b/docs/env_setup.md
@@ -0,0 +1,19 @@
+### Set-up 
+The code was developed and tested with python 3.7.3 on Ubuntu 18.04 with CUDA 10.0 and cuDNN 7.4.2
+
+For the code to run as intended, all the packages listed in `requirements.txt` should be installed. To avoid breaking existing installations, it is highly recommended to install these packages in a virtual environment. Below is an example set-up using a conda environment, run from the root of the repository:
+
+```
+# Create a conda environment with python3.7.3 and activate it:
+conda create -n bd python=3.7.3
+conda activate bd
+
+# Once the virtualenv is activated, install the dependencies
+conda install -c conda-forge tensorflow=1.13
+conda install nb_conda_kernels  # if you want to use jupyter notebook
+# Once you change directories into the root path of the project
+pip3 install -r requirements.txt  
+
+# The conda environment is now set up
+## if you want to delete the conda environment, type in: conda remove -n bd --all 
+```
diff --git a/docs/test.md b/docs/test.md
@@ -0,0 +1,59 @@
+
+### Model prediction of the L3 top slice
+
+The selection model is tested with the `test_selection.py` script in the 'src' directory. The script takes a NIFTI image and writes the predicted L3 slice number to a CSV file. You can run the basic test routine by passing the two required arguments:
+
+`data_dir` -- Directory in which the test ct images are stored. Default path is '../data/test/input'
+
+`model_dir` -- Directory in which trained model are stored. Default model is 'model/test/L3_Top_Selection_Model_Weight.h5'
+
+For example:
+```bash
+$ python test_selection.py 
+```
+#### We can check the selection performance by overlapping the predicted slice onto the input CT series, in this [script](../data/test/optional_scripts_for_test_performance_check/selection_check_by_screenshots.ipynb)
+
+### Model segmentation of the L3 top slice
+
+The segmentation model is tested with the `test_segmentation.py` script in the 'src' directory. The script takes a NIFTI image and its L3 top slice and outputs the segmented CT scan in NIFTI format. You can run the basic test routine by passing the required arguments:
+
+`data_dir` -- Directory in which the test ct images, labels and automatic segmentations are stored. Default path is '../data/test/input'
+
+`model_dir` -- Directory in which well-trained model are stored. Default model is 'model/test/L3_Top_Segmentation_Model_Weight.hdf5'
+
+
+For example:
+```bash
+$ python test_segmentation.py 
+```
+#### We can check the segmentation performance by overlapping the model segmentation onto the input L3 slice, in this [script](../data/test/optional_scripts_for_test_performance_check/segmentation_check_in_screenshots_L3slice_auto.ipynb)
+
+
+### Data Structure for testing models
+Before testing the model, you must prepare the data in NIFTI format. The files should be placed within the data directory with the structure below:
+
+```
+- data/
+
+|- test/
+
+|  |- input/
+|  |  |- test-volume-11.nii.gz
+|  |  |- test-volume-8.nii.gz
+
+|  |- output_segmentation/
+|  |  |- test-volume-11_AI_seg_L3.nii.gz
+|  |  |- test-volume-8_AI_seg_L3.nii.gz
+
+
+|  |- output_csv/
+
+|  |  |- L3_Top_Slice_Prediction.csv
+|  |  |- L3_body_comp_area_density.csv
+
+```
+#### Example Data Source
+
+https://competitions.codalab.org/competitions/17094#learn_the_details-overview
+
+Under licence of https://creativecommons.org/licenses/by-nc-nd/4.0/
diff --git a/docs/train_segmentation.md b/docs/train_segmentation.md
@@ -0,0 +1,51 @@
+## Segmentation Model - UNet (2D)
+
+#### Preparing Training Data
+
+Model training data consists of a set of 2D CT slices and corresponding segmentation masks. Each should be prepared as a numpy array stored in a `.npy` file as follows, and all files should be placed under '../data/train/train_segmentation'.
+
+`../data/train/train_segmentation/train_images.npy` -- A numpy array of size (*N* x 512 x 512 x 1), where *N* is the number of training samples. This represents all *N* training CT images stacked down the first dimension of the array, and a singleton channel dimension at the end. The pixel intensities should be raw Hounsfield units, without intensity windowing or scaling. The data type should be `float`. These images are used to train the model.
+
+`../data/train/train_segmentation/val_images.npy` -- An array of validation images that are used to monitor the progress of the training process and compare the generalization performance of different models. Its construction is identical to train_images.npy (note that the number of images in the validation set will usually differ from the
+number of images in the training set).
+
+`../data/train/train_segmentation/train_masks.npy` -- An array the same shape as train_images.npy, where all spatial dimensions correspond to the train_images.npy array. Each slice of the masks array is the segmentation mask for the same slice in the images array. The masks should have a `uint8` data type, and each pixel encodes the segmentation label of the corresponding pixel in the image array. A value of 0 denotes the background class, 1 denotes the 'muscle' class, 2 denotes the 'subcutaneous fat' class, and 3 denotes the 'visceral fat' class.
+
+`../data/train/train_segmentation/val_masks.npy` -- Mask array for the validation images in val_images.npy. Construction is otherwise identical to train_masks.npy.
+
+#### Training the Model
+
+The segmentation model is trained with the `train_segmentation.py` script in the `src` directory. 
+
+For example:
+
+```bash
+$ python train_segmentation.py -d
+```
+
+There are a number of other options you can specify to tweak the model
+architecture and training procedure. Of particular note are:
+
+* `-d` -- Directory in which the training data (e.g. `train_images.npy`) arrays are stored.
+* `-m` -- The model checkpoints and associated files will be stored in a sub-directory of this directory.
+* `-g` - Specify the number of GPUs to use for training
+* `-l` - Specify the initial learning rate
+* `-b` - Specify the batch size
+
+Run the help for a full list of options:
+
+```bash
+$ python train_segmentation.py --help
+```
+
+
+The files described above should be placed within a directory with the following structure:
+```
+- data/train/train_segmentation/
+
+|- train_images.npy
+|- train_masks.npy
+|- val_images.npy
+|- val_masks.npy
+```
+
diff --git a/docs/train_selection.md b/docs/train_selection.md
@@ -0,0 +1,70 @@
+
+## Slice Selection Model - DenseNet (2D)
+
+#### Preparing Training Data
+
+
+The training data for the slice selection model consists of CT slices, with a physical offset from the levels of L3 Top slice.  To allow for efficient and precise loading during training, slices of input CT scans should be extracted out into numpy array (`.npy`) format. The spacings of the input CT could be different, but the horizontal size should be 512 * 512.
+
+Each `.npy` array should have pixel values between 0 and 255 as a result of intensity clipping and rescaling the raw Hounsfield units. For example, raw pixel values below -160HU are transformed to a pixel value of 0, raw pixel values above 240HU are transformed to 255, and raw pixel values between -160HU and 240HU should be transformed into a value between 0 and 255 (inclusive) with linear scaling.
+
+#### Scripts to generate the npy files along with their annotations are listed [here](../data/data_in_NIFTI/scipts_nifty_to_npy_transformation/NIFTI_to_npy_for_selection_training.ipynb). An example CSV for training is listed [here](../data/train/train_selection/selection_meta/train.csv).
+
+The extracted numpy arrays should be divided into train and validation splits and placed in a directory according to the split. Each split should be accompanied by a CSV file (`train.csv` and `val.csv`, matching the directory layout shown below) that contains the offset from the level of the L3 top slice. Each CSV should consist of columns like this:
+```
+index,   npy_file_name,      ZOffset_L3
+0,       000000.npy,    -234.6
+1,       000001.npy,    5.2
+2,       000002.npy,    145.3  
+```
+
+The first column, `npy_file_name`, represents sorted file names in the `.npy` format. The order of npy_name should be the same as the order stored in each directory of the data folder. The second column, `ZOffset_L3`, should represent its offset above or below the level of interest in mm in the physical space of the scanner. Slices above L3 top slice (closer to the head) should be given positive offsets, and slices below L3 top slice (closer to the feet) should be given negative offsets.
+
+
+
+The files described above should be placed within a directory with the following structure:
+```
+- data/train/train_selection/
+
+|- selection_meta/
+|  |- train.csv
+|  |- val.csv
+
+|- selection_npy/
+|  |- train/
+|  |  |- 000000.npy
+|  |  |- 000001.npy
+|  |  |- 00000n.npy (n>=2)
+
+|  |- val/
+|  |  |- 000000.npy
+|  |  |- 000001.npy
+|  |  |- 00000n.npy (n>=2)
+
+
+
+```
+
+
+#### Training the Model
+
+The script `train_slice_selection.py` in the `src` directory is used to train the slice selection model. 
+
+```bash
+$ python train_slice_selection.py 
+```
+
+A number of optional arguments may be passed to control various aspects of the model architecture and training process. Of particular note are:
+
+* `-d` - Directory in which the training data arrays are stored
+* `-m` - Directory in which trained models are to be stored
+* `-g` - Specify the number of GPUs to use for training
+* `-l` - Specify the initial learning rate
+* `-b` - Specify the batch size
+
+
+Run the help for a full list of options:
+
+```bash
+$ python train_slice_selection.py --help
+```
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,8 @@
+SimpleITK==1.2.4
+h5py==2.10.0
+keras==2.2.4
+pandas==0.24.2
+scipy==1.2.1
+numpy==1.16.4
+scikit-image==0.16.2
+# Same pin as in the Dockerfile: protobuf 4.x releases are not compatible with
+# the generated protobuf code shipped in TensorFlow 1.x.
+protobuf==3.20.*
+tensorflow-gpu==1.13.1
diff --git a/segs/out_example.csv b/segs/out_example.csv
@@ -0,0 +1,3 @@
+,patient_id,L3_Predict_slice,Z_spacing,XY_spacing
+0,test-volume-11,125,2.0,0.7168
+0,test-volume-8,68,4.0,0.76953
diff --git a/src/L3_segmentation_area_densities.py b/src/L3_segmentation_area_densities.py
@@ -0,0 +1,47 @@
+import pandas as pd
+import os
+
+from scripts.image_processing.image_window import get_image_path_by_id,apply_window
+from scripts.image_processing.slice_area_density import get_l3_slice_area,get_l3_slice_density
+import SimpleITK as sitk
+import numpy as np
+
+csv_path = '/home/taf/Documents/TafSegs/out_niftis.csv'
+df_l3_prediction = pd.read_csv(csv_path, index_col = 0)
+
+print(df_l3_prediction.shape)
+(df_l3_prediction.head())
+
+df_init = pd.DataFrame()
+img_dir  = '/home/taf/Documents/Body_Comp_images/Converted/NIFTIs'
+seg_dir = '/home/taf/Documents/TafSegs/'
+csv_write_path = '/home/taf/Documents/TafSegs/L3_body_comp_area_density.csv'
+
+for idx,rows in df_l3_prediction.iterrows():
+    patient_id =  rows['patient_id']
+    image_path =  get_image_path_by_id(patient_id, img_dir)
+    seg_path = get_image_path_by_id(patient_id, seg_dir)
+
+    if os.path.exists(image_path) and os.path.exists(seg_path):
+        l3_slice = int(rows['L3_Predict_slice'])
+
+        muscle_auto_area,sfat_auto_area,vfat_auto_area = \
+                            get_l3_slice_area(patient_id,l3_slice,seg_dir)  
+
+        muscle_auto_density,sfat_auto_density,vfat_auto_density = \
+                            get_l3_slice_density(patient_id,l3_slice,seg_dir,img_dir)
+
+        round_num = 2
+        df_inter = pd.DataFrame({'patient_id':patient_id,
+                                    'muscle_manual_area':round(muscle_auto_area, round_num),
+                                    'muscle_manual_density':round(muscle_auto_density, round_num),
+
+                                    'sfat_manual_area':round(sfat_auto_area, round_num),
+                                    'sfat_manual_density':round(sfat_auto_density, round_num),
+
+                                    'vfat_manual_area':round(vfat_auto_area, round_num),
+                                    'vfat_manual_density':round(vfat_auto_density, round_num)},index=[0])
+
+        df_init = df_init.append(df_inter)
+        df_init.to_csv(csv_write_path)
+        print(idx,'th', patient_id, 'writen to', csv_write_path)