From 9c72eb728b3989fe416d0c250c46353bea018086 Mon Sep 17 00:00:00 2001 From: Danh Truong Date: Thu, 19 Sep 2024 11:45:20 -0500 Subject: [PATCH 1/4] initial commit --- analyses/cell-type-dsrct/README.md | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 analyses/cell-type-dsrct/README.md diff --git a/analyses/cell-type-dsrct/README.md b/analyses/cell-type-dsrct/README.md new file mode 100644 index 000000000..80f6b3845 --- /dev/null +++ b/analyses/cell-type-dsrct/README.md @@ -0,0 +1,43 @@ +# DSRCT Cell Type analysis module + + + +## Description + +This analysis aims to annotate the DSRCT samples in the SCPCP000013 (n=7) dataset. + +To do so, we will: + +- Assess the quality of the data set +- Curate a list of marker genes associated with DSRCT cells +- Detect the expression levels of DSRCT marker genes in the data set +- Identify clusters in the DSRCT samples +- Use the list of marker genes to identify tumor cells +- Perform copy number inference to identify tumor cells +- Annotate normal cells +- Perform clustering on tumor cells and identify tumor cell states + +## Usage + +The code for the module will be in the form of a notebook. + +## Input files + + +The input is dependent on the output fles run from `download-data.py`. This generates `SingleCellExperiment` for DSRCT samples. + + +## Output files + +Please include a description of the output from your analysis, including: + +- Plots from each analysis +- RDS file containing the processed single cell data set + +## Software requirements + +The analysis will be done in R using the `Seurat`, `SingleCellExperiment`, and `scran` packages. + +## Computational resources + +This will be done on a local machine. From 4385fd5bf9163e5f32394756605a96b561119d4e Mon Sep 17 00:00:00 2001 From: Danh Truong Date: Thu, 19 Sep 2024 11:45:20 -0500 Subject: [PATCH 2/4] initial commit --- .github/workflows/docker_cell-type-dsrct.yml | 63 ++++++++++++++++++++ .github/workflows/run_cell-type-dsrct.yml | 62 +++++++++++++++++++ analyses/cell-type-dsrct/Dockerfile | 10 ++++ analyses/cell-type-dsrct/README.md | 43 +++++++++++++ analyses/cell-type-dsrct/plots/.gitkeep | 0 analyses/cell-type-dsrct/results/README.md | 5 ++ analyses/cell-type-dsrct/scratch/.gitkeep | 0 analyses/cell-type-dsrct/scripts/.gitkeep | 0 8 files changed, 183 insertions(+) create mode 100644 .github/workflows/docker_cell-type-dsrct.yml create mode 100644 .github/workflows/run_cell-type-dsrct.yml create mode 100644 analyses/cell-type-dsrct/Dockerfile create mode 100644 analyses/cell-type-dsrct/README.md create mode 100644 analyses/cell-type-dsrct/plots/.gitkeep create mode 100644 analyses/cell-type-dsrct/results/README.md create mode 100644 analyses/cell-type-dsrct/scratch/.gitkeep create mode 100644 analyses/cell-type-dsrct/scripts/.gitkeep diff --git a/.github/workflows/docker_cell-type-dsrct.yml b/.github/workflows/docker_cell-type-dsrct.yml new file mode 100644 index 000000000..139f0777e --- /dev/null +++ b/.github/workflows/docker_cell-type-dsrct.yml @@ -0,0 +1,63 @@ +# This is a workflow to build the docker image for the cell-type-dsrct module +# +# Docker modules are run on pull requests when code for files that affect the Docker image have changed. +# If other files are used during the Docker build, they should be added to `paths` +# +# At module initialization, this workflow is inactive, and needs to be activated manually + +name: Build docker image for cell-type-dsrct + +concurrency: + # only one run per branch at a time + group: "docker_cell-type-dsrct_${{ github.ref }}" + cancel-in-progress: true + +on: + # pull_request: + # branches: + # - main + # paths: + # - "analyses/cell-type-dsrct/Dockerfile" + # - "analyses/cell-type-dsrct/.dockerignore" + # - "analyses/cell-type-dsrct/renv.lock" + # - "analyses/cell-type-dsrct/conda-lock.yml" + # push: + # branches: + # - main + # paths: + # - "analyses/cell-type-dsrct/Dockerfile" + # - "analyses/cell-type-dsrct/.dockerignore" + # - "analyses/cell-type-dsrct/renv.lock" + # - "analyses/cell-type-dsrct/conda-lock.yml" + workflow_dispatch: + inputs: + push-ecr: + description: "Push to AWS ECR" + type: boolean + required: true + +jobs: + test-build: + name: Test Build Docker Image + if: github.event_name == 'pull_request' || (contains(github.event_name, 'workflow_') && !inputs.push-ecr) + runs-on: ubuntu-latest + + steps: + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build image + uses: docker/build-push-action@v5 + with: + context: "{{defaultContext}}:analyses/cell-type-dsrct" + push: false + cache-from: type=gha + cache-to: type=gha,mode=max + + build-push: + name: Build and Push Docker Image + if: github.repository_owner == 'AlexsLemonade' && (github.event_name == 'push' || inputs.push-ecr) + uses: ./.github/workflows/build-push-docker-module.yml + with: + module: "cell-type-dsrct" + push-ecr: true diff --git a/.github/workflows/run_cell-type-dsrct.yml b/.github/workflows/run_cell-type-dsrct.yml new file mode 100644 index 000000000..f1c2aa301 --- /dev/null +++ b/.github/workflows/run_cell-type-dsrct.yml @@ -0,0 +1,62 @@ +# This is a workflow to run the cell-type-dsrct module +# +# Analysis modules are run based on three triggers: +# - Manual trigger +# - On pull requests where code in the module has changed +# - As a reusable workflow called from a separate workflow which periodically runs all modules +# +# At initialization, only the manual trigger is active + +name: Run cell-type-dsrct analysis module +env: + MODULE_PATH: analyses/cell-type-dsrct + AWS_DEFAULT_REGION: us-east-2 + +concurrency: + # only one run per branch at a time + group: "run_cell-type-dsrct_${{ github.ref }}" + cancel-in-progress: true + +on: + workflow_dispatch: + # workflow_call: + # pull_request: + # branches: + # - main + # paths: + # - analyses/cell-type-dsrct/** + # - "!analyses/cell-type-dsrct/Dockerfile" + # - "!analyses/cell-type-dsrct/.dockerignore" + # - .github/workflows/run_cell-type-dsrct.yml + +jobs: + run-module: + if: github.repository_owner == 'AlexsLemonade' + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + r-version: 4.4.0 + use-public-rspm: true + + - name: Set up pandoc + uses: r-lib/actions/setup-pandoc@v2 + + - name: Set up renv + uses: r-lib/actions/setup-renv@v2 + with: + working-directory: ${{ env.MODULE_PATH }} + + # Update this step as needed to download the desired data + - name: Download test data + run: ./download-data.py --test-data --format SCE + + - name: Run analysis module + run: | + cd ${MODULE_PATH} + # run module script(s) here diff --git a/analyses/cell-type-dsrct/Dockerfile b/analyses/cell-type-dsrct/Dockerfile new file mode 100644 index 000000000..80010ba65 --- /dev/null +++ b/analyses/cell-type-dsrct/Dockerfile @@ -0,0 +1,10 @@ +# A template docker file for creating a new analysis +FROM ubuntu:22.04 + +# Labels following the Open Containers Initiative (OCI) recommendations +# For more information, see https://specs.opencontainers.org/image-spec/annotations/?v=v1.0.1 +LABEL org.opencontainers.image.authors="OpenScPCA scpca@ccdatalab.org" +LABEL org.opencontainers.image.source="https://github.com/AlexsLemonade/OpenScPCA-analysis/tree/main/templates/analysis-module" + +# Set an environment variable to allow checking if we are in an OpenScPCA container +ENV OPENSCPCA_DOCKER=TRUE diff --git a/analyses/cell-type-dsrct/README.md b/analyses/cell-type-dsrct/README.md new file mode 100644 index 000000000..80f6b3845 --- /dev/null +++ b/analyses/cell-type-dsrct/README.md @@ -0,0 +1,43 @@ +# DSRCT Cell Type analysis module + + + +## Description + +This analysis aims to annotate the DSRCT samples in the SCPCP000013 (n=7) dataset. + +To do so, we will: + +- Assess the quality of the data set +- Curate a list of marker genes associated with DSRCT cells +- Detect the expression levels of DSRCT marker genes in the data set +- Identify clusters in the DSRCT samples +- Use the list of marker genes to identify tumor cells +- Perform copy number inference to identify tumor cells +- Annotate normal cells +- Perform clustering on tumor cells and identify tumor cell states + +## Usage + +The code for the module will be in the form of a notebook. + +## Input files + + +The input is dependent on the output fles run from `download-data.py`. This generates `SingleCellExperiment` for DSRCT samples. + + +## Output files + +Please include a description of the output from your analysis, including: + +- Plots from each analysis +- RDS file containing the processed single cell data set + +## Software requirements + +The analysis will be done in R using the `Seurat`, `SingleCellExperiment`, and `scran` packages. + +## Computational resources + +This will be done on a local machine. diff --git a/analyses/cell-type-dsrct/plots/.gitkeep b/analyses/cell-type-dsrct/plots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/analyses/cell-type-dsrct/results/README.md b/analyses/cell-type-dsrct/results/README.md new file mode 100644 index 000000000..af51c690e --- /dev/null +++ b/analyses/cell-type-dsrct/results/README.md @@ -0,0 +1,5 @@ +# Results directory instructions + +Files in the results directory should not be directly committed to the repository. + +Instead, copy results files to an S3 bucket and add a link to the S3 location in this README file. diff --git a/analyses/cell-type-dsrct/scratch/.gitkeep b/analyses/cell-type-dsrct/scratch/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/analyses/cell-type-dsrct/scripts/.gitkeep b/analyses/cell-type-dsrct/scripts/.gitkeep new file mode 100644 index 000000000..e69de29bb From 8a0989707fec9c9dd4da85c40ec9fe0d91375be5 Mon Sep 17 00:00:00 2001 From: Danh Truong Date: Fri, 20 Sep 2024 12:10:02 -0500 Subject: [PATCH 3/4] added the code to download the samples --- analyses/cell-type-dsrct/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/analyses/cell-type-dsrct/README.md b/analyses/cell-type-dsrct/README.md index 80f6b3845..b000a7b65 100644 --- a/analyses/cell-type-dsrct/README.md +++ b/analyses/cell-type-dsrct/README.md @@ -24,7 +24,13 @@ The code for the module will be in the form of a notebook. ## Input files -The input is dependent on the output fles run from `download-data.py`. This generates `SingleCellExperiment` for DSRCT samples. +The input is dependent on the output fles run from `download-data.py`. + +``` +./download-data.py SCPCP000013 +``` + +This downloads the `SingleCellExperiment` files for DSRCT samples. ## Output files From fe72aeaeb70d869f652bd736d66e936c617a93be Mon Sep 17 00:00:00 2001 From: Danh Truong Date: Fri, 20 Sep 2024 12:14:29 -0500 Subject: [PATCH 4/4] Update analyses/cell-type-dsrct/README.md Added the tag Co-authored-by: Joshua Shapiro --- analyses/cell-type-dsrct/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyses/cell-type-dsrct/README.md b/analyses/cell-type-dsrct/README.md index b000a7b65..97b89cced 100644 --- a/analyses/cell-type-dsrct/README.md +++ b/analyses/cell-type-dsrct/README.md @@ -27,7 +27,7 @@ The code for the module will be in the form of a notebook. The input is dependent on the output fles run from `download-data.py`. ``` -./download-data.py SCPCP000013 +./download-data.py --projects SCPCP000013 ``` This downloads the `SingleCellExperiment` files for DSRCT samples.