Skip to content

Commit

Permalink
wip: blastp
Browse files Browse the repository at this point in the history
  • Loading branch information
alubbock committed Oct 14, 2024
1 parent 6974514 commit 9f20d9c
Show file tree
Hide file tree
Showing 13 changed files with 440 additions and 113 deletions.
30 changes: 16 additions & 14 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
FROM quay.io/rosalindfranklininstitute/blast:2.16.0 AS blast

FROM python:3.12-bookworm AS builder
FROM python:3.13-bookworm AS builder

RUN pip install --user wheel pipenv
RUN pip install --upgrade pip wheel pipenv

RUN adduser --uid 10191 --group --system --no-create-home nonroot
RUN chown -R nonroot:nonroot /usr/src
USER nonroot
WORKDIR /usr/src

# Tell pipenv to create venv in the current directory
ENV PIPENV_VENV_IN_PROJECT=1

COPY Pipfile Pipfile.lock pyproject.toml setup.cfg /usr/src/
WORKDIR /usr/src

RUN /root/.local/bin/pipenv sync
ARG PIPENV_SYNC_FLAGS=
RUN pipenv sync ${PIPENV_SYNC_FLAGS}

COPY manage.py /usr/src/
COPY antigendjango /usr/src/antigendjango
COPY antigenapi /usr/src/antigenapi

RUN DJANGO_CI=true .venv/bin/python manage.py collectstatic --noinput

FROM python:3.12-slim-bookworm AS prod
FROM python:3.13-slim-bookworm AS prod

# liblmdb-dev required by BLAST
RUN apt-get update && apt-get install -y \
Expand All @@ -27,7 +32,10 @@ RUN apt-get update && apt-get install -y \
liblmdb-dev \
&& rm -rf /var/lib/apt/lists/*

RUN mkdir -v /usr/src/.venv
RUN adduser --uid 10191 --group --system --no-create-home nonroot
RUN chown -R nonroot:nonroot /usr/src
USER nonroot
WORKDIR /usr/src

# blastp and makeblastdb commands
COPY --from=blast /blast/ReleaseMT/bin/blastp /usr/local/bin/blastp
Expand All @@ -42,23 +50,17 @@ COPY uwsgi.ini /usr/src

WORKDIR /usr/src

RUN addgroup --gid 10191 nonroot
RUN adduser --uid 10191 --gid 10191 --system --no-create-home nonroot
USER nonroot

CMD [".venv/bin/uwsgi", "--ini", "uwsgi.ini"]

FROM builder AS dev

# liblmdb-dev required by BLAST
USER root
RUN apt update && apt install -y liblmdb-dev && rm -rf /var/lib/apt/lists/*
USER nonroot

# blastp and makeblastdb commands
COPY --from=blast /blast/ReleaseMT/bin/blastp /usr/local/bin/blastp
COPY --from=blast /blast/ReleaseMT/bin/makeblastdb /usr/local/bin/makeblastdb

ENV PATH="$PATH:/root/.local/bin:/usr/src/.venv/bin"

RUN pipenv sync --dev

CMD ["pipenv", "run", "python", "manage.py", "runserver", "0.0.0.0:8080"]
Empty file.
137 changes: 137 additions & 0 deletions backend/antigenapi/bioinformatics/blast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os
import subprocess
from tempfile import TemporaryDirectory
from typing import Optional

from ..models import SequencingRunResults
from .imgt import read_airr_file

# BLAST -outfmt codes are listed at:
# https://www.ncbi.nlm.nih.gov/books/NBK279684/table/appendices.T.options_common_to_all_blast/
# Code 15 is "Single-file BLAST JSON"; the multiple-file variant is code 13.
BLAST_FMT_SINGLE_FILE_BLAST_JSON = "15"
# Misnamed alias (the value has always been the single-file code); kept so
# that existing references to this name keep working.
BLAST_FMT_MULTIPLE_FILE_BLAST_JSON = BLAST_FMT_SINGLE_FILE_BLAST_JSON
# Value passed to blastp's -num_threads option.
BLAST_NUM_THREADS = 4


def get_db_fasta(include_run: Optional[int] = None, exclude_run: Optional[int] = None):
    """Get the sequencing database in fasta format.

    Every SequencingRunResults row matching the filters has its AIRR file
    read; each record with a non-null ``sequence_alignment_aa`` becomes one
    FASTA entry, with "." characters stripped from the sequence.

    Args:
        include_run (int, optional): Sequencing run ID to include.
            Defaults to None (no restriction).
        exclude_run (int, optional): Sequencing run ID to exclude.
            Defaults to None (nothing excluded).

    Returns:
        str: Sequencing run(s) as a FASTA format string; empty if no
            records matched.
    """
    query = SequencingRunResults.objects.all()
    # Compare against None explicitly so that a run ID of 0 is still
    # treated as a real filter value rather than "no filter".
    if include_run is not None:
        query = query.filter(sequencing_run_id=include_run)
    if exclude_run is not None:
        query = query.exclude(sequencing_run_id=exclude_run)

    # Collect entries in a list and join once; repeated ``+=`` on a growing
    # string is quadratic in the worst case.
    entries = []
    for sr in query:
        airr_file = read_airr_file(
            sr.airr_file, usecols=("sequence_id", "sequence_alignment_aa")
        )
        airr_file = airr_file[airr_file.sequence_alignment_aa.notna()]
        # Iterate the two columns in lockstep instead of iterrows(), which
        # builds a Series object for every row.
        for seq_id, seq_aa in zip(
            airr_file["sequence_id"], airr_file["sequence_alignment_aa"]
        ):
            entries.append(f"> {seq_id}\n{seq_aa.replace('.', '')}\n")
    return "".join(entries)


def get_sequencing_run_fasta(sequencing_run_id: int):
    """Build the FASTA text for a single sequencing run.

    Delegates to get_db_fasta, restricting it to the given run.

    Args:
        sequencing_run_id (int): Sequencing run ID

    Returns:
        str: The run's sequences as a FASTA format string
    """
    run_fasta = get_db_fasta(include_run=sequencing_run_id)
    return run_fasta


def _run_blast_tool(args, cwd):
    """Run a BLAST+ command line tool inside ``cwd`` and fail loudly on error.

    Args:
        args (list[str]): Executable name followed by its arguments.
        cwd (str): Working directory for the process; relative file names
            in ``args`` are resolved against it.

    Raises:
        RuntimeError: If the process exits with a non-zero return code. The
            message contains the exit code and the captured stdout/stderr.
    """
    proc = subprocess.run(
        args,
        capture_output=True,
        cwd=cwd,
        # Decode the captured streams so the error message is readable text
        # rather than a bytes repr; undecodable bytes must not mask the
        # underlying tool failure.
        encoding="utf-8",
        errors="replace",
    )
    if proc.returncode != 0:
        raise RuntimeError(
            f"{args[0]} returned exit code of "
            f"{proc.returncode}\n\n"
            f"STDOUT: {proc.stdout}\n\n"
            f"STDERR: {proc.stderr}"
        )


def run_blastp(
    sequencing_run_id: int, outfmt: str = BLAST_FMT_MULTIPLE_FILE_BLAST_JSON
) -> Optional[str]:
    """Run blastp for a sequencing run against the sequencing database.

    The database is built with makeblastdb from get_db_fasta() called with
    no filters, so it also contains the query run's own sequences.
    NOTE(review): if the intent is "vs the rest of the database", the query
    run would need to be excluded (get_db_fasta has an ``exclude_run``
    parameter) - confirm the desired behaviour.

    Args:
        sequencing_run_id (int): Sequencing run ID.
        outfmt (str): Value passed to blastp's ``-outfmt`` option. The
            default is "15", which is single-file BLAST JSON.

    Returns:
        str | None: Contents of the blastp results file, or None when either
            the query run or the database yields no sequences.

    Raises:
        RuntimeError: If makeblastdb or blastp exits with a non-zero code.
    """
    # Build the (smaller) query first: if the run has no sequences there is
    # no need to read every AIRR file to assemble the full database.
    query_data = get_sequencing_run_fasta(sequencing_run_id)
    if not query_data:
        return None
    db_data = get_db_fasta()
    if not db_data:
        return None

    # All intermediate files live in a temporary directory that is removed
    # when the block exits; the tools run with it as working directory so
    # plain file names can be used on the command lines.
    with TemporaryDirectory() as tmp_dir:
        # Write the DB to disk as .fasta format
        with open(os.path.join(tmp_dir, "db.fasta"), "w", encoding="utf-8") as f:
            f.write(db_data)

        _run_blast_tool(
            [
                "makeblastdb",
                "-in",
                "db.fasta",
                "-dbtype",
                "prot",
                "-out",
                "antigen.db",
            ],
            cwd=tmp_dir,
        )

        # Write out query file
        with open(
            os.path.join(tmp_dir, "query.fasta"), "w", encoding="utf-8"
        ) as f:
            f.write(query_data)

        _run_blast_tool(
            [
                "blastp",
                "-db",
                "antigen.db",
                "-query",
                "query.fasta",
                "-outfmt",
                outfmt,
                "-out",
                "antigen.results",
                "-num_threads",
                str(BLAST_NUM_THREADS),
            ],
            cwd=tmp_dir,
        )

        # Read the results before the temporary directory is cleaned up
        with open(
            os.path.join(tmp_dir, "antigen.results"), "r", encoding="utf-8"
        ) as f:
            return f.read()
File renamed without changes.
2 changes: 1 addition & 1 deletion backend/antigenapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
)
from django.db.models.signals import post_init, post_save

from .bioinformatics import read_airr_file
from .bioinformatics.imgt import read_airr_file


class Project(Model):
Expand Down
2 changes: 1 addition & 1 deletion backend/antigenapi/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import unittest
import zipfile

from antigenapi.bioinformatics import load_sequences, trim_sequence
from antigenapi.bioinformatics.imgt import load_sequences, trim_sequence
from antigenapi.views_old import _extract_well


Expand Down
2 changes: 0 additions & 2 deletions backend/antigenapi/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from antigenapi.views.dashboard import AuditLogLatestEvents, DashboardStats
from antigenapi.views_old import (
AntigenViewSet,
BlastAllView,
CohortViewSet,
ElisaPlateViewSet,
GlobalFastaView,
Expand All @@ -30,5 +29,4 @@
path("fasta/", GlobalFastaView.as_view(), name="fasta"),
path("dashboard/stats", DashboardStats.as_view(), name="dashboard_stats"),
path("dashboard/latest", AuditLogLatestEvents.as_view(), name="dashboard_latest"),
path("blast/all", BlastAllView.as_view(), name="blast_all"),
]
Loading

0 comments on commit 9f20d9c

Please sign in to comment.