Skip to content

Commit

Permalink
Merge pull request #37 from iqbal-lab-org/fix/add_integration_tests_t…
Browse files Browse the repository at this point in the history
…o_all_pling_commands

Plasnet integration PR series (5/5): tests: add integration tests to all pling commands
  • Loading branch information
babayagaofficial authored Nov 27, 2023
2 parents b373d9c + 772e837 commit 6f0b3c0
Show file tree
Hide file tree
Showing 63 changed files with 2,467 additions and 429 deletions.
20 changes: 17 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ on:
push:
branches:
- main
- dev
pull_request:
branches:
- main
- dev

jobs:
Testing:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
os: [ ubuntu-latest ]
python-version: [ 3.8, 3.9, "3.10", 3.11 ]
steps:
- uses: actions/checkout@v3
Expand All @@ -30,11 +32,23 @@ jobs:
python=${{ matrix.python-version }}
snakemake
mamba
poetry
init-shell: bash
cache-environment: true
post-cleanup: 'all'
- name: Test workflow (local test data)

- uses: eWaterCycle/setup-singularity@v7
with:
singularity-version: 3.7.1

- name: Install
shell: bash -el {0}
run: |
micromamba activate test-env
make install-ci
- name: Test
shell: bash -el {0}
run: |
micromamba activate test-env
python -m unittest discover -v -v -v -s tests -t .
make test
26 changes: 26 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Inspired by https://github.com/snakemake/snakefmt/blob/master/Makefile
#
# Convenience wrapper around Poetry for installing, testing and building the
# project. Every target is phony: none of them corresponds to a file on disk.
PROJECT = pling
OS := $(shell uname -s)
VERSION := $(shell poetry version -s)
# tput needs a usable terminal description; when TERM is unset (commonly the
# case on CI runners) it fails and writes an error to stderr on every parse of
# this file. Discard that noise so the variables simply fall back to empty.
BOLD := $(shell tput bold 2>/dev/null)
NORMAL := $(shell tput sgr0 2>/dev/null)

# Default goal: a plain `make` installs the project.
.PHONY: all
all: install

# Install the project and its dependencies into the Poetry environment.
.PHONY: install
install:
	poetry install

# Non-interactive install for CI, followed by running the installed `pling`
# command with --help so a broken installation fails this target.
.PHONY: install-ci
install-ci:
	poetry install --no-interaction
	poetry run pling --help

# Run the unittest suite: discovery starts in tests/ (-s) with the repository
# root as the top-level import directory (-t).
.PHONY: test
test:
	poetry run python -m unittest discover -s tests -t .

# Build the distribution artefacts with Poetry.
.PHONY: build
build:
	poetry build
911 changes: 497 additions & 414 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ keywords = ["Plasmids", "Comparative genomics", "Genome rearrangement", "DCJ-ind
pling = 'pling.run_pling:main'

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
python = ">=3.8,<3.13"
snakemake = "^7.32.4"
pandas = "^2.1.1"
pandas = "^2.0.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
17 changes: 17 additions & 0 deletions tests/integration_test/data/all_plasmids_distances.align.truth.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
plasmid_1 plasmid_2 distance
0 CP057418.1 CP057418.1 0
1 CP057418.1 NZ_CP027199.1 23
2 CP057418.1 NZ_LR882977.1 19
3 CP057418.1 NZ_MF510423.1
4 NZ_CP027199.1 CP057418.1 23
5 NZ_CP027199.1 NZ_CP027199.1 0
6 NZ_CP027199.1 NZ_LR882977.1 19
7 NZ_CP027199.1 NZ_MF510423.1
8 NZ_LR882977.1 CP057418.1 19
9 NZ_LR882977.1 NZ_CP027199.1 19
10 NZ_LR882977.1 NZ_LR882977.1 0
11 NZ_LR882977.1 NZ_MF510423.1
12 NZ_MF510423.1 CP057418.1
13 NZ_MF510423.1 NZ_CP027199.1
14 NZ_MF510423.1 NZ_LR882977.1
15 NZ_MF510423.1 NZ_MF510423.1 0
17 changes: 17 additions & 0 deletions tests/integration_test/data/all_plasmids_distances.anno.truth.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
plasmid_1 plasmid_2 distance
0 CP057418.1 CP057418.1 0
1 CP057418.1 NZ_CP027199.1 18
2 CP057418.1 NZ_LR882977.1 14
3 CP057418.1 NZ_MF510423.1
4 NZ_CP027199.1 CP057418.1 18
5 NZ_CP027199.1 NZ_CP027199.1 0
6 NZ_CP027199.1 NZ_LR882977.1 16
7 NZ_CP027199.1 NZ_MF510423.1
8 NZ_LR882977.1 CP057418.1 14
9 NZ_LR882977.1 NZ_CP027199.1 16
10 NZ_LR882977.1 NZ_LR882977.1 0
11 NZ_LR882977.1 NZ_MF510423.1
12 NZ_MF510423.1 CP057418.1
13 NZ_MF510423.1 NZ_CP027199.1
14 NZ_MF510423.1 NZ_LR882977.1
15 NZ_MF510423.1 NZ_MF510423.1 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
4
CP057418.1 0 23 19
NZ_CP027199.1 23 0 19
NZ_LR882977.1 19 19 0
NZ_MF510423.1 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
4
CP057418.1 0 18 14
NZ_CP027199.1 18 0 16
NZ_LR882977.1 14 16 0
NZ_MF510423.1 0
5 changes: 0 additions & 5 deletions tests/integration_test/data/all_plasmids_matrix.truth.dist

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#taxgroup accession_version mutation_position mutation_symbol class subclass mutated_protein_name
Escherichia WP_000019358.1 12 soxS_A12S MULTIDRUG AMPICILLIN/CHLORAMPHENICOL/QUINOLONE/RIFAMPIN/TETRACYCLINE Escherichia_ampicillin/chloramphenicol/quinolone/rifampin/tetracycline_resistant_SoxS
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#taxgroup protein_accession protein_gi
Escherichia AAA21095.1 151858
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#taxgroup gene_symbol accession_version resistance_cutoff class subclass resistance_protein_name
Streptococcus_pneumoniae pbp1a WP_001040013.1 99.000000 BETA-LACTAM BETA-LACTAM Streptococcus_pneumoniae_beta-lactam_resistant_PBP1A
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10.16
1,744 changes: 1,744 additions & 0 deletions tests/integration_test/data/bakta_db/amrfinderplus-db/2021-09-30.1/fam.tab

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#taxgroup gpipe_taxgroup number_of_nucl_ref_genes
Acinetobacter_baumannii Acinetobacter 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2021-09-30.1
Binary file added tests/integration_test/data/bakta_db/antifam.h3f
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/antifam.h3i
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/antifam.h3m
Binary file not shown.
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/bakta.db
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/ncRNA-genes.i1i
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/integration_test/data/bakta_db/oric.fna
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>ORI10010001
TATTCTTCTATAACATTGTCAAGAATGATAGTTAAAATTCTCGAAATTGGGATATTAACTGCTTTGGAGTAATTTCTAACTTTTTGTCATACTCTTTGACTTGTATAGAAGTGTACACCTGTATCTAGTTTTTCTTGGCGTTCAACAGGAACTATTCCTGGTATTTTTGTTTTAGGTTGGGGAGGAATAGGCTGTGGTTGTGTGAATTGTTGTTGAAAATTTTGATTTTTTTGCTGTAAGAAACCATTATTATGATATTGAAAATTTTGTTCCTCTTGAAAATATCTCTCTTTTTTTGGTTTTCCAGAAAAATTTGATGAAAAAGATTTTTCTTCATTTCAATTTTCAAGATTATTTTCATTTTGTTGATTTATTTGCTCAGGCTGTTGAAATGAATTATTTTTTGATCAAAAAGATTTTGGAAAGGTTTTTTCAAAAGCAGATAAAGGTCCAAAATCAAATGAAGATGAATCTTTGTCAAAAGATGTTTCTTCTCTTTTTGACAAATTTTGTTTTTGATTAAACTTATTTTTATTTTGGGGTGTTACTTTTTCTTTTATGGAAAACAAATCTTCTTCTAAAAGACTTTGTTCTGGGTCATCATCTTGTGCTAAATCAAAGAAAAAACGTTTCTTTTTGTTA
>ORI10010003
GGCGTAGACACTGAATTCGATGGGGATAAGTGGTGGATAAAAGAATATAAATTAGTCATTACACTTTACTCACGAATATCCCCCTTTTTTTAGAGAAAAAATATACTTTCTTCACAAGCTTGTGTGCGGTTTTTGTTTGGTAATTCTCGAGACATAAGCACTTATCCAGATATTCACAGTTACTATTATGTGATACGACTACATTCTTTATACTTATAAGATTAATAAGGAGGAAACTAACT
4 changes: 4 additions & 0 deletions tests/integration_test/data/bakta_db/orit.fna
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>CP019995|MOBP
GTAGAATCGTTTAGTATGAGAATAGAAAACCAACGGTTTTCATGAACTTACTAAACGATTCTAC
>CP012386|MOBP
AGAACAATCAACAACTAATTAGGCAAATTAAGGGGTGCTAAACAACTGCTAGTAGGTGCTAGAGATGTGCTATAAAGGGTGCTAGTTTGGTGCTAGTTACTGCTAAATACGTGCTAGTTTAGGTGCTAGAAACGTGCTATATGGTGCTAAAAAGGTGCTAGTTTGCATGAAGTTACCTGCTAGCCAAGTGCTAGTGGCGTTCGTTTTTGGGTCCCACGGGAAAGCCTTGCACTGCAAGGCGGGTCAGCTTGTCTGACCCCCATTTCCCCTTATGCTCTTCCGAAACACAAAGCGCAATTAAGCGAATACTAGAGAATAAATA
Binary file added tests/integration_test/data/bakta_db/pfam.h3f
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/pfam.h3i
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/pfam.h3m
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/pfam.h3p
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/psc.dmnd
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/rRNA.i1f
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/rRNA.i1i
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/rRNA.i1m
Binary file not shown.
Binary file added tests/integration_test/data/bakta_db/rRNA.i1p
Binary file not shown.
1 change: 1 addition & 0 deletions tests/integration_test/data/bakta_db/rfam-go.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Rfam:RF00001 GO:0003735
Binary file added tests/integration_test/data/bakta_db/sorf.dmnd
Binary file not shown.
54 changes: 54 additions & 0 deletions tests/integration_test/data/bakta_db/version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"date": "2023-02-20",
"major": 5,
"minor": 0,
"type": "full",
"dependencies": [
{
"name": "AMRFinderPlus",
"release": "2020-09-22.2"
},
{
"name": "COG",
"release": "2014"
},
{
"name": "DoriC",
"release": "10"
},
{
"name": "ISFinder",
"release": "2019-09-25"
},
{
"name": "Mob-suite",
"release": "2.0"
},
{
"name": "Pfam",
"release": "33.1"
},
{
"name": "RefSeq",
"release": "r202"
},
{
"name": "Rfam",
"release": "14.2"
},
{
"name": "UniProtKB/Swiss-Prot",
"release": "2020_04"
}
],
"experts": [
{
"name": "AMRFinderPlus",
"release": "3.10.1"
},
{
"name": "NCBI BlastRules",
"release": "4.0"
}
]
}
68 changes: 63 additions & 5 deletions tests/integration_test/test_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ def assert_files_are_identical(self, path_to_first_file, path_to_second_file):
second_file_content = self.read_file(path_to_second_file)
self.assertEqual(first_file_content, second_file_content)

def test_pling_end_to_end(self):
def test_pling_align_end_to_end(self):
args = Namespace(genomes_list='tests/integration_test/data/incy_list_4.txt',
output_dir='tests/integration_test/data/out',
output_dir='tests/integration_test/data/out_align',
integerisation='align',
bakta_db=None,
jaccard=0.4,
jaccard_distance=0.6,
dcj=4,
dedup=None,
dedup_threshold=None,
Expand All @@ -38,5 +38,63 @@ def test_pling_end_to_end(self):
profile=None)
run_pling.pling(args)

assert_files_are_identical("tests/integration_test/data/out/all_plasmids_matrix.dist",
"tests/integration_test/data/all_plasmids_matrix.truth.dist")
assert_files_are_identical("tests/integration_test/data/out_align/all_plasmids_matrix.dist",
"tests/integration_test/data/all_plasmids_matrix.align.truth.dist")
assert_files_are_identical("tests/integration_test/data/out_align/all_plasmids_distances.tsv",
"tests/integration_test/data/all_plasmids_distances.align.truth.tsv")

def test_pling_anno_with_dedup_end_to_end(self):
    """Run pling end to end with 'anno' integerisation and deduplication on.

    Uses the four-genome list and the bakta database under
    tests/integration_test/data, then checks that the produced distance
    matrix and distances table match the stored 'anno' truth files.
    """
    settings = {
        'genomes_list': 'tests/integration_test/data/incy_list_4.txt',
        'output_dir': 'tests/integration_test/data/out_anno_with_dedup',
        'integerisation': 'anno',
        'bakta_db': "tests/integration_test/data/bakta_db",
        'jaccard_distance': 0.6,
        'dcj': 4,
        'dedup': True,
        'dedup_threshold': 98.5,
        'identity': 80,
        'min_indel_size': 200,
        'bh_connectivity': 10,
        'bh_neighbours_edge_density': 0.2,
        'small_subcommunity_size_threshold': 4,
        'cores': '2',
        'storetmp': False,
        'forceall': True,
        'ilp_solver': 'GLPK',
        'timelimit': None,
        'resources': None,
        'profile': None,
    }
    run_pling.pling(Namespace(**settings))

    # (produced file, truth file) pairs that must be identical.
    expected_pairs = (
        ("tests/integration_test/data/out_anno_with_dedup/all_plasmids_matrix.dist",
         "tests/integration_test/data/all_plasmids_matrix.anno.truth.dist"),
        ("tests/integration_test/data/out_anno_with_dedup/all_plasmids_distances.tsv",
         "tests/integration_test/data/all_plasmids_distances.anno.truth.tsv"),
    )
    # NOTE(review): the helper is called as a bare name, as in the original;
    # the visible definition of that name takes `self` -- confirm a
    # module-level helper with this two-argument signature is in scope.
    for produced_path, truth_path in expected_pairs:
        assert_files_are_identical(produced_path, truth_path)

def test_pling_anno_without_dedup_end_to_end(self):
    """Run pling end to end with 'anno' integerisation and deduplication off.

    Uses the four-genome list and the bakta database under
    tests/integration_test/data, then checks that the produced distance
    matrix and distances table match the stored 'anno' truth files.
    """
    settings = {
        'genomes_list': 'tests/integration_test/data/incy_list_4.txt',
        'output_dir': 'tests/integration_test/data/out_anno_without_dedup',
        'integerisation': 'anno',
        'bakta_db': "tests/integration_test/data/bakta_db",
        'jaccard_distance': 0.6,
        'dcj': 4,
        'dedup': None,
        'dedup_threshold': None,
        'identity': 80,
        'min_indel_size': 200,
        'bh_connectivity': 10,
        'bh_neighbours_edge_density': 0.2,
        'small_subcommunity_size_threshold': 4,
        'cores': '2',
        'storetmp': False,
        'forceall': True,
        'ilp_solver': 'GLPK',
        'timelimit': None,
        'resources': None,
        'profile': None,
    }
    run_pling.pling(Namespace(**settings))

    # (produced file, truth file) pairs that must be identical.
    expected_pairs = (
        ("tests/integration_test/data/out_anno_without_dedup/all_plasmids_matrix.dist",
         "tests/integration_test/data/all_plasmids_matrix.anno.truth.dist"),
        ("tests/integration_test/data/out_anno_without_dedup/all_plasmids_distances.tsv",
         "tests/integration_test/data/all_plasmids_distances.anno.truth.tsv"),
    )
    # NOTE(review): the helper is called as a bare name, as in the original;
    # the visible definition of that name takes `self` -- confirm a
    # module-level helper with this two-argument signature is in scope.
    for produced_path, truth_path in expected_pairs:
        assert_files_are_identical(produced_path, truth_path)

0 comments on commit 6f0b3c0

Please sign in to comment.