Skip to content

Commit

Permalink
maybe up
Browse files Browse the repository at this point in the history
  • Loading branch information
danpf committed Nov 2, 2023
1 parent 0e536a1 commit 2dcfd98
Show file tree
Hide file tree
Showing 311 changed files with 15,253 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,5 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

boldsystems.db
**/boldsystems.db
Binary file added src/python/dpf-sanger-sequencing/.DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions src/python/dpf-sanger-sequencing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@



# Citations
Ratnasingham, Sujeevan, and Paul D. N. Hebert. “BOLD: The Barcode of Life Data System (http://www.barcodinglife.org).” Molecular Ecology Notes, vol. 7, no. 3 (2007): 355-364. doi:10.1111/j.1471-8286.2007.01678.x
7,813 changes: 7,813 additions & 0 deletions src/python/dpf-sanger-sequencing/bold_data.txt

Large diffs are not rendered by default.

1,986 changes: 1,986 additions & 0 deletions src/python/dpf-sanger-sequencing/file.out

Large diffs are not rendered by default.

4,227 changes: 4,227 additions & 0 deletions src/python/dpf-sanger-sequencing/mammalia_canada_combined.tsv

Large diffs are not rendered by default.

Binary file not shown.
257 changes: 257 additions & 0 deletions src/python/dpf-sanger-sequencing/out.txt

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/python/dpf-sanger-sequencing/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[project]
name = "dpf_sanger_sequencing"
description="A library for interpreting sanger sequencing data"
requires-python = ">=3.11"
authors = [{name = "Danny Farrell", email = "[email protected]"}]
dependencies = ["biopython", "requests", "sqlalchemy"]
version="1.0.0"
2 changes: 2 additions & 0 deletions src/python/dpf-sanger-sequencing/scripts/TRACE_FILE_INFO.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PROCESSID TAXON MARKER GENBANK_ACCESSION TRACEFILE

76 changes: 76 additions & 0 deletions src/python/dpf-sanger-sequencing/scripts/run_bs_dl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from pathlib import Path
from dpf_sanger_sequencing.boldsystems import BoldSystemsAPI, Session, CombinedData
from dpf_sanger_sequencing.boldsystems.bold_api_access import parse_tsv_weird_format
import asyncio


def store_combined_data_for_taxonomies(taxonomies: list[str]):
    """Download all trace data through ``BoldSystemsAPI`` using a DB session.

    Creates a ``BoldSystemsAPI`` client and a ``Session``, runs the
    ``download_all_trace_data`` coroutine to completion with ``asyncio.run``,
    and always closes the session afterwards, even if the download raises.

    Args:
        taxonomies: Taxon names to process. NOTE(review): this argument is
            currently unused because the per-taxon ``get_combined_data``
            step is disabled below; confirm whether that is intentional.
    """
    api = BoldSystemsAPI()

    # Disabled step: load a locally downloaded BOLD data package.
    # api.get_data_package_data(Path("~/Downloads/bold_pub/BOLD_Public.05-May-2023.tsv").expanduser())
    # asyncio.run(api.get_combined_data_from_local_package_data())

    # Disabled step: fetch combined data for every requested taxon.
    # for i, taxonomy in enumerate(taxonomies):
    #     print(f"getting {i} / {len(taxonomies)} {taxonomy=}")
    #     api.get_combined_data(dict(taxon=taxonomy))

    session = Session()
    try:
        asyncio.run(api.download_all_trace_data(session))
    finally:
        # Release the session (and its connection) even when the download
        # fails part-way; previously it was never closed.
        # NOTE(review): assumes Session() yields a SQLAlchemy session -- confirm.
        session.close()


if __name__ == "__main__":
    # Taxon names handed to the download routine, kept in their original
    # order. NOTE(review): the four groups below look like the kingdom-level
    # groupings of the BOLD taxonomy browser -- confirm before relying on it.
    animals = [
        "Acanthocephala",
        # "Acoelomorpha",  # Missing data?
        "Annelida",
        "Arthropoda",
        "Brachiopoda",
        "Bryozoa",
        "Chaetognatha",
        "Chordata",
        "Cnidaria",
        "Ctenophora",
        "Cycliophora",
        "Echinodermata",
        "Entoprocta",
        "Gastrotricha",
        "Gnathostomulida",
        "Hemichordata",
        "Kinorhyncha",
        "Mollusca",
        "Nematoda",
        "Nematomorpha",
        "Nemertea",
        "Onychophora",
        "Phoronida",
        "Placozoa",
        "Platyhelminthes",
        "Porifera",
        "Priapulida",
        "Rhombozoa",
        "Rotifera",
        "Tardigrada",
        "Xenacoelomorpha",
    ]
    plants = [
        "Bryophyta",
        "Chlorophyta",
        "Lycopodiophyta",
        "Magnoliophyta",
        "Pinophyta",
        "Pteridophyta",
    ]
    fungi = [
        "Ascomycota",
        "Basidiomycota",
        "Chytridiomycota",
        "Glomeromycota",
        "Myxomycota",
        "Zygomycota",
    ]
    protists = [
        "Chlorarachniophyta",
        "Ciliophora",
        "Heterokontophyta",
        "Pyrrophycophyta",
        "Rhodophyta",
    ]

    # Flatten the groups back into the single list the entry point expects.
    taxonomies = [*animals, *plants, *fungi, *protists]
    store_combined_data_for_taxonomies(taxonomies)
8 changes: 8 additions & 0 deletions src/python/dpf-sanger-sequencing/scripts/test_bs_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@


from dpf_sanger_sequencing.boldsystems.bold_api_access import BoldSystemsAPI

# Manual smoke check: request trace data for a single taxon via BoldSystemsAPI.
# The calls run at import time, exactly as in the original script.
search_params = dict(taxon="Brachiopoda")
api = BoldSystemsAPI()

# Alternative request, currently disabled:
# api.get_combined_data(search_params, "tsv")
api.get_trace_data(search_params)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .models import Base as Base, CombinedData as CombinedData, TraceData as TraceData, UrlQueryData as UrlQueryData
from .bold_api_access import BoldSystemsAPI as BoldSystemsAPI, Session as Session
Loading

0 comments on commit 2dcfd98

Please sign in to comment.