Skip to content

Commit

Permalink
Upcoming 0.2 - merge branch 'dev', new features:
Browse files Browse the repository at this point in the history
 - hard-coded table replaced with:
   - BED fields for amplicon regions
   - YAML for signatures / variants definition
   - amplicon query tables can themselves be saved and reused
 - tool for importing signatures from PHE-Genomics
 - (preliminary) tool for evaluating quality of variants definition
   using mutation prevalences fetched from CoV-Spectrum
  • Loading branch information
DrYak committed Feb 8, 2022
2 parents 800ba92 + 2548124 commit 7300b3b
Show file tree
Hide file tree
Showing 33 changed files with 74,167 additions and 432 deletions.
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
notebooks/.ipynb_checkpoints/
notebooks/working
notebooks/ww_plants.tsv
notebooks/mutlist.txt
notebooks/snv_tables/
notebooks/tallymut_line_snv.tsv
notebooks/tallymut_line.tsv
notebooks/viollier_data.csv
notebooks/data_per_city.csv
notebooks/data_per_day_and_canton2.csv
notebooks/ww_update_data.json

298 changes: 250 additions & 48 deletions README.md

Large diffs are not rendered by default.

99 changes: 99 additions & 0 deletions SARS-CoV-2.insert.V4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
MN908947.3 50 408 SARS-CoV-2_INSERT_1 1 +
MN908947.3 344 705 SARS-CoV-2_INSERT_2 2 +
MN908947.3 666 1017 SARS-CoV-2_INSERT_3 1 +
MN908947.3 966 1337 SARS-CoV-2_INSERT_4 2 +
MN908947.3 1266 1623 SARS-CoV-2_INSERT_5 1 +
MN908947.3 1562 1925 SARS-CoV-2_INSERT_6 2 +
MN908947.3 1875 2228 SARS-CoV-2_INSERT_7 1 +
MN908947.3 2180 2544 SARS-CoV-2_INSERT_8 2 +
MN908947.3 2508 2861 SARS-CoV-2_INSERT_9 1 +
MN908947.3 2850 3183 SARS-CoV-2_INSERT_10 2 +
MN908947.3 3102 3470 SARS-CoV-2_INSERT_11 1 +
MN908947.3 3412 3769 SARS-CoV-2_INSERT_12 2 +
MN908947.3 3705 4067 SARS-CoV-2_INSERT_13 1 +
MN908947.3 4018 4387 SARS-CoV-2_INSERT_14 2 +
MN908947.3 4339 4685 SARS-CoV-2_INSERT_15 1 +
MN908947.3 4648 4995 SARS-CoV-2_INSERT_16 2 +
MN908947.3 4953 5302 SARS-CoV-2_INSERT_17 1 +
MN908947.3 5259 5620 SARS-CoV-2_INSERT_18 2 +
MN908947.3 5584 5932 SARS-CoV-2_INSERT_19 1 +
MN908947.3 5894 6247 SARS-CoV-2_INSERT_20 2 +
MN908947.3 6210 6553 SARS-CoV-2_INSERT_21 1 +
MN908947.3 6507 6859 SARS-CoV-2_INSERT_22 2 +
MN908947.3 6776 7122 SARS-CoV-2_INSERT_23 1 +
MN908947.3 7084 7440 SARS-CoV-2_INSERT_24 2 +
MN908947.3 7403 7747 SARS-CoV-2_INSERT_25 1 +
MN908947.3 7695 8063 SARS-CoV-2_INSERT_26 2 +
MN908947.3 8019 8370 SARS-CoV-2_INSERT_27 1 +
MN908947.3 8326 8691 SARS-CoV-2_INSERT_28 2 +
MN908947.3 8619 8990 SARS-CoV-2_INSERT_29 1 +
MN908947.3 8944 9306 SARS-CoV-2_INSERT_30 2 +
MN908947.3 9192 9535 SARS-CoV-2_INSERT_31 1 +
MN908947.3 9497 9842 SARS-CoV-2_INSERT_32 2 +
MN908947.3 9805 10150 SARS-CoV-2_INSERT_33 1 +
MN908947.3 10099 10465 SARS-CoV-2_INSERT_34 2 +
MN908947.3 10419 10785 SARS-CoV-2_INSERT_35 1 +
MN908947.3 10742 11092 SARS-CoV-2_INSERT_36 2 +
MN908947.3 11023 11388 SARS-CoV-2_INSERT_37 1 +
MN908947.3 11330 11689 SARS-CoV-2_INSERT_38 2 +
MN908947.3 11651 12011 SARS-CoV-2_INSERT_39 1 +
MN908947.3 11963 12317 SARS-CoV-2_INSERT_40 2 +
MN908947.3 12255 12618 SARS-CoV-2_INSERT_41 1 +
MN908947.3 12546 12895 SARS-CoV-2_INSERT_42 2 +
MN908947.3 12856 13218 SARS-CoV-2_INSERT_43 1 +
MN908947.3 13148 13506 SARS-CoV-2_INSERT_44 2 +
MN908947.3 13485 13833 SARS-CoV-2_INSERT_45 1 +
MN908947.3 13775 14120 SARS-CoV-2_INSERT_46 2 +
MN908947.3 14075 14428 SARS-CoV-2_INSERT_47 1 +
MN908947.3 14362 14717 SARS-CoV-2_INSERT_48 2 +
MN908947.3 14674 15023 SARS-CoV-2_INSERT_49 1 +
MN908947.3 14983 15336 SARS-CoV-2_INSERT_50 2 +
MN908947.3 15237 15596 SARS-CoV-2_INSERT_51 1 +
MN908947.3 15557 15917 SARS-CoV-2_INSERT_52 2 +
MN908947.3 15881 16239 SARS-CoV-2_INSERT_53 1 +
MN908947.3 16137 16483 SARS-CoV-2_INSERT_54 2 +
MN908947.3 16408 16767 SARS-CoV-2_INSERT_55 1 +
MN908947.3 16714 17082 SARS-CoV-2_INSERT_56 2 +
MN908947.3 17013 17381 SARS-CoV-2_INSERT_57 1 +
MN908947.3 17345 17688 SARS-CoV-2_INSERT_58 2 +
MN908947.3 17642 17997 SARS-CoV-2_INSERT_59 1 +
MN908947.3 17939 18307 SARS-CoV-2_INSERT_60 2 +
MN908947.3 18267 18624 SARS-CoV-2_INSERT_61 1 +
MN908947.3 18578 18936 SARS-CoV-2_INSERT_62 2 +
MN908947.3 18891 19252 SARS-CoV-2_INSERT_63 1 +
MN908947.3 19208 19558 SARS-CoV-2_INSERT_64 2 +
MN908947.3 19513 19877 SARS-CoV-2_INSERT_65 1 +
MN908947.3 19836 20186 SARS-CoV-2_INSERT_66 2 +
MN908947.3 20117 20472 SARS-CoV-2_INSERT_67 1 +
MN908947.3 20405 20766 SARS-CoV-2_INSERT_68 2 +
MN908947.3 20699 21050 SARS-CoV-2_INSERT_69 1 +
MN908947.3 21013 21358 SARS-CoV-2_INSERT_70 2 +
MN908947.3 21316 21675 SARS-CoV-2_INSERT_71 1 +
MN908947.3 21561 21904 SARS-CoV-2_INSERT_72 2 +
MN908947.3 21889 22247 SARS-CoV-2_INSERT_73 1 +
MN908947.3 22113 22474 SARS-CoV-2_INSERT_74 2 +
MN908947.3 22428 22785 SARS-CoV-2_INSERT_75 1 +
MN908947.3 22677 23028 SARS-CoV-2_INSERT_76 2 +
MN908947.3 22974 23327 SARS-CoV-2_INSERT_77 1 +
MN908947.3 23246 23611 SARS-CoV-2_INSERT_78 2 +
MN908947.3 23575 23927 SARS-CoV-2_INSERT_79 1 +
MN908947.3 23876 24233 SARS-CoV-2_INSERT_80 2 +
MN908947.3 24194 24545 SARS-CoV-2_INSERT_81 1 +
MN908947.3 24448 24814 SARS-CoV-2_INSERT_82 2 +
MN908947.3 24772 25122 SARS-CoV-2_INSERT_83 1 +
MN908947.3 25076 25438 SARS-CoV-2_INSERT_84 2 +
MN908947.3 25353 25711 SARS-CoV-2_INSERT_85 1 +
MN908947.3 25672 26026 SARS-CoV-2_INSERT_86 2 +
MN908947.3 25979 26338 SARS-CoV-2_INSERT_87 1 +
MN908947.3 26277 26635 SARS-CoV-2_INSERT_88 2 +
MN908947.3 26587 26956 SARS-CoV-2_INSERT_89 1 +
MN908947.3 26895 27256 SARS-CoV-2_INSERT_90 2 +
MN908947.3 27177 27534 SARS-CoV-2_INSERT_91 1 +
MN908947.3 27473 27826 SARS-CoV-2_INSERT_92 2 +
MN908947.3 27726 28082 SARS-CoV-2_INSERT_93 1 +
MN908947.3 28021 28394 SARS-CoV-2_INSERT_94 2 +
MN908947.3 28214 28572 SARS-CoV-2_INSERT_95 1 +
MN908947.3 28536 28893 SARS-CoV-2_INSERT_96 2 +
MN908947.3 28849 29206 SARS-CoV-2_INSERT_97 1 +
MN908947.3 29161 29512 SARS-CoV-2_INSERT_98 2 +
MN908947.3 29475 29827 SARS-CoV-2_INSERT_99 1 +
8 changes: 5 additions & 3 deletions conda_cojac_env.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
name: cojac
channels:
- defaults
- conda-forge
- bioconda
- defaults
dependencies:
- pysam
- python
- pysam >= 0.17
- python
- pandas
- numpy
- pyyaml
- strictyaml
- requests
29 changes: 17 additions & 12 deletions cooc-colourmut
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import yaml
# parse command line
argparser = argparse.ArgumentParser(description="print coloured pretty table on terminal",
epilog="see cooc-pubmut for a CSV file that can be imported into an article")
argparser.add_argument('-a', '--amplicons', metavar='YAML', required=True,
type=str, dest='amp', help="list of query amplicons, from mutbamscan")
inputgroup = argparser.add_mutually_exclusive_group(required=True)
inputgroup.add_argument('-j', '--json', metavar='JSON',
type=str, dest='json', help="results generated by mutbamscan")
Expand All @@ -24,22 +26,25 @@ if args.json:
with open(args.json, 'rt') as jf:
table=json.load(fp=jf)
elif args.yaml:
assert os.path.isfile(args.yaml), f"cannot find result json file {args.yaml}"
assert os.path.isfile(args.yaml), f"cannot find result yaml file {args.yaml}"
with open(args.yaml, 'rt') as yf:
table=yaml.safe_load(yf)

assert len(table) > 0, "cannot succesfully load table"

# TODO generate instead of hardcoding
# reuse stored amplicons
amplicon_nfo={ }

assert os.path.isfile(args.amp), f"cannot find amplicon file yaml file {args.amp}"
with open(args.amp, 'rt') as yf:
amp_str = yaml.safe_load(yf)

amplicon_nfo = {
a: ','.join([
f"{p}{b}" if len(b) == 1 else (f"\u0394{p}-{p + len(b) - 1}" if b == '-' * len(b) else f"{p}\u2192{b}")for p,b in aqu[4].items()
]) for a,aqu in amp_str.items()
}

amplicon_nfo={
'72_UK': '21765-21770Δ,21991-21993Δ',
'78_UK': '23604A,23709T',
'92_UK': '27972T,28048T,28111G',
'93_UK': '28111G,28280-28280->CTA',
'76_SA': '23012A,23063T',
'77_EU': '23403G',
}


#
Expand All @@ -49,8 +54,8 @@ amplicon_nfo={
l=max(len(k) for k in table)
print(f"{'':<{l}} ", end='')
for a in amplicon_nfo: print(f" {a :<26}", end='')
print(f"\n{'':<{l}} ", end='')
for a,label in amplicon_nfo.items(): print(f" {label :<26}", end='')
print(f"\n{'':<{l}} ", end='')
for a,label in amplicon_nfo.items(): print(f"{label if len(label)<=27 else (label[:26]+chr(0x2026)) :<27.27}", end='')
print(f"\n{'sample:':<{l}} ", end='')
for a in amplicon_nfo: print(f"{'cov:' :>9}{'mut:' :>9}{'frq/%:' :>9}", end='')
print()
Expand Down
Loading

0 comments on commit 7300b3b

Please sign in to comment.