forked from aniafijarczyk/Fijarczyk2020_JeanTalon
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
90a5922
commit cd4fa9a
Showing
12 changed files
with
984,110 additions
and
0 deletions.
There are no files selected for viewing
195,449 changes: 195,449 additions & 0 deletions
195,449
07_draft_assemblies/Jean-Talon.contigs.fasta
Large diffs are not rendered by default.
Oops, something went wrong.
3,356 changes: 3,356 additions & 0 deletions
3,356
07_draft_assemblies/Jean-Talon.contigs.fasta.coords
Large diffs are not rendered by default.
Oops, something went wrong.
246,904 changes: 246,904 additions & 0 deletions
246,904
07_draft_assemblies/Jean-Talon.unitigs.fasta
Large diffs are not rendered by default.
Oops, something went wrong.
4,464 changes: 4,464 additions & 0 deletions
4,464
07_draft_assemblies/Jean-Talon.unitigs.fasta.coords
Large diffs are not rendered by default.
Oops, something went wrong.
192,857 changes: 192,857 additions & 0 deletions
192,857
07_draft_assemblies/Jean-Talon_reordered.fasta
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Code for analysing translocation using draft assemblies | ||
|
||
|
||
Jean-Talon.contigs.fasta # canu draft assembly contigs from reads >8kb | ||
Jean-Talon.contigs.fasta.coords # mummer output from alignment of the draft assembly against the wtdbg2 polished assembly | ||
Jean-Talon.unitigs.fasta # canu draft assembly unitigs from reads >8kb | ||
Jean-Talon.unitigs.fasta.coords # mummer output from alignment of the draft assembly against the wtdbg2 polished assembly | ||
Jean-Talon_reordered.fasta # wtdbg2 polished assembly | ||
S288c.genome.fa # Genome of the S.cerevisiae strain S288C produced by Yue et al. Nat. Genet. 2017. | ||
# Downloaded from https://yjx1217.github.io/Yeast_PacBio_2016/data/ | ||
S288c.genome.fa.coords # mummer output from alignment of the draft assembly against the wtdbg2 polished assembly | ||
assemblies.ipynb # main script | ||
barcode11.cns.fa # wtdbg2 draft assembly from reads >8kb | ||
barcode11.cns.fa.coords # mummer output from alignment of the draft assembly against the wtdbg2 polished assembly | ||
tig00000035.fasta # canu unitig corresponding to the uncertain translocation junction |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# author: Mathieu Hénault\n", | ||
"from Bio import SeqIO\n", | ||
"from Bio import SeqRecord as SeqRecord\n", | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import itertools\n", | ||
"from matplotlib import pyplot as plt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Analysis of the alignments of draft assemblies using mummer" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"DELTA = {}\n", | ||
"ASSEMBLIES = {}\n", | ||
"tig_order = {}\n", | ||
"cum_len = {}\n", | ||
"for s in ['Jean-Talon_reordered.fasta','S288c.genome.fa','barcode11.cns.fa', 'Jean-Talon.unitigs.fasta', 'Jean-Talon.contigs.fasta']:\n", | ||
" ASSEMBLIES[s] = {}\n", | ||
" tig_order[s] = []\n", | ||
" cl = []\n", | ||
" with open(f'/Volumes/MacintoshHD/Dropbox/Jean_Talon/assemblies/{s}') as fi:\n", | ||
" for seq in SeqIO.parse(fi, 'fasta'):\n", | ||
" ASSEMBLIES[s][seq.id] = seq\n", | ||
" tig_order[s].append(seq.id)\n", | ||
" cl.append(len(seq.seq))\n", | ||
" \n", | ||
" cl = pd.Series([0]+list(np.cumsum(cl)[:-1]), index=tig_order[s])\n", | ||
" cum_len[s] = cl\n", | ||
"\n", | ||
" if s!='Jean-Talon_reordered.fasta':\n", | ||
" delta = pd.read_csv(f'/Volumes/MacintoshHD/Dropbox/Jean_Talon/mummer/{s}.coords', sep='\\t', skiprows=range(4), index_col=None, header=None)\n", | ||
" delta = delta.loc[(delta[4]>=1e4) & (delta[5]>=1e4)].astype({1:int,2:int,2:int,2:int}).sort_values(by=4)\n", | ||
" \n", | ||
" delta['s1'] = delta[0]+cum_len['Jean-Talon_reordered.fasta'].loc[delta[7].values].values\n", | ||
" delta['e1'] = delta[1]+cum_len['Jean-Talon_reordered.fasta'].loc[delta[7].values].values\n", | ||
" delta['s2'] = delta[2]+cum_len[s].loc[delta[8].values].values\n", | ||
" delta['e2'] = delta[3]+cum_len[s].loc[delta[8].values].values\n", | ||
" \n", | ||
" DELTA[s] = delta " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fig = plt.figure(figsize=[32,8])\n", | ||
"gs = plt.GridSpec(ncols=4, nrows=1, wspace=0.3, left=0.03, bottom=0.08, right=0.96, top=0.9)\n", | ||
"\n", | ||
"assembly_alias = {'Jean-Talon_reordered.fasta': 'Jean-Talon wtdbg2 polished',\n", | ||
" 'S288c.genome.fa': 'S288C',\n", | ||
" 'barcode11.cns.fa': 'Jean-Talon wtdbg2 draft (>8kb reads)', \n", | ||
" 'Jean-Talon.unitigs.fasta': 'Jean-Talon Canu draft unitigs (>8kb reads)', \n", | ||
" 'Jean-Talon.contigs.fasta': 'Jean-Talon Canu draft contigs (>8kb reads)'}\n", | ||
"coord_translocation = np.mean(np.array([1.4e5, 1.47e5])+cum_len['Jean-Talon_reordered.fasta'].loc['ctg6_pilon'])\n", | ||
"\n", | ||
"for ax_idx, s in enumerate(['S288c.genome.fa', 'barcode11.cns.fa', 'Jean-Talon.contigs.fasta', 'Jean-Talon.unitigs.fasta']):\n", | ||
" ax = fig.add_subplot(gs[ax_idx])\n", | ||
" delta = DELTA[s]\n", | ||
" for i in delta.index:\n", | ||
" s1, e1, s2, e2 = delta.loc[i, ['s1','e1','s2','e2']]\n", | ||
" c = str(1-delta.loc[i,6]/100)\n", | ||
" ax.plot([s1,e1], [s2,e2], color=c, lw=1)\n", | ||
" for i in cum_len[s]:\n", | ||
" ax.axhline(i, lw=0.5, ls=':', color='k')\n", | ||
" for i in cum_len['Jean-Talon_reordered.fasta']:\n", | ||
" ax.axvline(i, lw=0.5, ls=':', color='k')\n", | ||
" ax.margins(0)\n", | ||
" ax.axvline(coord_translocation, color='red', ls='-', lw=3, alpha=0.3)\n", | ||
" \n", | ||
" assembly_size = ax.axis()\n", | ||
" ax.set_xticks(np.arange(0, assembly_size[1], 1e6))\n", | ||
" ax.set_xticklabels(np.arange(0, assembly_size[1]*1e-6, 1).astype(int))\n", | ||
" ax.set_yticks(np.arange(0, assembly_size[3], 1e6))\n", | ||
" ax.set_yticklabels(np.arange(0, assembly_size[3]*1e-6, 1).astype(int))\n", | ||
" \n", | ||
" #if s=='S288c.genome.fa':\n", | ||
" for tig, df in delta.groupby(8):\n", | ||
" if len(ASSEMBLIES[s][tig].seq)>2e5:\n", | ||
" ax.text(assembly_size[1]*1.02, np.mean([df['s2'].min(), df['e2'].max()]), s=tig, va='center', ha='left', size=7)\n", | ||
" \n", | ||
" for tig, df in delta.groupby(7):\n", | ||
" if len(ASSEMBLIES['Jean-Talon_reordered.fasta'][tig].seq)>2e5:\n", | ||
" ax.text(np.mean([df['s1'].min(), df['e1'].max()]), assembly_size[3]*1.02, s=tig, va='bottom', ha='center', rotation=90, size=7)\n", | ||
" \n", | ||
" ax.set_xlabel(assembly_alias['Jean-Talon_reordered.fasta'], size=14)\n", | ||
" ax.set_ylabel(assembly_alias[s], size=14)\n", | ||
" \n", | ||
"plt.savefig('/Volumes/MacintoshHD/Dropbox/Jean_Talon/fig/FigS7.svg')\n", | ||
"#plt.show()\n", | ||
"plt.close()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.