Skip to content

Commit

Permalink
Merge pull request #2 from fmalmeida/dev
Browse files Browse the repository at this point in the history
merge dev to master
  • Loading branch information
fmalmeida authored Dec 19, 2022
2 parents 24eb1da + 0e95997 commit e1c52c2
Show file tree
Hide file tree
Showing 17 changed files with 384 additions and 92 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
tests
test
docs/_site
34 changes: 17 additions & 17 deletions docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ To keep things simple and reproducible, let's work with the reference *E. coli*

.. code-block:: bash
## download ecoli genome Sakai
wget \
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/008/865/GCF_000008865.2_ASM886v2/GCF_000008865.2_ASM886v2_genomic.fna.gz \
-O ecoli_sakai.fna.gz && \
gzip -d ecoli_sakai.fna.gz
## download ecoli genome K12
wget \
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz \
-O ecoli_k12.fna.gz && \
gzip -d ecoli_k12.fna.gz
## download ecoli genome Sakai
wget \
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/008/865/GCF_000008865.2_ASM886v2/GCF_000008865.2_ASM886v2_genomic.fna.gz \
-O ecoli_sakai.fna.gz && \
gzip -d ecoli_sakai.fna.gz
## download ecoli genome K12
wget \
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz \
-O ecoli_k12.fna.gz && \
gzip -d ecoli_k12.fna.gz
Prepare input
"""""""""""""
Expand All @@ -36,15 +36,15 @@ We can prepare this csv with:

.. code-block:: bash
echo "ecoli_k12.fna,K12,purple" > input.fofn
echo "ecoli_sakai.fna,SAKAI,lorange" >> input.fofn
echo "ecoli_k12.fna,K12,purple" > input.fofn
echo "ecoli_sakai.fna,SAKAI,lorange" >> input.fofn
The file must look like this (input.fofn):

.. code-block:: bash
ecoli_k12.fna,K12,purple
ecoli_sakai.fna,SAKAI,lorange
ecoli_k12.fna,K12,purple
ecoli_sakai.fna,SAKAI,lorange
Create the circos plot
""""""""""""""""""""""
Expand All @@ -58,8 +58,8 @@ With that, we can finally create a minimal circos configuration file and plots h

.. code-block:: bash
# run easy_circos
plot_circos --fofn input.fofn
# run easy_circos
plot_circos --fofn input.fofn
This will create all the circos configuration files and required data files under the ``results`` folder (can be changed with ``--outdir``). See the created plot:

Expand Down
Empty file modified recipe/bin/GCcalc.py
100644 → 100755
Empty file.
11 changes: 11 additions & 0 deletions recipe/bin/plot_circos
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ export INTRACHR_FILE="links_concatenated_colored_no_intrachr.txt"
export INTRACHR_SHOW="no"
export GCWINDOW=5000
export GCSTEP=5000
export BACANNOT="no"
export SKIP_LINKS="no"

######################################
### Function to filter FASTA files ###
Expand Down Expand Up @@ -82,6 +84,7 @@ source ${SCRIPT_DIR}/../src/tiles.sh
### Function to create circos.conf file ###
###########################################
source ${SCRIPT_DIR}/../src/write_circos.sh
source ${SCRIPT_DIR}/../src/write_circos_bacannot.sh

###############################
### Function to plot circos ###
Expand Down Expand Up @@ -223,6 +226,14 @@ case $ARGS in
gff2tiles
exit
;;
--bacannot)
export BACANNOT="yes"
shift
;;
--skip_links)
export SKIP_LINKS="yes"
shift
;;
*)
printf "******************************\n"
printf "Error: Invalid argument $1\n"
Expand Down
Empty file modified recipe/bin/removesmalls.pl
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion recipe/cmd_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ conda build purge-all
conda clean -afy

# build package
conda-build --user falmeida . -c conda-forge -c r -c defaults -c bioconda -c falmeida -c anaconda
conda-build --user falmeida . -c conda-forge -c r -c defaults -c bioconda -c falmeida -c anaconda $@
2 changes: 1 addition & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "easy_circos" %}
{% set version = "0.2" %}
{% set version = "0.3" %}

package:
name: "{{ name|lower }}"
Expand Down
4 changes: 2 additions & 2 deletions recipe/src/find_links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ find_links()
mkdir -p ${RESULTS}/all_vs_all_blast

# concatenate genomes
cat ${RESULTS}/filtered/* >> ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta ;
export CONCAT_FASTA=${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta
cat ${RESULTS}/filtered/* >> ${RESULTS}/concatenated_genomes.fasta ;
export CONCAT_FASTA=${RESULTS}/concatenated_genomes.fasta
export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db

# Run blast
Expand Down
2 changes: 1 addition & 1 deletion recipe/src/gc_skew.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
gc_skew()
{
# exec GCcalc.py
$CONDA_PREFIX/bin/python3 $CONDA_PREFIX/bin/GCcalc.py -f ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta -w $GCWINDOW -s $GCSTEP | \
$CONDA_PREFIX/bin/python3 $CONDA_PREFIX/bin/GCcalc.py -f ${RESULTS}/concatenated_genomes.fasta -w $GCWINDOW -s $GCSTEP | \
cut -f 1,2,3,5 | awk '{ if ($4 > 0) print $0 "\t" "color=dblue"; else print $0 "\t" "color=red"}' > ${RESULTS}/conf/GC_skew.txt
}
2 changes: 1 addition & 1 deletion recipe/src/gff2labels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ gff2labels()
}
} ;
}
}' $GFF
}' <( sed 's/ /__/g' $GFF )
}
4 changes: 2 additions & 2 deletions recipe/src/gff2tiles.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
gff2tiles()
{
# use awk to parse gff and create labels based on user desire
# use awk to parse gff and create tiles based on user desire
echo -e "#chr\tstart\tend\toptions\tcomment"
awk \
-v pattern="$PATTERN" \
Expand All @@ -18,5 +18,5 @@ gff2tiles()
print $1,$4,$5,"color="color_val,"# attributes: "s
} ;
}
}' $GFF
}' <( sed 's/ /__/g' $GFF )
}
118 changes: 62 additions & 56 deletions recipe/src/help.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,80 +5,86 @@ cat << EOF
Simple script to create a circos plot between FASTA files.
Copyright, Felipe Almeida <[email protected]>, 2021
Syntax:
Syntax:
# To draw circos
plot_circos.sh [-h] [--fofn <file> --outdir <outdir> --minlen <int> --minid <int>
--linklen <int> --show_intrachr --gc_window <int> --gc_step <int>
--labels <file> --tiles <files> ]
# To draw circos
plot_circos [-h] [ options ]
# To use helpful scripts
plot_circos [ --gff2labels <FEATURES> <PATTERN> <ATTRIBUTE> <COLOR> <GFF> ]
plot_circos [ --gff2tiles <FEATURES> <PATTERN> <COLOR> <GFF> ]
# To use helpful scripts
plot_circos [ --gff2labels <FEATURES> <PATTERN> <ATTRIBUTE> <COLOR> <GFF> ]
plot_circos [ --gff2tiles <FEATURES> <PATTERN> <COLOR> <GFF> ]
Options:
Options:
# Help
-h/--help Print this help
# Help
-h/--help Print this help
# Threads for blastn
--threads Number of threads to use [Default: 1]
# Threads for blastn
--threads Number of threads to use [Default: 1]
# Output
--outdir Path to output directory [Default: ./results]
# Output
--outdir Path to output directory [Default: ./results]
# Input file of file names
# CSV: fasta path,prefix,color
--fofn File of file names containing list of fastas to
draw circos plot.
# Input file of file names
# CSV: fasta path,prefix,color
--fofn File of file names containing list of fastas to draw circos plot.
# Input min. length
--minlen Min size of contigs to consider for plot [Default: 10000]
# Input min. length
--minlen Min size of contigs to consider for plot [Default: 10000]
# Links (blastn) min. percentage id
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85]
--linklen Min. link (blastn hit) length to display in plot [Default: 5000]
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false]
Mandatory if using only one FASTA, otherwise, links will not be shown.
# Links (blastn) min. percentage id
--skip_links Do not compute blast and do not draw links.
Useful when only the configuration files are wanted. [Default: false]
# GC skew config
--gc_window GC skew window size [Default: 5000]
--gc_step GC skew step size [Default: 5000]
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85]
# Labels config
--labels TSV file containing the label definitions for plotting. The file must contain
4 or 5 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson,
"DATA FORMAT" section. The first column must be the name (ID) of the contig.
Checkout the "--gff2labels" script (below).
--linklen Min. link (blastn hit) length to display in plot [Default: 5000]
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false]
Mandatory if using only one FASTA, otherwise, links will not be shown.
# Tiles config
--tiles TSV file containing the tile definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files.
The first column must be the name (ID) of the contig.
Checkout the "--gff2tiles" script (below).
# GC skew config
--gc_window GC skew window size [Default: 5000]
--gc_step GC skew step size [Default: 5000]
# Labels config
--labels TSV file containing the label definitions for plotting. The file must contain
4 or 5 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson,
"DATA FORMAT" section. The first column must be the name (ID) of the contig.
Checkout the "--gff2labels" script (below).
# Helpful scripts!
# See the examples in our quickstart: https://easy-circos.readthedocs.io/en/latest/quickstart.html
--gff2labels A useful script that allows you to filter a GFF file and create a "circos label file"
with desired inputs. Eg. "plot_circos --gff2labels CDS arcA ID red ecoli_k12.gff". This
command will get each line where the feature (3rd column) is a CDS and that has the
"acrA" (in the complete line) pattern to write the "circos labels file" using the ID
attributes column as label (string as found in the gff), giving these features
a "red" color option.
# Tiles config
--tiles TSV file containing the tile definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files.
The first column must be the name (ID) of the contig.
Checkout the "--gff2tiles" script (below).
For <FEATURES> and <PATTERN> users can use "" to match anything, and "|" to match
more than one string. E.g. plot_circos --gff2labels "" "acrA|mdt" ID red ecoli_k12.gff
--gff2tiles A useful script that allows you to filter a GFF file and create a "circos tiles file"
with desired inputs. Eg. "plot_circos --gff2tiles CDS arcA red ecoli_k12.gff". This
command will get each line where the feature (3rd column) is a CDS and that has the
"acrA" (in the complete line) pattern to write the "circos tiles file" giving these
features a "red" color option.
# Helpful scripts!
# See the examples in our quickstart: https://easy-circos.readthedocs.io/en/latest/quickstart.html
--gff2labels A useful script that allows you to filter a GFF file and create a "circos label file"
with desired inputs. Eg. "plot_circos --gff2labels CDS acrA ID red ecoli_k12.gff". This
command will get each line where the feature (3rd column) is a CDS and that has the
"acrA" (in the complete line) pattern to write the "circos labels file" using the ID
attributes column as label (string as found in the gff), giving these features
a "red" color option.
For <FEATURES> and <PATTERN> users can use "" to match anything, and "|" to match
more than one string. E.g. plot_circos --gff2tiles "" "acrA|mdt" red ecoli_k12.gff.
For <FEATURES> and <PATTERN> users can use "" to match anything, and "|" to match
more than one string. E.g. plot_circos --gff2labels "" "acrA|mdt" ID red ecoli_k12.gff
--gff2tiles A useful script that allows you to filter a GFF file and create a "circos tiles file"
with desired inputs. Eg. "plot_circos --gff2tiles CDS acrA red ecoli_k12.gff". This
command will get each line where the feature (3rd column) is a CDS and that has the
"acrA" (in the complete line) pattern to write the "circos tiles file" giving these
features a "red" color option.
For <FEATURES> and <PATTERN> users can use "" to match anything, and "|" to match
more than one string. E.g. plot_circos --gff2tiles "" "acrA|mdt" red ecoli_k12.gff.
--bacannot It also writes the customized configs for bacannot annotations.
Only useful inside bacannot pipelines.
EOF
Expand Down
10 changes: 10 additions & 0 deletions recipe/src/parse_links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,13 @@ done<"$FOFN"
# create additional file without intrachr links
awk -F'\t' '{ if ($1 != $4) { print } }' ${RESULTS}/conf/links_concatenated_colored.txt > ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt ;
}

empty_links()
{
    # Used instead of find_links + parse_links when links are skipped
    # (SKIP_LINKS != "no"): no blast is run, but the concatenated FASTA is
    # still produced and empty link files are left in conf/.
    #
    # Reads:   RESULTS (output directory; expects ${RESULTS}/filtered/* to exist)
    # Exports: CONCAT_FASTA, BLAST_DB
    # Writes:  ${RESULTS}/concatenated_genomes.fasta
    #          ${RESULTS}/conf/links_concatenated_colored.txt (empty)
    #          ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt (empty)

    # concatenate genomes
    # Truncate (>) instead of append (>>): appending duplicates every sequence
    # when the function runs again on the same output directory.
    export CONCAT_FASTA="${RESULTS}/concatenated_genomes.fasta"
    cat "${RESULTS}"/filtered/* > "${CONCAT_FASTA}" ;
    export BLAST_DB="${RESULTS}/all_vs_all_blast/blast_db"

    # make sure the conf directory is there before creating the empty link files
    mkdir -p "${RESULTS}/conf"
    touch "${RESULTS}/conf/links_concatenated_colored.txt" "${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt"
}
2 changes: 1 addition & 1 deletion recipe/src/tiles.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ read -r -d '' TILES_CONF << EOM
# to understand its configuration read: http://circos.ca/documentation/tutorials/2d_tracks/tiles/configuration
<plot>
type = tile
layers_overflow = grow
layers_overflow = collapse
file = circos_tiles.txt
r1 = 0.85r
r0 = 0.75r
Expand Down
23 changes: 22 additions & 1 deletion recipe/src/workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,27 @@ workflow()
karyotype ;

# Step 3
if [ "$SKIP_LINKS" = "no" ]
then
echo " # Finding links (all vs all blastn)!"
find_links ;
parse_links ;
else
echo " # Skipping links (all vs all blastn)!"
empty_links ;
fi

# Step 4
echo " # Removing duplicate lines in conf files!"
dedup ;
if [ "$SKIP_LINKS" = "no" ]
then
check_links ;
export DEFAULT_LINE="chromosomes_display_default = no"
else
export DEFAULT_LINE="chromosomes_display_default = yes"
export CUSTOM_CHR_LINE=""
fi

# Step 5
echo " # Computing GC Skew!"
Expand Down Expand Up @@ -47,11 +60,19 @@ workflow()

# Step 6
echo " # Wrinting circos conf file!"
write_circos > ${RESULTS}/conf/circos.conf ;
if [ "$BACANNOT" == "no" ]
then
write_circos > ${RESULTS}/conf/circos.conf ;
else
write_circos_bacannot > ${RESULTS}/conf/circos.conf ;
fi

# Step 7
if [ "$BACANNOT" == "no" ]
then
echo " # Plotting circos!"
plot_circos ;
fi

# Bye
echo ${BYE}
Expand Down
Loading

0 comments on commit e1c52c2

Please sign in to comment.