Merge branch 'master' of github.com:merenlab/anvio

merenlab · Oct 27, 2023 · 9227669 · 9227669
2 parents 109c9bd + 79aa42b
commit 9227669
Show file tree

Hide file tree

Showing 53 changed files with 1,848 additions and 591 deletions.
diff --git a/Dockerfiles/anvio-main/Dockerfile b/Dockerfiles/anvio-main/Dockerfile
@@ -12,7 +12,7 @@
 #
 
 FROM continuumio/miniconda3:4.11.0
-ENV ANVIO_VERSION "7.1_main_0522"
+ENV ANVIO_VERSION "8"
 
 SHELL ["/bin/bash", "--login", "-c"]
 
@@ -21,7 +21,7 @@ RUN conda config --env --add channels conda-forge
 
 # Create a conda environment for anvi'o, activate it, and make sure it will
 # always be activated
-RUN conda create -n anvioenv python=3.7
+RUN conda create -n anvioenv python=3.10
 RUN conda init bash
 RUN conda activate anvioenv
 RUN echo "conda activate anvioenv" >> ~/.bashrc
@@ -43,23 +43,29 @@ RUN conda install -y nano
 RUN conda install -y -c conda-forge mamba
 
 # Setup the environment
-RUN mamba install -y -c bioconda -c conda-forge python=3.7 \
-        sqlite prodigal idba mcl muscle=3.8.1551 hmmer diamond \
-        blast megahit spades bowtie2 tbb=2020.3 bwa graphviz \
-        "samtools >=1.9" trimal iqtree trnascan-se fasttree vmatch \
-        r-base r-tidyverse r-optparse r-stringi r-magrittr
+RUN mamba install -y -c conda-forge -c bioconda python=3.10 \
+        sqlite prodigal idba mcl muscle=3.8.1551 famsa hmmer diamond \
+        blast megahit spades bowtie2 bwa graphviz "samtools>=1.9" \
+        trimal iqtree trnascan-se fasttree vmatch r-base r-tidyverse \
+        r-optparse r-stringi r-magrittr bioconductor-qvalue meme ghostscript
 
 # try this, too. it may also fail to install. which is OK:
 RUN mamba install -y -c bioconda fastani
 
-# install qvalue
-RUN Rscript -e 'install.packages("BiocManager", repos="https://cran.rstudio.com"); BiocManager::install("qvalue")'
+RUN wget -qO- "https://cmake.org/files/v3.23/cmake-3.23.1-linux-"$(uname -m)".tar.gz" | tar --strip-components=1 -xz -C /usr/local
+
+#GCC compiler
+RUN apt-get update && \
+    apt-get -y install gcc mono-mcs && \
+    rm -rf /var/lib/apt/lists/*
 
 # Install anvi'o from the v8 release tarball with pip
-RUN pip install git+https://github.com/merenlab/anvio.git
+RUN curl -L https://github.com/merenlab/anvio/releases/download/v8/anvio-8.tar.gz \
+        --output anvio-8.tar.gz
+RUN pip install anvio-8.tar.gz
 
-# Install METABAT and DAS_TOOL
-RUN conda install metabat2 das_tool
+# Install METABAT and DAS_TOOL 
+#RUN mamba install metabat2 das_tool
 
 # Install CONCOCT
 RUN apt-get update && apt-get install -qq build-essential libgsl0-dev bedtools mummer samtools perl libssl-dev

diff --git a/anvio/argparse.py b/anvio/argparse.py
@@ -59,7 +59,7 @@ def get_anvio_epilogue(self):
 
         version = anvio.anvio_version_for_help_docs
 
-        general_help = f"https://merenlab.org/software/anvio/help/{version}"
+        general_help = f"https://anvio.org/help/{version}"
         program_help = f"{general_help}/programs/{self.prog}"
 
         if os.path.exists(os.path.join(os.path.dirname(docs.__file__), f"programs/{self.prog}.md")):

diff --git a/anvio/biochemistry/reactionnetwork.py b/anvio/biochemistry/reactionnetwork.py
diff --git a/anvio/cogs.py b/anvio/cogs.py
@@ -702,7 +702,7 @@ def raise_error(line_num, line_content, fields, e):
         p_id_without_cog_id = set([])
 
         line_counter = 0
-        for line in open(input_file_path, 'rU').readlines():
+        for line in open(input_file_path, 'r').readlines():
             line_counter += 1
 
             if line_counter % 500 == 0:
@@ -823,7 +823,7 @@ def format_categories(self, input_file_path, output_file_path):
         progress.update('...')
 
         output = open(output_file_path, 'w')
-        for line in open(input_file_path, 'rU').readlines():
+        for line in open(input_file_path, 'r').readlines():
             if line.startswith('#'):
                 continue
 
@@ -951,7 +951,7 @@ def check_raw_data_hash_and_existence(self, input_file_path, output_file_path):
 
         # Get a dictionary of checksums; the file is formatted as "checksum filename" per line
         checksums = {}
-        for line in open(input_file_path, 'rU').readlines():
+        for line in open(input_file_path, 'r').readlines():
             stripped = line.strip('\n').split(' ')
             file_name = stripped[-1].strip('*')
             checksums[file_name] = stripped[0]

diff --git a/anvio/data/static/template/inversions.tmpl b/anvio/data/static/template/inversions.tmpl
@@ -117,6 +117,7 @@
                                                 <tr>
                                                     <th>ID</th>
                                                     <th>Source</th>
+                                                    <th>Contig</th>
                                                     <th>Length</th>
                                                     <th>Direction</th>
                                                     <th>Start</th>
@@ -128,6 +129,7 @@
                                                 <tr>
                                                     <td>{{ gene|lookup:"gene_callers_id"|pretty }}</td>
                                                     <td>{{ gene|lookup:"source"|pretty }}</td>
+                                                    <td>{{ gene|lookup:"contig"|pretty }}</td>
                                                     <td>{{ gene|lookup:"length"|pretty }}</td>
                                                     <td>{{ gene|lookup:"direction"|pretty }}</td>
                                                     <td>{{ gene|lookup:"start"|pretty }}</td>
@@ -238,6 +240,14 @@
                                             <td width="25%">Length of the inversion</td>
                                             <td>{{ inversions|lookup:inversion|lookup:"inversion_data"|lookup:"distance"|pretty }}</td>
                                         </tr>
+                                        <tr>
+                                            <td width="25%">Start position</td>
+                                            <td>{{ inversions|lookup:inversion|lookup:"inversion_data"|lookup:"first_end"|pretty }}</td>
+                                        </tr>
+                                        <tr>
+                                            <td width="25%">Stop position</td>
+                                            <td>{{ inversions|lookup:inversion|lookup:"inversion_data"|lookup:"second_start"|pretty }}</td>
+                                        </tr>
                                         <tr>
                                             <td width="25%">Number of samples observed</td>
                                             <td>{{ inversions|lookup:inversion|lookup:"inversion_data"|lookup:"num_samples" }}</td>
@@ -256,6 +266,7 @@
                             <div class="panel-heading">
                                 Inverted Repeats
                             </div>
+
                             <div class="panel-body">
                                 <table class="table table-hover">
                                     <thead id="{{ inversion }}-table">
@@ -355,6 +366,35 @@
                                     </div>
                                 </div>
                             </div>
+
+                            <div class="panel panel-default" style="margin-right:10px; margin-left:10px;">
+                                <div class="panel-heading">
+                                    Motifs
+                                </div>
+                                <div class="panel-body image-panel">
+                                    <table class="table">
+                                        <tbody>
+                                                <tr>
+                                                    <td width="25%"><h5>Motif group</h5></td>
+                                                    <td><h5>Motif logo</h5></td>
+                                                </tr>
+                                                {% for motif in inversions|lookup:inversion|lookup:"motifs" %}
+                                                        <tr>
+                                                            <td width="15%" style="vertical-align:middle;">{{ motif }}</td>
+                                                            <td class="extra-long">
+                                                            {% if inversions|lookup:inversion|lookup:"motifs"|lookup:motif|lookup:'logo_path' == None %}
+                                                                MEME was not able to generate the motif's logo in png format, which means that you are missing cool logo pictures in this summary.<br>
+                                                                The logos are still available in .eps format in the output directory.
+                                                            {% else %}
+                                                                <img src="{{ inversions|lookup:inversion|lookup:"motifs"|lookup:motif|lookup:'logo_path' }}" alt="Motif logo" style="height:120px;">
+                                                            {% endif %}
+                                                            </td>
+                                                        </tr>
+                                                {% endfor %}
+                                        </tbody>
+                                    </table>
+                                </div>
+                            </div>
                         </div>
                     </div>
                 </div>
@@ -389,10 +429,19 @@
         white-space:normal;
         max-width: 300px;
     }
+    .extra-long{
+        white-space:normal;
+        max-width: 900px;
+    }
     #popover-panel{
         border: none;
         margin-bottom: 0px;
     }
+    .image-panel{
+        width: fit-content;
+        display: flex;
+        justify-content: left;
+    }
 </style>
 <script type="text/javascript">
     // Binding template to the Popover Content

diff --git a/anvio/dbops.py b/anvio/dbops.py
@@ -3983,15 +3983,17 @@ def init(self):
             return
 
         self.meta = dbi(self.db_path, expecting=self.db_type).get_self_table()
-
-        for key in ['num_genomes', 'gene_cluster_min_occurrence', 'use_ncbi_blast', 'exclude_partial_gene_calls', \
-                    'num_gene_clusters', 'num_genes_in_gene_clusters', 'gene_alignments_computed', 'items_ordered']:
+        for key in ['num_genomes', 'gene_cluster_min_occurrence', 'use_ncbi_blast', 'exclude_partial_gene_calls',
+                    'num_gene_clusters', 'num_genes_in_gene_clusters', 'gene_alignments_computed', 'items_ordered',
+                    'reaction_network_ko_annotations_hash', 'reaction_network_kegg_database_release',
+                    'reaction_network_modelseed_database_sha']:
             try:
                 self.meta[key] = int(self.meta[key])
             except:
                 pass
 
-        for key in ['min_percent_identity', 'minbit', 'mcl_inflation']:
+        for key in ['min_percent_identity', 'minbit', 'mcl_inflation',
+                    'reaction_network_consensus_threshold', 'reaction_network_discard_ties']:
             try:
                 self.meta[key] = float(self.meta[key])
             except:
@@ -4015,6 +4017,8 @@ def touch(self):
 
         # creating empty default tables for pan specific operations:
         self.db.create_table(t.pan_gene_clusters_table_name, t.pan_gene_clusters_table_structure, t.pan_gene_clusters_table_types)
+        self.db.create_table(t.pan_gene_cluster_function_reactions_table_name, t.pan_gene_cluster_function_reactions_table_structure, t.pan_gene_cluster_function_reactions_table_types)
+        self.db.create_table(t.pan_gene_cluster_function_metabolites_table_name, t.pan_gene_cluster_function_metabolites_table_structure, t.pan_gene_cluster_function_metabolites_table_types)
 
         # creating empty default tables for standard anvi'o pan dbs
         self.db.create_table(t.item_additional_data_table_name, t.item_additional_data_table_structure, t.item_additional_data_table_types)
@@ -4079,8 +4083,7 @@ def init(self):
         try:
             for key in ['split_length', 'kmer_size', 'total_length', 'num_splits', 'num_contigs',
                         'genes_are_called', 'splits_consider_gene_calls', 'scg_taxonomy_was_run',
-                        'trna_taxonomy_was_run', 'external_gene_calls', 'external_gene_amino_acid_seqs',
-                        'skip_predict_frame']:
+                        'trna_taxonomy_was_run', 'external_gene_calls', 'external_gene_amino_acid_seqs', 'skip_predict_frame']:
                 self.meta[key] = int(self.meta[key])
         except KeyError:
             raise ConfigError("Oh no :( There is a contigs database here at '%s', but it seems to be broken :( It is very "
@@ -4332,7 +4335,7 @@ def create(self, args):
 
         if description_file_path:
             filesnpaths.is_file_plain_text(description_file_path)
-            description = open(os.path.abspath(description_file_path), 'rU').read()
+            description = open(os.path.abspath(description_file_path), 'r').read()
         else:
             description = ''
 
@@ -4573,6 +4576,9 @@ def create(self, args):
         self.db.set_meta_value('scg_taxonomy_database_version', None)
         self.db.set_meta_value('trna_taxonomy_was_run', False)
         self.db.set_meta_value('trna_taxonomy_database_version', None)
+        self.db.set_meta_value('reaction_network_ko_annotations_hash', None)
+        self.db.set_meta_value('reaction_network_kegg_database_release', None)
+        self.db.set_meta_value('reaction_network_modelseed_database_sha', None)
         self.db.set_meta_value('creation_date', self.get_date())
         self.disconnect()
 
@@ -4951,7 +4957,7 @@ def get_description_in_db(anvio_db_path, run=run):
 
 def update_description_in_db_from_file(anvio_db_path, description_file_path, run=run):
     filesnpaths.is_file_plain_text(description_file_path)
-    description = open(os.path.abspath(description_file_path), 'rU').read()
+    description = open(os.path.abspath(description_file_path), 'r').read()
 
     update_description_in_db(anvio_db_path, description, run=run)
 

diff --git a/anvio/docs/artifacts/user-modules-data.md b/anvio/docs/artifacts/user-modules-data.md
@@ -15,9 +15,11 @@ Also, think about how you will annotate each enzyme, because for each one you wi
 Enzyme comes from... | annotation program | ANNOTATION_SOURCE
 |:---|:---|:---|
 KEGG KOfam | %(anvi-run-kegg-kofams)s | Kofam
-NCBI COG (2020) | %(anvi-run-ncbi-cogs)s | COG20_FUNCTION
-NCBI COG (2014) | %(anvi-run-ncbi-cogs)s | COG14_FUNCTION
+NCBI COG (2020) | %(anvi-run-ncbi-cogs)s with `--cog-version COG20`| COG20_FUNCTION
+NCBI COG (2014) | %(anvi-run-ncbi-cogs)s with `--cog-version COG14`| COG14_FUNCTION
+archaeal COGs (2014) | %(anvi-run-ncbi-cogs)s with `--cog-version arCOG14`| arCOG14_FUNCTION
 PFAM | %(anvi-run-pfams)s | Pfam
+CAZy dbCAN | %(anvi-run-cazymes)s | CAZyme
 custom HMMs | %(anvi-run-hmms)s with `--hmm-source` and `--add-to-functions-table` parameters | name of directory given to `--hmm-source`
 other annotation strategy | %(anvi-import-functions)s | source defined in input file
 

diff --git a/anvio/docs/images/png/anvi-report-inversions-01.png b/anvio/docs/images/png/anvi-report-inversions-01.png
diff --git a/anvio/docs/images/png/anvi-report-inversions-02.gif b/anvio/docs/images/png/anvi-report-inversions-02.gif
diff --git a/anvio/docs/images/png/anvi-report-inversions-03.png b/anvio/docs/images/png/anvi-report-inversions-03.png
diff --git a/anvio/docs/images/png/anvi-report-inversions-04.png b/anvio/docs/images/png/anvi-report-inversions-04.png
diff --git a/anvio/docs/images/png/anvi-report-inversions-05.png b/anvio/docs/images/png/anvi-report-inversions-05.png
diff --git a/anvio/docs/images/png/metabolism_reconstruction.png b/anvio/docs/images/png/metabolism_reconstruction.png
diff --git a/anvio/docs/programs/anvi-estimate-metabolism.md b/anvio/docs/programs/anvi-estimate-metabolism.md
@@ -698,7 +698,7 @@ Once we have the completeness scores and copy numbers of all possible paths thro
 So if the module does not have any complete paths, then its copy number is 0. If it has one complete path, then its copy number is the copy number of that path. If there are multiple paths with the highest completeness score, then its copy number is the maximum of the copy numbers of those paths - for example, let's say we have two paths, both of which are 90%% complete. One of those paths has a copy number of 1 and the other has a copy number of 3. The module copy number would be 3 in this case.
 
 {:.notice}
-We're making assumptions here again, just like we were when computing module completeness. Any of those paths (or none of them) could be the one that is used in the cell, and we don't know which one. But the idea here is that if a sample has the most copies of path X, there is probably a good reason that is has that many copies because microbial cells like to streamline their genomes whenever possible.
+We're making assumptions here again, just like we were when computing module completeness. Any of those paths (or none of them) could be the one that is used in the cell, and we don't know which one. But the idea here is that if a sample has the most copies of path X, there is probably a good reason that it has that many copies because microbial cells like to streamline their genomes whenever possible.
 
 One last note - if a module does not have any paths of highest completeness, we cannot compute the copy number. In this case, the copy number of the module will be reported as 'NA' in the output file(s).