diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ee3528b..f6dbf584 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html) +## [0.4.16] 2024-06-12 + +- Better management for mongo error when adding annotation +- Manage empty genes (no subentities) +- Manage CDS with same IDs in the same gene + ## [0.4.15] 2024-04-05 ### Fixed diff --git a/imports/api/genomes/annotation/addAnnotation.test.js b/imports/api/genomes/annotation/addAnnotation.test.js index 5350f8ea..1179e144 100644 --- a/imports/api/genomes/annotation/addAnnotation.test.js +++ b/imports/api/genomes/annotation/addAnnotation.test.js @@ -63,7 +63,15 @@ describe('AddAnnotation', function testAnnotation() { chai.assert.equal(gene.start, 13640); chai.assert.equal(gene.end, 15401); - chai.assert.lengthOf(gene.subfeatures, 13, 'Number of subfeatures is not 13'); + chai.assert.lengthOf(gene.subfeatures, 14, 'Number of subfeatures is not 14'); + + // Fixture has two CDS rows sharing one ID: expect the original ID plus a '.1'-suffixed duplicate + const has_default_cds = gene.subfeatures.some((sub) => sub.type === "CDS" && sub.ID === "Bni|B01g000010.2N.1.cds1"); + const has_new_cds = gene.subfeatures.some((sub) => sub.type === "CDS" && sub.ID === "Bni|B01g000010.2N.1.cds1.1"); + + chai.assert.isTrue(has_default_cds, "Bni|B01g000010.2N.1.cds1 was not found"); + chai.assert.isTrue(has_new_cds, "Bni|B01g000010.2N.1.cds1.1 was not found"); + }); it('Should add multiple copies of genes with different annotation names', function addAnnotationGff3() { diff --git a/imports/api/genomes/annotation/parser/annotationParserGff3.js b/imports/api/genomes/annotation/parser/annotationParserGff3.js index 8964cb47..72f2e626 100644 --- a/imports/api/genomes/annotation/parser/annotationParserGff3.js +++ b/imports/api/genomes/annotation/parser/annotationParserGff3.js @@ -51,6 +51,8 @@ 
class AnnotationProcessor { // (this.IdParents[parents[0]] in addChildren function). this.IdParents = {}; this.indexIdParent = 0; + + this.cds_ids = {}; } /** @@ -358,6 +360,8 @@ class AnnotationProcessor { features.attributes, ); this.geneLevelHierarchy.attributes = attributesFiltered; + + this.geneLevelHierarchy.subfeatures = []; } else { // Create an array if not exists for the subfeatures (exons, cds ...) of // the gene. @@ -387,7 +391,18 @@ class AnnotationProcessor { ); } - const identifiant = features.ID + let identifiant = features.ID + + // Discontinuous CDS rows may share one ID: keep the first as-is and suffix later ones (.1, .2, ...) so IDs stay unique within the gene + if (typeAttr === 'CDS'){ + if (identifiant in this.cds_ids){ + this.cds_ids[identifiant] += 1; + identifiant = `${identifiant}.${this.cds_ids[identifiant] - 1}`; + } else { + this.cds_ids[identifiant] = 1 + } + } + let proteinID // Complete ID parents. @@ -446,7 +461,7 @@ class AnnotationProcessor { GeneSchema.validate(geneWithoutId); } catch (err) { logger.error(err) - throw new Error('There is something wrong with the gene collection schema'); + throw new Error('Current gene is not valid, stopping'); } return true; }; @@ -528,7 +543,7 @@ class AnnotationProcessor { // Increment. this.nAnnotation += 1; - const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => { + const protein_ids = this.geneLevelHierarchy.subfeatures.flatMap(children => { if(typeof children.protein_id === 'undefined'){ return [] } else { @@ -538,6 +553,9 @@ class AnnotationProcessor { this.geneLevelHierarchy.children = this.geneLevelHierarchy.children.concat(protein_ids) + // Validate schema before adding to bulk + this.isValidateGeneSchema(); + // Add to bulk operation. this.geneBulkOperation.insert(this.geneLevelHierarchy) @@ -547,6 +565,7 @@ class AnnotationProcessor { this.shiftSequence = 0; this.IdParents = {}; this.indexIdParent = 0; + this.cds_ids = {}; // Init new gene. 
this.initGeneHierarchy(features); @@ -554,7 +573,8 @@ class AnnotationProcessor { // Arbitrary break up of batch size to save ram if (this.geneBulkOperation.length > 500) { this.isReset = true - return this.geneBulkOperation.execute(); + const runBulk = Meteor.wrapAsync(this.geneBulkOperation.execute, this.geneBulkOperation); + return runBulk(); } } } else { diff --git a/package.json b/package.json index 0455a7ed..2f6f2592 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "genoboo", - "version": "0.4.15", + "version": "0.4.16", "repository": "https://github.com/gogepp/genoboo", "description": "A portable website for browsing and querying genome sequences and annotations. Forked from genenotebook", "license": "AGPL-3.0", diff --git a/private/data/Bnigra.gff3 b/private/data/Bnigra.gff3 index 26a88232..cbc4a43d 100644 --- a/private/data/Bnigra.gff3 +++ b/private/data/Bnigra.gff3 @@ -8,6 +8,7 @@ B1 AAFC_GIFS exon 14210 14284 . - . ID=Bni%7CB01g000010.2N.1.exon4;Parent=Bni%7C B1 AAFC_GIFS exon 13970 14062 . - . ID=Bni%7CB01g000010.2N.1.exon5;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS exon 13640 13870 . - . ID=Bni%7CB01g000010.2N.1.exon6;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1 +B1 AAFC_GIFS CDS 14702 15401 . - 0 ID=Bni%7CB01g000010.2N.1.cds1;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14557 14636 . - 2 ID=Bni%7CB01g000010.2N.1.cds2;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14403 14486 . - 0 ID=Bni%7CB01g000010.2N.1.cds3;Parent=Bni%7CB01g000010.2N.1 B1 AAFC_GIFS CDS 14210 14284 . - 0 ID=Bni%7CB01g000010.2N.1.cds4;Parent=Bni%7CB01g000010.2N.1