diff --git a/CHANGELOG.md b/CHANGELOG.md index 53e85e28..6f66b44d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html) +## [0.4.5] 2023-09-19 + +### Added + +- Added Hectar loader + +### Changed + +- Changed GO API URL to match the updated Gene Ontology API endpoint + ## [0.4.4] 2023-06-23 ### Changed diff --git a/cli/genoboo.js b/cli/genoboo.js index 59328c65..22a032ca 100755 --- a/cli/genoboo.js +++ b/cli/genoboo.js @@ -755,6 +755,45 @@ Example: }) .exitOverride(customExitOverride(addEggnog)); +// Add Hectar annotations file. +const addHectar = add.command('hectar'); + +addHectar + .description('Add Hectar results to a running GeneNoteBook server') + .usage('[options] ') + .arguments('') + .requiredOption( + '-u, --username ', + 'GeneNoteBook admin username' + ) + .requiredOption( + '-p, --password ', + 'GeneNoteBook admin password' + ) + .option( + '--port [port]', + 'Port on which GeneNoteBook is running. Default: 3000' + ) + .action((file, { username, password, port = 3000 }) => { + if (typeof file !== 'string') addHectar.help(); + + const fileName = path.resolve(file); + if (!(fileName && username && password)) { + addHectar.help(); + } + + new GeneNoteBookConnection({ username, password, port }).call('addHectar', { + fileName, + }); + }) + .on('--help', () => { + console.log(` +Example: + genenotebook add hectar hectar_annotations.tab -u admin -p admin + `); + }) + .exitOverride(customExitOverride(addHectar)); + // add orthogroups.
const addOrthogroups = add.command('orthogroups'); diff --git a/imports/api/api.js b/imports/api/api.js index d552add6..486c50aa 100644 --- a/imports/api/api.js +++ b/imports/api/api.js @@ -19,6 +19,7 @@ import './genomes/annotation/addAnnotation.js'; import './genes/interproscan.js'; import './genes/addInterproscan.js'; import './genes/eggnog/addEggnog.js'; +import './genes/hectar/addHectar.js'; import './genes/scanGeneAttributes.js'; import './genes/updateAttributeInfo.js'; import './genes/updateGene.js'; @@ -54,5 +55,6 @@ import './jobqueue/process-blast.js'; import './jobqueue/process-download.js'; import './jobqueue/process-addGenome.js'; import './jobqueue/process-eggnog.js'; +import './jobqueue/process-hectar.js'; import './jobqueue/process-similarsequences.js'; import './jobqueue/process-orthogroup.js'; diff --git a/imports/api/genes/geneCollection.js b/imports/api/genes/geneCollection.js index c741c79e..cc093e3e 100644 --- a/imports/api/genes/geneCollection.js +++ b/imports/api/genes/geneCollection.js @@ -173,6 +173,12 @@ const GeneSchema = new SimpleSchema( optional: true, label: 'eggnog DB identifier (_id in eggnog collection)', }, + hectarId: { + type: String, + index: true, + optional: true, + label: 'Hectar identifier (_id in hectar collection)', + }, seqid: { type: String, label: 'ID of the sequence on which the gene is, e.g. 
chr1', diff --git a/imports/api/genes/hectar/addHectar.js b/imports/api/genes/hectar/addHectar.js new file mode 100644 index 00000000..7c8ebf6d --- /dev/null +++ b/imports/api/genes/hectar/addHectar.js @@ -0,0 +1,143 @@ +import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js'; +import jobQueue, { Job } from '/imports/api/jobqueue/jobqueue.js'; +import { ValidatedMethod } from 'meteor/mdg:validated-method'; +import { Genes } from '/imports/api/genes/geneCollection.js'; +import logger from '/imports/api/util/logger.js'; +import { Roles } from 'meteor/alanning:roles'; +import SimpleSchema from 'simpl-schema'; +import { Meteor } from 'meteor/meteor'; + +class HectarProcessor { + constructor() { + // Not a bulk mongo suite. + this.genesDb = Genes.rawCollection(); + this.nHectar = 0; + } + + /** + * Function that returns the total number of insertions or updates in the + * hectar collection. + * @function + * @return {Number} Return the total number of insertions or updates of + * hectar. + */ + getNumberHectar() { + return this.nHectar; + } + + parse = (line) => { + if (!(line.slice(0,10) === 'protein id' || line.split('\t').length <= 1)) { + // Get all hectar informations line by line and separated by tabs. + const [ + proteinId, + predictedTargetingCategory, + signalPeptideScore, + signalPeptideCleavageSite, + typeIISignalAnchorScore, + chloroplastScore, + mitochondrionScore, + otherScore, + ] = line.split('\t'); + + // Organize data in a dictionary. + const annotations = { + protein_id: proteinId, + predicted_targeting_category: predictedTargetingCategory, + signal_peptide_score: signalPeptideScore, + signal_peptide_cleavage_site: signalPeptideCleavageSite, + typeII_signal_anchor_score: typeIISignalAnchorScore, + chloroplast_score: chloroplastScore, + mitochondrion_score: mitochondrionScore, + other_score: otherScore, + }; + + // Filters undefined data (with a dash) and splits into an array for + // comma-separated data. 
+ for (const [key, value] of Object.entries(annotations)) { + // Rows with fewer columns leave trailing fields undefined; drop them too. + if (typeof value !== 'string' || value[0] === '-') { + annotations[key] = undefined; + } + if (typeof value === 'string' && value.indexOf(',') > -1) { + annotations[key] = value.split(','); + } + } + // If subfeatures is found in genes database (e.g: ID = + // MMUCEDO_000002-T1). + const subfeatureIsFound = Genes.findOne({ + $or: [ + { 'subfeatures.ID': proteinId }, + { 'subfeatures.protein_id': proteinId }, + ], + }); + + if (typeof subfeatureIsFound !== 'undefined') { + // Increment hectar. + this.nHectar += 1; + + // Update or insert if no matching documents were found. + const documentHectar = hectarCollection.upsert( + { protein_id: proteinId }, // selector. + annotations, // modifier. + ); + + // Update hectarId in genes database. + if (typeof documentHectar.insertedId !== 'undefined') { + // Hectar _id is created. + return this.genesDb.update({ + $or: [ + { 'subfeatures.ID': proteinId }, + { 'subfeatures.protein_id': proteinId }, + ]}, + { $set: { hectarId: documentHectar.insertedId } }, + ); + } else { + // Hectar already exists. + const hectarIdentifiant = hectarCollection.findOne({ protein_id: proteinId })._id; + return this.genesDb.update( + { $or: [{'subfeatures.ID': proteinId}, {'subfeatures.protein_id': proteinId}] }, + { $set: { hectarId: hectarIdentifiant } }, + ); + } + } else { + logger.warn(` +Warning ! ${proteinId} hectar annotation did +not find a matching protein domain in the genes database.
+${proteinId} is not added to the hectar database.`); + } + } + }; +} + +const addHectar = new ValidatedMethod({ + name: 'addHectar', + validate: new SimpleSchema({ + fileName: { type: String }, + }).validator(), + applyOptions: { + noRetry: true, + }, + run({ fileName }) { + if (!this.userId) { + throw new Meteor.Error('not-authorized'); + } + if (!Roles.userIsInRole(this.userId, 'admin')) { + throw new Meteor.Error('not-authorized'); + } + + logger.log('file :', { fileName }); + const job = new Job(jobQueue, 'addHectar', { fileName }); + const jobId = job.priority('high').save(); + + let { status } = job.doc; + logger.debug(`Job status: ${status}`); + while ((status !== 'completed') && (status !== 'failed')) { + const { doc } = job.refresh(); + status = doc.status; + } + return { result: job.doc.result, jobStatus: status}; + }, +}); + +export default addHectar; +export { HectarProcessor }; diff --git a/imports/api/genes/hectar/hectar.test.js b/imports/api/genes/hectar/hectar.test.js new file mode 100644 index 00000000..dbc210f8 --- /dev/null +++ b/imports/api/genes/hectar/hectar.test.js @@ -0,0 +1,64 @@ +/* eslint-env mocha */ +import { resetDatabase } from 'meteor/xolvio:cleaner'; +import chai from 'chai'; +import logger from '../../util/logger'; +import { hectarCollection } from './hectarCollection'; +import addHectar from './addHectar'; +import { addTestUsers, addTestGenome } from '../../../startup/server/fixtures/addTestData'; +import '../../jobqueue/process-hectar'; + +describe('hectar', function testHectar() { + let adminId; + let newUserId; + let adminContext; + let userContext; + + logger.log('Testing Hectar methods'); + + beforeEach(() => { + ({ adminId, newUserId } = addTestUsers()); + adminContext = { userId: adminId }; + userContext = { userId: newUserId }; + }); + + afterEach(() => { + resetDatabase(); + }); + + it('Should add Hectar tab file', function importhectar() { + // Increase timeout + this.timeout(20000); + + addTestGenome(true); // first positional flag; avoids assigning an implicit global +
const hectarParams = { + fileName: 'assets/app/data/Bnigra_hectar.tab', + }; + + // Should fail for non-logged in + chai.expect(() => { + addHectar._execute({}, hectarParams); + }).to.throw('[not-authorized]'); + + // Should fail for non admin user + chai.expect(() => { + addHectar._execute(userContext, hectarParams); + }).to.throw('[not-authorized]'); + + const { result } = addHectar._execute(adminContext, hectarParams); + + chai.assert.equal(result.nInserted, 1) + + const hecs = hectarCollection.find({ protein_id: 'BniB01g000010.2N.1' }).fetch(); + + chai.assert.lengthOf(hecs, 1, 'No hectar data found'); + + const hec = hecs[0]; + + chai.assert.equal(hec.predicted_targeting_category, 'other localisation'); + chai.assert.equal(hec.signal_peptide_score, '0.0583'); + chai.assert.equal(hec.typeII_signal_anchor_score, '0.0228'); + chai.assert.equal(hec.mitochondrion_score, '0.1032'); + chai.assert.equal(hec.other_score, '0.8968'); + }); +}); diff --git a/imports/api/genes/hectar/hectarCollection.js b/imports/api/genes/hectar/hectarCollection.js new file mode 100644 index 00000000..bf7dad3f --- /dev/null +++ b/imports/api/genes/hectar/hectarCollection.js @@ -0,0 +1,41 @@ +import SimpleSchema from 'simpl-schema'; +import { Mongo } from 'meteor/mongo'; + +const hectarSchema = new SimpleSchema({ + protein_id: { + type: String, + label: 'Query sequence name and type.', + }, + predicted_targeting_category: { + type: String, + label: 'Predicted sub-cellular localization.', + }, + signal_peptide_score: { + type: String, + label: 'Probability (score) to be a signal peptide.', + }, + signal_peptide_cleavage_site: { + type: String, + label: 'Predicted cleavage site of signal peptide.', + }, + typeII_signal_anchor_score: { + type: String, + label: 'Probability (score) to be a type II signal anchor.', + }, + chloroplast_score: { + type: String, + label: 'Probability (score) to be in chloroplast.', + }, + mitochondrion_score: { + type: String, + label: 'Probability (score) to be in 
mitochondrion.', + }, + other_score: { + type: String, + label: 'Probability (score) to be elsewhere .', + }, +}); + +const hectarCollection = new Mongo.Collection('hectar'); + +export { hectarCollection, hectarSchema }; diff --git a/imports/api/jobqueue/process-hectar.js b/imports/api/jobqueue/process-hectar.js new file mode 100644 index 00000000..4338b288 --- /dev/null +++ b/imports/api/jobqueue/process-hectar.js @@ -0,0 +1,57 @@ +import { HectarProcessor } from '/imports/api/genes/hectar/addHectar.js'; +import logger from '/imports/api/util/logger.js'; +import jobQueue from './jobqueue.js'; +import readline from 'readline'; +import fs from 'fs'; + +jobQueue.processJobs( + 'addHectar', + { + concurrency: 4, + payload: 1, + }, + async (job, callback) => { + const { fileName } = job.data; + logger.log(`Add ${fileName} hectar file.`); + + const lineProcessor = new HectarProcessor(); + + const rl = readline.createInterface({ + input: fs.createReadStream(fileName, 'utf8'), + crlfDelay: Infinity, + }); + + const { size: fileSize } = await fs.promises.stat(fileName); + let processedBytes = 0; + let processedLines = 0; + let nHectar = 0; + + for await (const line of rl) { + processedBytes += Buffer.byteLength(line, 'utf8') + 1; // bytes, to match fileSize; +1 for \n + processedLines += 1; + + if ((processedLines % 100) === 0) { + await job.progress( + processedBytes, + fileSize, + { echo: true }, + (err) => { + if (err) logger.error(err); + }, + ); + } + + try { + await lineProcessor.parse(line); + nHectar = lineProcessor.getNumberHectar(); + } catch (err) { + logger.error(err); + job.fail({ err }); + return callback(); // stop here: a failed job must not also be marked done + } + } + logger.log(`Inserted ${nHectar} Hectar`); + job.done({ nInserted: nHectar }); + callback(); + }, +); diff --git a/imports/api/methods/fetchDbxref.js b/imports/api/methods/fetchDbxref.js index 64e0f47f..90f6e300 100644 --- a/imports/api/methods/fetchDbxref.js +++ b/imports/api/methods/fetchDbxref.js @@ -25,7 +25,7 @@ const fetchDbxref = new ValidatedMethod({ switch (true) { case
DBXREF_REGEX.go.test(dbxrefId): publicUrl = `http://amigo.geneontology.org/amigo/term/${dbxrefId}`; - apiUrl = `http://api.geneontology.org/api/bioentity/${dbxrefId}`; + apiUrl = `https://api.geneontology.org/api/go/${dbxrefId}`; dbType = 'go'; break; case DBXREF_REGEX.interpro.test(dbxrefId): diff --git a/imports/api/publications.js b/imports/api/publications.js index b8f14428..e82906bb 100644 --- a/imports/api/publications.js +++ b/imports/api/publications.js @@ -9,6 +9,7 @@ import { attributeCollection } from '/imports/api/genes/attributeCollection.js'; import { dbxrefCollection } from '/imports/api/genes/dbxrefCollection.js'; import { EditHistory } from '/imports/api/genes/edithistory_collection.js'; import { eggnogCollection } from '/imports/api/genes/eggnog/eggnogCollection.js'; +import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js'; import { interproscanCollection } from '/imports/api/genes/interproscan/interproscanCollection.js'; import { similarSequencesCollection } from '/imports/api/genes/alignment/similarSequenceCollection.js'; // orthogroups @@ -179,6 +180,10 @@ Meteor.publish({ const eggnog = eggnogCollection.find({_id: query}); return eggnog; }, + hectar(query) { + const hectar = hectarCollection.find({_id: query}); + return hectar; + }, alignment(gene) { const diamond = similarSequencesCollection.find( { diff --git a/imports/ui/singleGenePage/SingleGenePage.jsx b/imports/ui/singleGenePage/SingleGenePage.jsx index 4ff5cccf..983cce10 100644 --- a/imports/ui/singleGenePage/SingleGenePage.jsx +++ b/imports/ui/singleGenePage/SingleGenePage.jsx @@ -16,6 +16,7 @@ import Genemodel from './Genemodel.jsx'; import Seq from './Seq.jsx'; import ProteinDomains from './ProteinDomains.jsx'; import Eggnog from './eggnog/Eggnog.jsx'; +import Hectar from './hectar/Hectar.jsx'; import SequenceSimilarity from './alignment/SequenceSimilarity.jsx'; import Orthogroup from './orthoGroup/Orthogroup.jsx'; @@ -115,6 +116,11 @@ function 
SingleGenePage({ gene, genome = {} }) { EggNOG +
  • + + Hectar + +
  • Sequence Similarity @@ -147,6 +153,9 @@ function SingleGenePage({ gene, genome = {} }) {
    +
    + +
    diff --git a/imports/ui/singleGenePage/hectar/Hectar.jsx b/imports/ui/singleGenePage/hectar/Hectar.jsx new file mode 100644 index 00000000..ec22abe0 --- /dev/null +++ b/imports/ui/singleGenePage/hectar/Hectar.jsx @@ -0,0 +1,211 @@ +/* eslint-disable react/prop-types */ +import { hectarCollection } from '/imports/api/genes/hectar/hectarCollection.js'; +import { branch, compose } from '/imports/ui/util/uiUtil.jsx'; +import { Genes } from '/imports/api/genes/geneCollection.js'; +import { withTracker } from 'meteor/react-meteor-data'; +import React, { useEffect, useState } from 'react'; +import { Meteor } from 'meteor/meteor'; +import './hectar.scss'; + +function Header() { + return ( + <> +
    +

    Hectar annotations

    + + ); +} + +function hasNoHectar({ hectar }) { + return typeof hectar === 'undefined'; +} + +function NoHectar({ showHeader }) { + return ( + <> + {showHeader &&
    } +
    +
    +

    No Hectar annotations found

    +
    +
    + + ); +} + +function hectarDataTracker({ gene }) { + const hectarSub = Meteor.subscribe('hectar', gene.hectarId); + const loading = !hectarSub.ready(); + const hectar = hectarCollection.findOne({ _id: gene.hectarId }); // only this gene's record, not any cached one + + return { + loading, + gene, + hectar, + }; +} + +function Localisation({ annot }) { + return ( +

    + { annot } +

    + ); +} + +function SigPepScore({ sigPep }) { + return ( +

    + { sigPep } +

    + ); +} + +function SigPepClivScore({ sigPepCli }) { + return ( +

    + { sigPepCli } +

    + ); +} + +function TIISigAncScore({ sigAnchor }) { + return ( +

    + { sigAnchor } +

    + ); +} + +function ChloroScore({ chloro }) { + return ( +

    + { chloro } +

    + ); +} + +function MitoScore({ mito }) { + return ( +

    + { mito } +

    + ); +} + + +function OtherScore({ other }) { + return ( +

    + { other } +

    + ); +} + +function ArrayHectarAnnotations({ hectar }) { + return ( +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + General informations +
    + Sub-cellular localisation prediction + + { hectar.predicted_targeting_category && } +
    + Signal peptide score + + { hectar.signal_peptide_score && } +
    + Signal peptide cleavage site score + + { hectar.signal_peptide_cleavage_site && } +
    + TypeII signal anchor score + + { + hectar.typeII_signal_anchor_score + && ( + + ) + } +
    + Chloroplastic protein score + + { + hectar.chloroplast_score + && ( + + ) + } +
    + Mitochondrial protein score + + { + hectar.mitochondrion_score + && ( + + ) + } +
    + Other localisation score + + { + hectar.other_score + && ( + + ) + } +
    +
    + ); +} + +function HectarAnnotation({ showHeader = false, hectar }) { + return ( + <> + { showHeader &&
    } +
    + +
    + + ); +} + +export default compose( + withTracker(hectarDataTracker), + branch(hasNoHectar, NoHectar), +)(HectarAnnotation); diff --git a/imports/ui/singleGenePage/hectar/hectar.scss b/imports/ui/singleGenePage/hectar/hectar.scss new file mode 100644 index 00000000..98be477b --- /dev/null +++ b/imports/ui/singleGenePage/hectar/hectar.scss @@ -0,0 +1,4 @@ +.table-hectar { + width: 100%; + table-layout: fixed; +} diff --git a/package.json b/package.json index 9881af92..fd490b65 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "genoboo", - "version": "0.4.4", + "version": "0.4.5", "repository": "https://github.com/gogepp/genoboo", "description": "A portable website for browsing and querying genome sequences and annotations. Forked from genenotebook", "license": "AGPL-3.0", diff --git a/private/data/Bnigra_hectar.tab b/private/data/Bnigra_hectar.tab new file mode 100644 index 00000000..3926ca41 --- /dev/null +++ b/private/data/Bnigra_hectar.tab @@ -0,0 +1,2 @@ +protein id predicted targeting category signal peptide score signal peptide cleavage site type II signal anchor score chloroplast score mitochondrion score other score +BniB01g000010.2N.1 other localisation 0.0583 - 0.0228 - 0.1032 0.8968 diff --git a/tests/main.js b/tests/main.js index 7b3a444a..aa6a4c5d 100644 --- a/tests/main.js +++ b/tests/main.js @@ -10,6 +10,7 @@ if (Meteor.isServer) { import '../imports/api/genomes/annotation/addAnnotation.test'; import '../imports/api/genes/alignment/alignment.test.js'; import '../imports/api/genes/eggnog/eggnog.test.js'; + import '../imports/api/genes/hectar/hectar.test.js'; import '../imports/api/genes/interproscan/interproscan.test.js'; import '../imports/api/transcriptomes/transcriptomes.test.js'; import '../imports/api/genes/download/download.test.js';