From eefa29309215c4c9df95c4f8ad40d17c68bdca60 Mon Sep 17 00:00:00 2001 From: Ian Holmes Date: Tue, 7 Mar 2017 10:03:08 -0800 Subject: [PATCH] Added more documentation of static site generator to README. Simplified operation (can now add examples separately from GAF+OBO files) --- README.md | 38 +++++++++- bin/add-to-site.js | 172 ++++++++++++++++++++++++++++----------------- 2 files changed, 144 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 1494f65..5d2ca73 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # wtfgenes -**What's The Function of these genes?** +**"What's The Function of these genes?"** Answer this question with Bayesian [Term Enrichment Analysis](https://en.wikipedia.org/wiki/Gene_Ontology_Term_Enrichment) (TEA) using a model described [here](https://github.com/ihh/wtfgenes-appnote/blob/master/main.pdf) @@ -24,7 +24,29 @@ The repository contains two implementations of Bayesian and Frequentist TEA: The two implementations should be identical at the level of numerical output, although the C++ version is about twice as fast. -This guide focuses on the JavaScript implementation; the C++ version is similar but does not use JSON files. +This guide focuses mostly on the JavaScript implementation; the C++ version is similar but does not use JSON files. + +## Installation + +### JavaScript version + +Prerequisites: +- node v6.0.0+ + + cd wtfgenes + npm install + bin/wtfgenes.js --help + +### C++11 version + +Prerequisites: +- clang (Apple LLVM version 7.3.0+) +- gsl (version 2.2.1+) +- boost (version 1.63.0+) + + cd wtfgenes/cpp + make + bin/wtfgenes --help ## Input and output formats @@ -51,10 +73,20 @@ You can see a demo of the web client [here](https://evoldoers.github.io/wtfgo/). To set up the web client as a [static site](https://en.wikipedia.org/wiki/Static_web_page), you need to perform the following steps: - run the [bin/create-site.js](https://github.com/evoldoers/wtfgenes/blob/master/bin/create-site.js) script to create a static site directory -- run the [bin/add-to-site.js](https://github.com/evoldoers/wtfgenes/blob/master/bin/add-to-site.js) script as many times as you want to add GAF-format gene-term association files (and the accompanying OBO-format ontologies) to the site, optionally with example gene sets +- run the [bin/add-to-site.js](https://github.com/evoldoers/wtfgenes/blob/master/bin/add-to-site.js) script as many times as you want to add GAF-format gene-term association files (and the accompanying OBO-format ontologies) to the site, optionally with gene ID aliases and example sets - hand-edit the [index.html](https://github.com/evoldoers/wtfgenes/blob/master/web/index.html) file in the static site directory to include any additional text you want to include - move the static site directory to someplace your webserver can see (it's OK to rename it) +For example, to set up a static site for yeast: + + curl -O http://geneontology.org/ontology/go-basic.obo + curl -O http://geneontology.org/gene-associations/gene_association.sgd.gz + gunzip gene_association.sgd.gz + bin/create-site.js yeast + bin/add-to-site.js yeast -s "S.cerevisiae" -n "Gene ontology" -o go-basic.obo -g gene_association.sgd + bin/add-to-site.js yeast -e "Mating genes" -i "STE2 STE3 STE5 GPA1 SST2 STE11 STE50 STE20 STE4 STE18 FUS3 KSS1 PTP2 MSG5 DIG1 DIG2 STE12" + bin/add-to-site.js yeast -e "Sulfate assimilation and nitrogen utilization" -i "MET10 MET1 MET14 MET22 MET3 MET5 MET8 TRX1 SUL1 FZF1 SUL2 OAC1 ATF1 ATF2 ADY2 ATO2 ATO3 MEP1 MEP2 MEP3 UGA1 UGA3 YGR125W YPR003C YIL165C MKS1 NPR1 RSP5 URE2 VID30 AGC1 CPS1 GDH2 DAL80 GZF3 PPH3 GAT1 RTG2 UME6" + The `create-site.js` and `add-to-site.js` scripts should be self-documenting (use the `-h` option to show a brief help message). Since the web client consists of web-browsable files and does not need to execute any code on a server, diff --git a/bin/add-to-site.js b/bin/add-to-site.js index 0923d0b..39f8b3b 100755 --- a/bin/add-to-site.js +++ b/bin/add-to-site.js @@ -22,12 +22,14 @@ var getopt = new Getopt([ ['n', 'ontology=NAME', 'ontology name'], ['s', 'species=NAME', 'species name'], ['e', 'example=STRING+', 'name of example gene set'], - ['i', 'ids=STRING+', 'example gene set (space-separated ID list)'], + ['i', 'ids=STRING+', 'example gene set (whitespace-separated)'], ['h' , 'help', 'display this help message'] ]) // create Getopt instance .bindHelp() .setHelp ("Usage: add-to-site.js \n" - + "[[OPTIONS]]\n") + + "[[OPTIONS]]\n\n" + + "The gene ID aliases file, if it exists, should have\n" + + "one set of synonyms per line, whitespace-separated.\n") var opt = getopt.parseSystem() // parse command line @@ -42,83 +44,127 @@ opt.argv.length === 1 || inputError ("Too many arguments", true) var dir = opt.argv[0] var datasetsPath = "datasets.json" + function dirPath (filename) { return dir + "/" + filename } +function makeJsonFilename (stem, orgNum, ontNum) { return "json/" + stem + "." + (orgNum+1) + "-" + (ontNum+1) + ".json" } + +function readJson (filename) { + var path = dirPath(filename) + fs.existsSync (path) || inputError ("Can't find " + path) + return JSON.parse (fs.readFileSync(path).toString()) +} + +function writeJson (filename, json) { + fs.writeFileSync (dirPath(filename), JSON.stringify (json)) +} + fs.existsSync(dirPath(datasetsPath)) || inputError ("Can't find " + dirPath(datasetsPath)) +var datasets = readJson (datasetsPath) -opt.options.obo || inputError ("Please specify an OBO file", true) -opt.options.gaf || inputError ("Please specify a GAF file", true) +var organism, organismOntology, orgNum, ontNum, examples +function findOrganism (name) { + for (var i = 0; i < datasets.organisms.length; ++i) + if (datasets.organisms[i].name === name) { + organism = datasets.organisms[orgNum = i] + break + } +} -fs.existsSync(opt.options.obo) || inputError ("OBO file not found") -fs.existsSync(opt.options.gaf) || inputError ("GAF file not found") +function findOntology (name) { + for (var i = 0; i < organism.ontologies.length; ++i) + if (organism.ontologies[i].name === name) { + organismOntology = organism.ontologies[ontNum = i] + break + } +} -var examples if (opt.options.example || opt.options.ids) { - var exampleName = opt.options.example || [] var exampleIds = opt.options.ids || [] + var exampleName = opt.options.example + || exampleIds.map (function (ids) { return "Example (" + ids.length + " gene" + (ids.length == 1 ? "" : "s") + ")" }) exampleName.length === exampleIds.length || inputError ("Please supply as many example gene-set names as example gene-sets", true) examples = exampleName.map (function (name, n) { return { name: name, genes: exampleIds[n].split(" ") } }) } -function readJson (filename) { - var path = dirPath(filename) - fs.existsSync (path) || inputError ("Can't find " + path) - return JSON.parse (fs.readFileSync(path).toString()) -} +if (opt.options.obo || opt.options.gaf) { + opt.options.obo || inputError ("Please specify an OBO file", true) + opt.options.gaf || inputError ("Please specify a GAF file", true) -function writeJson (filename, json) { - fs.writeFileSync (dirPath(filename), JSON.stringify (json)) -} + fs.existsSync(opt.options.obo) || inputError ("OBO file '" + opt.options.obo + "' not found") + fs.existsSync(opt.options.gaf) || inputError ("GAF file '" + opt.options.gaf + "' not found") -var datasets = readJson (datasetsPath) -var oboText = fs.readFileSync(opt.options.obo).toString() -var gafText = fs.readFileSync(opt.options.gaf).toString() + var ontologyName = opt.options.ontology || opt.options.obo + var speciesName = opt.options.species || opt.options.gaf + + findOrganism (speciesName) + if (!organism) + orgNum = datasets.organisms.push (organism = { name: speciesName, ontologies: [] }) - 1 -var aliasesText -if ('aliases' in opt.options) + findOntology (ontologyName) + organismOntology && inputError ("An ontology named '" + ontologyName + "' for species '" + speciesName + "' already exists") + + var oboText = fs.readFileSync(opt.options.obo).toString() + var gafText = fs.readFileSync(opt.options.gaf).toString() + + var aliasesText + if (opt.options.aliases) aliasesText = fs.readFileSync(opt.options.aliases).toString() -var ontologyName = opt.options.ontology || opt.options.obo -var speciesName = opt.options.species || opt.options.gaf - -var organism, orgNum -for (var i = 0; !organism && i < datasets.organisms.length; ++i) - if (datasets.organisms[i].name === speciesName) { - organism = datasets.organisms[i] - orgNum = i + 1 - } -if (!organism) - orgNum = datasets.organisms.push (organism = { name: speciesName, ontologies: [] }) - -var ontologyJson = converters.obo2json ({ obo: oboText, - compress: true, - includeTermInfo: true }) - -var assocsJson = converters.gaf2json ({ gaf: gafText, - aliases: aliasesText, - mergeDuplicates: true }) - -var ontology = new Ontology (ontologyJson) -var trimmedAssocs = new Assocs ({ idAliasTerm: assocsJson.idAliasTerm, - ontology: ontology, - ignoreMissingTerms: true, - closure: false }) - -var slimOntology = ontology.subgraphWithAncestors (trimmedAssocs.relevantTerms().map (ontology.getTermName.bind(ontology))) - -var trimmedAssocsJson = trimmedAssocs.toJSON() -var slimOntologyJson = slimOntology.toJSON() - -var ontNum = organism.ontologies.length + 1 -var ontologyPath = "json/ontology." + orgNum + "-" + ontNum + ".json" -var assocsPath = "json/assocs." + orgNum + "-" + ontNum + ".json" -writeJson (ontologyPath, slimOntologyJson) -writeJson (assocsPath, trimmedAssocsJson) - -organism.ontologies.push ({ name: ontologyName, - ontology: "./" + ontologyPath, - assocs: "./" + assocsPath, - examples: examples }) -writeJson (datasetsPath, datasets) + var ontologyJson = converters.obo2json ({ obo: oboText, + compress: true, + includeTermInfo: true }) + + var assocsJson = converters.gaf2json ({ gaf: gafText, + aliases: aliasesText, + mergeDuplicates: true }) + + var ontology = new Ontology (ontologyJson) + var trimmedAssocs = new Assocs ({ idAliasTerm: assocsJson.idAliasTerm, + ontology: ontology, + ignoreMissingTerms: true, + closure: false }) + + var slimOntology = ontology.subgraphWithAncestors (trimmedAssocs.relevantTerms().map (ontology.getTermName.bind(ontology))) + + var trimmedAssocsJson = trimmedAssocs.toJSON() + var slimOntologyJson = slimOntology.toJSON() + + ontNum = organism.ontologies.length + var ontologyPath = makeJsonFilename("ontology",orgNum,ontNum) + var assocsPath = makeJsonFilename("assocs",orgNum,ontNum) + writeJson (ontologyPath, slimOntologyJson) + writeJson (assocsPath, trimmedAssocsJson) + + organismOntology = { name: ontologyName, + ontology: "./" + ontologyPath, + assocs: "./" + assocsPath, + examples: [] } + + organism.ontologies.push (organismOntology) + +} else { + if (opt.options.species) + findOrganism (opt.options.species) + else if (datasets.organisms.length === 1) + organism = datasets.organisms[0]; + + if (opt.options.ontology) + findOntology (opt.options.ontology) + else if (organism && organism.ontologies.length === 1) + organismOntology = organism.ontologies[0]; + + ((opt.options.species || organism) && (opt.options.ontology || organismOntology)) + || inputError ("Please specify GAF and OBO files\n(or species & ontology names, if adding example gene-sets to an existing entry)", true); + organism || inputError ("Organism '" + opt.options.species + "' not found in " + dirPath(datasetsPath)) + organismOntology || inputError ("Ontology '" + opt.options.ontology + "' not found for organism '" + opt.options.species + "' in " + dirPath(datasetsPath)) + + (examples && examples.length) || inputError ("Please specify at least one example gene-set to add to an existing entry,\nor GAF and OBO files to create a new entry"); + opt.options.aliases && inputError ("Can't specify an aliases file without a GAF file"); +} + +if (examples) + organismOntology.examples = organismOntology.examples.concat (examples) +writeJson (datasetsPath, datasets) console.log ("done")