From 5fdb1763a5d3d4806354abbc9db4c7de3612ade8 Mon Sep 17 00:00:00 2001 From: Dominic Bennett Date: Mon, 26 Nov 2018 21:26:30 +0100 Subject: [PATCH] available on CRAN --- README.Rmd | 10 +++++-- README.md | 87 ++++++++++++++++++++++-------------------------------- 2 files changed, 44 insertions(+), 53 deletions(-) diff --git a/README.Rmd b/README.Rmd index 00c99d1..93f462c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,7 +21,7 @@ assignInNamespace(x = 'gb_build2', value = restez:::gb_build, # Locally query GenBank -[![Build Status](https://travis-ci.org/ropensci/restez.svg?branch=master)](https://travis-ci.org/ropensci/restez) [![Coverage Status](https://coveralls.io/repos/github/ropensci/restez/badge.svg?branch=master)](https://coveralls.io/github/ropensci/restez?branch=master) [![ROpenSci status](https://badges.ropensci.org/232_status.svg)](https://github.com/ropensci/onboarding/issues/232) [![DOI](https://zenodo.org/badge/129107980.svg)](https://zenodo.org/badge/latestdoi/129107980) +[![Build Status](https://travis-ci.org/ropensci/restez.svg?branch=master)](https://travis-ci.org/ropensci/restez) [![Coverage Status](https://coveralls.io/repos/github/ropensci/restez/badge.svg?branch=master)](https://coveralls.io/github/ropensci/restez?branch=master) [![ROpenSci status](https://badges.ropensci.org/232_status.svg)](https://github.com/ropensci/onboarding/issues/232) [![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/restez)](https://CRAN.R-project.org/package=restez) [![DOI](https://zenodo.org/badge/129107980.svg)](https://zenodo.org/badge/latestdoi/129107980) Download parts of [NCBI's GenBank](https://www.ncbi.nlm.nih.gov/nuccore) to a local folder and create a simple SQL-like database. Use 'get' tools to query the database by accession IDs. [rentrez](https://github.com/ropensci/rentrez) wrappers are available, so that if sequences are not available locally they can be searched for online through [Entrez](https://www.ncbi.nlm.nih.gov/books/NBK25500/). 
@@ -39,7 +39,13 @@ This package aims to make sequence retrieval more efficient by allowing a user t ## Installation -You can install `restez` from GitHub with: +`restez` is available via CRAN and can be installed with: + +```{r cran-installation, include=TRUE, echo=TRUE, eval=FALSE} +install.packages("restez") +``` + +Or you can install the development version via GitHub with: ```{r gh-installation, include=TRUE, echo=TRUE, eval=FALSE} # install.packages("devtools") diff --git a/README.md b/README.md index c49f60d..b79da13 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Locally query GenBank =============================================================================================================================== -[![Build Status](https://travis-ci.org/ropensci/restez.svg?branch=master)](https://travis-ci.org/ropensci/restez) [![Coverage Status](https://coveralls.io/repos/github/ropensci/restez/badge.svg?branch=master)](https://coveralls.io/github/ropensci/restez?branch=master) [![ROpenSci status](https://badges.ropensci.org/232_status.svg)](https://github.com/ropensci/onboarding/issues/232) [![DOI](https://zenodo.org/badge/129107980.svg)](https://zenodo.org/badge/latestdoi/129107980) +[![Build Status](https://travis-ci.org/ropensci/restez.svg?branch=master)](https://travis-ci.org/ropensci/restez) [![Coverage Status](https://coveralls.io/repos/github/ropensci/restez/badge.svg?branch=master)](https://coveralls.io/github/ropensci/restez?branch=master) [![ROpenSci status](https://badges.ropensci.org/232_status.svg)](https://github.com/ropensci/onboarding/issues/232) [![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/restez)](https://CRAN.R-project.org/package=restez) [![DOI](https://zenodo.org/badge/129107980.svg)](https://zenodo.org/badge/latestdoi/129107980) Download parts of [NCBI's GenBank](https://www.ncbi.nlm.nih.gov/nuccore) to a local folder and create a simple SQL-like database. Use 'get' tools to query the database by accession IDs. 
[rentrez](https://github.com/ropensci/rentrez) wrappers are available, so that if sequences are not available locally they can be searched for online through [Entrez](https://www.ncbi.nlm.nih.gov/books/NBK25500/). @@ -25,7 +25,13 @@ This package aims to make sequence retrieval more efficient by allowing a user t Installation ------------ -You can install `restez` from GitHub with: +`restez` is available via CRAN and can be installed with: + +``` r +install.packages("restez") +``` + +Or you can install the development version via GitHub with: ``` r # install.packages("devtools") @@ -71,11 +77,11 @@ id <- sample(list_db_ids(), 1) # sequences seq <- gb_sequence_get(id)[[1]] str(seq) -#> chr "GATCCGGCCGCAGCCGCAGTGTCGGCATTGTTCCCGCTGGGCGAGACGGAGATCACCCTCACGGTCTTCTCGGGCGATCAGTCCGACGCCGAGACGACGACGGTGACGATC"| __truncated__ +#> chr "CCCTCGAAGCCCTCATCGCTGAGGCTGCCCCGGATACCGAGGTTCTTTACAAAGACCGGATGGACTCCAGCAACGACACCTCGCTGGTAACAGCCCTCATGACGACCTATGACGGCACTCTTGAGG" # definitions def <- gb_definition_get(id)[[1]] print(def) -#> [1] "Unidentified clone B15 DNA sequence from ocean beach sand" +#> [1] "Unidentified RNA clone P10.11" # organisms org <- gb_organism_get(id)[[1]] print(org) @@ -83,43 +89,36 @@ print(org) # or whole records rec <- gb_record_get(id)[[1]] cat(rec) -#> LOCUS AF298094 581 bp DNA linear UNA 23-NOV-2000 -#> DEFINITION Unidentified clone B15 DNA sequence from ocean beach sand. -#> ACCESSION AF298094 -#> VERSION AF298094.1 +#> LOCUS AF040893 126 bp RNA linear UNA 06-MAR-1998 +#> DEFINITION Unidentified RNA clone P10.11. +#> ACCESSION AF040893 +#> VERSION AF040893.1 #> KEYWORDS . #> SOURCE unidentified #> ORGANISM unidentified #> unclassified sequences. -#> REFERENCE 1 (bases 1 to 581) -#> AUTHORS Naviaux,R.K. -#> TITLE Sand DNA: a multigenomic library on the beach +#> REFERENCE 1 (bases 1 to 126) +#> AUTHORS Pan,W.S., Ji,X.Y., Wang,H.T. and Zhong,Y.S. 
+#> TITLE RNA from plasma of patient NO.10 #> JOURNAL Unpublished -#> REFERENCE 2 (bases 1 to 581) -#> AUTHORS Naviaux,R.K. +#> REFERENCE 2 (bases 1 to 126) +#> AUTHORS Pan,W.S., Ji,X.Y., Wang,H.T. and Zhong,Y.S. #> TITLE Direct Submission -#> JOURNAL Submitted (21-AUG-2000) Medicine, University of California, San -#> Diego School of Medicine, 200 West Arbor Drive, San Diego, CA -#> 92103-8467, USA +#> JOURNAL Submitted (31-DEC-1997) Department of Applied Molecular Biology, +#> Microbiology & Epidemiology Institution, 20 Dongdajie Street, +#> Fengtai, Beijing 100071, China #> FEATURES Location/Qualifiers -#> source 1..581 +#> source 1..126 #> /organism="unidentified" -#> /mol_type="genomic DNA" +#> /mol_type="genomic RNA" #> /db_xref="taxon:32644" -#> /clone="B15" -#> /note="anonymous environmental sample sequence from ocean -#> beach sand" +#> /clone="P10.11" +#> /note="from the plasma of patient no.10, a person infected +#> by an unknown hepatitis virus" #> ORIGIN -#> 1 gatccggccg cagccgcagt gtcggcattg ttcccgctgg gcgagacgga gatcaccctc -#> 61 acggtcttct cgggcgatca gtccgacgcc gagacgacga cggtgacgat cgaggacacg -#> 121 accgcgccga cgttcaccca cgcactgggt gatgtccttc cgatggtgac gaaggaggca -#> 181 acggagcccg gagggcatga cttcagcccg gccacgccgg acgcctggga ccatggagac -#> 241 agcgacctcg acatcgcttg cggtacggaa ctcccgcatc tcttcccgat cggggataca -#> 301 gagatcacct ggacggcgac ggacgatcag gacctttcga cgacggcaac gcagatcatc -#> 361 cggatcgagg acaacacgcc gccgaccttc atccagcgcg atgatcaggt cgtggcgacc -#> 421 acgtacgatc cggtcggtct caggaaggag cacgttccgc tcgcgggcac cgtcatcgcg -#> 481 gtggacttcg gacagcccgt accgctcacg aacaccgccc cggacgtntt tcggttgggg -#> 541 agcacggaga tncctggacc gcgacggtgc gtncgggaac t +#> 1 ccctcgaagc cctcatcgct gaggctgccc cggataccga ggttctttac aaagaccgga +#> 61 tggactccag caacgacacc tcgctggtaa cagccctcat gacgacctat gacggcactc +#> 121 ttgagg #> // ``` @@ -129,32 +128,18 @@ cat(rec) # use the entrez_* wrappers to access GB data res <- entrez_fetch(db = 'nucleotide', id = id, 
rettype = 'fasta') cat(res) -#> >AF298094.1 Unidentified clone B15 DNA sequence from ocean beach sand -#> GATCCGGCCGCAGCCGCAGTGTCGGCATTGTTCCCGCTGGGCGAGACGGAGATCACCCTCACGGTCTTCT -#> CGGGCGATCAGTCCGACGCCGAGACGACGACGGTGACGATCGAGGACACGACCGCGCCGACGTTCACCCA -#> CGCACTGGGTGATGTCCTTCCGATGGTGACGAAGGAGGCAACGGAGCCCGGAGGGCATGACTTCAGCCCG -#> GCCACGCCGGACGCCTGGGACCATGGAGACAGCGACCTCGACATCGCTTGCGGTACGGAACTCCCGCATC -#> TCTTCCCGATCGGGGATACAGAGATCACCTGGACGGCGACGGACGATCAGGACCTTTCGACGACGGCAAC -#> GCAGATCATCCGGATCGAGGACAACACGCCGCCGACCTTCATCCAGCGCGATGATCAGGTCGTGGCGACC -#> ACGTACGATCCGGTCGGTCTCAGGAAGGAGCACGTTCCGCTCGCGGGCACCGTCATCGCGGTGGACTTCG -#> GACAGCCCGTACCGCTCACGAACACCGCCCCGGACGTNTTTCGGTTGGGGAGCACGGAGATNCCTGGACC -#> GCGACGGTGCGTNCGGGAACT +#> >AF040893.1 Unidentified RNA clone P10.11 +#> CCCTCGAAGCCCTCATCGCTGAGGCTGCCCCGGATACCGAGGTTCTTTACAAAGACCGGATGGACTCCAG +#> CAACGACACCTCGCTGGTAACAGCCCTCATGACGACCTATGACGGCACTCTTGAGG # if the id is not in the local database # these wrappers will search online via the rentrez package res <- entrez_fetch(db = 'nucleotide', id = c('S71333.1', id), rettype = 'fasta') #> [1] id(s) are unavailable locally, searching online. 
cat(res) -#> >AF298094.1 Unidentified clone B15 DNA sequence from ocean beach sand -#> GATCCGGCCGCAGCCGCAGTGTCGGCATTGTTCCCGCTGGGCGAGACGGAGATCACCCTCACGGTCTTCT -#> CGGGCGATCAGTCCGACGCCGAGACGACGACGGTGACGATCGAGGACACGACCGCGCCGACGTTCACCCA -#> CGCACTGGGTGATGTCCTTCCGATGGTGACGAAGGAGGCAACGGAGCCCGGAGGGCATGACTTCAGCCCG -#> GCCACGCCGGACGCCTGGGACCATGGAGACAGCGACCTCGACATCGCTTGCGGTACGGAACTCCCGCATC -#> TCTTCCCGATCGGGGATACAGAGATCACCTGGACGGCGACGGACGATCAGGACCTTTCGACGACGGCAAC -#> GCAGATCATCCGGATCGAGGACAACACGCCGCCGACCTTCATCCAGCGCGATGATCAGGTCGTGGCGACC -#> ACGTACGATCCGGTCGGTCTCAGGAAGGAGCACGTTCCGCTCGCGGGCACCGTCATCGCGGTGGACTTCG -#> GACAGCCCGTACCGCTCACGAACACCGCCCCGGACGTNTTTCGGTTGGGGAGCACGGAGATNCCTGGACC -#> GCGACGGTGCGTNCGGGAACT +#> >AF040893.1 Unidentified RNA clone P10.11 +#> CCCTCGAAGCCCTCATCGCTGAGGCTGCCCCGGATACCGAGGTTCTTTACAAAGACCGGATGGACTCCAG +#> CAACGACACCTCGCTGGTAACAGCCCTCATGACGACCTATGACGGCACTCTTGAGG #> #> >S71333.1 alpha 1,3 galactosyltransferase [New World monkeys, mermoset lymphoid cell line B95.8, mRNA Partial, 1131 nt] #> ATGAATGTCAAAGGAAAAGTAATTCTGTCGATGCTGGTTGTCTCAACTGTGATTGTTGTGTTTTGGGAAT