Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Germplasm contrib #9

Merged
merged 3 commits into from
Oct 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ for TERRA-REF.
The server code was generated using [swagger-codegen](https://github.com/swagger-api/swagger-codegen).

## Requirements

Python 3.5.2+

## Usage

To run, first start an instance of the BETY database:

```
Expand Down Expand Up @@ -42,6 +44,11 @@ preliminary mapping of BETY fields to BRAPI objects.
| /germplasm | cultivars. | |
| /observations | traits | |

## Contributed Data

This repository provides the canonical reference for data that is outside of the scope of databases used in the TERRA REF program. Such data can be found in the `/contrib/` folder.

Germplasm data in `contrib/germplasm` is provided as a set of CSV files that were previously only available in the [experimental design section of the TERRA REF documentation](https://docs.terraref.org/scientific-objectives-and-experimental-design/experimental-design). These files provide metadata that describes the germplasm used in the sorghum trials, and were originally prepared by Noah Fahlgren.

## How to add an endpoint

Expand Down
32 changes: 32 additions & 0 deletions contrib/germplasm/commercial_hybrids.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Entry,Source,Common name,Ecotype
301,PI651496,RIO,Sweet Sorghum
302,PI651491,BAILEY,Sweet Sorghum
303,PI651495,DALE,Sweet Sorghum
304,PI653616,WRAY,Sweet Sorghum
305,PI653411,M 81E,Sweet Sorghum
306,PI586537,ATLAS,Sweet Sorghum
307,PI535785,N100,Sweet Sorghum
308,PI583832,TOP_76_6,Sweet Sorghum
309,PI641824,KS_ORANGE,Sweet Sorghum
310,PI641825,LEOTI,Sweet Sorghum
311,PI566819,DELLA,Sweet Sorghum
312,PI571107,COLMAN,Sweet Sorghum
313,PI505722,ZM/A 5298,Biomass Sorghum
314,PI505735,ZM/A 5345,Biomass Sorghum
315,PI562730,Grain Grass 3A,Biomass Sorghum
316,PI297171,IS13647,Biomass Sorghum
317,PI506122,Epo,Biomass Sorghum
318,PI297130,IS13613,Biomass Sorghum
319,PI154844,GRASSL,Biomass Sorghum
320,PI508366,MA 38,Biomass Sorghum
321,PI506114,Kwete-Mila,Biomass Sorghum
322,PI506030,Tchinlouol,Biomass Sorghum
323,PI506069,Mbonou,Biomass Sorghum
324,PI267573,IS 2983,Biomass Sorghum
325,PI564163,BTx623,Grain Sorghum
326,PI656056,P850029,Grain Sorghum
327,PI533964,"Safara, Kordafan",Grain Sorghum
328,PI533759,Mugbash 56/56,Grain Sorghum
329,PI35038,SUMAC,Grain Sorghum
330,PI561472,SURENO,Grain Sorghum
401,Richardson Seeds,700D BMR,Forage Sorghum
205 changes: 205 additions & 0 deletions contrib/germplasm/germplasm.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
---
title: "Building Data for Germplasm Endpoint"
output: html_document
---


```{r}
# The tidyverse supplies read_csv() (readr) and the %>% pipe used below.
library(tidyverse)

# Table of sorghum lines, read relative to the current working directory.
genotypes <- read_csv("sorghum_lines_genomics.csv")

# Serialise the first ten rows to JSON as a quick look at the record layout.
g_json <- genotypes[1:10, ] %>%
  jsonlite::toJSON()

print(g_json)
```


Search for germplasm from https://www.genesys-pgr.org BrAPI endpoint ...

https://www.genesys-pgr.org/brapi/v1/germplasm/0ba28636-e634-428a-aa58-4346a20de326


```{r}
# Install the BrAPI client from GitHub only when it is missing, so the
# document does not reinstall it on every knit. install_github() expects the
# repository as "owner/repo"; the separate `username` argument is deprecated.
if (!requireNamespace("brapi", quietly = TRUE)) {
  devtools::install_github("CIP-RIU/brapi")
}
library(brapi)
library(jsonlite)

genesys_con <- ba_connect(
  brapiDb = as.ba_db(
    protocol = "https://",
    db = "www.genesys-pgr.org",
    secure = TRUE
  )
)

source_ids <- genotypes$`source material identifiers`

# Request germplasm details for every source identifier. Results are stored
# by position so that a[[i]] always belongs to row i of `genotypes`, even when
# accession names repeat. Each request is wrapped in try() so that a failed
# lookup is recorded as a "try-error" (checked below) instead of stopping the
# loop, whether or not the client traps errors itself.
a <- vector("list", length(source_ids))
for (z in seq_along(source_ids)) {
  # a[z] <- list(...) keeps the slot even if the client returns NULL.
  a[z] <- list(try(
    brapi::ba_germplasm_details(
      con = genesys_con,
      rclass = "json",
      germplasmDbId = source_ids[z]
    ),
    silent = TRUE
  ))
}
names(a) <- genotypes$accession

# TRUE where the lookup returned something other than a try-error.
succeeded <- !vapply(a, inherits, logical(1), what = "try-error")
if (!any(succeeded)) {
  stop(
    "No germplasm lookup succeeded; cannot build the record template.",
    call. = FALSE
  )
}

# Blank record with the same fields as a real response, used for accessions
# whose lookup failed. It is derived from the first *successful* response
# rather than blindly from a[[1]], which may itself be an error.
empty <- lapply(fromJSON(a[[which(succeeded)[1]]])$result, function(x) "")
empty$commonCropName <- "sorghum"
empty$genus <- "Sorghum"
empty$species <- "bicolor"
empty$subtaxa <- "subsp. bicolor"
empty$speciesAuthority <- "(L.) Moench"
empty$biologicalStatusOfAccessionCode <- "412"

# Fields kept in each final record, in output order.
germplasm_fields <- c(
  "accessionNumber", "acquisitionDate", "biologicalStatusOfAccessionCode",
  "breedingMethodDbId", "commonCropName", "defaultDisplayName", "donors",
  "genus", "germplasmDbId", "germplasmName", "germplasmPUI", "instituteCode",
  "instituteName", "pedigree", "seedSource", "species", "speciesAuthority",
  "subtaxa", "subtaxaAuthority", "synonyms", "taxonIds",
  "typeOfGermplasmStorageCode"
)

# Normalise every response (or the blank template) into one record per
# accession.
b <- vector("list", length(a))
for (i in seq_along(a)) {
  if (!succeeded[i]) {
    res <- empty
    accession <- genotypes$accession[i]
    res$germplasmDbId <- source_ids[i]
  } else {
    res <- fromJSON(a[[i]])$result
    # NOTE(review): spaces are stripped from the accession only on this
    # branch, not for failed lookups -- confirm that this is intended.
    accession <- gsub(" ", "", genotypes$accession[i])
  }
  res$donors <- list(
    donorAccessionNumber = "",
    donorInstituteCode = "",
    germplasmPUI = ""
  )
  res$breedingMethodDbId <- ""
  res$instituteName <- ""
  res$germplasmName <- res$accessionNumber <- res$defaultDisplayName <- accession
  res$germplasmPUI <- paste0("https://purl.org/germplasm/id/", res$germplasmDbId)
  # Not part of `germplasm_fields`, so this value is dropped from the output.
  res$germplasmSeedSource <- res$seedSource
  res$subtaxaAuthority <- res$speciesAuthority
  res$taxonIds <- list(
    list(
      sourceName = "ncbiTaxon",
      taxonId = "http://purl.obolibrary.org/obo/NCBITaxon_4558"
    ),
    list(
      sourceName = "USDA Plants",
      taxonId = "https://plants.usda.gov/core/profile?symbol=SOBIB"
    )
  )

  b[[i]] <- res[germplasm_fields]
}

```

Get IDs from BETY

```{r, eval=FALSE}
library(tidyverse)

# Download the cultivar listing from the BETYdb API and parse it, flattening
# nested records into columns.
cultivars <- jsonlite::fromJSON(
  "https://terraref.ncsa.illinois.edu/bety/api/v1/cultivars?key=9999999999999999999999999999999999999999&limit=none",
  simplifyVector = TRUE,
  flatten = TRUE
)

# Keep only the second element of the parsed response; later chunks read its
# cultivar.* columns.
cultivars <- cultivars[[2]]

```

```{r}

# Cross-reference every germplasm record with its BETYdb cultivar: store the
# cultivar id and view URL under `xref`, and replace germplasmDbId with the
# BETYdb cultivar id.
for (i in seq_along(b)) {
  # which() discards NA comparisons, so cultivars with a missing name are
  # never picked up as all-NA rows the way a bare logical subset would.
  matching_rows <- which(cultivars$cultivar.name == b[[i]]$accessionNumber)
  matched <- cultivars[matching_rows, ]

  # NOTE(review): when no cultivar matches, the id and source are empty and
  # the previous germplasmDbId is overwritten with an empty value -- confirm
  # that this is acceptable.
  b[[i]][["xref"]] <- list(
    id = matched$cultivar.id,
    source = matched$cultivar.view_url
  )
  b[[i]]$germplasmDbId <- matched$cultivar.id
}
```


### Add other cultivars

```{r}


# Accession numbers that already have a germplasm record in `b`.
known_accessions <- unlist(lapply(b, function(x) x$accessionNumber))

# BETYdb cultivars that do not yet have a germplasm record.
remaining_cultivars <- cultivars[
  !(cultivars$cultivar.name %in% known_accessions),
]

# Every new record starts as a copy of the first existing record; the fields
# assigned below are then overwritten.
# NOTE(review): fields that are not overwritten (e.g. pedigree, seedSource)
# keep the values of b[[1]] -- confirm that this is intended.
template <- b[[1]]

bb <- vector("list", nrow(remaining_cultivars))
# seq_len() yields no iterations when there are no remaining cultivars,
# whereas 1:nrow() would iterate over c(1, 0) and fail.
for (i in seq_len(nrow(remaining_cultivars))) {
  record <- template
  cultivar <- remaining_cultivars[i, ]
  # specie_id 2588 is treated as sorghum; any other id is treated as wheat.
  is_sorghum <- cultivar$cultivar.specie_id == 2588

  record$accessionNumber <- cultivar$cultivar.name
  record$acquisitionDate <- ""
  record$commonCropName <- ifelse(is_sorghum, "sorghum", "wheat")
  record$defaultDisplayName <- cultivar$cultivar.name
  record$genus <- ifelse(is_sorghum, "Sorghum", "Triticum")
  record$germplasmDbId <- cultivar$cultivar.id
  record$germplasmName <- cultivar$cultivar.name
  record$germplasmPUI <- ""
  record$instituteCode <- ""
  # Species epithet only (the genus is stored separately): the wheat taxon
  # identifiers below (NCBITaxon_4565, USDA symbol TRAE) refer to
  # Triticum aestivum.
  record$species <- ifelse(is_sorghum, "bicolor", "aestivum")
  record$speciesAuthority <- ifelse(is_sorghum, "(L.) Moench", "L.")
  record$subtaxa <- ifelse(is_sorghum, "subsp. bicolor", "")
  record$subtaxaAuthority <- ifelse(is_sorghum, "(L.) Moench", "")
  record$synonyms <- ""
  record$taxonIds[[1]]$taxonId <- ifelse(
    is_sorghum,
    "http://purl.obolibrary.org/obo/NCBITaxon_4558",
    "http://purl.obolibrary.org/obo/NCBITaxon_4565"
  )
  record$taxonIds[[2]]$taxonId <- ifelse(
    is_sorghum,
    "https://plants.usda.gov/core/profile?symbol=SOBIB",
    "https://plants.usda.gov/core/profile?symbol=TRAE"
  )
  record$xref$id <- cultivar$cultivar.id
  record$xref$source <- cultivar$cultivar.view_url
  bb[[i]] <- record
}

# Existing records first, followed by the newly built ones.
cc <- c(b, bb)
```


```{r}
# Placeholder: walks over every combined record but performs no work yet.
for (i in seq_along(cc)) {
}
```


```{r}
# Strip any element names from the record list (should not be needed, just
# in case).
cc <- unname(cc)

# Serialise all records, writing length-one vectors as JSON scalars.
ccc <- toJSON(cc, auto_unbox = TRUE)

# Pretty-print the JSON and write it to the data directory.
# jsonlite::write_json(c, 'germplasm.txt')
write_lines(
  x = jsonlite::prettify(ccc),
  "~/dev/brapi/bety_brapi/data/germplasm.json"
)
# dir.create('~/dev/brapi/data')
# jsonlite::write_json(b, '~/dev/brapi/germplasm.json')

```


## Further updates

```{r}
# Load the germplasm records as plain nested lists (simplifyVector = FALSE),
# read relative to the current working directory.
germplasm <- jsonlite::fromJSON(
  txt = "germplasm.json",
  simplifyVector = FALSE
)

```


## Add latitudes and longitudes for referenced baps

```{r}
# Tab-separated table of referenced accessions; the loop below reads its
# `Taxa`, `Longitude` and `Latitude` columns.
locations <- readr::read_tsv("referenced_bap.txt")

# Attach coordinates to every germplasm record whose accession number appears
# in the `Taxa` column.
for (i in seq_along(germplasm)) {
  accession <- germplasm[[i]]$accessionNumber
  # which() ignores NA entries in `Taxa` and returns an empty index for a
  # missing accession number, so such records are simply skipped.
  hits <- which(locations$Taxa == accession)
  if (length(hits) > 0) {
    message("Adding coordinates to germplasm record ", i)
    germplasm[[i]][["additionalInfo"]] <- list(
      longitude = locations$Longitude[hits],
      latitude = locations$Latitude[hits]
    )
  }
}

# auto_unbox = TRUE matches the earlier export of this file: the records were
# read with simplifyVector = FALSE, so without it every scalar field would be
# written back as a one-element JSON array.
# NOTE(review): the records were read from 'germplasm.json' in the working
# directory but are written to an absolute path under ~/dev -- confirm that
# both refer to the same file.
readr::write_lines(
  jsonlite::prettify(jsonlite::toJSON(germplasm, auto_unbox = TRUE)),
  "~/dev/brapi/bety_brapi/data/germplasm.json"
)

```

##

### Construct inserts for attributes table in betydb


```{r}
# Collect, for every germplasm record in `b`, the BETYdb cultivar id and the
# record serialised as JSON, keyed by cultivar name.
insert_germplasm <- list()
for (i in seq_along(b)) {
  cultivar_name <- b[[i]]$accessionNumber

  # `cultivars` is used as a data frame with cultivar.name / cultivar.id
  # columns in the other chunks of this document, so the id is looked up by
  # matching on the name.
  id <- cultivars$cultivar.id[match(cultivar_name, cultivars$cultivar.name)]
  if (length(id) != 1 || is.na(id)) {
    # No usable BETYdb id for this record: nothing to insert.
    next
  }

  # `b` is filled by position and has no names, so index it with `i`.
  # auto_unbox = TRUE matches the JSON written to germplasm.json.
  json <- jsonlite::toJSON(b[[i]], auto_unbox = TRUE)
  insert_germplasm[[cultivar_name]] <- list(
    id = id,
    json = as.character(json)
  )
}

# One INSERT statement per record. The values follow the order of the column
# list, and single quotes inside the JSON are doubled so that the SQL string
# literal stays well formed.
# NOTE(review): the column names (table, table_id, json) are taken from the
# original statement -- confirm them against the betydb attributes schema.
inserts <- lapply(insert_germplasm, function(x) {
  paste0(
    "insert into attributes (table, table_id, json) values (",
    "'cultivars', ", x$id, ", '",
    gsub("'", "''", x$json, fixed = TRUE),
    "');"
  )
})

```
Loading