Skip to content

Commit

Permalink
Merge pull request #58 from bento-platform/feat/impl-ref-service
Browse files Browse the repository at this point in the history
feat!: use reference service to provide assembly ID
  • Loading branch information
davidlougheed authored Jan 4, 2024
2 parents 1fad838 + fa12ae3 commit 3de386b
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 120 deletions.
4 changes: 2 additions & 2 deletions etc/example.env
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ GOHAN_API_IMAGE=gohan-api
GOHAN_API_VERSION=latest

GOHAN_API_BUILDER_BASE_IMAGE=golang:1.21-bookworm
GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.11.10
GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.11.10
GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.12.01
GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.12.01

GOHAN_API_CONTAINER_NAME=gohan-api
GOHAN_API_SERVICE_HOST=0.0.0.0
Expand Down
2 changes: 1 addition & 1 deletion src/api/contexts/contexts.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type (

// Convenient storage for relevant http context data
QueryParameters struct {
AssemblyId constants.AssemblyId
AssemblyId string
Alleles []string
Chromosome string
Genotype constants.GenotypeQuery
Expand Down
10 changes: 4 additions & 6 deletions src/api/middleware/assemblyMiddleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package middleware

import (
"gohan/api/contexts"
"gohan/api/models/constants"
assid "gohan/api/models/constants/assembly-id"
"net/http"

"github.com/labstack/echo"
Expand All @@ -16,14 +14,14 @@ func MandateAssemblyIdAttribute(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
// check for assemblyId query parameter
assemblyId := c.QueryParam("assemblyId")
if len(assemblyId) == 0 || !assid.IsKnownAssemblyId(assemblyId) {
// if no id was provided, or it was invalid, return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing or unknown assemblyId!")
if len(assemblyId) == 0 {
// if no id was provided, return an error
return echo.NewHTTPError(http.StatusBadRequest, "Missing assemblyId!")
}

// forward the assembly ID string down the pipeline
gc := c.(*contexts.GohanContext)
gc.AssemblyId = constants.AssemblyId(assemblyId)
gc.AssemblyId = assemblyId

return next(gc)
}
Expand Down
40 changes: 2 additions & 38 deletions src/api/models/constants/assembly-id/main.go
Original file line number Diff line number Diff line change
@@ -1,42 +1,6 @@
package assemblyId

import (
"gohan/api/models/constants"
"strings"
)

const (
Unknown constants.AssemblyId = "Unknown"

GRCh38 constants.AssemblyId = "GRCh38"
GRCh37 constants.AssemblyId = "GRCh37"
NCBI36 constants.AssemblyId = "NCBI36"
NCBI35 constants.AssemblyId = "NCBI35"
NCBI34 constants.AssemblyId = "NCBI34"
Other constants.AssemblyId = "Other"
GRCh38 string = "GRCh38"
GRCh37 string = "GRCh37"
)

func CastToAssemblyId(text string) constants.AssemblyId {
switch strings.ToLower(text) {
case "grch38":
return GRCh38
case "grch37":
return GRCh37
case "ncbi36":
return NCBI36
case "ncbi35":
return NCBI35
case "ncbi34":
return NCBI34
case "other":
return Other
default:
return Unknown
}
}

func IsKnownAssemblyId(text string) bool {
// attempt to cast to assemblyId and
// return if unknown assemblyId
return CastToAssemblyId(text) != Unknown
}
17 changes: 8 additions & 9 deletions src/api/models/dtos/main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package dtos

import (
"gohan/api/models/constants"
"gohan/api/models/indexes"
"time"
)
Expand All @@ -21,11 +20,11 @@ type VariantCountReponse struct {
}

type VariantResult struct {
Query string `json:"query,omitempty"`
AssemblyId constants.AssemblyId `json:"assembly_id"`
Chromosome string `json:"chromosome"`
Start int `json:"start"`
End int `json:"end"`
Query string `json:"query,omitempty"`
AssemblyId string `json:"assembly_id"`
Chromosome string `json:"chromosome"`
Start int `json:"start"`
End int `json:"end"`
}

type VariantGetResult struct {
Expand Down Expand Up @@ -54,9 +53,9 @@ type VariantCall struct {
Alleles []string `json:"alleles,omitempty"`
// TODO: GenotypeProbability, PhredScaleLikelihood ?

AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"`
Dataset string `json:"dataset,omitempty"`
DocumentId string `json:"documentId,omitempty"`
AssemblyId string `json:"assemblyId,omitempty"`
Dataset string `json:"dataset,omitempty"`
DocumentId string `json:"documentId,omitempty"`
}

// --- Dataset
Expand Down
18 changes: 9 additions & 9 deletions src/api/models/indexes/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ type Variant struct {

Sample Sample `json:"sample"`

FileId string `json:"fileId"`
Dataset string `json:"dataset"`
AssemblyId c.AssemblyId `json:"assemblyId"`
CreatedTime time.Time `json:"createdTime"`
FileId string `json:"fileId"`
Dataset string `json:"dataset"`
AssemblyId string `json:"assemblyId"`
CreatedTime time.Time `json:"createdTime"`
}

type Info struct {
Expand Down Expand Up @@ -51,9 +51,9 @@ type Genotype struct {
}

type Gene struct {
Name string `json:"name"`
Chrom string `json:"chrom"`
Start int `json:"start"`
End int `json:"end"`
AssemblyId c.AssemblyId `json:"assemblyId"`
Name string `json:"name"`
Chrom string `json:"chrom"`
Start int `json:"start"`
End int `json:"end"`
AssemblyId string `json:"assemblyId"`
}
33 changes: 14 additions & 19 deletions src/api/mvc/genes/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"crypto/tls"
"fmt"
"gohan/api/contexts"
"gohan/api/models/constants"
assemblyId "gohan/api/models/constants/assembly-id"
"gohan/api/models/constants/chromosome"
"gohan/api/models/dtos"
Expand Down Expand Up @@ -51,15 +50,15 @@ func GenesIngest(c echo.Context) error {
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
}

assemblyIdMap := map[constants.AssemblyId]string{
assemblyIdMap := map[string]string{
assemblyId.GRCh38: "gencode.v38.annotation.gtf",
assemblyId.GRCh37: "gencode.v19.annotation.gtf",
// SKIP
// assemblyId.NCBI36: "hg18",
// assemblyId.NCBI35: "hg17",
// assemblyId.NCBI34: "hg16",
}
assemblyIdGTFUrlMap := map[constants.AssemblyId]string{
assemblyIdGTFUrlMap := map[string]string{
assemblyId.GRCh38: "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz",
assemblyId.GRCh37: "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz",
// SKIP
Expand All @@ -79,7 +78,7 @@ func GenesIngest(c echo.Context) error {
CreatedAt: fmt.Sprintf("%v", time.Now()),
}

go func(_assId constants.AssemblyId, _fileName string, _assemblyWg *sync.WaitGroup, reqStat *ingest.GeneIngestRequest) {
go func(_asmId string, _fileName string, _assemblyWg *sync.WaitGroup, reqStat *ingest.GeneIngestRequest) {
defer _assemblyWg.Done()

var (
Expand All @@ -89,7 +88,7 @@ func GenesIngest(c echo.Context) error {
gtfFile, err := os.Open(fmt.Sprintf("%s/%s", gtfPath, _fileName))
if err != nil {
// Download the file
fullURLFile := assemblyIdGTFUrlMap[_assId]
fullURLFile := assemblyIdGTFUrlMap[_asmId]

handleHardErr := func(err error) {
msg := "Something went wrong: " + err.Error()
Expand Down Expand Up @@ -193,13 +192,13 @@ func GenesIngest(c echo.Context) error {
defer gtfFile.Close()

// clean out genes currently in elasticsearch by assembly id
fmt.Printf("Cleaning out %s gene documents from genes index (if any)\n", string(_assId))
esRepo.DeleteGenesByAssemblyId(cfg, es7Client, _assId)
fmt.Printf("Cleaning out %s gene documents from genes index (if any)\n", string(_asmId))
esRepo.DeleteGenesByAssemblyId(cfg, es7Client, _asmId)

fileScanner := bufio.NewScanner(gtfFile)
fileScanner.Split(bufio.ScanLines)

fmt.Printf("Ingesting %s\n", string(_assId))
fmt.Printf("Ingesting %s\n", string(_asmId))
reqStat.State = ingest.Running
iz.GeneIngestRequestChan <- reqStat

Expand All @@ -222,7 +221,7 @@ func GenesIngest(c echo.Context) error {
go func(rowText string, _chromHeaderKey int,
_startKey int, _endKey int,
_nameHeaderKeys []int, _geneNameHeaderKeys []int,
_assId constants.AssemblyId,
_assId string,
_gwg *sync.WaitGroup) {
// fmt.Printf("row : %s\n", row)

Expand Down Expand Up @@ -276,19 +275,19 @@ func GenesIngest(c echo.Context) error {
Chrom: chromosomeClean,
Start: start,
End: end,
AssemblyId: _assId,
AssemblyId: _asmId,
}

iz.GeneIngestionBulkIndexingQueue <- &structs.GeneIngestionQueueStructure{
Gene: discoveredGene,
WaitGroup: _gwg,
}
}(rowText, chromHeaderKey, startKey, endKey, nameHeaderKeys, geneNameHeaderKeys, _assId, &geneWg)
}(rowText, chromHeaderKey, startKey, endKey, nameHeaderKeys, geneNameHeaderKeys, _asmId, &geneWg)
}

geneWg.Wait()

fmt.Printf("%s ingestion done!\n", _assId)
fmt.Printf("%s ingestion done!\n", _asmId)
fmt.Printf("Deleting %s\n", unzippedFileName)
err = os.Remove(fmt.Sprintf("%s/%s", gtfPath, unzippedFileName))
if err != nil {
Expand Down Expand Up @@ -335,11 +334,7 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
// Assembly ID
// perform wildcard search if no assembly ID is passed
// - an empty string triggers it
var assId constants.AssemblyId
if gc.AssemblyId != assemblyId.Unknown {
// retrieve passed parameter if is valid
assId = gc.AssemblyId
}
asmId := gc.AssemblyId

// Size
var (
Expand All @@ -354,10 +349,10 @@ func GenesGetByNomenclatureWildcard(c echo.Context) error {
}
}

fmt.Printf("Executing wildcard genes search for term %s, assemblyId %s (max size: %d)\n", term, assId, size)
fmt.Printf("Executing wildcard genes search for term %s, assemblyId %s (max size: %d)\n", term, asmId, size)

// Execute
docs, geneErr := esRepo.GetGeneDocumentsByTermWildcard(cfg, es, chromosomeSearchTerm, term, assId, size)
docs, geneErr := esRepo.GetGeneDocumentsByTermWildcard(cfg, es, chromosomeSearchTerm, term, asmId, size)
if geneErr != nil {
return c.JSON(http.StatusOK, map[string]interface{}{
"status": 500,
Expand Down
9 changes: 2 additions & 7 deletions src/api/mvc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package mvc
import (
"gohan/api/contexts"
"gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
"strings"

Expand All @@ -12,7 +11,7 @@ import (
"github.com/labstack/echo"
)

func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) {
func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, string, string) {
gc := c.(*contexts.GohanContext)
es := gc.Es7Client

Expand Down Expand Up @@ -48,11 +47,7 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int,
}
}

assemblyId := a.Unknown
assemblyIdQP := c.QueryParam("assemblyId")
if len(assemblyIdQP) > 0 && a.IsKnownAssemblyId(assemblyIdQP) {
assemblyId = a.CastToAssemblyId(assemblyIdQP)
}
assemblyId := c.QueryParam("assemblyId")

return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString
}
14 changes: 6 additions & 8 deletions src/api/repositories/elasticsearch/genes.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ import (
"time"

"gohan/api/models"
"gohan/api/models/constants"
assemblyId "gohan/api/models/constants/assembly-id"
"gohan/api/utils"

"github.com/elastic/go-elasticsearch/v7"
Expand Down Expand Up @@ -106,7 +104,7 @@ func GetGeneBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client) (map[
}

func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client,
chromosomeSearchTerm string, term string, assId constants.AssemblyId, size int) (map[string]interface{}, error) {
chromosomeSearchTerm string, term string, asmId string, size int) (map[string]interface{}, error) {

if cfg.Debug {
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
Expand All @@ -115,10 +113,10 @@ func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client
// Nomenclature Search Term
nomenclatureStringTerm := fmt.Sprintf("*%s*", term)

// Assembly Id Search Term (wildcard by default)
// Assembly ID Search Term (wildcard by default)
assemblyIdStringTerm := "*"
if assId != assemblyId.Unknown {
assemblyIdStringTerm = string(assId)
if asmId != "" {
assemblyIdStringTerm = asmId
}

var buf bytes.Buffer
Expand Down Expand Up @@ -217,7 +215,7 @@ func GetGeneDocumentsByTermWildcard(cfg *models.Config, es *elasticsearch.Client
return result, nil
}

func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, assId constants.AssemblyId) (map[string]interface{}, error) {
func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, asmId string) (map[string]interface{}, error) {

if cfg.Debug {
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
Expand All @@ -227,7 +225,7 @@ func DeleteGenesByAssemblyId(cfg *models.Config, es *elasticsearch.Client, assId
query := map[string]interface{}{
"query": map[string]interface{}{
"match": map[string]interface{}{
"assemblyId": string(assId),
"assemblyId": asmId,
},
},
}
Expand Down
7 changes: 3 additions & 4 deletions src/api/repositories/elasticsearch/variants.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (

"gohan/api/models"
c "gohan/api/models/constants"
a "gohan/api/models/constants/assembly-id"
gq "gohan/api/models/constants/genotype-query"
s "gohan/api/models/constants/sort"
z "gohan/api/models/constants/zygosity"
Expand Down Expand Up @@ -110,7 +109,7 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e
reference string, alternative string, alleles []string,
size int, sortByPosition c.SortDirection,
includeInfoInResultSet bool,
genotype c.GenotypeQuery, assemblyId c.AssemblyId,
genotype c.GenotypeQuery, assemblyId string,
getSampleIdsOnly bool) (map[string]interface{}, error) {

// begin building the request body.
Expand Down Expand Up @@ -405,7 +404,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config,
chromosome string, lowerBound int, upperBound int,
variantId string, sampleId string, datasetString string,
reference string, alternative string, alleles []string,
genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) {
genotype c.GenotypeQuery, assemblyId string) (map[string]interface{}, error) {

// begin building the request body.
mustMap := []map[string]interface{}{{
Expand Down Expand Up @@ -465,7 +464,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config,
}})
}

if assemblyId != "" && assemblyId != a.Unknown {
if assemblyId != "" {
mustMap = append(mustMap, map[string]interface{}{
"match": map[string]interface{}{
"assemblyId": map[string]interface{}{
Expand Down
Loading

0 comments on commit 3de386b

Please sign in to comment.