From 93077e5673f6f46b3561662c14df90d8b0b5c012 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:33:59 -0400 Subject: [PATCH 01/84] chore: remove middleware, routes --- src/api/main.go | 14 +- src/api/middleware/tableMiddleware.go | 35 --- src/api/mvc/tables/main.go | 324 -------------------------- 3 files changed, 2 insertions(+), 371 deletions(-) delete mode 100644 src/api/middleware/tableMiddleware.go delete mode 100644 src/api/mvc/tables/main.go diff --git a/src/api/main.go b/src/api/main.go index 7477f66f..9bf35377 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -8,7 +8,6 @@ import ( dataTypesMvc "gohan/api/mvc/data-types" genesMvc "gohan/api/mvc/genes" serviceInfoMvc "gohan/api/mvc/service-info" - tablesMvc "gohan/api/mvc/tables" variantsMvc "gohan/api/mvc/variants" workflowsMvc "gohan/api/mvc/workflows" "gohan/api/services" @@ -139,13 +138,6 @@ func main() { e.GET("/data-types/variant/schema", dataTypesMvc.GetVariantDataTypeSchema) e.GET("/data-types/variant/metadata_schema", dataTypesMvc.GetVariantDataTypeMetadataSchema) - // -- Tables - e.GET("/tables", tablesMvc.GetTables) - e.POST("/tables", tablesMvc.CreateTable) - e.GET("/tables/:id", tablesMvc.GetTables) - e.DELETE("/tables/:id", tablesMvc.DeleteTable) - e.GET("/tables/:id/summary", tablesMvc.GetTableSummary) - // -- Variants e.GET("/variants/overview", variantsMvc.GetVariantsOverview) @@ -183,15 +175,13 @@ func main() { // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute, - gam.MandateTableIdAttribute) + gam.MandateAssemblyIdAttribute) e.GET("/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) e.GET("/variants/ingestion/stats", variantsMvc.VariantsIngestionStats) e.GET("/private/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute, - gam.MandateTableIdAttribute) + gam.MandateAssemblyIdAttribute) 
e.GET("/private/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) // -- diff --git a/src/api/middleware/tableMiddleware.go b/src/api/middleware/tableMiddleware.go deleted file mode 100644 index 4b0218af..00000000 --- a/src/api/middleware/tableMiddleware.go +++ /dev/null @@ -1,35 +0,0 @@ -package middleware - -import ( - "fmt" - "gohan/api/models/dtos/errors" - "gohan/api/utils" - "net/http" - - "github.com/labstack/echo" -) - -/* -Echo middleware to ensure a valid `tableId` HTTP query parameter was provided -*/ -func MandateTableIdAttribute(next echo.HandlerFunc) echo.HandlerFunc { - return func(c echo.Context) error { - // check for tableId query parameter - tableId := c.QueryParam("tableId") - if len(tableId) == 0 { - // if no id was provided, or is invalid, return an error - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("missing table id")) - } - - // verify tableId is a valid UUID - // - assume it's a valid table id if it's a uuid, - // further verification is done later - if !utils.IsValidUUID(tableId) { - fmt.Printf("Invalid table id %s\n", tableId) - - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid table id %s - please provide a valid uuid", tableId))) - } - - return next(c) - } -} diff --git a/src/api/mvc/tables/main.go b/src/api/mvc/tables/main.go deleted file mode 100644 index 828c4254..00000000 --- a/src/api/mvc/tables/main.go +++ /dev/null @@ -1,324 +0,0 @@ -package tables - -import ( - "encoding/json" - "fmt" - "net/http" - "time" - - "gohan/api/contexts" - "gohan/api/models/constants" - "gohan/api/models/dtos" - "gohan/api/models/dtos/errors" - "gohan/api/models/indexes" - "gohan/api/mvc" - esRepo "gohan/api/repositories/elasticsearch" - "gohan/api/utils" - - "github.com/labstack/echo" - "github.com/mitchellh/mapstructure" -) - -func CreateTable(c echo.Context) error { - fmt.Printf("[%s] - CreateTable hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config 
- es := c.(*contexts.GohanContext).Es7Client - - decoder := json.NewDecoder(c.Request().Body) - var t dtos.CreateTableRequestDto - err := decoder.Decode(&t) - if err != nil { - return c.JSON(http.StatusBadRequest, map[string]interface{}{ - "error": err, - }) - } - - // TODO: improve verification - if t.Name == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'name' cannot be empty", - }) - } else if t.Dataset == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'dataset' cannot be empty", - }) - } else if t.DataType == "" { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: "'data_type' cannot be empty", - }) - } - - // ensure data_type is valid ('variant', etc..) - if !utils.StringInSlice(t.DataType, constants.ValidTableDataTypes) { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: fmt.Sprintf("Invalid data_type: %s -- Must be one of the following: %s", t.DataType, constants.ValidTableDataTypes), - }) - } - - // TODO: ensure dataset is a valid identifier (uuid ?) 
- - // avoid creating duplicate tables with the same name - existingTables, error := esRepo.GetTablesByName(cfg, es, c.Request().Context(), t.Name) - if error != nil { - return c.JSON(http.StatusInternalServerError, dtos.CreateTableResponseDto{ - Error: error.Error(), - }) - } - if len(existingTables) > 0 { - return c.JSON(http.StatusBadRequest, dtos.CreateTableResponseDto{ - Error: fmt.Sprintf("A table with the name '%s' already exists", t.Name), - }) - } - - // call repository - table, error := esRepo.CreateTable(es, c.Request().Context(), t) - if error != nil { - return c.JSON(http.StatusInternalServerError, dtos.CreateTableResponseDto{ - Error: error.Error(), - }) - } - - return c.JSON(http.StatusOK, dtos.CreateTableResponseDto{ - Message: "Success", - Table: table, - }) -} - -func GetTables(c echo.Context) error { - fmt.Printf("[%s] - GetTables hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - - // obtain tableId from the path - tableId := c.Param("id") - - // obtain dataTypes from query parameter - dataType := c.QueryParam("data-type") - - // at least one of these parameters must be present - if tableId == "" && dataType == "" { - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing both id and data type - please provide at least one of them")) - } else if dataType != "" { - // ensure data_type is valid ('variant', etc..) 
- if !utils.StringInSlice(dataType, constants.ValidTableDataTypes) { - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Invalid data_type: %s -- Must be one of the following: %s", dataType, constants.ValidTableDataTypes))) - } - } - - // call repository - results, _ := esRepo.GetTables(cfg, es, c.Request().Context(), tableId, dataType) - if results == nil { - // return empty result (assume there are no tables because the index doesn't exist) - return c.JSON(http.StatusOK, []map[string]interface{}{}) - } - // TODO: handle _ error better - - // gather data from "hits" - docsHits := results["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - allSources := make([]indexes.Table, 0) - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allSources = append(allSources, resultingTable) - } - - if tableId != "" && len(allSources) > 0 { - // assume there is only 1 document in the database with this `id` - // return a single object rather than the whole list - return c.JSON(http.StatusOK, allSources[0]) - } - - return c.JSON(http.StatusOK, allSources) -} - -func GetTableSummary(c echo.Context) error { - fmt.Printf("[%s] - GetTableSummary hit!\n", time.Now()) - - cfg := c.(*contexts.GohanContext).Config - - // obtain tableId from the path - tableId := c.Param("id") - // obtain other potentially relevant parameters from available query parameters - // (these should be empty, but utilizing this common function is convenient to set up - // the call to the variants index through the repository functions) - var es, chromosome, lowerBound, upperBound, reference, alternative, 
alleles, genotype, assemblyId, _ = mvc.RetrieveCommonElements(c) - // unused tableId from query parameter set to '_' - - // table id must be provided - if tableId == "" { - fmt.Println("Missing table id") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing table id - please try again")) - } - - // call repository - // - get the table by id - results, getTablesError := esRepo.GetTables(cfg, es, c.Request().Context(), tableId, "") - if getTablesError != nil { - fmt.Printf("Failed to get tables with ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - // gather data from "hits" - docsHits := results["hits"].(map[string]interface{})["hits"] - if docsHits == nil { - fmt.Printf("No Tables with ID '%s' were found\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Table with ID %s not found", tableId))) - } - - // obtain hits (expecting 1) - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - allSources := make([]interface{}, 0) - // var allSources []indexes.Variant - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allSources = append(allSources, resultingTable) - } - - if len(allSources) == 0 { - fmt.Printf("Failed to get table summary with ID '%s'\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Failed to get table summary with ID %s", tableId))) - } - - // obtain table id from the one expected hit - // and search for variants associated with it - - totalVariantsCount := 0.0 - - docs, countError := 
esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, - chromosome, lowerBound, upperBound, - "", "", // note : both variantId and sampleId are deliberately set to "" - reference, alternative, alleles, genotype, assemblyId, tableId) - if countError != nil { - fmt.Printf("Failed to count variants with table ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - totalVariantsCount = docs["count"].(float64) - - // obtain number of samples associated with this tableId - resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndTableId(cfg, es, "sample.id.keyword", tableId) - if bucketsError != nil { - fmt.Println(resultingBuckets) - } - - // retrieve aggregations.items.buckets - // and count number of samples - bucketsMapped := []interface{}{} - if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { - aggsMapped := aggs.(map[string]interface{}) - - if items, itemsOk := aggsMapped["items"]; itemsOk { - itemsMapped := items.(map[string]interface{}) - - if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { - bucketsMapped = buckets.([]interface{}) - } - } - } - - fmt.Printf("Successfully Obtained Table ID '%s' Summary \n", tableId) - - return c.JSON(http.StatusOK, &dtos.TableSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), - }, - }) -} - -func DeleteTable(c echo.Context) error { - fmt.Printf("[%s] - DeleteTable hit!\n", time.Now()) - - // obtain tableId from the path - tableId := c.Param("id") - - // at least one of these parameters must be present - if tableId == "" { - fmt.Println("Missing table id") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing table id - please try again")) - } - - // call repository - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - results, 
deleteError := esRepo.DeleteTableById(cfg, es, c.Request().Context(), tableId) - if deleteError != nil { - fmt.Printf("Failed to delete tables with ID %s\n", tableId) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) - } - - // gather 'deleted table' data from "deleted" - numDeleted := 0.0 - docsHits := results["deleted"] - if docsHits != nil { - numDeleted = docsHits.(float64) - } else { - fmt.Printf("No Tables with ID '%s' were deleted\n", tableId) - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("Failed to delete tables with ID %s", tableId))) - } - if numDeleted == 0 { - fmt.Printf("No Tables with ID '%s' were deleted\n", tableId) - return c.JSON(http.StatusNotFound, errors.CreateSimpleNotFound(fmt.Sprintf("No table with ID %s", tableId))) - } - - // spin off the deletion of variants associated with - // the tableId provided in a go routine if the table - // was successfully deleted and assume the variants - // deletion completes successfully in the background - go func(_tableId string) { - var message string - - // delete variants associated with this table id - deletedVariants, deleteVariantsError := esRepo.DeleteVariantsByTableId(es, cfg, _tableId) - if deleteVariantsError != nil { - fmt.Printf("Failed to delete variants associated with table ID %s\n", tableId) - - // "do nothing" - return - } - - // successfully attempted to delete variants (if any) - - // get deletion details (if any) - deletedVariantsResults := deletedVariants["deleted"] - if deletedVariantsResults == nil { - message = fmt.Sprintf("Failed to delete variants associated with table ID %s", _tableId) - } else { - numDeletedVariants := int(deletedVariantsResults.(float64)) - message = fmt.Sprintf("Successfully deleted %d variants associated with table ID %s", numDeletedVariants, _tableId) - } - fmt.Println(message) - }(tableId) - // TODO: ensure that no variants exist without 
a valid tableId - - fmt.Printf("Successfully Deleted Table(s) with ID '%s' . Variants will be deleted in the background!\n", tableId) //numDeletedVariants - return c.NoContent(204) -} From b9254f86d7f2ed51034cca1a0ce41e0671920680 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:34:52 -0400 Subject: [PATCH 02/84] chore: removed dtos, constants, index/pseudo-fk, --- src/api/models/constants/main.go | 1 - src/api/models/dtos/main.go | 20 -------------------- src/api/models/indexes/main.go | 11 ----------- 3 files changed, 32 deletions(-) diff --git a/src/api/models/constants/main.go b/src/api/models/constants/main.go index 74884d7f..ebadb64b 100644 --- a/src/api/models/constants/main.go +++ b/src/api/models/constants/main.go @@ -1,6 +1,5 @@ package constants -var ValidTableDataTypes = []string{"variant"} var VcfHeaders = []string{"chrom", "pos", "id", "ref", "alt", "qual", "filter", "info", "format"} /* diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index ee016f35..539c87df 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -67,26 +67,6 @@ type GenesResponseDTO struct { Results []indexes.Gene `json:"results"` // []Gene } -// -- Tables -type CreateTableRequestDto struct { - Name string `json:"name"` - DataType string `json:"data_type"` - Dataset string `json:"dataset"` - Metadata map[string]interface{} `json:"metadata"` // TODO: type-safety? -} -type CreateTableResponseDto struct { - // --- testing: combine dto with an index model - // - makes for a clean 'single-layer' json response object - indexes.Table - - Message string `json:"message,omitempty"` - Error string `json:"error,omitempty"` -} -type TableSummaryResponseDto struct { - Count int `json:"count"` - DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? 
-} - // -- Errors type GeneralErrorResponseDto struct { Status int `json:"status,omitempty"` diff --git a/src/api/models/indexes/main.go b/src/api/models/indexes/main.go index 12f94edd..8d6e2710 100644 --- a/src/api/models/indexes/main.go +++ b/src/api/models/indexes/main.go @@ -18,7 +18,6 @@ type Variant struct { Sample Sample `json:"sample"` FileId string `json:"fileId"` - TableId string `json:"tableId"` AssemblyId c.AssemblyId `json:"assemblyId"` } @@ -55,13 +54,3 @@ type Gene struct { End int `json:"end"` AssemblyId c.AssemblyId `json:"assemblyId"` } - -type Table struct { - Id string `json:"id,omitempty"` // TODO: UUID ? - Name string `json:"name,omitempty"` - DataType string `json:"data_type,omitempty"` - Dataset string `json:"dataset,omitempty"` - AssemblyIds []string `json:"assembly_ids,omitempty"` - Metadata map[string]interface{} `json:"metadata,omitempty"` // TODO: type-safety? - Schema map[string]interface{} `json:"schema,omitempty"` -} From c47d426eb64fe080577a06ef502d2da634b61fda Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:37:17 -0400 Subject: [PATCH 03/84] patch: metadata schema --- src/api/models/schemas/schemas.go | 4 ++-- src/api/mvc/data-types/main.go | 3 +-- src/api/mvc/main.go | 10 ++-------- src/api/services/variants/main.go | 6 +----- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/api/models/schemas/schemas.go b/src/api/models/schemas/schemas.go index 822c1226..7e520ea6 100644 --- a/src/api/models/schemas/schemas.go +++ b/src/api/models/schemas/schemas.go @@ -7,8 +7,8 @@ import ( type Schema map[string]interface{} -var VARIANT_TABLE_METADATA_SCHEMA Schema = map[string]interface{}{ - "$id": "variant:table_metadata", // TODO: Real ID +var VARIANT_METADATA_SCHEMA Schema = map[string]interface{}{ + "$id": "variant:metadata", // TODO: Real ID "$schema": "http://json-schema.org/draft-07/schema#", "description": "Bento variant data type metadata schema", "type": "object", diff --git 
a/src/api/mvc/data-types/main.go b/src/api/mvc/data-types/main.go index 16ca0230..299ee94f 100644 --- a/src/api/mvc/data-types/main.go +++ b/src/api/mvc/data-types/main.go @@ -18,7 +18,6 @@ var variantDataTypeJson = map[string]interface{}{ "schema": schemas.VARIANT_SCHEMA, } -// "metadata_schema": schemas.VARIANT_TABLE_METADATA_SCHEMA, func GetDataTypes(c echo.Context) error { es := c.(*contexts.GohanContext).Es7Client cfg := c.(*contexts.GohanContext).Config @@ -44,7 +43,7 @@ func GetVariantDataTypeSchema(c echo.Context) error { } func GetVariantDataTypeMetadataSchema(c echo.Context) error { - return c.JSON(http.StatusOK, schemas.VARIANT_TABLE_METADATA_SCHEMA) + return c.JSON(http.StatusOK, schemas.VARIANT_METADATA_SCHEMA) } // - helpers diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 5d7736a1..0c61cf6b 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -13,7 +13,7 @@ import ( "github.com/labstack/echo" ) -func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) { +func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId) { es := c.(*contexts.GohanContext).Es7Client chromosome := c.QueryParam("chromosome") @@ -79,11 +79,5 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, assemblyId = a.CastToAssemblyId(assemblyIdQP) } - tableId := c.QueryParam("tableId") - if len(tableId) == 0 { - // if no tableId is provided, assume "wildcard" search - tableId = "*" - } - - return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId + return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId } diff --git a/src/api/services/variants/main.go b/src/api/services/variants/main.go index 4ed9afce..eac9bfe7 100644 --- 
a/src/api/services/variants/main.go +++ b/src/api/services/variants/main.go @@ -31,7 +31,7 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin callGetBucketsByKeyword := func(key string, keyword string, _wg *sync.WaitGroup) { defer _wg.Done() - results, bucketsError := esRepo.GetVariantsBucketsByKeywordAndTableId(cfg, es, keyword, "") + results, bucketsError := esRepo.GetVariantsBucketsByKeyword(cfg, es, keyword) if bucketsError != nil { resultsMux.Lock() defer resultsMux.Unlock() @@ -86,10 +86,6 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin wg.Add(1) go callGetBucketsByKeyword("assemblyIDs", "assemblyId.keyword", &wg) - // get distribution of table IDs - wg.Add(1) - go callGetBucketsByKeyword("tableIDs", "tableId.keyword", &wg) - wg.Wait() return resultsMap From aeedbf5153c848a8304d02966229a387ab23ae66 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:38:39 -0400 Subject: [PATCH 04/84] chore: removed tables es layer --- src/api/repositories/elasticsearch/tables.go | 359 ------------------ .../repositories/elasticsearch/variants.go | 96 +---- 2 files changed, 3 insertions(+), 452 deletions(-) delete mode 100644 src/api/repositories/elasticsearch/tables.go diff --git a/src/api/repositories/elasticsearch/tables.go b/src/api/repositories/elasticsearch/tables.go deleted file mode 100644 index b2abc82a..00000000 --- a/src/api/repositories/elasticsearch/tables.go +++ /dev/null @@ -1,359 +0,0 @@ -package elasticsearch - -import ( - // "gohan/api/contexts" - "bytes" - "context" - "crypto/tls" - "encoding/json" - "errors" - "fmt" - "gohan/api/models" - "gohan/api/models/dtos" - "gohan/api/models/indexes" - "gohan/api/models/schemas" - "gohan/api/utils" - "log" - "net/http" - "reflect" - "strings" - "time" - - es7 "github.com/elastic/go-elasticsearch/v7" - - "github.com/elastic/go-elasticsearch/esapi" - "github.com/google/uuid" - "github.com/mitchellh/mapstructure" -) - -const 
tablesIndex = "tables" - -func CreateTable(es *es7.Client, ctxt context.Context, t dtos.CreateTableRequestDto) (indexes.Table, error) { - - now := time.Now() - - // TODO: improve checks and balances.. - - // merge inbound metadata if any - defaultMeta := map[string]interface{}{ - "created_at": now, - "updated_at": now, - "name": t.Name, - } - - defaultAssemblyIds := []string{ - "GRCh38", - "GRCh37", - "NCBI36", - "Other", - } - - // Create struct instance of the Elasticsearch fields struct object - docStruct := indexes.Table{ - Id: uuid.New().String(), - Name: t.Name, - DataType: t.DataType, - Dataset: t.Dataset, - AssemblyIds: defaultAssemblyIds, - Metadata: defaultMeta, - Schema: schemas.VARIANT_SCHEMA, - } - - fmt.Println("\ndocStruct:", docStruct) - fmt.Println("docStruct TYPE:", reflect.TypeOf(docStruct)) - - // Marshal the struct to JSON and check for errors - b, err := json.Marshal(docStruct) - if err != nil { - fmt.Println("json.Marshal ERROR:", err) - return docStruct, err - } - - // Instantiate a request object - req := esapi.IndexRequest{ - Index: tablesIndex, - Body: strings.NewReader(string(b)), - Refresh: "true", - } - fmt.Println(reflect.TypeOf(req)) - - // Return an API response object from request - res, err := req.Do(ctxt, es) - if err != nil { - fmt.Printf("IndexRequest ERROR: %s\n", err) - return docStruct, err - } - defer res.Body.Close() - - if res.IsError() { - msg := fmt.Sprintf("%s ERROR", res.Status()) - fmt.Println(msg) - return docStruct, errors.New(msg) - } else { - - // Deserialize the response into a map. - var resMap map[string]interface{} - if err := json.NewDecoder(res.Body).Decode(&resMap); err != nil { - log.Printf("Error parsing the response body: %s", err) - } else { - log.Printf("\nIndexRequest() RESPONSE:") - // Print the response status and indexed document version. 
- fmt.Println("Status:", res.Status()) - fmt.Println("Result:", resMap["result"]) - fmt.Println("Version:", int(resMap["_version"].(float64))) - fmt.Println("resMap:", resMap) - fmt.Println() - } - } - - return docStruct, nil -} - -func GetTables(cfg *models.Config, es *es7.Client, ctxt context.Context, tableId string, dataType string) (map[string]interface{}, error) { - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - // return GetTablesWithoutContext(es, tableId, dataType) - // get table by "any combination of any applicable parameter" query structure - filter := make([]map[string]interface{}, 0) - - if tableId != "" { - - filter = append(filter, map[string]interface{}{ - "term": map[string]string{ - "id.keyword": tableId, - }, - }) - } - if dataType != "" { - filter = append(filter, map[string]interface{}{ - "term": map[string]string{ - "data_type.keyword": dataType, - }, - }) - } - // if `filter` remains an empty array, this will effecetively act as a "wildcard" query - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "bool": map[string]interface{}{ - "filter": filter, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - // Perform the search request. - res, searchErr := es.Search( - es.Search.WithContext(context.Background()), - es.Search.WithIndex(tablesIndex), - es.Search.WithBody(&buf), - es.Search.WithTrackTotalHits(true), - es.Search.WithPretty(), - ) - if searchErr != nil { - fmt.Printf("Error getting response: %s\n", searchErr) - return nil, searchErr - } - - defer res.Body.Close() - - resultString := res.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Declared an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the interface. 
- // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - // umErr := json.Unmarshal([]byte(resultString[9:]), &result) - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling response: %s\n", umErr) - return nil, umErr - } - - fmt.Printf("Query End: %s\n", time.Now()) - - return result, nil - -} - -func GetTablesByName(cfg *models.Config, es *es7.Client, ctxt context.Context, tableName string) ([]indexes.Table, error) { - - allTables := make([]indexes.Table, 0) - - // overall query structure - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "bool": map[string]interface{}{ - "filter": []map[string]interface{}{{ - "term": map[string]interface{}{ - "name.keyword": tableName, - }, - }}, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return allTables, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - fmt.Printf("Query Start: %s\n", time.Now()) - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - // Perform the search request. 
- res, searchErr := es.Search( - es.Search.WithContext(context.Background()), - es.Search.WithIndex(tablesIndex), - es.Search.WithBody(&buf), - es.Search.WithTrackTotalHits(true), - es.Search.WithPretty(), - ) - if searchErr != nil { - fmt.Printf("Error getting response: %s\n", searchErr) - return allTables, searchErr - } - - defer res.Body.Close() - - resultString := res.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // TODO: improve stability - // - check for 404 Not Found : assume index simply doesnt exist, return 0 results - if strings.Contains(resultString[0:15], "Not Found") { - return allTables, nil - } - - // Declared an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the interface. - // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling response: %s\n", umErr) - return allTables, umErr - } - - fmt.Printf("Query End: %s\n", time.Now()) - - // gather data from "hits" - docsHits := result["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} a table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - } - - // accumulate structs - allTables = append(allTables, resultingTable) - } - - return allTables, nil -} - -func DeleteTableById(cfg *models.Config, es *es7.Client, ctxt context.Context, tableId 
string) (map[string]interface{}, error) { - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "match": map[string]interface{}{ - "id": tableId, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - // Perform the delete request. - deleteRes, deleteErr := es.DeleteByQuery( - []string{tablesIndex}, - bytes.NewReader(buf.Bytes()), - ) - if deleteErr != nil { - fmt.Printf("Error getting response: %s\n", deleteErr) - return nil, deleteErr - } - - defer deleteRes.Body.Close() - - resultString := deleteRes.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Prepare an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the empty interface. 
- // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - // umErr := json.Unmarshal([]byte(resultString[9:]), &result) - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling gene search response: %s\n", umErr) - return nil, umErr - } - - return result, nil -} diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index f777a6d3..1747d653 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -20,7 +20,6 @@ import ( "gohan/api/utils" "github.com/elastic/go-elasticsearch/v7" - es7 "github.com/elastic/go-elasticsearch/v7" ) const wildcardVariantsIndex = "variants-*" @@ -111,7 +110,7 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e reference string, alternative string, alleles []string, size int, sortByPosition c.SortDirection, includeInfoInResultSet bool, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, tableId string, + genotype c.GenotypeQuery, assemblyId c.AssemblyId, getSampleIdsOnly bool) (map[string]interface{}, error) { // begin building the request body. @@ -171,13 +170,6 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e }) } - if tableId != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "tableId:" + tableId, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? 
@@ -325,7 +317,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, tableId string) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -385,14 +377,6 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }, }) } - - if tableId != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "tableId:" + tableId, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? @@ -507,7 +491,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, return result, nil } -func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch.Client, keyword string, tableId string) (map[string]interface{}, error) { +func GetVariantsBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client, keyword string) (map[string]interface{}, error) { // begin building the request body. 
var buf bytes.Buffer aggMap := map[string]interface{}{ @@ -525,14 +509,6 @@ func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch }, } - if tableId != "" { - aggMap["query"] = map[string]interface{}{ - "match": map[string]interface{}{ - "tableId": tableId, - }, - } - } - // encode the query if err := json.NewEncoder(&buf).Encode(aggMap); err != nil { log.Fatalf("Error encoding aggMap: %s\n", err) @@ -589,72 +565,6 @@ func GetVariantsBucketsByKeywordAndTableId(cfg *models.Config, es *elasticsearch return result, nil } -func DeleteVariantsByTableId(es *es7.Client, cfg *models.Config, tableId string) (map[string]interface{}, error) { - - // cfg := c.(*contexts.GohanContext).Config - // es := c.(*contexts.GohanContext).Es7Client - - if cfg.Debug { - http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} - } - - var buf bytes.Buffer - query := map[string]interface{}{ - "query": map[string]interface{}{ - "match": map[string]interface{}{ - "tableId": tableId, - }, - }, - } - - // encode the query - if err := json.NewEncoder(&buf).Encode(query); err != nil { - log.Fatalf("Error encoding query: %s\n", err) - return nil, err - } - - if cfg.Debug { - // view the outbound elasticsearch query - myString := string(buf.Bytes()[:]) - fmt.Println(myString) - } - - // Perform the delete request. - deleteRes, deleteErr := es.DeleteByQuery( - []string{wildcardVariantsIndex}, - bytes.NewReader(buf.Bytes()), - ) - if deleteErr != nil { - fmt.Printf("Error getting response: %s\n", deleteErr) - return nil, deleteErr - } - - defer deleteRes.Body.Close() - - resultString := deleteRes.String() - if cfg.Debug { - fmt.Println(resultString) - } - - // Prepare an empty interface - result := make(map[string]interface{}) - - // Unmarshal or Decode the JSON to the empty interface. 
- // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming - bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) - if !strings.Contains(bracketString, "200") { - return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) - } - - umErr := json.Unmarshal([]byte(jsonBodyString), &result) - if umErr != nil { - fmt.Printf("Error unmarshalling gene search response: %s\n", umErr) - return nil, umErr - } - - return result, nil -} - // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From 3fe7247a8bd3b4eaa857d9426442d394b04bb563 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:39:15 -0400 Subject: [PATCH 05/84] chore: patch ingestion, santitation, workflows --- src/api/mvc/variants/main.go | 16 +- src/api/services/ingestion.go | 3 +- src/api/services/sanitation/main.go | 75 +---- .../tests/integration/api/api_table_test.go | 293 ------------------ src/api/workflows/vcf_gz.wdl | 5 +- 5 files changed, 11 insertions(+), 381 deletions(-) delete mode 100644 src/api/tests/integration/api/api_table_test.go diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 0922f0b9..f5275f99 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -207,8 +207,6 @@ func VariantsIngest(c echo.Context) error { } assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) - tableId := c.QueryParam("tableId") - // TODO: validate table exists in elasticsearch // -- optional filter var ( @@ -410,7 +408,7 @@ func VariantsIngest(c echo.Context) error { // --- load vcf into memory and ingest the vcf file into elasticsearch beginProcessingTime := time.Now() fmt.Printf("Begin processing %s at [%s]\n", gzippedFilePath, beginProcessingTime) - ingestionService.ProcessVcf(gzippedFilePath, drsFileId, 
tableId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) + ingestionService.ProcessVcf(gzippedFilePath, drsFileId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) fmt.Printf("Ingest duration for file at %s : %s\n", gzippedFilePath, time.Since(beginProcessingTime)) reqStat.State = ingest.Done @@ -450,7 +448,7 @@ func GetAllVariantIngestionRequests(c echo.Context) error { func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) // retrieve other query parameters relevent to this 'get' query --- getSampleIdsOnlyQP := c.QueryParam("getSampleIdsOnly") @@ -536,7 +534,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu _id, "", // note : "" is for sampleId reference, alternative, alleles, size, sortByPosition, - includeInfoInResultSet, genotype, assemblyId, tableId, + includeInfoInResultSet, genotype, assemblyId, getSampleIdsOnly) } else { @@ -561,7 +559,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu "", _id, // note : "" is for variantId reference, alternative, alleles, size, sortByPosition, - includeInfoInResultSet, genotype, assemblyId, tableId, + includeInfoInResultSet, genotype, assemblyId, false) } @@ -689,7 +687,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, tableId = mvc.RetrieveCommonElements(c) + var es, chromosome, 
lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) respDTO := dtos.VariantCountReponse{ Results: make([]dtos.VariantCountResult, 0), @@ -719,7 +717,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId, tableId) + reference, alternative, alleles, genotype, assemblyId) } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -728,7 +726,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId, tableId) + reference, alternative, alleles, genotype, assemblyId) } if countError != nil { diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 07e35551..117c0ddd 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -341,7 +341,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, tableId string, + gzippedFilePath string, drsFileId string, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { @@ -419,7 +419,6 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId - tmpVariant["tableId"] = tableId // skip this call if need be skipThisCall := false diff --git a/src/api/services/sanitation/main.go b/src/api/services/sanitation/main.go index 7b24a582..4b15cf6d 100644 --- a/src/api/services/sanitation/main.go +++ 
b/src/api/services/sanitation/main.go @@ -1,20 +1,13 @@ package sanitation import ( - "context" - "encoding/json" "fmt" "time" es7 "github.com/elastic/go-elasticsearch/v7" "github.com/go-co-op/gocron" - "github.com/mitchellh/mapstructure" "gohan/api/models" - "gohan/api/models/indexes" - esRepo "gohan/api/repositories/elasticsearch" - - variantsService "gohan/api/services/variants" ) type ( @@ -46,7 +39,7 @@ func (ss *SanitationService) Init() { // context, that would mean performing something like // - removing duplicate documents // - cleaning documents that have broken pseudo-foreign keys - // - variants -> tables + // - variants -> tables (no longer necessary) // etc... go func() { // setup cron job @@ -54,71 +47,7 @@ func (ss *SanitationService) Init() { // clean variant documents with non-existing tables s.Every(1).Days().At("04:00:00").Do(func() { // 12am EST - fmt.Printf("[%s] - Running variant documents cleanup..\n", time.Now()) - - // - get all available tables - tables, tablesError := esRepo.GetTables(ss.Config, ss.Es7Client, context.Background(), "", "variant") - if tablesError != nil { - fmt.Printf("[%s] - Error getting tables : %v..\n", time.Now(), tablesError) - return - } - - // gather data from "hits" - docsHits := tables["hits"].(map[string]interface{})["hits"] - allDocHits := []map[string]interface{}{} - mapstructure.Decode(docsHits, &allDocHits) - - // grab _source for each hit - tableIds := make([]string, 0) - for _, r := range allDocHits { - source := r["_source"] - byteSlice, _ := json.Marshal(source) - - // cast map[string]interface{} to table - var resultingTable indexes.Table - if err := json.Unmarshal(byteSlice, &resultingTable); err != nil { - fmt.Println("failed to unmarshal:", err) - continue - } - - // accumulate structs - tableIds = append(tableIds, resultingTable.Id) - } - fmt.Printf("[%s] - Table IDs found : %v..\n", time.Now(), tableIds) - - // - obtain distribution of table IDs accross all variants - // TODO: refactor not use 
variants-mvc package to access this (anti-pattern) - variantsOverview := variantsService.GetVariantsOverview(ss.Es7Client, ss.Config) - if variantsOverview == nil { - return - } - if variantsOverview["tableIDs"] == nil { - return - } - - variantTableIdsCountsMap := variantsOverview["tableIDs"].(map[string]interface{}) - - variantTableIds := make([]string, 0) - for _variantTableId, _ := range variantTableIdsCountsMap { - // ignore variant count set to _ - - // accumulate IDs found in list - variantTableIds = append(variantTableIds, _variantTableId) - } - fmt.Printf("[%s] - Tables IDs found across all variants : %v..\n", time.Now(), variantTableIds) - - // obtain set-difference between variant-table IDs table IDs - setDiff := setDifference(tableIds, variantTableIds) - fmt.Printf("[%s] - Variant Table ID Difference: %v..\n", time.Now(), setDiff) - - // delete variants with table IDs found in this set difference - for _, differentId := range setDiff { - // TODO: refactor - // fire and forget - go func(_differentId string) { - _, _ = esRepo.DeleteVariantsByTableId(ss.Es7Client, ss.Config, _differentId) - }(differentId) - } + // nothing for now }) // starts the scheduler in blocking mode, which blocks diff --git a/src/api/tests/integration/api/api_table_test.go b/src/api/tests/integration/api/api_table_test.go deleted file mode 100644 index 28fce735..00000000 --- a/src/api/tests/integration/api/api_table_test.go +++ /dev/null @@ -1,293 +0,0 @@ -package api - -import ( - "bytes" - "encoding/json" - "fmt" - "gohan/api/models" - "gohan/api/models/dtos" - "gohan/api/models/indexes" - common "gohan/api/tests/common" - "gohan/api/utils" - "io/ioutil" - "net/http" - "testing" - - "github.com/stretchr/testify/assert" -) - -const ( - GetVariantTablesPath string = "%s/tables?data-type=variant" - GetTableByIdPathWithPlaceholder string = "%s/tables/%s" - GetTableSummaryByIdPathWithPlaceholder string = "%s/tables/%s/summary" - DeleteTableByIdPathWithPlaceholder string = 
"%s/tables/%s" - PostCreateTablePath string = "%s/tables" -) - -func TestCanGetVariantTables(t *testing.T) { - cfg := common.InitConfig() - - // get all available 'variant' tables - allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) -} - -func TestCanCreateTable(t *testing.T) { - cfg := common.InitConfig() - - // create table - createTablesRespJson := createVariantTable(t, cfg) - - // test get-by-id with newly created table - newTableId := createTablesRespJson.Id - getTableByIdUrl := fmt.Sprintf(GetTableByIdPathWithPlaceholder, cfg.Api.Url, newTableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response as a serialized json byte string - tableRespBody, tableRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableRespBodyErr) - - // --- transform body bytes to string - tableRespBodyString := string(tableRespBody) - - // -- check for json error - var getTableByIdResp indexes.Table - getTableByIdRespUnmarshallingError := json.Unmarshal([]byte(tableRespBodyString), &getTableByIdResp) - assert.Nil(t, getTableByIdRespUnmarshallingError) - - // ================ - - // -- ensure the table ids are the same - assert.True(t, getTableByIdResp.Id == newTableId) - -} - -func TestCanGetAllTablesById(t *testing.T) { - cfg := common.InitConfig() - - allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) - assert.True(t, len(allTableDtos) > 0) - - for _, table := range allTableDtos { - - tableId := table.Id - getTableByIdUrl := 
fmt.Sprintf(GetTableByIdPathWithPlaceholder, cfg.Api.Url, tableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response - tableRespBody, tableRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableRespBodyErr) - - // --- transform body bytes to string - tableRespBodyString := string(tableRespBody) - - // -- check for json error - var tablesRespJson indexes.Table - tableJsonUnmarshallingError := json.Unmarshal([]byte(tableRespBodyString), &tablesRespJson) - assert.Nil(t, tableJsonUnmarshallingError) - - // ================ - - // -- ensure the table ids are the same - assert.True(t, tablesRespJson.Id == tableId) - } -} -func TestCannotGetTablesWithInvalidIds(t *testing.T) { - cfg := common.InitConfig() - - // test with an empty id, and a random string - // both cases should result in a 400 bad request - for _, invalidTableId := range []string{"", utils.RandomString(32)} { - getTableSummaryByIdUrl := fmt.Sprintf(GetTableSummaryByIdPathWithPlaceholder, cfg.Api.Url, invalidTableId) - - request, _ := http.NewRequest("GET", getTableSummaryByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - shouldBe := 400 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableSummaryByIdUrl, response.Status, shouldBe)) - } -} - -func TestCanGetAllTableSummariesById(t *testing.T) { - cfg := common.InitConfig() - - 
allTableDtos := getVariantTables(t, cfg) - assert.NotNil(t, allTableDtos) - assert.True(t, len(allTableDtos) > 0) - - for _, table := range allTableDtos { - - tableId := table.Id - getTableSummaryByIdUrl := fmt.Sprintf(GetTableSummaryByIdPathWithPlaceholder, cfg.Api.Url, tableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("GET", getTableSummaryByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", getTableSummaryByIdUrl, response.Status, shouldBe)) - - // -- interpret array of available tables from response - tableSummaryRespBody, tableSummaryRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, tableSummaryRespBodyErr) - - // --- transform body bytes to string - tableSummaryRespBodyString := string(tableSummaryRespBody) - - // -- check for json error - var tableSummary dtos.TableSummaryResponseDto - tableJsonUnmarshallingError := json.Unmarshal([]byte(tableSummaryRespBodyString), &tableSummary) - assert.Nil(t, tableJsonUnmarshallingError) - - // ================ - - // -- ensure table summary is valid - assert.NotNil(t, tableSummary.Count) - assert.NotNil(t, tableSummary.DataTypeSpecific) - } -} - -func TestCanDeleteTableById(t *testing.T) { - cfg := common.InitConfig() - - // create table - createTablesRespJson := createVariantTable(t, cfg) - - // test get-by-id with newly created table - newTableId := createTablesRespJson.Id - deleteTableByIdUrl := fmt.Sprintf(DeleteTableByIdPathWithPlaceholder, cfg.Api.Url, newTableId) - - // TODO: refactor - // ================ - request, _ := http.NewRequest("DELETE", deleteTableByIdUrl, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, 
responseErr) - - defer response.Body.Close() - - shouldBe := 204 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api DELETE %s Status: %s ; Should be %d", deleteTableByIdUrl, response.Status, shouldBe)) - - // ================ -} - -func getVariantTables(_t *testing.T, _cfg *models.Config) []indexes.Table { - url := fmt.Sprintf(GetVariantTablesPath, _cfg.Api.Url) - request, _ := http.NewRequest("GET", url, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) - - // -- interpret array of available tables from response - overviewRespBody, overviewRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, overviewRespBodyErr) - - // --- transform body bytes to string - overviewRespBodyString := string(overviewRespBody) - - // -- check for json error - var tableDtos []indexes.Table - overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &tableDtos) - assert.Nil(_t, overviewJsonUnmarshallingError) - - return tableDtos -} - -func createVariantTable(_t *testing.T, _cfg *models.Config) dtos.CreateTableResponseDto { - // prepare request - postCreateTableUrl := fmt.Sprintf(PostCreateTablePath, _cfg.Api.Url) - data := dtos.CreateTableRequestDto{ - Name: utils.RandomString(32), // random table name - DataType: "variant", // set variant data_type - Dataset: utils.RandomString(32), // random dataset name - Metadata: map[string]interface{}{}, // TODO : expand ? 
- } - dataBytes, err := json.Marshal(data) - if err != nil { - panic(err) - } - dataString := string(dataBytes) - - r, _ := http.NewRequest("POST", postCreateTableUrl, bytes.NewBufferString(dataString)) - r.Header.Add("Content-Type", "application/json") - - // perform request - client := &http.Client{} - resp, err := client.Do(r) - if err != nil { - fmt.Printf("Table Creation error: %s\n", err) - } - defer resp.Body.Close() - - fmt.Printf("Table Creation status: %d\n", resp.StatusCode) - - // obtain the newly created table - // -- interpret create-table dto from response - createTableRespBody, createTableRespBodyErr := ioutil.ReadAll(resp.Body) - assert.Nil(_t, createTableRespBodyErr) - - // --- transform body bytes to string - createTableRespBodyString := string(createTableRespBody) - - // -- check for json error - var createTablesRespJson dtos.CreateTableResponseDto - createTableJsonUnmarshallingError := json.Unmarshal([]byte(createTableRespBodyString), &createTablesRespJson) - assert.Nil(_t, createTableJsonUnmarshallingError) - - // -- ensure table was successfully created - assert.Empty(_t, createTablesRespJson.Error) - - assert.NotNil(_t, createTablesRespJson.Table) - assert.NotNil(_t, createTablesRespJson.Table.Id) - assert.NotEmpty(_t, createTablesRespJson.Table.Id) - - return createTablesRespJson -} diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index fd820d61..0944b975 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,7 +3,6 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id - String table_id String filter_out_references String temp_token String temp_token_host @@ -14,7 +13,6 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, - table_id = table_id, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host @@ -27,7 +25,6 @@ 
task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id - String table_id String filter_out_references String temp_token String temp_token_host @@ -35,7 +32,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&tableId=${table_id}&filterOutReferences=${filter_out_references}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present From 7796caa104c03bb6d03f20c79acad38ac6ba6b97 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 5 Jun 2023 15:39:24 -0400 Subject: [PATCH 06/84] patch: readme --- README.md | 137 +----------------------------------------------------- 1 file changed, 1 insertion(+), 136 deletions(-) diff --git a/README.md b/README.md index 0ce64f21..dc60ffe3 100644 --- a/README.md +++ b/README.md @@ -77,25 +77,10 @@ # view catalogue curl -k https://gohan.local/genes/overview - - # create table - DATA='{ - "name": "Gohan Box Test Table", - "data_type": "variant", - "dataset": "00000000-0000-0000-0000-000000000000", - "metadata": {} - }' - curl -k -0 -v -X POST https://gohan.local/tables \ - -H 'Content-Type:application/json' \ - --data "$(echo $DATA)" | jq - - # - - # move vcf.gz files to `$GOHAN_API_VCF_PATH` # ingest vcf.gz - curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&tableId= + curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true # monitor progress: curl -k https://gohan.local/variants/ingestion/requests @@ -483,126 +468,6 @@ Response
-**`/tables`** - -
- - -Request ->   **GET** `/tables`
- -
- -Response ->```json -> [ -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> }, -> ... -> ] -> ``` - -
-
- - -Request ->   **POST** `/tables`
->```json -> { -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "metadata": {...}, -> } -> ``` - -
- -Response ->```json -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> } -> ``` - - -
-
- - -Request ->   **GET** `/tables/:id`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response ->```json -> { -> "id": `string`, -> "name": `string`, -> "data_type": `string`, -> "dataset": `string`, -> "assembly_ids": `[]string`, -> "metadata": {...}, -> "schema": {...}, -> } -> ``` - -
-
- - -Request ->   **GET** `/tables/:id/summary`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response ->```json -> { -> "count": `int`, -> "data_type_specific": {...}, -> } -> ``` - -
-
- - -Request ->   **DELETE** `/tables/:id`
->    path params: -> - id : **string (UUID)** `(required)` - -
- -Response - -`Status Code:` **204** - -
-
- - - ## Deployments : From db59ca81214ecbc41cda7899a3cf1c038bddee8e Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:43:06 -0400 Subject: [PATCH 07/84] chore: begin associating variants with a dataset --- src/api/main.go | 6 +++-- src/api/middleware/datasetMiddleware.go | 35 +++++++++++++++++++++++++ src/api/models/indexes/main.go | 1 + src/api/mvc/variants/main.go | 3 ++- src/api/services/ingestion.go | 3 ++- src/api/workflows/vcf_gz.wdl | 5 +++- 6 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 src/api/middleware/datasetMiddleware.go diff --git a/src/api/main.go b/src/api/main.go index 9bf35377..eeadc496 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -175,13 +175,15 @@ func main() { // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute) + gam.MandateAssemblyIdAttribute, + gam.MandateDatasetAttribute) e.GET("/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) e.GET("/variants/ingestion/stats", variantsMvc.VariantsIngestionStats) e.GET("/private/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware - gam.MandateAssemblyIdAttribute) + gam.MandateAssemblyIdAttribute, + gam.MandateDatasetAttribute) e.GET("/private/variants/ingestion/requests", variantsMvc.GetAllVariantIngestionRequests) // -- diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go new file mode 100644 index 00000000..a86ddb59 --- /dev/null +++ b/src/api/middleware/datasetMiddleware.go @@ -0,0 +1,35 @@ +package middleware + +import ( + "fmt" + "gohan/api/models/dtos/errors" + "gohan/api/utils" + "net/http" + + "github.com/labstack/echo" +) + +/* +Echo middleware to ensure a valid `dataset` HTTP query parameter was provided +*/ +func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + // check for dataset query parameter + dataset := 
c.QueryParam("dataset") + if len(dataset) == 0 { + // if no id was provided, or is invalid, return an error + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("missing dataset")) + } + + // verify dataset is a valid UUID + // - assume it's a valid dataset if it's a uuid, + // further verification is done later + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + return next(c) + } +} diff --git a/src/api/models/indexes/main.go b/src/api/models/indexes/main.go index 8d6e2710..a53d1017 100644 --- a/src/api/models/indexes/main.go +++ b/src/api/models/indexes/main.go @@ -18,6 +18,7 @@ type Variant struct { Sample Sample `json:"sample"` FileId string `json:"fileId"` + Dataset string `json:"dataset"` AssemblyId c.AssemblyId `json:"assemblyId"` } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index f5275f99..522f64b4 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -207,6 +207,7 @@ func VariantsIngest(c echo.Context) error { } assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) + dataset := c.QueryParam("dataset") // -- optional filter var ( @@ -408,7 +409,7 @@ func VariantsIngest(c echo.Context) error { // --- load vcf into memory and ingest the vcf file into elasticsearch beginProcessingTime := time.Now() fmt.Printf("Begin processing %s at [%s]\n", gzippedFilePath, beginProcessingTime) - ingestionService.ProcessVcf(gzippedFilePath, drsFileId, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) + ingestionService.ProcessVcf(gzippedFilePath, drsFileId, dataset, assemblyId, filterOutReferences, cfg.Api.LineProcessingConcurrencyLevel) fmt.Printf("Ingest duration for file at %s : %s\n", gzippedFilePath, time.Since(beginProcessingTime)) reqStat.State = ingest.Done diff --git 
a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 117c0ddd..fa524a55 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -341,7 +341,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, + gzippedFilePath string, drsFileId string, dataset string, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { @@ -419,6 +419,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId + tmpVariant["dataset"] = dataset // skip this call if need be skipThisCall := false diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 0944b975..603b54c1 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,6 +3,7 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id + String dataset String filter_out_references String temp_token String temp_token_host @@ -13,6 +14,7 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, + dataset = dataset, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host @@ -25,6 +27,7 @@ task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id + String dataset String filter_out_references String temp_token String temp_token_host @@ -32,7 +35,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&filterOutReferences=${filter_out_references}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present From 1a0e59f823aae1edebbb70237c2585034f7d1350 
Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:54:02 -0400 Subject: [PATCH 08/84] chore: add dataset to response object type --- src/api/models/dtos/main.go | 1 + src/api/mvc/variants/main.go | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 539c87df..823e47bf 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -55,6 +55,7 @@ type VariantCall struct { // TODO: GenotypeProbability, PhredScaleLikelyhood ? AssemblyId constants.AssemblyId `json:"assemblyId,omitempty"` + Dataset string `json:"dataset,omitempty"` DocumentId string `json:"documentId,omitempty"` } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 522f64b4..393aac3d 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -652,9 +652,9 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu SampleId: sampleId, GenotypeType: zygosity.ZygosityToString(variant.Sample.Variation.Genotype.Zygosity), Alleles: []string{alleles.Left, alleles.Right}, - - AssemblyId: variant.AssemblyId, - DocumentId: docId, + Dataset: variant.Dataset, + AssemblyId: variant.AssemblyId, + DocumentId: docId, }) } } From 3a47884b17dd23ab90a24bdb2a050cb63c465efb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 6 Jun 2023 17:59:34 -0400 Subject: [PATCH 09/84] chore: add datasets to variants overview --- src/api/services/variants/main.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/api/services/variants/main.go b/src/api/services/variants/main.go index eac9bfe7..f45d7fbb 100644 --- a/src/api/services/variants/main.go +++ b/src/api/services/variants/main.go @@ -86,6 +86,10 @@ func GetVariantsOverview(es *elasticsearch.Client, cfg *models.Config) map[strin wg.Add(1) go callGetBucketsByKeyword("assemblyIDs", "assemblyId.keyword", &wg) + // get distribution of datasets + wg.Add(1) + go 
callGetBucketsByKeyword("datasets", "dataset.keyword", &wg) + wg.Wait() return resultsMap From 315170aa28e31631baa936425f533b0e8f1bd86e Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 7 Jun 2023 23:41:10 -0400 Subject: [PATCH 10/84] chore: dataset summary --- src/api/main.go | 3 + src/api/models/dtos/main.go | 6 ++ src/api/mvc/variants/main.go | 63 ++++++++++++- .../repositories/elasticsearch/variants.go | 92 ++++++++++++++++++- 4 files changed, 161 insertions(+), 3 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index eeadc496..fc8a10e8 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -172,6 +172,9 @@ func main() { gam.MandateSampleIdsSingularAttribute, gam.ValidatePotentialGenotypeQueryParameter) + // --- Dataset + e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) + // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, // middleware diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 823e47bf..6d782967 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -59,6 +59,12 @@ type VariantCall struct { DocumentId string `json:"documentId,omitempty"` } +// --- Dataset +type DatasetSummaryResponseDto struct { + Count int `json:"count"` + DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? 
+} + // -- Genes type GenesResponseDTO struct { Status int `json:"status"` diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 393aac3d..e48c51a7 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -19,6 +19,7 @@ import ( a "gohan/api/models/constants/assembly-id" s "gohan/api/models/constants/sort" "gohan/api/models/dtos" + "gohan/api/models/dtos/errors" "gohan/api/models/indexes" "gohan/api/models/ingest" "gohan/api/mvc" @@ -446,6 +447,64 @@ func GetAllVariantIngestionRequests(c echo.Context) error { return c.JSON(http.StatusOK, m) } +func GetDatasetSummary(c echo.Context) error { + fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) + + cfg := c.(*contexts.GohanContext).Config + es := c.(*contexts.GohanContext).Es7Client + // obtain dataset from the path + dataset := c.Param("dataset") + + // dataset must be provided + if dataset == "" { + fmt.Println("Missing dataset") + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) + } + + totalVariantsCount := 0.0 + + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "", dataset) + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. 
Please try again later!")) + } + + totalVariantsCount = docs["count"].(float64) + + // obtain number of samples associated with this tableId + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + if bucketsError != nil { + fmt.Println(resultingBuckets) + } + + // retrieve aggregations.items.buckets + // and count number of samples + bucketsMapped := []interface{}{} + if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { + aggsMapped := aggs.(map[string]interface{}) + + if items, itemsOk := aggsMapped["items"]; itemsOk { + itemsMapped := items.(map[string]interface{}) + + if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { + bucketsMapped = buckets.([]interface{}) + } + } + } + + fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) + + return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, + }) +} + func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { cfg := c.(*contexts.GohanContext).Config @@ -718,7 +777,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId) + reference, alternative, alleles, genotype, assemblyId, "") } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -727,7 +786,7 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId) + reference, 
alternative, alleles, genotype, assemblyId, "") } if countError != nil { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index 1747d653..d2cfa782 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -317,7 +317,7 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, chromosome string, lowerBound int, upperBound int, variantId string, sampleId string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId, dataset string) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -377,6 +377,14 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }, }) } + + if dataset != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "query": "dataset:" + dataset, + }}) + } + rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? @@ -565,6 +573,88 @@ func GetVariantsBucketsByKeyword(cfg *models.Config, es *elasticsearch.Client, k return result, nil } +func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch.Client, keyword string, dataset string) (map[string]interface{}, error) { + // begin building the request body. 
+ var buf bytes.Buffer + aggMap := map[string]interface{}{ + "size": "0", + "aggs": map[string]interface{}{ + "items": map[string]interface{}{ + "terms": map[string]interface{}{ + "field": keyword, + "size": "10000", // increases the number of buckets returned (default is 10) + "order": map[string]string{ + "_key": "asc", + }, + }, + }, + }, + } + + if dataset != "" { + aggMap["query"] = map[string]interface{}{ + "match": map[string]interface{}{ + "dataset": dataset, + }, + } + } + + // encode the query + if err := json.NewEncoder(&buf).Encode(aggMap); err != nil { + log.Fatalf("Error encoding aggMap: %s\n", err) + return nil, err + } + + if cfg.Debug { + // view the outbound elasticsearch query + myString := string(buf.Bytes()[:]) + fmt.Println(myString) + } + + if cfg.Debug { + http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } + // Perform the search request. + res, searchErr := es.Search( + es.Search.WithContext(context.Background()), + es.Search.WithIndex(wildcardVariantsIndex), + es.Search.WithBody(&buf), + es.Search.WithTrackTotalHits(true), + es.Search.WithPretty(), + ) + if searchErr != nil { + fmt.Printf("Error getting response: %s\n", searchErr) + return nil, searchErr + } + + defer res.Body.Close() + + resultString := res.String() + if cfg.Debug { + fmt.Println(resultString) + } + + // Declared an empty interface + result := make(map[string]interface{}) + + // Unmarshal or Decode the JSON to the interface. 
+ // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming + bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) + if !strings.Contains(bracketString, "200") { + return nil, fmt.Errorf("failed to get buckets by keyword: got '%s'", bracketString) + } + // umErr := json.Unmarshal([]byte(resultString[9:]), &result) + umErr := json.Unmarshal([]byte(jsonBodyString), &result) + if umErr != nil { + fmt.Printf("Error unmarshalling response: %s\n", umErr) + return nil, umErr + } + + fmt.Printf("Query End: %s\n", time.Now()) + + return result, nil +} + // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From 2c2df96d66beae0646dc79bcd02c0b48af5bbb4f Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Thu, 8 Jun 2023 00:32:24 -0400 Subject: [PATCH 11/84] chore: optimize dataset summary --- src/api/go.mod | 2 +- src/api/go.sum | 2 + src/api/mvc/variants/main.go | 84 ++++++++++++------- .../repositories/elasticsearch/variants.go | 2 + 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/api/go.mod b/src/api/go.mod index 5129978c..1011458f 100644 --- a/src/api/go.mod +++ b/src/api/go.mod @@ -29,7 +29,7 @@ require ( github.com/valyala/fasttemplate v1.2.2 // indirect golang.org/x/crypto v0.4.0 // indirect golang.org/x/net v0.3.0 // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect + golang.org/x/sync v0.2.0 // indirect golang.org/x/sys v0.3.0 // indirect golang.org/x/text v0.5.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/src/api/go.sum b/src/api/go.sum index f00ea740..9ae3f5e3 100644 --- a/src/api/go.sum +++ b/src/api/go.sum @@ -52,6 +52,8 @@ golang.org/x/net v0.3.0 h1:VWL6FNY2bEEmsGVKabSlHu5Irp34xmMRoqb/9lF9lxk= golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/sync 
v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index e48c51a7..2f82b81d 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -33,6 +33,8 @@ import ( "github.com/mitchellh/mapstructure" "github.com/labstack/echo" + + "golang.org/x/sync/errgroup" ) func VariantsIngestionStats(c echo.Context) error { @@ -461,48 +463,66 @@ func GetDatasetSummary(c echo.Context) error { return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) } - totalVariantsCount := 0.0 + // parallelize these two es queries - docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, - "*", 0, 0, - "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset) - if countError != nil { - fmt.Printf("Failed to count variants in dataset %s\n", dataset) - return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. 
Please try again later!")) - } + var ( + totalVariantsCount = 0.0 + bucketsMapped = []interface{}{} + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "", dataset) + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return countError + } - totalVariantsCount = docs["count"].(float64) + totalVariantsCount = docs["count"].(float64) + return nil + }) - // obtain number of samples associated with this tableId - resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) - if bucketsError != nil { - fmt.Println(resultingBuckets) - } + // request #2 + g.Go(func() error { + // obtain number of samples associated with this tableId + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + if bucketsError != nil { + fmt.Printf("Failed to bucket dataset %s variants\n", dataset) + return bucketsError + } - // retrieve aggregations.items.buckets - // and count number of samples - bucketsMapped := []interface{}{} - if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { - aggsMapped := aggs.(map[string]interface{}) + // retrieve aggregations.items.buckets + // and count number of samples + if aggs, aggsOk := resultingBuckets["aggregations"]; aggsOk { + aggsMapped := aggs.(map[string]interface{}) - if items, itemsOk := aggsMapped["items"]; itemsOk { - itemsMapped := items.(map[string]interface{}) + if items, itemsOk := aggsMapped["items"]; itemsOk { + itemsMapped := items.(map[string]interface{}) - if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { - bucketsMapped = buckets.([]interface{}) + if buckets, bucketsOk := itemsMapped["buckets"]; bucketsOk { + bucketsMapped = 
buckets.([]interface{}) + } } } - } + return nil + }) - fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) + // wait for all HTTP fetches to complete. + if err := g.Wait(); err == nil { + fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) - return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), - }, - }) + return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, + }) + } else { + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) + } } func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index d2cfa782..e040942b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -611,6 +611,8 @@ func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch fmt.Println(myString) } + fmt.Printf("Query Start: %s\n", time.Now()) + if cfg.Debug { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } From cba7a5782f5685cd481207b928ec3605e4f0f0aa Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Thu, 8 Jun 2023 01:59:26 -0400 Subject: [PATCH 12/84] chore: type safe dataset qp - improved http context composition --- src/api/contexts/contexts.go | 9 +++++++++ src/api/middleware/datasetMiddleware.go | 8 +++++++- src/api/mvc/variants/main.go | 10 ++++++---- src/api/services/ingestion.go | 3 ++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index bc829a76..4ddce517 100644 
--- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -2,10 +2,12 @@ package contexts import ( "gohan/api/models" + "gohan/api/models/constants" "gohan/api/services" variantsService "gohan/api/services/variants" es7 "github.com/elastic/go-elasticsearch/v7" + "github.com/google/uuid" "github.com/labstack/echo" ) @@ -18,5 +20,12 @@ type ( Config *models.Config IngestionService *services.IngestionService VariantService *variantsService.VariantService + QueryParameters + } + + // Convenient storage for relevant http context data + QueryParameters struct { + AssemblyId constants.AssemblyId + Dataset uuid.UUID } ) diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index a86ddb59..93e16c60 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -2,10 +2,12 @@ package middleware import ( "fmt" + "gohan/api/contexts" "gohan/api/models/dtos/errors" "gohan/api/utils" "net/http" + "github.com/google/uuid" "github.com/labstack/echo" ) @@ -30,6 +32,10 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) } - return next(c) + // forward a type-safe value down the pipeline + gc := c.(*contexts.GohanContext) + gc.Dataset = uuid.MustParse(dataset) + + return next(gc) } } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 2f82b81d..4e645eb8 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -105,14 +105,14 @@ func VariantsCountBySampleId(c echo.Context) error { func VariantsIngest(c echo.Context) error { fmt.Printf("[%s] - VariantsIngest hit!\n", time.Now()) - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + + cfg := gc.Config vcfPath := cfg.Api.VcfPath drsUrl := cfg.Drs.Url drsUsername := cfg.Drs.Username drsPassword := 
cfg.Drs.Password - ingestionService := c.(*contexts.GohanContext).IngestionService - // retrieve query parameters (comman separated) var fileNames []string // get vcf files @@ -209,8 +209,9 @@ func VariantsIngest(c echo.Context) error { // ----- } + // -- from query params assemblyId := a.CastToAssemblyId(c.QueryParam("assemblyId")) - dataset := c.QueryParam("dataset") + dataset := gc.Dataset // -- optional filter var ( @@ -231,6 +232,7 @@ func VariantsIngest(c echo.Context) error { // ingest vcf // ingserviceMux := sync.RWMutex{} + ingestionService := gc.IngestionService responseDtos := []ingest.IngestResponseDTO{} for _, fileName := range fileNames { diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index fa524a55..c74fb28b 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -33,6 +33,7 @@ import ( "github.com/Jeffail/gabs" "github.com/elastic/go-elasticsearch/v7" "github.com/elastic/go-elasticsearch/v7/esutil" + "github.com/google/uuid" "github.com/mitchellh/mapstructure" ) @@ -341,7 +342,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto } func (i *IngestionService) ProcessVcf( - gzippedFilePath string, drsFileId string, dataset string, + gzippedFilePath string, drsFileId string, dataset uuid.UUID, assemblyId constants.AssemblyId, filterOutReferences bool, lineProcessingConcurrencyLevel int) { From df72857a7e7064a3f74000dab9799d60112d32fe Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 16:16:54 -0400 Subject: [PATCH 13/84] patch: fix auto-merge errors --- src/api/contexts/contexts.go | 8 +------- src/api/mvc/main.go | 2 ++ src/api/mvc/variants/main.go | 5 +---- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index 3933b428..b1c36f38 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -21,13 +21,6 @@ type ( Config *models.Config IngestionService 
*services.IngestionService VariantService *variantsService.VariantService - QueryParameters - } - - // Convenient storage for relevant http context data - QueryParameters struct { - AssemblyId constants.AssemblyId - Dataset uuid.UUID } // Convenient storage for relevant http context data @@ -37,6 +30,7 @@ type ( Chromosome string Genotype constants.GenotypeQuery SampleIds []string + Dataset uuid.UUID PositionBounds } diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 7694a572..635cc5f4 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -3,6 +3,8 @@ package mvc import ( "gohan/api/contexts" "gohan/api/models/constants" + a "gohan/api/models/constants/assembly-id" + gq "gohan/api/models/constants/genotype-query" "strings" "github.com/elastic/go-elasticsearch/v7" diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 0f308082..53cd9cba 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -109,6 +109,7 @@ func VariantsIngest(c echo.Context) error { // query parameters assemblyId := gc.AssemblyId + dataset := gc.Dataset // retrieve query parameters (comman separated) var fileNames []string @@ -206,10 +207,6 @@ func VariantsIngest(c echo.Context) error { // ----- } - // -- from query params - assemblyId := gc.AssemblyId - dataset := gc.Dataset - // -- optional filter var ( filterOutReferences bool = false // default From 32930bcfa630da7dd059aa0ea62c039d5d5ae1dc Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 16:23:30 -0400 Subject: [PATCH 14/84] patch: readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc60ffe3..91fdecbf 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ # move vcf.gz files to `$GOHAN_API_VCF_PATH` # ingest vcf.gz - curl -k https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true + curl -k 
https://gohan.local/variants/ingestion/run\?fileNames=\&assemblyId=GRCh37\&filterOutReferences=true\&dataset=00000000-0000-0000-0000-000000000000 # monitor progress: curl -k https://gohan.local/variants/ingestion/requests From c0a102b9c40debeae69243b25f8d8f45572fa46c Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 16:08:22 -0400 Subject: [PATCH 15/84] chore: simple service-info unit test --- src/api/go.mod | 12 ++-- src/api/go.sum | 24 ++++---- src/api/tests/unit/api/service_info_test.go | 66 +++++++++++++++++++++ 3 files changed, 86 insertions(+), 16 deletions(-) create mode 100644 src/api/tests/unit/api/service_info_test.go diff --git a/src/api/go.mod b/src/api/go.mod index 5129978c..5b4e8490 100644 --- a/src/api/go.mod +++ b/src/api/go.mod @@ -21,16 +21,16 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect github.com/labstack/gommon v0.4.0 // indirect - github.com/mattn/go-colorable v0.1.11 // indirect - github.com/mattn/go-isatty v0.0.14 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.17 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect - golang.org/x/crypto v0.4.0 // indirect - golang.org/x/net v0.3.0 // indirect + golang.org/x/crypto v0.6.0 // indirect + golang.org/x/net v0.7.0 // indirect golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect - golang.org/x/sys v0.3.0 // indirect - golang.org/x/text v0.5.0 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/src/api/go.sum b/src/api/go.sum index f00ea740..b417224c 100644 --- a/src/api/go.sum +++ b/src/api/go.sum @@ -23,10 +23,13 @@ github.com/labstack/echo v3.3.10+incompatible h1:pGRcYk231ExFAyoAjAfD85kQzRJCRI8 
github.com/labstack/echo v3.3.10+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s= github.com/labstack/gommon v0.4.0 h1:y7cvthEAEbU0yHOf4axH8ZG2NH8knB9iNSoTO8dyIk8= github.com/labstack/gommon v0.4.0/go.mod h1:uW6kP17uPlLJsD3ijUYn3/M5bAxtlZhMI6m3MFxTMTM= -github.com/mattn/go-colorable v0.1.11 h1:nQ+aFkoE2TMGc0b68U2OKSexC+eq46+XwZzWXHRmPYs= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= -github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -46,19 +49,20 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= -golang.org/x/crypto v0.4.0 h1:UVQgzMY87xqpKNgb+kDsll2Igd33HszWHFLmpaRMq/8= -golang.org/x/crypto v0.4.0/go.mod h1:3quD/ATkf6oY+rnes5c3ExXTbLc8mueNue5/DoinL80= -golang.org/x/net v0.3.0 h1:VWL6FNY2bEEmsGVKabSlHu5Irp34xmMRoqb/9lF9lxk= -golang.org/x/net v0.3.0/go.mod 
h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/crypto v0.6.0 h1:qfktjS5LUO+fFKeJXZ+ikTRijMmljikvG68fpMMruSc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= -golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= -golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/src/api/tests/unit/api/service_info_test.go b/src/api/tests/unit/api/service_info_test.go 
new file mode 100644 index 00000000..1effdd81 --- /dev/null +++ b/src/api/tests/unit/api/service_info_test.go @@ -0,0 +1,66 @@ +package api + +import ( + "encoding/json" + "gohan/api/contexts" + serviceInfo "gohan/api/models/constants/service-info" + serviceInfoMvc "gohan/api/mvc/service-info" + "gohan/api/tests/common" + "io" + + "net/http" + "net/http/httptest" + "testing" + + "github.com/labstack/echo" + "github.com/stretchr/testify/assert" +) + +func TestGetServiceInfo(t *testing.T) { + cfg := common.InitConfig() + + setUpEcho := func(method string, path string) (*contexts.GohanContext, *httptest.ResponseRecorder) { + e := echo.New() + req := httptest.NewRequest(method, path, nil) + rec := httptest.NewRecorder() + c := e.NewContext(req, rec) + gc := &contexts.GohanContext{ + Context: c, + Es7Client: nil, // todo mockup + Config: cfg, + IngestionService: nil, + VariantService: nil, + } + return gc, rec + } + + getJsonBody := func(rec *httptest.ResponseRecorder) map[string]interface{} { + // - extract body bytes from response + body, _ := io.ReadAll(rec.Body) + // - unmarshal or decode the JSON to a declared empty interface. 
+ var bodyJson map[string]interface{} + json.Unmarshal(body, &bodyJson) + + return bodyJson + } + t.Run("should return 200 status ok", func(t *testing.T) { + //set up + gc, rec := setUpEcho(http.MethodGet, "/service-info") + + // perform + serviceInfoMvc.GetServiceInfo(gc) + + // verify response status + assert.Equal(t, http.StatusOK, rec.Code) + + // verify body + json := getJsonBody(rec) + + // - detailed + assert.Equal(t, json["bento"].(map[string]interface{})["dataService"].(bool), true) + + assert.Equal(t, json["id"].(string), string(serviceInfo.SERVICE_ID)) + assert.Equal(t, json["name"].(string), string(serviceInfo.SERVICE_NAME)) + assert.Equal(t, json["description"].(string), string(serviceInfo.SERVICE_DESCRIPTION)) + }) +} From d319318f841d12abc993739462202695e763508d Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 19:53:58 -0400 Subject: [PATCH 16/84] patch: api internal port env --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index f1651b77..7dedd6aa 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -58,7 +58,7 @@ services: - GOHAN_API_BULK_INDEXING_CAP=${GOHAN_API_BULK_INDEXING_CAP} - GOHAN_API_FILE_PROC_CONC_LVL=${GOHAN_API_FILE_PROC_CONC_LVL} - GOHAN_API_LINE_PROC_CONC_LVL=${GOHAN_API_LINE_PROC_CONC_LVL} - + - GOHAN_API_INTERNAL_PORT # Elasticsearch - GOHAN_ES_URL=${GOHAN_PRIVATE_ES_URL} From bb9c2daba22d4c9ac4b245fa280a4a06c0b96762 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 20:21:31 -0400 Subject: [PATCH 17/84] chore: begin api test ci --- .github/workflows/api.test.yml | 40 ++++++++++++++++++++++++++++++++++ Makefile | 26 +++++++++++++++++++++- docker-compose.test.yaml | 21 ++++++++++++++++++ etc/test.yml.tpl | 22 +++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/api.test.yml create mode 100644 docker-compose.test.yaml create mode 100644 
etc/test.yml.tpl diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml new file mode 100644 index 00000000..dddab926 --- /dev/null +++ b/.github/workflows/api.test.yml @@ -0,0 +1,40 @@ +name: Build and push gohan-api + +on: + push: + branches: + - "*" # TEMP + +jobs: + build-push: + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Load environment variables from .env file + uses: xom9ikk/dotenv@v2 + + # - name: Run Bento build action + # uses: bento-platform/bento_build_action@v0.11.0 + # with: + # context: "{{defaultContext}}:src/api" + # build-args: | + # BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} + # BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} + # registry: ghcr.io + # registry-username: ${{ github.actor }} + # registry-password: ${{ secrets.GITHUB_TOKEN }} + # image-name: ghcr.io/bento-platform/gohan-api + # development-dockerfile: Dockerfile + # dockerfile: Dockerfile + - name: Test + run: | + make test-api + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 \ No newline at end of file diff --git a/Makefile b/Makefile index d1ef115a..5086aea4 100644 --- a/Makefile +++ b/Makefile @@ -196,17 +196,41 @@ clean-api-drs-bridge-data: ## Tests -test-api-dev: prepare-test-config +test-api: prepare-test-config + # # @# Run the tests directly from the api source directory + # # cd src/api && \ + # # go clean -cache && \ + # # go test ./tests/unit/... -v + docker compose -f docker-compose.test.yaml down + docker compose -f docker-compose.test.yaml up -d + # ... + cd src/api && \ + go clean -cache && \ + go test ./tests/integration/api/api_variant_test.go -v && \ + cd ../.. + docker compose -f docker-compose.test.yaml stop + #docker logs gohan-api + + # go test ./tests/unit/... -v && \ + +test-api-dev: prepare-dev-config @# Run the tests cd src/api && \ go clean -cache && \ go test ./tests/integration/... 
-v + prepare-test-config: + @# Prepare environment variables dynamically via a JSON file + @# since xUnit doens't support loading env variables natively + envsubst < ./etc/test.yml.tpl > ./src/api/tests/common/test.config.yml + +prepare-dev-config: @# Prepare environment variables dynamically via a JSON file @# since xUnit doens't support loading env variables natively envsubst < ./etc/test.config.yml.tpl > ./src/api/tests/common/test.config.yml + clean-tests: @# Clean up rm ./src/api/tests/common/test.config.yml \ No newline at end of file diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml new file mode 100644 index 00000000..a7a087f9 --- /dev/null +++ b/docker-compose.test.yaml @@ -0,0 +1,21 @@ +# # Test-only (!) compose file for the Gohan + +networks: + bridge-net: + external: true + +services: + api: + ports: + - "${GOHAN_API_EXTERNAL_PORT}:${GOHAN_API_INTERNAL_PORT}" + extends: + file: docker-compose.yaml + service: api + + + elasticsearch: + ports: + - ${GOHAN_ES_EXTERNAL_PORT_1}:${GOHAN_ES_INTERNAL_PORT_1} + extends: + file: docker-compose.yaml + service: elasticsearch diff --git a/etc/test.yml.tpl b/etc/test.yml.tpl new file mode 100644 index 00000000..94a0fdfb --- /dev/null +++ b/etc/test.yml.tpl @@ -0,0 +1,22 @@ +debug: true +api: + url: http://0.0.0.0:5000 + port: ${GOHAN_API_INTERNAL_PORT} + vcfPath: "${GOHAN_API_CONTAINERIZED_VCF_PATH}" + localVcfPath: "${GOHAN_API_VCF_PATH}" + +elasticsearch: + url: elasticsearch:${GOHAN_API_ES_PORT} + username: "${GOHAN_ES_USERNAME}" + password: "${GOHAN_ES_PASSWORD}" + +drs: + url: drs:${GOHAN_DRS_INTERNAL_PORT} + username: "${GOHAN_DRS_BASIC_AUTH_USERNAME}" + password: "${GOHAN_DRS_BASIC_AUTH_PASSWORD}" + +authX: + isAuthorizationEnabled: false + oidcPublicJwksUrl: "" + opaUrl: "" + requiredHeadersCommaSep: "" \ No newline at end of file From 434fd8d12dea9f640ba9742162c69c9786a50fe6 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 20:22:51 -0400 Subject: [PATCH 18/84] patch: 
typo --- .github/workflows/api.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index dddab926..9226eadf 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -1,4 +1,4 @@ -name: Build and push gohan-api +name: Test gohan-api on: push: From 6c5adec62cbecc2fec1ee5cd6c891cffb7139466 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 20:26:46 -0400 Subject: [PATCH 19/84] patch: trigger ci --- .github/workflows/api.test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 9226eadf..22b455eb 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -3,7 +3,10 @@ name: Test gohan-api on: push: branches: - - "*" # TEMP + - "**" # TEMP + pull_request: + branches: + - "features/**" # temp jobs: build-push: From 584bde9be7a76cf6cf18265f79f8d522e458a521 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 20:29:28 -0400 Subject: [PATCH 20/84] patch: test includes env --- .github/workflows/api.test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 22b455eb..3ce3f857 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -19,7 +19,13 @@ jobs: steps: - name: Checkout uses: actions/checkout@v3 - + + - name: Gather default environment variables + id: envx + run: | + cp ./etc/example.env .env + source .env + - name: Load environment variables from .env file uses: xom9ikk/dotenv@v2 From 6882d1626e5f6b4e5b6d87bc879cd5f2dfb00f31 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 20:33:37 -0400 Subject: [PATCH 21/84] patch: include init in test-api --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
5086aea4..df8b94c2 100644 --- a/Makefile +++ b/Makefile @@ -196,11 +196,12 @@ clean-api-drs-bridge-data: ## Tests -test-api: prepare-test-config +test-api: init prepare-test-config # # @# Run the tests directly from the api source directory # # cd src/api && \ # # go clean -cache && \ # # go test ./tests/unit/... -v + docker compose -f docker-compose.test.yaml down docker compose -f docker-compose.test.yaml up -d # ... From c4fd72f7815e21f491652f1a238939939515e3d5 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 21:12:08 -0400 Subject: [PATCH 22/84] patch: lowering default es resources --- etc/example.env | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/etc/example.env b/etc/example.env index 6a11c3fc..6b4d4e1f 100644 --- a/etc/example.env +++ b/etc/example.env @@ -85,8 +85,10 @@ GOHAN_ES_EXTERNAL_PORT_2=9300 GOHAN_ES_INTERNAL_PORT_2=9300 GOHAN_ES_DISC_TYP=single-node -GOHAN_ES_MEM_LIM=14G -GOHAN_ES_CPUS=6 +GOHAN_ES_MEM_LIM=4G +GOHAN_ES_CPUS=2 +#GOHAN_ES_MEM_LIM=14G +#GOHAN_ES_CPUS=6 # allocate a little bit less memory than what is stated in `mem_limit` ^^ GOHAN_ES_JAVA_OPTS="ES_JAVA_OPTS=-Xms13g -Xmx13g" From 9df8332a2dd81528bc247c2d6ec3c3e507188f7d Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 21:12:41 -0400 Subject: [PATCH 23/84] patch: rename test job --- .github/workflows/api.test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 3ce3f857..2b5f82a9 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -9,7 +9,7 @@ on: - "features/**" # temp jobs: - build-push: + build-test: runs-on: ubuntu-latest permissions: @@ -25,7 +25,7 @@ jobs: run: | cp ./etc/example.env .env source .env - + - name: Load environment variables from .env file uses: xom9ikk/dotenv@v2 From 60f665d384d32a1978c48a7560594f59d2e59e23 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 
Jun 2023 21:20:30 -0400 Subject: [PATCH 24/84] patch: unit tests, debug false, logs --- Makefile | 9 ++-- etc/test.yml.tpl | 2 +- src/api/tests/unit/api/variants_test.go | 66 +++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 src/api/tests/unit/api/variants_test.go diff --git a/Makefile b/Makefile index df8b94c2..bba41010 100644 --- a/Makefile +++ b/Makefile @@ -204,15 +204,18 @@ test-api: init prepare-test-config docker compose -f docker-compose.test.yaml down docker compose -f docker-compose.test.yaml up -d - # ... + cd src/api && \ go clean -cache && \ + go test ./tests/unit/... -v && \ go test ./tests/integration/api/api_variant_test.go -v && \ cd ../.. + docker compose -f docker-compose.test.yaml stop - #docker logs gohan-api + + docker logs gohan-api | tail -n 50 + docker logs elasticsearch | tail -n 50 - # go test ./tests/unit/... -v && \ test-api-dev: prepare-dev-config @# Run the tests diff --git a/etc/test.yml.tpl b/etc/test.yml.tpl index 94a0fdfb..7a9b49b3 100644 --- a/etc/test.yml.tpl +++ b/etc/test.yml.tpl @@ -1,4 +1,4 @@ -debug: true +debug: false api: url: http://0.0.0.0:5000 port: ${GOHAN_API_INTERNAL_PORT} diff --git a/src/api/tests/unit/api/variants_test.go b/src/api/tests/unit/api/variants_test.go new file mode 100644 index 00000000..2faba3f9 --- /dev/null +++ b/src/api/tests/unit/api/variants_test.go @@ -0,0 +1,66 @@ +package api + +import ( + "encoding/json" + "gohan/api/contexts" + serviceInfo "gohan/api/models/constants/service-info" + serviceInfoMvc "gohan/api/mvc/service-info" + "gohan/api/tests/common" + "io" + + "net/http" + "net/http/httptest" + "testing" + + "github.com/labstack/echo" + "github.com/stretchr/testify/assert" +) + +func TestGetVariantsOverview(t *testing.T) { + cfg := common.InitConfig() + + setUpEcho := func(method string, path string) (*contexts.GohanContext, *httptest.ResponseRecorder) { + e := echo.New() + req := httptest.NewRequest(method, path, nil) + rec := 
httptest.NewRecorder() + c := e.NewContext(req, rec) + gc := &contexts.GohanContext{ + Context: c, + Es7Client: nil, // todo mockup + Config: cfg, + IngestionService: nil, + VariantService: nil, + } + return gc, rec + } + + getJsonBody := func(rec *httptest.ResponseRecorder) map[string]interface{} { + // - extract body bytes from response + body, _ := io.ReadAll(rec.Body) + // - unmarshal or decode the JSON to a declared empty interface. + var bodyJson map[string]interface{} + json.Unmarshal(body, &bodyJson) + + return bodyJson + } + t.Run("should return 200 status ok and internal error", func(t *testing.T) { + //set up + gc, rec := setUpEcho(http.MethodGet, "/variants/overview") + + // perform + serviceInfoMvc.GetServiceInfo(gc) + + // verify response status + assert.Equal(t, http.StatusOK, rec.Code) + + // verify body + json := getJsonBody(rec) + + // - detailed + assert.Equal(t, json["bento"].(map[string]interface{})["dataService"].(bool), true) + + assert.Equal(t, json["id"].(string), string(serviceInfo.SERVICE_ID)) + assert.Equal(t, json["name"].(string), string(serviceInfo.SERVICE_NAME)) + assert.Equal(t, json["description"].(string), string(serviceInfo.SERVICE_DESCRIPTION)) + }) +} From dd7fa0065c200d472bb78ea207f3caf5d08e1ddb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Tue, 13 Jun 2023 21:36:05 -0400 Subject: [PATCH 25/84] patch remove codecov --- .github/workflows/api.test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 2b5f82a9..62f2e837 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -44,6 +44,4 @@ jobs: # dockerfile: Dockerfile - name: Test run: | - make test-api - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 \ No newline at end of file + make test-api \ No newline at end of file From e8b5ef115e9ddc9c23a1657941929b073801be9c Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 
Jun 2023 00:48:00 -0400 Subject: [PATCH 26/84] chore: include test drs --- docker-compose.test.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker-compose.test.yaml b/docker-compose.test.yaml index a7a087f9..1ffc48e2 100644 --- a/docker-compose.test.yaml +++ b/docker-compose.test.yaml @@ -19,3 +19,10 @@ services: extends: file: docker-compose.yaml service: elasticsearch + + drs: + ports: + - "${GOHAN_DRS_EXTERNAL_PORT}:${GOHAN_DRS_INTERNAL_PORT}" + extends: + file: docker-compose.yaml + service: drs \ No newline at end of file From 617048101bfe4e0b87e9cc647ef9134fb611a8c9 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 Jun 2023 00:48:33 -0400 Subject: [PATCH 27/84] chore: build tests - beginning with demo ingestion --- Makefile | 3 +- etc/test.yml.tpl | 2 +- src/api/models/ingest/ingest.go | 11 + src/api/tests/build/api/variants_test.go | 209 ++++++++++++++++++ src/api/tests/common/common.go | 51 +++++ .../tests/integration/api/api_variant_test.go | 55 +---- 6 files changed, 277 insertions(+), 54 deletions(-) create mode 100644 src/api/tests/build/api/variants_test.go diff --git a/Makefile b/Makefile index bba41010..367ffde3 100644 --- a/Makefile +++ b/Makefile @@ -207,8 +207,7 @@ test-api: init prepare-test-config cd src/api && \ go clean -cache && \ - go test ./tests/unit/... -v && \ - go test ./tests/integration/api/api_variant_test.go -v && \ + go test ./tests/build/... -v && \ cd ../.. 
docker compose -f docker-compose.test.yaml stop diff --git a/etc/test.yml.tpl b/etc/test.yml.tpl index 7a9b49b3..f2b86ce4 100644 --- a/etc/test.yml.tpl +++ b/etc/test.yml.tpl @@ -11,7 +11,7 @@ elasticsearch: password: "${GOHAN_ES_PASSWORD}" drs: - url: drs:${GOHAN_DRS_INTERNAL_PORT} + url: gohan-drs:${GOHAN_DRS_INTERNAL_PORT} username: "${GOHAN_DRS_BASIC_AUTH_USERNAME}" password: "${GOHAN_DRS_BASIC_AUTH_PASSWORD}" diff --git a/src/api/models/ingest/ingest.go b/src/api/models/ingest/ingest.go index bc419a42..e3142c79 100644 --- a/src/api/models/ingest/ingest.go +++ b/src/api/models/ingest/ingest.go @@ -37,3 +37,14 @@ type IngestResponseDTO struct { State State `json:"state"` Message string `json:"message"` } + +type IngestStatsDto struct { + NumAdded int + NumFlushed int + NumFailed int + NumIndexed int + NumCreated int + NumUpdated int + NumDeleted int + NumRequests int +} diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go new file mode 100644 index 00000000..37c49641 --- /dev/null +++ b/src/api/tests/build/api/variants_test.go @@ -0,0 +1,209 @@ +package api + +import ( + "encoding/json" + "fmt" + ingest "gohan/api/models/ingest" + common "gohan/api/tests/common" + "io/ioutil" + "log" + "net/http" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" +) + +const ( + VariantsOverviewPath string = "%s/variants/overview" + VariantsGetBySampleIdsPathWithQueryString string = "%s/variants/get/by/sampleId%s" + IngestionRequestsPath string = "%s/variants/ingestion/requests" +) + +func TestIngest(t *testing.T) { + cfg := common.InitConfig() + + // create demo vcf string + sampleId := "abc1234" + vcfString := `##fileformat=VCFv4.2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S-1178-HAP +1 13656 . CAG C, 868.60 . 
BaseQRankSum=-5.505;DP=81;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-2.985;RAW_MQandDP=43993,81;ReadPosRankSum=-0.136 GT:AD:DP:GQ:PL:SB 0:50,25,0:75:99:876,0,2024,1026,2099,3126:4,46,5,20 +10 28872481 . CAAAA C,CA,CAAA,CAAAAA,CAAAAAA, 652.60 . BaseQRankSum=0.029;DP=83;ExcessHet=3.0103;MLEAC=0,0,0,1,0,0;MLEAF=0.00,0.00,0.00,0.500,0.00,0.00;MQRankSum=-0.186;RAW_MQandDP=291409,83;ReadPosRankSum=-0.582 GT:AD:DP:GQ:PL:SB 0:19,3,2,5,29,9,0:67:99:660,739,2827,748,2714,2732,724,1672,1682,1587,0,340,338,249,265,321,956,929,699,245,898,866,1996,1991,1652,466,1006,1944:0,19,0,48 +19 3619025 . C . . END=3619025 GT:DP:GQ:MIN_DP:PL 0:19:21:19:0,21,660 +19 3619026 . T . . END=3619026 GT:DP:GQ:MIN_DP:PL 0:19:51:19:0,51,765` + + // - save string to vcf directory + localDataRootPath := getRootGohanPath() + "/data" + localVcfPath := localDataRootPath + "/vcfs" + + newFilePath := fmt.Sprintf("%s/%s.vcf", localVcfPath, sampleId) + + // - create file if not exists + var ( + file *os.File + err error + ) + + file, err = os.Create(newFilePath) + if isError(err) { + return + } + defer file.Close() + + // - reopen file using READ & WRITE permission. + file, err = os.OpenFile(newFilePath, os.O_RDWR, 0644) + if isError(err) { + return + } + defer file.Close() + + // - write some vcf string to file. 
+ _, err = file.WriteString(vcfString) + if isError(err) { + return + } + defer func() { os.Remove(newFilePath) }() + + // compress the vcf file with bgzip + out, err := exec.Command("bgzip", newFilePath).Output() + if err != nil { + fmt.Println(err.Error()) + return + } + fmt.Println(string(out)) + + newGzFile := newFilePath + ".gz" + defer func() { os.Remove(newGzFile) }() + + // - ingest + assemblyId := "GRCh38" + tableId := uuid.NewString() + containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) + // containerizedVcfFilePath := "/data/S-1178.hc.g.vcf.gz" + + client := &http.Client{} + + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&tableId=%s", assemblyId, containerizedVcfFilePath, tableId) + ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) + + ingestRequest, _ := http.NewRequest("GET", ingestUrl, nil) + + response, responseErr := client.Do(ingestRequest) + assert.Nil(t, responseErr) + defer response.Body.Close() + assert.Equal(t, 200, response.StatusCode, response.Status) + + fmt.Println(response.Body) + + var dtos = getDtosFromBody(response) + + // check ingestion request + for { + fmt.Println("Checking state of the ingestion..") + + // make the call + ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) + + ingReqsRequest, _ := http.NewRequest("GET", ingReqsUrl, nil) + response, responseErr := client.Do(ingReqsRequest) + assert.Nil(t, responseErr) + defer response.Body.Close() + assert.Equal(t, 200, response.StatusCode, response.Status) + + dtos = getDtosFromBody(response) + + foundDone := false + for _, dto := range dtos { + if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Done" { + foundDone = true + break + } + } + if foundDone { + fmt.Println("Done, moving on..") + break + } else { + // pause + time.Sleep(3 * time.Second) + } + } + + // check ingestion stats + for { + fmt.Println("Checking ingestion stats..") + // pause + time.Sleep(3 * time.Second) + + // 
make the call + ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/stats", cfg.Api.Url) + + statsRequest, _ := http.NewRequest("GET", ingReqsUrl, nil) + response, responseErr := client.Do(statsRequest) + assert.Nil(t, responseErr) + defer response.Body.Close() + assert.Equal(t, 200, response.StatusCode, response.Status) + + body, readErr := ioutil.ReadAll(response.Body) + if readErr != nil { + log.Fatal(readErr) + } + + stats := ingest.IngestStatsDto{} + jsonErr := json.Unmarshal(body, &stats) + if jsonErr != nil { + log.Fatal(jsonErr) + } + + fmt.Println(stats.NumAdded) + fmt.Println(stats.NumFlushed) + if stats.NumAdded == stats.NumFlushed { + fmt.Println("Done, moving on..") + break + } else { + // pause + time.Sleep(3 * time.Second) + } + } + + // todo: verify demo vcf was properly ingested + // by pinging it with specific queries + overviewJson := common.GetVariantsOverview(t, cfg) + assert.NotNil(t, overviewJson) +} + +func getDtosFromBody(response *http.Response) []ingest.IngestResponseDTO { + var dtos []ingest.IngestResponseDTO + if jsonErr := json.NewDecoder(response.Body).Decode(&dtos); jsonErr != nil { + fmt.Println(jsonErr) + } + + return dtos +} + +func isError(err error) bool { + if err != nil { + fmt.Println(err.Error()) + } + + return (err != nil) +} + +func getRootGohanPath() string { + // check if file exists + wd, err1 := os.Getwd() + if err1 != nil { + log.Println(err1) + } + fmt.Println(wd) // for example /home/user + + path := filepath.Dir(wd) + for i := 1; i < 5; i++ { + path = filepath.Dir(path) + } + + return path +} diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index 3adb398e..93ac6689 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -2,16 +2,26 @@ package common import ( "crypto/tls" + "encoding/json" "fmt" "gohan/api/models" + "io/ioutil" "net/http" "os" "path" "runtime" + "testing" + "github.com/stretchr/testify/assert" yaml "gopkg.in/yaml.v2" ) +const ( + 
VariantsOverviewPath string = "%s/variants/overview" + VariantsGetBySampleIdsPathWithQueryString string = "%s/variants/get/by/sampleId%s" + IngestionRequestsPath string = "%s/variants/ingestion/requests" +) + func InitConfig() *models.Config { var cfg models.Config @@ -43,3 +53,44 @@ func processError(err error) { fmt.Println(err) os.Exit(2) } + +func GetVariantsOverview(_t *testing.T, _cfg *models.Config) map[string]interface{} { + request, _ := http.NewRequest("GET", fmt.Sprintf(VariantsOverviewPath, _cfg.Api.Url), nil) + + client := &http.Client{} + response, responseErr := client.Do(request) + assert.Nil(_t, responseErr) + + defer response.Body.Close() + + // this test (at the time of writing) will only work if authorization is disabled + shouldBe := 200 + assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) + + // -- interpret array of ingestion requests from response + overviewRespBody, overviewRespBodyErr := ioutil.ReadAll(response.Body) + assert.Nil(_t, overviewRespBodyErr) + + // --- transform body bytes to string + overviewRespBodyString := string(overviewRespBody) + + // -- check for json error + var overviewRespJson map[string]interface{} + overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &overviewRespJson) + assert.Nil(_t, overviewJsonUnmarshallingError) + + // -- insure it's an empty array + chromosomesKey, ckOk := overviewRespJson["chromosomes"] + assert.True(_t, ckOk) + assert.NotNil(_t, chromosomesKey) + + variantIDsKey, vidkOk := overviewRespJson["variantIDs"] + assert.True(_t, vidkOk) + assert.NotNil(_t, variantIDsKey) + + sampleIDsKey, sidkOk := overviewRespJson["sampleIDs"] + assert.True(_t, sidkOk) + assert.NotNil(_t, sampleIDsKey) + + return overviewRespJson +} diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index fc784b28..0ae523e6 100644 --- 
a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -27,12 +27,6 @@ import ( "github.com/stretchr/testify/assert" ) -const ( - VariantsOverviewPath string = "%s/variants/overview" - VariantsGetBySampleIdsPathWithQueryString string = "%s/variants/get/by/sampleId%s" - IngestionRequestsPath string = "%s/variants/ingestion/requests" -) - func TestWithInvalidAuthenticationToken(t *testing.T) { cfg := common.InitConfig() @@ -60,14 +54,14 @@ func TestWithInvalidAuthenticationToken(t *testing.T) { func TestVariantsOverview(t *testing.T) { cfg := common.InitConfig() - overviewJson := getVariantsOverview(t, cfg) + overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) } func TestGetIngestionRequests(t *testing.T) { cfg := common.InitConfig() - request, _ := http.NewRequest("GET", fmt.Sprintf(IngestionRequestsPath, cfg.Api.Url), nil) + request, _ := http.NewRequest("GET", fmt.Sprintf(common.IngestionRequestsPath, cfg.Api.Url), nil) client := &http.Client{} response, responseErr := client.Do(request) @@ -657,52 +651,11 @@ func buildQueryAndMakeGetVariantsCall( if commaDeliminatedAlleles != "" { queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&alleles=%s", commaDeliminatedAlleles)) } - url := fmt.Sprintf(VariantsGetBySampleIdsPathWithQueryString, _cfg.Api.Url, queryString) + url := fmt.Sprintf(common.VariantsGetBySampleIdsPathWithQueryString, _cfg.Api.Url, queryString) return makeGetVariantsCall(url, ignoreStatusCode, _t) } -func getVariantsOverview(_t *testing.T, _cfg *models.Config) map[string]interface{} { - request, _ := http.NewRequest("GET", fmt.Sprintf(VariantsOverviewPath, _cfg.Api.Url), nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, 
response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) - - // -- interpret array of ingestion requests from response - overviewRespBody, overviewRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, overviewRespBodyErr) - - // --- transform body bytes to string - overviewRespBodyString := string(overviewRespBody) - - // -- check for json error - var overviewRespJson map[string]interface{} - overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &overviewRespJson) - assert.Nil(_t, overviewJsonUnmarshallingError) - - // -- insure it's an empty array - chromosomesKey, ckOk := overviewRespJson["chromosomes"] - assert.True(_t, ckOk) - assert.NotNil(_t, chromosomesKey) - - variantIDsKey, vidkOk := overviewRespJson["variantIDs"] - assert.True(_t, vidkOk) - assert.NotNil(_t, variantIDsKey) - - sampleIDsKey, sidkOk := overviewRespJson["sampleIDs"] - assert.True(_t, sidkOk) - assert.NotNil(_t, sampleIDsKey) - - return overviewRespJson -} - func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string { var allCombinations = [][]string{} @@ -721,7 +674,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc cfg := common.InitConfig() // retrieve the overview - overviewJson := getVariantsOverview(_t, cfg) + overviewJson := common.GetVariantsOverview(_t, cfg) // ensure the response is valid // TODO: error check instead of nil check From bc6c80ac220cb5e5bda24a1a0cc0d014e077b4fa Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 Jun 2023 00:52:48 -0400 Subject: [PATCH 28/84] patch: install tabix on test host --- .github/workflows/api.test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 62f2e837..719bc356 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -44,4 +44,5 @@ 
jobs: # dockerfile: Dockerfile - name: Test run: | + apt-get install -y tabix make test-api \ No newline at end of file From f526e38164e2d339a8370d39f802420e110c2ab0 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 Jun 2023 00:53:36 -0400 Subject: [PATCH 29/84] patch: sudo --- .github/workflows/api.test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index 719bc356..f958bb79 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -42,7 +42,7 @@ jobs: # image-name: ghcr.io/bento-platform/gohan-api # development-dockerfile: Dockerfile # dockerfile: Dockerfile - - name: Test + - name: API Test run: | - apt-get install -y tabix + sudo apt-get install -y tabix make test-api \ No newline at end of file From e5f933ec5110aeb4cc7693b5ef89c6d4d93826d3 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 Jun 2023 00:58:14 -0400 Subject: [PATCH 30/84] patch: ingestion test error handling --- src/api/tests/build/api/variants_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 37c49641..d5cff9c0 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -122,6 +122,9 @@ func TestIngest(t *testing.T) { foundDone = true break } + if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Error" { + log.Fatal(dto.Message) + } } if foundDone { fmt.Println("Done, moving on..") From 153b7bc8eb7fd193e847ef7d99d789f81d795bbb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Wed, 14 Jun 2023 01:04:49 -0400 Subject: [PATCH 31/84] patch: - temp file ownership setting, testing only - check for failed flushed files --- Makefile | 10 +++++----- src/api/tests/build/api/variants_test.go | 9 ++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile 
index 367ffde3..de2a3112 100644 --- a/Makefile +++ b/Makefile @@ -51,27 +51,27 @@ init-vendor: init-data-dirs: mkdir -p ${GOHAN_API_DRS_BRIDGE_HOST_DIR} chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_API_DRS_BRIDGE_HOST_DIR} - chmod -R 770 ${GOHAN_API_DRS_BRIDGE_HOST_DIR} + chmod -R 777 ${GOHAN_API_DRS_BRIDGE_HOST_DIR} mkdir -p ${GOHAN_DRS_DATA_DIR} chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_DRS_DATA_DIR} - chmod -R 770 ${GOHAN_DRS_DATA_DIR} + chmod -R 777 ${GOHAN_DRS_DATA_DIR} mkdir -p ${GOHAN_ES_DATA_DIR} chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_ES_DATA_DIR} - chmod -R 770 ${GOHAN_ES_DATA_DIR} + chmod -R 777 ${GOHAN_ES_DATA_DIR} @# tmp: @# (setup for when gohan needs to preprocess vcf's at ingestion time): mkdir -p ${GOHAN_API_VCF_PATH} mkdir -p ${GOHAN_API_VCF_PATH}/tmp chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_API_VCF_PATH} - chmod -R 770 ${GOHAN_API_VCF_PATH}/tmp + chmod -R 777 ${GOHAN_API_VCF_PATH}/tmp mkdir -p ${GOHAN_API_GTF_PATH} mkdir -p ${GOHAN_API_GTF_PATH}/tmp chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_API_GTF_PATH} - chmod -R 770 ${GOHAN_API_GTF_PATH}/tmp + chmod -R 777 ${GOHAN_API_GTF_PATH}/tmp @echo ".. done!" 
diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index d5cff9c0..d5eff4d6 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -166,10 +166,13 @@ func TestIngest(t *testing.T) { if stats.NumAdded == stats.NumFlushed { fmt.Println("Done, moving on..") break - } else { - // pause - time.Sleep(3 * time.Second) } + if stats.NumFailed > 0 { + log.Fatal("More than one variant failed to flush") + } + + // pause + time.Sleep(3 * time.Second) } // todo: verify demo vcf was properly ingested From f5f6485e4f01521502a39bac6c057055c9f01beb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sat, 17 Jun 2023 19:05:08 -0400 Subject: [PATCH 32/84] chore: testing drs logs on error --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index de2a3112..de7e6ae5 100644 --- a/Makefile +++ b/Makefile @@ -205,9 +205,15 @@ test-api: init prepare-test-config docker compose -f docker-compose.test.yaml down docker compose -f docker-compose.test.yaml up -d + testing_error_exit() + { + docker logs gohan-api | tail -n 100 + docker logs gohan-drs | tail -n 100 + exit 1 + } && \ cd src/api && \ go clean -cache && \ - go test ./tests/build/... -v && \ + (go test ./tests/build/... -v || testing_error_exit) && \ cd ../.. 
docker compose -f docker-compose.test.yaml stop From d397358cbaaf55fe4edfeae12361ab369aad1a9c Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sat, 17 Jun 2023 19:13:34 -0400 Subject: [PATCH 33/84] patch: rearranging try to catch test errors --- Makefile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Makefile b/Makefile index de7e6ae5..f94294d9 100644 --- a/Makefile +++ b/Makefile @@ -205,15 +205,9 @@ test-api: init prepare-test-config docker compose -f docker-compose.test.yaml down docker compose -f docker-compose.test.yaml up -d - testing_error_exit() - { - docker logs gohan-api | tail -n 100 - docker logs gohan-drs | tail -n 100 - exit 1 - } && \ cd src/api && \ go clean -cache && \ - (go test ./tests/build/... -v || testing_error_exit) && \ + (go test ./tests/build/... -v || ((docker logs gohan-api | tail -n 100) && (docker logs gohan-drs | tail -n 100) && exit 1)) && \ cd ../.. docker compose -f docker-compose.test.yaml stop From d67e7153a0cf4cc165322818563e1bc994d670ea Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sat, 17 Jun 2023 19:20:19 -0400 Subject: [PATCH 34/84] patch: init drs db and obj dirs --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index f94294d9..ec338c83 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,8 @@ init-data-dirs: chmod -R 777 ${GOHAN_API_DRS_BRIDGE_HOST_DIR} mkdir -p ${GOHAN_DRS_DATA_DIR} + mkdir -p ${GOHAN_DRS_DATA_DIR}/db + mkdir -p ${GOHAN_DRS_DATA_DIR}/obj chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_DRS_DATA_DIR} chmod -R 777 ${GOHAN_DRS_DATA_DIR} From e9740583015fe72818dba9e171828aae8c71b0b6 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sat, 17 Jun 2023 20:24:37 -0400 Subject: [PATCH 35/84] chore: refactoring test functions --- src/api/tests/build/api/variants_test.go | 244 ++++++++---------- src/api/tests/common/common.go | 64 +++++ .../tests/integration/api/api_variant_test.go | 76 +----- src/api/utils/io.go | 25 ++ 4 files 
changed, 200 insertions(+), 209 deletions(-) create mode 100644 src/api/utils/io.go diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index d5eff4d6..8670c5ba 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -1,13 +1,11 @@ package api import ( - "encoding/json" "fmt" ingest "gohan/api/models/ingest" common "gohan/api/tests/common" - "io/ioutil" + "gohan/api/utils" "log" - "net/http" "os" "os/exec" "path/filepath" @@ -26,168 +24,134 @@ const ( func TestIngest(t *testing.T) { cfg := common.InitConfig() + tableId := uuid.NewString() - // create demo vcf string - sampleId := "abc1234" - vcfString := `##fileformat=VCFv4.2 + assert.True(t, t.Run("Ingest Demo VCF", func(t *testing.T) { + // create demo vcf string + sampleId := "abc1234" + vcfString := `##fileformat=VCFv4.2 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S-1178-HAP 1 13656 . CAG C, 868.60 . BaseQRankSum=-5.505;DP=81;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-2.985;RAW_MQandDP=43993,81;ReadPosRankSum=-0.136 GT:AD:DP:GQ:PL:SB 0:50,25,0:75:99:876,0,2024,1026,2099,3126:4,46,5,20 10 28872481 . CAAAA C,CA,CAAA,CAAAAA,CAAAAAA, 652.60 . BaseQRankSum=0.029;DP=83;ExcessHet=3.0103;MLEAC=0,0,0,1,0,0;MLEAF=0.00,0.00,0.00,0.500,0.00,0.00;MQRankSum=-0.186;RAW_MQandDP=291409,83;ReadPosRankSum=-0.582 GT:AD:DP:GQ:PL:SB 0:19,3,2,5,29,9,0:67:99:660,739,2827,748,2714,2732,724,1672,1682,1587,0,340,338,249,265,321,956,929,699,245,898,866,1996,1991,1652,466,1006,1944:0,19,0,48 19 3619025 . C . . END=3619025 GT:DP:GQ:MIN_DP:PL 0:19:21:19:0,21,660 19 3619026 . T . . 
END=3619026 GT:DP:GQ:MIN_DP:PL 0:19:51:19:0,51,765` - // - save string to vcf directory - localDataRootPath := getRootGohanPath() + "/data" - localVcfPath := localDataRootPath + "/vcfs" - - newFilePath := fmt.Sprintf("%s/%s.vcf", localVcfPath, sampleId) - - // - create file if not exists - var ( - file *os.File - err error - ) - - file, err = os.Create(newFilePath) - if isError(err) { - return - } - defer file.Close() - - // - reopen file using READ & WRITE permission. - file, err = os.OpenFile(newFilePath, os.O_RDWR, 0644) - if isError(err) { - return - } - defer file.Close() - - // - write some vcf string to file. - _, err = file.WriteString(vcfString) - if isError(err) { - return - } - defer func() { os.Remove(newFilePath) }() + // - save string to vcf directory + localDataRootPath := getRootGohanPath() + "/data" + localVcfPath := localDataRootPath + "/vcfs" - // compress the vcf file with bgzip - out, err := exec.Command("bgzip", newFilePath).Output() - if err != nil { - fmt.Println(err.Error()) - return - } - fmt.Println(string(out)) + newFilePath := fmt.Sprintf("%s/%s.vcf", localVcfPath, sampleId) - newGzFile := newFilePath + ".gz" - defer func() { os.Remove(newGzFile) }() + // - create file if not exists + var ( + file *os.File + err error + ) - // - ingest - assemblyId := "GRCh38" - tableId := uuid.NewString() - containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) - // containerizedVcfFilePath := "/data/S-1178.hc.g.vcf.gz" - - client := &http.Client{} - - queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&tableId=%s", assemblyId, containerizedVcfFilePath, tableId) - ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) - - ingestRequest, _ := http.NewRequest("GET", ingestUrl, nil) - - response, responseErr := client.Do(ingestRequest) - assert.Nil(t, responseErr) - defer response.Body.Close() - assert.Equal(t, 200, response.StatusCode, response.Status) - - fmt.Println(response.Body) - - var dtos = 
getDtosFromBody(response) - - // check ingestion request - for { - fmt.Println("Checking state of the ingestion..") - - // make the call - ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) + file, err = os.Create(newFilePath) + if isError(err) { + return + } + defer file.Close() - ingReqsRequest, _ := http.NewRequest("GET", ingReqsUrl, nil) - response, responseErr := client.Do(ingReqsRequest) - assert.Nil(t, responseErr) - defer response.Body.Close() - assert.Equal(t, 200, response.StatusCode, response.Status) + // - reopen file using READ & WRITE permission. + file, err = os.OpenFile(newFilePath, os.O_RDWR, 0644) + if isError(err) { + return + } + defer file.Close() - dtos = getDtosFromBody(response) + // - write some vcf string to file. + _, err = file.WriteString(vcfString) + if isError(err) { + return + } + defer func() { os.Remove(newFilePath) }() - foundDone := false - for _, dto := range dtos { - if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Done" { - foundDone = true - break + // compress the vcf file with bgzip + out, err := exec.Command("bgzip", newFilePath).Output() + if err != nil { + fmt.Println(err.Error()) + return + } + fmt.Println(string(out)) + + newGzFile := newFilePath + ".gz" + defer func() { os.Remove(newGzFile) }() + + // - ingest + assemblyId := "GRCh38" + containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) + + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&tableId=%s", assemblyId, containerizedVcfFilePath, tableId) + ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) + + initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) + assert.True(t, len(initialIngestionDtos) > 0) + + // check ingestion request + for { + fmt.Println("Checking state of the ingestion..") + + // make the call + ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) + ingReqDtos := 
utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingReqsUrl) + assert.True(t, len(initialIngestionDtos) > 0) + + foundDone := false + for _, dto := range ingReqDtos { + if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Done" { + foundDone = true + break + } + if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Error" { + log.Fatal(dto.Message) + } } - if dto.Filename == filepath.Base(containerizedVcfFilePath) && dto.State == "Error" { - log.Fatal(dto.Message) + if foundDone { + fmt.Println("Done, moving on..") + break + } else { + // pause + time.Sleep(3 * time.Second) } } - if foundDone { - fmt.Println("Done, moving on..") - break - } else { + + // check ingestion stats + for { + fmt.Println("Checking ingestion stats..") // pause time.Sleep(3 * time.Second) - } - } - - // check ingestion stats - for { - fmt.Println("Checking ingestion stats..") - // pause - time.Sleep(3 * time.Second) - - // make the call - ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/stats", cfg.Api.Url) - statsRequest, _ := http.NewRequest("GET", ingReqsUrl, nil) - response, responseErr := client.Do(statsRequest) - assert.Nil(t, responseErr) - defer response.Body.Close() - assert.Equal(t, 200, response.StatusCode, response.Status) + // make the call + statsReqUrl := fmt.Sprintf("%s/variants/ingestion/stats", cfg.Api.Url) + stats := utils.GetRequestReturnStuff[ingest.IngestStatsDto](statsReqUrl) + assert.NotNil(t, stats) - body, readErr := ioutil.ReadAll(response.Body) - if readErr != nil { - log.Fatal(readErr) - } - - stats := ingest.IngestStatsDto{} - jsonErr := json.Unmarshal(body, &stats) - if jsonErr != nil { - log.Fatal(jsonErr) - } + fmt.Println(stats.NumAdded) + fmt.Println(stats.NumFlushed) + if stats.NumAdded == stats.NumFlushed { + fmt.Println("Done, moving on..") + break + } + if stats.NumFailed > 0 { + log.Fatal("More than one variant failed to flush") + } - fmt.Println(stats.NumAdded) - fmt.Println(stats.NumFlushed) - if 
stats.NumAdded == stats.NumFlushed { - fmt.Println("Done, moving on..") - break - } - if stats.NumFailed > 0 { - log.Fatal("More than one variant failed to flush") + // pause + time.Sleep(3 * time.Second) } + })) - // pause - time.Sleep(3 * time.Second) - } - - // todo: verify demo vcf was properly ingested + // verify demo vcf was properly ingested // by pinging it with specific queries - overviewJson := common.GetVariantsOverview(t, cfg) - assert.NotNil(t, overviewJson) -} - -func getDtosFromBody(response *http.Response) []ingest.IngestResponseDTO { - var dtos []ingest.IngestResponseDTO - if jsonErr := json.NewDecoder(response.Body).Decode(&dtos); jsonErr != nil { - fmt.Println(jsonErr) - } + assert.True(t, t.Run("Check Demo VCF Ingestion", func(t *testing.T) { + overviewJson := common.GetVariantsOverview(t, cfg) + assert.NotNil(t, overviewJson) - return dtos + dtos := common.BuildQueryAndMakeGetVariantsCall("1", "*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(dtos.Results[0].Calls) > 0) + })) } func isError(err error) bool { diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index 93ac6689..8ca5548b 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -5,6 +5,9 @@ import ( "encoding/json" "fmt" "gohan/api/models" + c "gohan/api/models/constants" + gq "gohan/api/models/constants/genotype-query" + "gohan/api/models/dtos" "io/ioutil" "net/http" "os" @@ -94,3 +97,64 @@ func GetVariantsOverview(_t *testing.T, _cfg *models.Config) map[string]interfac return overviewRespJson } + +func BuildQueryAndMakeGetVariantsCall( + chromosome string, sampleId string, includeInfo bool, + sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, + referenceAllelePattern string, alternativeAllelePattern string, commaDeliminatedAlleles string, + ignoreStatusCode bool, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse { + + queryString := 
fmt.Sprintf("?ids=%s&includeInfoInResultSet=%t&sortByPosition=%s&assemblyId=%s", sampleId, includeInfo, sortByPosition, assemblyId) + + if chromosome != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&chromosome=%s", chromosome)) + } + + if genotype != gq.UNCALLED { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&genotype=%s", string(genotype))) + } + + if referenceAllelePattern != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&reference=%s", referenceAllelePattern)) + } + if alternativeAllelePattern != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&alternative=%s", alternativeAllelePattern)) + } + if commaDeliminatedAlleles != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&alleles=%s", commaDeliminatedAlleles)) + } + url := fmt.Sprintf(VariantsGetBySampleIdsPathWithQueryString, _cfg.Api.Url, queryString) + + return makeGetVariantsCall(url, ignoreStatusCode, _t) +} + +func makeGetVariantsCall(url string, ignoreStatusCode bool, _t *testing.T) dtos.VariantGetReponse { + fmt.Printf("Calling %s\n", url) + request, _ := http.NewRequest("GET", url, nil) + + client := &http.Client{} + response, responseErr := client.Do(request) + assert.Nil(_t, responseErr) + + defer response.Body.Close() + + if !ignoreStatusCode { + // this test (at the time of writing) will only work if authorization is disabled + shouldBe := 200 + assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) + } + + // -- interpret array of ingestion requests from response + respBody, respBodyErr := ioutil.ReadAll(response.Body) + assert.Nil(_t, respBodyErr) + + // --- transform body bytes to string + respBodyString := string(respBody) + + // -- convert to json and check for error + var respDto dtos.VariantGetReponse + jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) + assert.Nil(_t, 
jsonUnmarshallingError) + + return respDto +} diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 0ae523e6..effca81f 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -3,7 +3,6 @@ package api import ( "encoding/json" "fmt" - "gohan/api/models" c "gohan/api/models/constants" a "gohan/api/models/constants/assembly-id" gq "gohan/api/models/constants/genotype-query" @@ -321,7 +320,7 @@ func TestCanGetVariantsWithWildcardAlternatives(t *testing.T) { cfg := common.InitConfig() allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := buildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -357,7 +356,7 @@ func TestCanGetVariantsWithWildcardReferences(t *testing.T) { cfg := common.InitConfig() allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := buildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -400,7 +399,7 @@ func TestCanGetVariantsByAlleles(t *testing.T) { fmt.Println(qAllele) // check alleles in response - dtos := buildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) 
+ dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -445,7 +444,7 @@ func TestCanGetVariantsWithWildcardAlleles(t *testing.T) { // iterate over all 'allele's queried for qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths for _, qAllele := range qAlleles { - dtos := buildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) for _, dto := range dtos.Results { fmt.Printf("Got %d calls from allele query %s \n", len(dto.Calls), qAllele) if len(dto.Calls) == 0 { @@ -484,7 +483,7 @@ func TestCanGetVariantsWithWildcardAllelePairs(t *testing.T) { // iterate over all 'allele pairs' for _, qAllelePair := range qAllelePairs { - dtos := buildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) for _, dto := range dtos.Results { if len(dto.Calls) == 0 { continue @@ -519,7 +518,7 @@ func TestGetVariantsCanHandleInvalidWildcardAlleleQuery(t *testing.T) { } // skip valid calls limitedAlleles := strings.Join(qAlleles[:i], ",") - invalidReqResObj := buildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) + invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) // make sure only an error was returned assert.True(t, invalidReqResObj.Status == 400) @@ -626,36 +625,6 @@ func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQ } } -func 
buildQueryAndMakeGetVariantsCall( - chromosome string, sampleId string, includeInfo bool, - sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, - referenceAllelePattern string, alternativeAllelePattern string, commaDeliminatedAlleles string, - ignoreStatusCode bool, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse { - - queryString := fmt.Sprintf("?ids=%s&includeInfoInResultSet=%t&sortByPosition=%s&assemblyId=%s", sampleId, includeInfo, sortByPosition, assemblyId) - - if chromosome != "" { - queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&chromosome=%s", chromosome)) - } - - if genotype != gq.UNCALLED { - queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&genotype=%s", string(genotype))) - } - - if referenceAllelePattern != "" { - queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&reference=%s", referenceAllelePattern)) - } - if alternativeAllelePattern != "" { - queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&alternative=%s", alternativeAllelePattern)) - } - if commaDeliminatedAlleles != "" { - queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&alleles=%s", commaDeliminatedAlleles)) - } - url := fmt.Sprintf(common.VariantsGetBySampleIdsPathWithQueryString, _cfg.Api.Url, queryString) - - return makeGetVariantsCall(url, ignoreStatusCode, _t) -} - func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string { var allCombinations = [][]string{} @@ -712,7 +681,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc assemblyId := a.CastToAssemblyId(_combination[2]) // make the call - dto := buildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, 
assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) assert.Equal(_t, 1, len(dto.Results)) @@ -730,37 +699,6 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc return allDtoResponses } -func makeGetVariantsCall(url string, ignoreStatusCode bool, _t *testing.T) dtos.VariantGetReponse { - fmt.Printf("Calling %s\n", url) - request, _ := http.NewRequest("GET", url, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - if !ignoreStatusCode { - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) - } - - // -- interpret array of ingestion requests from response - respBody, respBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, respBodyErr) - - // --- transform body bytes to string - respBodyString := string(respBody) - - // -- convert to json and check for error - var respDto dtos.VariantGetReponse - jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) - assert.Nil(_t, jsonUnmarshallingError) - - return respDto -} - // --- sample validation func validateReferenceSample(__t *testing.T, call *dtos.VariantCall) { assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Reference)) diff --git a/src/api/utils/io.go b/src/api/utils/io.go new file mode 100644 index 00000000..44ce2dd1 --- /dev/null +++ b/src/api/utils/io.go @@ -0,0 +1,25 @@ +package utils + +import ( + "encoding/json" + "log" + "net/http" +) + +func GetRequestReturnStuff[T any](url string) T { + client := &http.Client{} + request, _ := http.NewRequest("GET", url, nil) + + response, responseErr := client.Do(request) + if responseErr != nil { + log.Fatal(responseErr) + } + defer response.Body.Close() + + var objects T + if jsonErr := 
json.NewDecoder(response.Body).Decode(&objects); jsonErr != nil { + log.Fatal(responseErr) + } + + return objects +} From 106b220a1e09b65cc83575aa0361f5aabe776c41 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sun, 18 Jun 2023 01:49:00 -0400 Subject: [PATCH 36/84] chore: refactoring build tests, common tooling --- src/api/tests/build/api/variants_test.go | 68 ++++++------------------ src/api/tests/common/common.go | 53 ++++++++++++++++++ 2 files changed, 70 insertions(+), 51 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 8670c5ba..0c3f2036 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -29,44 +29,30 @@ func TestIngest(t *testing.T) { assert.True(t, t.Run("Ingest Demo VCF", func(t *testing.T) { // create demo vcf string sampleId := "abc1234" - vcfString := `##fileformat=VCFv4.2 -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S-1178-HAP -1 13656 . CAG C, 868.60 . BaseQRankSum=-5.505;DP=81;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-2.985;RAW_MQandDP=43993,81;ReadPosRankSum=-0.136 GT:AD:DP:GQ:PL:SB 0:50,25,0:75:99:876,0,2024,1026,2099,3126:4,46,5,20 -10 28872481 . CAAAA C,CA,CAAA,CAAAAA,CAAAAAA, 652.60 . BaseQRankSum=0.029;DP=83;ExcessHet=3.0103;MLEAC=0,0,0,1,0,0;MLEAF=0.00,0.00,0.00,0.500,0.00,0.00;MQRankSum=-0.186;RAW_MQandDP=291409,83;ReadPosRankSum=-0.582 GT:AD:DP:GQ:PL:SB 0:19,3,2,5,29,9,0:67:99:660,739,2827,748,2714,2732,724,1672,1682,1587,0,340,338,249,265,321,956,929,699,245,898,866,1996,1991,1652,466,1006,1944:0,19,0,48 -19 3619025 . C . . END=3619025 GT:DP:GQ:MIN_DP:PL 0:19:21:19:0,21,660 -19 3619026 . T . . 
END=3619026 GT:DP:GQ:MIN_DP:PL 0:19:51:19:0,51,765` // - save string to vcf directory - localDataRootPath := getRootGohanPath() + "/data" + localDataRootPath := common.GetRootGohanPath() + "/data" localVcfPath := localDataRootPath + "/vcfs" newFilePath := fmt.Sprintf("%s/%s.vcf", localVcfPath, sampleId) // - create file if not exists - var ( - file *os.File - err error - ) - - file, err = os.Create(newFilePath) - if isError(err) { - return - } - defer file.Close() - - // - reopen file using READ & WRITE permission. - file, err = os.OpenFile(newFilePath, os.O_RDWR, 0644) - if isError(err) { - return + file, err := common.CreateAndGetNewFile(newFilePath) + if err != nil { + log.Fatal(err) } - defer file.Close() + defer func() { + file.Close() + }() // - write some vcf string to file. - _, err = file.WriteString(vcfString) - if isError(err) { + _, err = file.WriteString(common.DemoVcf1) + if common.IsError(err) { return } - defer func() { os.Remove(newFilePath) }() + defer func() { + os.Remove(newFilePath) + }() // compress the vcf file with bgzip out, err := exec.Command("bgzip", newFilePath).Output() @@ -77,7 +63,9 @@ func TestIngest(t *testing.T) { fmt.Println(string(out)) newGzFile := newFilePath + ".gz" - defer func() { os.Remove(newGzFile) }() + defer func() { + os.Remove(newGzFile) + }() // - ingest assemblyId := "GRCh38" @@ -90,6 +78,7 @@ func TestIngest(t *testing.T) { assert.True(t, len(initialIngestionDtos) > 0) // check ingestion request + // TODO: avoid potential infinite loop for { fmt.Println("Checking state of the ingestion..") @@ -118,6 +107,7 @@ func TestIngest(t *testing.T) { } // check ingestion stats + // TODO: avoid potential infinite loop for { fmt.Println("Checking ingestion stats..") // pause @@ -153,27 +143,3 @@ func TestIngest(t *testing.T) { assert.True(t, len(dtos.Results[0].Calls) > 0) })) } - -func isError(err error) bool { - if err != nil { - fmt.Println(err.Error()) - } - - return (err != nil) -} - -func getRootGohanPath() string { - 
// check if file exists - wd, err1 := os.Getwd() - if err1 != nil { - log.Println(err1) - } - fmt.Println(wd) // for example /home/user - - path := filepath.Dir(wd) - for i := 1; i < 5; i++ { - path = filepath.Dir(path) - } - - return path -} diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index 8ca5548b..1b951bb3 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -9,9 +9,11 @@ import ( gq "gohan/api/models/constants/genotype-query" "gohan/api/models/dtos" "io/ioutil" + "log" "net/http" "os" "path" + "path/filepath" "runtime" "testing" @@ -23,6 +25,13 @@ const ( VariantsOverviewPath string = "%s/variants/overview" VariantsGetBySampleIdsPathWithQueryString string = "%s/variants/get/by/sampleId%s" IngestionRequestsPath string = "%s/variants/ingestion/requests" + + DemoVcf1 string = `##fileformat=VCFv4.2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S-1178-HAP +1 13656 . CAG C, 868.60 . BaseQRankSum=-5.505;DP=81;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-2.985;RAW_MQandDP=43993,81;ReadPosRankSum=-0.136 GT:AD:DP:GQ:PL:SB 0:50,25,0:75:99:876,0,2024,1026,2099,3126:4,46,5,20 +10 28872481 . CAAAA C,CA,CAAA,CAAAAA,CAAAAAA, 652.60 . BaseQRankSum=0.029;DP=83;ExcessHet=3.0103;MLEAC=0,0,0,1,0,0;MLEAF=0.00,0.00,0.00,0.500,0.00,0.00;MQRankSum=-0.186;RAW_MQandDP=291409,83;ReadPosRankSum=-0.582 GT:AD:DP:GQ:PL:SB 0:19,3,2,5,29,9,0:67:99:660,739,2827,748,2714,2732,724,1672,1682,1587,0,340,338,249,265,321,956,929,699,245,898,866,1996,1991,1652,466,1006,1944:0,19,0,48 +19 3619025 . C . . END=3619025 GT:DP:GQ:MIN_DP:PL 0:19:21:19:0,21,660 +19 3619026 . T . . 
END=3619026 GT:DP:GQ:MIN_DP:PL 0:19:51:19:0,51,765` ) func InitConfig() *models.Config { @@ -98,6 +107,50 @@ func GetVariantsOverview(_t *testing.T, _cfg *models.Config) map[string]interfac return overviewRespJson } +func GetRootGohanPath() string { + // check if file exists + wd, err1 := os.Getwd() + if err1 != nil { + log.Println(err1) + } + fmt.Println(wd) // for example /home/user + + path := filepath.Dir(wd) + for i := 1; i < 5; i++ { + path = filepath.Dir(path) + } + + return path +} + +func IsError(err error) bool { + if err != nil { + fmt.Println(err.Error()) + } + + return (err != nil) +} + +func CreateAndGetNewFile(filePath string) (*os.File, error) { + // - create file if not exists + var ( + newFile *os.File + newFileErr error + ) + + _, newFileErr = os.Create(filePath) + if IsError(newFileErr) { + return nil, newFileErr + } + + // - reopen file using READ & WRITE permission. + newFile, newFileErr = os.OpenFile(filePath, os.O_RDWR, 0644) + if IsError(newFileErr) { + return nil, newFileErr + } + return newFile, newFileErr +} + func BuildQueryAndMakeGetVariantsCall( chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, From 5c60e79195e713f9e1766df433bfbc31b5b6d3e2 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sun, 18 Jun 2023 02:28:53 -0400 Subject: [PATCH 37/84] chore: refactoring integration/build tests - increasing build test coverage - tooling --- src/api/tests/build/api/variants_test.go | 15 +++++ src/api/tests/common/common.go | 49 +++++++++++++++ .../tests/integration/api/api_variant_test.go | 61 ------------------- src/api/utils/utils.go | 9 +++ 4 files changed, 73 insertions(+), 61 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 0c3f2036..0b770967 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -136,10 +136,25 @@ func TestIngest(t 
*testing.T) { // verify demo vcf was properly ingested // by pinging it with specific queries assert.True(t, t.Run("Check Demo VCF Ingestion", func(t *testing.T) { + // check variants overview overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) + // simple chromosome-1 query dtos := common.BuildQueryAndMakeGetVariantsCall("1", "*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) assert.True(t, len(dtos.Results[0].Calls) > 0) + + // TODO: not hardcoded tests + // simple allele queries + common.GetAndVerifyVariantsResults(cfg, t, "CAG") + common.GetAndVerifyVariantsResults(cfg, t, "CAAAA") + common.GetAndVerifyVariantsResults(cfg, t, "T") + common.GetAndVerifyVariantsResults(cfg, t, "C") + + // random number between 1 and 5 + // allelleLen := rand.Intn(5) + 1 + + // random nucleotide string of length 'allelleLen' + // qAllele := utils.GenerateRandomFixedLengthString(utils.AcceptedNucleotideCharacters, allelleLen) })) } diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index 1b951bb3..a8337943 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -8,6 +8,7 @@ import ( c "gohan/api/models/constants" gq "gohan/api/models/constants/genotype-query" "gohan/api/models/dtos" + "gohan/api/utils" "io/ioutil" "log" "net/http" @@ -151,6 +152,54 @@ func CreateAndGetNewFile(filePath string) (*os.File, error) { return newFile, newFileErr } +func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, qAllele string) { + responseDtos := BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, _t, _cfg) + + assert.NotNil(_t, responseDtos.Results) + assert.True(_t, len(responseDtos.Results) > 0) + assert.NotNil(_t, responseDtos.Results[0].Calls) + assert.True(_t, len(responseDtos.Results[0].Calls) > 0) + + // check alleles in response + for _, dto := range responseDtos.Results { + for _, call := range dto.Calls { + // ensure, for each call, that at 
least + // 1 of the alleles present matches the allele + // queried for + allAllelesMatchUp := false + + // TODO: "does an allele exist matching the one queried" + // - iterate over all 'allele's in the call + for _, allele := range call.Alleles { + matched := make([]bool, len(qAllele)) + if len(qAllele) == len(allele) { + for alIndex, alChar := range allele { + // ensure the index is within bounds (length of the allele) + // 'ref's are slices of strings, and not all 'ref's in these slices need to match + if alIndex <= len(allele) { + // obtain the character at the index for the iteration + qAlleleChar := []rune(qAllele)[alIndex] + if string(qAlleleChar) == "N" || alChar == qAlleleChar { + // if the non-wildcard characters don't match, test fails + // alleleMatchesUp = false + matched[alIndex] = true + } + } else { + continue + } + } + if utils.AreAllBoolsTrue(matched) { + allAllelesMatchUp = true + break + } + } + } + + assert.True(_t, allAllelesMatchUp) + } + } +} + func BuildQueryAndMakeGetVariantsCall( chromosome string, sampleId string, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index effca81f..93f7ea85 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -13,7 +13,6 @@ import ( common "gohan/api/tests/common" testConsts "gohan/api/tests/common/constants" ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" - "gohan/api/utils" "io/ioutil" "math/rand" "net/http" @@ -388,57 +387,6 @@ func TestCanGetVariantsWithWildcardReferences(t *testing.T) { } } -func TestCanGetVariantsByAlleles(t *testing.T) { - cfg := common.InitConfig() - - // random number between 1 and 5 - allelleLen := rand.Intn(5) + 1 - // random nucleotide string of length 'allelleLen' - qAllele := 
utils.GenerateRandomFixedLengthString(utils.AcceptedNucleotideCharacters, allelleLen) - - fmt.Println(qAllele) - - // check alleles in response - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) - for _, dto := range dtos.Results { - for _, call := range dto.Calls { - // ensure, for each call, that at least - // 1 of the alleles present matches the allele - // queried for - allAllelesMatchUp := false - - // TODO: "does an allele exist matching the one queried" - // - iterate over all 'allele's in the call - for _, allele := range call.Alleles { - matched := make([]bool, len(qAllele)) - if len(qAllele) == len(allele) { - for alIndex, alChar := range allele { - // ensure the index is within bounds (length of the allele) - // 'ref's are slices of strings, and not all 'ref's in these slices need to match - if alIndex <= len(allele) { - // obtain the character at the index for the iteration - qAlleleChar := []rune(qAllele)[alIndex] - if string(qAlleleChar) == "N" || alChar == qAlleleChar { - // if the non-wildcard characters don't match, test fails - // alleleMatchesUp = false - matched[alIndex] = true - } - } else { - continue - } - } - if areAllBoolsTrue(matched) { - allAllelesMatchUp = true - break - } - } - } - - assert.True(t, allAllelesMatchUp) - } - } -} - func TestCanGetVariantsWithWildcardAlleles(t *testing.T) { cfg := common.InitConfig() // iterate over all 'allele's queried for @@ -528,15 +476,6 @@ func TestGetVariantsCanHandleInvalidWildcardAlleleQuery(t *testing.T) { } // -- Common utility functions for api tests -func areAllBoolsTrue(sliceOfBools []bool) bool { - for _, b := range sliceOfBools { - if !b { - return false - } - } - return true -} - func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, specificValidation func(__t *testing.T, call *dtos.VariantCall, 
referenceAllelePattern string, alternativeAllelePattern string)) { diff --git a/src/api/utils/utils.go b/src/api/utils/utils.go index 9878ee64..eb501314 100644 --- a/src/api/utils/utils.go +++ b/src/api/utils/utils.go @@ -90,3 +90,12 @@ func GenerateRandomFixedLengthString(availableCharactersSlice []string, length i return string(b) } + +func AreAllBoolsTrue(sliceOfBools []bool) bool { + for _, b := range sliceOfBools { + if !b { + return false + } + } + return true +} From 793696b31a1d05b15b4807499325cda321766919 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sun, 18 Jun 2023 02:53:21 -0400 Subject: [PATCH 38/84] chore: build/integration test refactoring - variants info --- Makefile | 8 +- src/api/tests/build/api/variants_test.go | 77 ++++++++++++++++-- src/api/tests/common/common.go | 78 +++++++++++++++++++ .../tests/integration/api/api_variant_test.go | 53 ------------- 4 files changed, 154 insertions(+), 62 deletions(-) diff --git a/Makefile b/Makefile index ec338c83..1aa9019f 100644 --- a/Makefile +++ b/Makefile @@ -204,16 +204,22 @@ test-api: init prepare-test-config # # go clean -cache && \ # # go test ./tests/unit/... -v + @# restart any running containers and print docker compose -f docker-compose.test.yaml down docker compose -f docker-compose.test.yaml up -d + @# run build tests + @# - print api and drs logs in the + @# event of a failued cd src/api && \ go clean -cache && \ (go test ./tests/build/... -v || ((docker logs gohan-api | tail -n 100) && (docker logs gohan-drs | tail -n 100) && exit 1)) && \ cd ../.. 
+ @# shut down the containers and print + @# the tail end of the + @# api and elasticsearch logs docker compose -f docker-compose.test.yaml stop - docker logs gohan-api | tail -n 50 docker logs elasticsearch | tail -n 50 diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 0b770967..73f2edf3 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -2,6 +2,8 @@ package api import ( "fmt" + "gohan/api/models/dtos" + "gohan/api/models/indexes" ingest "gohan/api/models/ingest" common "gohan/api/tests/common" "gohan/api/utils" @@ -12,8 +14,13 @@ import ( "testing" "time" + gq "gohan/api/models/constants/genotype-query" + s "gohan/api/models/constants/sort" + "github.com/google/uuid" "github.com/stretchr/testify/assert" + + . "github.com/ahmetb/go-linq" ) const ( @@ -22,11 +29,11 @@ const ( IngestionRequestsPath string = "%s/variants/ingestion/requests" ) -func TestIngest(t *testing.T) { +func TestDemoVcfIngestion(t *testing.T) { cfg := common.InitConfig() tableId := uuid.NewString() - assert.True(t, t.Run("Ingest Demo VCF", func(t *testing.T) { + t.Run("Ingest Demo VCF", func(t *testing.T) { // create demo vcf string sampleId := "abc1234" @@ -131,19 +138,22 @@ func TestIngest(t *testing.T) { // pause time.Sleep(3 * time.Second) } - })) + }) // verify demo vcf was properly ingested - // by pinging it with specific queries - assert.True(t, t.Run("Check Demo VCF Ingestion", func(t *testing.T) { + t.Run("Test Variants Overview", func(t *testing.T) { // check variants overview overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) + }) + t.Run("Test Simple Chromosome Queries", func(t *testing.T) { // simple chromosome-1 query - dtos := common.BuildQueryAndMakeGetVariantsCall("1", "*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) - assert.True(t, len(dtos.Results[0].Calls) > 0) + chromQueryResponse := common.BuildQueryAndMakeGetVariantsCall("1", 
"*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(chromQueryResponse.Results[0].Calls) > 0) + }) + t.Run("Test Simple Allele Queries", func(t *testing.T) { // TODO: not hardcoded tests // simple allele queries common.GetAndVerifyVariantsResults(cfg, t, "CAG") @@ -156,5 +166,56 @@ func TestIngest(t *testing.T) { // random nucleotide string of length 'allelleLen' // qAllele := utils.GenerateRandomFixedLengthString(utils.AcceptedNucleotideCharacters, allelleLen) - })) + }) + + t.Run("Test Variant Info Present", func(t *testing.T) { + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, true, s.Undefined, gq.UNCALLED, "", "") + + // assert that all of the responses include valid sets of info + // - * accumulate all infos into a single list using the set of + // SelectManyT's and the SelectT + // - ** iterate over each info in the ForEachT + var accumulatedInfos []*indexes.Info + + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).SelectManyT(func(variant dtos.VariantCall) Query { + return From(variant.Info) + }).SelectT(func(info indexes.Info) indexes.Info { + return info + }).ForEachT(func(info indexes.Info) { // ** + accumulatedInfos = append(accumulatedInfos, &info) + }) + + if len(accumulatedInfos) == 0 { + t.Skip("No infos returned! 
Skipping --") + } + + for infoIndex, info := range accumulatedInfos { + // ensure the info is not nil + // - s.Id can be == "" + // - so can s.Value + assert.NotNil(t, info) + if info.Id == "" { + fmt.Printf("Note: Found empty info id at index %d with value %s \n", infoIndex, info.Value) + } + } + }) + + t.Run("Test No Variant Info Present", func(t *testing.T) { + + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Undefined, gq.UNCALLED, "", "") + + // assert that all responses from all combinations have no results + for _, dtoResponse := range allDtoResponses { + if len(dtoResponse.Results) > 0 { + firstDataPointCalls := dtoResponse.Results[0].Calls + if len(firstDataPointCalls) > 0 { + assert.Nil(t, firstDataPointCalls[0].Info) + } + } + } + }) } diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index a8337943..02171a98 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -6,16 +6,20 @@ import ( "fmt" "gohan/api/models" c "gohan/api/models/constants" + a "gohan/api/models/constants/assembly-id" gq "gohan/api/models/constants/genotype-query" + "gohan/api/models/dtos" "gohan/api/utils" "io/ioutil" "log" + "math/rand" "net/http" "os" "path" "path/filepath" "runtime" + "sync" "testing" "github.com/stretchr/testify/assert" @@ -230,6 +234,80 @@ func BuildQueryAndMakeGetVariantsCall( return makeGetVariantsCall(url, ignoreStatusCode, _t) } +func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { + cfg := InitConfig() + + // retrieve the overview + overviewJson := GetVariantsOverview(_t, cfg) + + // ensure the response is valid + // TODO: error check instead of nil check + assert.NotNil(_t, overviewJson) + + // generate all possible combinations of + // available samples, assemblys, 
and chromosomes + overviewCombinations := getOverviewResultCombinations(overviewJson["chromosomes"], overviewJson["sampleIDs"], overviewJson["assemblyIDs"]) + + // avoid overflow: + // - shuffle all combinations and take top x + x := 10 + croppedCombinations := make([][]string, len(overviewCombinations)) + perm := rand.Perm(len(overviewCombinations)) + for i, v := range perm { + croppedCombinations[v] = overviewCombinations[i] + } + if len(croppedCombinations) > x { + croppedCombinations = croppedCombinations[:x] + } + + // initialize a common slice in which to + // accumulate al responses asynchronously + allDtoResponses := []dtos.VariantGetReponse{} + allDtoResponsesMux := sync.RWMutex{} + + var combWg sync.WaitGroup + for _, combination := range croppedCombinations { + combWg.Add(1) + go func(_wg *sync.WaitGroup, _combination []string) { + defer _wg.Done() + + chrom := _combination[0] + sampleId := _combination[1] + assemblyId := a.CastToAssemblyId(_combination[2]) + + // make the call + dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + + assert.Equal(_t, 1, len(dto.Results)) + + // accumulate all response objects + // to a common slice in an + // asynchronous-safe manner + allDtoResponsesMux.Lock() + allDtoResponses = append(allDtoResponses, dto) + allDtoResponsesMux.Unlock() + }(&combWg, combination) + } + + combWg.Wait() + + return allDtoResponses +} + +func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string { + var allCombinations = [][]string{} + + for i, _ := range chromosomeStruct.(map[string]interface{}) { + for j, _ := range sampleIdsStruct.(map[string]interface{}) { + for k, _ := range assemblyIdsStruct.(map[string]interface{}) { + allCombinations = append(allCombinations, []string{i, j, k}) + } + } + } + + return allCombinations +} + func 
makeGetVariantsCall(url string, ignoreStatusCode bool, _t *testing.T) dtos.VariantGetReponse { fmt.Printf("Calling %s\n", url) request, _ := http.NewRequest("GET", url, nil) diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 93f7ea85..7ee11ec3 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -9,7 +9,6 @@ import ( s "gohan/api/models/constants/sort" z "gohan/api/models/constants/zygosity" "gohan/api/models/dtos" - "gohan/api/models/indexes" common "gohan/api/tests/common" testConsts "gohan/api/tests/common/constants" ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" @@ -87,58 +86,6 @@ func TestGetIngestionRequests(t *testing.T) { assert.NotNil(t, len(ingestionRequestsRespJsonSlice)) } -func TestCanGetVariantsWithoutInfoInResultset(t *testing.T) { - - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Undefined, gq.UNCALLED, "", "") - - // assert that all responses from all combinations have no results - for _, dtoResponse := range allDtoResponses { - if len(dtoResponse.Results) > 0 { - firstDataPointCalls := dtoResponse.Results[0].Calls - if len(firstDataPointCalls) > 0 { - assert.Nil(t, firstDataPointCalls[0].Info) - } - } - } -} - -func TestCanGetVariantsWithInfoInResultset(t *testing.T) { - - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, true, s.Undefined, gq.UNCALLED, "", "") - - // assert that all of the responses include valid sets of info - // - * accumulate all infos into a single list using the set of - // SelectManyT's and the SelectT - // - ** iterate over each info in the ForEachT - var accumulatedInfos []*indexes.Info - - From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * - return From(resp.Results) - }).SelectManyT(func(data dtos.VariantGetResult) Query { - return From(data.Calls) - 
}).SelectManyT(func(variant dtos.VariantCall) Query { - return From(variant.Info) - }).SelectT(func(info indexes.Info) indexes.Info { - return info - }).ForEachT(func(info indexes.Info) { // ** - accumulatedInfos = append(accumulatedInfos, &info) - }) - - if len(accumulatedInfos) == 0 { - t.Skip("No infos returned! Skipping --") - } - - for infoIndex, info := range accumulatedInfos { - // ensure the info is not nil - // - s.Id can be == "" - // - so can s.Value - assert.NotNil(t, info) - if info.Id == "" { - fmt.Printf("Note: Found empty info id at index %d with value %s \n", infoIndex, info.Value) - } - } -} - func TestCanGetVariantsInAscendingPositionOrder(t *testing.T) { // retrieve responses in ascending order allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Ascending, gq.UNCALLED, "", "") From d9510fa05bc062f0ad99c459e2d5c2f2cfb7128a Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Sun, 18 Jun 2023 02:59:03 -0400 Subject: [PATCH 39/84] chore: build test get variants in order --- src/api/tests/build/api/variants_test.go | 57 +++++++++++++++++++ .../tests/integration/api/api_variant_test.go | 57 ------------------- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 73f2edf3..5d3f1187 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -218,4 +218,61 @@ func TestDemoVcfIngestion(t *testing.T) { } } }) + + t.Run("Test Get Variants in Ascending Order", func(t *testing.T) { + // retrieve responses in ascending order + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Ascending, gq.UNCALLED, "", "") + + // assert the dto response slice is plentiful + assert.NotNil(t, allDtoResponses) + + From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { + // ensure there is data + assert.NotNil(t, dto.Results) + + // check 
the data + From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { + // ensure the variants slice is plentiful + assert.NotNil(t, d.Calls) + + latestSmallest := 0 + From(d.Calls).ForEachT(func(dd dtos.VariantCall) { + // verify order + if latestSmallest != 0 { + assert.True(t, latestSmallest <= dd.Pos) + } + + latestSmallest = dd.Pos + }) + }) + }) + }) + + t.Run("Test Get Variants in Descending Order", func(t *testing.T) { + // retrieve responses in descending order + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Descending, gq.UNCALLED, "", "") + + // assert the dto response slice is plentiful + assert.NotNil(t, allDtoResponses) + + From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { + // ensure there is data + assert.NotNil(t, dto.Results) + + // check the data + From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { + // ensure the variants slice is plentiful + assert.NotNil(t, d.Calls) + + latestGreatest := 0 + From(d.Calls).ForEachT(func(dd dtos.VariantCall) { + if latestGreatest != 0 { + assert.True(t, latestGreatest >= dd.Pos) + } + + latestGreatest = dd.Pos + }) + }) + }) + }) } diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 7ee11ec3..4f00b9b2 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -86,63 +86,6 @@ func TestGetIngestionRequests(t *testing.T) { assert.NotNil(t, len(ingestionRequestsRespJsonSlice)) } -func TestCanGetVariantsInAscendingPositionOrder(t *testing.T) { - // retrieve responses in ascending order - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Ascending, gq.UNCALLED, "", "") - - // assert the dto response slice is plentiful - assert.NotNil(t, allDtoResponses) - - From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { - // ensure there is data - assert.NotNil(t, dto.Results) 
- - // check the data - From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { - // ensure the variants slice is plentiful - assert.NotNil(t, d.Calls) - - latestSmallest := 0 - From(d.Calls).ForEachT(func(dd dtos.VariantCall) { - // verify order - if latestSmallest != 0 { - assert.True(t, latestSmallest <= dd.Pos) - } - - latestSmallest = dd.Pos - }) - }) - }) -} - -func TestCanGetVariantsInDescendingPositionOrder(t *testing.T) { - // retrieve responses in descending order - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Descending, gq.UNCALLED, "", "") - - // assert the dto response slice is plentiful - assert.NotNil(t, allDtoResponses) - - From(allDtoResponses).ForEachT(func(dto dtos.VariantGetReponse) { - // ensure there is data - assert.NotNil(t, dto.Results) - - // check the data - From(dto.Results).ForEachT(func(d dtos.VariantGetResult) { - // ensure the variants slice is plentiful - assert.NotNil(t, d.Calls) - - latestGreatest := 0 - From(d.Calls).ForEachT(func(dd dtos.VariantCall) { - if latestGreatest != 0 { - assert.True(t, latestGreatest >= dd.Pos) - } - - latestGreatest = dd.Pos - }) - }) - }) -} - func TestCanGetReferenceSamples(t *testing.T) { // trigger runAndValidateGenotypeQueryResults(t, gq.REFERENCE, validateReferenceSample) From a5976fdb2ea646088c4dad76b251cc65470b5bc7 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 19 Jun 2023 10:28:39 -0400 Subject: [PATCH 40/84] chore: genes ingestion build test --- src/api/mvc/genes/main.go | 2 +- src/api/tests/build/api/genes_test.go | 86 ++++++++++++++++++++++++ src/api/tests/build/api/variants_test.go | 2 +- 3 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 src/api/tests/build/api/genes_test.go diff --git a/src/api/mvc/genes/main.go b/src/api/mvc/genes/main.go index d5e7cbdd..4ee8bf4e 100644 --- a/src/api/mvc/genes/main.go +++ b/src/api/mvc/genes/main.go @@ -305,7 +305,7 @@ func GenesIngest(c echo.Context) error { 
assemblyWg.Wait() }() - return c.JSON(http.StatusOK, "{\"message\":\"please check in with /genes/overview !\"}") + return c.JSON(http.StatusOK, map[string]interface{}{"message": "please check in with /genes/overview !"}) } func GetAllGeneIngestionRequests(c echo.Context) error { diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go new file mode 100644 index 00000000..d1d47fea --- /dev/null +++ b/src/api/tests/build/api/genes_test.go @@ -0,0 +1,86 @@ +package api + +import ( + "fmt" + ingest "gohan/api/models/ingest" + common "gohan/api/tests/common" + "gohan/api/utils" + "log" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +const ( + GenesOverviewPath string = "%s/genes/overview" + GenesIngestionRunPath string = "%s/genes/ingestion/run" + GenesIngestionRequestsPath string = "%s/genes/ingestion/requests" +) + +func TestGenesIngestion(t *testing.T) { + cfg := common.InitConfig() + + t.Run("Ingest And Check Genes", func(t *testing.T) { + // - ingest + ingestUrl := fmt.Sprintf(GenesIngestionRunPath, cfg.Api.Url) + + initialIngestionDtos := utils.GetRequestReturnStuff[ingest.GeneIngestRequest](ingestUrl) + assert.True(t, len(initialIngestionDtos.Message) > 0) + + // check ingestion request + // TODO: avoid potential infinite loop + for { + fmt.Println("Checking state of the ingestion..") + + // make the call + ingReqsUrl := fmt.Sprintf(GenesIngestionRequestsPath, cfg.Api.Url) + ingReqDtos := utils.GetRequestReturnStuff[[]ingest.GeneIngestRequest](ingReqsUrl) + assert.True(t, len(ingReqDtos) > 0) + + numFilesDone := 0 + numFilesRunning := len(ingReqDtos) + for _, dto := range ingReqDtos { + if dto.State == "Done" { + numFilesDone += 1 + } + if dto.State == "Error" { + log.Fatal(dto.Message) + } + } + if numFilesDone == numFilesRunning { + fmt.Println("Done, moving on..") + break + } else { + // pause + time.Sleep(3 * time.Second) + } + } + + // check ingestion stats + // TODO: avoid potential infinite loop + for { 
+ fmt.Println("Checking ingestion stats..") + // pause + time.Sleep(3 * time.Second) + + // make the call + statsReqUrl := fmt.Sprintf("%s/genes/ingestion/stats", cfg.Api.Url) + stats := utils.GetRequestReturnStuff[ingest.IngestStatsDto](statsReqUrl) + assert.NotNil(t, stats) + + fmt.Println(stats.NumAdded) + fmt.Println(stats.NumFlushed) + if stats.NumAdded == stats.NumFlushed { + fmt.Println("Done, moving on..") + break + } + if stats.NumFailed > 0 { + log.Fatal("More than one gene failed to flush") + } + + // pause + time.Sleep(3 * time.Second) + } + }) +} diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 5d3f1187..a5a623f6 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -92,7 +92,7 @@ func TestDemoVcfIngestion(t *testing.T) { // make the call ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) ingReqDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingReqsUrl) - assert.True(t, len(initialIngestionDtos) > 0) + assert.True(t, len(ingReqDtos) > 0) foundDone := false for _, dto := range ingReqDtos { From ed9761b35cbb4565a9c1c44d2b9fb1801e0b2cc1 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 19 Jun 2023 10:33:58 -0400 Subject: [PATCH 41/84] patch: vcf/gtf data path permissions --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1aa9019f..861e80d4 100644 --- a/Makefile +++ b/Makefile @@ -68,12 +68,12 @@ init-data-dirs: mkdir -p ${GOHAN_API_VCF_PATH} mkdir -p ${GOHAN_API_VCF_PATH}/tmp chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_API_VCF_PATH} - chmod -R 777 ${GOHAN_API_VCF_PATH}/tmp + chmod -R 777 ${GOHAN_API_VCF_PATH} mkdir -p ${GOHAN_API_GTF_PATH} mkdir -p ${GOHAN_API_GTF_PATH}/tmp chown -R ${HOST_USER_UID}:${HOST_USER_GID} ${GOHAN_API_GTF_PATH} - chmod -R 777 ${GOHAN_API_GTF_PATH}/tmp + chmod -R 777 ${GOHAN_API_GTF_PATH} @echo ".. done!" 
From 6838d5d35acb52d92459ea393c4dcfba36701262 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 19 Jun 2023 11:08:31 -0400 Subject: [PATCH 42/84] chore: move genes integration tests to build --- src/api/main.go | 3 +- src/api/tests/build/api/genes_test.go | 185 +++++++++++++++++- .../tests/integration/api/api_gene_test.go | 184 ----------------- 3 files changed, 183 insertions(+), 189 deletions(-) delete mode 100644 src/api/tests/integration/api/api_gene_test.go diff --git a/src/api/main.go b/src/api/main.go index 354e7949..6fa94b4b 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -201,7 +201,8 @@ func main() { e.GET("/genes/overview", genesMvc.GetGenesOverview) e.GET("/genes/search", genesMvc.GenesGetByNomenclatureWildcard, // middleware - gam.ValidateOptionalChromosomeAttribute) + gam.ValidateOptionalChromosomeAttribute, + gam.MandateAssemblyIdAttribute) e.GET("/genes/ingestion/requests", genesMvc.GetAllGeneIngestionRequests) e.GET("/genes/ingestion/run", genesMvc.GenesIngest) e.GET("/genes/ingestion/stats", genesMvc.GenesIngestionStats) diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go index d1d47fea..aac6ed90 100644 --- a/src/api/tests/build/api/genes_test.go +++ b/src/api/tests/build/api/genes_test.go @@ -1,21 +1,37 @@ package api import ( + "encoding/json" "fmt" - ingest "gohan/api/models/ingest" common "gohan/api/tests/common" + + "gohan/api/models" + c "gohan/api/models/constants" + a "gohan/api/models/constants/assembly-id" + ingest "gohan/api/models/ingest" + + "gohan/api/models/constants/chromosome" + "gohan/api/models/dtos" + "gohan/api/models/indexes" + "gohan/api/utils" + "io/ioutil" "log" + "net/http" + "sync" "testing" "time" "github.com/stretchr/testify/assert" + + . 
"github.com/ahmetb/go-linq" ) const ( - GenesOverviewPath string = "%s/genes/overview" - GenesIngestionRunPath string = "%s/genes/ingestion/run" - GenesIngestionRequestsPath string = "%s/genes/ingestion/requests" + GenesOverviewPath string = "%s/genes/overview" + GenesIngestionRunPath string = "%s/genes/ingestion/run" + GenesIngestionRequestsPath string = "%s/genes/ingestion/requests" + GenesSearchPathWithQueryString string = "%s/genes/search%s" ) func TestGenesIngestion(t *testing.T) { @@ -83,4 +99,165 @@ func TestGenesIngestion(t *testing.T) { time.Sleep(3 * time.Second) } }) + + // verify demo vcf was properly ingested + t.Run("Test Genes Overview", func(t *testing.T) { + // check variants overview + cfg := common.InitConfig() + + overviewJson := getGenesOverview(t, cfg) + assert.NotNil(t, overviewJson) + }) + + t.Run("Test Get Genes By AssemblyId And Chromosome", func(t *testing.T) { + // retrieve all possible combinations of responses + allDtoResponses := getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(t) + + // assert the dto response slice is plentiful + assert.NotNil(t, allDtoResponses) + + From(allDtoResponses).ForEachT(func(dto dtos.GenesResponseDTO) { + // ensure there are results in the response + assert.NotNil(t, dto.Results) + + // check the resulting data + From(dto.Results).ForEachT(func(gene indexes.Gene) { + // ensure the gene is legit + assert.NotNil(t, gene.Name) + assert.NotNil(t, gene.AssemblyId) + assert.True(t, chromosome.IsValidHumanChromosome(gene.Chrom)) + assert.Greater(t, gene.End, gene.Start) + }) + }) + }) + +} + +func getGenesOverview(_t *testing.T, _cfg *models.Config) map[string]interface{} { + request, _ := http.NewRequest("GET", fmt.Sprintf(GenesOverviewPath, _cfg.Api.Url), nil) + + client := &http.Client{} + response, responseErr := client.Do(request) + assert.Nil(_t, responseErr) + + defer response.Body.Close() + + // this test (at the time of writing) will only work if authorization is disabled + shouldBe := 200 + 
assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) + + // -- interpret array of ingestion requests from response + overviewRespBody, overviewRespBodyErr := ioutil.ReadAll(response.Body) + assert.Nil(_t, overviewRespBodyErr) + + // --- transform body bytes to string + overviewRespBodyString := string(overviewRespBody) + + // -- check for json error + var overviewRespJson map[string]interface{} + overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &overviewRespJson) + assert.Nil(_t, overviewJsonUnmarshallingError) + + // -- insure it's an empty array + assemblyIDsKey, assidkOk := overviewRespJson["assemblyIDs"] + assert.True(_t, assidkOk) + assert.NotNil(_t, assemblyIDsKey) + + return overviewRespJson +} + +func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.GenesResponseDTO { + cfg := common.InitConfig() + + // retrieve the overview + overviewJson := getGenesOverview(_t, cfg) + + // ensure the response is valid + // TODO: error check instead of nil check + assert.NotNil(_t, overviewJson) + + // initialize a common slice in which to + // accumulate al responses asynchronously + allDtoResponses := []dtos.GenesResponseDTO{} + allDtoResponsesMux := sync.RWMutex{} + + var combWg sync.WaitGroup + for _, assemblyIdOverviewBucket := range overviewJson { + + // range over all assembly IDs + for assemblyIdString, genesPerChromosomeBucket := range assemblyIdOverviewBucket.(map[string]interface{}) { + + fmt.Println(assemblyIdString) + fmt.Println(genesPerChromosomeBucket) + + castedBucket := genesPerChromosomeBucket.(map[string]interface{})["numberOfGenesPerChromosome"].(map[string]interface{}) + + for chromosomeString, _ := range castedBucket { // _ = number of genes (unused) + + combWg.Add(1) + go func(_wg *sync.WaitGroup, _assemblyIdString string, _chromosomeString string) { + defer _wg.Done() + + assemblyId := 
a.CastToAssemblyId(_assemblyIdString) + + // make the call + dto := buildQueryAndMakeGetGenesCall(_chromosomeString, "", assemblyId, _t, cfg) + + // ensure there is data returned + // (we'd be making a bad query, otherwise) + assert.True(_t, len(dto.Results) > 0) + + // accumulate all response objects + // to a common slice in an + // asynchronous-safe manner + allDtoResponsesMux.Lock() + allDtoResponses = append(allDtoResponses, dto) + allDtoResponsesMux.Unlock() + }(&combWg, assemblyIdString, chromosomeString) + } + + } + + } + combWg.Wait() + + return allDtoResponses +} + +func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO { + + queryString := fmt.Sprintf("?chromosome=%s&assemblyId=%s", chromosome, assemblyId) + + url := fmt.Sprintf(GenesSearchPathWithQueryString, _cfg.Api.Url, queryString) + + return getGetGenesCall(url, _t) +} + +func getGetGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO { + fmt.Printf("Calling %s\n", url) + request, _ := http.NewRequest("GET", url, nil) + + client := &http.Client{} + response, responseErr := client.Do(request) + assert.Nil(_t, responseErr) + + defer response.Body.Close() + + // this test (at the time of writing) will only work if authorization is disabled + shouldBe := 200 + assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) + + // -- interpret array of ingestion requests from response + respBody, respBodyErr := ioutil.ReadAll(response.Body) + assert.Nil(_t, respBodyErr) + + // --- transform body bytes to string + respBodyString := string(respBody) + + // -- convert to json and check for error + var respDto dtos.GenesResponseDTO + jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) + assert.Nil(_t, jsonUnmarshallingError) + + return respDto } diff --git a/src/api/tests/integration/api/api_gene_test.go 
b/src/api/tests/integration/api/api_gene_test.go deleted file mode 100644 index e2ed8852..00000000 --- a/src/api/tests/integration/api/api_gene_test.go +++ /dev/null @@ -1,184 +0,0 @@ -package api - -import ( - "encoding/json" - "fmt" - "gohan/api/models" - c "gohan/api/models/constants" - a "gohan/api/models/constants/assembly-id" - "gohan/api/models/constants/chromosome" - "gohan/api/models/dtos" - "gohan/api/models/indexes" - common "gohan/api/tests/common" - "io/ioutil" - "net/http" - "sync" - "testing" - - . "github.com/ahmetb/go-linq" - - "github.com/stretchr/testify/assert" -) - -const ( - GenesOverviewPath string = "%s/genes/overview" - GenesSearchPathWithQueryString string = "%s/genes/search%s" -) - -func TestGenesOverview(t *testing.T) { - cfg := common.InitConfig() - - overviewJson := getGenesOverview(t, cfg) - assert.NotNil(t, overviewJson) -} - -func TestCanGetGenesByAssemblyIdAndChromosome(t *testing.T) { - // retrieve all possible combinations of responses - allDtoResponses := getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(t) - - // assert the dto response slice is plentiful - assert.NotNil(t, allDtoResponses) - - From(allDtoResponses).ForEachT(func(dto dtos.GenesResponseDTO) { - // ensure there are results in the response - assert.NotNil(t, dto.Results) - - // check the resulting data - From(dto.Results).ForEachT(func(gene indexes.Gene) { - // ensure the gene is legit - assert.NotNil(t, gene.Name) - assert.NotNil(t, gene.AssemblyId) - assert.True(t, chromosome.IsValidHumanChromosome(gene.Chrom)) - assert.Greater(t, gene.End, gene.Start) - }) - }) -} - -func getAllDtosOfVariousCombinationsOfGenesAndAssemblyIDs(_t *testing.T) []dtos.GenesResponseDTO { - cfg := common.InitConfig() - - // retrieve the overview - overviewJson := getGenesOverview(_t, cfg) - - // ensure the response is valid - // TODO: error check instead of nil check - assert.NotNil(_t, overviewJson) - - // initialize a common slice in which to - // accumulate al responses 
asynchronously - allDtoResponses := []dtos.GenesResponseDTO{} - allDtoResponsesMux := sync.RWMutex{} - - var combWg sync.WaitGroup - for _, assemblyIdOverviewBucket := range overviewJson { - - // range over all assembly IDs - for assemblyIdString, genesPerChromosomeBucket := range assemblyIdOverviewBucket.(map[string]interface{}) { - - fmt.Println(assemblyIdString) - fmt.Println(genesPerChromosomeBucket) - - castedBucket := genesPerChromosomeBucket.(map[string]interface{})["numberOfGenesPerChromosome"].(map[string]interface{}) - - for chromosomeString, _ := range castedBucket { // _ = number of genes (unused) - - combWg.Add(1) - go func(_wg *sync.WaitGroup, _assemblyIdString string, _chromosomeString string) { - defer _wg.Done() - - assemblyId := a.CastToAssemblyId(_assemblyIdString) - - // make the call - dto := buildQueryAndMakeGetGenesCall(_chromosomeString, "", assemblyId, _t, cfg) - - // ensure there is data returned - // (we'd be making a bad query, otherwise) - assert.True(_t, len(dto.Results) > 0) - - // accumulate all response objects - // to a common slice in an - // asynchronous-safe manner - allDtoResponsesMux.Lock() - allDtoResponses = append(allDtoResponses, dto) - allDtoResponsesMux.Unlock() - }(&combWg, assemblyIdString, chromosomeString) - } - - } - - } - combWg.Wait() - - return allDtoResponses -} - -func getGenesOverview(_t *testing.T, _cfg *models.Config) map[string]interface{} { - request, _ := http.NewRequest("GET", fmt.Sprintf(GenesOverviewPath, _cfg.Api.Url), nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) - - // -- interpret array of ingestion requests from response - overviewRespBody, overviewRespBodyErr := 
ioutil.ReadAll(response.Body) - assert.Nil(_t, overviewRespBodyErr) - - // --- transform body bytes to string - overviewRespBodyString := string(overviewRespBody) - - // -- check for json error - var overviewRespJson map[string]interface{} - overviewJsonUnmarshallingError := json.Unmarshal([]byte(overviewRespBodyString), &overviewRespJson) - assert.Nil(_t, overviewJsonUnmarshallingError) - - // -- insure it's an empty array - assemblyIDsKey, assidkOk := overviewRespJson["assemblyIDs"] - assert.True(_t, assidkOk) - assert.NotNil(_t, assemblyIDsKey) - - return overviewRespJson -} - -func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c.AssemblyId, _t *testing.T, _cfg *models.Config) dtos.GenesResponseDTO { - - queryString := fmt.Sprintf("?chromosome=%s&assemblyId=%s", chromosome, assemblyId) - - url := fmt.Sprintf(GenesSearchPathWithQueryString, _cfg.Api.Url, queryString) - - return getGetGenesCall(url, _t) -} - -func getGetGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO { - fmt.Printf("Calling %s\n", url) - request, _ := http.NewRequest("GET", url, nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(_t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(_t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET %s Status: %s ; Should be %d", url, response.Status, shouldBe)) - - // -- interpret array of ingestion requests from response - respBody, respBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(_t, respBodyErr) - - // --- transform body bytes to string - respBodyString := string(respBody) - - // -- convert to json and check for error - var respDto dtos.GenesResponseDTO - jsonUnmarshallingError := json.Unmarshal([]byte(respBodyString), &respDto) - assert.Nil(_t, jsonUnmarshallingError) - - return respDto -} From 2fd22ea7bfcac614f29cc4d04cb86cab0256c83e Mon Sep 17 
00:00:00 2001 From: Brennan Brouillette Date: Mon, 19 Jun 2023 11:17:37 -0400 Subject: [PATCH 43/84] patch: small pause, triggering and checking ingest --- src/api/tests/build/api/genes_test.go | 2 +- src/api/tests/build/api/variants_test.go | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go index aac6ed90..9e018e7a 100644 --- a/src/api/tests/build/api/genes_test.go +++ b/src/api/tests/build/api/genes_test.go @@ -47,7 +47,7 @@ func TestGenesIngestion(t *testing.T) { // check ingestion request // TODO: avoid potential infinite loop for { - fmt.Println("Checking state of the ingestion..") + fmt.Println("Checking state of the genes ingestion..") // make the call ingReqsUrl := fmt.Sprintf(GenesIngestionRequestsPath, cfg.Api.Url) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index a5a623f6..40785666 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -84,10 +84,13 @@ func TestDemoVcfIngestion(t *testing.T) { initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) assert.True(t, len(initialIngestionDtos) > 0) + // pause + time.Sleep(1 * time.Second) + // check ingestion request // TODO: avoid potential infinite loop for { - fmt.Println("Checking state of the ingestion..") + fmt.Println("Checking state of the variants ingestion..") // make the call ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) From 3d87c26fea726a4cc7802b38bade8e013c1488ee Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 3 Jul 2023 17:02:42 -0400 Subject: [PATCH 44/84] chore: test refactoring - var ingestion requests --- src/api/tests/build/api/variants_test.go | 7 ++++ .../tests/integration/api/api_variant_test.go | 33 ------------------- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git 
a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 40785666..0cca3fe8 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -34,6 +34,13 @@ func TestDemoVcfIngestion(t *testing.T) { tableId := uuid.NewString() t.Run("Ingest Demo VCF", func(t *testing.T) { + // verify ingestion endpoint + // -- ensure nothing is running + initialIngestionState := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](fmt.Sprintf(common.IngestionRequestsPath, cfg.Api.Url)) + + assert.NotNil(t, len(initialIngestionState)) + assert.NotZero(t, len(initialIngestionState)) + // create demo vcf string sampleId := "abc1234" diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 4f00b9b2..387ec01b 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -1,7 +1,6 @@ package api import ( - "encoding/json" "fmt" c "gohan/api/models/constants" a "gohan/api/models/constants/assembly-id" @@ -12,7 +11,6 @@ import ( common "gohan/api/tests/common" testConsts "gohan/api/tests/common/constants" ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" - "io/ioutil" "math/rand" "net/http" "strings" @@ -55,37 +53,6 @@ func TestVariantsOverview(t *testing.T) { assert.NotNil(t, overviewJson) } -func TestGetIngestionRequests(t *testing.T) { - cfg := common.InitConfig() - - request, _ := http.NewRequest("GET", fmt.Sprintf(common.IngestionRequestsPath, cfg.Api.Url), nil) - - client := &http.Client{} - response, responseErr := client.Do(request) - assert.Nil(t, responseErr) - - defer response.Body.Close() - - // this test (at the time of writing) will only work if authorization is disabled - shouldBe := 200 - assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) - - // -- interpret array of 
ingestion requests from response - ingestionRequestsRespBody, ingestionRequestsRespBodyErr := ioutil.ReadAll(response.Body) - assert.Nil(t, ingestionRequestsRespBodyErr) - - // --- transform body bytes to string - ingestionRequestsRespBodyString := string(ingestionRequestsRespBody) - - // -- check for json error - var ingestionRequestsRespJsonSlice []map[string]interface{} - ingestionRequestsStringJsonUnmarshallingError := json.Unmarshal([]byte(ingestionRequestsRespBodyString), &ingestionRequestsRespJsonSlice) - assert.Nil(t, ingestionRequestsStringJsonUnmarshallingError) - - // -- ensure the response is not nil - assert.NotNil(t, len(ingestionRequestsRespJsonSlice)) -} - func TestCanGetReferenceSamples(t *testing.T) { // trigger runAndValidateGenotypeQueryResults(t, gq.REFERENCE, validateReferenceSample) From acb2a5320469c5e482db1a0b928f272af56cf23e Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 3 Jul 2023 18:12:05 -0400 Subject: [PATCH 45/84] patch: genes test touchup --- src/api/tests/build/api/genes_test.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go index 9e018e7a..b0e56f73 100644 --- a/src/api/tests/build/api/genes_test.go +++ b/src/api/tests/build/api/genes_test.go @@ -44,10 +44,14 @@ func TestGenesIngestion(t *testing.T) { initialIngestionDtos := utils.GetRequestReturnStuff[ingest.GeneIngestRequest](ingestUrl) assert.True(t, len(initialIngestionDtos.Message) > 0) + // - rest + time.Sleep(1 * time.Second) + // check ingestion request // TODO: avoid potential infinite loop + counter := 0 for { - fmt.Println("Checking state of the genes ingestion..") + fmt.Printf("\rChecking state of the genes ingestion.. 
[%d]\n", counter) // make the call ingReqsUrl := fmt.Sprintf(GenesIngestionRequestsPath, cfg.Api.Url) @@ -71,12 +75,14 @@ func TestGenesIngestion(t *testing.T) { // pause time.Sleep(3 * time.Second) } + counter++ } // check ingestion stats // TODO: avoid potential infinite loop + counter = 0 for { - fmt.Println("Checking ingestion stats..") + fmt.Printf("\rChecking ingestion stats.. [%d]\n", counter) // pause time.Sleep(3 * time.Second) @@ -97,6 +103,7 @@ func TestGenesIngestion(t *testing.T) { // pause time.Sleep(3 * time.Second) + counter++ } }) @@ -230,10 +237,10 @@ func buildQueryAndMakeGetGenesCall(chromosome string, term string, assemblyId c. url := fmt.Sprintf(GenesSearchPathWithQueryString, _cfg.Api.Url, queryString) - return getGetGenesCall(url, _t) + return getGenesCall(url, _t) } -func getGetGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO { +func getGenesCall(url string, _t *testing.T) dtos.GenesResponseDTO { fmt.Printf("Calling %s\n", url) request, _ := http.NewRequest("GET", url, nil) From d1022039e65f61054c6f766ed4bbe87bc43c32e1 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 3 Jul 2023 18:17:01 -0400 Subject: [PATCH 46/84] chore: more variants build test refactoring --- src/api/tests/build/api/variants_test.go | 216 +++++++++++- src/api/tests/common/common.go | 76 +++++ .../tests/integration/api/api_variant_test.go | 315 ------------------ 3 files changed, 287 insertions(+), 320 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 0cca3fe8..e131cd0f 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -8,14 +8,20 @@ import ( common "gohan/api/tests/common" "gohan/api/utils" "log" + "math/rand" "os" "os/exec" "path/filepath" + "sync" "testing" "time" + c "gohan/api/models/constants" + a "gohan/api/models/constants/assembly-id" gq "gohan/api/models/constants/genotype-query" s "gohan/api/models/constants/sort" + z 
"gohan/api/models/constants/zygosity" + ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" "github.com/google/uuid" "github.com/stretchr/testify/assert" @@ -37,9 +43,7 @@ func TestDemoVcfIngestion(t *testing.T) { // verify ingestion endpoint // -- ensure nothing is running initialIngestionState := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](fmt.Sprintf(common.IngestionRequestsPath, cfg.Api.Url)) - assert.NotNil(t, len(initialIngestionState)) - assert.NotZero(t, len(initialIngestionState)) // create demo vcf string sampleId := "abc1234" @@ -96,8 +100,9 @@ func TestDemoVcfIngestion(t *testing.T) { // check ingestion request // TODO: avoid potential infinite loop + counter := 0 for { - fmt.Println("Checking state of the variants ingestion..") + fmt.Printf("\rChecking state of the variants ingestion.. [%d]\n", counter) // make the call ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) @@ -121,12 +126,14 @@ func TestDemoVcfIngestion(t *testing.T) { // pause time.Sleep(3 * time.Second) } + counter++ } // check ingestion stats // TODO: avoid potential infinite loop + counter = 0 for { - fmt.Println("Checking ingestion stats..") + fmt.Printf("\rChecking ingestion stats.. 
[%d]\n", counter) // pause time.Sleep(3 * time.Second) @@ -147,6 +154,7 @@ func TestDemoVcfIngestion(t *testing.T) { // pause time.Sleep(3 * time.Second) + counter++ } }) @@ -215,7 +223,6 @@ func TestDemoVcfIngestion(t *testing.T) { }) t.Run("Test No Variant Info Present", func(t *testing.T) { - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Undefined, gq.UNCALLED, "", "") // assert that all responses from all combinations have no results @@ -285,4 +292,203 @@ func TestDemoVcfIngestion(t *testing.T) { }) }) }) + validateReferenceSample := func(__t *testing.T, call *dtos.VariantCall) { + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Reference)) + } + + validateAlternateSample := func(__t *testing.T, call *dtos.VariantCall) { + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Alternate)) + } + + validateHeterozygousSample := func(__t *testing.T, call *dtos.VariantCall) { + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Heterozygous)) + } + + validateHomozygousReferenceSample := func(__t *testing.T, call *dtos.VariantCall) { + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousReference)) + } + + validateHomozygousAlternateSample := func(__t *testing.T, call *dtos.VariantCall) { + assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousAlternate)) + } + t.Run("Test Get Variants Samples", func(t *testing.T) { + + // Reference Samples + runAndValidateGenotypeQueryResults(t, gq.REFERENCE, validateReferenceSample) + + // Alternate Samples + runAndValidateGenotypeQueryResults(t, gq.ALTERNATE, validateAlternateSample) + + // HeterozygousSamples + runAndValidateGenotypeQueryResults(t, gq.HETEROZYGOUS, validateHeterozygousSample) + + // HomozygousReferenceSamples + runAndValidateGenotypeQueryResults(t, gq.HOMOZYGOUS_REFERENCE, validateHomozygousReferenceSample) + + // Homozygous Alternate Samples + runAndValidateGenotypeQueryResults(t, gq.HOMOZYGOUS_ALTERNATE, 
validateHomozygousAlternateSample) + }) + t.Run("Test Get Variants Samples with Specific Alleles", func(t *testing.T) { + // Homozygous Alternate Variants With Various References + specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + assert.True(__t, alternativeAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Ref, referenceAllelePattern) + + validateHomozygousAlternateSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) + + // Homozygous Reference Variants With Various References + specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + assert.True(__t, alternativeAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Ref, referenceAllelePattern) + + validateHomozygousReferenceSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) + + //Heterozygous Variants With Various References + specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + assert.True(__t, alternativeAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Ref, referenceAllelePattern) + + validateHeterozygousSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Reference, specificValidation) + + // Homozygous Alternate Variants With Various Alternatives + specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + 
assert.True(__t, referenceAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Alt, alternativeAllelePattern) + + validateHomozygousAlternateSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) + + // Homozygous Reference Variants With Various Alternatives + specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + assert.True(__t, referenceAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Alt, alternativeAllelePattern) + + validateHomozygousReferenceSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) + + // Heterozygous Variants With Various Alternatives + specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { + // ensure test is formatted correctly + assert.True(__t, referenceAllelePattern == "") + + // validate variant + assert.Contains(__t, call.Alt, alternativeAllelePattern) + + validateHeterozygousSample(__t, call) + } + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) + }) +} + +func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, call *dtos.VariantCall)) { + + allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, "", "") + + // assert that all of the responses include heterozygous sample sets + // - * accumulate all samples into a single list using the set of SelectManyT's and the SelectT + // - ** iterate over each sample in the ForEachT + // var accumulatedSamples []*indexes.Sample + var accumulatedCalls 
[]*dtos.VariantCall + + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).ForEachT(func(call dtos.VariantCall) { // ** + accumulatedCalls = append(accumulatedCalls, &call) + }) + + // if len(accumulatedCalls) == 0 { + // _t.Skip("No samples returned! Skipping --") + // } + + for _, c := range accumulatedCalls { + assert.NotEmpty(_t, c.SampleId) + assert.NotEmpty(_t, c.GenotypeType) + + specificValidation(_t, c) + } +} + +func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { + cfg := common.InitConfig() + + // retrieve the overview + overviewJson := common.GetVariantsOverview(_t, cfg) + + // ensure the response is valid + // TODO: error check instead of nil check + assert.NotNil(_t, overviewJson) + + // generate all possible combinations of + // available samples, assemblys, and chromosomes + overviewCombinations := common.GetOverviewResultCombinations(overviewJson["chromosomes"], overviewJson["sampleIDs"], overviewJson["assemblyIDs"]) + + // avoid overflow: + // - shuffle all combinations and take top x + x := 10 + croppedCombinations := make([][]string, len(overviewCombinations)) + perm := rand.Perm(len(overviewCombinations)) + for i, v := range perm { + croppedCombinations[v] = overviewCombinations[i] + } + if len(croppedCombinations) > x { + croppedCombinations = croppedCombinations[:x] + } + + // initialize a common slice in which to + // accumulate al responses asynchronously + allDtoResponses := []dtos.VariantGetReponse{} + allDtoResponsesMux := sync.RWMutex{} + + var combWg sync.WaitGroup + for _, combination := range croppedCombinations { + combWg.Add(1) + go func(_wg *sync.WaitGroup, _combination []string) { + defer 
_wg.Done() + + chrom := _combination[0] + sampleId := _combination[1] + assemblyId := a.CastToAssemblyId(_combination[2]) + + // make the call + dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + + assert.Equal(_t, 1, len(dto.Results)) + + // accumulate all response objects + // to a common slice in an + // asynchronous-safe manner + allDtoResponsesMux.Lock() + allDtoResponses = append(allDtoResponses, dto) + allDtoResponsesMux.Unlock() + }(&combWg, combination) + } + + combWg.Wait() + + return allDtoResponses } diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index 02171a98..fb5d99d1 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -8,6 +8,9 @@ import ( c "gohan/api/models/constants" a "gohan/api/models/constants/assembly-id" gq "gohan/api/models/constants/genotype-query" + s "gohan/api/models/constants/sort" + testConsts "gohan/api/tests/common/constants" + ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" "gohan/api/models/dtos" "gohan/api/utils" @@ -22,6 +25,7 @@ import ( "sync" "testing" + . "github.com/ahmetb/go-linq" "github.com/stretchr/testify/assert" yaml "gopkg.in/yaml.v2" ) @@ -338,3 +342,75 @@ func makeGetVariantsCall(url string, ignoreStatusCode bool, _t *testing.T) dtos. 
return respDto } + +func GetOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string { + var allCombinations = [][]string{} + + for i, _ := range chromosomeStruct.(map[string]interface{}) { + for j, _ := range sampleIdsStruct.(map[string]interface{}) { + for k, _ := range assemblyIdsStruct.(map[string]interface{}) { + allCombinations = append(allCombinations, []string{i, j, k}) + } + } + } + + return allCombinations +} + +func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, + genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, + specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { + + // TODO: use some kind of Allele Enum + patterns := []string{"A", "C", "T", "G"} + var patWg sync.WaitGroup + for _, pat := range patterns { + patWg.Add(1) + go func(_pat string, _patWg *sync.WaitGroup) { + defer _patWg.Done() + + switch refAltTestType { + case ratt.Reference: + runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, _pat, "", specificValidation) + case ratt.Alternative: + runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, "", _pat, specificValidation) + default: + println("Skipping Test -- no Ref/Alt Test Type provided") + } + + }(pat, &patWg) + } + patWg.Wait() +} + +func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T, + genotypeQuery c.GenotypeQuery, + referenceAllelePattern string, alternativeAllelePattern string, + specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { + + allDtoResponses := GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) + + // assert that all of the responses include sample sets with the appropriate zygosity + // - * 
accumulate all variants into a single list using the set of SelectManyT's and the SelectT + // - ** iterate over each variant in the ForEachT + // var accumulatedVariants []*indexes.Variant + var accumulatedCalls []*dtos.VariantCall + + From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * + return From(resp.Results) + }).SelectManyT(func(data dtos.VariantGetResult) Query { + return From(data.Calls) + }).ForEachT(func(call dtos.VariantCall) { // ** + accumulatedCalls = append(accumulatedCalls, &call) + }) + + // if len(accumulatedCalls) == 0 { + // _t.Skip(fmt.Sprintf("No variants returned for patterns ref: '%s', alt: '%s'! Skipping --", referenceAllelePattern, alternativeAllelePattern)) + // } + + for _, v := range accumulatedCalls { + assert.NotNil(_t, v.Id) + specificValidation(_t, v, referenceAllelePattern, alternativeAllelePattern) + } + +} diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 387ec01b..9f170978 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -2,23 +2,11 @@ package api import ( "fmt" - c "gohan/api/models/constants" - a "gohan/api/models/constants/assembly-id" - gq "gohan/api/models/constants/genotype-query" - s "gohan/api/models/constants/sort" - z "gohan/api/models/constants/zygosity" - "gohan/api/models/dtos" common "gohan/api/tests/common" - testConsts "gohan/api/tests/common/constants" - ratt "gohan/api/tests/common/constants/referenceAlternativeTestType" - "math/rand" "net/http" "strings" - "sync" "testing" - . 
"github.com/ahmetb/go-linq" - "github.com/stretchr/testify/assert" ) @@ -53,125 +41,6 @@ func TestVariantsOverview(t *testing.T) { assert.NotNil(t, overviewJson) } -func TestCanGetReferenceSamples(t *testing.T) { - // trigger - runAndValidateGenotypeQueryResults(t, gq.REFERENCE, validateReferenceSample) -} - -func TestCanGetAlternateSamples(t *testing.T) { - // trigger - runAndValidateGenotypeQueryResults(t, gq.ALTERNATE, validateAlternateSample) -} - -func TestCanGetHeterozygousSamples(t *testing.T) { - // trigger - runAndValidateGenotypeQueryResults(t, gq.HETEROZYGOUS, validateHeterozygousSample) -} - -func TestCanGetHomozygousReferenceSamples(t *testing.T) { - // trigger - runAndValidateGenotypeQueryResults(t, gq.HOMOZYGOUS_REFERENCE, validateHomozygousReferenceSample) -} - -func TestCanGetHomozygousAlternateSamples(t *testing.T) { - // trigger - runAndValidateGenotypeQueryResults(t, gq.HOMOZYGOUS_ALTERNATE, validateHomozygousAlternateSample) -} - -func TestCanGetHomozygousAlternateVariantsWithVariousReferences(t *testing.T) { - // setup - specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, alternativeAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Ref, referenceAllelePattern) - - validateHomozygousAlternateSample(__t, call) - } - - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) -} - -func TestCanGetHomozygousReferenceVariantsWithVariousReferences(t *testing.T) { - // setup - specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, alternativeAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Ref, referenceAllelePattern) - - validateHomozygousReferenceSample(__t, call) - } 
- - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) -} - -func TestCanGetHeterozygousVariantsWithVariousReferences(t *testing.T) { - // setup - specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, alternativeAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Ref, referenceAllelePattern) - - validateHeterozygousSample(__t, call) - } - - // trigger - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Reference, specificValidation) -} - -func TestCanGetHomozygousAlternateVariantsWithVariousAlternatives(t *testing.T) { - // setup - specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, referenceAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Alt, alternativeAllelePattern) - - validateHomozygousAlternateSample(__t, call) - } - - // trigger - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) -} - -func TestCanGetHomozygousReferenceVariantsWithVariousAlternatives(t *testing.T) { - // setup - specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, referenceAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Alt, alternativeAllelePattern) - - validateHomozygousReferenceSample(__t, call) - } - - // trigger - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) -} - -func TestCanGetHeterozygousVariantsWithVariousAlternatives(t *testing.T) { - // setup - 
specificValidation := func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { - // ensure test is formatted correctly - assert.True(__t, referenceAllelePattern == "") - - // validate variant - assert.Contains(__t, call.Alt, alternativeAllelePattern) - - validateHeterozygousSample(__t, call) - } - - // trigger - executeReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) -} - func TestCanGetVariantsWithWildcardAlternatives(t *testing.T) { cfg := common.InitConfig() allele := "ATTN" // example allele - TODO: render more sophisticated randomization @@ -333,187 +202,3 @@ func TestGetVariantsCanHandleInvalidWildcardAlleleQuery(t *testing.T) { } // -- Common utility functions for api tests -func executeReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, - genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, - specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { - - // TODO: use some kind of Allele Enum - patterns := []string{"A", "C", "T", "G"} - var patWg sync.WaitGroup - for _, pat := range patterns { - patWg.Add(1) - go func(_pat string, _patWg *sync.WaitGroup) { - defer _patWg.Done() - - switch refAltTestType { - case ratt.Reference: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, _pat, "", specificValidation) - case ratt.Alternative: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, "", _pat, specificValidation) - default: - println("Skipping Test -- no Ref/Alt Test Type provided") - } - - }(pat, &patWg) - } - patWg.Wait() -} - -func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T, - genotypeQuery c.GenotypeQuery, - referenceAllelePattern string, alternativeAllelePattern string, - specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, 
alternativeAllelePattern string)) { - - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) - - // assert that all of the responses include sample sets with the appropriate zygosity - // - * accumulate all variants into a single list using the set of SelectManyT's and the SelectT - // - ** iterate over each variant in the ForEachT - // var accumulatedVariants []*indexes.Variant - var accumulatedCalls []*dtos.VariantCall - - From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * - return From(resp.Results) - }).SelectManyT(func(data dtos.VariantGetResult) Query { - return From(data.Calls) - }).ForEachT(func(call dtos.VariantCall) { // ** - accumulatedCalls = append(accumulatedCalls, &call) - }) - - if len(accumulatedCalls) == 0 { - _t.Skip(fmt.Sprintf("No variants returned for patterns ref: '%s', alt: '%s'! Skipping --", referenceAllelePattern, alternativeAllelePattern)) - } - - for _, v := range accumulatedCalls { - assert.NotNil(_t, v.Id) - specificValidation(_t, v, referenceAllelePattern, alternativeAllelePattern) - } - -} - -func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, call *dtos.VariantCall)) { - - allDtoResponses := getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, "", "") - - // assert that all of the responses include heterozygous sample sets - // - * accumulate all samples into a single list using the set of SelectManyT's and the SelectT - // - ** iterate over each sample in the ForEachT - // var accumulatedSamples []*indexes.Sample - var accumulatedCalls []*dtos.VariantCall - - From(allDtoResponses).SelectManyT(func(resp dtos.VariantGetReponse) Query { // * - return From(resp.Results) - }).SelectManyT(func(data dtos.VariantGetResult) Query { - return From(data.Calls) - }).ForEachT(func(call 
dtos.VariantCall) { // ** - accumulatedCalls = append(accumulatedCalls, &call) - }) - - if len(accumulatedCalls) == 0 { - _t.Skip("No samples returned! Skipping --") - } - - for _, c := range accumulatedCalls { - assert.NotEmpty(_t, c.SampleId) - assert.NotEmpty(_t, c.GenotypeType) - - specificValidation(_t, c) - } -} - -func getOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct interface{}, assemblyIdsStruct interface{}) [][]string { - var allCombinations = [][]string{} - - for i, _ := range chromosomeStruct.(map[string]interface{}) { - for j, _ := range sampleIdsStruct.(map[string]interface{}) { - for k, _ := range assemblyIdsStruct.(map[string]interface{}) { - allCombinations = append(allCombinations, []string{i, j, k}) - } - } - } - - return allCombinations -} - -func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { - cfg := common.InitConfig() - - // retrieve the overview - overviewJson := common.GetVariantsOverview(_t, cfg) - - // ensure the response is valid - // TODO: error check instead of nil check - assert.NotNil(_t, overviewJson) - - // generate all possible combinations of - // available samples, assemblys, and chromosomes - overviewCombinations := getOverviewResultCombinations(overviewJson["chromosomes"], overviewJson["sampleIDs"], overviewJson["assemblyIDs"]) - - // avoid overflow: - // - shuffle all combinations and take top x - x := 10 - croppedCombinations := make([][]string, len(overviewCombinations)) - perm := rand.Perm(len(overviewCombinations)) - for i, v := range perm { - croppedCombinations[v] = overviewCombinations[i] - } - if len(croppedCombinations) > x { - croppedCombinations = croppedCombinations[:x] - } - - // initialize a common slice in which to - // accumulate al responses asynchronously - allDtoResponses := 
[]dtos.VariantGetReponse{} - allDtoResponsesMux := sync.RWMutex{} - - var combWg sync.WaitGroup - for _, combination := range croppedCombinations { - combWg.Add(1) - go func(_wg *sync.WaitGroup, _combination []string) { - defer _wg.Done() - - chrom := _combination[0] - sampleId := _combination[1] - assemblyId := a.CastToAssemblyId(_combination[2]) - - // make the call - dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) - - assert.Equal(_t, 1, len(dto.Results)) - - // accumulate all response objects - // to a common slice in an - // asynchronous-safe manner - allDtoResponsesMux.Lock() - allDtoResponses = append(allDtoResponses, dto) - allDtoResponsesMux.Unlock() - }(&combWg, combination) - } - - combWg.Wait() - - return allDtoResponses -} - -// --- sample validation -func validateReferenceSample(__t *testing.T, call *dtos.VariantCall) { - assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Reference)) -} - -func validateAlternateSample(__t *testing.T, call *dtos.VariantCall) { - assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Alternate)) -} - -func validateHeterozygousSample(__t *testing.T, call *dtos.VariantCall) { - assert.True(__t, call.GenotypeType == z.ZygosityToString(z.Heterozygous)) -} - -func validateHomozygousReferenceSample(__t *testing.T, call *dtos.VariantCall) { - assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousReference)) -} - -func validateHomozygousAlternateSample(__t *testing.T, call *dtos.VariantCall) { - assert.True(__t, call.GenotypeType == z.ZygosityToString(z.HomozygousAlternate)) -} - -// -- From edd300d2e132a849a28d62b3362545f65be0b275 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 3 Jul 2023 18:24:39 -0400 Subject: [PATCH 47/84] patch: checkup logging typos --- src/api/tests/build/api/genes_test.go | 10 +++++----- src/api/tests/build/api/variants_test.go 
| 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/api/tests/build/api/genes_test.go b/src/api/tests/build/api/genes_test.go index b0e56f73..3379f00d 100644 --- a/src/api/tests/build/api/genes_test.go +++ b/src/api/tests/build/api/genes_test.go @@ -51,7 +51,7 @@ func TestGenesIngestion(t *testing.T) { // TODO: avoid potential infinite loop counter := 0 for { - fmt.Printf("\rChecking state of the genes ingestion.. [%d]\n", counter) + fmt.Printf("\rChecking state of the genes ingestion.. [%d]", counter) // make the call ingReqsUrl := fmt.Sprintf(GenesIngestionRequestsPath, cfg.Api.Url) @@ -69,7 +69,7 @@ func TestGenesIngestion(t *testing.T) { } } if numFilesDone == numFilesRunning { - fmt.Println("Done, moving on..") + fmt.Println("\nDone, moving on..") break } else { // pause @@ -82,7 +82,7 @@ func TestGenesIngestion(t *testing.T) { // TODO: avoid potential infinite loop counter = 0 for { - fmt.Printf("\rChecking ingestion stats.. [%d]\n", counter) + fmt.Printf("\rChecking ingestion stats.. [%d]", counter) // pause time.Sleep(3 * time.Second) @@ -94,11 +94,11 @@ func TestGenesIngestion(t *testing.T) { fmt.Println(stats.NumAdded) fmt.Println(stats.NumFlushed) if stats.NumAdded == stats.NumFlushed { - fmt.Println("Done, moving on..") + fmt.Println("\nDone, moving on..") break } if stats.NumFailed > 0 { - log.Fatal("More than one gene failed to flush") + log.Fatal("\nMore than one gene failed to flush") } // pause diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index e131cd0f..0969452f 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -102,7 +102,7 @@ func TestDemoVcfIngestion(t *testing.T) { // TODO: avoid potential infinite loop counter := 0 for { - fmt.Printf("\rChecking state of the variants ingestion.. [%d]\n", counter) + fmt.Printf("\rChecking state of the variants ingestion.. 
[%d]", counter) // make the call ingReqsUrl := fmt.Sprintf("%s/variants/ingestion/requests", cfg.Api.Url) @@ -120,7 +120,7 @@ func TestDemoVcfIngestion(t *testing.T) { } } if foundDone { - fmt.Println("Done, moving on..") + fmt.Println("\nDone, moving on..") break } else { // pause @@ -133,7 +133,7 @@ func TestDemoVcfIngestion(t *testing.T) { // TODO: avoid potential infinite loop counter = 0 for { - fmt.Printf("\rChecking ingestion stats.. [%d]\n", counter) + fmt.Printf("\rChecking ingestion stats.. [%d]", counter) // pause time.Sleep(3 * time.Second) @@ -145,11 +145,11 @@ func TestDemoVcfIngestion(t *testing.T) { fmt.Println(stats.NumAdded) fmt.Println(stats.NumFlushed) if stats.NumAdded == stats.NumFlushed { - fmt.Println("Done, moving on..") + fmt.Println("\nDone, moving on..") break } if stats.NumFailed > 0 { - log.Fatal("More than one variant failed to flush") + log.Fatal("\nMore than one variant failed to flush") } // pause From 7f1cb5276d2ad4bd2ef979abe17f6550fe46f6c9 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 3 Jul 2023 18:47:19 -0400 Subject: [PATCH 48/84] chore: variants wildcard tests refactor --- src/api/tests/build/api/variants_test.go | 156 ++++++++++++++++ .../tests/integration/api/api_variant_test.go | 168 ------------------ 2 files changed, 156 insertions(+), 168 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 0969452f..9357085b 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -12,6 +12,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "sync" "testing" "time" @@ -401,6 +402,161 @@ func TestDemoVcfIngestion(t *testing.T) { } common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) }) + + t.Run("Test Can Get Variants With Wildcard Alternatives", func(t *testing.T) { + allele := "ATTN" // example allele - TODO: render more sophisticated 
randomization + // TODO: improve variant call testing from being 1 call to many random ones + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) + for _, dto := range dtos.Results { + for _, call := range dto.Calls { + // ensure, for each call, that at least + // 1 of the alt's present matches the allele + // queried for + allNonWildcardCharactersMatch := true + // iterate over all 'alt's in the call + for _, alt := range call.Alt { + // iterate over all characters for each alt + for altIndex, altChar := range alt { + // ensure the index is within bounds (length of the allele) + // 'alt's are slices of strings, and not all 'alt's in these slices need to match + if altIndex <= len(allele) { + // obtain the character at the index for the iteration + alleleChar := []rune(allele)[altIndex] + if string(alleleChar) != "N" && altChar != alleleChar { + // if the non-wildcard characters don't match, test fails + allNonWildcardCharactersMatch = false + break + } + } + } + if !allNonWildcardCharactersMatch { + break + } + } + assert.True(t, allNonWildcardCharactersMatch) + } + } + }) + + t.Run("Test Can Get Variants With Wildcard References", func(t *testing.T) { + allele := "ATTN" // example allele - TODO: render more sophisticated randomization + // TODO: improve variant call testing from being 1 call to many random ones + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) + for _, dto := range dtos.Results { + for _, call := range dto.Calls { + // ensure, for each call, that at least + // 1 of the ref's present matches the allele + // queried for + allNonWildcardCharactersMatch := true + // iterate over all 'ref's in the call + for _, ref := range call.Ref { + // iterate over all characters for each ref + for refIndex, refChar := range ref { + // ensure the index is within bounds (length of the allele) + // 'ref's are 
slices of strings, and not all 'ref's in these slices need to match + if refIndex <= len(allele) { + // obtain the character at the index for the iteration + alleleChar := []rune(allele)[refIndex] + if string(alleleChar) != "N" && refChar != alleleChar { + // if the non-wildcard characters don't match, test fails + allNonWildcardCharactersMatch = false + break + } + } + } + if !allNonWildcardCharactersMatch { + break + } + } + assert.True(t, allNonWildcardCharactersMatch) + } + } + }) + t.Run("Test Can Get Variants With Wildcard Alleles", func(t *testing.T) { + // iterate over all 'allele's queried for + qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths + for _, qAllele := range qAlleles { + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) + for _, dto := range dtos.Results { + fmt.Printf("Got %d calls from allele query %s \n", len(dto.Calls), qAllele) + if len(dto.Calls) == 0 { + continue + } + + for _, call := range dto.Calls { + // ensure, for each call, that at least + // 1 of the alleles present matches one of + // the alleles queried for + wildcardCharactersMatch := false + + // - iterate over all 'allele's in the call + for _, allele := range call.Alleles { + if len(qAllele) == len(allele) { + wildcardCharactersMatch = true + break + } + } + + assert.True(t, wildcardCharactersMatch) + } + } + } + }) + + t.Run("Test Can Get Variants With Wildcard Allele Pairs", func(t *testing.T) { + // wildcard allele pairs of different lengths + qAllelePairs := [][]string{ + {"N", "N"}, + {"N", "NN"}, + {"NN", "N"}, + {"N", "NNN"}, + {"NNN", "N"}} + + // iterate over all 'allele pairs' + for _, qAllelePair := range qAllelePairs { + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) + for _, dto := range dtos.Results { + if len(dto.Calls) == 0 { + continue + } + + for _, call 
:= range dto.Calls { + // ensure, for each call, that the length + // of both alleles in the pair match either + // wildcard query allele-pair lengths + bothAllelesMatchesEitherQueriedAllele := (len(qAllelePair[0]) == len(call.Alleles[0]) && len(qAllelePair[1]) == len(call.Alleles[1])) || + (len(qAllelePair[1]) == len(call.Alleles[1]) && len(qAllelePair[0]) == len(call.Alleles[0])) || + (len(qAllelePair[0]) == len(call.Alleles[1]) && len(qAllelePair[1]) == len(call.Alleles[0])) || + (len(qAllelePair[1]) == len(call.Alleles[0]) && len(qAllelePair[0]) == len(call.Alleles[1])) + + if !bothAllelesMatchesEitherQueriedAllele { + fmt.Print(qAllelePair, call.Alleles) + } + + assert.True(t, bothAllelesMatchesEitherQueriedAllele) + } + } + } + }) + + t.Run("Tes Get Variants Can Handle Invalid Wildcard Allele Query", func(t *testing.T) { + // iterate over all 'allele's queried for + qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths + for i, _ := range qAlleles { + if i <= 2 { + continue + } // skip valid calls + + limitedAlleles := strings.Join(qAlleles[:i], ",") + invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) + + // make sure only an error was returned + assert.True(t, invalidReqResObj.Status == 400) + assert.True(t, len(invalidReqResObj.Message) != 0) + assert.True(t, len(invalidReqResObj.Results) == 0) + } + }) + } func runAndValidateGenotypeQueryResults(_t *testing.T, genotypeQuery c.GenotypeQuery, specificValidation func(__t *testing.T, call *dtos.VariantCall)) { diff --git a/src/api/tests/integration/api/api_variant_test.go b/src/api/tests/integration/api/api_variant_test.go index 9f170978..194f7241 100644 --- a/src/api/tests/integration/api/api_variant_test.go +++ b/src/api/tests/integration/api/api_variant_test.go @@ -4,7 +4,6 @@ import ( "fmt" common "gohan/api/tests/common" "net/http" - "strings" "testing" 
"github.com/stretchr/testify/assert" @@ -34,171 +33,4 @@ func TestWithInvalidAuthenticationToken(t *testing.T) { assert.Equal(t, shouldBe, response.StatusCode, fmt.Sprintf("Error -- Api GET / Status: %s ; Should be %d", response.Status, shouldBe)) } -func TestVariantsOverview(t *testing.T) { - cfg := common.InitConfig() - - overviewJson := common.GetVariantsOverview(t, cfg) - assert.NotNil(t, overviewJson) -} - -func TestCanGetVariantsWithWildcardAlternatives(t *testing.T) { - cfg := common.InitConfig() - allele := "ATTN" // example allele - TODO: render more sophisticated randomization - // TODO: improve variant call testing from being 1 call to many random ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) - for _, dto := range dtos.Results { - for _, call := range dto.Calls { - // ensure, for each call, that at least - // 1 of the alt's present matches the allele - // queried for - allNonWildcardCharactersMatch := true - // iterate over all 'alt's in the call - for _, alt := range call.Alt { - // iterate over all characters for each alt - for altIndex, altChar := range alt { - // ensure the index is within bounds (length of the allele) - // 'alt's are slices of strings, and not all 'alt's in these slices need to match - if altIndex <= len(allele) { - // obtain the character at the index for the iteration - alleleChar := []rune(allele)[altIndex] - if string(alleleChar) != "N" && altChar != alleleChar { - // if the non-wildcard characters don't match, test fails - allNonWildcardCharactersMatch = false - break - } - } - } - if !allNonWildcardCharactersMatch { - break - } - } - assert.True(t, allNonWildcardCharactersMatch) - } - } - -} -func TestCanGetVariantsWithWildcardReferences(t *testing.T) { - cfg := common.InitConfig() - allele := "ATTN" // example allele - TODO: render more sophisticated randomization - // TODO: improve variant call testing from being 1 call to many random 
ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) - for _, dto := range dtos.Results { - for _, call := range dto.Calls { - // ensure, for each call, that at least - // 1 of the ref's present matches the allele - // queried for - allNonWildcardCharactersMatch := true - // iterate over all 'ref's in the call - for _, ref := range call.Ref { - // iterate over all characters for each ref - for refIndex, refChar := range ref { - // ensure the index is within bounds (length of the allele) - // 'ref's are slices of strings, and not all 'ref's in these slices need to match - if refIndex <= len(allele) { - // obtain the character at the index for the iteration - alleleChar := []rune(allele)[refIndex] - if string(alleleChar) != "N" && refChar != alleleChar { - // if the non-wildcard characters don't match, test fails - allNonWildcardCharactersMatch = false - break - } - } - } - if !allNonWildcardCharactersMatch { - break - } - } - assert.True(t, allNonWildcardCharactersMatch) - } - } -} - -func TestCanGetVariantsWithWildcardAlleles(t *testing.T) { - cfg := common.InitConfig() - // iterate over all 'allele's queried for - qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths - for _, qAllele := range qAlleles { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) - for _, dto := range dtos.Results { - fmt.Printf("Got %d calls from allele query %s \n", len(dto.Calls), qAllele) - if len(dto.Calls) == 0 { - continue - } - - for _, call := range dto.Calls { - // ensure, for each call, that at least - // 1 of the alleles present matches one of - // the alleles queried for - wildcardCharactersMatch := false - - // - iterate over all 'allele's in the call - for _, allele := range call.Alleles { - if len(qAllele) == len(allele) { - wildcardCharactersMatch = true - break - } - } - - 
assert.True(t, wildcardCharactersMatch) - } - } - } -} -func TestCanGetVariantsWithWildcardAllelePairs(t *testing.T) { - cfg := common.InitConfig() - - // wildcard allele pairs of different lengths - qAllelePairs := [][]string{ - {"N", "N"}, - {"N", "NN"}, - {"NN", "N"}, - {"N", "NNN"}, - {"NNN", "N"}} - - // iterate over all 'allele pairs' - for _, qAllelePair := range qAllelePairs { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) - for _, dto := range dtos.Results { - if len(dto.Calls) == 0 { - continue - } - - for _, call := range dto.Calls { - // ensure, for each call, that the length - // of both alleles in the pair match either - // wildcard query allele-pair lengths - bothAllelesMatchesEitherQueriedAllele := (len(qAllelePair[0]) == len(call.Alleles[0]) && len(qAllelePair[1]) == len(call.Alleles[1])) || - (len(qAllelePair[1]) == len(call.Alleles[1]) && len(qAllelePair[0]) == len(call.Alleles[0])) || - (len(qAllelePair[0]) == len(call.Alleles[1]) && len(qAllelePair[1]) == len(call.Alleles[0])) || - (len(qAllelePair[1]) == len(call.Alleles[0]) && len(qAllelePair[0]) == len(call.Alleles[1])) - - if !bothAllelesMatchesEitherQueriedAllele { - fmt.Print(qAllelePair, call.Alleles) - } - - assert.True(t, bothAllelesMatchesEitherQueriedAllele) - } - } - } -} - -func TestGetVariantsCanHandleInvalidWildcardAlleleQuery(t *testing.T) { - cfg := common.InitConfig() - // iterate over all 'allele's queried for - qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths - for i, _ := range qAlleles { - if i <= 2 { - continue - } // skip valid calls - - limitedAlleles := strings.Join(qAlleles[:i], ",") - invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) - - // make sure only an error was returned - assert.True(t, invalidReqResObj.Status == 400) - assert.True(t, 
len(invalidReqResObj.Message) != 0) - assert.True(t, len(invalidReqResObj.Results) == 0) - } -} - // -- Common utility functions for api tests From 277347777e7ba588c67143d4fb438fdf469b39e7 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 02:25:06 -0400 Subject: [PATCH 49/84] patch: tableId dataset typos --- src/api/mvc/variants/main.go | 2 +- src/api/tests/build/api/variants_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 53cd9cba..b9f8c4a4 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -484,7 +484,7 @@ func GetDatasetSummary(c echo.Context) error { // request #2 g.Go(func() error { - // obtain number of samples associated with this tableId + // obtain number of samples associated with this dataset resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) if bucketsError != nil { fmt.Printf("Failed to bucket dataset %s variants\n", dataset) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 9357085b..d72413f3 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -38,7 +38,7 @@ const ( func TestDemoVcfIngestion(t *testing.T) { cfg := common.InitConfig() - tableId := uuid.NewString() + dataset := uuid.NewString() t.Run("Ingest Demo VCF", func(t *testing.T) { // verify ingestion endpoint @@ -90,7 +90,7 @@ func TestDemoVcfIngestion(t *testing.T) { assemblyId := "GRCh38" containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) - queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&tableId=%s", assemblyId, containerizedVcfFilePath, tableId) + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset) ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) 
initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) From f4ed1ddcf89b19ce727a7ad33737018000a0c5eb Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 03:10:11 -0400 Subject: [PATCH 50/84] patch|test: dataset uuid ingestion patch and test --- src/api/mvc/variants/main.go | 17 ++++++----------- src/api/services/ingestion.go | 2 +- src/api/tests/build/api/variants_test.go | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index b9f8c4a4..e9779ded 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -449,16 +449,11 @@ func GetAllVariantIngestionRequests(c echo.Context) error { func GetDatasetSummary(c echo.Context) error { fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) - cfg := c.(*contexts.GohanContext).Config - es := c.(*contexts.GohanContext).Es7Client - // obtain dataset from the path - dataset := c.Param("dataset") + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client - // dataset must be provided - if dataset == "" { - fmt.Println("Missing dataset") - return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest("Missing dataset - please try again")) - } + dataset := gc.Dataset // parallelize these two es queries @@ -472,7 +467,7 @@ func GetDatasetSummary(c echo.Context) error { docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, "*", 0, 0, "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset) + "", "", []string{}, "", "", dataset.String()) if countError != nil { fmt.Printf("Failed to count variants in dataset %s\n", dataset) return countError @@ -485,7 +480,7 @@ func GetDatasetSummary(c echo.Context) error { // request #2 g.Go(func() error { // obtain number of samples associated with this dataset - resultingBuckets, bucketsError := 
esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset) + resultingBuckets, bucketsError := esRepo.GetVariantsBucketsByKeywordAndDataset(cfg, es, "sample.id.keyword", dataset.String()) if bucketsError != nil { fmt.Printf("Failed to bucket dataset %s variants\n", dataset) return bucketsError diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index c74fb28b..40e434ec 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -420,7 +420,7 @@ func (i *IngestionService) ProcessVcf( tmpVariant["fileId"] = drsFileId tmpVariant["assemblyId"] = assemblyId - tmpVariant["dataset"] = dataset + tmpVariant["dataset"] = dataset.String() // skip this call if need be skipThisCall := false diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index d72413f3..83f35b8d 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -164,6 +164,22 @@ func TestDemoVcfIngestion(t *testing.T) { // check variants overview overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) + + // check datasets + assert.NotNil(t, overviewJson["datasets"]) + assert.NotNil(t, overviewJson["datasets"].(map[string]interface{})) + + datasets := overviewJson["datasets"].(map[string]interface{}) + assert.NotZero(t, len(datasets)) + for k, v := range datasets { + key := k + value := v.(float64) + assert.NotNil(t, key) + assert.NotNil(t, value) + assert.NotEmpty(t, key) + assert.NotEmpty(t, value) + assert.Greater(t, value, 0.0) + } }) t.Run("Test Simple Chromosome Queries", func(t *testing.T) { From 82939ffdb0cf0b4c4bf971d2d87bb2c46efe15e0 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Fri, 7 Jul 2023 03:24:58 -0400 Subject: [PATCH 51/84] test: variant overview content check --- src/api/tests/build/api/variants_test.go | 30 +++++++++++++----------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git 
a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 83f35b8d..59c2ccf0 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -165,20 +165,22 @@ func TestDemoVcfIngestion(t *testing.T) { overviewJson := common.GetVariantsOverview(t, cfg) assert.NotNil(t, overviewJson) - // check datasets - assert.NotNil(t, overviewJson["datasets"]) - assert.NotNil(t, overviewJson["datasets"].(map[string]interface{})) - - datasets := overviewJson["datasets"].(map[string]interface{}) - assert.NotZero(t, len(datasets)) - for k, v := range datasets { - key := k - value := v.(float64) - assert.NotNil(t, key) - assert.NotNil(t, value) - assert.NotEmpty(t, key) - assert.NotEmpty(t, value) - assert.Greater(t, value, 0.0) + // verify variant overview content + for oK, oV := range overviewJson { + assert.NotNil(t, oV) + + assert.NotNil(t, overviewJson[oK]) + assert.NotNil(t, overviewJson[oK].(map[string]interface{})) + + for k, v := range oV.(map[string]interface{}) { + key := k + assert.NotNil(t, v) + value := v.(float64) + assert.NotNil(t, key) + assert.NotEmpty(t, key) + assert.NotEmpty(t, value) + assert.NotZero(t, value) + } } }) From 234008220d184d7f7c21c2e762da5eb3e6ff15d9 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 17:16:10 -0400 Subject: [PATCH 52/84] chore: dataset query test coverage --- src/api/tests/build/api/variants_test.go | 55 ++++++++++++++---------- src/api/tests/common/common.go | 25 ++++++----- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 59c2ccf0..17c004c4 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -38,7 +38,7 @@ const ( func TestDemoVcfIngestion(t *testing.T) { cfg := common.InitConfig() - dataset := uuid.NewString() + dataset := uuid.New() t.Run("Ingest Demo VCF", func(t *testing.T) { 
// verify ingestion endpoint @@ -90,7 +90,7 @@ func TestDemoVcfIngestion(t *testing.T) { assemblyId := "GRCh38" containerizedVcfFilePath := "/data/" + filepath.Base(newGzFile) - queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset) + queryString := fmt.Sprintf("assemblyId=%s&fileNames=%s&dataset=%s", assemblyId, containerizedVcfFilePath, dataset.String()) ingestUrl := fmt.Sprintf("%s/variants/ingestion/run?%s", cfg.Api.Url, queryString) initialIngestionDtos := utils.GetRequestReturnStuff[[]ingest.IngestResponseDTO](ingestUrl) @@ -182,21 +182,30 @@ func TestDemoVcfIngestion(t *testing.T) { assert.NotZero(t, value) } } + fmt.Println(overviewJson) }) t.Run("Test Simple Chromosome Queries", func(t *testing.T) { // simple chromosome-1 query - chromQueryResponse := common.BuildQueryAndMakeGetVariantsCall("1", "*", true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + chromQueryResponse := common.BuildQueryAndMakeGetVariantsCall("1", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(chromQueryResponse.Results) > 0) assert.True(t, len(chromQueryResponse.Results[0].Calls) > 0) }) + t.Run("Test Query by Dataset", func(t *testing.T) { + // simple query by dataset using the id generated above and ingested with + byDatsetQueryResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(byDatsetQueryResponse.Results) > 0) + assert.True(t, len(byDatsetQueryResponse.Results[0].Calls) > 0) + }) + t.Run("Test Simple Allele Queries", func(t *testing.T) { // TODO: not hardcoded tests // simple allele queries - common.GetAndVerifyVariantsResults(cfg, t, "CAG") - common.GetAndVerifyVariantsResults(cfg, t, "CAAAA") - common.GetAndVerifyVariantsResults(cfg, t, "T") - common.GetAndVerifyVariantsResults(cfg, t, "C") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "CAG") + 
common.GetAndVerifyVariantsResults(cfg, t, dataset, "CAAAA") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "T") + common.GetAndVerifyVariantsResults(cfg, t, dataset, "C") // random number between 1 and 5 // allelleLen := rand.Intn(5) + 1 @@ -206,7 +215,7 @@ func TestDemoVcfIngestion(t *testing.T) { }) t.Run("Test Variant Info Present", func(t *testing.T) { - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, true, s.Undefined, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, true, s.Undefined, gq.UNCALLED, "", "") // assert that all of the responses include valid sets of info // - * accumulate all infos into a single list using the set of @@ -242,7 +251,7 @@ func TestDemoVcfIngestion(t *testing.T) { }) t.Run("Test No Variant Info Present", func(t *testing.T) { - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Undefined, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Undefined, gq.UNCALLED, "", "") // assert that all responses from all combinations have no results for _, dtoResponse := range allDtoResponses { @@ -257,7 +266,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Get Variants in Ascending Order", func(t *testing.T) { // retrieve responses in ascending order - allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Ascending, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Ascending, gq.UNCALLED, "", "") // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) @@ -286,7 +295,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Get Variants in Descending Order", func(t *testing.T) { // retrieve responses in descending order - allDtoResponses := 
common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, false, s.Descending, gq.UNCALLED, "", "") + allDtoResponses := common.GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(t, dataset, false, s.Descending, gq.UNCALLED, "", "") // assert the dto response slice is plentiful assert.NotNil(t, allDtoResponses) @@ -358,7 +367,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousAlternateSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_ALTERNATE, ratt.Reference, specificValidation) // Homozygous Reference Variants With Various References specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -370,7 +379,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousReferenceSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_REFERENCE, ratt.Reference, specificValidation) //Heterozygous Variants With Various References specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -382,7 +391,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHeterozygousSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Reference, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HETEROZYGOUS, ratt.Reference, specificValidation) // Homozygous Alternate Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { 
@@ -394,7 +403,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousAlternateSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_ALTERNATE, ratt.Alternative, specificValidation) // Homozygous Reference Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -406,7 +415,7 @@ func TestDemoVcfIngestion(t *testing.T) { validateHomozygousReferenceSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HOMOZYGOUS_REFERENCE, ratt.Alternative, specificValidation) // Heterozygous Variants With Various Alternatives specificValidation = func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string) { @@ -418,13 +427,13 @@ func TestDemoVcfIngestion(t *testing.T) { validateHeterozygousSample(__t, call) } - common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) + common.ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(t, dataset, gq.HETEROZYGOUS, ratt.Alternative, specificValidation) }) t.Run("Test Can Get Variants With Wildcard Alternatives", func(t *testing.T) { allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", dataset, true, "asc", "HETEROZYGOUS", "GRCh37", "", allele, "", 
false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -459,7 +468,7 @@ func TestDemoVcfIngestion(t *testing.T) { t.Run("Test Can Get Variants With Wildcard References", func(t *testing.T) { allele := "ATTN" // example allele - TODO: render more sophisticated randomization // TODO: improve variant call testing from being 1 call to many random ones - dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("14", "*", dataset, true, "asc", "HETEROZYGOUS", "GRCh37", allele, "", "", false, t, cfg) for _, dto := range dtos.Results { for _, call := range dto.Calls { // ensure, for each call, that at least @@ -494,7 +503,7 @@ func TestDemoVcfIngestion(t *testing.T) { // iterate over all 'allele's queried for qAlleles := []string{"N", "NN", "NNN", "NNNN", "NNNNN"} // wildcard alleles of different lengths for _, qAllele := range qAlleles { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", qAllele, false, t, cfg) for _, dto := range dtos.Results { fmt.Printf("Got %d calls from allele query %s \n", len(dto.Calls), qAllele) if len(dto.Calls) == 0 { @@ -532,7 +541,7 @@ func TestDemoVcfIngestion(t *testing.T) { // iterate over all 'allele pairs' for _, qAllelePair := range qAllelePairs { - dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) + dtos := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", strings.Join(qAllelePair, ","), false, t, cfg) for _, dto := range dtos.Results { if len(dto.Calls) == 0 { continue @@ -566,7 +575,7 @@ func TestDemoVcfIngestion(t *testing.T) { } // skip valid 
calls limitedAlleles := strings.Join(qAlleles[:i], ",") - invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) + invalidReqResObj := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", limitedAlleles, true, t, cfg) // make sure only an error was returned assert.True(t, invalidReqResObj.Status == 400) @@ -649,7 +658,7 @@ func getAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc assemblyId := a.CastToAssemblyId(_combination[2]) // make the call - dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + dto := common.BuildQueryAndMakeGetVariantsCall(chrom, sampleId, uuid.Nil, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) assert.Equal(_t, 1, len(dto.Results)) diff --git a/src/api/tests/common/common.go b/src/api/tests/common/common.go index fb5d99d1..0e14c118 100644 --- a/src/api/tests/common/common.go +++ b/src/api/tests/common/common.go @@ -26,6 +26,7 @@ import ( "testing" . 
"github.com/ahmetb/go-linq" + "github.com/google/uuid" "github.com/stretchr/testify/assert" yaml "gopkg.in/yaml.v2" ) @@ -160,8 +161,8 @@ func CreateAndGetNewFile(filePath string) (*os.File, error) { return newFile, newFileErr } -func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, qAllele string) { - responseDtos := BuildQueryAndMakeGetVariantsCall("", "*", true, "asc", "", "GRCh38", "", "", qAllele, false, _t, _cfg) +func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, dataset uuid.UUID, qAllele string) { + responseDtos := BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", qAllele, false, _t, _cfg) assert.NotNil(_t, responseDtos.Results) assert.True(_t, len(responseDtos.Results) > 0) @@ -209,7 +210,7 @@ func GetAndVerifyVariantsResults(_cfg *models.Config, _t *testing.T, qAllele str } func BuildQueryAndMakeGetVariantsCall( - chromosome string, sampleId string, includeInfo bool, + chromosome string, sampleId string, dataset uuid.UUID, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, assemblyId c.AssemblyId, referenceAllelePattern string, alternativeAllelePattern string, commaDeliminatedAlleles string, ignoreStatusCode bool, _t *testing.T, _cfg *models.Config) dtos.VariantGetReponse { @@ -220,6 +221,10 @@ func BuildQueryAndMakeGetVariantsCall( queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&chromosome=%s", chromosome)) } + if dataset != uuid.Nil && dataset.String() != "" { + queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&dataset=%s", dataset.String())) + } + if genotype != gq.UNCALLED { queryString = fmt.Sprintf("%s%s", queryString, fmt.Sprintf("&genotype=%s", string(genotype))) } @@ -238,7 +243,7 @@ func BuildQueryAndMakeGetVariantsCall( return makeGetVariantsCall(url, ignoreStatusCode, _t) } -func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, 
referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { +func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, dataset uuid.UUID, includeInfo bool, sortByPosition c.SortDirection, genotype c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string) []dtos.VariantGetReponse { cfg := InitConfig() // retrieve the overview @@ -280,7 +285,7 @@ func GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t *testing.T, inc assemblyId := a.CastToAssemblyId(_combination[2]) // make the call - dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) + dto := BuildQueryAndMakeGetVariantsCall(chrom, sampleId, dataset, includeInfo, sortByPosition, genotype, assemblyId, referenceAllelePattern, alternativeAllelePattern, "", false, _t, cfg) assert.Equal(_t, 1, len(dto.Results)) @@ -358,7 +363,7 @@ func GetOverviewResultCombinations(chromosomeStruct interface{}, sampleIdsStruct } func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, - genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, + dataset uuid.UUID, genotypeQuery c.GenotypeQuery, refAltTestType testConsts.ReferenceAlternativeTestType, specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { // TODO: use some kind of Allele Enum @@ -371,9 +376,9 @@ func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, switch refAltTestType { case ratt.Reference: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, _pat, "", specificValidation) + runAndValidateReferenceOrAlternativeQueryResults(_t, dataset, genotypeQuery, _pat, "", specificValidation) case ratt.Alternative: - runAndValidateReferenceOrAlternativeQueryResults(_t, genotypeQuery, "", _pat, specificValidation) + 
runAndValidateReferenceOrAlternativeQueryResults(_t, dataset, genotypeQuery, "", _pat, specificValidation) default: println("Skipping Test -- no Ref/Alt Test Type provided") } @@ -384,11 +389,11 @@ func ExecuteReferenceOrAlternativeQueryTestsOfVariousPatterns(_t *testing.T, } func runAndValidateReferenceOrAlternativeQueryResults(_t *testing.T, - genotypeQuery c.GenotypeQuery, + dataset uuid.UUID, genotypeQuery c.GenotypeQuery, referenceAllelePattern string, alternativeAllelePattern string, specificValidation func(__t *testing.T, call *dtos.VariantCall, referenceAllelePattern string, alternativeAllelePattern string)) { - allDtoResponses := GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) + allDtoResponses := GetAllDtosOfVariousCombinationsOfChromosomesAndSampleIds(_t, dataset, true, s.Undefined, genotypeQuery, referenceAllelePattern, alternativeAllelePattern) // assert that all of the responses include sample sets with the appropriate zygosity // - * accumulate all variants into a single list using the set of SelectManyT's and the SelectT From 1048d3fc93b5fa08352ed14c88fe9ceef917a775 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 17:49:29 -0400 Subject: [PATCH 53/84] chore: improved datset query coverage - discovered bug (unknown dataset id returns results) --- src/api/tests/build/api/variants_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 17c004c4..54db9340 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -197,6 +197,17 @@ func TestDemoVcfIngestion(t *testing.T) { byDatsetQueryResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", dataset, true, "asc", "", "GRCh38", "", "", "", false, t, cfg) assert.True(t, len(byDatsetQueryResponse.Results) > 0) assert.True(t, 
len(byDatsetQueryResponse.Results[0].Calls) > 0) + // verify dataset ids + From(byDatsetQueryResponse.Results).SelectManyT(func(data dtos.VariantGetResult) Query { // * + return From(data.Calls) + }).ForEachT(func(variant dtos.VariantCall) { + assert.Equal(t, dataset.String(), variant.Dataset) + }) + + // test unknown random dataset id + shouldBeEmptyResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.New(), true, "", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(shouldBeEmptyResponse.Results) > 0) + assert.True(t, len(shouldBeEmptyResponse.Results[0].Calls) == 0) }) t.Run("Test Simple Allele Queries", func(t *testing.T) { From 241d2c6c9eca33508d51c14b152276e29e7ac557 Mon Sep 17 00:00:00 2001 From: Brennan Brouillette Date: Mon, 10 Jul 2023 18:36:26 -0400 Subject: [PATCH 54/84] patch|chore: dataset http middleware, repo query - improved tests --- src/api/main.go | 4 +++ src/api/middleware/datasetMiddleware.go | 27 ++++++++++++++++ src/api/mvc/main.go | 11 +++++-- src/api/mvc/variants/main.go | 26 +++++++++------- .../repositories/elasticsearch/variants.go | 31 +++++++++++++------ src/api/tests/build/api/variants_test.go | 6 ++++ 6 files changed, 81 insertions(+), 24 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index a2629965..be8b9a3f 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -144,6 +144,7 @@ func main() { e.GET("/variants/get/by/variantId", variantsMvc.VariantsGetByVariantId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -151,6 +152,7 @@ func main() { e.GET("/variants/get/by/sampleId", variantsMvc.VariantsGetBySampleId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -161,6 +163,7 @@ func main() { e.GET("/variants/count/by/variantId", 
variantsMvc.VariantsCountByVariantId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, @@ -168,6 +171,7 @@ func main() { e.GET("/variants/count/by/sampleId", variantsMvc.VariantsCountBySampleId, // middleware gam.ValidateOptionalChromosomeAttribute, + gam.OptionalDatasetAttribute, gam.MandateCalibratedBounds, gam.MandateCalibratedAlleles, gam.MandateAssemblyIdAttribute, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 93e16c60..85420145 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -39,3 +39,30 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { return next(gc) } } + +/* +Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided +*/ +func OptionalDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + gc := c.(*contexts.GohanContext) + + // check for dataset query parameter + dataset := c.QueryParam("dataset") + if len(dataset) > 0 { + // verify dataset is a valid UUID + // - assume it's a valid dataset if it's a uuid, + // further verification is done later + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + // forward a type-safe value down the pipeline + gc.Dataset = uuid.MustParse(dataset) + } + + return next(gc) + } +} diff --git a/src/api/mvc/main.go b/src/api/mvc/main.go index 635cc5f4..bce71a86 100644 --- a/src/api/mvc/main.go +++ b/src/api/mvc/main.go @@ -8,10 +8,11 @@ import ( "strings" "github.com/elastic/go-elasticsearch/v7" + "github.com/google/uuid" "github.com/labstack/echo" ) -func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, 
int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId) { +func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, int, string, string, []string, constants.GenotypeQuery, constants.AssemblyId, string) { gc := c.(*contexts.GohanContext) es := gc.Es7Client @@ -20,6 +21,12 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, lowerBound := gc.LowerBound upperBound := gc.UpperBound + // optional + datasetString := "" + if gc.Dataset != uuid.Nil { + datasetString = gc.Dataset.String() + } + reference := c.QueryParam("reference") alternative := c.QueryParam("alternative") @@ -47,5 +54,5 @@ func RetrieveCommonElements(c echo.Context) (*elasticsearch.Client, string, int, assemblyId = a.CastToAssemblyId(assemblyIdQP) } - return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId + return es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString } diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index e9779ded..946edf69 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -466,8 +466,8 @@ func GetDatasetSummary(c echo.Context) error { g.Go(func() error { docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, "*", 0, 0, - "", "", // note : both variantId and sampleId are deliberately set to "" - "", "", []string{}, "", "", dataset.String()) + "", "", dataset.String(), // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "") if countError != nil { fmt.Printf("Failed to count variants in dataset %s\n", dataset) return countError @@ -518,9 +518,10 @@ func GetDatasetSummary(c echo.Context) error { } func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + 
cfg := gc.Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString = mvc.RetrieveCommonElements(c) // retrieve other query parameters relevent to this 'get' query --- getSampleIdsOnlyQP := c.QueryParam("getSampleIdsOnly") @@ -603,7 +604,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - _id, "", // note : "" is for sampleId + _id, "", datasetString, // note : "" is for sampleId reference, alternative, alleles, size, sortByPosition, includeInfoInResultSet, genotype, assemblyId, @@ -628,7 +629,7 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu docs, searchErr = esRepo.GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - "", _id, // note : "" is for variantId + "", _id, datasetString, // note : "" is for variantId reference, alternative, alleles, size, sortByPosition, includeInfoInResultSet, genotype, assemblyId, @@ -757,9 +758,10 @@ func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocu } func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) error { - cfg := c.(*contexts.GohanContext).Config + gc := c.(*contexts.GohanContext) + cfg := gc.Config - var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId = mvc.RetrieveCommonElements(c) + var es, chromosome, lowerBound, upperBound, reference, alternative, alleles, genotype, assemblyId, datasetString = mvc.RetrieveCommonElements(c) respDTO := dtos.VariantCountReponse{ Results: make([]dtos.VariantCountResult, 0), @@ -788,8 +790,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery 
bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - _id, "", // note : "" is for sampleId - reference, alternative, alleles, genotype, assemblyId, "") + _id, "", datasetString, // note : "" is for sampleId + reference, alternative, alleles, genotype, assemblyId) } else { // implied sampleId query fmt.Printf("Executing Count-Samples for SampleId %s\n", _id) @@ -797,8 +799,8 @@ func executeCountByIds(c echo.Context, ids []string, isVariantIdQuery bool) erro docs, countError = esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, chromosome, lowerBound, upperBound, - "", _id, // note : "" is for variantId - reference, alternative, alleles, genotype, assemblyId, "") + "", _id, datasetString, // note : "" is for variantId + reference, alternative, alleles, genotype, assemblyId) } if countError != nil { diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index 0877ba60..e5dc733b 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -106,7 +106,7 @@ func GetDocumentsByDocumentId(cfg *models.Config, es *elasticsearch.Client, id s func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, es *elasticsearch.Client, chromosome string, lowerBound int, upperBound int, - variantId string, sampleId string, + variantId string, sampleId string, datasetString string, reference string, alternative string, alleles []string, size int, sortByPosition c.SortDirection, includeInfoInResultSet bool, @@ -144,6 +144,15 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e }) } + if datasetString != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "fields": []string{"dataset.keyword"}, + "query": datasetString, + }, + }) + } + if alternative != "" { mustMap = 
append(mustMap, map[string]interface{}{ "query_string": map[string]interface{}{ @@ -315,9 +324,9 @@ func GetDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, e func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, es *elasticsearch.Client, chromosome string, lowerBound int, upperBound int, - variantId string, sampleId string, + variantId string, sampleId string, datasetString string, reference string, alternative string, alleles []string, - genotype c.GenotypeQuery, assemblyId c.AssemblyId, dataset string) (map[string]interface{}, error) { + genotype c.GenotypeQuery, assemblyId c.AssemblyId) (map[string]interface{}, error) { // begin building the request body. mustMap := []map[string]interface{}{{ @@ -352,6 +361,15 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }) } + if datasetString != "" { + mustMap = append(mustMap, map[string]interface{}{ + "query_string": map[string]interface{}{ + "fields": []string{"dataset.keyword"}, + "query": datasetString, + }, + }) + } + if alternative != "" { mustMap = append(mustMap, map[string]interface{}{ "query_string": map[string]interface{}{ @@ -378,13 +396,6 @@ func CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg *models.Config, }) } - if dataset != "" { - mustMap = append(mustMap, map[string]interface{}{ - "query_string": map[string]interface{}{ - "query": "dataset:" + dataset, - }}) - } - rangeMapSlice := []map[string]interface{}{} // TODO: make upperbound and lowerbound nilable, somehow? 
diff --git a/src/api/tests/build/api/variants_test.go b/src/api/tests/build/api/variants_test.go index 54db9340..6d10a3e2 100644 --- a/src/api/tests/build/api/variants_test.go +++ b/src/api/tests/build/api/variants_test.go @@ -208,6 +208,12 @@ func TestDemoVcfIngestion(t *testing.T) { shouldBeEmptyResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.New(), true, "", "", "GRCh38", "", "", "", false, t, cfg) assert.True(t, len(shouldBeEmptyResponse.Results) > 0) assert.True(t, len(shouldBeEmptyResponse.Results[0].Calls) == 0) + + // test without dataset id + // - should have content + plentifulResponse := common.BuildQueryAndMakeGetVariantsCall("", "*", uuid.Nil, true, "", "", "GRCh38", "", "", "", false, t, cfg) + assert.True(t, len(plentifulResponse.Results) > 0) + assert.True(t, len(plentifulResponse.Results[0].Calls) > 0) }) t.Run("Test Simple Allele Queries", func(t *testing.T) { From 5682d716be6919bfe6c042e5360bea95b036a524 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:44:38 +0000 Subject: [PATCH 55/84] nodemon hot reload --- .github/workflows/api.build.yml | 2 +- src/api/dev.Dockerfile | 15 +++++++++++++++ src/api/nodemon.json | 19 +++++++++++++++++++ src/api/run.dev.bash | 7 +++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 src/api/dev.Dockerfile create mode 100644 src/api/nodemon.json create mode 100644 src/api/run.dev.bash diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index d9025228..87ef8983 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -46,5 +46,5 @@ jobs: registry-username: ${{ github.actor }} registry-password: ${{ secrets.GITHUB_TOKEN }} image-name: ghcr.io/bento-platform/gohan-api - development-dockerfile: Dockerfile + development-dockerfile: dev.Dockerfile dockerfile: Dockerfile diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile new file mode 100644 index 00000000..b17c1132 --- /dev/null +++ 
b/src/api/dev.Dockerfile @@ -0,0 +1,15 @@ +ARG BASE_IMAGE + +RUN apt-get update -y && \ + apt-get upgrade -y && \ + apt-get install -y tabix && \ + rm -rf /var/lib/apt/lists/* + +RUN npm install -g nodemon + +WORKDIR /gohan_api + +COPY run.dev.bash . +COPY nodemon.json . + +CMD ["bash", "./run.dev.bash"] diff --git a/src/api/nodemon.json b/src/api/nodemon.json new file mode 100644 index 00000000..422c87db --- /dev/null +++ b/src/api/nodemon.json @@ -0,0 +1,19 @@ +{ + "events": { + "crash": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi", + "restart": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi" + }, + "execMap": { + "go": "go run" + }, + "ext": "go", + "ignore": [ + "src/", + "node_modules/", + "build/", + ".github", + ".git" + ], + "delay": 500 + } + \ No newline at end of file diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash new file mode 100644 index 00000000..235c7689 --- /dev/null +++ b/src/api/run.dev.bash @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +nodemon main.go & + +echo "==================== NODEMON GO WATCHING ====================" + +wait From dc3493f8d9da7bdc02f5f67193a83eac5736af67 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:48:28 +0000 Subject: [PATCH 56/84] add missing image dev.Dockerfile --- src/api/dev.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index b17c1132..bffc7572 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,5 +1,7 @@ ARG BASE_IMAGE +FROM $BASE_IMAGE + RUN apt-get update -y && \ apt-get upgrade -y && \ apt-get install -y tabix && \ From 4a932531cf13e0740fc0dffe806590a357ff7666 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 18:52:08 +0000 Subject: [PATCH 57/84] node image --- src/api/dev.Dockerfile | 4 +--- 
src/api/nodemon.json | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index bffc7572..a5501011 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,6 +1,4 @@ -ARG BASE_IMAGE - -FROM $BASE_IMAGE +FROM ghcr.io/bento-platform/bento_base_image:node-debian-2023.03.22 RUN apt-get update -y && \ apt-get upgrade -y && \ diff --git a/src/api/nodemon.json b/src/api/nodemon.json index 422c87db..8c8245ca 100644 --- a/src/api/nodemon.json +++ b/src/api/nodemon.json @@ -8,7 +8,6 @@ }, "ext": "go", "ignore": [ - "src/", "node_modules/", "build/", ".github", From db1e78e2d5e2c74aeaa1e2768c15a327e7539d8c Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:22:32 +0000 Subject: [PATCH 58/84] go reload with air --- .air.toml | 41 +++++++++++++++++++++++++++++++++++++++++ src/api/dev.Dockerfile | 39 +++++++++++++++++++++++++++++++++------ src/api/nodemon.json | 18 ------------------ src/api/run.dev.bash | 7 ------- 4 files changed, 74 insertions(+), 31 deletions(-) create mode 100644 .air.toml delete mode 100644 src/api/nodemon.json delete mode 100644 src/api/run.dev.bash diff --git a/.air.toml b/.air.toml new file mode 100644 index 00000000..de4c9c0a --- /dev/null +++ b/.air.toml @@ -0,0 +1,41 @@ +# Working directory +# . or absolute path, please note that the directories following must be under root. +root = "." +tmp_dir = "tmp" + +[build] +# Just plain old shell command. You could use `make` as well. +cmd = "go build -o ./tmp/main ." +# Binary file yields from `cmd`. +bin = "tmp/main" +# Customize binary. +full_bin = "APP_ENV=dev APP_USER=air ./tmp/main" +# Watch these filename extensions. +include_ext = ["go", "tpl", "tmpl", "html"] +# Ignore these filename extensions or directories. +exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] +# Watch these directories if you specified. +include_dir = [./src] +# Exclude files. 
+exclude_file = [] +# It's not necessary to trigger build each time file changes if it's too frequent. +delay = 1000 # ms +# Stop to run old binary when build errors occur. +stop_on_error = true +# This log file places in your tmp_dir. +log = "air_errors.log" + +[log] +# Show log time +time = false + +[color] +# Customize each part's color. If no color found, use the raw app log. +main = "magenta" +watcher = "cyan" +build = "yellow" +runner = "green" + +[misc] +# Delete tmp directory on exit +clean_on_exit = true diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index a5501011..d02fa189 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,15 +1,42 @@ -FROM ghcr.io/bento-platform/bento_base_image:node-debian-2023.03.22 +ARG BUILDER_BASE_IMAGE +ARG BASE_IMAGE +# Stage 1 - builder +FROM $BUILDER_BASE_IMAGE as builder + +# Maintainer +LABEL maintainer="Brennan Brouillette " + +WORKDIR /build + +COPY . . + +# Build gohan api +RUN go mod vendor && \ + go build -ldflags="-s -w" -o gohan_api + +# Stage two - executioner +FROM $BASE_IMAGE + +# Debian updates +# - tabix for indexing VCFs +# - other base dependencies provided by the base image RUN apt-get update -y && \ apt-get upgrade -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -RUN npm install -g nodemon +# Install air for hot-reload +RUN go get -u github.com/cosmtrek/air + +WORKDIR /app -WORKDIR /gohan_api +# Copy pre-built executable from builder stage +COPY --from=builder /build/gohan_api . -COPY run.dev.bash . -COPY nodemon.json . 
+# Copy static workflow files +COPY workflows/*.wdl /app/workflows/ -CMD ["bash", "./run.dev.bash"] +# Use base image entrypoint to set up user & gosu exec the command below +# Run +CMD [ "air" ] diff --git a/src/api/nodemon.json b/src/api/nodemon.json deleted file mode 100644 index 8c8245ca..00000000 --- a/src/api/nodemon.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "events": { - "crash": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi", - "restart": "PID=$(lsof -t -i :$INTERNAL_PORT | tr '\n' ' '); if [ -n $PID ]; then echo \"killing PID(s): $PID\"; kill -KILL $PID 2> /dev/null; fi" - }, - "execMap": { - "go": "go run" - }, - "ext": "go", - "ignore": [ - "node_modules/", - "build/", - ".github", - ".git" - ], - "delay": 500 - } - \ No newline at end of file diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash deleted file mode 100644 index 235c7689..00000000 --- a/src/api/run.dev.bash +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -nodemon main.go & - -echo "==================== NODEMON GO WATCHING ====================" - -wait From 0a7d02770bc138c5e3fb8b59f2ecb23b15174f56 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:46:48 +0000 Subject: [PATCH 59/84] dev dockerfile use builder base --- src/api/dev.Dockerfile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index d02fa189..7a3d4f26 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,5 +1,4 @@ ARG BUILDER_BASE_IMAGE -ARG BASE_IMAGE # Stage 1 - builder FROM $BUILDER_BASE_IMAGE as builder @@ -15,9 +14,6 @@ COPY . . 
RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api -# Stage two - executioner -FROM $BASE_IMAGE - # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -32,7 +28,7 @@ RUN go get -u github.com/cosmtrek/air WORKDIR /app # Copy pre-built executable from builder stage -COPY --from=builder /build/gohan_api . +COPY /build/gohan_api . # Copy static workflow files COPY workflows/*.wdl /app/workflows/ From a27783020230b799d5d8546815ee6357c0977ca2 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 14 Aug 2023 19:55:35 +0000 Subject: [PATCH 60/84] dev dockerfil fix --- src/api/dev.Dockerfile | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 7a3d4f26..9596a6e1 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -6,13 +6,12 @@ FROM $BUILDER_BASE_IMAGE as builder # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR /build +WORKDIR /app COPY . . # Build gohan api -RUN go mod vendor && \ - go build -ldflags="-s -w" -o gohan_api +RUN go mod vendor # Debian updates # - tabix for indexing VCFs @@ -25,11 +24,6 @@ RUN apt-get update -y && \ # Install air for hot-reload RUN go get -u github.com/cosmtrek/air -WORKDIR /app - -# Copy pre-built executable from builder stage -COPY /build/gohan_api . - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ From 566046833abf0d906a934d5a6ac5ff3521e0ad82 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Mon, 14 Aug 2023 18:00:31 -0400 Subject: [PATCH 61/84] air config --- .air.toml | 2 +- src/api/dev.Dockerfile | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.air.toml b/.air.toml index de4c9c0a..42cdf0a7 100644 --- a/.air.toml +++ b/.air.toml @@ -15,7 +15,7 @@ include_ext = ["go", "tpl", "tmpl", "html"] # Ignore these filename extensions or directories. 
exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] # Watch these directories if you specified. -include_dir = [./src] +include_dir = [] # Exclude files. exclude_file = [] # It's not necessary to trigger build each time file changes if it's too frequent. diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 9596a6e1..da13aafa 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -11,7 +11,7 @@ WORKDIR /app COPY . . # Build gohan api -RUN go mod vendor +RUN go mod vendor && go install github.com/cosmtrek/air@latest # Debian updates # - tabix for indexing VCFs @@ -21,12 +21,9 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -# Install air for hot-reload -RUN go get -u github.com/cosmtrek/air - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ # Use base image entrypoint to set up user & gosu exec the command below # Run -CMD [ "air" ] +CMD [ "air", "-c", ".air.toml" ] From 75461853a869398649cbb0c08be32025f2509e2a Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Mon, 14 Aug 2023 18:09:55 -0400 Subject: [PATCH 62/84] test air install --- src/api/dev.Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index da13aafa..e804de55 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -8,10 +8,10 @@ LABEL maintainer="Brennan Brouillette Date: Tue, 15 Aug 2023 10:28:55 -0400 Subject: [PATCH 63/84] working air conf --- .air.toml | 41 ----------------------------------------- .gitignore | 1 + src/api/dev.Dockerfile | 17 +++++++++-------- 3 files changed, 10 insertions(+), 49 deletions(-) delete mode 100644 .air.toml diff --git a/.air.toml b/.air.toml deleted file mode 100644 index 42cdf0a7..00000000 --- a/.air.toml +++ /dev/null @@ -1,41 +0,0 @@ -# Working directory -# . or absolute path, please note that the directories following must be under root. -root = "." 
-tmp_dir = "tmp" - -[build] -# Just plain old shell command. You could use `make` as well. -cmd = "go build -o ./tmp/main ." -# Binary file yields from `cmd`. -bin = "tmp/main" -# Customize binary. -full_bin = "APP_ENV=dev APP_USER=air ./tmp/main" -# Watch these filename extensions. -include_ext = ["go", "tpl", "tmpl", "html"] -# Ignore these filename extensions or directories. -exclude_dir = ["assets", "tmp", "vendor", "frontend/node_modules"] -# Watch these directories if you specified. -include_dir = [] -# Exclude files. -exclude_file = [] -# It's not necessary to trigger build each time file changes if it's too frequent. -delay = 1000 # ms -# Stop to run old binary when build errors occur. -stop_on_error = true -# This log file places in your tmp_dir. -log = "air_errors.log" - -[log] -# Show log time -time = false - -[color] -# Customize each part's color. If no color found, use the raw app log. -main = "magenta" -watcher = "cyan" -build = "yellow" -runner = "green" - -[misc] -# Delete tmp directory on exit -clean_on_exit = true diff --git a/.gitignore b/.gitignore index 279451d5..06f9d3cf 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ data/ data-x/ # vcfs +vcfs/* */vcfs/*.txt */vcfs/*.vcf */vcfs/*.vcf.gz diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index e804de55..6ef078c4 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -8,11 +8,6 @@ LABEL maintainer="Brennan Brouillette Date: Tue, 15 Aug 2023 16:29:07 -0400 Subject: [PATCH 64/84] fix missing workflows from path --- src/api/dev.Dockerfile | 4 +++- src/api/workflows/main.go | 2 +- src/api/workflows/vcf_gz.wdl | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 6ef078c4..5880a1ee 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -25,6 +25,8 @@ RUN go mod download && go mod vendor COPY workflows/*.wdl /app/workflows/ # Repository mounted to the container -WORKDIR 
/app/src/api +# WORKDIR /app/repo/src/api +WORKDIR /gohan-api/src/api + CMD [ "air" ] diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index c2387cdb..d04ec42a 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -14,7 +14,7 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "description": "This ingestion workflow will validate and ingest a BGZip-Compressed-VCF into Elasticsearch.", "data_type": "variant", "file": "vcf_gz.wdl", - "purpose": "ingestion", + "action": "ingestion", "inputs": []map[string]interface{}{ { "id": "vcf_gz_file_names", diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 603b54c1..83625fe7 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -3,7 +3,9 @@ workflow vcf_gz { Array[File] vcf_gz_file_names # redundant Array[String] original_vcf_gz_file_paths String assembly_id - String dataset + String project_id + String dataset_id + String service_url String filter_out_references String temp_token String temp_token_host @@ -14,7 +16,7 @@ workflow vcf_gz { input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, - dataset = dataset, + dataset = dataset_id, filter_out_references = filter_out_references, temp_token = temp_token, temp_token_host = temp_token_host From 6cffa72be3d174d2c0e53fae0b755e7d731fe9a3 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 12:50:35 -0400 Subject: [PATCH 65/84] dev container uses workflow mount --- src/api/dev.Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 5880a1ee..601835db 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -21,12 +21,7 @@ RUN go install github.com/cosmtrek/air@latest COPY go.mod go.sum ./ RUN go mod download && go mod vendor -# Copy static workflow files -COPY workflows/*.wdl /app/workflows/ - # Repository mounted to the container -# WORKDIR 
/app/repo/src/api WORKDIR /gohan-api/src/api - CMD [ "air" ] From 1130879673dd7d32fc3e6d8bd283c98f0eeb0864 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 20:39:04 +0000 Subject: [PATCH 66/84] vcf_gz.wdl calls gohan using access token --- src/api/workflows/vcf_gz.wdl | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 83625fe7..19d30cb7 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -1,26 +1,20 @@ workflow vcf_gz { - String gohan_url - Array[File] vcf_gz_file_names # redundant - Array[String] original_vcf_gz_file_paths + String service_url + Array[File] vcf_gz_file_names String assembly_id String project_id String dataset_id - String service_url String filter_out_references - String temp_token - String temp_token_host + String secret__access_token - # scatter(file_name in vcf_gz_file_names) { - scatter(file_name in original_vcf_gz_file_paths) { + scatter(file_name in vcf_gz_file_names) { call vcf_gz_gohan { - input: gohan_url = gohan_url, + input: gohan_url = service_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, dataset = dataset_id, filter_out_references = filter_out_references, - temp_token = temp_token, - temp_token_host = temp_token_host - + access_token = secret__access_token, } } } @@ -31,21 +25,18 @@ task vcf_gz_gohan { String assembly_id String dataset String filter_out_references - String temp_token - String temp_token_host + String access_token command { - echo "Using temporary-token : ${temp_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" # TODO: refactor # append temporary-token header if present - if [ "${temp_token}" == "" ] + if [ "${access_token}" == "" ] then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl 
-vvv -H "Host: ${temp_token_host}" -H "X-TT: ${temp_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -67,11 +58,11 @@ task vcf_gz_gohan { # TODO: refactor # fetch run requests # append temporary-token header if present - if [ "${temp_token}" == "" ] + if [ "${access_token}" == "" ] then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H "Host: ${temp_token_host}" -H "X-TT: ${temp_token}" "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From b10044d4b2a5bf9c0e178b9966c5b9f48561f4cc Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 16 Aug 2023 17:58:53 -0400 Subject: [PATCH 67/84] wes client token in workflow --- src/api/workflows/vcf_gz.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 19d30cb7..034df696 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -29,6 +29,7 @@ task vcf_gz_gohan { command { QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" + AUTH_HEADER="Authorization: Bearer ${access_token}" # TODO: refactor # append temporary-token header if present @@ -36,7 +37,7 @@ task vcf_gz_gohan { then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -62,7 
+63,7 @@ task vcf_gz_gohan { then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H "Authorization: ${access_token}" "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From 5afd1c6666e464d2b8e177d9b65f1666024665fe Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 17:10:03 +0000 Subject: [PATCH 68/84] fix authz header in curl --- src/api/workflows/vcf_gz.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 034df696..38d17046 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -28,6 +28,8 @@ task vcf_gz_gohan { String access_token command { + echo "Using temporary-token : ${access_token}" + QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" AUTH_HEADER="Authorization: Bearer ${access_token}" @@ -37,7 +39,7 @@ task vcf_gz_gohan { then RUN_RESPONSE=$(curl -vvv "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') else - RUN_RESPONSE=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') + RUN_RESPONSE=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/run?$QUERY" -k | sed 's/"/\"/g') fi echo $RUN_RESPONSE @@ -63,7 +65,7 @@ task vcf_gz_gohan { then REQUESTS=$(curl -vvv "${gohan_url}/private/variants/ingestion/requests" -k) else - REQUESTS=$(curl -vvv -H $AUTH_HEADER "${gohan_url}/private/variants/ingestion/requests" -k) + REQUESTS=$(curl -vvv -H "$AUTH_HEADER" "${gohan_url}/private/variants/ingestion/requests" -k) fi echo $REQUESTS From d7d7de7262dd9a9e58e138be1e27788516b3cb9f Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 20:34:08 +0000 Subject: [PATCH 69/84] vscode go debug 
config and doc --- .gitignore | 3 ++- .vscode/launch.json | 15 +++++++++++++++ README.md | 8 ++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 .vscode/launch.json diff --git a/.gitignore b/.gitignore index 06f9d3cf..54413354 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -.vscode +.vscode/* +!.vscode/launch.json .DS_store .idea diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..d2b742bc --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Attach to PID (Bento)", + "type": "go", + "request": "attach", + "mode": "local", + "processId": 0, + } + ] +} diff --git a/README.md b/README.md index 91fdecbf..b608a2e9 100644 --- a/README.md +++ b/README.md @@ -495,3 +495,11 @@ Once `elasticsearch`, `drs`, the `api`, and the `gateway` are up, run make test-api-dev ``` +## Dev Container debug + +Interactive debug in VSCode is only possible when using the development image of gohan-api. 
+ +Using the "Attach to PID (Bento)" debug config, select the PID associated with the following path: +``` +/gohan-api/src/api/tmp/main +``` From 0213c1a73230fc0d0f7e59e2905e0f985d5009d7 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 17 Aug 2023 21:05:21 +0000 Subject: [PATCH 70/84] removed private url path prefix for drs ingest calls --- src/api/services/ingestion.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index 40e434ec..b70e23f1 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -257,7 +257,7 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto ) for { // prepare upload request to drs - r, _ := http.NewRequest("POST", drsUrl+"/private/ingest", bytes.NewBufferString(data)) + r, _ := http.NewRequest("POST", drsUrl+"/ingest", bytes.NewBufferString(data)) r.SetBasicAuth(drsUsername, drsPassword) r.Header.Add("Content-Type", "application/json") From 217cf8f737bfae2dd36bba843c4d2b187e23ae3e Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 21 Aug 2023 17:06:03 -0400 Subject: [PATCH 71/84] wes client auth --- src/api/mvc/variants/main.go | 13 +++++++++---- src/api/services/ingestion.go | 17 ++++++++++++----- src/api/workflows/vcf_gz.wdl | 4 +++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 946edf69..d1595476 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -104,8 +104,6 @@ func VariantsIngest(c echo.Context) error { cfg := gc.Config vcfPath := cfg.Api.VcfPath drsUrl := cfg.Drs.Url - drsUsername := cfg.Drs.Username - drsPassword := cfg.Drs.Password // query parameters assemblyId := gc.AssemblyId @@ -149,6 +147,13 @@ func VariantsIngest(c echo.Context) error { } // + // Authz related + authHeader := c.Request().Header.Get("Authorization") + datasetId := c.QueryParam("dataset") + projectId 
:= c.QueryParam("project") + + c.Logger().Debug(authHeader, datasetId) + dirName := c.QueryParam("directory") if dirName != "" { if strings.HasPrefix(dirName, cfg.Drs.BridgeDirectory) { @@ -353,7 +358,7 @@ func VariantsIngest(c echo.Context) error { // --- push compressed to DRS fmt.Printf("Uploading %s to DRS !\n", gzippedFileName) - drsFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, gzippedFileName, drsUrl, drsUsername, drsPassword) + drsFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, gzippedFileName, drsUrl, projectId, datasetId, authHeader) if drsFileId == "" { msg := "Something went wrong: DRS File Id is empty for " + gzippedFileName fmt.Println(msg) @@ -367,7 +372,7 @@ func VariantsIngest(c echo.Context) error { // -- push tabix to DRS fmt.Printf("Uploading %s to DRS !\n", tabixFileNameWithRelativePath) - drsTabixFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, tabixFileNameWithRelativePath, drsUrl, drsUsername, drsPassword) + drsTabixFileId := ingestionService.UploadVcfGzToDrs(cfg, cfg.Drs.BridgeDirectory, tabixFileNameWithRelativePath, drsUrl, projectId, datasetId, authHeader) if drsTabixFileId == "" { msg := "Something went wrong: DRS Tabix File Id is empty for " + tabixFileNameWithRelativePath fmt.Println(msg) diff --git a/src/api/services/ingestion.go b/src/api/services/ingestion.go index b70e23f1..056a66ef 100644 --- a/src/api/services/ingestion.go +++ b/src/api/services/ingestion.go @@ -19,6 +19,7 @@ import ( "io/ioutil" "log" "net/http" + "net/url" "os" "os/exec" "path" @@ -239,13 +240,13 @@ func (i *IngestionService) GenerateTabix(gzippedFilePath string) (string, string return dir, file, nil } -func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirectory string, gzippedFileName string, drsUrl, drsUsername, drsPassword string) string { +func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirectory string, gzippedFileName string, 
drsUrl string, project_id, dataset_id string, authHeader string) string { if cfg.Debug { http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} } - data := fmt.Sprintf("{\"path\": \"%s/%s\"}", drsBridgeDirectory, gzippedFileName) + path := fmt.Sprintf("%s/%s", drsBridgeDirectory, gzippedFileName) var ( drsId string @@ -257,10 +258,16 @@ func (i *IngestionService) UploadVcfGzToDrs(cfg *models.Config, drsBridgeDirecto ) for { // prepare upload request to drs - r, _ := http.NewRequest("POST", drsUrl+"/ingest", bytes.NewBufferString(data)) + form := url.Values{} + form.Add("path", path) + form.Add("dataset_id", dataset_id) + form.Add("project_id", project_id) + form.Add("data_type", "variant") - r.SetBasicAuth(drsUsername, drsPassword) - r.Header.Add("Content-Type", "application/json") + r, _ := http.NewRequest("POST", drsUrl+"/ingest", strings.NewReader(form.Encode())) + + r.Header.Add("Authorization", authHeader) + r.Header.Add("Content-Type", "application/x-www-form-urlencoded") client := &http.Client{} diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 38d17046..66cd30cd 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -12,6 +12,7 @@ workflow vcf_gz { input: gohan_url = service_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, + project = project_id, dataset = dataset_id, filter_out_references = filter_out_references, access_token = secret__access_token, @@ -23,6 +24,7 @@ task vcf_gz_gohan { String gohan_url String vcf_gz_file_name String assembly_id + String project String dataset String filter_out_references String access_token @@ -30,7 +32,7 @@ task vcf_gz_gohan { command { echo "Using temporary-token : ${access_token}" - QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&filterOutReferences=${filter_out_references}" + 
QUERY="fileNames=${vcf_gz_file_name}&assemblyId=${assembly_id}&dataset=${dataset}&project=${project}&filterOutReferences=${filter_out_references}" AUTH_HEADER="Authorization: Bearer ${access_token}" # TODO: refactor From 8cfdc2a3c36703a6cdcd0a0ad5c81034bba7d40c Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 22 Aug 2023 14:20:35 -0400 Subject: [PATCH 72/84] gohan url from config --- src/api/workflows/main.go | 7 +++++++ src/api/workflows/vcf_gz.wdl | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/api/workflows/main.go b/src/api/workflows/main.go index d04ec42a..24903d8b 100644 --- a/src/api/workflows/main.go +++ b/src/api/workflows/main.go @@ -36,6 +36,13 @@ var WORKFLOW_VARIANT_SCHEMA WorkflowSchema = map[string]interface{}{ "values": []string{"true", "false"}, // simulate boolean type "default": "false", }, + { + "id": "gohan_url", + "type": "string", + "required": true, + "value": "FROM_CONFIG", + "hidden": true, + }, }, "outputs": []map[string]interface{}{ { diff --git a/src/api/workflows/vcf_gz.wdl b/src/api/workflows/vcf_gz.wdl index 66cd30cd..343f3de3 100644 --- a/src/api/workflows/vcf_gz.wdl +++ b/src/api/workflows/vcf_gz.wdl @@ -1,5 +1,5 @@ workflow vcf_gz { - String service_url + String gohan_url Array[File] vcf_gz_file_names String assembly_id String project_id @@ -9,7 +9,7 @@ workflow vcf_gz { scatter(file_name in vcf_gz_file_names) { call vcf_gz_gohan { - input: gohan_url = service_url, + input: gohan_url = gohan_url, vcf_gz_file_name = file_name, assembly_id = assembly_id, project = project_id, From 7075ca3922fcc9259639e12a244d89d6e7382830 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 23 Aug 2023 16:26:25 +0000 Subject: [PATCH 73/84] uniform dataset responses --- src/api/main.go | 1 + src/api/models/dtos/main.go | 6 +++++- src/api/mvc/variants/main.go | 38 ++++++++++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index 
be8b9a3f..8f55502b 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -180,6 +180,7 @@ func main() { // --- Dataset e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) + e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/models/dtos/main.go b/src/api/models/dtos/main.go index 6d782967..6f3ee11b 100644 --- a/src/api/models/dtos/main.go +++ b/src/api/models/dtos/main.go @@ -60,11 +60,15 @@ type VariantCall struct { } // --- Dataset -type DatasetSummaryResponseDto struct { +type DataTypeSummaryResponseDto struct { Count int `json:"count"` DataTypeSpecific map[string]interface{} `json:"data_type_specific"` // TODO: type-safety? } +type DatasetDataTypeSummaryResponseDto struct { + Variant DataTypeSummaryResponseDto `json:"variant"` +} + // -- Genes type GenesResponseDTO struct { Status int `json:"status"` diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index d1595476..9f750863 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -21,6 +21,7 @@ import ( "gohan/api/models/dtos/errors" "gohan/api/models/indexes" "gohan/api/models/ingest" + "gohan/api/models/schemas" "gohan/api/mvc" esRepo "gohan/api/repositories/elasticsearch" variantService "gohan/api/services/variants" @@ -510,18 +511,43 @@ func GetDatasetSummary(c echo.Context) error { // wait for all HTTP fetches to complete. 
if err := g.Wait(); err == nil { fmt.Printf("Successfully Obtained Dataset '%s' Summary \n", dataset) - - return c.JSON(http.StatusOK, &dtos.DatasetSummaryResponseDto{ - Count: int(totalVariantsCount), - DataTypeSpecific: map[string]interface{}{ - "samples": len(bucketsMapped), + payload := &dtos.DatasetDataTypeSummaryResponseDto{ + Variant: dtos.DataTypeSummaryResponseDto{ + Count: int(totalVariantsCount), + DataTypeSpecific: map[string]interface{}{ + "samples": len(bucketsMapped), + }, }, - }) + } + return c.JSON(http.StatusOK, payload) } else { return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. Please try again later!")) } } +type DataTypeSummary struct { + Id string `json:"id"` + Label string `json:"label"` + Queryable bool `json:"queryable"` + Schema map[string]interface{} `json:"schema"` + Count int `json:"count"` +} + +type DataTypeResponseDto = []DataTypeSummary + +func GetDatasetDataTypes(c echo.Context) error { + count := 0 + return c.JSON(http.StatusOK, &DataTypeResponseDto{ + DataTypeSummary{ + Id: "variant", + Label: "Variants", + Queryable: true, + Schema: schemas.VARIANT_SCHEMA, + Count: count, + }, + }) +} + func executeGetByIds(c echo.Context, ids []string, isVariantIdQuery bool, isDocumentIdQuery bool) error { gc := c.(*contexts.GohanContext) cfg := gc.Config From 485d66a3cc7c2aa0cf77b4ac7e28213d5b8ffc30 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 23 Aug 2023 21:18:40 +0000 Subject: [PATCH 74/84] fix dataset routes --- src/api/main.go | 8 ++++++-- src/api/middleware/datasetMiddleware.go | 16 ++++++++++++++++ src/api/mvc/variants/main.go | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/api/main.go b/src/api/main.go index 8f55502b..696ca13e 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -179,8 +179,12 @@ func main() { gam.ValidatePotentialGenotypeQueryParameter) // --- Dataset - e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary) - 
e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes) + e.GET("/datasets/:dataset/summary", variantsMvc.GetDatasetSummary, + // middleware + gam.MandateDatasetPathParam) + e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes, + // middleware + gam.MandateDatasetPathParam) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 85420145..60252624 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -40,6 +40,22 @@ func MandateDatasetAttribute(next echo.HandlerFunc) echo.HandlerFunc { } } +func MandateDatasetPathParam(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + dataset := c.Param("dataset") + if !utils.IsValidUUID(dataset) { + fmt.Printf("Invalid dataset %s\n", dataset) + + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest(fmt.Sprintf("invalid dataset %s - please provide a valid uuid", dataset))) + } + + gc := c.(*contexts.GohanContext) + gc.Dataset = uuid.MustParse(dataset) + + return next(gc) + } +} + /* Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided */ diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 9f750863..677ce994 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -453,13 +453,13 @@ func GetAllVariantIngestionRequests(c echo.Context) error { } func GetDatasetSummary(c echo.Context) error { - fmt.Printf("[%s] - GetDatasetSummary hit!\n", time.Now()) gc := c.(*contexts.GohanContext) cfg := gc.Config es := gc.Es7Client dataset := gc.Dataset + fmt.Printf("[%s] - GetDatasetSummary hit: [%s]!\n", time.Now(), dataset.String()) // parallelize these two es queries From eff15b64c09fbe7a8df636b67eb251d6a4d5931e Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 24 Aug 2023 16:18:10 +0000 Subject: 
[PATCH 75/84] variants count by dataset endpoint --- src/api/mvc/variants/main.go | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index 677ce994..f49b9ec7 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -452,6 +452,39 @@ func GetAllVariantIngestionRequests(c echo.Context) error { return c.JSON(http.StatusOK, m) } +func GetDatasetVariantsCount(c echo.Context) int { + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client + + dataset := gc.Dataset + + var ( + totalVariantsCount = 0.0 + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + docs, countError := esRepo.CountDocumentsContainerVariantOrSampleIdInPositionRange(cfg, es, + "*", 0, 0, + "", "", dataset.String(), // note : both variantId and sampleId are deliberately set to "" + "", "", []string{}, "", "") + if countError != nil { + fmt.Printf("Failed to count variants in dataset %s\n", dataset) + return countError + } + + totalVariantsCount = docs["count"].(float64) + return nil + }) + + // wait for all HTTP fetches to complete. 
+ if err := g.Wait(); err == nil { + fmt.Printf("Successfully Obtained Dataset '%s' variants count: '%f' \n", dataset, totalVariantsCount) + } + return int(totalVariantsCount) +} + func GetDatasetSummary(c echo.Context) error { gc := c.(*contexts.GohanContext) @@ -536,7 +569,7 @@ type DataTypeSummary struct { type DataTypeResponseDto = []DataTypeSummary func GetDatasetDataTypes(c echo.Context) error { - count := 0 + count := GetDatasetVariantsCount(c) return c.JSON(http.StatusOK, &DataTypeResponseDto{ DataTypeSummary{ Id: "variant", From 93b5dad8dab3e0df3959b8ea90c921667f28d8c1 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 25 Aug 2023 14:46:30 -0400 Subject: [PATCH 76/84] metadata_schema in data type response --- src/api/models/schemas/schemas.go | 4 ++++ src/api/mvc/data-types/main.go | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/api/models/schemas/schemas.go b/src/api/models/schemas/schemas.go index 7e520ea6..6077c209 100644 --- a/src/api/models/schemas/schemas.go +++ b/src/api/models/schemas/schemas.go @@ -7,6 +7,10 @@ import ( type Schema map[string]interface{} +var OBJECT_SCHEMA Schema = Schema{ + "type": "object", +} + var VARIANT_METADATA_SCHEMA Schema = map[string]interface{}{ "$id": "variant:metadata", // TODO: Real ID "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/src/api/mvc/data-types/main.go b/src/api/mvc/data-types/main.go index 299ee94f..d95d3e07 100644 --- a/src/api/mvc/data-types/main.go +++ b/src/api/mvc/data-types/main.go @@ -12,10 +12,11 @@ import ( ) var variantDataTypeJson = map[string]interface{}{ - "id": "variant", - "label": "Variants", - "queryable": true, - "schema": schemas.VARIANT_SCHEMA, + "id": "variant", + "label": "Variants", + "queryable": true, + "schema": schemas.VARIANT_SCHEMA, + "metadata_schema": schemas.OBJECT_SCHEMA, } func GetDataTypes(c echo.Context) error { From b324c00bd21459616ec597abaaba0655c65bc1a3 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 
28 Aug 2023 16:47:20 +0000 Subject: [PATCH 77/84] delete variants by dataset id endpoint --- src/api/contexts/contexts.go | 1 + src/api/main.go | 3 + src/api/middleware/datasetMiddleware.go | 15 +++++ src/api/mvc/variants/main.go | 35 ++++++++++++ .../repositories/elasticsearch/variants.go | 56 +++++++++++++++++++ 5 files changed, 110 insertions(+) diff --git a/src/api/contexts/contexts.go b/src/api/contexts/contexts.go index b1c36f38..6cde63fd 100644 --- a/src/api/contexts/contexts.go +++ b/src/api/contexts/contexts.go @@ -31,6 +31,7 @@ type ( Genotype constants.GenotypeQuery SampleIds []string Dataset uuid.UUID + DataType string PositionBounds } diff --git a/src/api/main.go b/src/api/main.go index 696ca13e..87dae683 100644 --- a/src/api/main.go +++ b/src/api/main.go @@ -185,6 +185,9 @@ func main() { e.GET("/datasets/:dataset/data-types", variantsMvc.GetDatasetDataTypes, // middleware gam.MandateDatasetPathParam) + e.DELETE("/datasets/:dataset/data-types/:dataType", variantsMvc.ClearDataset, + gam.MandateDatasetPathParam, + gam.MandateDataTypePathParam) // TODO: refactor (deduplicate) -- e.GET("/variants/ingestion/run", variantsMvc.VariantsIngest, diff --git a/src/api/middleware/datasetMiddleware.go b/src/api/middleware/datasetMiddleware.go index 60252624..0a51bb7f 100644 --- a/src/api/middleware/datasetMiddleware.go +++ b/src/api/middleware/datasetMiddleware.go @@ -56,6 +56,21 @@ func MandateDatasetPathParam(next echo.HandlerFunc) echo.HandlerFunc { } } +func MandateDataTypePathParam(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + dataType := c.Param("dataType") + if dataType != "variant" { + fmt.Printf("Invalid data-type provided: %s\n", dataType) + return c.JSON(http.StatusBadRequest, errors.CreateSimpleBadRequest( + fmt.Sprintf("invalid data-type %s - please provide a valid data-type (e.g. 
\"variant\")", dataType), + )) + } + gc := c.(*contexts.GohanContext) + gc.DataType = dataType + return next(gc) + } +} + /* Echo middleware to ensure a `dataset` HTTP query parameter is valid if provided */ diff --git a/src/api/mvc/variants/main.go b/src/api/mvc/variants/main.go index f49b9ec7..0b061783 100644 --- a/src/api/mvc/variants/main.go +++ b/src/api/mvc/variants/main.go @@ -558,6 +558,41 @@ func GetDatasetSummary(c echo.Context) error { } } +func ClearDataset(c echo.Context) error { + gc := c.(*contexts.GohanContext) + cfg := gc.Config + es := gc.Es7Client + + dataset := gc.Dataset + dataType := gc.DataType + fmt.Printf("[%s] - ClearDataset hit: [%s] - [%s]!\n", time.Now(), dataset.String(), dataType) + + var ( + deletionCount = 0.0 + g = new(errgroup.Group) + ) + // request #1 + g.Go(func() error { + deleteResponse, delErr := esRepo.DeleteVariantsByDatasetId(cfg, es, dataset.String()) + + if delErr != nil { + fmt.Printf("Failed to delete dataset %s variants\n", dataset) + return delErr + } + + deletionCount = deleteResponse["deleted"].(float64) + + return nil + }) + + if err := g.Wait(); err == nil { + fmt.Printf("Deleted %f variants from dataset %s\n", deletionCount, dataset) + return c.NoContent(http.StatusNoContent) + } else { + return c.JSON(http.StatusInternalServerError, errors.CreateSimpleInternalServerError("Something went wrong.. 
Please try again later!")) + } +} + type DataTypeSummary struct { Id string `json:"id"` Label string `json:"label"` diff --git a/src/api/repositories/elasticsearch/variants.go b/src/api/repositories/elasticsearch/variants.go index e5dc733b..32b1f963 100644 --- a/src/api/repositories/elasticsearch/variants.go +++ b/src/api/repositories/elasticsearch/variants.go @@ -668,6 +668,62 @@ func GetVariantsBucketsByKeywordAndDataset(cfg *models.Config, es *elasticsearch return result, nil } +func DeleteVariantsByDatasetId(cfg *models.Config, es *elasticsearch.Client, dataset string) (map[string]interface{}, error) { + + var buf bytes.Buffer + query := map[string]interface{}{ + "query": map[string]interface{}{ + "match": map[string]interface{}{ + "dataset": dataset, + }, + }, + } + + if err := json.NewEncoder(&buf).Encode(query); err != nil { + log.Fatalf("Error encoding query: %s\n", query) + } + + if cfg.Debug { + // view the outbound elasticsearch query + myString := string(buf.Bytes()[:]) + fmt.Println(myString) + } + + // Perform the delete request. + deleteRes, deleteErr := es.DeleteByQuery( + []string{wildcardVariantsIndex}, + bytes.NewReader(buf.Bytes()), + ) + if deleteErr != nil { + fmt.Printf("Error getting response: %s\n", deleteErr) + return nil, deleteErr + } + + defer deleteRes.Body.Close() + + resultString := deleteRes.String() + if cfg.Debug { + fmt.Println(resultString) + } + + // Prepare an empty interface + result := make(map[string]interface{}) + + // Unmarshal or Decode the JSON to the empty interface. 
+ // Known bug: response comes back with a preceding '[200 OK] ' which needs trimming + bracketString, jsonBodyString := utils.GetLeadingStringInBetweenSquareBrackets(resultString) + if !strings.Contains(bracketString, "200") { + return nil, fmt.Errorf("failed to get documents by id : got '%s'", bracketString) + } + umErr := json.Unmarshal([]byte(jsonBodyString), &result) + if umErr != nil { + fmt.Printf("Error unmarshalling variant deletion response: %s\n", umErr) + return nil, umErr + } + + return result, nil +} + // -- internal use only -- func addAllelesToShouldMap(alleles []string, genotype c.GenotypeQuery, allelesShouldMap []map[string]interface{}) ([]map[string]interface{}, int) { minimumShouldMatch := 0 From afda847f930c83a5e5cd6641d34bc1912fabc69b Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 28 Aug 2023 19:51:04 +0000 Subject: [PATCH 78/84] chore: add dev-container metadata to dev image --- src/api/dev.Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 601835db..e286c785 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -3,6 +3,16 @@ ARG BUILDER_BASE_IMAGE # Stage 1 - builder FROM $BUILDER_BASE_IMAGE as builder +LABEL org.opencontainers.image.description="Local development image for Bento Gohan." 
+LABEL devcontainer.metadata='[{ \ + "customizations": { \ + "vscode": { \ + "extensions": ["golang.go", "eamodio.gitlens"], \ + "settings": {"workspaceFolder": "/gohan-api"} \ + } \ + } \ +}]' + # Maintainer LABEL maintainer="Brennan Brouillette " From f55926ea5ec8ae0c2bdfc17c945cf22a1c4e5e20 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 29 Aug 2023 15:56:05 -0400 Subject: [PATCH 79/84] chore: update drs version, authz disabled --- docker-compose.yaml | 1 + etc/example.env | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 7dedd6aa..aaa5657b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -151,6 +151,7 @@ services: - DATABASE=/drs/bento_drs/data/db/ # slightly confused naming, folder for database to go in - DATA=/drs/bento_drs/data/obj/ # DRS file objects, vs. the database - INTERNAL_PORT=${GOHAN_DRS_INTERNAL_PORT} + - AUTHZ_ENABLED=False volumes: - ${GOHAN_DRS_DATA_DIR}:/drs/bento_drs/data - ${GOHAN_API_DRS_BRIDGE_HOST_DIR}:${GOHAN_DRS_API_DRS_BRIDGE_DIR_CONTAINERIZED} diff --git a/etc/example.env b/etc/example.env index 6b4d4e1f..902c3fc4 100644 --- a/etc/example.env +++ b/etc/example.env @@ -114,7 +114,7 @@ GOHAN_KB_ES_PORT=9200 # DRS GOHAN_DRS_IMAGE=ghcr.io/bento-platform/bento_drs -GOHAN_DRS_VERSION=0.9.0 +GOHAN_DRS_VERSION=0.12.3 GOHAN_DRS_CONTAINER_NAME=gohan-drs GOHAN_DRS_INTERNAL_PORT=5000 GOHAN_DRS_EXTERNAL_PORT=6000 @@ -164,4 +164,4 @@ GOHAN_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC GOHAN_ES_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_ES_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} GOHAN_DRS_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_DRS_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME} -GOHAN_KB_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_KB_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} \ No newline at end of file 
+GOHAN_KB_PUBLIC_URL=${GOHAN_PUBLIC_PROTO}://${GOHAN_PUBLIC_KB_SUBDOMAIN}${GOHAN_PUBLIC_HOSTNAME}:${GOHAN_PUBLIC_PORT} From 299e20e377d94a96d189236fb179b3bccc1750ac Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 10:37:49 -0400 Subject: [PATCH 80/84] chore: set bento_user and git in dev image --- src/api/dev.Dockerfile | 5 ++++- src/api/entrypoint.bash | 17 +++++++++++++++++ src/api/run.dev.bash | 7 +++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 src/api/entrypoint.bash create mode 100644 src/api/run.dev.bash diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index e286c785..59eebb14 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -33,5 +33,8 @@ RUN go mod download && go mod vendor # Repository mounted to the container WORKDIR /gohan-api/src/api +COPY entrypoint.bash . +COPY run.dev.bash . -CMD [ "air" ] +ENTRYPOINT [ "bash", "./entrypoint.bash" ] +CMD [ "bash", "./run.dev.bash" ] diff --git a/src/api/entrypoint.bash b/src/api/entrypoint.bash new file mode 100644 index 00000000..b11103b3 --- /dev/null +++ b/src/api/entrypoint.bash @@ -0,0 +1,17 @@ +#!/bin/bash + +cd /gohan-api || exit + +# Create bento_user and home +source /create_service_user.bash + +# Create dev build directory +mkdir -p src/api/tmp + +# Set permissions / groups +chown -R bento_user:bento_user ./ +chown -R bento_user:bento_user /app +chmod -R o-rwx src/api/tmp + +# Drop into bento_user from root and execute the CMD specified for the image +exec gosu bento_user "$@" diff --git a/src/api/run.dev.bash b/src/api/run.dev.bash new file mode 100644 index 00000000..4a390876 --- /dev/null +++ b/src/api/run.dev.bash @@ -0,0 +1,7 @@ +#!/bin/bash + +# Set .gitconfig for development +/set_gitconfig.bash + +# Start gohan-api with hot reload using Air +air From 0a9836ae65d62d74cffe3eb797142ba7892c2b18 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 12:14:53 -0400 Subject: [PATCH 81/84] chore: bump base image 
version --- etc/example.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/example.env b/etc/example.env index 902c3fc4..1ecb98ed 100644 --- a/etc/example.env +++ b/etc/example.env @@ -39,7 +39,7 @@ GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.03.06 +GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.16.2000 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 From 5c868092a61c5e374995e9356bca774981ef0c75 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 15:42:35 -0400 Subject: [PATCH 82/84] chore: use bento golang base image --- .github/workflows/api.build.yml | 2 -- .github/workflows/api.test.yml | 3 +-- docker-compose.yaml | 1 - etc/example.env | 3 +-- src/api/Dockerfile | 15 ++------------- src/api/dev.Dockerfile | 7 +++---- 6 files changed, 7 insertions(+), 24 deletions(-) diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index 87ef8983..496f8d21 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -29,7 +29,6 @@ jobs: run: | cp ./etc/example.env .env source .env - echo "GOHAN_API_BUILDER_BASE_IMAGE=$GOHAN_API_BUILDER_BASE_IMAGE" >> $GITHUB_ENV echo "GOHAN_API_BASE_IMAGE=$GOHAN_API_BASE_IMAGE" >> $GITHUB_ENV - name: Load environment variables from .env file @@ -40,7 +39,6 @@ jobs: with: context: "{{defaultContext}}:src/api" build-args: | - BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} registry: ghcr.io registry-username: ${{ github.actor }} diff --git a/.github/workflows/api.test.yml b/.github/workflows/api.test.yml index f958bb79..5f39ae43 100644 --- a/.github/workflows/api.test.yml +++ b/.github/workflows/api.test.yml @@ -34,7 +34,6 @@ jobs: # with: # context: "{{defaultContext}}:src/api" # build-args: | - # 
BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} # BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} # registry: ghcr.io # registry-username: ${{ github.actor }} @@ -45,4 +44,4 @@ jobs: - name: API Test run: | sudo apt-get install -y tabix - make test-api \ No newline at end of file + make test-api diff --git a/docker-compose.yaml b/docker-compose.yaml index aaa5657b..641056ec 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,7 +32,6 @@ services: build: context: $PWD/src/api args: - BUILDER_BASE_IMAGE: ${GOHAN_API_BUILDER_BASE_IMAGE} BASE_IMAGE: ${GOHAN_API_BASE_IMAGE} # also passed in as an ENV from within Dockerfile : GOHAN_API_INTERNAL_PORT: ${GOHAN_API_INTERNAL_PORT} diff --git a/etc/example.env b/etc/example.env index 1ecb98ed..80aac62c 100644 --- a/etc/example.env +++ b/etc/example.env @@ -38,8 +38,7 @@ GOHAN_GATEWAY_CPUS=2 GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest -GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.16.2000 +GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 diff --git a/src/api/Dockerfile b/src/api/Dockerfile index 5384b40f..57a6f933 100644 --- a/src/api/Dockerfile +++ b/src/api/Dockerfile @@ -1,13 +1,11 @@ -ARG BUILDER_BASE_IMAGE ARG BASE_IMAGE -# Stage 1 - builder -FROM $BUILDER_BASE_IMAGE as builder +FROM $BASE_IMAGE # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR /build +WORKDIR /app COPY . . @@ -15,10 +13,6 @@ COPY . . 
RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api - -# Stage two - executioner -FROM $BASE_IMAGE - # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -27,11 +21,6 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* -WORKDIR /app - -# Copy pre-built executable from builder stage -COPY --from=builder /build/gohan_api . - # Copy static workflow files COPY workflows/*.wdl /app/workflows/ diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 59eebb14..394ab49f 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,7 +1,6 @@ -ARG BUILDER_BASE_IMAGE +ARG BASE_IMAGE -# Stage 1 - builder -FROM $BUILDER_BASE_IMAGE as builder +FROM $BASE_IMAGE LABEL org.opencontainers.image.description="Local development image for Bento Gohan." LABEL devcontainer.metadata='[{ \ @@ -37,4 +36,4 @@ COPY entrypoint.bash . COPY run.dev.bash . ENTRYPOINT [ "bash", "./entrypoint.bash" ] -CMD [ "bash", "./run.dev.bash" ] +CMD [ "air" ] From cca5188fb9322a8a867d4539fe041bc373dc15ae Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 30 Aug 2023 16:12:05 -0400 Subject: [PATCH 83/84] minimize prod image size with 2 stages --- .github/workflows/api.build.yml | 8 ++++++-- docker-compose.yaml | 4 +++- etc/example.env | 4 +++- src/api/Dockerfile | 15 ++++++++++++--- src/api/dev.Dockerfile | 4 ++-- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/.github/workflows/api.build.yml b/.github/workflows/api.build.yml index 496f8d21..5da512d1 100644 --- a/.github/workflows/api.build.yml +++ b/.github/workflows/api.build.yml @@ -29,7 +29,9 @@ jobs: run: | cp ./etc/example.env .env source .env - echo "GOHAN_API_BASE_IMAGE=$GOHAN_API_BASE_IMAGE" >> $GITHUB_ENV + echo "GOHAN_API_BUILDER_BASE_IMAGE=$GOHAN_API_BUILDER_BASE_IMAGE" >> $GITHUB_ENV + echo "GOHAN_API_DEV_BASE_IMAGE=$GOHAN_API_DEV_BASE_IMAGE" >> $GITHUB_ENV + echo 
"GOHAN_API_PROD_BASE_IMAGE=$GOHAN_API_PROD_BASE_IMAGE" >> $GITHUB_ENV - name: Load environment variables from .env file uses: xom9ikk/dotenv@v2 @@ -39,7 +41,9 @@ jobs: with: context: "{{defaultContext}}:src/api" build-args: | - BASE_IMAGE=${{ env.GOHAN_API_BASE_IMAGE }} + BUILDER_BASE_IMAGE=${{ env.GOHAN_API_BUILDER_BASE_IMAGE }} + BASE_DEV_IMAGE=${{ env.GOHAN_API_DEV_BASE_IMAGE }} + BASE_PROD_IMAGE=${{ env.GOHAN_API_PROD_BASE_IMAGE }} registry: ghcr.io registry-username: ${{ github.actor }} registry-password: ${{ secrets.GITHUB_TOKEN }} diff --git a/docker-compose.yaml b/docker-compose.yaml index 641056ec..ab1e4dd1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -32,7 +32,9 @@ services: build: context: $PWD/src/api args: - BASE_IMAGE: ${GOHAN_API_BASE_IMAGE} + BUILDER_BASE_IMAGE: ${GOHAN_API_BUILDER_BASE_IMAGE} + BASE_PROD_IMAGE: ${GOHAN_API_PROD_BASE_IMAGE} + BASE_DEV_IMAGE: ${GOHAN_API_PROD_BASE_IMAGE} # also passed in as an ENV from within Dockerfile : GOHAN_API_INTERNAL_PORT: ${GOHAN_API_INTERNAL_PORT} networks: diff --git a/etc/example.env b/etc/example.env index 80aac62c..5dab7001 100644 --- a/etc/example.env +++ b/etc/example.env @@ -38,7 +38,9 @@ GOHAN_GATEWAY_CPUS=2 GOHAN_API_IMAGE=gohan-api GOHAN_API_VERSION=latest -GOHAN_API_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 +GOHAN_API_BUILDER_BASE_IMAGE=golang:1.20-bullseye +GOHAN_API_DEV_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:golang-debian-2023.08.30 +GOHAN_API_PROD_BASE_IMAGE=ghcr.io/bento-platform/bento_base_image:plain-debian-2023.08.30 GOHAN_API_CONTAINER_NAME=gohan-api GOHAN_API_SERVICE_HOST=0.0.0.0 diff --git a/src/api/Dockerfile b/src/api/Dockerfile index 57a6f933..e93e61f4 100644 --- a/src/api/Dockerfile +++ b/src/api/Dockerfile @@ -1,11 +1,13 @@ -ARG BASE_IMAGE +ARG BUILDER_BASE_IMAGE +ARG BASE_PROD_IMAGE -FROM $BASE_IMAGE +# Stage 1 - builder +FROM $BUILDER_BASE_IMAGE as builder # Maintainer LABEL maintainer="Brennan Brouillette " -WORKDIR 
/app +WORKDIR /build COPY . . @@ -13,6 +15,8 @@ COPY . . RUN go mod vendor && \ go build -ldflags="-s -w" -o gohan_api +FROM $BASE_PROD_IMAGE + # Debian updates # - tabix for indexing VCFs # - other base dependencies provided by the base image @@ -21,6 +25,11 @@ RUN apt-get update -y && \ apt-get install -y tabix && \ rm -rf /var/lib/apt/lists/* +WORKDIR /app + +# Copy pre-built executable from builder stage +COPY --from=builder /build/gohan_api . + # Copy static workflow files COPY workflows/*.wdl /app/workflows/ diff --git a/src/api/dev.Dockerfile b/src/api/dev.Dockerfile index 394ab49f..2b810113 100644 --- a/src/api/dev.Dockerfile +++ b/src/api/dev.Dockerfile @@ -1,6 +1,6 @@ -ARG BASE_IMAGE +ARG BASE_DEV_IMAGE -FROM $BASE_IMAGE +FROM $BASE_DEV_IMAGE LABEL org.opencontainers.image.description="Local development image for Bento Gohan." LABEL devcontainer.metadata='[{ \ From 48bacacc4eff75c6eea5d0a3c46623eab3447cda Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 5 Sep 2023 10:35:49 -0400 Subject: [PATCH 84/84] fix dev container entrypopint dir --- src/api/entrypoint.bash | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/api/entrypoint.bash b/src/api/entrypoint.bash index b11103b3..5fcbb0dc 100644 --- a/src/api/entrypoint.bash +++ b/src/api/entrypoint.bash @@ -13,5 +13,7 @@ chown -R bento_user:bento_user ./ chown -R bento_user:bento_user /app chmod -R o-rwx src/api/tmp +cd /gohan-api/src/api || exit + # Drop into bento_user from root and execute the CMD specified for the image exec gosu bento_user "$@"