Skip to content

Commit

Permalink
workflow format update
Browse files Browse the repository at this point in the history
  • Loading branch information
Chunmingl committed Sep 13, 2024
1 parent 001e25f commit da82a5c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 29 deletions.
5 changes: 3 additions & 2 deletions code/mnm_analysis/mnm_regression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@
" }\n",
" }\n",
" saveRDS(list(\"${_meta_info[2]}\" = finemapping_output), \"${_output[0]:ann}.univariate_bvrs.rds\", compress='xz')\n",
" \n",
" if (length(twas_output) > 0) saveRDS(list(\"${_meta_info[2]}\" = twas_output), \"${_output[0]:ann}.univariate_twas_weights.rds\", compress='xz')\n",
" if (empty_elements_cnt>0) {\n",
" message(empty_elements_cnt, \" analysis are skipped for failing to pass initial screen for potential association signals\")\n",
Expand Down Expand Up @@ -1145,8 +1146,8 @@
" other_quantities = list(dropped_samples = fdat$dropped_samples),\n",
" # filters\n",
" imiss_cutoff = ${imiss}, \n",
" maf_cutoff = 0.01,\n",
" xvar_cutoff = 0.01,\n",
" maf_cutoff = ${maf},\n",
" xvar_cutoff = ${xvar_cutoff},\n",
" ld_reference_meta_file = ${('\"%s\"' % ld_reference_meta_file) if not ld_reference_meta_file.is_dir() else \"NULL\"},\n",
" pip_cutoff_to_skip = pip_cutoff_to_skip,\n",
" # methods parameter configuration\n",
Expand Down
51 changes: 24 additions & 27 deletions code/pecotmr_integration/twas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@
"source": [
"## Example\n",
"```\n",
"sos run /home/cl4215/githubrepo/xqtl-protocol/code/pecotmr_integration/twas.ipynb twas \\\n",
" --cwd /mnt/vast/hpc/csg/cl4215/mrmash/workflow/ \\\n",
"sos run xqtl-protocol/code/pecotmr_integration/twas.ipynb twas \\\n",
" --cwd output/ \\\n",
" --gwas_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/GWAS/gwas_meta.tsv \\\n",
" --ld_meta_data /mnt/vast/hpc/csg/data_public/20240409_ADSP_LD_matrix/ld_meta_file.tsv \\\n",
" --regions /mnt/vast/hpc/csg/cl4215/mrmash/workflow/twas_mr/pipeline/EUR_LD_blocks_test.bed \\\n",
" --xqtl_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/susie_twas/Fungen_xQTL.cis_results_db_TSS.tsv \\\n",
" --xqtl_type_table /mnt/vast/hpc/csg/cl4215/mrmash/workflow/susie_twas/Fungen_xQTL.cis_results_db_TSS_xqtl_type.tsv \\\n",
" --max_var_select 10 --p_value_cutoff 0.05 --rsq_threshold 0.01 --twas\n",
" --p_value_cutoff 0.05 --rsq_threshold 0.01 --twas\n",
"```"
]
},
Expand Down Expand Up @@ -392,7 +392,6 @@
"parameter: allele_qc = True\n",
"parameter: coverage = \"cs_coverage_0.95\"\n",
"# maximum number of top variant selection \n",
"parameter: max_var_select = 10\n",
"# Threashold for rsq and pvalue for imputability determination for a model \n",
"parameter: p_value_cutoff = 0.05\n",
"parameter: rsq_threshold = 0.01\n",
Expand Down Expand Up @@ -438,8 +437,8 @@
" gene_list <- c(${', '.join([f\"'{gene}'\" for gene in _filtered_region_info[4]])})\n",
"\n",
" # Process weights \n",
" refined_twas_weights_data = list()\n",
" refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]] <- list()\n",
" export_twas_weights_db = list()\n",
" export_twas_weights_db[[\"${_filtered_region_info[3]}\"]] <- list()\n",
" twas_weights_results=list()\n",
" weight_db_list <- c(${_input:r,})\n",
" names(weight_db_list) <- gene_list\n",
Expand All @@ -448,30 +447,28 @@
" weight_db_list_update <- lapply(weight_db_list, function(file_list){do.call(c,lapply(file_list, function(file) if (file.size(file) > 200) file))})\n",
" if(length(weight_db_list_update) <= 0) stop(paste0(\"No weight information available from \", paste0(weight_db_list, collapse=\",\"), \". \"))\n",
" \n",
" # Step 1: Load TWAS weights and generate twas weight db, output refined_twas_weights_data \n",
" # Step 1: Load TWAS weights and generate twas weight db, output export_twas_weights_db \n",
" for (gene_db in names(weight_db_list_update)) {\n",
" weight_dbs <- weight_db_list_update[[gene_db]]\n",
" data_type <- if (!is.null(xqtl_type_table)) unique(do.call(c, lapply(weight_dbs, function(weight_db) xqtl_type_table$type[xqtl_type_table$file==weight_db]))) else NULL \n",
" if (length(data_type)>1) stop(paste0(\"more than one data type information provided for weight db file \", weight_db, \". \"))\n",
" twas_weights_results[[gene_db]] = generate_twas_db(weight_dbs, contexts=NULL,variable_name_obj=c(\"preset_variants_result\", \"variant_names\"), \n",
" twas_weights_table = \"twas_weights\", max_var_selection=${max_var_select}, min_rsq_threshold = ${rsq_threshold}, \n",
" p_val_cutoff = ${p_value_cutoff}, data_type=data_type)\n",
" twas_weights_results[[gene_db]] = generate_twas_db(weight_dbs, contexts=NULL,variable_name_obj=c(\"variant_names\"), susie_obj=\"susie_weights_intermediate\",\n",
" twas_weights_table = \"twas_weights\", min_rsq_threshold = ${rsq_threshold}, \n",
" p_val_cutoff = ${p_value_cutoff}, data_type_table=xqtl_type_table)\n",
" if (gene_db %in% names(twas_weights_results)){\n",
" # merge refined_twas_weights_data by imputable genes\n",
" gene <- twas_weights_results[[gene_db]]$gene\n",
" # update refined_twas_weights_data for imputable genes, merge if same gene's other context results are available \n",
" refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]] <- c(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]],\n",
" twas_weights_results[[gene_db]]$refined_twas_weights) \n",
" # merge and update export_twas_weights_db for imputable genes, merge if same gene's other context results are available \n",
" if (!is.null(twas_weights_results[[gene_db]])){\n",
" export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene_db]] <- c(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene_db]],\n",
" twas_weights_results[[gene_db]]$export_twas_weights_db)\n",
" }\n",
" }\n",
" }\n",
" saveRDS(refined_twas_weights_data,\"${_output[0]}\", compress='xz')\n",
" saveRDS(export_twas_weights_db,\"${_output[0]}\", compress='xz')\n",
" \n",
" # Step 2: TWAS analysis for all methods for imputable gene\n",
" if (${\"TRUE\" if twas else \"FALSE\"}){\n",
" twas_result_table <- do.call(rbind, lapply(names(twas_weights_results), function(weight_db){\n",
" # harmonize twas weights and gwas sumstats against LD \n",
" twas_data_qced <- harmonize_twas(twas_weights_results[[weight_db]], LD_meta_file_path, \"${gwas_meta_data}\", \n",
" twas_data_loader=twas_weights_loader, scale_weights=TRUE)\n",
" twas_data_loader=twas_weights_loader, scale_weights=FALSE)\n",
" gene <- names(twas_data_qced)\n",
" if (length(gene)<1) stop(paste0(\"No gene's data processed at harmonization process for \", weight_db, \". \"))\n",
" contexts <- names(twas_data_qced[[gene]][[\"weights_qced\"]])\n",
Expand All @@ -492,23 +489,23 @@
" }\n",
" \n",
" # Step 3: Summarize and merge twas results - from all methods for all contexts for imputable genes. \n",
" genes <- names(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]])\n",
" genes <- names(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]])\n",
" twas_table <- do.call(rbind, lapply(genes, function(gene) {\n",
" contexts <- names(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]])\n",
" contexts <- names(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]])\n",
" # merge twas_cv information for same gene across all weight db files\n",
" cv_data <- do.call(c, lapply(names(twas_weights_results), \n",
" function(file){if(twas_weights_results[[file]]$gene == gene) twas_weights_results[[file]]$cv_performance}))\n",
" # loop through each context for all methods\n",
" gene_table <- do.call(rbind, lapply(contexts, function(context){\n",
" methods <- gsub(\"_.*$\", \"\", names(cv_data[[context]]))\n",
" is_imputable = refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$is_imputable\n",
" selected_method = refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$selected_model\n",
" is_imputable = export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$is_imputable\n",
" selected_method = export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$selected_model\n",
" if(is.null(selected_method)) selected_method <- NA\n",
" is_selected_method <- ifelse(methods == selected_method, TRUE, FALSE)\n",
" cv_rsqs <- sapply(cv_data[[context]], function(x) x[, \"adj_rsq\"])\n",
" cv_pvals <- sapply(cv_data[[context]], function(x) x[, \"adj_rsq_pval\"])\n",
" cv_rsqs <- sapply(cv_data[[context]], function(x) x[, \"rsq\"])\n",
" cv_pvals <- sapply(cv_data[[context]], function(x) x[, \"pval\"])\n",
" context_table <- data.frame(context=context, method=methods, is_imputable = is_imputable, is_selected_method=is_selected_method,\n",
" rsq_adj_cv=cv_rsqs, pval_cv=cv_pvals)\n",
" rsq_cv=cv_rsqs, pval_cv=cv_pvals)\n",
" return(context_table)\n",
" }))\n",
" gene_table$gene = gene\n",
Expand All @@ -522,7 +519,7 @@
" \n",
" if (${\"TRUE\" if twas else \"FALSE\"}) twas_table <- merge(twas_table, twas_result_table, by=c(\"gene\", \"context\", \"method\"))\n",
" colname_ordered <- c(\"chr\", \"start\", \"end\", \"gene\", \"TSS\", \"context\", \"gwas_study\", \"method\", \"is_imputable\", \"is_selected_method\", \n",
" \"rsq_adj_cv\", \"pval_cv\", \"twas_z\", \"twas_pval\", \"block\")\n",
" \"rsq_cv\", \"pval_cv\", \"twas_z\", \"twas_pval\", \"block\")\n",
" \n",
" if (!${\"TRUE\" if twas else \"FALSE\"}) colname_ordered <- colname_ordered[!colname_ordered %in% c(\"twas_z\", \"twas_pval\", \"gwas_study\")]\n",
" write.table(twas_table[, colname_ordered], ${_output[1]:r}, sep=\"\\t\", row.names=FALSE, col.names=TRUE, quote=FALSE)"
Expand Down

0 comments on commit da82a5c

Please sign in to comment.