diff --git a/code/mnm_analysis/mnm_regression.ipynb b/code/mnm_analysis/mnm_regression.ipynb index 271f237a0..ffe7b4316 100644 --- a/code/mnm_analysis/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_regression.ipynb @@ -1038,6 +1038,7 @@ " }\n", " }\n", " saveRDS(list(\"${_meta_info[2]}\" = finemapping_output), \"${_output[0]:ann}.univariate_bvrs.rds\", compress='xz')\n", + " \n", " if (length(twas_output) > 0) saveRDS(list(\"${_meta_info[2]}\" = twas_output), \"${_output[0]:ann}.univariate_twas_weights.rds\", compress='xz')\n", " if (empty_elements_cnt>0) {\n", " message(empty_elements_cnt, \" analysis are skipped for failing to pass initial screen for potential association signals\")\n", @@ -1145,8 +1146,8 @@ " other_quantities = list(dropped_samples = fdat$dropped_samples),\n", " # filters\n", " imiss_cutoff = ${imiss}, \n", - " maf_cutoff = 0.01,\n", - " xvar_cutoff = 0.01,\n", + " maf_cutoff = ${maf},\n", + " xvar_cutoff = ${xvar_cutoff},\n", " ld_reference_meta_file = ${('\"%s\"' % ld_reference_meta_file) if not ld_reference_meta_file.is_dir() else \"NULL\"},\n", " pip_cutoff_to_skip = pip_cutoff_to_skip,\n", " # methods parameter configuration\n", diff --git a/code/pecotmr_integration/twas.ipynb b/code/pecotmr_integration/twas.ipynb index 5dc44fdf1..67a131ae3 100644 --- a/code/pecotmr_integration/twas.ipynb +++ b/code/pecotmr_integration/twas.ipynb @@ -162,14 +162,14 @@ "source": [ "## Example\n", "```\n", - "sos run /home/cl4215/githubrepo/xqtl-protocol/code/pecotmr_integration/twas.ipynb twas \\\n", - " --cwd /mnt/vast/hpc/csg/cl4215/mrmash/workflow/ \\\n", + "sos run xqtl-protocol/code/pecotmr_integration/twas.ipynb twas \\\n", + " --cwd output/ \\\n", " --gwas_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/GWAS/gwas_meta.tsv \\\n", " --ld_meta_data /mnt/vast/hpc/csg/data_public/20240409_ADSP_LD_matrix/ld_meta_file.tsv \\\n", " --regions /mnt/vast/hpc/csg/cl4215/mrmash/workflow/twas_mr/pipeline/EUR_LD_blocks_test.bed \\\n", " --xqtl_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/susie_twas/Fungen_xQTL.cis_results_db_TSS.tsv \\\n", " --xqtl_type_table /mnt/vast/hpc/csg/cl4215/mrmash/workflow/susie_twas/Fungen_xQTL.cis_results_db_TSS_xqtl_type.tsv \\\n", - " --max_var_select 10 --p_value_cutoff 0.05 --rsq_threshold 0.01 --twas\n", + " --p_value_cutoff 0.05 --rsq_threshold 0.01 --twas\n", "```" ] }, @@ -392,7 +392,6 @@ "parameter: allele_qc = True\n", "parameter: coverage = \"cs_coverage_0.95\"\n", "# maximum number of top variant selection \n", - "parameter: max_var_select = 10\n", "# Threashold for rsq and pvalue for imputability determination for a model \n", "parameter: p_value_cutoff = 0.05\n", "parameter: rsq_threshold = 0.01\n", @@ -438,8 +437,8 @@ " gene_list <- c(${', '.join([f\"'{gene}'\" for gene in _filtered_region_info[4]])})\n", "\n", " # Process weights \n", - " refined_twas_weights_data = list()\n", - " refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]] <- list()\n", + " export_twas_weights_db = list()\n", + " export_twas_weights_db[[\"${_filtered_region_info[3]}\"]] <- list()\n", " twas_weights_results=list()\n", " weight_db_list <- c(${_input:r,})\n", " names(weight_db_list) <- gene_list\n", @@ -448,30 +447,28 @@ " weight_db_list_update <- lapply(weight_db_list, function(file_list){do.call(c,lapply(file_list, function(file) if (file.size(file) > 200) file))})\n", " if(length(weight_db_list_update) <= 0) stop(paste0(\"No weight information available from \", paste0(weight_db_list, collapse=\",\"), \". \"))\n", " \n", - " # Step 1: Load TWAS weights and generate twas weight db, output refined_twas_weights_data \n", + " # Step 1: Load TWAS weights and generate twas weight db, output export_twas_weights_db \n", " for (gene_db in names(weight_db_list_update)) {\n", " weight_dbs <- weight_db_list_update[[gene_db]]\n", - " data_type <- if (!is.null(xqtl_type_table)) unique(do.call(c, lapply(weight_dbs, function(weight_db) xqtl_type_table$type[xqtl_type_table$file==weight_db]))) else NULL \n", - " if (length(data_type)>1) stop(paste0(\"more than one data type information provided for weight db file \", weight_db, \". \"))\n", - " twas_weights_results[[gene_db]] = generate_twas_db(weight_dbs, contexts=NULL,variable_name_obj=c(\"preset_variants_result\", \"variant_names\"), \n", - " twas_weights_table = \"twas_weights\", max_var_selection=${max_var_select}, min_rsq_threshold = ${rsq_threshold}, \n", - " p_val_cutoff = ${p_value_cutoff}, data_type=data_type)\n", + " twas_weights_results[[gene_db]] = generate_twas_db(weight_dbs, contexts=NULL,variable_name_obj=c(\"variant_names\"), susie_obj=\"susie_weights_intermediate\",\n", + " twas_weights_table = \"twas_weights\", min_rsq_threshold = ${rsq_threshold}, \n", + " p_val_cutoff = ${p_value_cutoff}, data_type_table=xqtl_type_table)\n", " if (gene_db %in% names(twas_weights_results)){\n", - " # merge refined_twas_weights_data by imputable genes\n", - " gene <- twas_weights_results[[gene_db]]$gene\n", - " # update refined_twas_weights_data for imputable genes, merge if same gene's other context results are available \n", - " refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]] <- c(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]],\n", - " twas_weights_results[[gene_db]]$refined_twas_weights) \n", + " # merge and update export_twas_weights_db for imputable genes, merge if same gene's other context results are available \n", + " if (!is.null(twas_weights_results[[gene_db]])){\n", + " export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene_db]] <- c(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene_db]],\n", + " twas_weights_results[[gene_db]]$export_twas_weights_db)\n", + " }\n", " }\n", " }\n", - " saveRDS(refined_twas_weights_data,\"${_output[0]}\", compress='xz')\n", + " saveRDS(export_twas_weights_db,\"${_output[0]}\", compress='xz')\n", " \n", " # Step 2: TWAS analysis for all methods for imputable gene\n", " if (${\"TRUE\" if twas else \"FALSE\"}){\n", " twas_result_table <- do.call(rbind, lapply(names(twas_weights_results), function(weight_db){\n", " # harmonize twas weights and gwas sumstats against LD \n", " twas_data_qced <- harmonize_twas(twas_weights_results[[weight_db]], LD_meta_file_path, \"${gwas_meta_data}\", \n", - " twas_data_loader=twas_weights_loader, scale_weights=TRUE)\n", + " twas_data_loader=twas_weights_loader, scale_weights=FALSE)\n", " gene <- names(twas_data_qced)\n", " if (length(gene)<1) stop(paste0(\"No gene's data processed at harmonization process for \", weight_db, \". \"))\n", " contexts <- names(twas_data_qced[[gene]][[\"weights_qced\"]])\n", @@ -492,23 +489,23 @@ " }\n", " \n", " # Step 3: Summarize and merge twas results - from all methods for all contexts for imputable genes. \n", - " genes <- names(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]])\n", + " genes <- names(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]])\n", " twas_table <- do.call(rbind, lapply(genes, function(gene) {\n", - " contexts <- names(refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]])\n", + " contexts <- names(export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]])\n", " # merge twas_cv information for same gene across all weight db files\n", " cv_data <- do.call(c, lapply(names(twas_weights_results), \n", " function(file){if(twas_weights_results[[file]]$gene == gene) twas_weights_results[[file]]$cv_performance}))\n", " # loop through each context for all methods\n", " gene_table <- do.call(rbind, lapply(contexts, function(context){\n", " methods <- gsub(\"_.*$\", \"\", names(cv_data[[context]]))\n", - " is_imputable = refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$is_imputable\n", - " selected_method = refined_twas_weights_data[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$selected_model\n", + " is_imputable = export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$is_imputable\n", + " selected_method = export_twas_weights_db[[\"${_filtered_region_info[3]}\"]][[gene]][[context]]$selected_model\n", " if(is.null(selected_method)) selected_method <- NA\n", " is_selected_method <- ifelse(methods == selected_method, TRUE, FALSE)\n", - " cv_rsqs <- sapply(cv_data[[context]], function(x) x[, \"adj_rsq\"])\n", - " cv_pvals <- sapply(cv_data[[context]], function(x) x[, \"adj_rsq_pval\"])\n", + " cv_rsqs <- sapply(cv_data[[context]], function(x) x[, \"rsq\"])\n", + " cv_pvals <- sapply(cv_data[[context]], function(x) x[, \"pval\"])\n", " context_table <- data.frame(context=context, method=methods, is_imputable = is_imputable, is_selected_method=is_selected_method,\n", - " rsq_adj_cv=cv_rsqs, pval_cv=cv_pvals)\n", + " rsq_cv=cv_rsqs, pval_cv=cv_pvals)\n", " return(context_table)\n", " }))\n", " gene_table$gene = gene\n", @@ -522,7 +519,7 @@ " \n", " if (${\"TRUE\" if twas else \"FALSE\"}) twas_table <- merge(twas_table, twas_result_table, by=c(\"gene\", \"context\", \"method\"))\n", " colname_ordered <- c(\"chr\", \"start\", \"end\", \"gene\", \"TSS\", \"context\", \"gwas_study\", \"method\", \"is_imputable\", \"is_selected_method\", \n", - " \"rsq_adj_cv\", \"pval_cv\", \"twas_z\", \"twas_pval\", \"block\")\n", + " \"rsq_cv\", \"pval_cv\", \"twas_z\", \"twas_pval\", \"block\")\n", " \n", " if (!${\"TRUE\" if twas else \"FALSE\"}) colname_ordered <- colname_ordered[!colname_ordered %in% c(\"twas_z\", \"twas_pval\", \"gwas_study\")]\n", " write.table(twas_table[, colname_ordered], ${_output[1]:r}, sep=\"\\t\", row.names=FALSE, col.names=TRUE, quote=FALSE)"