Skip to content

Commit

Permalink
Merge pull request #1097 from al4225/main
Browse files Browse the repository at this point in the history
add region-name as an option in twas.ipynb
  • Loading branch information
gaow authored Oct 26, 2024
2 parents c88cb98 + 716948c commit d8fd12f
Showing 1 changed file with 79 additions and 13 deletions.
92 changes: 79 additions & 13 deletions code/pecotmr_integration/twas.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,52 @@
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here using `--region-name` we focus the analysis on 2 blocks: format as `chr_start_stop`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"sos run xqtl-protocol/code/pecotmr_integration/twas.ipynb quantile_twas \\\n",
" --cwd /output/ --name test \\\n",
" --gwas_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/GWAS/gwas_meta.tsv \\\n",
" --ld_meta_data /mnt/vast/hpc/csg/data_public/20240409_ADSP_LD_matrix/ld_meta_file.tsv \\\n",
" --region_name chr11_84267999_86714492 chr7_54681006_57314931 \\\n",
" --xqtl_meta_data /home/al4225/project/quantile_twas/quantile_twas_analysis/test_data/small_region_gene_meta_data_test.tsv\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It is also possible to analyze a selected list of regions using option `--regions`. The 3 columns(chr, start, stop) of this file will be used for the block list to analyze. Here for example use the same list of regions as we used for LD block:"
]
},
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"source": [
"```\n",
"sos run xqtl-protocol/code/pecotmr_integration/twas.ipynb quantile_twas \\\n",
" --cwd /output/ --name test \\\n",
" --gwas_meta_data /mnt/vast/hpc/csg/cl4215/mrmash/workflow/GWAS/gwas_meta.tsv \\\n",
" --ld_meta_data /mnt/vast/hpc/csg/data_public/20240409_ADSP_LD_matrix/ld_meta_file.tsv \\\n",
" --regions /mnt/vast/hpc/csg/cl4215/mrmash/workflow/pipeline_data/EUR_LD_blocks.bed \\\n",
" --xqtl_meta_data /home/al4225/project/quantile_twas/quantile_twas_analysis/test_data/small_region_gene_meta_data_test.tsv\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -215,6 +261,9 @@
"parameter: gwas_data = []\n",
"parameter: column_mapping = []\n",
"parameter: regions = path()\n",
"# Optional: if a region name is provided \n",
"# the analysis would be focused on the union of provides region list and region names\n",
"parameter: region_name = []\n",
"parameter: name = f\"{xqtl_meta_data:bn}.{gwas_meta_data:bn}\"\n",
"parameter: container = ''\n",
"import re\n",
Expand Down Expand Up @@ -295,7 +344,7 @@
" if missing_columns:\n",
" raise ValueError(f\"Missing required columns: {', '.join(missing_columns)}\")\n",
"\n",
"def extract_regional_data(gwas_meta_data, xqtl_meta_data, regions, gwas_name, gwas_data, column_mapping):\n",
"def extract_regional_data(gwas_meta_data, xqtl_meta_data, regions, region_name, gwas_name, gwas_data, column_mapping):\n",
" \"\"\"\n",
" Extracts data from GWAS and xQTL metadata files and additional GWAS data provided. \n",
"\n",
Expand Down Expand Up @@ -334,12 +383,30 @@
" gwas_dict = OrderedDict()\n",
" \n",
" # Reading LD regions info\n",
" regions_df = pd.read_csv(regions, sep=\"\\t\",skipinitialspace=True)\n",
" regions_df.columns = [col.strip() for col in regions_df.columns] # Strip spaces from column names before\n",
" regions_df['chr'] = regions_df['chr'].str.strip()\n",
" check_required_columns(regions_df, required_ld_columns)\n",
" # Initialize empty DataFrame for regions\n",
" regions_df = pd.DataFrame(columns=['chr', 'start', 'stop'])\n",
"\n",
" # Check if regions file exists and read it\n",
" if os.path.isfile(regions):\n",
" file_regions_df = pd.read_csv(regions, sep=\"\\t\", skipinitialspace=True)\n",
" file_regions_df.columns = [col.strip() for col in file_regions_df.columns] # Strip spaces from column names\n",
" file_regions_df['chr'] = file_regions_df['chr'].str.strip()\n",
" check_required_columns(file_regions_df, required_ld_columns)\n",
" regions_df = pd.concat([regions_df, file_regions_df])\n",
" # Process region_name if provided: \n",
" # fomat: region_name = [\"chr1_16103_2888443\", \"chr1_4320284_5853833\"]\n",
" if len(region_name) > 0:\n",
" # Split region_name entries into chr, start, and stop columns\n",
" extra_regions = [name.split(\"_\") for name in region_name]\n",
" extra_regions_df = pd.DataFrame(extra_regions, columns=['chr', 'start', 'stop'])\n",
" extra_regions_df['start'] = extra_regions_df['start'].astype(int)\n",
" extra_regions_df['stop'] = extra_regions_df['stop'].astype(int)\n",
" # Add extra regions to regions_df\n",
" regions_df = pd.concat([regions_df, extra_regions_df])\n",
" # Remove duplicates and reset index\n",
" regions_df = regions_df.drop_duplicates().reset_index(drop=True)\n",
" regions_dict = OrderedDict()\n",
" \n",
"\n",
" # Reading the xQTL weight metadata file\n",
" xqtl_df = pd.read_csv(xqtl_meta_data, sep=\" \")\n",
" check_required_columns(xqtl_df, required_xqtl_columns)\n",
Expand Down Expand Up @@ -398,7 +465,7 @@
" return gwas_dict, xqtl_dict, regions_dict\n",
"\n",
"\n",
"gwas_dict, xqtl_dict, regions_dict = extract_regional_data(gwas_meta_data, xqtl_meta_data,regions,gwas_name, gwas_data, column_mapping)\n",
"gwas_dict, xqtl_dict, regions_dict = extract_regional_data(gwas_meta_data, xqtl_meta_data,regions,region_name,gwas_name, gwas_data, column_mapping)\n",
"regional_data = dict([(\"GWAS\", gwas_dict), (\"xQTL\", xqtl_dict), (\"Regions\", regions_dict)])\n",
"\n",
"\n",
Expand Down Expand Up @@ -684,21 +751,20 @@
" quantile_twas = TRUE, \n",
" output_twas_data = ${\"TRUE\" if save_ctwas_data else \"FALSE\"} )\n",
"\n",
" # Merging with xQTL meta-data\n",
" # Merging with xQTL meta-data \n",
" message(\"Merging twas_result with xqtl_meta_df...\") \n",
" twas_results_db$twas_result <- merge(twas_results_db$twas_result, xqtl_meta_df[, c(\"region_id\",\"TSS\",\"start\",\"end\")], by.x=\"molecular_id\", by.y=\"region_id\")\n",
" \n",
" message(\"Preview of merged twas_result:\")\n",
" twas_results_db$twas_result <- unique(twas_results_db$twas_result)\n",
" fwrite(twas_results_db$twas_result[, c(2, 1, (ncol(twas_results_db$twas_result)-2):ncol(twas_results_db$twas_result), 3:(ncol(twas_results_db$twas_result)-3))], file = ${_output[0]:r}, sep = \"\\t\", compress = \"gzip\")\n",
"\n",
" # Step 3: reformat for follow up cTWAS analysis\n",
" if (${\"TRUE\" if save_ctwas_data else \"FALSE\"}) {\n",
" saveRDS(twas_results_db$twas_data, \"${_output[0]:nnn}.twas_data.rds\", compress='xz')\n",
" saveRDS(twas_results_db$twas_data, \"${_output[0]:nnn}.quantile_twas_data.rds\", compress='xz')\n",
" }\n",
" if (${\"TRUE\" if save_mr_result else \"FALSE\"}) {\n",
" fwrite(twas_results_db$mr_result, file = \"${_output[0]:nnn}.mr_result.tsv.gz\", sep = \"\\t\", compress = \"gzip\")\n",
" }"
" fwrite(twas_results_db$mr_result, file = \"${_output[0]:nnn}.quantile_mr_result.tsv.gz\", sep = \"\\t\", compress = \"gzip\")\n",
" }\n",
" message(\"quantile twas analysis is completed in this block.\") "
]
}
],
Expand Down

0 comments on commit d8fd12f

Please sign in to comment.