diff --git a/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.Rmd b/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.Rmd new file mode 100644 index 000000000..61ad9b9b0 --- /dev/null +++ b/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.Rmd @@ -0,0 +1,291 @@ +--- +title: "Explore consensus cell types" +author: Ally Hawkins +date: "`r Sys.Date()`" +output: + html_notebook: + toc: true + toc_depth: 3 + code_folding: show +--- + +This notebook summarizes the findings from assigning consensus cell type labels to all ScPCA samples. +All results from the `cell-type-consensus` module in `OpenScPCA-nf` must be saved to `results` prior to rendering this notebook. + +```{r packages} +suppressPackageStartupMessages({ + # load required packages + library(ggplot2) +}) + +# Set default ggplot theme +theme_set( + theme_classic() +) +``` + +## Functions + +```{r} +# function to read in project data frames with all cells in a project +# output is a summarized table with total cells per sample, total cells per annotation, and number of cell types +summarize_celltypes <- function(file, id){ + + # read in data + df <- readr::read_tsv(file) + + # get total cell count and number of assigned cell types per library + total_cells_df <- df |> + dplyr::group_by(library_id) |> + dplyr::summarize( + total_cells_per_library = length(library_id), + num_celltypes = length(unique(consensus_annotation)) + ) + + summary_df <- df |> + dplyr::group_by(library_id, consensus_annotation, consensus_ontology) |> + dplyr::summarize(total_cells_per_annotation = length(consensus_annotation)) |> + dplyr::left_join(total_cells_df, by = "library_id") |> + dplyr::mutate( + # add percentage + percent_cells_annotation = round((total_cells_per_annotation / total_cells_per_library) * 100, 2) + ) |> + dplyr::ungroup() + + return(summary_df) + +} +``` + +## Data setup + + +```{r base paths} +# The base path for the OpenScPCA 
repository, found by its (hidden) .git directory +repository_base <- rprojroot::find_root(rprojroot::is_git_root) +module_base <- file.path(repository_base, "analyses", "cell-type-consensus") + +# results directory with cell-type-consensus +results_dir <- file.path(module_base, "results", "cell-type-consensus") + +# diagnoses table used for labeling plots +diagnoses_file <- file.path(module_base, "sample-info", "project-diagnoses.tsv") +``` + +```{r} +# list all results files +results_files <- list.files(results_dir, pattern = "_consensus-cell-types\\.tsv.\\gz$", full.names = TRUE) + +# get project ids from file list +project_ids <- stringr::str_remove(basename(results_files), "_consensus-cell-types.tsv.gz") +names(results_files) <- project_ids + +# remove cell line projects from file list +cell_line_projects <- c("SCPCP000020", "SCPCP000024") +project_ids <- setdiff(project_ids, cell_line_projects) # remove cell line projects +results_files <- results_files[project_ids] +``` + + +```{r, message=FALSE} +# read in diagnoses +diagnoses_df <- readr::read_tsv(diagnoses_file) + + +# read in results and prep data frame for plotting +all_results_df <- results_files |> + purrr::imap(summarize_celltypes) |> + dplyr::bind_rows(.id = "project_id") |> + # add in diagnoses + dplyr::left_join(diagnoses_df, by = "project_id") |> + dplyr::mutate( + # create a label for plotting + project_label = glue::glue("{project_id}:{diagnosis}") + ) + +``` + +## Is it all just Unknown? + +The first thing we will look at is how many of the cells in each sample are categorized as "Unknown", which means no consensus between `SingleR` and `CellAssign` was identified. 
+ +```{r, fig.height=7} +unknown_only <- all_results_df |> + dplyr::filter(consensus_annotation == "Unknown") + +ggplot(unknown_only, aes(x = project_label, y = percent_cells_annotation)) + + ggforce::geom_sina(size = 0.1) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), + plot.margin = margin(10,10,10,10)) + + labs( + x = "", + y = "Percent of cells annotated as Unknown" + ) + +``` + +It looks like we do have some samples that aren't just all "Unknown"! +It definitely varies by project, but for most projects we at least see some proportion of samples with assigned cell types. + +Let's look at how many samples actually have some cells outside of unknown identified. +To do this, we will identify all libraries that only have cells called as "Unknown". + +```{r} +high_tumor_df <- unknown_only |> + dplyr::mutate(no_cells_identified = percent_cells_annotation == 100) |> + dplyr::group_by(project_label) |> + dplyr::summarize(all_unknown = sum(no_cells_identified), + classified_cells = sum(!no_cells_identified), + percentage_unknown = round(all_unknown/(all_unknown + classified_cells)*100, 2), + # add number of libraries for plotting + total_libraries = length(library_id)) |> + # set order for plots + dplyr::mutate(project_label = forcats::fct_reorder(project_label, total_libraries, .desc = TRUE)) +``` + + +Which projects have the highest proportion of samples with all "Unknown"? + +```{r} +# table with percentage of samples +high_tumor_df |> + dplyr::select(project_label, percentage_unknown) |> + dplyr::arrange(desc(percentage_unknown)) + +``` + +It looks like all projects do have cell types identified that are not "Unknown". +However, `SCPCP000011` (retinoblastoma), has a fairly high percentage of samples without any consensus labels. + +## Number of cell types observed + +Below we look at the number of cell types observed in each project for all samples. +This does not include cells labeled as "Unknown". 
+ +```{r, fig.height=10} +num_celltypes_df <- all_results_df |> + # add a new line for facet labels + dplyr::mutate(facet_label = glue::glue("{project_id}\n{diagnosis}")) |> + # remove unknown as a cell type + dplyr::filter(consensus_annotation != "Unknown") |> + dplyr::select(facet_label, library_id, num_celltypes) |> + unique() + +ggplot(num_celltypes_df, aes(x = num_celltypes)) + + geom_histogram(binwidth = 1, center = 0) + + facet_wrap(vars(facet_label), + ncol = 3) + + labs( + x = "Number of cell types" + ) + + theme_bw() +``` + +## Distribution of consensus cell types + +Now we look at the distribution of the cell types in each sample. +For these plots, we will pull out the top 9 cell types for each project. +All other cells will be labeled with "All remaining cell types". + +The top cell types are determined by counting how many libraries each cell type is found in within a project and taking the most frequent types. + +```{r} +plot_df <- all_results_df |> + dplyr::group_by(project_id) |> + dplyr::mutate( + # get most frequently observed cell types across libraries in that project + top_celltypes = forcats::fct_lump_n(consensus_annotation, 9, other_level = "All remaining cell types", ties.method = "first") |> + # sort by frequency + forcats::fct_infreq() |> + # make sure all remaining and unknown are last, use this to assign colors in specific order + forcats::fct_relevel("All remaining cell types", "Unknown", after = Inf) + ) + +# get all unique cell types ordered by frequency +unique_celltypes <- plot_df |> + dplyr::filter(!top_celltypes %in% c("All remaining cell types", "Unknown")) |> + dplyr::pull(top_celltypes) |> + unique() |> + sort() |> + as.character() + +# get color palette +colors <- c( + palette.colors(palette = "alphabet"), + "black", # 1 extra since alphabet is 26 and we have 27, this will be plasma cell which shows up once + "grey60", + "grey95" +) +names(colors) <- c(unique_celltypes, "All remaining cell types", "Unknown") +``` + + +```{r, 
fig.height=60, fig.width=10} +project_labels <- unique(all_results_df$project_label) + +# stacked bar chart showing the distribution of the top 9 cell types for each project, including Unknown +project_labels |> + purrr::map(\(label){ + + project_df <- plot_df |> + dplyr::filter(project_label == label) |> + dplyr::mutate( + # relevel factors for specific project + top_celltypes = forcats::fct_infreq(top_celltypes) |> + forcats::fct_relevel("All remaining cell types", "Unknown", after = Inf) + ) + + # make a stacked bar chart with top cell types + ggplot(project_df) + + aes( + x = library_id, + y = percent_cells_annotation, + fill = top_celltypes + ) + + geom_col() + + scale_y_continuous(expand = c(0,0)) + + scale_fill_manual(values = colors, name = "cell type") + + ggtitle(label) + + theme(axis.text.x = element_blank()) + + }) |> + patchwork::wrap_plots(ncol = 1) +``` + + +This looks really promising! +A few observations: + +- Cell types identified tend to line up with expectations for the type of tumor. +For example, leukemia libraries have T and B cells, brain tumors have macrophages, and solid tumors have fibroblasts and muscle cells. +- Projects that I would expect to be more difficult to classify (sarcomas, wilms, RB) have fewer cells classified then things like brain and leukemia. +Notably many of the solid tumor projects (4, 5, 12-16, and 23) have a handful of PDX samples where I would expect to see fewer normal cells. + +## Most frequently observed cell types + +The last thing we will do is look at the most frequently observed cell types across all samples. +The below table is ordered by the number of libraries the cell type is observed. 
+ +```{r} +all_results_df |> + dplyr::filter(consensus_annotation != "Unknown") |> + dplyr::group_by(consensus_annotation) |> + dplyr::summarize( + total_libraries = dplyr::n(), + min_percentage = min(percent_cells_annotation), + mean_percentage = round(mean(percent_cells_annotation), 2), + median_percentage = median(percent_cells_annotation), + max_percentage = max(percent_cells_annotation) + ) |> + dplyr::arrange(desc(total_libraries)) + +``` + + +## Session info + +```{r session info} +# record the versions of the packages used in this analysis and other environment information +sessionInfo() +``` + diff --git a/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.nb.html b/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.nb.html new file mode 100644 index 000000000..e280b8148 --- /dev/null +++ b/analyses/cell-type-consensus/exploratory-notebooks/02-explore-consensus-results.nb.html @@ -0,0 +1,2247 @@ + + + + + + + + + + + + + + + +Explore consensus cell types + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+ +
+ + +

This notebook summarizes the findings from assigning consensus cell +type labels to all ScPCA samples. All results from the +cell-type-consensus module in OpenScPCA-nf +must be saved to results prior to rendering this +notebook.

+ + + + +
# attach ggplot2 without printing its startup messages
suppressPackageStartupMessages(library(ggplot2))

# every plot drawn after this point uses theme_classic() unless it sets its own theme
theme_set(theme_classic())
+ + + + +
+

Functions

+ + + + +
# Summarize the consensus cell type calls stored in one results file.
#
# Arguments:
#   file: path handed to readr::read_tsv(); the table must contain the columns
#     library_id, consensus_annotation and consensus_ontology, and every row
#     is counted as one cell
#   id: not used in the body; it is kept so the function can be passed directly
#     to purrr::imap(), which supplies the element name as a second argument
#
# Returns an ungrouped data frame with one row per combination of library_id,
# consensus_annotation and consensus_ontology, holding:
#   total_cells_per_annotation: number of rows (cells) in that combination
#   total_cells_per_library: number of rows (cells) in the library
#   num_celltypes: number of distinct consensus_annotation values in the
#     library; no label is excluded from this count
#   percent_cells_annotation: share of the library's cells in that combination,
#     as a percentage rounded to 2 decimal places
summarize_celltypes <- function(file, id) {
  cells_df <- readr::read_tsv(file)

  # per-library totals: cell count and number of distinct labels
  library_totals_df <- cells_df |>
    dplyr::group_by(library_id) |>
    dplyr::summarize(
      total_cells_per_library = dplyr::n(),
      num_celltypes = dplyr::n_distinct(consensus_annotation),
      .groups = "drop"
    )

  # cells per library/annotation; the grouping is dropped explicitly so the
  # join and the percentage are computed on an ungrouped table
  cells_df |>
    dplyr::group_by(library_id, consensus_annotation, consensus_ontology) |>
    dplyr::summarize(
      total_cells_per_annotation = dplyr::n(),
      .groups = "drop"
    ) |>
    dplyr::left_join(library_totals_df, by = "library_id") |>
    dplyr::mutate(
      percent_cells_annotation = round(
        (total_cells_per_annotation / total_cells_per_library) * 100,
        2
      )
    )
}
+ + + + +
+
+

Data setup

+ + + + +
# root of the OpenScPCA repository, identified through its (hidden) .git directory
repository_base <- rprojroot::find_root(rprojroot::is_git_root)

# folder of the cell-type-consensus analysis module inside the repository
module_base <- file.path(
  repository_base,
  "analyses",
  "cell-type-consensus"
)

# folder that holds the cell-type-consensus results read by this notebook
results_dir <- file.path(
  module_base,
  "results",
  "cell-type-consensus"
)

# table of project diagnoses, used to label the plots
diagnoses_file <- file.path(
  module_base,
  "sample-info",
  "project-diagnoses.tsv"
)
+ + + + + + + + +
# Find the consensus result files and key them by project id.
#
# The same pattern is used to list the files and to strip the suffix from their
# names, so the two steps cannot disagree. Both dots are escaped so they match
# literal dots only, and the pattern is anchored to the end of the file name.
results_suffix_pattern <- "_consensus-cell-types\\.tsv\\.gz$"
results_files <- list.files(
  results_dir,
  pattern = results_suffix_pattern,
  full.names = TRUE
)

# project id = file name without the results suffix
project_ids <- stringr::str_remove(basename(results_files), results_suffix_pattern)
names(results_files) <- project_ids

# cell line projects are left out of this exploration
cell_line_projects <- c("SCPCP000020", "SCPCP000024")
project_ids <- setdiff(project_ids, cell_line_projects)
results_files <- results_files[project_ids]

# stop here with a clear message instead of failing later on an empty table
if (length(results_files) == 0) {
  stop(
    "No consensus cell type results files to summarize were found in ",
    results_dir,
    call. = FALSE
  )
}
+ + + + + + + + +
# project-level table that is joined onto the results by project_id
diagnoses_df <- readr::read_tsv(diagnoses_file)

# summarize each results file (imap passes the path and its name, the project id),
# stack the per-project tables with the project id in its own column,
# then attach the diagnoses and build the label shown on the plots
all_results_df <- purrr::imap(results_files, summarize_celltypes) |>
  dplyr::bind_rows(.id = "project_id") |>
  dplyr::left_join(diagnoses_df, by = "project_id") |>
  dplyr::mutate(project_label = glue::glue("{project_id}:{diagnosis}"))
+
+ + + + +
+
+

Is it all just Unknown?

+

The first thing we will look at is how many of the cells in each +sample are categorized as “Unknown”, which means no consensus between +SingleR and CellAssign was identified.

+ + + + +
# keep only the rows describing cells labeled "Unknown"
unknown_only <- dplyr::filter(all_results_df, consensus_annotation == "Unknown")

# percentage of "Unknown" cells, one point per row of unknown_only, split by project
ggplot(
  data = unknown_only,
  mapping = aes(x = project_label, y = percent_cells_annotation)
) +
  ggforce::geom_sina(size = 0.1) +
  xlab("") +
  ylab("Percent of cells annotated as Unknown") +
  theme(
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
    # project labels are long, so they are drawn vertically
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
+ + + + +

+ + + + +
NA
+ + + + +

It looks like we do have some samples that aren’t just all “Unknown”! +It definitely varies by project, but for most projects we at least see +some proportion of samples with assigned cell types.

+

Let’s look at how many samples actually have at least some cells with a label other than +“Unknown”. To do this, we will identify all libraries in which +every cell is called as “Unknown”.

+ + + + +
# Per project, count the libraries in which every cell is labeled "Unknown".
#
# A library is flagged from its labels rather than by testing the rounded
# percentage against 100, because a library that is e.g. 99.996% "Unknown"
# rounds to 100.00 although it does contain classified cells.
# Starting from all_results_df also keeps libraries without any "Unknown" row
# in the denominators.
#
# Output columns (one row per project_label):
#   all_unknown: number of libraries made up of "Unknown" cells only
#   classified_cells: number of libraries (not cells, despite the name) with
#     at least one label other than "Unknown"
#   percentage_unknown: all_unknown as a percentage of the project's libraries,
#     rounded to 2 decimal places
#   total_libraries: number of libraries in the project
high_tumor_df <- all_results_df |>
  dplyr::group_by(project_label, library_id) |>
  dplyr::summarize(
    # %in% never returns NA, so a missing label counts as "not Unknown"
    no_cells_identified = all(consensus_annotation %in% "Unknown"),
    .groups = "drop"
  ) |>
  dplyr::group_by(project_label) |>
  dplyr::summarize(
    all_unknown = sum(no_cells_identified),
    classified_cells = sum(!no_cells_identified),
    percentage_unknown = round(all_unknown / (all_unknown + classified_cells) * 100, 2),
    # add number of libraries for plotting
    total_libraries = dplyr::n()
  ) |>
  # set order for plots
  dplyr::mutate(
    project_label = forcats::fct_reorder(project_label, total_libraries, .desc = TRUE)
  )
+ + + + +

Which projects have the highest proportion of samples with all +“Unknown”?

+ + + + +
# projects listed from the highest to the lowest percentage of all-"Unknown" libraries
high_tumor_df |>
  dplyr::arrange(dplyr::desc(percentage_unknown)) |>
  dplyr::select(project_label, percentage_unknown)
+ + + + +
+ +
+ + + + +
NA
+ + + + +

It looks like all projects do have cell types identified that are not +“Unknown”. However, SCPCP000011 (retinoblastoma) has a +fairly high percentage of samples without any consensus labels.

+
+
+

Number of cell types observed

+

Below we look at the number of cell types observed in each project +for all samples. This does not include cells labeled as “Unknown”.

+ + + + +
# Number of cell types per library, not counting "Unknown".
#
# The count is recomputed after the "Unknown" rows are removed; the
# num_celltypes column already present in all_results_df counts every distinct
# label and would therefore include "Unknown" as a cell type.
# Libraries that only contain "Unknown" cells have no rows left after the
# filter and do not appear in the histogram.
num_celltypes_df <- all_results_df |>
  # remove unknown as a cell type
  dplyr::filter(consensus_annotation != "Unknown") |>
  # add a new line for facet labels
  dplyr::mutate(facet_label = glue::glue("{project_id}\n{diagnosis}")) |>
  dplyr::group_by(facet_label, library_id) |>
  dplyr::summarize(
    num_celltypes = dplyr::n_distinct(consensus_annotation),
    .groups = "drop"
  )

# one panel per project; bins are one cell type wide and centered on integers
ggplot(num_celltypes_df, aes(x = num_celltypes)) +
  geom_histogram(binwidth = 1, center = 0) +
  facet_wrap(
    vars(facet_label),
    ncol = 3
  ) +
  labs(
    x = "Number of cell types"
  ) +
  theme_bw()
+ + + + +

+ + + + +
+
+

Distribution of consensus cell types

+

Now we look at the distribution of the cell types in each sample. For +these plots, we will pull out the top 9 cell types for each project. All +other cells will be labeled with “All remaining cell types”.

+

The top cell types are determined by counting how many libraries each +cell type is found in within a project and taking the most frequent +types.

+ + + + +
# Add top_celltypes: within each project, the 9 labels that occur in the most
# rows of all_results_df keep their name and every other label becomes
# "All remaining cell types".
# NOTE(review): "Unknown" takes part in the lumping like any other label, so it
# can occupy one of the 9 slots - confirm this is intended.
plot_df <- all_results_df |>
  dplyr::group_by(project_id) |>
  dplyr::mutate(
    top_celltypes = consensus_annotation |>
      forcats::fct_lump_n(
        9,
        other_level = "All remaining cell types",
        ties.method = "first"
      ) |>
      # sort by frequency
      forcats::fct_infreq() |>
      # move "All remaining cell types" and "Unknown" to the end; only levels
      # that exist in the project are moved, so projects with 9 or fewer labels
      # (no lumped level) no longer raise an "unknown level" warning
      forcats::fct_relevel(
        \(lvls) intersect(c("All remaining cell types", "Unknown"), lvls),
        after = Inf
      )
  ) |>
  # later steps do not need the per-project grouping
  dplyr::ungroup()
+ + + +
Warning: There was 1 warning in `dplyr::mutate()`.
+ℹ In argument: `top_celltypes = forcats::fct_relevel(...)`.
+ℹ In group 19: `project_id = "SCPCP000021"`.
+Caused by warning:
+! 1 unknown level in `f`: All remaining cell types
+ + + +
# all cell types that appear as a top cell type in at least one project,
# sorted by the level order of the top_celltypes factor
unique_celltypes <- plot_df |>
  dplyr::filter(!top_celltypes %in% c("All remaining cell types", "Unknown")) |>
  dplyr::pull(top_celltypes) |>
  unique() |>
  sort() |>
  as.character()

# Build one color per cell type, whatever the number of cell types is:
# the 26 "alphabet" colors first, then "black", then extra hcl colors if needed.
# With 27 cell types this gives the alphabet palette plus "black".
n_celltypes <- length(unique_celltypes)
alphabet_colors <- unname(palette.colors(palette = "alphabet"))
n_beyond_black <- n_celltypes - length(alphabet_colors) - 1
overflow_colors <- if (n_beyond_black > 0) {
  hcl.colors(n_beyond_black, palette = "Dark 3")
}
celltype_colors <- c(alphabet_colors, "black", overflow_colors)[seq_len(n_celltypes)]

# the two catch-all categories always get the same greys
colors <- c(
  celltype_colors,
  "grey60",
  "grey95"
)
names(colors) <- c(unique_celltypes, "All remaining cell types", "Unknown")
+ + + + + + + + +
project_labels <- unique(all_results_df$project_label)

# one stacked bar chart per project showing, for each library, the percentage
# of cells in each top cell type (including "All remaining cell types" and "Unknown");
# the charts are stacked in a single column
project_labels |>
  purrr::map(\(label) {
    project_df <- plot_df |>
      dplyr::filter(project_label == label) |>
      dplyr::mutate(
        # relevel factors for specific project; only the catch-all levels that
        # are present are moved to the end, which avoids an "unknown level"
        # warning when one of them is absent
        top_celltypes = forcats::fct_infreq(top_celltypes) |>
          forcats::fct_relevel(
            \(lvls) intersect(c("All remaining cell types", "Unknown"), lvls),
            after = Inf
          )
      )

    # make a stacked bar chart with top cell types
    ggplot(project_df) +
      aes(
        x = library_id,
        y = percent_cells_annotation,
        fill = top_celltypes
      ) +
      geom_col() +
      scale_y_continuous(expand = c(0, 0)) +
      # colors are matched to cell types by name, so a cell type has the same
      # color in every project
      scale_fill_manual(values = colors, name = "cell type") +
      ggtitle(label) +
      # library ids are not shown on the x axis
      theme(axis.text.x = element_blank())
  }) |>
  patchwork::wrap_plots(ncol = 1)
+ + + + +

+ + + + +

This looks really promising! A few observations:

+ +
+
+

Most frequently observed cell types

+

The last thing we will do is look at the most frequently observed +cell types across all samples. The table below is ordered by the number +of libraries in which the cell type is observed.

+ + + + +
# per cell type (excluding "Unknown"): number of rows in all_results_df and the
# spread of the per-row percentage of cells, most frequent cell type first
# NOTE(review): total_libraries is a row count; it equals the number of
# libraries only if each library has one row per cell type - confirm
dplyr::filter(all_results_df, consensus_annotation != "Unknown") |>
  dplyr::group_by(consensus_annotation) |>
  dplyr::summarize(
    total_libraries = dplyr::n(),
    min_percentage = min(percent_cells_annotation),
    mean_percentage = round(mean(percent_cells_annotation), 2),
    median_percentage = median(percent_cells_annotation),
    max_percentage = max(percent_cells_annotation)
  ) |>
  dplyr::arrange(dplyr::desc(total_libraries))
+ + + + +
+ +
+ + + + +
NA
+ + + + +
+
+

Session info

+ + + + +
# print the R version, loaded packages and other environment details for this run
utils::sessionInfo()
+ + + + + +
+ +
LS0tCnRpdGxlOiAiRXhwbG9yZSBjb25zZW5zdXMgY2VsbCB0eXBlcyIKYXV0aG9yOiBBbGx5IEhhd2tpbnMKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2RlcHRoOiAzCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKLS0tCgpUaGlzIG5vdGVib29rIHN1bW1hcml6ZXMgdGhlIGZpbmRpbmdzIGZyb20gYXNzaWduaW5nIGNvbnNlbnN1cyBjZWxsIHR5cGUgbGFiZWxzIHRvIGFsbCBTY1BDQSBzYW1wbGVzLiAKQWxsIHJlc3VsdHMgZnJvbSB0aGUgYGNlbGwtdHlwZS1jb25zZW5zdXNgIG1vZHVsZSBpbiBgT3BlblNjUENBLW5mYCBtdXN0IGJlIHNhdmVkIHRvIGByZXN1bHRzYCBwcmlvciB0byByZW5kZXJpbmcgdGhpcyBub3RlYm9vay4gCgpgYGB7ciBwYWNrYWdlc30Kc3VwcHJlc3NQYWNrYWdlU3RhcnR1cE1lc3NhZ2VzKHsKICAjIGxvYWQgcmVxdWlyZWQgcGFja2FnZXMKICBsaWJyYXJ5KGdncGxvdDIpCn0pCgojIFNldCBkZWZhdWx0IGdncGxvdCB0aGVtZQp0aGVtZV9zZXQoCiAgdGhlbWVfY2xhc3NpYygpCikKYGBgCgojIyBGdW5jdGlvbnMgCgpgYGB7cn0KIyBmdW5jdGlvbiB0byByZWFkIGluIHByb2plY3QgZGF0YSBmcmFtZXMgd2l0aCBhbGwgY2VsbHMgaW4gYSBwcm9qZWN0CiMgb3V0cHV0IGlzIGEgc3VtbWFyaXplZCB0YWJsZSB3aXRoIHRvdGFsIGNlbGxzIHBlciBzYW1wbGUsIHRvdGFsIGNlbGxzIHBlciBhbm5vdGF0aW9uLCBhbmQgbnVtYmVyIG9mIGNlbGwgdHlwZXMgCnN1bW1hcml6ZV9jZWxsdHlwZXMgPC0gZnVuY3Rpb24oZmlsZSwgaWQpewogIAogICMgcmVhZCBpbiBkYXRhCiAgZGYgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUpIAogIAogICMgZ2V0IHRvdGFsIGNlbGwgY291bnQgYW5kIG51bWJlciBvZiBhc3NpZ25lZCBjZWxsIHR5cGVzIHBlciBsaWJyYXJ5CiAgdG90YWxfY2VsbHNfZGYgPC0gZGYgfD4gCiAgICBkcGx5cjo6Z3JvdXBfYnkobGlicmFyeV9pZCkgfD4gCiAgICBkcGx5cjo6c3VtbWFyaXplKAogICAgICB0b3RhbF9jZWxsc19wZXJfbGlicmFyeSA9IGxlbmd0aChsaWJyYXJ5X2lkKSwKICAgICAgbnVtX2NlbGx0eXBlcyA9IGxlbmd0aCh1bmlxdWUoY29uc2Vuc3VzX2Fubm90YXRpb24pKQogICAgKQogIAogIHN1bW1hcnlfZGYgPC0gZGYgfD4gCiAgICBkcGx5cjo6Z3JvdXBfYnkobGlicmFyeV9pZCwgY29uc2Vuc3VzX2Fubm90YXRpb24sIGNvbnNlbnN1c19vbnRvbG9neSkgfD4gCiAgICBkcGx5cjo6c3VtbWFyaXplKHRvdGFsX2NlbGxzX3Blcl9hbm5vdGF0aW9uID0gbGVuZ3RoKGNvbnNlbnN1c19hbm5vdGF0aW9uKSkgfD4KICAgIGRwbHlyOjpsZWZ0X2pvaW4odG90YWxfY2VsbHNfZGYsIGJ5ID0gImxpYnJhcnlfaWQiKSB8PiAKICAgIGRwbHlyOjptdXRhdGUoCiAgICAgICMgYWRkIHBlcmNlbnRhZ2UgCiAgICAgIHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbiA9IHJvdW5kKCh0b3RhbF9jZWxsc19wZXJfYW5ub3RhdGlvbiAv
IHRvdGFsX2NlbGxzX3Blcl9saWJyYXJ5KSAqIDEwMCwgMikKICAgICkgfD4gCiAgICBkcGx5cjo6dW5ncm91cCgpCiAgCiAgcmV0dXJuKHN1bW1hcnlfZGYpCiAgCn0KYGBgCgojIyBEYXRhIHNldHVwCgoKYGBge3IgYmFzZSBwYXRoc30KIyBUaGUgYmFzZSBwYXRoIGZvciB0aGUgT3BlblNjUENBIHJlcG9zaXRvcnksIGZvdW5kIGJ5IGl0cyAoaGlkZGVuKSAuZ2l0IGRpcmVjdG9yeQpyZXBvc2l0b3J5X2Jhc2UgPC0gcnByb2pyb290OjpmaW5kX3Jvb3QocnByb2pyb290Ojppc19naXRfcm9vdCkKbW9kdWxlX2Jhc2UgPC0gZmlsZS5wYXRoKHJlcG9zaXRvcnlfYmFzZSwgImFuYWx5c2VzIiwgImNlbGwtdHlwZS1jb25zZW5zdXMiKQoKIyByZXN1bHRzIGRpcmVjdG9yeSB3aXRoIGNlbGwtdHlwZS1jb25zZW5zdXMgCnJlc3VsdHNfZGlyIDwtIGZpbGUucGF0aChtb2R1bGVfYmFzZSwgInJlc3VsdHMiLCAiY2VsbC10eXBlLWNvbnNlbnN1cyIpCgojIGRpYWdub3NlcyB0YWJsZSB1c2VkIGZvciBsYWJlbGluZyBwbG90cyAKZGlhZ25vc2VzX2ZpbGUgPC0gZmlsZS5wYXRoKG1vZHVsZV9iYXNlLCAic2FtcGxlLWluZm8iLCAicHJvamVjdC1kaWFnbm9zZXMudHN2IikKYGBgCgpgYGB7cn0KIyBsaXN0IGFsbCByZXN1bHRzIGZpbGVzIApyZXN1bHRzX2ZpbGVzIDwtIGxpc3QuZmlsZXMocmVzdWx0c19kaXIsIHBhdHRlcm4gPSAiX2NvbnNlbnN1cy1jZWxsLXR5cGVzXFwudHN2LlxcZ3okIiwgZnVsbC5uYW1lcyA9IFRSVUUpCgojIGdldCBwcm9qZWN0IGlkcyBmcm9tIGZpbGUgbGlzdCAgCnByb2plY3RfaWRzIDwtIHN0cmluZ3I6OnN0cl9yZW1vdmUoYmFzZW5hbWUocmVzdWx0c19maWxlcyksICJfY29uc2Vuc3VzLWNlbGwtdHlwZXMudHN2Lmd6IikKbmFtZXMocmVzdWx0c19maWxlcykgPC0gcHJvamVjdF9pZHMKCiMgcmVtb3ZlIGNlbGwgbGluZSBwcm9qZWN0cyBmcm9tIGZpbGUgbGlzdApjZWxsX2xpbmVfcHJvamVjdHMgPC0gYygiU0NQQ1AwMDAwMjAiLCAiU0NQQ1AwMDAwMjQiKQpwcm9qZWN0X2lkcyA8LSBzZXRkaWZmKHByb2plY3RfaWRzLCBjZWxsX2xpbmVfcHJvamVjdHMpICMgcmVtb3ZlIGNlbGwgbGluZSBwcm9qZWN0cwpyZXN1bHRzX2ZpbGVzIDwtIHJlc3VsdHNfZmlsZXNbcHJvamVjdF9pZHNdCmBgYAoKCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQojIHJlYWQgaW4gZGlhZ25vc2VzCmRpYWdub3Nlc19kZiA8LSByZWFkcjo6cmVhZF90c3YoZGlhZ25vc2VzX2ZpbGUpCgoKIyByZWFkIGluIHJlc3VsdHMgYW5kIHByZXAgZGF0YSBmcmFtZSBmb3IgcGxvdHRpbmcgCmFsbF9yZXN1bHRzX2RmIDwtIHJlc3VsdHNfZmlsZXMgfD4gCiAgcHVycnI6OmltYXAoc3VtbWFyaXplX2NlbGx0eXBlcykgfD4gCiAgZHBseXI6OmJpbmRfcm93cyguaWQgPSAicHJvamVjdF9pZCIpIHw+IAogICMgYWRkIGluIGRpYWdub3NlcyAKICBkcGx5cjo6bGVmdF9qb2luKGRpYWdub3Nlc19kZiwgYnkgPSAicHJvamVjdF9pZCIpIHw+IAogIGRwbHlyOjptdXRhdGUo
CiAgICAjIGNyZWF0ZSBhIGxhYmVsIGZvciBwbG90dGluZwogICAgcHJvamVjdF9sYWJlbCA9IGdsdWU6OmdsdWUoIntwcm9qZWN0X2lkfTp7ZGlhZ25vc2lzfSIpCiAgKQoKYGBgCgojIyBJcyBpdCBhbGwganVzdCBVbmtub3duPwoKVGhlIGZpcnN0IHRoaW5nIHdlIHdpbGwgbG9vayBhdCBpcyBob3cgbWFueSBvZiB0aGUgY2VsbHMgaW4gZWFjaCBzYW1wbGUgYXJlIGNhdGVnb3JpemVkIGFzICJVbmtub3duIiwgd2hpY2ggbWVhbnMgbm8gY29uc2Vuc3VzIGJldHdlZW4gYFNpbmdsZVJgIGFuZCBgQ2VsbEFzc2lnbmAgd2FzIGlkZW50aWZpZWQuIAoKYGBge3IsIGZpZy5oZWlnaHQ9N30KdW5rbm93bl9vbmx5IDwtIGFsbF9yZXN1bHRzX2RmIHw+IAogIGRwbHlyOjpmaWx0ZXIoY29uc2Vuc3VzX2Fubm90YXRpb24gPT0gIlVua25vd24iKQoKZ2dwbG90KHVua25vd25fb25seSwgYWVzKHggPSBwcm9qZWN0X2xhYmVsLCB5ID0gcGVyY2VudF9jZWxsc19hbm5vdGF0aW9uKSkgKwogIGdnZm9yY2U6Omdlb21fc2luYShzaXplID0gMC4xKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgaGp1c3QgPSAxLCB2anVzdCA9IDAuNSksCiAgICAgICAgcGxvdC5tYXJnaW4gPSBtYXJnaW4oMTAsMTAsMTAsMTApKSArCiAgbGFicygKICAgIHggPSAiIiwgCiAgICB5ID0gIlBlcmNlbnQgb2YgY2VsbHMgYW5ub3RhdGVkIGFzIFVua25vd24iCiAgKQogIApgYGAKCkl0IGxvb2tzIGxpa2Ugd2UgZG8gaGF2ZSBzb21lIHNhbXBsZXMgdGhhdCBhcmVuJ3QganVzdCBhbGwgIlVua25vd24iIQpJdCBkZWZpbml0ZWx5IHZhcmllcyBieSBwcm9qZWN0LCBidXQgZm9yIG1vc3QgcHJvamVjdHMgd2UgYXQgbGVhc3Qgc2VlIHNvbWUgcHJvcG9ydGlvbiBvZiBzYW1wbGVzIHdpdGggYXNzaWduZWQgY2VsbCB0eXBlcy4gCgpMZXQncyBsb29rIGF0IGhvdyBtYW55IHNhbXBsZXMgYWN0dWFsbHkgaGF2ZSBzb21lIGNlbGxzIG91dHNpZGUgb2YgdW5rbm93biBpZGVudGlmaWVkLiAKVG8gZG8gdGhpcywgd2Ugd2lsbCBpZGVudGlmeSBhbGwgbGlicmFyaWVzIHRoYXQgb25seSBoYXZlIGNlbGxzIGNhbGxlZCBhcyAiVW5rbm93biIuIAoKYGBge3J9CmhpZ2hfdHVtb3JfZGYgPC0gdW5rbm93bl9vbmx5IHw+IAogIGRwbHlyOjptdXRhdGUobm9fY2VsbHNfaWRlbnRpZmllZCA9IHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbiA9PSAxMDApIHw+IAogIGRwbHlyOjpncm91cF9ieShwcm9qZWN0X2xhYmVsKSB8PiAKICBkcGx5cjo6c3VtbWFyaXplKGFsbF91bmtub3duID0gc3VtKG5vX2NlbGxzX2lkZW50aWZpZWQpLAogICAgICAgICAgICAgICAgICAgY2xhc3NpZmllZF9jZWxscyA9IHN1bSghbm9fY2VsbHNfaWRlbnRpZmllZCksCiAgICAgICAgICAgICAgICAgICBwZXJjZW50YWdlX3Vua25vd24gPSByb3VuZChhbGxfdW5rbm93bi8oYWxsX3Vua25vd24gKyBjbGFzc2lmaWVkX2NlbGxzKSoxMDAsIDIpLAogICAgICAgICAgICAgICAgICAgIyBh
ZGQgbnVtYmVyIG9mIGxpYnJhcmllcyBmb3IgcGxvdHRpbmcgCiAgICAgICAgICAgICAgICAgICB0b3RhbF9saWJyYXJpZXMgPSBsZW5ndGgobGlicmFyeV9pZCkpIHw+CiAgIyBzZXQgb3JkZXIgZm9yIHBsb3RzIAogIGRwbHlyOjptdXRhdGUocHJvamVjdF9sYWJlbCA9IGZvcmNhdHM6OmZjdF9yZW9yZGVyKHByb2plY3RfbGFiZWwsIHRvdGFsX2xpYnJhcmllcywgLmRlc2MgPSBUUlVFKSkKYGBgCgoKV2hpY2ggcHJvamVjdHMgaGF2ZSB0aGUgaGlnaGVzdCBwcm9wb3J0aW9uIG9mIHNhbXBsZXMgd2l0aCBhbGwgIlVua25vd24iPyAKCmBgYHtyfQojIHRhYmxlIHdpdGggcGVyY2VudGFnZSBvZiBzYW1wbGVzIApoaWdoX3R1bW9yX2RmIHw+IAogIGRwbHlyOjpzZWxlY3QocHJvamVjdF9sYWJlbCwgcGVyY2VudGFnZV91bmtub3duKSB8PiAKICBkcGx5cjo6YXJyYW5nZShkZXNjKHBlcmNlbnRhZ2VfdW5rbm93bikpCgpgYGAKCkl0IGxvb2tzIGxpa2UgYWxsIHByb2plY3RzIGRvIGhhdmUgY2VsbCB0eXBlcyBpZGVudGlmaWVkIHRoYXQgYXJlIG5vdCAiVW5rbm93biIuIApIb3dldmVyLCBgU0NQQ1AwMDAwMTFgIChyZXRpbm9ibGFzdG9tYSksIGhhcyBhIGZhaXJseSBoaWdoIHBlcmNlbnRhZ2Ugb2Ygc2FtcGxlcyB3aXRob3V0IGFueSBjb25zZW5zdXMgbGFiZWxzLiAKCiMjIE51bWJlciBvZiBjZWxsIHR5cGVzIG9ic2VydmVkCgpCZWxvdyB3ZSBsb29rIGF0IHRoZSBudW1iZXIgb2YgY2VsbCB0eXBlcyBvYnNlcnZlZCBpbiBlYWNoIHByb2plY3QgZm9yIGFsbCBzYW1wbGVzLiAKVGhpcyBkb2VzIG5vdCBpbmNsdWRlIGNlbGxzIGxhYmVsZWQgYXMgIlVua25vd24iLiAKCmBgYHtyLCBmaWcuaGVpZ2h0PTEwfQpudW1fY2VsbHR5cGVzX2RmIDwtIGFsbF9yZXN1bHRzX2RmIHw+IAogICMgYWRkIGEgbmV3IGxpbmUgZm9yIGZhY2V0IGxhYmVscyAKICBkcGx5cjo6bXV0YXRlKGZhY2V0X2xhYmVsID0gZ2x1ZTo6Z2x1ZSgie3Byb2plY3RfaWR9XG57ZGlhZ25vc2lzfSIpKSB8PgogICMgcmVtb3ZlIHVua25vd24gYXMgYSBjZWxsIHR5cGUgCiAgZHBseXI6OmZpbHRlcihjb25zZW5zdXNfYW5ub3RhdGlvbiAhPSAiVW5rbm93biIpIHw+IAogIGRwbHlyOjpzZWxlY3QoZmFjZXRfbGFiZWwsIGxpYnJhcnlfaWQsIG51bV9jZWxsdHlwZXMpIHw+IAogIHVuaXF1ZSgpCgpnZ3Bsb3QobnVtX2NlbGx0eXBlc19kZiwgYWVzKHggPSBudW1fY2VsbHR5cGVzKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY2VudGVyID0gMCkgKwogIGZhY2V0X3dyYXAodmFycyhmYWNldF9sYWJlbCksIAogICAgICAgICAgICAgbmNvbCA9IDMpICsKICBsYWJzKAogICAgeCA9ICJOdW1iZXIgb2YgY2VsbCB0eXBlcyIKICApICsKICB0aGVtZV9idygpCmBgYAoKIyMgRGlzdHJpYnV0aW9uIG9mIGNvbnNlbnN1cyBjZWxsIHR5cGVzIAoKTm93IHdlIGxvb2sgYXQgdGhlIGRpc3RyaWJ1dGlvbiBvZiB0aGUgY2VsbCB0eXBlcyBpbiBlYWNoIHNhbXBsZS4gCkZvciB0
aGVzZSBwbG90cywgd2Ugd2lsbCBwdWxsIG91dCB0aGUgdG9wIDkgY2VsbCB0eXBlcyBmb3IgZWFjaCBwcm9qZWN0LiAKQWxsIG90aGVyIGNlbGxzIHdpbGwgYmUgbGFiZWxlZCB3aXRoICJBbGwgcmVtYWluaW5nIGNlbGwgdHlwZXMiLiAKClRoZSB0b3AgY2VsbCB0eXBlcyBhcmUgZGV0ZXJtaW5lZCBieSBjb3VudGluZyBob3cgbWFueSBsaWJyYXJpZXMgZWFjaCBjZWxsIHR5cGUgaXMgZm91bmQgaW4gd2l0aGluIGEgcHJvamVjdCBhbmQgdGFraW5nIHRoZSBtb3N0IGZyZXF1ZW50IHR5cGVzLiAKCmBgYHtyfQpwbG90X2RmIDwtIGFsbF9yZXN1bHRzX2RmIHw+IAogICAgZHBseXI6Omdyb3VwX2J5KHByb2plY3RfaWQpIHw+IAogICAgZHBseXI6Om11dGF0ZSgKICAgICAgIyBnZXQgbW9zdCBmcmVxdWVudGx5IG9ic2VydmVkIGNlbGwgdHlwZXMgYWNyb3NzIGxpYnJhcmllcyBpbiB0aGF0IHByb2plY3QgCiAgICAgIHRvcF9jZWxsdHlwZXMgPSBmb3JjYXRzOjpmY3RfbHVtcF9uKGNvbnNlbnN1c19hbm5vdGF0aW9uLCA5LCBvdGhlcl9sZXZlbCA9ICJBbGwgcmVtYWluaW5nIGNlbGwgdHlwZXMiLCB0aWVzLm1ldGhvZCA9ICJmaXJzdCIpIHw+IAogICAgICAgICMgc29ydCBieSBmcmVxdWVuY3kgCiAgICAgICAgZm9yY2F0czo6ZmN0X2luZnJlcSgpIHw+IAogICAgICAgICMgbWFrZSBzdXJlIGFsbCByZW1haW5pbmcgYW5kIHVua25vd24gYXJlIGxhc3QsIHVzZSB0aGlzIHRvIGFzc2lnbiBjb2xvcnMgaW4gc3BlY2lmaWMgb3JkZXIKICAgICAgICBmb3JjYXRzOjpmY3RfcmVsZXZlbCgiQWxsIHJlbWFpbmluZyBjZWxsIHR5cGVzIiwgIlVua25vd24iLCBhZnRlciA9IEluZikKICAgICkKCiMgZ2V0IGFsbCB1bmlxdWUgY2VsbCB0eXBlcyBvcmRlcmVkIGJ5IGZyZXF1ZW5jeSAKdW5pcXVlX2NlbGx0eXBlcyA8LSBwbG90X2RmIHw+IAogIGRwbHlyOjpmaWx0ZXIoIXRvcF9jZWxsdHlwZXMgJWluJSBjKCJBbGwgcmVtYWluaW5nIGNlbGwgdHlwZXMiLCAiVW5rbm93biIpKSB8PiAKICBkcGx5cjo6cHVsbCh0b3BfY2VsbHR5cGVzKSB8PiAKICB1bmlxdWUoKSB8PgogIHNvcnQoKSB8PiAKICBhcy5jaGFyYWN0ZXIoKQoKIyBnZXQgY29sb3IgcGFsZXR0ZQpjb2xvcnMgPC0gYygKICBwYWxldHRlLmNvbG9ycyhwYWxldHRlID0gImFscGhhYmV0IiksCiAgImJsYWNrIiwgIyAxIGV4dHJhIHNpbmNlIGFscGhhYmV0IGlzIDI2IGFuZCB3ZSBoYXZlIDI3LCB0aGlzIHdpbGwgYmUgcGxhc21hIGNlbGwgd2hpY2ggc2hvd3MgdXAgb25jZSAKICAiZ3JleTYwIiwgCiAgImdyZXk5NSIKKQpuYW1lcyhjb2xvcnMpIDwtIGModW5pcXVlX2NlbGx0eXBlcywgIkFsbCByZW1haW5pbmcgY2VsbCB0eXBlcyIsICJVbmtub3duIikKYGBgCgoKYGBge3IsIGZpZy5oZWlnaHQ9NjAsIGZpZy53aWR0aD0xMH0KcHJvamVjdF9sYWJlbHMgPC0gdW5pcXVlKGFsbF9yZXN1bHRzX2RmJHByb2plY3RfbGFiZWwpCgojIHN0YWNrZWQgYmFyIGNoYXJ0IHNob3dpbmcgdGhlIGRpc3Ry
aWJ1dGlvbiBvZiB0aGUgdG9wIDkgY2VsbCB0eXBlcyBmb3IgZWFjaCBwcm9qZWN0LCBpbmNsdWRpbmcgVW5rbm93bgpwcm9qZWN0X2xhYmVscyB8PiAKICBwdXJycjo6bWFwKFwobGFiZWwpewogICAgCiAgICBwcm9qZWN0X2RmIDwtIHBsb3RfZGYgfD4gCiAgICAgIGRwbHlyOjpmaWx0ZXIocHJvamVjdF9sYWJlbCA9PSBsYWJlbCkgfD4gCiAgICAgIGRwbHlyOjptdXRhdGUoCiAgICAgICAgIyByZWxldmVsIGZhY3RvcnMgZm9yIHNwZWNpZmljIHByb2plY3QgCiAgICAgICAgdG9wX2NlbGx0eXBlcyA9IGZvcmNhdHM6OmZjdF9pbmZyZXEodG9wX2NlbGx0eXBlcykgfD4gCiAgICAgICAgICBmb3JjYXRzOjpmY3RfcmVsZXZlbCgiQWxsIHJlbWFpbmluZyBjZWxsIHR5cGVzIiwgIlVua25vd24iLCBhZnRlciA9IEluZikKICAgICAgKQogICAgCiAgICAjIG1ha2UgYSBzdGFja2VkIGJhciBjaGFydCB3aXRoIHRvcCBjZWxsIHR5cGVzIAogICAgZ2dwbG90KHByb2plY3RfZGYpICsgCiAgICAgIGFlcygKICAgICAgICB4ID0gbGlicmFyeV9pZCwgCiAgICAgICAgeSA9IHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbiwgCiAgICAgICAgZmlsbCA9IHRvcF9jZWxsdHlwZXMKICAgICAgKSArCiAgICAgIGdlb21fY29sKCkgKyAKICAgICAgc2NhbGVfeV9jb250aW51b3VzKGV4cGFuZCA9IGMoMCwwKSkgKwogICAgICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjb2xvcnMsIG5hbWUgPSAiY2VsbCB0eXBlIikgKwogICAgICBnZ3RpdGxlKGxhYmVsKSArCiAgICAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF9ibGFuaygpKQogIAogICAgfSkgfD4KICBwYXRjaHdvcms6OndyYXBfcGxvdHMobmNvbCA9IDEpCmBgYAoKClRoaXMgbG9va3MgcmVhbGx5IHByb21pc2luZyEKQSBmZXcgb2JzZXJ2YXRpb25zOiAKCi0gQ2VsbCB0eXBlcyBpZGVudGlmaWVkIHRlbmQgdG8gbGluZSB1cCB3aXRoIGV4cGVjdGF0aW9ucyBmb3IgdGhlIHR5cGUgb2YgdHVtb3IuIApGb3IgZXhhbXBsZSwgbGV1a2VtaWEgbGlicmFyaWVzIGhhdmUgVCBhbmQgQiBjZWxscywgYnJhaW4gdHVtb3JzIGhhdmUgbWFjcm9waGFnZXMsIGFuZCBzb2xpZCB0dW1vcnMgaGF2ZSBmaWJyb2JsYXN0cyBhbmQgbXVzY2xlIGNlbGxzLiAKLSBQcm9qZWN0cyB0aGF0IEkgd291bGQgZXhwZWN0IHRvIGJlIG1vcmUgZGlmZmljdWx0IHRvIGNsYXNzaWZ5IChzYXJjb21hcywgd2lsbXMsIFJCKSBoYXZlIGZld2VyIGNlbGxzIGNsYXNzaWZpZWQgdGhlbiB0aGluZ3MgbGlrZSBicmFpbiBhbmQgbGV1a2VtaWEuIApOb3RhYmx5IG1hbnkgb2YgdGhlIHNvbGlkIHR1bW9yIHByb2plY3RzICg0LCA1LCAxMi0xNiwgYW5kIDIzKSBoYXZlIGEgaGFuZGZ1bCBvZiBQRFggc2FtcGxlcyB3aGVyZSBJIHdvdWxkIGV4cGVjdCB0byBzZWUgZmV3ZXIgbm9ybWFsIGNlbGxzLiAKCiMjIE1vc3QgZnJlcXVlbnRseSBvYnNlcnZlZCBjZWxsIHR5cGVzIAoKVGhlIGxhc3QgdGhpbmcgd2Ugd2lsbCBkbyBpcyBsb29rIGF0
IHRoZSBtb3N0IGZyZXF1ZW50bHkgb2JzZXJ2ZWQgY2VsbCB0eXBlcyBhY3Jvc3MgYWxsIHNhbXBsZXMuIApUaGUgYmVsb3cgdGFibGUgaXMgb3JkZXJlZCBieSB0aGUgbnVtYmVyIG9mIGxpYnJhcmllcyB0aGUgY2VsbCB0eXBlIGlzIG9ic2VydmVkLiAKCmBgYHtyfQphbGxfcmVzdWx0c19kZiB8PiAKICBkcGx5cjo6ZmlsdGVyKGNvbnNlbnN1c19hbm5vdGF0aW9uICE9ICJVbmtub3duIikgfD4gCiAgZHBseXI6Omdyb3VwX2J5KGNvbnNlbnN1c19hbm5vdGF0aW9uKSB8PiAKICBkcGx5cjo6c3VtbWFyaXplKAogICAgdG90YWxfbGlicmFyaWVzID0gZHBseXI6Om4oKSwKICAgIG1pbl9wZXJjZW50YWdlID0gbWluKHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbiksCiAgICBtZWFuX3BlcmNlbnRhZ2UgPSByb3VuZChtZWFuKHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbiksIDIpLAogICAgbWVkaWFuX3BlcmNlbnRhZ2UgPSBtZWRpYW4ocGVyY2VudF9jZWxsc19hbm5vdGF0aW9uKSwKICAgIG1heF9wZXJjZW50YWdlID0gbWF4KHBlcmNlbnRfY2VsbHNfYW5ub3RhdGlvbikKICApIHw+IAogIGRwbHlyOjphcnJhbmdlKGRlc2ModG90YWxfbGlicmFyaWVzKSkKICAKYGBgCgoKIyMgU2Vzc2lvbiBpbmZvIAoKYGBge3Igc2Vzc2lvbiBpbmZvfQojIHJlY29yZCB0aGUgdmVyc2lvbnMgb2YgdGhlIHBhY2thZ2VzIHVzZWQgaW4gdGhpcyBhbmFseXNpcyBhbmQgb3RoZXIgZW52aXJvbm1lbnQgaW5mb3JtYXRpb24Kc2Vzc2lvbkluZm8oKQpgYGAKCg==
+ + + +
+ + + + + + + + + + + + + + + + diff --git a/analyses/cell-type-consensus/exploratory-notebooks/README.md b/analyses/cell-type-consensus/exploratory-notebooks/README.md new file mode 100644 index 000000000..f1794ee6f --- /dev/null +++ b/analyses/cell-type-consensus/exploratory-notebooks/README.md @@ -0,0 +1,9 @@ +# Exploratory notebooks + +This folder contains exploratory notebooks for this module. + +1. `01-reference-exploration.Rmd`: This notebook was used to explore possible consensus label assignments between cell types in the `PanglaoDB` and `BlueprintEncodeData` references. +Observations made in this notebook were used to define the set of possible consensus labels to be included in [`references/consensus-cell-type-reference.tsv`](../references/consensus-cell-type-reference.tsv). + +2. `02-explore-consensus-results.Rmd`: This notebook summarizes the consensus labels assigned to all ScPCA samples. +Prior to rendering this notebook, results from the `cell-type-consensus` module in `OpenScPCA-nf` using the `2024-11-25` release were downloaded. 
diff --git a/analyses/cell-type-consensus/renv.lock b/analyses/cell-type-consensus/renv.lock index 1912a8caa..520a536e5 100644 --- a/analyses/cell-type-consensus/renv.lock +++ b/analyses/cell-type-consensus/renv.lock @@ -478,6 +478,19 @@ ], "Hash": "6b868847b365672d6c1677b1608da9ed" }, + "RcppEigen": { + "Package": "RcppEigen", + "Version": "0.3.4.0.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "Rcpp", + "stats", + "utils" + ], + "Hash": "4ac8e423216b8b70cb9653d1b3f71eb9" + }, "RcppTOML": { "Package": "RcppTOML", "Version": "0.2.2", @@ -1109,6 +1122,22 @@ ], "Hash": "bd1297f9b5b1fc1372d19e2c4cd82215" }, + "forcats": { + "Package": "forcats", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "magrittr", + "rlang", + "tibble" + ], + "Hash": "1a0a9a3d5083d0d573c4214576f1e690" + }, "fs": { "Package": "fs", "Version": "1.6.5", @@ -1141,6 +1170,35 @@ ], "Hash": "ed33b16c6d24f7ced1d68877ac2509ee" }, + "ggforce": { + "Package": "ggforce", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "MASS", + "R", + "Rcpp", + "RcppEigen", + "cli", + "ggplot2", + "grDevices", + "grid", + "gtable", + "lifecycle", + "polyclip", + "rlang", + "scales", + "stats", + "systemfonts", + "tidyselect", + "tweenr", + "utils", + "vctrs", + "withr" + ], + "Hash": "384b388bd9155468d2c851846ee69f9f" + }, "ggplot2": { "Package": "ggplot2", "Version": "3.5.1", @@ -1703,6 +1761,16 @@ ], "Hash": "bd54ba8a0a5faded999a7aab6e46b374" }, + "polyclip": { + "Package": "polyclip", + "Version": "1.10-7", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R" + ], + "Hash": "5879bf5aae702ffef0a315c44328f984" + }, "prettyunits": { "Package": "prettyunits", "Version": "1.2.0", @@ -2044,6 +2112,22 @@ "Repository": "RSPM", "Hash": "de342ebfebdbf40477d0758d05426646" }, + "systemfonts": { + "Package": "systemfonts", + "Version": "1.2.1", 
+ "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "R", + "cpp11", + "grid", + "jsonlite", + "lifecycle", + "tools", + "utils" + ], + "Hash": "f8b2924480a2679e2bad9750646112fe" + }, "tibble": { "Package": "tibble", "Version": "3.2.1", @@ -2112,6 +2196,21 @@ ], "Hash": "3ec7e3ddcacc2d34a9046941222bf94d" }, + "tweenr": { + "Package": "tweenr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11", + "farver", + "magrittr", + "rlang", + "vctrs" + ], + "Hash": "82fac2b73e6a1f3874fc000aaf96d8bc" + }, "tzdb": { "Package": "tzdb", "Version": "0.4.0", diff --git a/analyses/cell-type-consensus/sample-info/README.md b/analyses/cell-type-consensus/sample-info/README.md new file mode 100644 index 000000000..57a542c35 --- /dev/null +++ b/analyses/cell-type-consensus/sample-info/README.md @@ -0,0 +1,6 @@ +# Sample info + +This folder contains any files with sample metadata needed for this module. + +1. `project-diagnoses.tsv`: This file contains a summarized label to use for all diagnoses in a given ScPCA project. +The contents of the `diagnosis` column are used to provide labels for plots in [`exploratory-notebooks/02-explore-consensus-results.Rmd`](../exploratory-notebooks/02-explore-consensus-results.Rmd). 
diff --git a/analyses/cell-type-consensus/sample-info/project-diagnoses.tsv b/analyses/cell-type-consensus/sample-info/project-diagnoses.tsv new file mode 100644 index 000000000..8a7bd45ce --- /dev/null +++ b/analyses/cell-type-consensus/sample-info/project-diagnoses.tsv @@ -0,0 +1,24 @@ +project_id diagnosis +SCPCP000001 high-grade glioma +SCPCP000002 low-grade glioma +SCPCP000003 acute lymphoblastic leukemia +SCPCP000004 neuroblastoma +SCPCP000005 rhabdomyosarcoma +SCPCP000006 wilms +SCPCP000007 acute myeloid leukemia +SCPCP000008 acute lymphoblastic leukemia +SCPCP000009 brain +SCPCP000010 brain +SCPCP000011 retinoblastoma +SCPCP000012 other solid tumors +SCPCP000013 non-rhabdo soft tissue sarcoma +SCPCP000014 wilms +SCPCP000015 ewing sarcoma +SCPCP000016 rhabdoid tumor +SCPCP000017 osteosarcoma +SCPCP000018 osteosarcoma +SCPCP000020 neuroblastoma +SCPCP000021 high-grade glioma +SCPCP000022 leukemia +SCPCP000023 osteosarcoma +SCPCP000024 neuroblastoma \ No newline at end of file