Skip to content

Commit

Permalink
Merge pull request #21 from JCSzamosi/new_tests
Browse files Browse the repository at this point in the history
New read depth function
  • Loading branch information
JCSzamosi authored Aug 3, 2023
2 parents 03c89da + 0ff1c74 commit dc7f287
Show file tree
Hide file tree
Showing 21 changed files with 1,007 additions and 55 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
^CHANGELOG\.md$
^data-raw$
^benchmark/*
^ignore_me.R$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
test_data/*
ring_dir
private
ignore_me.R
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
* 2023-08-03 v0.0.1.9002 (development update)
* **BREAKING CHANGES**
* Completely re-writes `plot_read_depth()`.
* allows users to plot read depth with a variable on the X axis and
a colour parameter
* users can access the old function with `plt_read_depth()`
temporarily, but this will be removed before the next full release.
* `rank_abund()` is broken and is no longer exported. Please file a bug
report if you were using this function.
* exports `order_taxa()`, by request
* makes `prop_tax_down()` slightly more efficient by checking up front if
there is nothing to do.
* deprecates `order_levs()` because it isn't used anywhere. Its intended
function is performed by `order_taxa()`.
* introduces visual and automatic testing of the new `plot_read_depth()`
function.

* 2023-04-25 v0.0.1.9001 (development update)
* in `plot_tax_bar()`
* the `legloc` argument is now passed directly to
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ export(long_distance_df)
export(make_ord_df)
export(make_phy_df)
export(order_levs)
export(order_taxa)
export(plot_read_depth)
export(plot_tax_bar)
export(plt_ord)
export(plt_read_depth)
export(prop_tax_down)
export(rank_abund)
export(rotate_ticks)
export(tax_colours)
import(ggplot2)
Expand Down
17 changes: 9 additions & 8 deletions R/make_phy_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' @param indic a flag to indicate if the taxon names have level indicators. If
#' FALSE, they are added.
#' @param prop Specifies whether taxa need to be propogated down the taxonomy
#' table (default, `TRUE`) or if this has already been done.
#' table (default is `TRUE`) or if this has already been done.
#' @param count If `FALSE` (default) the function will expect a relative
#' abundance table and create an 'Other' category for taxa below the cutoff
#' (and will raise an error if the table is not relative abundance). If TRUE,
Expand All @@ -41,7 +41,7 @@ make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE,
stop('physeq must be a relative abundance table. You have counts > 1.')
}
ranks = colnames(phyloseq::tax_table(physeq))
if (rank == 'OTU'){
if ((rank == 'OTU') & !('OTU' %in% ranks)) {
ranks = c(ranks, rank)
}

Expand Down Expand Up @@ -131,7 +131,7 @@ remain = function(x, tot = 1){

#' Order Taxon Name Factors
#'
#' `order_taxa` reorders the taxon names in a taxon column (e.g. 'Class' or
#' `order_taxa()` reorders the taxon names in a taxon column (e.g. 'Class' or
#' 'Phylum') by the taxon's mean abundance (but always makes sure to put Other
#' first).
#'
Expand All @@ -144,14 +144,15 @@ remain = function(x, tot = 1){
#' @param abund The name of the abundances column. Defaults to 'Abundance'
#' @param decreasing Specifies whether the taxon order should be based on
#' decreasing or increasing abundance. Defaults to FALSE.
#' @export
order_taxa = function(phy_df, rank, abund = 'Abundance', decreasing = FALSE){

phy_df[,rank] = factor(phy_df[,rank])
phy_df %>%
dplyr::filter(UQ(sym(rank)) != 'Other') %>%
dplyr::group_by(UQ(sym(rank))) %>%
dplyr::summarize(Tot = sum(UQ(sym(abund)))) %>%
data.frame() -> total_abunds
total_abunds = (phy_df
%>% dplyr::filter(.data[[rank]] != 'Other')
%>% dplyr::group_by(.data[[rank]])
%>% dplyr::summarize(Tot = sum(.data[[abund]]))
%>% data.frame())

lev_ord = levels(droplevels(total_abunds[,rank]))
if (decreasing){
Expand Down
201 changes: 189 additions & 12 deletions R/plot_read_depth.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,196 @@
#### plot_read_depth -----------------------------------------------------------
#### plot_read_depth ------------------------------------------------------------

#' View read depth
#'
#' @section Details: Creates a read depth boxplot
#'
#' @section Value: A [ggplot2] object
#'
#' @param physeq A phyloseq object
#' @param xvar `NULL` A variable (column name) from `sample_data(physeq)` by
#' which to group samples on the x-axis. Must be categorical. If `NULL`, no
#' grouping will be performed.
#' @param lin `FALSE` If `TRUE`, the y-axis will be on a linear scale. By
#' default it is log-scaled.
#' @param cvar `NULL` A variable (column name) from `sample_data(physeq)` by
#' which the points should be coloured. If `NULL`, all points will be black.
#' May be continuous or categorical. See the `clrs` argument if you want to
#' specify your colours.
#' @param clrs `NULL` If the `cvar` parameter is set and this is unset, colours
#' will be taken from [tax_colours] if `cvar` is categorical and use
#' [ggplot2]'s default (blue) if `cvar` is continuous. If `cvar` is
#' categorical, you may provide a named colour vector to use instead of
#' [tax_colours] and if it is continuous you may provide a two-colour vector.
#' The vector must have names "low" and "high". If `cvar` is NOT set, you may
#' use this parameter to provide a single colour name or hex code that will be
#' applied to all points.
#' @export
plot_read_depth = function(physeq, xvar = NULL, lin = FALSE, cvar = NULL,
clrs = NULL){
# Check inputs
sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq),
phyloseq::sample_data(physeq))

prd_check(sample_sum_df, cvar, xvar)

# Deal with the colour vector
clrs = prd_clrs(sample_sum_df, cvar, clrs)


# Construct the plot foundation with or without a grouping variable
if (is.null(xvar)){
depth_plot = ggplot2::ggplot(sample_sum_df,
ggplot2::aes(x = 'All', y = Total))
} else {
depth_plot = ggplot2::ggplot(sample_sum_df,
ggplot2::aes(x = .data[[xvar]], y = Total))
}

# Add the jittered points, taking colour into account
# No colouring variable
if (is.null(cvar)){
# And no specified colour
if (is.null(clrs)){
depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2)
# And colour is specified
} else {
depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2,
colour = clrs)
}
# colouring variable is specified and continuous
} else if (is.numeric(sample_sum_df[[cvar]])) {
depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2,
ggplot2::aes(colour = .data[[cvar]]))
# if clrs not specified, do nothing. defaults will do.
if (is.null(clrs)){
NULL
# if clrs is specified, use it
} else {
depth_plot = depth_plot +
ggplot2::scale_colour_gradient(low = clrs['low'],
high = clrs['high'])
}
# if colouring variable is specified and categorical
} else {
depth_plot = depth_plot + ggplot2::geom_jitter(width = 0.2,
aes(colour = .data[[cvar]])) +
ggplot2::scale_colour_manual(values = clrs)
}

if (!lin){
depth_plot = depth_plot + ggplot2::scale_y_log10()
}
return(depth_plot)
}

#### prd_check() ---------------------------------------------------------------

#' Check inputs for plot_read_depth()
#'
#' Checks the inputs of `plot_read_depth()`
#' @param sample_sum_df data frame of sample sums and sample data
#' @param cvar,xvar passed through from `plot_read_depth()`

prd_check = function(sample_sum_df, cvar, xvar){
# Are cvar and xvar columns?
if (!is.null(xvar) && !(xvar %in% colnames(sample_sum_df))){
stop(paste('"xvar" must be one of the columns in sample_data(physeq).'))
}

if (!is.null(cvar) && !(cvar %in% colnames(sample_sum_df))){
stop(paste('"cvar" must be one of the columns in sample_data(physeq).'))
}
}

#### prd_clrs() ----------------------------------------------------------------

#' Deal w colour vector for plot_read_depth()
#'
#' @param cvar,clrs passed through from `plot_read_depth()`
prd_clrs = function(sample_sum_df, cvar, clrs){
# cvar unset, clrs set
if (is.null(cvar) & !is.null(clrs)){
if (length(clrs) > 1){
warn(paste('With no cvar specified, only the first colour in "clrs"',
'will be used.'))
clrs = clrs[1]
return(clrs)
}
} else if (!is.null(cvar)){
# Built-in colour vector
if (is.null(clrs)){
# Categorical
if (is.factor(sample_sum_df[[cvar]]) ||
is.character(sample_sum_df[[cvar]])){
n = dplyr::n_distinct(sample_sum_df[[cvar]])
nx = ceiling(n/length(tax_colours))
clrs = rep(tax_colours, nx)
return(clrs)

# continuous requires no action
} else {
return(clrs)
}
# User-specified colour vector
} else {
# Categorical
if (is.factor(sample_sum_df[[cvar]]) |
is.character(sample_sum_df[[cvar]])){
if (is.null(names(clrs))){
return(clrs)
# nothing to do here. use it as is

} else if (!all(unique(sample_sum_df) %in% names(clrs))){
stop(paste('If "clrs" is named, its names must contain',
'all the values in the "cvar" column'))
}
# if we get this far, the colour vector is fine.

return(clrs)

# Continuous
} else {
if (is.null(names(clrs)) |
!all(c('high','low') %in% names(clrs))){
stop(paste('With a continuous "cvar", the provided colour',
'vector "clrs" must have names "high" and "low".'))
} else {
# if we get this far, the colour vector is fine.
return(clrs)
}
return(clrs)
}
return(clrs)
}
return(clrs)
}
return(clrs)
}

#### plt_read_depth -----------------------------------------------------------

#' Plot read depth
#'
#' Creates a read depth histogram.
#' @describeIn
#' `r lifecycle::badge("deprecated")`
#'
#' Creates a read depth histogram. Deprecated in favour of `plot_read_depth()`
#' @param physeq A phyloseq object
#' @export
plot_read_depth = function(physeq){
sample_sum_df = data.frame(Total = sample_sums(physeq),
sample_data(physeq))
depth_plot = ggplot(sample_sum_df, aes(x = Total)) +
geom_histogram(colour = 'black', fill = 'grey57') +
scale_x_log10() +
xlab('Log10 read depth') +
ylab('Number of samples') +
ggtitle('Distribution of read depths') +
theme_bw()
plt_read_depth = function(physeq){
lifecycle::deprecate_soft('0.0.1.9002', 'plt_read_depth()',
'plot_read_depth()',
details = paste("This is the old version of",
"plot_read_depth(), and will be",
"removed soon."))
sample_sum_df = data.frame(Total = phyloseq::sample_sums(physeq),
phyloseq::sample_data(physeq))
depth_plot = ggplot2::ggplot(sample_sum_df, aes(x = Total)) +
ggplot2::geom_histogram(colour = 'black', fill = 'grey57') +
ggplot2::scale_x_log10() +
ggplot2::xlab('Log10 read depth') +
ggplot2::ylab('Number of samples') +
ggplot2::ggtitle('Distribution of read depths') +
ggplot2::theme_bw()
return(depth_plot)
}
27 changes: 14 additions & 13 deletions R/prep_plot_rank_ab.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,26 @@
#'
#' @param phy_df A dataframe of a phyloseq object, like that generated by
#' [phyloseq::psmelt()] or [make_phy_df()]
#' @param varbs (`NULL`) A character vector of grouping variables from
#' which the baseline values are chosen to define the abundance ordering. If
#' it is `NULL`, the ordering will be based on mean abundances in the
#' whole data frame.
#' @param bases (`NULL`) A character vector of baseline values for the
#' variables given in `vars`. The ordering of the taxa will be given
#' based only on the samples with these baseline values for these variables.
#' Must be in the same order as vars.
#' @param gvars (`NULL`) A character vector of grouping variables from which the
#' baseline values are chosen to define the abundance ordering. If it is
#' `NULL`, the ordering will be based on mean abundances in the whole data
#' frame.
#' @param bases (`NULL`) A character vector of baseline values for the variables
#' given in `gvars`. The ordering of the taxa will be given based only on the
#' samples with these baseline values for these variables. Must be in the same
#' order as `gvars`.
#' @param abunds (`'Abundance'`) The name of the abundance column.
#' @param rank (`'Genus'`) The rank to base the ordering on.
#' @param rank (`'Genus'`) The rank to base the ordering on. Must be a column in
#' `phy_df`
#' @param IDcol (`'X.SampleID'`) The column name of the sample IDs
#' @export
rank_abund = function(phy_df, varbs = NULL, bases = NULL, abunds = 'Abundance',
rank = 'Genus', IDcol = 'X.SampleID'){
#'
rank_abund = function(phy_df, gvars = NULL, bases = NULL, abunds = 'Abundance',
rank = 'Genus', IDcol){
# Set up the groups for the plotting totals
rank_abs = df_glom(phy_df, IDcol = IDcol, rank = rank, abunds = abunds)

# Subset and order
ranked = subset_order(rank_abs, varbs, bases, rank = rank)
ranked = subset_order(rank_abs, gvars, bases, rank = rank)

# Order the bigger data frame by the above ordering
lev_ord = levels(ranked[,rank])
Expand Down
5 changes: 5 additions & 0 deletions R/prop_tax_down.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ prop_tax_down = function(physeq, indic, dbig = TRUE){
colnames(tt) = colnames(phyloseq::tax_table(physeq))
rownames(tt) = sp

# If there are no unspecified taxa, stop
if (!any(is.na(tt))){
return(physeq)
}

tt = prop_tax_tab(tt, indic)
phyloseq::tax_table(physeq) = tt

Expand Down
8 changes: 8 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

#' Order the levels of one factor by the values of another
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' This function will be deprecated because it is just recapitulating
#' functionality provided by `ggplot2::facet_grid()`. If you are using this
#' function and don't want it it go away, please get in touch and let me know
#' what you're using it for.
#'
#' `order_levs()` takes two factors, the first of which has values that are
#' nested within the values of the second, and orders the levels of the first
#' factor such that they are clustered within the second factor.
Expand All @@ -14,6 +21,7 @@
#' @param f2 The factor to use when re-ordering `f1`.
#' @export
order_levs = function(f1,...){
lifecycle::deprecate_warn('0.0.2', 'order_levs()')

# if (is.numeric(f2)){
# ord = order(...)
Expand Down
Loading

0 comments on commit dc7f287

Please sign in to comment.