Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed rect.dendogram 2 rectangle bug #124

Merged
merged 5 commits into from
Sep 24, 2024
Merged

Conversation

alecbuetow
Copy link
Contributor

rect.dendogram bug described on stack overflow resolved

@alecbuetow
Copy link
Contributor Author

alecbuetow commented Sep 24, 2024

Fixed issue #119 described by @kalaivaniraju. Had to create placeholder distance_matrix and clustering_method but was able to recreate the issue. Minimal, Reproducible Examples of both bugs provided below for before (issue) and after (solved).

BEFORE:

library(dendextend)
# Generate example data: a random matrix with 20 rows (observations) and 4 columns (features)
set.seed(123) # For reproducibility
data <- matrix(rnorm(80), nrow = 20, ncol = 4)

# Alternatively, if you want to label the rows and columns:
rownames(data) <- paste0("SRR10395", 1:20)
colnames(data) <- c("Feature1", "Feature2", "Feature3", "Feature4")
# Example distance matrix
distance_matrix <- dist(data, method = "euclidean") # Replace 'data' with your actual data frame or matrix

# Clustering method
clustering_method <- "ward.D2"

# Apply hierarchical clustering
hclust.out <- hclust(distance_matrix, method = clustering_method)

# Define cut level (you may adjust this value based on your desired threshold)
cut_level <- 0.7 # This is an example value
h_cut <- cut_level * max(hclust.out$height)

# Save the dendrogram to a file
par(mar = c(5, 1, 1, 10), xpd = TRUE)
dend2 <- as.dendrogram(hclust.out)
dend2 %>% plot(horiz = TRUE, bty = "L")
dend2 %>% rect.dendrogram(h = h_cut, lty = 5, lwd = 0, col = rgb(0.8, 0.8, 0.8, 0.25), horiz = TRUE)
segments(x0 = h_cut, y0 = par("usr")[4], x1 = h_cut, y1 = 0, lty = 2)
if (!require("dendextend")) {install.packages("dendextend")} else {library("dendextend")}

data("iris", package = "datasets")

Data <- list()

Data$Lab <- as.character(iris[,5])
Data$dat <- prcomp(iris[,-5])$x[,1:2]

Data$dist <- dist(Data$dat, method = "euclidean")
Data$hist <- hclust(Data$dist, method = "complete")

# plot dendrogram

hcd <- as.dendrogram(Data$hist)

cluster.height <- 6

par(pty = "m",
    mar = c(1,2,1.5,1),
    mgp = c(1,0,0),
    tck = 0.01,
    cex.axis = 0.75,
    font.main = 1)
plot(sort(hcd), 
     ylab = "Height",
     leaflab = "none")
rect.dendrogram(sort(hcd),
                h = cluster.height,
                border = "black",
                xpd = NA,
                lower_rect = -0.1,
                upper_rect = 0)
abline(h = cluster.height,
       lty = 3)

AFTER:
(function redefined same as pull request)

strheight2 <- function(s, ...) {
  xusr <- par("usr")
  xh <- strwidth(s, cex = par("cex"), ...)
  yh <- strheight(s, cex = par("cex"), ...) * 5 / 3
  tmp <- xh
  xh <- yh / (xusr[4] - xusr[3]) * par("pin")[2]
  xh <- xh / par("pin")[1] * (xusr[2] - xusr[1])
  yh <- tmp / (xusr[2] - xusr[1]) * par("pin")[1]
  yh <- yh / par("pin")[2] * (xusr[4] - xusr[3])
  yh
}
rect.dendrogram <- function(tree, k = NULL, which = NULL, x = NULL, h = NULL, border = 2,
                            cluster = NULL, horiz = FALSE, density = NULL, angle = 45,
                            text = NULL, text_cex = 1, text_col = 1, xpd = TRUE,
                            lower_rect, upper_rect = 0, prop_k_height = 0.5,
                            stop_if_out = FALSE,
                            ...) {
  if (!is.dendrogram(tree)) stop("x is not a dendrogram object.")

  if (length(h) > 1L | length(k) > 1L) {
    stop("'k' and 'h' must be a scalar(i.e.: of length 1)")
  }

  # In tree_heights I am removing the first element
  # in order to be consistant with rect.hclust
  tree_heights <- heights_per_k.dendrogram(tree)[-1] # this is NOT really tree heights, but the height for which you need to cut in order to cut the tree
  tree_order <- order.dendrogram(tree)
  #    rm0 <- function(x) x[x != 0]
  #    height_to_add <- min(rm0(abs(diff(tree_heights))))/2 # the height to add so to be sure we get a "clear" cut



  if (!is.null(h)) {
    if (!is.null(k)) {
      stop("specify exactly one of 'k' and 'h'")
    }

    ss_ks <- tree_heights > h
    k <- max(as.numeric(names(ss_ks))[ss_ks])
    k <- max(k, 2) # I don't like this default...
  }
  else if (is.null(k)) {
    stop("specify exactly one of 'k' and 'h'")
  }
  if (k < 2 | k > length(tree_heights)) {
    if (stop_if_out) {
      stop(gettextf("k must be between 2 and %d", length(tree_heights)),
        domain = NA
      )
    } else {
      warning(gettextf("k must be between 2 and %d", length(tree_heights)),
        domain = NA
      )
    }
  }
  if (is.null(cluster)) {
    cluster <- cutree(tree, k = k)
  }

  clustab <- table(cluster)[unique(cluster[tree_order])]

  m <- c(0, cumsum(clustab))
  if (!is.null(x)) {
    if (!is.null(which)) {
      stop("specify exactly one of 'which' and 'x'")
    }
    which <- x
    for (n in seq_along(x)) which[n] <- max(which(m < x[n]))
  } else if (is.null(which)) {
    which <- 1L:k
  }
  if (any(which > k)) {
    stop(gettextf(
      "all elements of 'which' must be between 1 and %d",
      k
    ), domain = NA)
  }
  border <- rep_len(border, length(which))
  retval <- list()

  old_xpd <- par()["xpd"]
  par(xpd = xpd)

  for (n in seq_along(which)) {
    # this is to deal with the case when k is not defined for all values
    # and that we can not use the next k+1 value to decide on the height to use.
    next_k_height <- tree_heights[names(tree_heights) == k + 1]
    if (length(next_k_height) == 0) {
      next_k_height <- 0
      prop_k_height <- 1 # use only the "k" now.
      # if(upper_rect == 0) upper_rect <- min(abs(diff(tree_heights))) / 2
    }

    if (!horiz) { # the default
      xleft <- m[which[n]] + 0.66
      # if(missing(lower_rect)) lower_rect <- par("usr")[3L] - strheight("W")*(max(nchar(labels(tree))) + 1)
      if (missing(lower_rect)) {
        lower_rect <- -max(strheight2(labels(tree)))
        dLeaf <- -0.75 * strheight("x")
        extra_space <- -strheight2("_")
        lower_rect <- lower_rect + dLeaf + extra_space
      }

      ybottom <- lower_rect
      xright <- m[which[n] + 1] + 0.33
      #          ytop = mean(tree_heights[(k - 1):k])
      #          ytop = tree_heights[k] + abs(ybottom)
      #          ytop = mean(tree_heights[(k - 1):k]) + abs(ybottom)

      ytop <- max(tree_heights[names(tree_heights) == k]) * prop_k_height +
        next_k_height * (1 - prop_k_height) +
        upper_rect # tree_heights[k] + height_to_add # + abs(xright)
    } else {
      ybottom <- m[which[n]] + 0.66
      # if(missing(lower_rect)) lower_rect <- par("usr")[2L] + strwidth("X")*(max(nchar(labels(tree))) + 1)
      # if(missing(lower_rect)) lower_rect <- -max(strwidth(labels(dend)))

      if (missing(lower_rect)) {
        lower_rect <- min(strwidth(labels(tree))) # notice the char length is negative!
        dLeaf <- 0.75 * strwidth("w")
        extra_space <- strwidth("_")
        lower_rect <- lower_rect + dLeaf + extra_space
      }

      xright <- lower_rect

      ytop <- m[which[n] + 1] + 0.33
      #          xleft = mean(tree_heights[(k - 1):k])
      xleft <-
        tree_heights[names(tree_heights) == k] * prop_k_height +
        next_k_height * (1 - prop_k_height) +
        upper_rect # tree_heights[k] + height_to_add # + abs(xright)
    }
    rect(xleft,
      ybottom,
      xright,
      ytop,
      border = border[n], density = density, angle = angle, ...
    )

    # allow for a vectorized version of "text"
    if (!is.null(text)) {
      text((m[which[n]] + m[which[n] + 1] + 1) / 2,
        grconvertY(grconvertY(par("usr")[3L], "user", "ndc") + 0.02, "ndc", "user"),
        text[n],
        cex = text_cex, col = text_col
      )
    }

    retval[[n]] <- which(cluster == as.integer(names(clustab)[which[n]]))
  }


  par(xpd = old_xpd)
  invisible(retval)
}
# Generate example data: a random matrix with 20 rows (observations) and 4 columns (features)
set.seed(123) # For reproducibility
data <- matrix(rnorm(80), nrow = 20, ncol = 4)

# Alternatively, if you want to label the rows and columns:
rownames(data) <- paste0("SRR10395", 1:20)
colnames(data) <- c("Feature1", "Feature2", "Feature3", "Feature4")
# Example distance matrix
distance_matrix <- dist(data, method = "euclidean") # Replace 'data' with your actual data frame or matrix

# Clustering method
clustering_method <- "ward.D2"

# Apply hierarchical clustering
hclust.out <- hclust(distance_matrix, method = clustering_method)

# Define cut level (you may adjust this value based on your desired threshold)
cut_level <- 0.7 # This is an example value
h_cut <- cut_level * max(hclust.out$height)

# Save the dendrogram to a file
par(mar = c(5, 1, 1, 10), xpd = TRUE)
dend2 <- as.dendrogram(hclust.out)
dend2 %>% plot(horiz = TRUE, bty = "L")
dend2 %>% rect.dendrogram(h = h_cut, lty = 5, lwd = 0, col = rgb(0.8, 0.8, 0.8, 0.25), horiz = TRUE)
segments(x0 = h_cut, y0 = par("usr")[4], x1 = h_cut, y1 = 0, lty = 2)
if (!require("dendextend")) {install.packages("dendextend")} else {library("dendextend")}

data("iris", package = "datasets")

Data <- list()

Data$Lab <- as.character(iris[,5])
Data$dat <- prcomp(iris[,-5])$x[,1:2]

Data$dist <- dist(Data$dat, method = "euclidean")
Data$hist <- hclust(Data$dist, method = "complete")

# plot dendrogram

hcd <- as.dendrogram(Data$hist)

cluster.height <- 6

par(pty = "m",
    mar = c(1,2,1.5,1),
    mgp = c(1,0,0),
    tck = 0.01,
    cex.axis = 0.75,
    font.main = 1)
plot(sort(hcd), 
     ylab = "Height",
     leaflab = "none")
rect.dendrogram(sort(hcd),
                h = cluster.height,
                border = "black",
                xpd = NA,
                lower_rect = -0.1,
                upper_rect = 0)
abline(h = cluster.height,
       lty = 3)

@talgalili
Copy link
Owner

talgalili commented Sep 24, 2024

For future reference, here is the image before the fix:

image

Code:

library(dendextend)
# Generate example data: a random matrix with 20 rows (observations) and 4 columns (features)
set.seed(123) # For reproducibility
data <- matrix(rnorm(80), nrow = 20, ncol = 4)

# Alternatively, if you want to label the rows and columns:
rownames(data) <- paste0("SRR10395", 1:20)
colnames(data) <- c("Feature1", "Feature2", "Feature3", "Feature4")

# Example distance matrix
distance_matrix <- dist(data, method = "euclidean") # Replace 'data' with your actual data frame or matrix

# Clustering method
clustering_method <- "ward.D2"

# Apply hierarchical clustering
hclust.out <- hclust(distance_matrix, method = clustering_method)

# Define cut level (you may adjust this value based on your desired threshold)
cut_level <- 0.7 # This is an example value
h_cut <- cut_level * max(hclust.out$height)

# Save the dendrogram to a file
par(mar = c(5, 1, 1, 10), xpd = TRUE)
dend2 <- as.dendrogram(hclust.out)
dend2 %>% plot(horiz = TRUE, bty = "L")
dend2 %>% rect.dendrogram(h = h_cut, lty = 5, lwd = 0, col = rgb(0.8, 0.8, 0.8, 0.25), horiz = TRUE)
segments(x0 = h_cut, y0 = par("usr")[4], x1 = h_cut, y1 = 0, lty = 2)

if (!require("dendextend")) {install.packages("dendextend")} else {library("dendextend")}

data("iris", package = "datasets")

Data <- list()

Data$Lab <- as.character(iris[,5])
Data$dat <- prcomp(iris[,-5])$x[,1:2]

Data$dist <- dist(Data$dat, method = "euclidean")
Data$hist <- hclust(Data$dist, method = "complete")

# plot dendrogram

hcd <- as.dendrogram(Data$hist)

cluster.height <- 6

par(pty = "m",
    mar = c(1,2,1.5,1),
    mgp = c(1,0,0),
    tck = 0.01,
    cex.axis = 0.75,
    font.main = 1)
plot(sort(hcd), 
     ylab = "Height",
     leaflab = "none")

abline(h = cluster.height,
       lty = 3)

rect.hclust(as.hclust(hcd), h = cluster.height)

rect.dendrogram(sort(hcd),
                h = cluster.height,
                border = "black",
                xpd = NA,
                lower_rect = -0.1,
                upper_rect = 0)

After the fix:


rect.dendrogram(sort(hcd),
                h = cluster.height,
                border = "green",
                xpd = NA,
                lower_rect = -0.1,
                upper_rect = 0)

image

@talgalili talgalili merged commit 370e535 into talgalili:master Sep 24, 2024
15 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants