Skip to content

Commit

Permalink
utils for inspection of S-curves
Browse files Browse the repository at this point in the history
  • Loading branch information
dselivanov committed Mar 21, 2016
1 parent 099f76e commit ed6c9f0
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
39 changes: 39 additions & 0 deletions R/s_curve.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Compute (and optionally plot) S-curves for a number of hash functions
#'
#' Enumerates the ways to split `number_hashfun` hash functions into
#' `bands * rows_per_band`, keeps the splits that satisfy the minimum
#' constraints, and for each of them evaluates
#' `1 - (1 - s ^ rows_per_band) ^ bands` over the similarity grid `s`.
#'
#' The candidate band counts come from `divisors()`, which leaves out the
#' trivial divisors `1` and `number_hashfun`; a prime `number_hashfun`
#' therefore yields no configurations at all.
#'
#' @param number_hashfun Total number of hash functions to split into bands.
#' @param n_bands_min Minimum number of bands a configuration must have.
#' @param n_rows_per_band_min Minimum number of rows per band a configuration
#'   must have.
#' @param s Numeric vector of similarity values at which the curves are
#'   evaluated.
#' @param plot If `TRUE`, print a ggplot line chart with one curve per
#'   configuration.
#' @return Invisibly, a `data.table` with columns
#'   `probability_become_candidate`, `similarity`, `n_bands` and
#'   `n_rows_per_band` (one row per configuration and value of `s`). When no
#'   configuration satisfies the constraints a warning is raised, nothing is
#'   plotted, and an empty `data.table` is returned.
#' @examples
#' df <- get_s_curve(2400, n_bands_min = 40, n_rows_per_band_min = 20)
#' @export
get_s_curve <- function(number_hashfun,
                        n_bands_min = 1,
                        n_rows_per_band_min = 1,
                        s = seq(0.5, 1, 0.01),
                        plot = TRUE) {

  # Every divisor is a candidate band count; the matching quotient is the
  # number of rows per band.
  bands_number <- divisors(number_hashfun)
  rows_per_band <- number_hashfun / bands_number

  keep <- bands_number >= n_bands_min & rows_per_band >= n_rows_per_band_min
  bands_number <- bands_number[keep]
  rows_per_band <- rows_per_band[keep]

  # One table per (bands, rows) configuration, stacked into a single table.
  s_curve <- rbindlist(
    Map(function(n_band, n_rows) {
      data.table(
        probability_become_candidate = 1 - (1 - s ^ n_rows) ^ n_band,
        similarity = s,
        n_bands = n_band,
        n_rows_per_band = n_rows
      )
    }, bands_number, rows_per_band)
  )

  # With no configurations the stacked table has no columns, so the plot
  # aesthetics below could not be resolved; report the situation instead.
  if (length(bands_number) == 0L) {
    warning(
      "no (bands, rows_per_band) configuration satisfies the constraints ",
      "for number_hashfun = ", number_hashfun,
      call. = FALSE
    )
    return(invisible(s_curve))
  }

  if (plot) {
    g <- ggplot(s_curve) +
      geom_line(aes(x = similarity,
                    y = probability_become_candidate,
                    col = interaction(n_bands, n_rows_per_band,
                                      sep = " : "))) +
      scale_color_discrete("bands_number : rows_per_band")
    print(g)
  }

  invisible(s_curve)
}


6 changes: 6 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ split_vector <- function(x, splits) {
knots = ceiling(seq.int(from = 1, to = length(x) + 1, length.out = splits + 1))
mapply(FUN = function(lower, upper) list(c(lower, upper)), knots[-length(knots)], knots[-1] - 1)
}

# Non-trivial divisors of `x`: every integer strictly between 1 and the last
# element of `seq_len(x)` that divides `x` without remainder. Returns an empty
# integer vector when there are none (e.g. for 0, 1, 2 or a prime).
divisors <- function(x) {
  candidates <- seq_len(x)
  # Drop the first and the last candidate (1 and x itself), then keep only
  # the values that divide x exactly.
  is_inner <- candidates > 1L & candidates < length(candidates)
  candidates[is_inner & x %% candidates == 0]
}

0 comments on commit ed6c9f0

Please sign in to comment.