From 91e40a20f6d4f507cc73bdd4c7c6b6a2a317e278 Mon Sep 17 00:00:00 2001 From: woznicak Date: Thu, 29 Jul 2021 00:22:26 +0200 Subject: [PATCH] rebrand model to player, split to round (#11) and print for epp class (#8) --- NAMESPACE | 1 + R/calculate_elo.R | 109 +++++++++++++++++------------ R/plot_wins_ratio.R | 8 +-- R/wald_test.R | 6 +- man/calculate_actual_wins.Rd | 6 +- man/calculate_epp.Rd | 6 +- man/print.epp_results.Rd | 19 +++++ man/print.epp_test.Rd | 2 +- tests/testthat/test_calculations.R | 4 +- 9 files changed, 99 insertions(+), 62 deletions(-) create mode 100644 man/print.epp_results.Rd diff --git a/NAMESPACE b/NAMESPACE index b24cd37..03a9da6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,epp_results) S3method(print,epp_test) export(calculate_actual_wins) export(calculate_epp) diff --git a/R/calculate_elo.R b/R/calculate_elo.R index 53f9e02..bf68ccb 100644 --- a/R/calculate_elo.R +++ b/R/calculate_elo.R @@ -1,36 +1,36 @@ #' @importFrom data.table setDT dcast #' @noRd -calculate_wins_one_model <- function(results,value_compare_with, model_base, split_compare_with, compare_function, compare_in_split =TRUE, aggregate = TRUE ){ - loser <- winner <- loses <- wins <- model <- score <- `.` <- `.N` <- `:=` <- NULL +calculate_wins_one_player <- function(results,value_compare_with, player_base, round_compare_with, compare_function, compare_in_round =TRUE, aggregate = TRUE ){ + loser <- winner <- loses <- wins <- player <- score <- `.` <- `.N` <- `:=` <- NULL setDT(results) if(aggregate){ - if(!compare_in_split){ + if(!compare_in_round){ results <- results[,.(wins = sum(compare_function(value_compare_with, score)), loses = sum(compare_function(score, value_compare_with)), match = .N), - by = model][,`:=`(winner = model_base, loser = model)][, `:=`(model=NULL)][winner!= loser] + by = player][,`:=`(winner = player_base, loser = player)][, `:=`(player=NULL)][winner!= loser] results <- results[wins + loses == 0, `:=`(wins = match/2,loses = match/2) ] }else{ - results <- results[split == split_compare_with][,.(wins = sum(compare_function(value_compare_with, score)), + results <- results[round == round_compare_with][,.(wins = sum(compare_function(value_compare_with, score)), loses = sum(compare_function(score, value_compare_with)), match = .N), - by = model][,`:=`(winner = model_base, loser = model)][, `:=`(model=NULL)][winner!= loser] + by = player][,`:=`(winner = player_base, loser = player)][, `:=`(player=NULL)][winner!= loser] results <- results[wins + loses == 0, `:=`(wins = match/2,loses = match/2) ] } } else{ - if(!compare_in_split){ + if(!compare_in_round){ results[,`:=`(wins = compare_function(value_compare_with, score), - loses = sum(compare_function(score, value_compare_with)), match = 1)][,`:=`(winner = model_base, loser = model)][, `:=`(model=NULL)][winner!= loser] + loses = sum(compare_function(score, value_compare_with)), match = 1)][,`:=`(winner = player_base, loser = player)][, `:=`(player=NULL)][winner!= loser] }else{ - results[split == split_compare_with][,`:=`(wins = compare_function(value_compare_with, score), - loses = sum(compare_function(score, value_compare_with)), match = 1)][,`:=`(winner = model_base, loser = model)][, `:=`(model=NULL)][winner!= loser] + results[round == round_compare_with][,`:=`(wins = compare_function(value_compare_with, score), + loses = sum(compare_function(score, value_compare_with)), match = 1)][,`:=`(winner = player_base, loser = player)][, `:=`(player=NULL)][winner!= loser] } } @@ -43,7 +43,7 @@ calculate_wins_one_model <- function(results,value_compare_with, model_base, spl #' @importFrom data.table setDT dcast rbindlist #' @importFrom utils setTxtProgressBar txtProgressBar #' @noRd -calculate_wins_all_model <- function(results, list_models, compare_in_split, compare_function, aggregate = TRUE){ +calculate_wins_all_player <- function(results, list_players, compare_in_round, compare_function, aggregate = TRUE){ loser <- winner <- loses <- wins <- players <- score <- `.` <- `:=` <- NULL results_list <- list() @@ -51,11 +51,11 @@ calculate_wins_all_model <- function(results, list_models, compare_in_split, com for(i in 1:nrow(results)){ row_sel <- i value_compare_with <- results[['score']][row_sel] - model_base <- results[['model']][row_sel] - split_compare_with <- results[['split']][row_sel] + player_base <- results[['player']][row_sel] + round_compare_with <- results[['round']][row_sel] - results_v2 <- calculate_wins_one_model(results = results, value_compare_with = value_compare_with, split_compare_with = split_compare_with, model_base = model_base, compare_function = compare_function, compare_in_split = compare_in_split, aggregate = aggregate) + results_v2 <- calculate_wins_one_player(results = results, value_compare_with = value_compare_with, round_compare_with = round_compare_with, player_base = player_base, compare_function = compare_function, compare_in_round = compare_in_round, aggregate = aggregate) results_list[[length(results_list)+1]] <- results_v2 setTxtProgressBar(pb, i) } @@ -79,13 +79,13 @@ calculate_wins_all_model <- function(results, list_models, compare_in_split, com #' @importFrom stats coefficients deviance #' @noRd -create_summary_model_glmnet <- function(model_epp, model_names, reference){ +create_summary_model_glmnet <- function(model_epp, player_names, reference){ residual_deviance <- list(value = deviance(model_epp), df = df.residual(model_epp)) vector_coeff_model <- as.vector(coefficients(model_epp)) intercept <- vector_coeff_model[1] - epp_summary <- data.frame(model = model_names, + epp_summary <- data.frame(player = player_names, epp = vector_coeff_model[-1] - intercept) rownames(epp_summary) <- NULL epp_summary[nrow(epp_summary),2] <- 0 @@ -94,7 +94,7 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ residual_deviance = residual_deviance) if(!is.null(reference)){ - reference_level <- results[['epp']][which(results[['epp']][,"model"] == reference) ,"epp"][1] + reference_level <- results[['epp']][which(results[['epp']][,"player"] == reference) ,"epp"][1] results[['epp']][, 'epp'] <- results[["epp"]][,'epp'] - reference_level } @@ -106,20 +106,20 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ #' @importFrom stats coefficients deviance #' @noRd - create_summary_model_glm <- function(model_epp, model_names, reference){ + create_summary_model_glm <- function(model_epp, player_names, reference){ residual_deviance <- list(value = deviance(model_epp), df = df.residual(model_epp)) - reference_glmmodel <- model_names[length(model_names)] + reference_glmmodel <- player_names[length(player_names)] epp_summary <- data.frame(coefficients(summary(model_epp))) colnames(epp_summary) <- c('epp', 'std_epp', 'z_statistic', 'p_value') intercept <- epp_summary[1,1] rownames(epp_summary)[1] <- reference_glmmodel - epp_summary[,'model'] <- rownames(epp_summary) + epp_summary[,'player'] <- rownames(epp_summary) epp_summary[,1] <- epp_summary[,1]-intercept epp_summary[,'conf_lower'] <- epp_summary[,1] - 1.96 * epp_summary[,2] epp_summary[,'conf_upper'] <- epp_summary[,1] + 1.96 * epp_summary[,2] - epp_summary <- epp_summary[,c('model', 'epp', 'std_epp', 'conf_lower', 'conf_upper', 'p_value')] + epp_summary <- epp_summary[,c('player', 'epp', 'std_epp', 'conf_lower', 'conf_upper', 'p_value')] rownames(epp_summary) <- NULL covariance_epp <- vcov(model_epp) @@ -135,7 +135,7 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ if(!is.null(reference)){ - reference_level <- results[['epp']][which(results[['epp']][,"model"] == reference) ,"epp"][1] + reference_level <- results[['epp']][which(results[['epp']][,"player"] == reference) ,"epp"][1] results[['epp']][, 'epp'] <- results[["epp"]][,'epp'] - reference_level results[['epp_summary']][, 'epp'] <- results[["epp_summary"]][,'epp'] - reference_level @@ -148,7 +148,7 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ } -#' @title Actual Results for Every Pair of Models +#' @title Actual Results for Every Pair of Players #' #' @description Calculate number of wins and loses for every pair of Players. #' @@ -157,7 +157,7 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ #' #' @param results raw results #' @param decreasing_metric Logical. If Score is decreasing metrics. -#' @param compare_in_split Logical. If Players should be compared only in the same Round. Setting to FALSE increase the number of Matches, but Score values of Players will be compared between different Rounds. +#' @param compare_in_round Logical. If Players should be compared only in the same Round. Setting to FALSE increase the number of Matches, but Score values of Players will be compared between different Rounds. #' @param aggregate Logical. If results should be aggregated for every pair of Players. Otherwise, output will have many rows with binary response (according to amount of Rounds in the Tournament) for every pair of Players. #' #' @return data.frame @@ -165,7 +165,7 @@ create_summary_model_glmnet <- function(model_epp, model_names, reference){ #' @export -calculate_actual_wins <- function(results, decreasing_metric = TRUE, compare_in_split, aggregate = TRUE){ +calculate_actual_wins <- function(results, decreasing_metric = TRUE, compare_in_round, aggregate = TRUE){ ### define comparison of metrics if(decreasing_metric){ is_metric1_better <- function(metric1, metric2){ @@ -181,11 +181,11 @@ calculate_actual_wins <- function(results, decreasing_metric = TRUE, compare_in_ } - unique_model <- unique(results$model) + unique_player <- unique(results$player) - summary_results <- calculate_wins_all_model(results = results, - list_models = unique_model, - compare_in_split = compare_in_split, + summary_results <- calculate_wins_all_player(results = results, + list_players = unique_player, + compare_in_round = compare_in_round, compare_function = is_metric1_better, aggregate = aggregate) summary_results <- as.data.frame(summary_results) @@ -208,12 +208,12 @@ prepare_model_matrix <- function(actual_score){ colnames(model_matrix) <- unique(actual_score$winner) rownames(model_matrix) <- unique(actual_score$players) - model_winner <- gsub(" .*", "", rownames(model_matrix)) - model_loser <- gsub(".+? ", "", rownames(model_matrix)) + player_winner <- gsub(" .*", "", rownames(model_matrix)) + player_loser <- gsub(".+? ", "", rownames(model_matrix)) for(col in colnames(model_matrix)) { - model_matrix[col == model_winner, col] <- 1 - model_matrix[col == model_loser, col] <- -1 + model_matrix[col == player_winner, col] <- 1 + model_matrix[col == player_loser, col] <- -1 } model_matrix @@ -252,7 +252,7 @@ fit_glmnet_model <- function(glm_model_matrix_sparse, actual_score){ #' First 3 columns should correspond to: Player, Round, Score. See more in 'details' section. #' @param decreasing_metric Logical. If TRUE used Score is considered as decreasing, that means a Player with higher Score value is considered as winner. #' If FALSE used Score will be considered as increasing. -#' @param compare_in_split Logical. If TRUE compares Players only in the same fold. If FALSE compares Players across folds. +#' @param compare_in_round Logical. If TRUE compares Players only in the same fold. If FALSE compares Players across folds. #' @param keep_columns Logical. If TRUE original data frame with new 'epp' column will be returned. #' @param keep_model Logical. If TRUE logistic regression model to compute EPP will be returned. #' @param reference Character. Name of the Player that should be a reference level for EPP Meta-scores. It should be a name of one of the Players from @@ -265,7 +265,7 @@ fit_glmnet_model <- function(glm_model_matrix_sparse, actual_score){ #' Second column corresponds to indexes of Rounds As EPP is based on Elo rating system, power of Player is assessed by comparing its results #' with other Players on multiple Rounds. Therefore, each Player should be evaluated on multiple Rounds. #' Indexes of ROunds should be in this column. And should match across Players. -#' Third column contains Score used to evaluate models. It can be both decreasing or increasing metric. +#' Third column contains Score used to evaluate players. It can be both decreasing or increasing metric. #' just remember to set the \code{decreasing_metric} parameter accordingly. #' The following columns can be of any kind. #' @@ -280,39 +280,41 @@ fit_glmnet_model <- function(glm_model_matrix_sparse, actual_score){ #' #' @export #' @importFrom glmnet glmnet cv.glmnet bigGlm -calculate_epp <- function(results, decreasing_metric = TRUE, compare_in_split = TRUE, +calculate_epp <- function(results, decreasing_metric = TRUE, compare_in_round = TRUE, keep_columns = FALSE, keep_model = FALSE, reference = NULL, keep_data = TRUE, estimation = "glmnet"){ # some cleaning to make unified naming - models_results <- results[, 1:3] - colnames(models_results) <- c("model", "split", "score") - models_results <- models_results[order(models_results[["model"]], models_results[["split"]]),] - models_results[, "model"] <- factor(models_results[["model"]]) - actual_score <- calculate_actual_wins(results = models_results, + players_results <- results[, 1:3] + colnames(players_results) <- c("player", "round", "score") + players_results <- players_results[order(players_results[["player"]], players_results[["round"]]),] + players_results[, "player"] <- factor(players_results[["player"]]) + actual_score <- calculate_actual_wins(results = players_results, decreasing_metric = decreasing_metric, - compare_in_split=compare_in_split) + compare_in_round=compare_in_round) glm_model_matrix <- prepare_model_matrix(actual_score) if(estimation == "glm"){ model_epp <- fit_glm_model(glm_model_matrix, actual_score) - epp_list <- create_summary_model_glm(model_epp, model_names = colnames(glm_model_matrix), reference) + epp_list <- create_summary_model_glm(model_epp, player_names = colnames(glm_model_matrix), reference) # epp_list[nrow(epp_list),2] <- 0 }else if(estimation == "glmnet"){ glm_model_matrix_sparse <- Matrix(glm_model_matrix, sparse = TRUE) glm_model_matrix <- Matrix(glm_model_matrix, sparse = TRUE) model_epp <- fit_glmnet_model(glm_model_matrix_sparse, actual_score) - epp_list <- create_summary_model_glmnet(model_epp, model_names = colnames(glm_model_matrix_sparse), reference) + epp_list <- create_summary_model_glmnet(model_epp, player_names = colnames(glm_model_matrix_sparse), reference) } if(keep_columns == TRUE) { - tmp <- merge(epp_list[['epp']], models_results, by = "model") - epp_list[['epp']] <- merge(tmp, results, by.x = c("model", "split", "score"), by.y = colnames(results)[1:3]) + tmp <- merge(epp_list[['epp']], players_results, by = "player") + epp_list[['epp']] <- merge(tmp, results, by.x = c("player", "round", "score"), by.y = colnames(results)[1:3]) colnames(epp_list[['epp']])[1:3] <- colnames(results)[1:3] } + + if(keep_data == TRUE){ res <- c(epp_list, list(actual_score = actual_score)) @@ -328,3 +330,18 @@ calculate_epp <- function(results, decreasing_metric = TRUE, compare_in_split = res } + +#' @title Printing EPP results +#' +#' @param x epp_results. The result of a function \code{\link{calculate_epp}}. +#' @param ... other parameters +#' +#' @return No return value, prints the structure of the object +#' +#' @export + +print.epp_results <- function(x, ...){ + cat("Head of Players EPP: \n") + print(head(x$epp)) + cat("Type of estimation: ", x$estimation, "\n") +} diff --git a/R/plot_wins_ratio.R b/R/plot_wins_ratio.R index 969448e..2a8c298 100644 --- a/R/plot_wins_ratio.R +++ b/R/plot_wins_ratio.R @@ -41,15 +41,15 @@ plot_wins_ratio <- function(epp, random_sample = NULL, random_state = NULL){ set.seed(random_state) } - sample_player <- sample(epp_score$model, size = floor(random_sample * length(epp_score$model))) + sample_player <- sample(epp_score$player, size = floor(random_sample * length(epp_score$player))) actual_score <- actual_score[actual_score$winner %in% sample_player & actual_score$loser %in% sample_player,] - epp_score <- epp_score[epp_score$model %in% sample_player, ] + epp_score <- epp_score[epp_score$player %in% sample_player, ] } actual_score[["ratio"]] <- actual_score[["wins"]] / actual_score[["match"]] - actual_score <- merge(actual_score, epp_score, by.x ="winner", by.y = "model") + actual_score <- merge(actual_score, epp_score, by.x ="winner", by.y = "player") names(actual_score)[names(actual_score)=='epp'] <- "epp_winner" - actual_score <- merge(actual_score, epp_score, by.x ="loser", by.y = "model") + actual_score <- merge(actual_score, epp_score, by.x ="loser", by.y = "player") names(actual_score)[names(actual_score)=='epp'] <- "epp_loser" actual_score[['pred_ratio']] <- exp(actual_score[["epp_winner"]] - actual_score[['epp_loser']])/(1+exp(actual_score[["epp_winner"]] - actual_score[['epp_loser']])) diff --git a/R/wald_test.R b/R/wald_test.R index bf0bf1f..a996f6d 100644 --- a/R/wald_test.R +++ b/R/wald_test.R @@ -20,7 +20,7 @@ test_players_diff <- function(epp, player1, player2){ if(epp$estimation != "glm") stop("Test requires estimated covariance matrix. Use `estimation='glm'` in function `caluclate_epp()`.") coefs <- epp$epp_summary$epp - players <- epp$epp_summary$model + players <- epp$epp_summary$player covar <- epp$covariance_epp n <- length(coefs) @@ -43,14 +43,14 @@ test_players_diff <- function(epp, player1, player2){ #' @title Printing Summary of the EPP Test #' -#' @param x epp_results. The result of a function \code{\link{calculate_epp}}. +#' @param x epp_test. The result of a function \code{\link{test_players_diff}}. #' @param ... other parameters #' #' @return No return value, prints the structure of the object #' #' @export print.epp_test <- function(x, ...) { - cat("Wald-based test for the difference between ", x$player1, " and ", x$player1, ".\n", sep = "") + cat("Wald-based test for the difference between ", x$player1, " and ", x$player2, ".\n", sep = "") cat("Test statistic: ", x$statistic, "\n") cat("p-value: ", x$pval, "\n") } diff --git a/man/calculate_actual_wins.Rd b/man/calculate_actual_wins.Rd index dfb0da7..a9545f8 100644 --- a/man/calculate_actual_wins.Rd +++ b/man/calculate_actual_wins.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/calculate_elo.R \name{calculate_actual_wins} \alias{calculate_actual_wins} -\title{Actual Results for Every Pair of Models} +\title{Actual Results for Every Pair of Players} \usage{ calculate_actual_wins( results, decreasing_metric = TRUE, - compare_in_split, + compare_in_round, aggregate = TRUE ) } @@ -16,7 +16,7 @@ calculate_actual_wins( \item{decreasing_metric}{Logical. If Score is decreasing metrics.} -\item{compare_in_split}{Logical. If Players should be compared only in the same Round. Setting to FALSE increase the number of Matches, but Score values of Players will be compared between different Rounds.} +\item{compare_in_round}{Logical. If Players should be compared only in the same Round. Setting to FALSE increase the number of Matches, but Score values of Players will be compared between different Rounds.} \item{aggregate}{Logical. If results should be aggregated for every pair of Players. Otherwise, output will have many rows with binary response (according to amount of Rounds in the Tournament) for every pair of Players.} } diff --git a/man/calculate_epp.Rd b/man/calculate_epp.Rd index 11a9486..110cdee 100644 --- a/man/calculate_epp.Rd +++ b/man/calculate_epp.Rd @@ -7,7 +7,7 @@ calculate_epp( results, decreasing_metric = TRUE, - compare_in_split = TRUE, + compare_in_round = TRUE, keep_columns = FALSE, keep_model = FALSE, reference = NULL, @@ -22,7 +22,7 @@ First 3 columns should correspond to: Player, Round, Score. See more in 'details \item{decreasing_metric}{Logical. If TRUE used Score is considered as decreasing, that means a Player with higher Score value is considered as winner. If FALSE used Score will be considered as increasing.} -\item{compare_in_split}{Logical. If TRUE compares Players only in the same fold. If FALSE compares Players across folds.} +\item{compare_in_round}{Logical. If TRUE compares Players only in the same fold. If FALSE compares Players across folds.} \item{keep_columns}{Logical. If TRUE original data frame with new 'epp' column will be returned.} @@ -47,7 +47,7 @@ First column should correspond to a Player. Second column corresponds to indexes of Rounds As EPP is based on Elo rating system, power of Player is assessed by comparing its results with other Players on multiple Rounds. Therefore, each Player should be evaluated on multiple Rounds. Indexes of ROunds should be in this column. And should match across Players. -Third column contains Score used to evaluate models. It can be both decreasing or increasing metric. +Third column contains Score used to evaluate players. It can be both decreasing or increasing metric. just remember to set the \code{decreasing_metric} parameter accordingly. The following columns can be of any kind. diff --git a/man/print.epp_results.Rd b/man/print.epp_results.Rd new file mode 100644 index 0000000..50273d3 --- /dev/null +++ b/man/print.epp_results.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate_elo.R +\name{print.epp_results} +\alias{print.epp_results} +\title{Printing EPP results} +\usage{ +\method{print}{epp_results}(x, ...) +} +\arguments{ +\item{x}{epp_results. The result of a function \code{\link{calculate_epp}}.} + +\item{...}{other parameters} +} +\value{ +No return value, prints the structure of the object +} +\description{ +Printing EPP results +} diff --git a/man/print.epp_test.Rd b/man/print.epp_test.Rd index d015c73..25aca67 100644 --- a/man/print.epp_test.Rd +++ b/man/print.epp_test.Rd @@ -7,7 +7,7 @@ \method{print}{epp_test}(x, ...) } \arguments{ -\item{x}{epp_results. The result of a function \code{\link{calculate_epp}}.} +\item{x}{epp_test. The result of a function \code{\link{test_players_diff}}.} \item{...}{other parameters} } diff --git a/tests/testthat/test_calculations.R b/tests/testthat/test_calculations.R index 5b4fecf..997d250 100644 --- a/tests/testthat/test_calculations.R +++ b/tests/testthat/test_calculations.R @@ -3,8 +3,8 @@ context("Testing functions calculating scores") data("auc_scores") auc_scores_short <- auc_scores[1:100,] test_that("Is epp calculated?", { - expect_is(calculate_epp(auc_scores_short, decreasing_metric = TRUE, compare_in_split = TRUE), + expect_is(calculate_epp(auc_scores_short, decreasing_metric = TRUE, compare_in_round = TRUE), "epp_results") - expect_is(calculate_epp(auc_scores_short, decreasing_metric = FALSE, compare_in_split = FALSE, keep_data = FALSE), + expect_is(calculate_epp(auc_scores_short, decreasing_metric = FALSE, compare_in_round = FALSE, keep_data = FALSE), "epp_results") })