-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCBB Pace and Four Factors Effect on Rating
131 lines (112 loc) · 5.98 KB
/
CBB Pace and Four Factors Effect on Rating
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#https://x.com/TotallyREALSpo1/status/1795697918898286729
# Load the 2023-24 team tables, one CSV per conference. The Big Ten has a
# second file that supplies the opponent (defensive) four-factor columns.
acc_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\acc sr cbb.csv"
)
bigEast_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\big east sr cbb.csv"
)
big12_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\Big 12 sports reference.csv"
)
sec_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\sec sports reference cbb.csv"
)
pac12_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\pac 12 cbb sports reference.csv"
)
bigTen_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\big ten cbb 202324.csv"
)
bigTen_defense_202324 <- read.csv(
  "C:\\Users\\imksy\\Documents\\Big Ten CBB Analytics 202324.csv"
)

# Columns of the Big Ten defensive table that hold percentage text.
percentage_columns <- c(
  'eFG.', 'ORB.', 'TOV.', 'FTA.Rate',
  'Opp.eFG.', 'Opp.ORB.', 'Opp.TOV.', 'Opp.FTA.Rate'
)

# Drop the first "%" from each value, parse it as a number and rescale it to
# a 0-1 fraction (e.g. "52.3%" becomes 0.523).
bigTen_defense_202324[percentage_columns] <- lapply(
  bigTen_defense_202324[percentage_columns],
  function(values) as.numeric(sub("%", "", values)) / 100
)

# Disabled school-name fix for the SEC table:
# sec_202324 <- sec_202324 %>% mutate(School = ifelse(School == "Mississippi St", "Mississippi State", School))
#' Plot offensive rating against an aggregate of the offensive four factors
#'
#' Joins team logos onto a conference table, derives `ORB_percent`,
#' `TOV_percent`, `FTAR` and their aggregate `Four_Factors`, and builds a
#' scatter plot of `Four_Factors` against `ORtg`. Dashed lines mark the mean
#' of each axis and the correlation between the two is printed in red.
#'
#' Relies on a global `mbb_logos` table with `group_id` and `team` columns,
#' and on the pipe, the dplyr verbs, ggplot2 and `geom_image()` (presumably
#' from ggimage) already being attached by the caller.
#'
#' @param df Data frame with the columns `School`, `eFG.`, `ORB`, `FG`,
#'   `FGA`, `FTA`, `AST`, `TOV` and `ORtg`. A `logo` column is expected after
#'   the join (presumably supplied by `mbb_logos`).
#' @param conference_name Text placed at the start of the plot title.
#' @param group_id Value matched against `mbb_logos$group_id` to select the
#'   logos of one conference.
#' @return The ggplot object (not printed).
process_dataset <- function(df, conference_name, group_id) {
  # Keep only this conference's logos. The `.data` / `.env` pronouns are
  # required: the argument shares its name with the column, so a bare
  # `group_id == group_id` compares the column with itself and keeps every
  # row instead of filtering.
  logos <- mbb_logos %>%
    filter(.data$group_id == .env$group_id)

  # Attach logos by school name and derive the four-factor components.
  df <- df %>%
    left_join(logos, by = c("School" = "team")) %>%
    mutate(
      ORB_percent = ORB / (FGA - FG),
      TOV_percent = TOV / (FGA + 0.475 * FTA + AST + TOV),
      FTAR = FTA / FGA,
      # Turnovers hurt the offense, so they are subtracted.
      Four_Factors = eFG. + ORB_percent + FTAR - TOV_percent
    )

  # Correlation over the rows where both values are present.
  correlation_coefficient_ortg_ff <- cor(
    df$Four_Factors, df$ORtg,
    use = "complete.obs"
  )

  # The plot is the last expression, so it is the function's return value.
  ggplot(df, aes(x = Four_Factors, y = ORtg)) +
    geom_image(aes(image = logo), size = 0.1) +
    geom_vline(
      xintercept = mean(df$Four_Factors, na.rm = TRUE),
      linetype = "dashed"
    ) +
    geom_hline(
      yintercept = mean(df$ORtg, na.rm = TRUE),
      linetype = "dashed"
    ) +
    labs(
      x = "Four Factors Aggregate",
      y = "Offensive Rating (Points scored per 100 possessions)",
      title = paste(conference_name, "Offensive Rating vs Four Factors, 2023-24")
    ) +
    # Fixed axis ranges keep the conference plots directly comparable.
    scale_y_continuous(
      limits = c(94.7, 125.0),
      breaks = seq(95.0, 125.0, by = 5.0)
    ) +
    scale_x_continuous(
      limits = c(0.75, 1.27),
      breaks = seq(0.75, 1.25, by = 0.125)
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5),
      panel.grid.major = element_line(color = "gray", linetype = "dashed"),
      legend.position = "none"
    ) +
    annotate(
      "text",
      x = 1.2, y = 95,
      label = paste("Correlation:", round(correlation_coefficient_ortg_ff, 5)),
      hjust = 1, vjust = 0, size = 3, color = "red"
    )

  # Disabled alternative: offensive rating against pace. Its correlation is
  # kept here with it because nothing else uses that value.
  # correlation_coefficient_ortg_pace <- cor(df$Pace, df$ORtg, use = "complete.obs")
  # ggplot(df, aes(x = Pace, y = ORtg)) +
  #   geom_image(aes(image = logo), size = 0.1) +
  #   geom_vline(xintercept = mean(df$Pace, na.rm = TRUE), linetype = "dashed") +
  #   geom_hline(yintercept = mean(df$ORtg, na.rm = TRUE), linetype = "dashed") +
  #   labs(x = "Pace (Possessions per 40 minutes)", y = "Offensive Rating (Points scored per 100 possessions)",
  #        title = paste(conference_name, "Offensive Rating vs Pace, 2023-24")) +
  #   scale_y_continuous(limits = c(94.7, 125.0), breaks = seq(95.0, 125.0, by = 5.0)) +
  #   scale_x_continuous(limits = c(60, 75), breaks = seq(60, 75, by = 3)) +
  #   theme_minimal() +
  #   theme(
  #     plot.title = element_text(hjust = 0.5),
  #     panel.grid.major = element_line(color = "gray", linetype = "dashed"),
  #     legend.position = "none"
  #   ) +
  #   annotate("text", x = 74, y = 95, label = paste("Correlation:", round(correlation_coefficient_ortg_pace, 5)),
  #            hjust = 1, vjust = 0, size = 3, color = "red")
}
# Build the offensive four-factors plot for every conference. The group_id
# argument is the id used to pick that conference's logos.
acc_plot <- process_dataset(
  df = acc_202324, conference_name = "ACC", group_id = 2
)
bigEast_plot <- process_dataset(
  df = bigEast_202324, conference_name = "Big East", group_id = 4
)
big12_plot <- process_dataset(
  df = big12_202324, conference_name = "Big 12", group_id = 8
)
sec_plot <- process_dataset(
  df = sec_202324, conference_name = "SEC", group_id = 23
)
pac12_plot <- process_dataset(
  df = pac12_202324, conference_name = "Pac-12", group_id = 21
)
bigTen_plot <- process_dataset(
  df = bigTen_202324, conference_name = "Big Ten", group_id = 7
)

# Render the plots in the order they were built.
invisible(lapply(
  list(acc_plot, bigEast_plot, big12_plot, sec_plot, pac12_plot, bigTen_plot),
  print
))
#' Plot defensive rating against an aggregate of the defensive four factors
#'
#' Joins team logos onto a conference table, derives `Four_Factors_Defense`
#' from the opponent four-factor columns, and builds a scatter plot of that
#' aggregate against `DRtg`. Both axes are reversed, so lower values sit
#' toward the top right. Dashed lines mark the mean of each axis and the
#' correlation between the two is printed in red.
#'
#' Relies on a global `mbb_logos` table with `group_id` and `team` columns,
#' and on the pipe, the dplyr verbs, ggplot2 and `geom_image()` (presumably
#' from ggimage) already being attached by the caller.
#'
#' @param df Data frame with the columns `Team.Name`, `Opp.eFG.`, `Opp.ORB.`,
#'   `Opp.TOV.`, `Opp.FTA.Rate` and `DRtg`. A `logo` column is expected after
#'   the join (presumably supplied by `mbb_logos`).
#' @param conference_name Text placed at the start of the plot title.
#' @param group_id Value matched against `mbb_logos$group_id` to select the
#'   logos of one conference.
#' @return The ggplot object (not printed).
process_dataset_defense <- function(df, conference_name, group_id) {
  # Keep only this conference's logos. The `.data` / `.env` pronouns are
  # required: the argument shares its name with the column, so a bare
  # `group_id == group_id` compares the column with itself and keeps every
  # row instead of filtering.
  logos <- mbb_logos %>%
    filter(.data$group_id == .env$group_id)

  # Attach logos by team name and build the defensive aggregate.
  df <- df %>%
    left_join(logos, by = c("Team.Name" = "team")) %>%
    mutate(
      # Forced opponent turnovers help the defense, so they are subtracted.
      Four_Factors_Defense = Opp.eFG. + Opp.ORB. - Opp.TOV. + Opp.FTA.Rate
    )

  # Correlation over the rows where both values are present.
  correlation_coefficient_drtg_ff <- cor(
    df$Four_Factors_Defense, df$DRtg,
    use = "complete.obs"
  )

  # The plot is the last expression, so it is the function's return value.
  ggplot(df, aes(x = Four_Factors_Defense, y = DRtg)) +
    geom_image(aes(image = logo), size = 0.1) +
    geom_vline(
      xintercept = mean(df$Four_Factors_Defense, na.rm = TRUE),
      linetype = "dashed"
    ) +
    geom_hline(
      yintercept = mean(df$DRtg, na.rm = TRUE),
      linetype = "dashed"
    ) +
    labs(
      x = "Defensive Four Factors Aggregate",
      y = "Defensive Rating (Points allowed per 100 possessions)",
      title = paste(
        conference_name,
        "Defensive Rating vs Defensive Four Factors, 2023-24"
      )
    ) +
    # Reversed scales, with limits and breaks listed from high to low.
    scale_y_reverse(
      limits = c(116, 98),
      breaks = seq(115.0, 100.0, by = -3.0)
    ) +
    scale_x_reverse(
      limits = c(1.10, 0.8),
      breaks = seq(1.10, 0.8, by = -0.10)
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5),
      panel.grid.major = element_line(color = "gray", linetype = "dashed"),
      legend.position = "none"
    ) +
    annotate(
      "text",
      x = 1.05, y = 115,
      label = paste("Correlation:", round(correlation_coefficient_drtg_ff, 5)),
      hjust = 1, vjust = 0, size = 3, color = "red"
    )
}
# Build and render the Big Ten defensive four-factors plot.
bigTen_plot_Defense <- process_dataset_defense(
  df = bigTen_defense_202324,
  conference_name = "Big Ten",
  group_id = 7
)
print(bigTen_plot_Defense)