generated from curso-r/template-pagina-do-curso
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path12-rf-xgboost-na-nao.R
87 lines (69 loc) · 1.98 KB
/
12-rf-xgboost-na-nao.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
library(tree)
library(magrittr)
library(ggplot2)
library(tidyverse)
x <- runif(100, -1, 1)
y <- sin(x*3) + rnorm(100, sd = 0.1)
dados <- tibble(x, y)
dados %>%
ggplot(aes(x = x, y = y)) +
geom_point()
# random forest -----------------------------------------------------------
arvores <- list()
trees <- 200
tree_depth <- 4
mtry <- 1
n <- nrow(dados)
features <- setdiff(names(dados), "y")
for(i in 1:trees) {
amostra_bootstrap <- dados[sample.int(n, n, replace = TRUE), c("y", sample(features, mtry))]
arvores[[i]] <- rpart::rpart(y ~ .,
data = amostra_bootstrap,
control = rpart::rpart.control(maxdepth = tree_depth))
}
f <- function(x, arvores) {
trees <- length(arvores)
pred <- rep(0, length = length(x))
for(i in 1:trees){
pred <- pred + predict(arvores[[i]], tibble(x = x))
}
return(pred/trees)
}
dados %>%
mutate(pred_rf = f(x, arvores)) %>%
ggplot() +
geom_point(aes(x = x, y = y)) +
geom_step(aes(x = x, y = pred_rf, colour = "RF"), size = 1)
# boosting ----------------------------------------------------------------
loss <- function(y, y_hat) (y - y_hat)^2
# gradiente (G)
G <- function(y, y_hat) - 2 * (y - y_hat)
# hessiana (H)
H <- function(y, y_hat) 2
# f(x) = a + b*x
# f(x, arvores) = 0.0 + lr * arvore1 + lr * arvore2 + ... + lr * arvoreN
f <- function(x, arvores) {
r <- rep(0, length(x))
# soma as árvores (os case_whens)
for (arvore in arvores) {
r <- r + lr * predict(arvore, tibble(x = x))
}
r
}
arvores <- list()
y_hat <- 0.5
lr <- 0.5
trees <- 200
lambda <- 15
gamma <- 40
tree_depth <- 3
for (i in 1:trees) {
r <- -G(y, y_hat)/(H(y, y_hat) + lambda) # output = - G/H
arvores[[i]] <- rpart::rpart(r ~ x,
control = rpart::rpart.control(maxdepth = tree_depth))
y_hat <- f(x, arvores)
}
tibble(x = x, y = y, y_hat = y_hat) %>%
ggplot() +
geom_point(aes(x = x, y = y)) +
geom_step(aes(x = x, y = y_hat), colour = "red", size = 1)