-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab02-hierarquico.qmd
301 lines (237 loc) · 12.8 KB
/
lab02-hierarquico.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
---
title: "Séries temporais hierárquicas"
format: html
---
Séries temporais hierárquicas podem ser de três tipos:
- **Aninhada**: quando as séries são aninhadas, como por exemplo, a venda de um produto em diferentes regiões do país.
- **Cruzada**: quando as séries não são aninhadas, mas possuem uma relação entre si, como por exemplo, a venda de produtos de diferentes categorias.
- **Mista**: quando as séries são aninhadas e cruzadas, como por exemplo, a venda de produtos de diferentes categorias em diferentes regiões do país.
Exemplo de série temporal aninhada:
```{r}
library(fpp3)
library(patchwork)
tourism <- tsibble::tourism |>
mutate(State = recode(State,
`New South Wales` = "NSW",
`Northern Territory` = "NT",
`Queensland` = "QLD",
`South Australia` = "SA",
`Tasmania` = "TAS",
`Victoria` = "VIC",
`Western Australia` = "WA"
))
tourism_hts <- tourism |>
aggregate_key(State / Region, Trips = sum(Trips))
#view(as_tibble(tourism_hts))
tail(tourism_hts)
tourism_hts |>
filter(is_aggregated(Region)) |>
autoplot(Trips) +
labs(y = "Trips ('000)",
title = "Australian tourism: national and states") +
facet_wrap(vars(State), scales = "free_y", ncol = 3) +
theme(legend.position = "none")
## ----seasonStates, echo=FALSE, fig.cap="Seasonal plots for overnight trips for Queensland and the Northern Territory, and Victoria and Tasmania highlighting the contrast in seasonal patterns between northern and southern states in Australia.", fig.asp=0.5, fig.width=7, out.width="80%", message=FALSE, warning=FALSE----
tourism_hts |>
filter(State == "NT" | State == "QLD" |
State == "TAS" | State == "VIC", is_aggregated(Region)) |>
select(-Region) |>
mutate(State = factor(State, levels=c("QLD","VIC","NT","TAS"))) |>
gg_season(Trips) +
facet_wrap(vars(State), nrow = 2, scales = "free_y")+
labs(y = "Trips ('000)")
## ----tourismRegions, echo=FALSE, fig.asp=0.6, fig.cap="Domestic overnight trips from 1998 Q1 to 2017 Q4 for some selected regions.", fig.width=9, message=FALSE, warning=FALSE----
tourism_hts |>
filter(vctrs::vec_in(Region, c("North Coast NSW", "Snowy Mountains", "Hunter", "New England North West", "Alice Springs", "Darwin", "Kakadu Arnhem", "MacDonnell", "Brisbane", "Gold Coast", "Northern Outback", "Sunshine Coast", "Tropical North Queensland", "Adelaide Hills", "Murraylands", "Yorke Peninsula", "Kangaroo Island", "Ballarat", "Great Ocean Road", "High Country", "Goulburn", "Australia's Coral Coast", "Australia's Golden Outback", "Australia's North West", "Australia's North West"))) |>
autoplot() +
facet_wrap(State ~ ., scales = "free_y", ncol = 3) +
labs(y = "Trips ('000)",
title = "Australian tourism: by regions nested within states") +
theme(legend.position = "none")
```
Exemplo de série temporal cruzada:
```{r}
## ----prisongts, fig.width=9, fig.asp = .7, echo=FALSE, fig.cap="Total Australian quarterly adult prison population, disaggregated by state, by legal status, and by gender.", warning=FALSE, message=FALSE, fig.pos="b", fig.env="figure*"----
prison <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/prison_population.csv") |>
mutate(Quarter = yearquarter(Date)) |>
select(-Date) |>
as_tsibble(key = c(Gender, Legal, State, Indigenous),
index = Quarter) |>
relocate(Quarter)
prison_gts <- prison |>
aggregate_key(Gender * Legal * State, Count = sum(Count) / 1e3)
p1 <- prison_gts |>
filter(
is_aggregated(Gender),
is_aggregated(Legal),
is_aggregated(State)
) |>
autoplot(Count) +
labs(y = "Number of prisoners ('000)",
title = "Prison population: Total")
p2 <- prison_gts |>
filter(
(!is_aggregated(Gender)) +
(!is_aggregated(Legal)) +
(!is_aggregated(State)) == 1) |>
mutate(
disaggregator = case_when(
!is_aggregated(Gender) ~ "Gender",
!is_aggregated(Legal) ~ "Legal",
!is_aggregated(State) ~ "State"
),
value = case_when(
!is_aggregated(Gender) ~ as.character(Gender),
!is_aggregated(Legal) ~ as.character(Legal),
!is_aggregated(State) ~ as.character(State)
),
series = paste(disaggregator, value, sep = "/")
) |>
ggplot(aes(x = Quarter, y = Count, colour = series)) +
geom_line() +
labs(y = "Number of prisoners ('000)") +
facet_wrap(vars(disaggregator), scales = "free_y")
p1 / p2
```
Exemplo de série temporal mista:
```{r}
## ----mixed, echo=TRUE-------------------------------------------------------------------------------------
tourism_full <- tourism |>
aggregate_key((State/Region) * Purpose, Trips = sum(Trips))
## ----mixed-purpose, fig.width=10, fig.asp = 0.6, echo=FALSE, fig.cap="Australian domestic overnight trips from 1998 Q1 to 2017 Q4 disaggregated by purpose of travel.", message=FALSE, warning=FALSE, dependson="mixed",fig.env="figure*"----
tourism_full |>
filter(is_aggregated(State), is_aggregated(Region), !is_aggregated(Purpose)) |>
ggplot(aes(x = Quarter, y = Trips,
group = as.character(Purpose), colour = as.character(Purpose))
) +
stat_summary(fun = sum, geom = "line") +
facet_wrap(~ as.character(Purpose), scales = "free_y", nrow = 2) +
labs(title = "Australian tourism: by purpose of travel",
y = "Trips ('000)") +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(colour = guide_legend("Purpose"))
## ----mixed-state-purpose, fig.width=10, fig.asp = 0.6, echo=FALSE, fig.cap="Australian domestic overnight trips over the period 1998 Q1 to 2017 Q4 disaggregated by purpose of travel and by state.", message=FALSE, warning=FALSE, dependson="mixed",fig.env="figure*"----
tourism_full |>
filter(!is_aggregated(State), is_aggregated(Region), !is_aggregated(Purpose)) |>
ggplot(aes(x = Quarter, y = Trips,
group = as.character(Purpose), colour = as.character(Purpose))
) +
stat_summary(fun = sum, geom = "line") +
facet_wrap(~ as.character(State), scales = "free_y", nrow = 2) +
labs(title = "Australian tourism: by purpose of travel and state",
y = "Trips ('000)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(colour = guide_legend("Purpose"))
```
## Método Bottom-Up (BU)
O método Bottom-Up (BU) consiste em prever as séries temporais de menor nível e depois agregá-las para obter as previsões das séries de maior nível. Esse método é útil quando as séries temporais de menor nível são mais fáceis de prever do que as séries de maior nível. Por exemplo, as séries temporais de menor nível podem ser mais estáveis e menos voláteis do que as séries de maior nível. Nesse caso, o método BU pode produzir previsões mais precisas do que o método Top-Down (TD).
```{r}
## ----tourism_states, message=FALSE------------------------------------------------------------------------
tourism_states <- tourism |>
aggregate_key(State, Trips = sum(Trips))
## ----bu_by_hand, message=FALSE----------------------------------------------------------------------------
fcasts_state <- tourism_states |>
filter(!is_aggregated(State)) |>
model(ets = ETS(Trips)) |>
forecast()
# Sum bottom-level forecasts to get top-level forecasts
fcasts_national <- fcasts_state |>
summarise(value = sum(Trips), .mean = mean(value))
```
Usando a função `reconcile()`:
```{r}
## ----bottom_up, message=FALSE-----------------------------------------------------------------------------
tourism_states |>
model(ets = ETS(Trips)) |>
reconcile(bu = bottom_up(ets)) |>
forecast()
```
Meta-código para o método Bottom-Up:
```r
data |>
aggregate_key() |>
model() |>
reconcile() |>
forecast()
```
## Método Top-Down (TD)
O método Top-Down (TD) consiste em prever as séries temporais de maior nível e depois desagregá-las para obter as previsões das séries de menor nível. Esse método é útil quando as séries temporais de maior nível são mais fáceis de prever do que as séries de menor nível. Por exemplo, as séries temporais de maior nível podem ser mais estáveis e menos voláteis do que as séries de menor nível. Nesse caso, o método TD pode produzir previsões mais precisas do que o método Bottom-Up (BU).
Existem várias técnicas top-down, incluindo:
- **Proporção média**: a proporção é calculada fazendo-se a razão entre a série de menor nível e a série de maior nível, e depois tomando-se a média. No R, isso é implementado usando `method = "average_proportions"` da função `top_down()`.
- **Proporção da média**: a proporção é calculada fazendo-se a razão entre a média das séries de menor nível e a média das séries de maior nível. No R, isso é implementado usando `method = "proportion_averages"` da função `top_down()`.
Existe uma terceira técnica que consiste em calcular as proporções a partir dos forecasts. No R, isso é implementado usando `method = "forecast_proportions"` da função `top_down()`. Essa técnica pode produzir previsões mais precisas do que as duas primeiras técnicas, mas é mais difícil de implementar.
Finalmente, existem técnicas que combinam BU e TD. Por exemplo, podemos usar o método BU para prever as séries temporais de menor nível e depois usar o método TD para prever as séries temporais de maior nível. No R, isso é implementado usando a função `middle_out()`
## Minimum Trace (MinT)
O método Minimum Trace (MinT) consiste em minimizar a variância total das previsões dentro do grupo de previsões coerentes. O método MinT é implementado no R usando a função `min_trace()`, com diversas variações ("wsl_struct", "mint_cov", "mint_shrink", "ols").
Do livro do Hyndman:
> In summary, unlike any other existing approach, the optimal reconciliation forecasts are generated using all the information available within a hierarchical or a grouped structure. This is important, as particular aggregation levels or groupings may reveal features of the data that are of interest to the user and are important to be modelled. These features may be completely hidden or not easily identifiable at other levels.
> For example, consider the Australian tourism data introduced in Section 11.1, where the hierarchical structure followed the geographic division of a country into states and regions. Some areas will be largely summer destinations, while others may be winter destinations. We saw in Figure 11.4 the contrasting seasonal patterns between the northern and the southern states. These differences will be smoothed at the country level due to aggregation.
```{r}
tourism_full <- tourism |>
aggregate_key((State/Region) * Purpose, Trips = sum(Trips))
fit <- tourism_full |>
filter(year(Quarter) <= 2015) |>
model(base = ETS(Trips)) |>
reconcile(
bu = bottom_up(base),
ols = min_trace(base, method = "ols"),
mint = min_trace(base, method = "mint_shrink"),
)
fc <- fit |> forecast(h = "2 years")
fc |>
filter(is_aggregated(Region), is_aggregated(Purpose)) |>
autoplot(
tourism_full |> filter(year(Quarter) >= 2011),
level = NULL
) +
labs(y = "Trips ('000)") +
facet_wrap(vars(State), scales = "free_y")
fc |>
filter(is_aggregated(State), !is_aggregated(Purpose)) |>
autoplot(
tourism_full |> filter(year(Quarter) >= 2011),
level = NULL
) +
labs(y = "Trips ('000)") +
facet_wrap(vars(Purpose), scales = "free_y")
## ----tourism-evaluation, echo=FALSE, message=FALSE, warning=FALSE, dependson="tourismfc"------------------
tab <- matrix(NA, ncol = 8, nrow = 6)
rownames(tab) <- c("Total", "Purpose", "State", "Regions", "Bottom", "All series")
colnames(tab) <- c("Base", "Bottom-up", "MinT", "OLS", "Base", "Bottom-up", "MinT", "OLS")
filter_tab <- matrix(NA, ncol = 1, nrow = 6)
filter_tab[1] <- "fc |> filter(is_aggregated(State),is_aggregated(Region),is_aggregated(Purpose))"
filter_tab[2] <- "fc |>
filter(is_aggregated(State),is_aggregated(Region),!is_aggregated(Purpose))"
filter_tab[3] <- "fc |> filter(!is_aggregated(State),is_aggregated(Region),is_aggregated(Purpose))"
filter_tab[4] <- "fc |>
filter(!is_aggregated(State),!is_aggregated(Region),is_aggregated(Purpose))"
filter_tab[5] <- "fc |> filter(!is_aggregated(State),!is_aggregated(Region),!is_aggregated(Purpose))"
filter_tab[6] <- "fc"
for (i in 1:6) {
err <- eval(parse(text = filter_tab[i])) |>
accuracy(
data = tourism_full,
measures = list(rmse = RMSE, mase = MASE)
) |>
group_by(.model) |>
summarise(rmse = mean(rmse), mase = mean(mase))
tab[i, ] <- cbind(t(err[, 2]), t(err[, 3]))
}
view(tab)
## ----fcaccuracy2, message=FALSE, dependson="tourismfc"----------------------------------------------------
fc |>
filter(is_aggregated(State), is_aggregated(Purpose)) |>
accuracy(
data = tourism_full,
measures = list(rmse = RMSE, mase = MASE)
) |>
group_by(.model) |>
summarise(rmse = mean(rmse), mase = mean(mase))
fc |>
filter(is_aggregated(Purpose)) |>
accuracy(
data = tourism_full,
measures = list(rmse = RMSE, mase = MASE)
) |>
tidyr::pivot_wider(names_from = .model, values_from = c(rmse, mase))
```