-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTidyRtesting.Rmd
95 lines (77 loc) · 2.17 KB
/
TidyRtesting.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
---
title: "TidyRtesting"
author: "John Ryan"
date: "12/1/2017"
output: html_document
---
```{r}
library(tidyverse)
library(tidyr)
```
## Example datasets
## Tip: press Command + Option + I to insert a new R code chunk
```{r}
# Display the AirPassengers dataset that ships with R as an example dataset.
print(AirPassengers)
```
```{r}
## Wide format: the observation variables are spread over many columns
## (pop*, lifeExp*, gdpPercap*), which are gathered below.
gap_wide <- readr::read_csv(
  "https://raw.githubusercontent.com/OHI-Science/data-science-training/master/data/gapminder_wide.csv"
)

## The data format used in yesterday's session.
gapminder <- readr::read_csv(
  "https://raw.githubusercontent.com/OHI-Science/data-science-training/master/data/gapminder.csv"
)

head(gap_wide)

## Use gather() to turn `gap_wide` into a long-format dataset: every pop*,
## lifeExp* and gdpPercap* column is stacked, with the former column name
## stored in `obstype_year` and the cell value in `obs_values`.
gap_long <- tidyr::gather(
  gap_wide,
  key = "obstype_year",
  value = "obs_values",
  dplyr::starts_with("pop"),
  dplyr::starts_with("lifeExp"),
  dplyr::starts_with("gdpPercap")
)
```
## Separate out the year
```{r}
# Reshape `gap_wide` to long format in a single pipeline:
#   1. gather every column except the identifiers (continent, country) into
#      the key/value pair `obstype_year` / `obs_values`;
#   2. split `obstype_year` on "_" into `obs_type` and `year`.
gap_long <- gap_wide %>%
  gather(
    key = obstype_year,
    value = obs_values,
    -continent, -country
  ) %>%
  # convert = TRUE asks separate() to re-type the new columns instead of
  # leaving them as character. It is written as TRUE rather than T because
  # T is an ordinary variable that can be reassigned.
  separate(
    obstype_year,
    into = c("obs_type", "year"),
    sep = "_",
    convert = TRUE
  )

head(gap_long)
```
## Plot long-format data
```{r}
# Keep only the life-expectancy observations for the Americas.
life_df <- dplyr::filter(
  gap_long,
  obs_type == "lifeExp",
  continent == "Americas"
)
head(life_df)

# Draw one line per country showing its observed values over the years.
ggplot(data = life_df) +
  geom_line(aes(x = year, y = obs_values, color = country))
```
## Exercise: using gap_long, calculate and plot the mean life expectancy for each continent from 1982 to 2007
## hint: use the dplyr::group_by() and dplyr::summarize() functions.
```{r}
# solution (no peeking!)

# Mean of every observation type per continent, pooled over all years.
gap_long %>%
  group_by(continent, obs_type) %>%
  summarize(means = mean(obs_values))

# Mean life expectancy per continent and year, keeping only years after 1980.
# summarize() removes just the last grouping level, so ungroup() is called to
# store `cont` as a plain, ungrouped table for later use.
cont <- gap_long %>%
  filter(
    obs_type == "lifeExp",
    year > 1980
  ) %>%
  group_by(continent, year) %>%
  summarize(mean_le = mean(obs_values)) %>%
  ungroup()

# Basic plot: one line per continent.
ggplot(data = cont, aes(x = year, y = mean_le, color = continent)) +
  geom_line() +
  labs(
    title = "Mean life expectancy",
    x = "Year",
    y = "Age (years)"
  )

## Additional customization
# The lines are mapped to the `color` aesthetic, so the Brewer palette has to
# be set on the color scale. A fill scale is ignored because nothing in this
# plot maps to `fill`.
ggplot(data = cont, aes(x = year, y = mean_le, color = continent)) +
  geom_line() +
  labs(
    title = "Mean life expectancy",
    x = "Year",
    y = "Age (years)",
    color = "Continent"
  ) +
  theme_classic() +
  scale_color_brewer(palette = "Blues")
```