title | author | date | output |
---|---|---|---|
Peer Graded Assignment_1 | Anna Litvinenko | 8/23/2017 | html_document |
Cleaning the workspace and loading the libraries
# Start from an empty global environment, then attach the plotting and
# data-manipulation packages used below.
rm(list = ls())
library(ggplot2)
library(dplyr)
Loading data assuming the zipped data was saved to the working directory
# Extract the archive into the current working directory, then read the CSV,
# treating the literal string "NA" as a missing value.
unzip(zipfile = "activity.zip", exdir = getwd())
activity <- read.csv(file = "activity.csv", na.strings = "NA")
Converting dates into date format
# Replace the date column read from the CSV with a Date vector.
activity[["date"]] <- as.Date(activity[["date"]])
Aggregating the total number of steps by date and plotting with ggplot2
# Sum the steps recorded on each date. sum() is called without na.rm, so a
# date containing any NA gets an NA total.
total.steps.day <- activity %>%
  group_by(date) %>%
  summarise(steps = sum(steps))
# Bar chart of the daily totals with a rotated date label every 5 days.
ggplot(total.steps.day, aes(x = date, y = steps)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number Of Steps Taken Each Day",
    x = "Date",
    y = "Number of steps"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0)) +
  scale_x_date(date_labels = "%d %b %y", date_breaks = "5 days")
## Warning: Removed 8 rows containing missing values (position_stack).
![plot of chunk hist_total steps](figure/hist_total steps-1.png)
# Mean of the daily totals, skipping dates whose total is NA.
with(total.steps.day, mean(steps, na.rm = TRUE))
## [1] 10766.19
# Median of the daily totals, skipping dates whose total is NA.
with(total.steps.day, median(steps, na.rm = TRUE))
## [1] 10765
Aggregating the average number of steps by interval and plotting with ggplot2
# Mean steps per 5-minute interval across all dates, ignoring missing values.
average.steps.interval <- activity %>%
  group_by(interval) %>%
  summarise(steps = mean(steps, na.rm = TRUE))
# Line chart of the per-interval averages; x-axis ticks come from pretty().
ggplot(average.steps.interval, aes(x = interval, y = steps)) +
  geom_line() +
  labs(
    title = "Average Number Of Steps Taken Daily",
    x = "5-minute Interval",
    y = "Number of steps"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0)) +
  scale_x_continuous(breaks = pretty(average.steps.interval$interval, n = 20))
![plot of chunk timeser_average steps](figure/timeser_average steps-1.png)
# Row of the interval with the highest average step count. which.max() returns
# the position of the first maximum and skips NA/NaN, so no floating-point
# equality test against max() is needed.
average.steps.interval[which.max(average.steps.interval$steps), ]
## # A tibble: 1 x 2
## interval steps
## <int> <dbl>
## 1 835 206.1698
Taking a look at missing values
# Total number of missing cells across every column of the data set.
sum(is.na(activity))
## [1] 2304
# Keep only the rows whose step count is missing. Filtering on the steps
# column directly avoids passing a whole-data-frame logical matrix as a row
# index, which only works when steps is the first column and the only one
# containing NAs.
NAs <- activity[is.na(activity$steps), ]
# Constant helper column so table() yields one count per date.
NAs <- transform(NAs, count = 1)
table(NAs$date, NAs$count)
##
## 1
## 2012-10-01 288
## 2012-10-08 288
## 2012-11-01 288
## 2012-11-04 288
## 2012-11-09 288
## 2012-11-10 288
## 2012-11-14 288
## 2012-11-30 288
Eight days are missing entirely. These NAs will be replaced with the mean number of steps for the corresponding 5-minute interval across all days.
# Work on a copy so the raw data stays available for comparison.
activity.complete <- activity
# Flag the rows whose step count is missing.
missing.steps <- is.na(activity.complete$steps)
# Replace every missing count in one vectorised assignment: match() looks up
# each row's interval in the per-interval averages and returns its position.
activity.complete$steps[missing.steps] <- average.steps.interval$steps[
  match(
    activity.complete$interval[missing.steps],
    average.steps.interval$interval
  )
]
Checking if the NAs are gone
# Count the missing cells left in the imputed data set, across all columns.
sum(is.na(activity.complete))
## [1] 0
Calculating new mean and median number for complete data
# Daily step totals computed from the imputed data set.
total.steps.day.complete <- activity.complete %>%
  group_by(date) %>%
  summarise(steps = sum(steps))
# Mean of the daily totals.
with(total.steps.day.complete, mean(steps))
## [1] 10766.19
# Median of the daily totals.
with(total.steps.day.complete, median(steps))
## [1] 10766.19
Using imputed data, aggregating the total number of steps by date and plotting with ggplot2
# Daily step totals from the imputed data set.
total.steps.day.complete <- activity.complete %>%
  group_by(date) %>%
  summarise(steps = sum(steps))
# Bar chart of the imputed daily totals with a rotated date label every 5 days.
ggplot(total.steps.day.complete, aes(x = date, y = steps)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number Of Steps Taken Each Day_Complete Data",
    x = "Date",
    y = "Number of steps"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0)) +
  scale_x_date(date_labels = "%d %b %y", date_breaks = "5 days")
![plot of chunk hist_total steps_imputed](figure/hist_total steps_imputed-1.png)
8. Panel plot comparing the average number of steps taken per 5-minute interval across weekdays and weekends
Recoding dates to "weekday" and "weekend"
# Day name for each date. weekdays() returns names in the current locale, so
# this column is kept for display only.
activity.complete$daytype <- weekdays(activity.complete$date)
# Classify by the numeric day of week instead of the day name, so the result
# does not depend on the locale. POSIXlt wday is 0 for Sunday and 6 for
# Saturday.
activity.complete$day <- ifelse(
  as.POSIXlt(activity.complete$date)$wday %in% c(0L, 6L),
  "weekend",
  "weekday"
)
Aggregating the average number of steps by weekday(end) and plotting with ggplot2
# Mean steps per 5-minute interval, computed separately for each day type.
average.steps.interval.day <- activity.complete %>%
  group_by(interval, day) %>%
  summarise(steps = mean(steps, na.rm = TRUE))
# One line-chart panel per day type, stacked vertically.
ggplot(average.steps.interval.day, aes(x = interval, y = steps)) +
  geom_line() +
  labs(
    title = "Average Number Of Steps Taken Daily",
    x = "5-minute Interval",
    y = "Number of steps"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0)) +
  scale_x_continuous(
    breaks = pretty(average.steps.interval$interval, n = 20)
  ) +
  facet_grid(day ~ .)
![plot of chunk panelplot_average steps by weekday(end)](figure/panelplot_average steps by weekday(end)-1.png)