# Extract the archive into the working directory and load the raw observations.
unzip("activity.zip")
activityData <- read.csv("activity.csv")

# Total steps per date. aggregate()'s formula interface omits rows whose
# steps value is NA by default (na.action = na.omit).
stepsData <- aggregate(steps ~ date, data = activityData, FUN = sum)

# One bar per date; las = 2 turns the axis labels perpendicular to the axis
# so the date labels do not overlap.
with(
  stepsData,
  barplot(steps, names.arg = date, ylab = "No. of Steps", las = 2)
)
The mean number of steps taken per day is:
# Average of the per-date step totals held in stepsData.
with(stepsData, mean(steps))
## [1] 10766.19
The median number of steps taken per day is:
# Median of the per-date step totals held in stepsData.
with(stepsData, median(steps))
## [1] 10765
# Mean steps for each interval identifier, averaged over all dates.
stepsData.interval <- aggregate(
  steps ~ interval,
  data = activityData,
  FUN = mean
)

# Line plot of the averaged profile (interval on x, mean steps on y).
plot(stepsData.interval, type = "l")

# Interval identifier with the highest average step count.
with(stepsData.interval, interval[which.max(steps)])
## [1] 835
# Count the missing values in the raw data set (across all columns).
# The original referenced the misspelled name `avtivityData`, which does not
# exist and made this line fail with "object not found".
sum(is.na(activityData))
# Fill each missing step count with the mean for the same interval.
#
# merge() attaches the per-interval mean as `steps.temp` (the empty first
# suffix keeps the observed column named `steps`). merge() also returns its
# rows sorted by `interval`, so the NA mask MUST be computed on the merged
# frame: a mask taken from the unmerged `activityData` is in a different row
# order and would overwrite real observations while leaving real gaps as NA.
#
# NOTE(review): the mean/median figures and the conclusion recorded further
# down in this report were produced with the misaligned mask; re-run the
# script to regenerate them.
imputedActivityData <- merge(
  activityData,
  stepsData.interval,
  by = "interval",
  suffixes = c("", ".temp")
)
missingSteps <- is.na(imputedActivityData$steps)
imputedActivityData$steps[missingSteps] <-
  imputedActivityData$steps.temp[missingSteps]

# Drop the helper column now that the gaps are filled.
imputedActivityData$steps.temp <- NULL

# Total steps per date after imputation, plotted like the first bar chart.
stepsData.date <- aggregate(steps ~ date, data = imputedActivityData, FUN = sum)
barplot(
  stepsData.date$steps,
  names.arg = stepsData.date$date,
  ylab = "No. of Steps",
  las = 2
)
The mean number of steps taken per day is:
# Average of the per-date step totals computed from the imputed data.
with(stepsData.date, mean(steps))
## [1] 9563.93
The median number of steps taken per day is:
# Median of the per-date step totals computed from the imputed data.
with(stepsData.date, median(steps))
## [1] 11215.68
Yes, there is a difference. The step counts are no longer biased by missing values. The mean number of steps per day has decreased compared with before, whereas the median has increased.
# Classify calendar dates as "Weekday" (Monday-Friday) or "Weekend".
#
# Args:
#   date: a value, or a vector of values, accepted by as.Date()
#         (for example "2012-10-01").
# Returns:
#   A character vector, one element per input date, holding "Weekday" or
#   "Weekend". Anything that is not Monday-Friday, including an NA date,
#   maps to "Weekend".
typeOfDay <- function(date) {
  # POSIXlt's `wday` is numeric (0 = Sunday ... 6 = Saturday). Unlike the day
  # names returned by weekdays(), it does not depend on the session locale,
  # so the classification also works when day names are not in English.
  dayIndex <- as.POSIXlt(as.Date(date))$wday
  ifelse(dayIndex %in% 1:5, "Weekday", "Weekend")
}
# Label every observation as falling on a weekday or a weekend day.
# vapply() (instead of sapply()) guarantees a character vector, even when the
# data frame has no rows, so as.factor() always receives the type it expects.
imputedActivityData$typeOfDay <- as.factor(
  vapply(imputedActivityData$date, typeOfDay, character(1))
)

# Mean steps per interval, computed separately for each type of day.
stepsData.Weekday <- aggregate(
  steps ~ interval,
  data = imputedActivityData,
  subset = imputedActivityData$typeOfDay == "Weekday",
  FUN = mean
)
stepsData.Weekend <- aggregate(
  steps ~ interval,
  data = imputedActivityData,
  subset = imputedActivityData$typeOfDay == "Weekend",
  FUN = mean
)

# Stack the two profiles in a 2 x 1 panel. par() returns the previous
# settings, which are restored afterwards so later plots are not squeezed into
# the same layout.
oldPar <- par(mfrow = c(2, 1))
plot(stepsData.Weekday, type = "l", main = "Weekday")
plot(stepsData.Weekend, type = "l", main = "Weekend")
par(oldPar)