-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
64 lines (44 loc) · 2.55 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Please reference Readme.md and CodeBook.md for details on this script and its output.
# Run this script with the working directory set to the root of the unzipped
# Samsung data set, i.e. the directory that contains README.txt,
# features.txt, activity_labels.txt and the test/ and train/ folders.
# Data source:
# https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip
library(dplyr) # need this library
# ---- Lookup tables ----------------------------------------------------------
# Feature list: its second column (V2) is used below as the column names of
# the measurement tables.
features <- read.table("features.txt", header = FALSE)
# Numeric activity code paired with its readable name.
activity_labels <- read.table(
  "activity_labels.txt",
  header = FALSE,
  col.names = c("activity", "activity_name")
)

# Positions of the features whose name contains "mean" or "std"
# (case-insensitive); only these measurement columns are kept further down.
cols_we_want <- which(grepl("mean|std", features[[2]], ignore.case = TRUE))
saved_features <- features[cols_we_want, ] # saved for later

# ---- Test split: read, then name the columns (STEP 4) -----------------------
test_set <- read.table("test/X_test.txt", header = FALSE)
names(test_set) <- features[, 2]
test_activity <- read.table("test/y_test.txt", header = FALSE)
names(test_activity) <- "activity"
test_subject <- read.table("test/subject_test.txt", header = FALSE)
names(test_subject) <- "subject"

# ---- Training split: same treatment (STEP 4) --------------------------------
train_set <- read.table("train/X_train.txt", header = FALSE)
names(train_set) <- features[, 2]
train_activity <- read.table("train/y_train.txt", header = FALSE)
names(train_activity) <- "activity"
train_subject <- read.table("train/subject_train.txt", header = FALSE)
names(train_subject) <- "subject"

# ---- STEP 1: stack the test rows on top of the training rows ----------------
# The same test-then-train order is used for all three tables so that rows
# stay aligned when they are bound side by side below.
combined_sets <- rbind(test_set, train_set)
combined_activity <- rbind(test_activity, train_activity)
combined_subject <- rbind(test_subject, train_subject)

# ---- STEP 2: keep only the mean / standard-deviation measurements -----------
combined_sets_irrelevant_removed <- combined_sets[, cols_we_want]

# Subject and activity columns first, followed by the retained measurements.
combined_sets_final <- cbind(
  combined_subject,
  combined_activity,
  combined_sets_irrelevant_removed
)
# STEP 3: use descriptive activity names to name the activities in the data set.
# The join key is spelled out so it is visible to the reader and dplyr does not
# have to infer it from the shared column names (which also prints a message).
labeled_table <- inner_join(
  activity_labels,
  combined_sets_final,
  by = "activity"
)

# Group by activity and subject. activity_name is grouped together with the
# numeric activity code so that the readable label is carried into the summary.
grouped <- group_by(labeled_table, activity_name, activity, subject)

# STEP 5: average every remaining (non-grouping) column per group.
# summarise_each() / funs() are deprecated; across() is the supported
# replacement (requires dplyr >= 1.0.0). `.groups = "drop"` returns an
# ungrouped table instead of leaving it grouped by activity_name and activity;
# the rows and columns written to the file are the same either way.
final_output <- summarise(
  grouped,
  across(everything(), function(x) mean(x, na.rm = TRUE)),
  .groups = "drop"
)

# Write the summary to a file in the working directory - STEP 5 COMPLETE
write.table(final_output, file = "summary_data_means.txt", row.names = FALSE)