-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWeek 1 Quiz
49 lines (37 loc) · 1.55 KB
/
Week 1 Quiz
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Question 1 ----
# Count the rows of the ss06hid.csv survey extract whose VAL column equals 24,
# once with a data frame + dplyr and once with data.table.
library(dplyr)

# Download the CSV a single time; both approaches below read this local copy
# instead of fetching the same URL again.
download.file(
  "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv",
  destfile = "American_Community_Survey.csv"
)

# Approach A: base data frame + dplyr.
ACS <- read.csv("American_Community_Survey.csv")
summary(ACS)
ACS %>%
  select(VAL) %>%
  filter(VAL == 24) %>%
  count(VAL)

# Approach B (recommended): data.table -- faster, less code, one package.
ACS_dt <- data.table::fread("American_Community_Survey.csv")
print(ACS_dt) # preview of the table, as the original unassigned fread() gave
ACS_dt[VAL == 24, .N] # .N is the number of rows matching the filter
# Question 3 ----
# Read rows 18-23 / columns 7-15 of the first sheet of the downloaded workbook
# and compute sum(Zip * Ext), ignoring missing values.
# The hard part is installing the 'xlsx' package: it may require rJava to be
# installed first.
library(xlsx)

# NOTE(review): method = "curl" relies on an external curl binary being
# available on the PATH -- confirm for the target machine.
download.file(
  "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FDATA.gov_NGAP.xlsx",
  destfile = "dat.xlsx",
  method = "curl"
)

dat <- read.xlsx(
  "dat.xlsx",
  sheetIndex = 1,
  colIndex = 7:15,
  rowIndex = 18:23
)

# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
sum(dat$Zip * dat$Ext, na.rm = TRUE)
# Question 4 ----
# Count how many <zipcode> nodes in the restaurants XML document equal 21231.

# Install XML only when it is missing, rather than reinstalling on every run.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages("XML")
}
library(XML)

# NOTE(review): the URL uses http rather than https; presumably the XML parser
# cannot fetch https directly -- confirm before changing the scheme.
b_r <- xmlTreeParse(
  "http://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Frestaurants.xml",
  useInternalNodes = TRUE
)
rootnode <- xmlRoot(b_r)

# Collect the text of every <zipcode> node anywhere in the document.
zipcode <- xpathSApply(rootnode, "//zipcode", xmlValue)
zipcode_dt <- data.table::data.table(zipcode)

# The extracted values are text, so compare against a string instead of
# relying on implicit numeric-to-character coercion.
zipcode_dt[zipcode == "21231", .N]
# Question 5 ----
# Fetch the ss06pid.csv survey file, load it with data.table, and time three
# candidate expressions that work with pwgtp15 and SEX.
download.file(
  url = "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06pid.csv",
  destfile = "ACS.csv"
)
DT <- data.table::fread("ACS.csv")

# Quick look at the table and at the column being averaged.
head(DT)
head(DT$pwgtp15)

# Mean of pwgtp15 for each value of SEX (data.table grouping).
DT[, mean(pwgtp15), by = SEX]

# Timings of the candidates, one per call.
# Candidate 1: row subset for SEX == 1 only, then mean.
system.time(
  mean(DT[DT$SEX == 1, ]$pwgtp15)
)
# Candidate 2: split the column by SEX, then sapply mean over the pieces.
system.time(
  sapply(split(DT$pwgtp15, DT$SEX), mean)
)
# Candidate 3: data.table grouped mean.
system.time(
  DT[, mean(pwgtp15), by = SEX]
)