-
Notifications
You must be signed in to change notification settings - Fork 0
/
HWineLinearRegression.R
63 lines (41 loc) · 1.51 KB
/
HWineLinearRegression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Read and Explore --------------------------------------------------------
# Load the training data from "wine.csv" (path is relative to the working
# directory) into the data frame `wine`, which the later sections fit on.
wine <- read.csv(file = "wine.csv")
# Inspect the column types and the first values of each column.
str(wine)
# Print per-column summary statistics.
summary(wine)
# One Variable Regression -------------------------------------------------
# Fit Price on AGST alone and print the coefficient table and fit statistics.
model1 <- lm(formula = Price ~ AGST, data = wine)
summary(model1)
# Print the per-observation residuals of the fit.
model1[["residuals"]]
# Sum of squared errors: the residuals squared and added up.
SSE <- sum(model1[["residuals"]]^2)
SSE
# Two Variable Regression -------------------------------------------------
# Add HarvestRain as a second predictor next to AGST.
model2 <- lm(formula = Price ~ AGST + HarvestRain, data = wine)
summary(model2)
# Sum of squared errors for this fit (smaller is better); this overwrites
# the SSE value of the previous model.
SSE <- sum(model2[["residuals"]]^2)
SSE
# All Variable Regression -------------------------------------------------
# Fit Price on all five predictors used in this script.
model3 <- lm(
  formula = Price ~ AGST + HarvestRain + WinterRain + Age + FrancePop,
  data = wine
)
summary(model3)
# Sum of squared errors for the five-predictor fit.
SSE <- sum(model3[["residuals"]]^2)
SSE
# Collinearity ------------------------------------------------------------
# Refit without FrancePop and print the resulting summary.
model4 <- lm(formula = Price ~ AGST + HarvestRain + WinterRain + Age,
             data = wine)
summary(model4)
# Pairwise correlations: WinterRain vs Price, Age vs FrancePop, and then the
# full correlation matrix over every column of `wine`.
cor(wine[["WinterRain"]], wine[["Price"]])
cor(wine[["Age"]], wine[["FrancePop"]])
cor(wine)
# Refit again with both Age and FrancePop removed, for comparison.
model5 <- lm(formula = Price ~ AGST + HarvestRain + WinterRain, data = wine)
summary(model5)
# Read Test ---------------------------------------------------------------
# Load the held-out observations from "wine_test.csv" and inspect them.
wineTest <- read.csv(file = "wine_test.csv")
str(wineTest)
# Predict Price for the test rows with model4 and print the predictions.
predictTest <- predict(model4, newdata = wineTest)
predictTest
# Out-of-sample R-squared: 1 - SSE / SST.
# SSE is the squared error of the predictions on the test set.
SSE <- sum((wineTest[["Price"]] - predictTest)^2)
# SST is the squared error of a baseline that always predicts the mean Price
# of the training data (`wine`), not the mean of the test set.
SST <- sum((wineTest[["Price"]] - mean(wine[["Price"]]))^2)
1 - SSE / SST