-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbostonhousingMLapp.py
48 lines (35 loc) · 1.42 KB
/
bostonhousingMLapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#import dependencies
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
#Load the Boston Housing Data set from sklearn.datasets and print it
from sklearn.datasets import load_boston
boston = load_boston
print(boston)
#Tranform the data set into a data frame
#datta = the data we want also known as the x values
#features_names = the column names of the data
#target = the target variable or the price of the houses also known as the y value
df_x = pd.DataFrame(boston.data, columns=boston.feature_names)
df_y = pd.DataFrame(boston.target)
#Get statistics from data set, count, mean
df_x.describe()
#Initialize the linear regression model
reg = linear_model.linearRegression()
#Split the data into 67% training and 33% testing data
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size= 0.33, random_state=42)
# Train the model with training data
reg.fit(x_train, y_train)
#Print coeffecients/weights for each feature/column of model
print(reg.coef_)
#Print predictions on test data
y_pred = reg.predict(x_test)
print(y_pred)
#Print actual values
print(y_test)
#Check the model performance/accuracy using Mean Squared Error (MSE) by numpy
print(np.mean( (y_pred-y_test)**2))
#Check the model performance/accuracy using Mean Squared Error (MSE) by sklearn
from sklearn.metrics import mean_squared_error
print( mean_squared_error(y_test, y_pred))