This repository has been archived by the owner on Mar 13, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_price_increase.py
83 lines (68 loc) · 3.39 KB
/
predict_price_increase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pandas as pd
import numpy as np
import final_modeling as fm
import final_data_clean as dc
def prepare_data(X_train_raw, affected_properties, updated_affected_properties,
                 sale_year=2016):
    '''
    Prepares the dataframes of affected properties to have the model applied.

    The input dataframes are not modified: the sale_year override is applied
    to new frames rather than written into the caller's objects.

    Args:
        X_train_raw: original training features used to fit the imputer and
            feature scaler for model training data
        affected_properties: raw features and outcome for affected properties
            without shifted distance to subway features
        updated_affected_properties: raw features and outcome for affected
            properties with shifted distance to subway features
        sale_year: value written to the 'sale_year' column of both affected
            property dataframes before processing (defaults to 2016)
    Returns:
        X_pre_lightrail: processed X features for affected properties prior to
            distance to subway shift
        X_post_lightrail: processed X features for affected properties after
            the distance to subway shift
        y_true: true outcome data for affected properties prior to shift
    '''
    # DataFrame.assign returns a new frame, so the caller's dataframes keep
    # their original 'sale_year' values (or lack of that column).
    affected_properties = affected_properties.assign(sale_year=sale_year)
    updated_affected_properties = updated_affected_properties.assign(
        sale_year=sale_year)

    # Split each frame into features and the 'price_per_sqft' outcome; only
    # the pre-shift outcome is kept as ground truth.
    X_pre_lightrail, y_true = fm.create_target_var(affected_properties,
                                                   'price_per_sqft')
    X_post_lightrail, _ = fm.create_target_var(updated_affected_properties,
                                               'price_per_sqft')

    # Both fill_na calls receive the raw training features; the training
    # frame returned by the second call is the one passed on to normalize.
    _, X_pre_lightrail = dc.fill_na(X_train_raw, X_pre_lightrail)
    X_train_filled, X_post_lightrail = dc.fill_na(X_train_raw,
                                                  X_post_lightrail)

    # NOTE(review): normalize is called twice with the same training frame;
    # this assumes dc.normalize does not modify its first argument in place
    # -- confirm against final_data_clean.
    _, X_pre_lightrail = dc.normalize(X_train_filled, X_pre_lightrail)
    _, X_post_lightrail = dc.normalize(X_train_filled, X_post_lightrail)

    return X_pre_lightrail, X_post_lightrail, y_true
def make_prediction(X_pre_lightrail, X_post_lightrail, y_true, model,
                    bbl_col, output="price_increase.csv"):
    '''
    Predicts price_per_sqft for the dataframe with original and updated subway
    information, and creates Pandas dataframe with affected BBLs, the original
    predicted values for price per square feet under the true features in the
    data, and the new predictions for price per square feet with subway
    distances reduced to 0.5 mi.
    Compares original predictions, altered predictions, and original ground
    truth of sale price outcomes prior to lightrail introduction.

    Args:
        X_pre_lightrail: processed features with the original subway distances
        X_post_lightrail: processed features with the shifted subway distances
        y_true: true outcome values for the affected properties
        model: fitted model object exposing a predict() method
        bbl_col: BBL identifiers, one per affected property
        output: destination of the CSV file; when it is a filesystem path,
            missing parent directories are created before writing
    Returns:
        predictions: dataframe with columns 'y_true', 'y_pred_prelightrail',
            'y_pred_postlightrail' and 'bbl' (also written to output)
    '''
    import os

    predicted_pre = model.predict(X_pre_lightrail)
    predicted_post = model.predict(X_post_lightrail)

    predictions = pd.DataFrame({'y_true': y_true,
                                'y_pred_prelightrail': predicted_pre,
                                'y_pred_postlightrail': predicted_post,
                                'bbl': bbl_col})

    # to_csv does not create directories; make sure the target folder exists
    # so the predictions are not lost to an OSError at write time. Only paths
    # are handled here -- file-like objects are passed straight to to_csv.
    if isinstance(output, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    predictions.to_csv(output, index=False)
    print("Pre and post-lightrail predictions written to {}".format(output))
    return predictions
def apply_model_to_lightrail(data_with_bbl, X_train_raw, model, model_name,
                             output_dir="data/results",
                             bbl_path="data/subway_bbls/QueensLightrail_full1.csv"):
    '''
    Applies a fitted model to the affected properties near the Queens Light
    Rail and returns the pre/post-lightrail predictions.

    Args:
        data_with_bbl: data passed to dc.extract_affected_properties to pick
            out the affected properties
        X_train_raw: original training features, forwarded to prepare_data
        model: fitted model, forwarded to make_prediction
        model_name: label used in the name of the output CSV file
        output_dir: directory part of the output CSV path
        bbl_path: path passed to dc.extract_affected_properties
    Returns:
        The value returned by make_prediction, which also writes the results
        to "<output_dir>/price_increase_<model_name>.csv".
    '''
    # Affected properties before and after the subway-distance shift, plus
    # their BBLs.
    extracted = dc.extract_affected_properties(data_with_bbl, bbl_path)
    pre_shift_props, post_shift_props, bbls = extracted

    csv_path = "{}/price_increase_{}.csv".format(output_dir, model_name)

    X_pre, X_post, y_actual = prepare_data(
        X_train_raw, pre_shift_props, post_shift_props)

    return make_prediction(X_pre, X_post, y_actual, model,
                           bbl_col=bbls, output=csv_path)