forked from HealthCatalyst/healthcareai-py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Example1.py
66 lines (53 loc) · 2.06 KB
/
Example1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""This file is used to create and compare two models on a particular dataset.
It provides examples of reading from both CSV and SQL Server. Note that this
example can be run as-is after installing HCPyTools. After you have
found that one of the models works well on your data, move on to Example2.
"""
from healthcareai import DevelopSupervisedModel
import pandas as pd
import time
def main():
    """Fit a linear model and a random forest on the sample data and compare them.

    Reads the diabetes CSV fixture into a dataframe, prints a preview of it,
    drops two columns, runs both models through ``DevelopSupervisedModel``,
    draws the random forest feature-importance and ROC plots, and finally
    prints how long the whole run took.
    """
    start = time.time()

    # --- Option A: read the data from CSV ---
    # The literal string 'None' in the file is parsed as a missing value.
    df = pd.read_csv(
        'healthcareai/tests/fixtures/HCPyDiabetesClinical.csv',
        na_values=['None'])

    # --- Option B: read the data from SQL Server (uncomment to use) ---
    # import pyodbc
    # cnxn = pyodbc.connect("""SERVER=localhost;
    #                         DRIVER={SQL Server Native Client 11.0};
    #                         Trusted_Connection=yes;
    #                         autocommit=True""")
    #
    # df = pd.read_sql(
    #     sql="""SELECT
    #            *
    #            FROM [SAM].[dbo].[HCPyDiabetesClinical]""",
    #     con=cnxn)
    #
    # # Set None string to be None type
    # df.replace(['None'],[None],inplace=True)

    # Inspect what was pulled in: first rows, then column types.
    print(df.head())
    print(df.dtypes)

    # Remove columns that won't help machine learning.
    df = df.drop(['PatientID', 'InTestWindowFLG'], axis=1)

    # Step 1: set up the comparison of the two models.
    model = DevelopSupervisedModel(
        modeltype='classification',
        df=df,
        predictedcol='ThirtyDayReadmitFLG',
        graincol='PatientEncounterID',  # OPTIONAL
        impute=True,
        debug=False)

    # Linear model first, then the random forest.
    model.linear(cores=1)
    model.random_forest(cores=1, tune=True)

    # Random forest feature importance rankings.
    model.plot_rffeature_importance(save=False)

    # ROC plot comparing the two models.
    model.plot_roc(debug=False, save=False)

    print('\nTime:\n', time.time() - start)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()