-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzip_distance.py
146 lines (117 loc) · 4.87 KB
/
zip_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""Project: echo-distance-calculator
Developed for Echo Church
https://echo.church/
Author: Anthony Lozano
Email: [email protected]
Date: 02/13/2021
Version 0.4
Desc: This tool takes in a list of addresses as a CSV text file and determines
the closest campus for each address's zip code using Google travel-time data"""
import argparse
import json
import os
import re
import sys

import pandas as pd
from numpy import random
# The Google API key is taken from the GOOGLE_MAPS_API_KEY environment variable
# so that a real key never has to be committed with the source.
# TODO: Export GOOGLE_MAPS_API_KEY (or replace the fallback below) with a proper
# key from Google API. The fallback is a randomly generated filler key.
APIKEY = os.environ.get("GOOGLE_MAPS_API_KEY") or "FOOBAR---KpZyXSvp1MWjhQ"
# Not referenced by the functions in this file; kept so the module-level
# names remain available.
distance_dict = {}
zips_seen = []
# Zip codes that were already resolved to a campus, indexed by the "Zip" column.
# search_new_zip() appends new rows to this same file.
df_zips_seen = pd.read_csv('datasets/input_zip_dist.csv', index_col=["Zip"])
# Addresses to process, indexed by the "Email" column.
df_input_address = pd.read_csv('datasets/input.csv', index_col=["Email"])
def check_if_california(zip_code):
    """Check whether a zip code is in California.

    A zip code counts as Californian when its first three digits are
    between 900 and 961 inclusive.

    Args:
        zip_code (str | int): The zip code. Values shorter than five
            characters are left-padded with zeros, so a zip code whose
            leading zero was lost in an int conversion (e.g. 09001 -> 9001)
            is not mistaken for a 900xx California code.

    Returns:
        bool: True if the zip code is in California, False otherwise.

    Raises:
        ValueError: If the first three characters are not digits.
    """
    # Restore leading zeros before looking at the three-digit prefix.
    zip_text = str(zip_code).strip().zfill(5)
    zip_three_digits = int(zip_text[0:3])
    return 899 < zip_three_digits < 962
def check_address(members_address, campus):
    """Ask the Google Distance Matrix API about one origin/destination pair.

    Args:
        members_address (str): Origin of the trip (an address or zip code).
        campus (str): Destination address.

    Returns:
        str: The decoded API response, serialised again as a JSON string.

    Raises:
        SystemExit: With exit status 1 when the request fails, the HTTP
            status is not 200, the body is not valid JSON, or the API
            answers with status "REQUEST_DENIED".
    """
    import requests

    try:
        # Passing the values through ``params`` lets requests URL-encode them,
        # so characters such as '&' or '#' in an address cannot corrupt the
        # query string. The timeout keeps a stalled connection from hanging
        # the whole run.
        r = requests.get(
            "https://maps.googleapis.com/maps/api/distancematrix/json",
            params={
                "units": "imperial",
                "origins": members_address,
                "destinations": campus,
                "key": APIKEY,
            },
            timeout=30,
        )
    except requests.RequestException as e:
        print("Error while contacting the Google API, program terminated.")
        print(e)
        sys.exit(1)

    if r.status_code != 200:
        print(f"HTTP status code {r.status_code} received, program terminated.")
        sys.exit(1)

    try:
        json_data = r.json()
    except ValueError as e:
        print('Error while parsing JSON response, program terminated.')
        print(e)
        sys.exit(1)

    if json_data.get("status") == "REQUEST_DENIED":
        print("API Key is broken or google can't find the address, please fix it")
        sys.exit(1)

    # Echo the result rows so progress is visible on the console.
    print(json_data.get("rows"))
    return json.dumps(json_data)
def search_new_zip(searching_zip_code, campus_array):
    """Find the closest campus for a zip code and append it to the zip cache.

    For a California zip code every campus in ``campus_array`` is queried
    through check_address() and the campus with the shortest travel duration
    wins (the first one wins a tie). Zip codes outside California are not
    queried and default to "NSJ". One row is appended to
    'datasets/input_zip_dist.csv' in either case.

    Args:
        searching_zip_code (str): Zip code to resolve.
        campus_array (list[str]): Campus codes to compare, e.g.
            ["NSJ", "SSJ", "SUN", "FRE"].

    Returns:
        str: Code of the closest campus, or "NSJ" when the zip code is not
        in California or no campus returned a usable route.
    """
    campus_dict = {"NSJ": "1180 Murphy Ave San Jose, CA 95131",
                   "SSJ": "6150 Snell Ave San Jose, CA 95123",
                   "SUN": "1145 E. Arques Ave Sunnyvale, CA 94085",
                   "FRE": "48989 Milmont Dr, Fremont, CA 94538"}
    # Start from the complete column set in a fixed order. The row is appended
    # without a header, so it must always line up with the existing file no
    # matter which campuses were requested or answered.
    zip_code = {f"{code}_{suffix}": None
                for code in campus_dict
                for suffix in ("DIST", "TIME")}
    closest_campus = "NSJ"

    if check_if_california(searching_zip_code):
        shortest_duration = None
        for single_campus in campus_array:
            # Unknown campus codes fall back to the NSJ address (not to the
            # bare string "NSJ", which is not a usable destination).
            campus = campus_dict.get(single_campus, campus_dict["NSJ"])
            api_data = json.loads(check_address(searching_zip_code, campus))
            try:
                element = api_data['rows'][0]['elements'][0]
                current_distance = element['distance']['value']
                current_duration = element['duration']['value']
            except (KeyError, IndexError, TypeError):
                # The response carries no distance/duration for this pair
                # (e.g. no route found); leave this campus empty.
                continue
            zip_code[f"{single_campus}_DIST"] = current_distance
            zip_code[f"{single_campus}_TIME"] = current_duration
            if shortest_duration is None or current_duration < shortest_duration:
                shortest_duration = current_duration
                closest_campus = single_campus

    zip_code["Campus"] = closest_campus
    df = pd.DataFrame(zip_code, index=[searching_zip_code])
    # Let pandas open the file itself so line endings are handled correctly
    # on every platform.
    df.to_csv('datasets/input_zip_dist.csv', mode='a', header=False)
    return closest_campus
def main():
    """Assign a campus to every input address and write the result to CSV.

    For each row of ``df_input_address`` the last 5-digit group matched in the
    "Address" text is taken as the zip code. The campus comes from
    ``df_zips_seen`` when the zip code is already known, otherwise from
    search_new_zip(). The frame, with its "Campus" column filled in, is
    written to 'datasets/output.csv'.
    """
    all_campuses = ["NSJ", "SSJ", "SUN", "FRE"]
    zip_pattern = re.compile(r'.*(\d{5}).*?$')

    # Collect the results in a plain list and assign the column once at the
    # end. Writing through ``df.iloc[i]["Campus"] = ...`` is a chained
    # assignment that may only modify a temporary copy of the row.
    if "Campus" in df_input_address.columns:
        campuses = list(df_input_address["Campus"])
    else:
        campuses = [None] * len(df_input_address)

    # Zip codes resolved during this run; avoids repeating API calls and
    # appending duplicate rows for a zip code that occurs more than once.
    resolved_this_run = {}

    for position, address in enumerate(df_input_address["Address"]):
        found = zip_pattern.findall(str(address))
        if not found:
            print(f"No 5-digit zip code found in address {address!r}, row skipped.")
            continue
        # The lookup in df_zips_seen is done with the integer value.
        f_zip_code = int(found[-1])

        if f_zip_code in resolved_this_run:
            proper_campus = resolved_this_run[f_zip_code]
        else:
            try:
                proper_campus = df_zips_seen.loc[f_zip_code]['Campus']
            except KeyError:
                # Unknown zip code: query it, keeping any leading zeros.
                proper_campus = search_new_zip(f"{f_zip_code:05d}", all_campuses)
            else:
                # Several rows for the same zip code yield a Series; the
                # first entry is used.
                if isinstance(proper_campus, pd.Series):
                    proper_campus = proper_campus.iloc[0]
            resolved_this_run[f_zip_code] = proper_campus

        campuses[position] = proper_campus

    df_input_address["Campus"] = campuses
    df_input_address.to_csv("datasets/output.csv")


if __name__ == "__main__":
    main()