-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.py
130 lines (104 loc) · 4.98 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from modules.config import termid_class_map #,allmods_file_input,cst_file_input,output_path #,termids #,nontermid_path,distance_output,hour_output,fuel_output
from modules.algorithms import distance_algo,hour_algo,fuel_algo
from datetime import datetime
from haversine import haversine, Unit
from tqdm import tqdm
import pandas as pd
import argparse
import pytz
import pyreadr
import time
import warnings
# Blanket-suppress every Python warning for the whole run.
# NOTE(review): this also hides pandas deprecation warnings — consider
# narrowing the filter once the script is stable.
warnings.filterwarnings("ignore")
# Register tqdm's `progress_apply` on pandas objects; the script uses it
# for the timestamp conversions.
tqdm.pandas()

if __name__ == '__main__':
    # Command-line interface.  The three paths are mandatory: without them
    # the script would only fail later (inside a reader, or when building
    # the output file names after all the processing has already run), so
    # argparse is asked to reject the invocation up front instead.
    parser = argparse.ArgumentParser(description='Process data from a file.')
    parser.add_argument('-IA', '--input_allmods', type=str, required=True,
                        help='Path to the allmods data file.')
    parser.add_argument('-IC', '--input_cst', type=str, required=True,
                        help='Path to the cst data file.')
    parser.add_argument('-O', '--output', type=str, required=True,
                        help='Path to the output data folder.')
    # REMAINDER swallows everything that follows -t/--list, so this option
    # has to be the last one on the command line.  An empty list (the
    # default) means "process every termid found in the cst data".
    parser.add_argument('-t', '--list', nargs=argparse.REMAINDER, type=int,
                        default=[], help='Input list separated by spaces.')
    args = parser.parse_args()
def timestamp_(date):
    """Reformat a ``YYYY-MM-DDTHH:MM:SSZ`` string as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        date: Timestamp text in the exact layout ``%Y-%m-%dT%H:%M:%SZ``.

    Returns:
        The same instant rendered as ``%Y-%m-%d %H:%M:%S`` (no timezone
        conversion is performed; only the textual layout changes).

    Raises:
        ValueError: If ``date`` does not match the expected input layout.
    """
    parsed = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
    return parsed.strftime("%Y-%m-%d %H:%M:%S")
def _load_table(path, timestamp_columns):
    """Load ``path`` as a CSV, falling back to an R data file.

    The CSV is read in 10000-row chunks and stitched together, then every
    column named in ``timestamp_columns`` is run through ``timestamp_``.
    If any step of that fails, the file is read with ``pyreadr`` instead and
    returned as-is (no timestamp rewriting on that path).

    Args:
        path: Location of the input file.
        timestamp_columns: Column names whose values are passed to
            ``timestamp_`` on the CSV path.

    Returns:
        The loaded pandas DataFrame.
    """
    try:
        chunks = list(pd.read_csv(path, chunksize=10000))
        table = pd.concat(chunks, ignore_index=True)
        for column in timestamp_columns:
            table[column] = table[column].progress_apply(timestamp_)
    except Exception:
        # Deliberately broad: any CSV/format failure means "try the R
        # reader".  `Exception` (rather than a bare except) lets Ctrl-C and
        # SystemExit abort the run instead of triggering the fallback.
        # NOTE(review): the `None` key is presumably the single object of an
        # .rds file — confirm against the pyreadr documentation.
        table = pyreadr.read_r(path)[None]
    return table


df = _load_table(args.input_cst, ['ts'])
mods_df = _load_table(args.input_allmods, ['strt', 'end'])

# Restrict both tables to the termids given with -t/--list; with no list,
# process every termid present in the cst data.
if args.list:
    df = df[df['termid'].isin(args.list)]
    mods_df = mods_df[mods_df['termid'].isin(args.list)]
    termid_list = args.list
else:
    termid_list = df['termid'].unique().tolist()
def calculate_distance(row):
    """Return the haversine distance, in metres, from the previous row.

    Intended as a row-wise ``DataFrame.apply`` callback.  The previous point
    is looked up in the module-level frame ``df_1`` at label
    ``row.name - 1``, so ``df_1`` must carry a 0..n-1 index when this runs.

    Args:
        row: A row exposing ``name`` (its index label) and the ``'lt'`` /
            ``'lg'`` coordinate fields.

    Returns:
        ``0`` for the row labelled 0 (it has no predecessor), otherwise the
        distance between the previous and current ``('lt', 'lg')`` points.
    """
    position = row.name
    if position == 0:
        return 0
    previous_point = (df_1.at[position - 1, 'lt'], df_1.at[position - 1, 'lg'])
    current_point = (row['lt'], row['lg'])
    return haversine(previous_point, current_point, unit=Unit.METERS)
def Utc_to_Ist(utc_time1):
    """Convert a UTC ``YYYY-MM-DD HH:MM:SS`` value to Asia/Kolkata time.

    Args:
        utc_time1: A value whose ``str()`` form is a naive timestamp in the
            layout ``%Y-%m-%d %H:%M:%S``; it is interpreted as UTC.

    Returns:
        The corresponding Asia/Kolkata wall-clock time, formatted with the
        same ``%Y-%m-%d %H:%M:%S`` layout.

    Raises:
        ValueError: If ``str(utc_time1)`` does not match the expected layout.
    """
    naive_utc = datetime.strptime(str(utc_time1), "%Y-%m-%d %H:%M:%S")
    aware_utc = naive_utc.replace(tzinfo=pytz.UTC)
    kolkata_time = aware_utc.astimezone(pytz.timezone("Asia/Kolkata"))
    return kolkata_time.strftime("%Y-%m-%d %H:%M:%S")
def _stack_frames(frames):
    """Concatenate per-termid result frames; empty DataFrame if there are none."""
    usable = [frame for frame in frames if frame is not None]
    return pd.concat(usable) if usable else pd.DataFrame()


# termids whose processing raised; written to nontermid_dump.txt at the end.
non_termid = []
# Per-termid results are collected in plain lists and concatenated once at
# the end.  DataFrame.append was removed in pandas 2.0 (and copied the whole
# accumulated frame on every call), so it must not be used here.
# NOTE(review): assumes the three *_algo functions return DataFrames — confirm.
dist_frames, hour_frames, fuel_frames = [], [], []

# Keep only the columns the algorithms receive.  .copy() makes the column
# assignments below operate on independent frames rather than on slices.
df = df[['lt', 'lg', 'ts', 'termid', 'currentFuelVolumeTank1',
         'currentIgn', 'regNumb', 'disthav']].copy()
mods_df = mods_df[['strt', 'end', 'lev1', 'lev2', 'fuel', 'termid',
                   'timediff', 'veh']].copy()

# Shift every timestamp column from UTC to Asia/Kolkata time.
df['ts'] = df['ts'].progress_apply(Utc_to_Ist)
for column in ('strt', 'end'):
    mods_df[column] = mods_df[column].apply(Utc_to_Ist)

start_time = time.time()
for termid in tqdm(termid_list):
    try:
        # `df_1` must keep this exact module-level name and a fresh 0..n-1
        # index: calculate_distance reads the previous row from it.
        df_1 = df[df['termid'] == termid].reset_index(drop=True)
        df_1['Haversine_dist'] = df_1[['lt', 'lg']].apply(calculate_distance, axis=1)
        df_1.loc[0, 'Haversine_dist'] = 0
        mods_df_1 = mods_df[mods_df['termid'] == termid]

        dist_df = distance_algo(df_1, mods_df_1, termid)
        hour_df = hour_algo(df_1, mods_df_1)
        fuel_df = fuel_algo(df_1, mods_df_1)
    except Exception:
        # Best effort: a termid that cannot be processed is recorded and
        # skipped.  `Exception` (not a bare except) keeps Ctrl-C working.
        non_termid.append(termid)
        continue

    # Only reached when all three algorithms succeeded for this termid.
    dist_frames.append(dist_df)
    hour_frames.append(hour_df)
    fuel_frames.append(fuel_df)

final_dist = _stack_frames(dist_frames)
final_hour = _stack_frames(hour_frames)
final_fuel = _stack_frames(fuel_frames)
end_time = time.time()
execution_time = end_time - start_time

final_dist.to_csv(args.output + '/Dist_data_dump.csv')
final_hour.to_csv(args.output + '/Hour_data_dump.csv')
final_fuel.to_csv(args.output + '/Fuel_data_dump.csv')
with open(args.output + '/nontermid_dump.txt', 'w') as file:
    # One failed termid per line.
    file.writelines(str(termid) + '\n' for termid in non_termid)

print(f'Files saved successfully to this path: {args.output}')
print(f'Total execution time:{round(execution_time,2)} seconds')