forked from gdgtoledo/katangapp-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunauto-spider.sh
executable file
·134 lines (78 loc) · 3.66 KB
/
unauto-spider.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
# Scrapes the unauto.twa.es bus pages into CSV files and derives
# json/routes.json and json/bus-stops.json from them.

# --- CSV outputs -----------------------------------------------------------
CSV_FOLDER="csv"
mkdir -p "$CSV_FOLDER"

LINEAS_CSV="$CSV_FOLDER/lineas.csv"
PARADAS_CSV="$CSV_FOLDER/paradas.csv"

# Start every run with an empty -- but existing -- aggregated stops CSV.
# The duplicate check further down greps this file before anything has been
# appended to it, so it must exist to avoid "No such file" errors.
rm -fr "$PARADAS_CSV"
: > "$PARADAS_CSV"

# Download the route list and reduce it to one "<id>;<description>" line per
# route: keep what follows `mostrarParadas(` up to `</a>`, turn `)">` into the
# field separator, then strip single quotes and all spaces.
curl -s http://unauto.twa.es/code/getlineas.php \
  | sed -n 's:.*<a href="javascript\:mostrarParadas(\(.*\)</a>.*:\1:p' \
  | sed -e 's/)">/;/g' -e "s/'//g" -e "s/ //g" \
  > "$LINEAS_CSV"

# An empty route list means the download or the scraping pattern failed;
# say so instead of silently producing empty JSON files.
if [[ ! -s "$LINEAS_CSV" ]]; then
  echo "Warning: no routes could be captured into $LINEAS_CSV" >&2
fi

# --- JSON outputs ----------------------------------------------------------
JSON_FOLDER="json"
mkdir -p "$JSON_FOLDER"

ROUTES_JSON="$JSON_FOLDER/routes.json"
BUS_STOPS_JSON="$JSON_FOLDER/bus-stops.json"

# Same reasoning as for the stops CSV: this file is grepped and sorted later
# even when no stop was ever written to it, so keep it present but empty.
rm -f "$BUS_STOPS_JSON"
: > "$BUS_STOPS_JSON"

echo "Generating the routes.json and the bus-stops.json from the CSV..."
echo '{ "routes": [ ' > "$ROUTES_JSON"

# Every CSV handled from here on is ';'-separated. The setting is global, so
# it also governs word splitting of unquoted expansions in the rest of the
# script.
IFS=";"
#######################################
# Tells whether a file already has a line beginning with a given text.
# The comparison is literal and byte-wise (no regex), and anchored at the
# start of the line, so stop id "1" is not confused with "11" and characters
# such as '.' or '*' in an address are not treated as patterns.
# Arguments: $1 - literal text the line must start with
#            $2 - file to search; a missing file counts as "no match"
# Returns:   0 when such a line exists, non-zero otherwise
#######################################
line_starts_with() {
  [[ -f "$2" ]] || return 1
  LC_ALL=C NEEDLE="$1" awk \
    'index($0, ENVIRON["NEEDLE"]) == 1 { hit = 1; exit } END { exit !hit }' \
    "$2"
}

# For every "<id>;<description>" line of the route list: capture the stops of
# the route, resolve the address of each stop, and emit the route entry in
# routes.json plus the not-yet-seen stops in paradas.csv / bus-stops.json.
# Each `read` sets IFS=';' for itself, so the field splitting does not depend
# on the global IFS, and -r keeps backslashes in the scraped data untouched.
# NOTE: scraped values are written into the JSON verbatim (no escaping).
while IFS=';' read -r routeId routeDescription; do
  PARENT_FOLDER="$CSV_FOLDER/bus-stops/linea-$routeId"
  OUTPUTFILE="$PARENT_FOLDER/paradas.csv"
  echo "Capturing BusStops for Line [$routeId] - $routeDescription ..."
  mkdir -p "$PARENT_FOLDER"

  # Take the contents of the image map, put every <area> on its own line, and
  # keep from the `mostrarInfoParadas` ones the text between `value, '` and
  # ` onmouseout`; after dropping `')` and `"` and turning `::` into `;` each
  # line reads "<idp>;<ido>". The URL is quoted because it contains `?`,
  # which the shell would otherwise treat as a glob character.
  curl -s "http://unauto.twa.es/code/getparadas.php?idl=$routeId" \
    | sed -n 's:.*<map name="imgmap" id="imgmap">\(.*\)</map>.*:\1:p' \
    | sed -e "s/<area/;<area/g" \
    | tr ';' '\012' \
    | grep 'mostrarInfoParadas' \
    | sed -n "s:.*value, '\(.*\) onmouseout.*:\1:p" \
    | sed -e "s/')//g" -e 's/"//g' -e "s/::/;/g" \
    > "$OUTPUTFILE"

  # Collect "<idp>;<ido>;<address>" lines in a temporary file. Truncating it
  # first discards leftovers from an interrupted run and guarantees that the
  # swap below has a file to move even when the route has no stops.
  ADDRESSES_TEMP_FILE="$OUTPUTFILE.tmp"
  : > "$ADDRESSES_TEMP_FILE"
  while IFS=';' read -r idp ido; do
    echo "Capturing Address for BusStop $idp with order $ido ..."
    stop_title=$(
      curl -s "http://unauto.twa.es/code/getparadas.php?idl=$routeId&idp=$idp&ido=$ido" \
        | sed -n 's:.*<h3 id="titparada">Parada\: \(.*\)</h3>.*:\1:p'
    )
    printf '%s\n' "$idp;$ido;$stop_title, Toledo, España" >> "$ADDRESSES_TEMP_FILE"
  done < "$OUTPUTFILE"

  # swap files
  mv "$ADDRESSES_TEMP_FILE" "$OUTPUTFILE"

  # generate the route entry and its bus stops in the routes.json file
  {
    printf '%s\n' ' {'
    printf '%s\n' ' "id": "'"$routeId"'",'
    printf '%s\n' ' "name": "'"$routeDescription"'",'
    printf '%s\n' ' "busStops": ['
  } >> "$ROUTES_JSON"

  while IFS=';' read -r stopId stopOrder stopAddress stopLat stopLong; do
    # `read` does not trim blanks when IFS is ';', so strip spaces here.
    STOP_ORDER=${stopOrder// /}
    STOP_LAT=${stopLat// /}
    STOP_LONG=${stopLong// /}

    printf '%s\n' ' { "id": "'"$stopId"'", "order": "'"$STOP_ORDER"'" },' >> "$ROUTES_JSON"

    # A stop shared by several routes is listed only once in paradas.csv;
    # id + address identify it.
    csv_head="$stopId;$stopAddress"
    if ! line_starts_with "$csv_head" "$PARADAS_CSV"; then
      printf '%s\n' "$csv_head;$STOP_LAT;$STOP_LONG" >> "$PARADAS_CSV"
    fi

    # bus-stops.json only gets stops with a complete coordinate pair: one
    # missing value is enough to hold the stop back.
    if [[ -z "$STOP_LAT" || -z "$STOP_LONG" ]]; then
      echo "Do not adding bus stop id [$stopId] until LatLong are present"
    else
      # The same text is used for the duplicate check and as the beginning
      # of the written line, so both always agree.
      json_head=' { "id": "'"$stopId"'", "address": "'"$stopAddress"
      if ! line_starts_with "$json_head" "$BUS_STOPS_JSON"; then
        printf '%s\n' "$json_head"'", "lat": "'"$STOP_LAT"'", "long": "'"$STOP_LONG"'" },' >> "$BUS_STOPS_JSON"
      fi
    fi
  done < "$OUTPUTFILE"

  # Drop the comma left after the last stop, then close the stop list and the
  # route entry. The comma after the route entry is removed once all routes
  # have been written.
  sed '$s/,$//' "$ROUTES_JSON" > "$ROUTES_JSON.tmp" && mv "$ROUTES_JSON.tmp" "$ROUTES_JSON"
  printf '%s\n' ' ]' >> "$ROUTES_JSON"
  printf '%s\n' ' },' >> "$ROUTES_JSON"
done < "$LINEAS_CSV"
# Sort the aggregated stops CSV. The file is created first if it is missing
# (no stop captured at all), so `sort` cannot fail and leave a stray .tmp.
touch "$PARADAS_CSV"
sort "$PARADAS_CSV" > "$PARADAS_CSV.tmp" && mv "$PARADAS_CSV.tmp" "$PARADAS_CSV"

# Close routes.json: remove the comma after the last route entry, then end
# the array and the document.
sed '$s/,$//' "$ROUTES_JSON" > "$ROUTES_JSON.tmp" && mv "$ROUTES_JSON.tmp" "$ROUTES_JSON"
echo ']}' >> "$ROUTES_JSON"

## Building bus-stops.json
# Up to here the file holds one JSON object per line, each ending in a comma
# (or it does not exist when no stop had coordinates). Sort the entries and
# remove the comma of the last one so the array is well-formed.
touch "$BUS_STOPS_JSON"
sort "$BUS_STOPS_JSON" | sed '$s/,$//' > "$BUS_STOPS_JSON.tmp"

# Rewrite the file as a complete document: header, sorted entries, footer.
{
  echo '{ "busStops": [ '
  cat "$BUS_STOPS_JSON.tmp"
  echo ']}'
} > "$BUS_STOPS_JSON"
rm -f "$BUS_STOPS_JSON.tmp"

echo 'routes.json and bus-stops.json generated.'