-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcaptureData.sh
executable file
·69 lines (59 loc) · 1.6 KB
/
captureData.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/sh
# Setup for a capture run: initialise the shared state, unpack any existing
# archive and work out how wide the progress bar may be.

term=""     # set from the first line of the `term` file by the read loop
term_id=""  # derived from $term by the read loop
# Every file captured in this run is named after one shared UTC timestamp.
filename="$(date -u '+%Y-%m-%dT%H:%M:%SZ').json"
# Unpack the existing archive (if any) so this run's files are added to it.
if [ -f data.tar.gz ]; then
  tar -xzf data.tar.gz
fi
progress=0
# Arithmetic expansion ignores the blank padding some `wc` versions print,
# so no extra trimming step is needed.
total_number="$(wc -l < term)"
total_number="$(($total_number - 1))" # Since the first line is the term ID
# `tput cols` can fail or print nothing (no $TERM, no terminal, tput not
# installed); fall back to 80 columns rather than computing a negative width.
term_cols="$(tput cols 2>/dev/null)" || term_cols=""
case "$term_cols" in
  ''|*[!0-9]*) term_cols=80 ;;
esac
# Two columns are taken by the "[" and "]" that frame the bar.
avail_cols="$(($term_cols - 2))"
if [ "$avail_cols" -lt 1 ]; then
  avail_cols=1
fi
# update_progress: draw a one-line text progress bar, then advance the counter.
#
# Globals read:    progress, total_number, avail_cols
# Globals written: progress (incremented by one after drawing);
#                  chars_to_fill and i are scratch variables (POSIX sh has
#                  no `local`, so they are global too)
# Outputs:         "[", then avail_cols characters ("=" for the completed
#                  share, " " for the rest), then "]" and a carriage return so
#                  the next call redraws over the same line.
update_progress () {
  if [ "$total_number" -gt 0 ]; then
    chars_to_fill="$(($progress * $avail_cols / $total_number))"
  else
    # Nothing to process: dividing by zero would abort the drawing, so show
    # the bar as complete instead.
    chars_to_fill="$avail_cols"
  fi
  # Keep the filled part inside the bar even if progress overshoots the total.
  if [ "$chars_to_fill" -gt "$avail_cols" ]; then
    chars_to_fill="$avail_cols"
  fi
  if [ "$chars_to_fill" -lt 0 ]; then
    chars_to_fill=0
  fi
  printf '['
  i=0
  while [ "$i" -lt "$chars_to_fill" ]; do
    printf '='
    i="$(($i + 1))"
  done
  # Pad with blanks so the closing bracket always lands in the same column.
  while [ "$i" -lt "$avail_cols" ]; do
    printf ' '
    i="$(($i + 1))"
  done
  printf ']\r'
  progress="$(($progress + 1))"
}
# Capture phase. Reads the `term` file line by line: the first line is the
# term, every further line is one class. For each class, template.json is
# rendered into captureData_temp.json, the scraper is run on it and its output
# is stored as ./data/<term>/<class>/<timestamp>.json. Afterwards one captured
# file is shown, the user is asked to confirm, and on confirmation ./data is
# packed into data.tar.gz.

# escape_sed_replacement: print $1 with every "\", "/" and "&" preceded by a
# backslash, so sed inserts the text literally as an s/// replacement.
escape_sed_replacement () {
  printf '%s\n' "$1" | sed 's|[\\/&]|\\&|g'
}

if [ ! -r term ]; then
  echo "Error: cannot read the input file 'term'." >&2
  exit 1
fi

one_class=""  # last class processed; its file is shown as the example below
failed=0      # number of classes for which the scraper exited non-zero
while IFS= read -r class; do
  if [ "$term" = "" ]; then
    # First line: the term itself; its third "_"-separated field is the ID.
    term="$class"
    term_id="$(printf '%s\n' "$term" | cut -d'_' -f3)"
    term_sed="$(escape_sed_replacement "$term")"
    term_id_sed="$(escape_sed_replacement "$term_id")"
    echo "Capturing data for term $term"
  else
    one_class="$class"
    class_sed="$(escape_sed_replacement "$class")"
    # One sed pass with three expressions replaces the former in-place edits;
    # `sed -i ''` is BSD-only syntax and fails with GNU sed.
    sed \
      -e "s/\\\$\\\$TERMID\\\$\\\$/$term_id_sed/g" \
      -e "s/\\\$\\\$TERMARRAY\\\$\\\$/$term_sed/g" \
      -e "s/\\\$\\\$CLASS\\\$\\\$/$class_sed/g" \
      template.json > captureData_temp.json
    mkdir -p "./data/$term/$class/"
    # stdin is pointed at /dev/null so the scraper cannot consume lines of
    # the `term` file that feeds this loop.
    if ! python3 ./scraper/__main__.py captureData_temp.json < /dev/null > \
        "./data/$term/$class/$filename"; then
      failed="$(($failed + 1))"
    fi
    update_progress
    sleep 0.5
  fi
done < term
# update_progress draws before it increments, so one more call is needed to
# show the bar for the final count.
update_progress
printf '\n'
if [ "$failed" -gt 0 ]; then
  echo "Warning: the scraper failed for $failed class(es)." >&2
fi
# Prompt user to check that this at least sort of worked
echo 'Example file:'
cat "./data/$term/$one_class/$filename"
# `read -p` is not POSIX (dash rejects it), so print the prompt separately.
printf 'Continue? [n] '
read -r choice
case "$choice" in
  y|Y)
    echo "Continuing..."
    ;;
  *)
    echo "Exiting. Note that cleanup may be required."
    exit 1
    ;;
esac
rm -f captureData_temp.json
# Remove the working tree only once the archive has been written; otherwise
# a failed tar would destroy the freshly captured data.
if tar -czf data.tar.gz data/; then
  rm -rf data
else
  echo "Error: could not create data.tar.gz; ./data was left in place." >&2
  exit 1
fi