forked from WordPress/openverse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_sample_data.sh
executable file
·131 lines (116 loc) · 4.2 KB
/
load_sample_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#
# Load sample data into the local docker-compose stack.
#
# Usage: load_sample_data.sh [-c]
#   -c  Load only the upstream DB data; the script exits right after the
#       upstream tables have been loaded and verified.
#
# Environment (each overrides the compose service name used by the script):
#   WEB_SERVICE_NAME, CACHE_SERVICE_NAME, UPSTREAM_DB_SERVICE_NAME,
#   DB_SERVICE_NAME
set -e

WEB_SERVICE_NAME="${WEB_SERVICE_NAME:-web}"
CACHE_SERVICE_NAME="${CACHE_SERVICE_NAME:-cache}"
UPSTREAM_DB_SERVICE_NAME="${UPSTREAM_DB_SERVICE_NAME:-upstream_db}"
DB_SERVICE_NAME="${DB_SERVICE_NAME:-db}"

# Initialise explicitly so a stray `upstream_only` in the caller's environment
# cannot switch the script into upstream-only mode.
upstream_only=false

while getopts 'c' OPTION; do
  case "$OPTION" in
    c)
      echo "Loading upstream DB data..."
      upstream_only=true
      ;;
    *)
      # getopts has already reported the unknown option on stderr. As before,
      # an unknown option is not fatal; just remind the caller of the usage.
      echo "Usage: ${0##*/} [-c]" >&2
      ;;
  esac
done

# Announce the default mode whenever -c was not given (previously this message
# was only reachable by passing an invalid option).
if [ "$upstream_only" != true ]; then
  echo "Loading all sample data..."
fi
###############
# Upstream DB #
###############
#######################################
# Replace the contents of an upstream DB table with its sample CSV.
# Globals:
#   UPSTREAM_DB_SERVICE_NAME (read) - compose service that runs psql
# Arguments:
#   $1 - table name; the CSV is read from ./sample_data/sample_<table>.csv,
#        a path resolved by psql inside the container
# Returns:
#   The exit status of docker-compose exec; non-zero when psql fails.
#######################################
function load_sample_data {
  local table="${1:?load_sample_data: table name required}"

  # ON_ERROR_STOP makes psql exit non-zero as soon as the DELETE or the \copy
  # fails; without it psql keeps reading stdin and exits 0 regardless.
  #
  # The backslash-newlines are consumed by this (outer) double-quoted string,
  # so psql receives \copy as the single line it requires. The heredoc body is
  # kept at column 0 so the EOF terminator matches without tab stripping.
  docker-compose exec -T "$UPSTREAM_DB_SERVICE_NAME" bash -c "psql -v ON_ERROR_STOP=1 <<-EOF
DELETE FROM ${table};
\copy ${table} \
from './sample_data/sample_${table}.csv' \
with (FORMAT csv, HEADER true);
EOF"
}
#######################################
# Check that an upstream DB table holds the expected number of rows.
# Globals:
#   UPSTREAM_DB_SERVICE_NAME (read) - compose service that runs psql
# Arguments:
#   $1 - table name
#   $2 - expected row count (optional, defaults to 5000)
# Outputs:
#   Writes an error message to stderr on failure.
# Returns:
#   0 when the count matches; otherwise exits the script with status 1.
#######################################
function verify_loaded_data {
  local table="${1:?verify_loaded_data: table name required}"
  local expected="${2:-5000}"
  local count

  # -A unaligned, -X skip psqlrc, -q quiet, -t tuples only: psql prints just
  # the number. The heredoc body is kept at column 0 so EOF matches.
  if ! count=$(docker-compose exec -T "$UPSTREAM_DB_SERVICE_NAME" bash -c "psql -AXqt <<-EOF
SELECT COUNT(*) FROM ${table};
EOF"); then
    echo "Error: could not query row count of table ${table}." >&2
    exit 1
  fi

  # Drop stray whitespace (newlines, carriage returns) around the number.
  count="${count//[[:space:]]/}"

  # Compare as strings: with `[ ... -ne ... ]` an empty or non-numeric result
  # is a test error (status 2), which `if` treats as "no mismatch" and lets a
  # failed query pass verification.
  if [[ "$count" != "$expected" ]]; then
    echo "Error: table ${table} count differs from expected." >&2
    echo "  expected ${expected}, got '${count}'" >&2
    exit 1
  fi
}
# Fill each upstream media table from its sample CSV and check the row count
# straight away, image first and audio second.
for media_type in image audio; do
  load_sample_data "$media_type"
  verify_loaded_data "$media_type"
done

# When only the upstream data was requested, stop here: nothing below this
# point is run.
if [[ "$upstream_only" == true ]]; then
  exit 0
fi
#######
# API #
#######
# Run a shell command line inside the web service container.
# Arguments: $1 - the command line handed to `bash -c`
function run_in_web {
  docker-compose exec -T "$WEB_SERVICE_NAME" bash -c "$1"
}

# Set up API database and upstream
run_in_web "python3 manage.py migrate --noinput"

# Create a superuser ('deploy') and a user for integration testing
# ('continuous_integration'); usernames that already exist are left alone.
# Note that the Python code is indented with 4 spaces: `<<-` strips leading
# tabs only, so the space indentation reaches the Django shell untouched.
run_in_web "python3 manage.py shell <<-EOF
from django.contrib.auth.models import User
usernames = ['continuous_integration', 'deploy']
for username in usernames:
    if User.objects.filter(username=username).exists():
        print(f'User {username} already exists')
        continue
    if username == 'deploy':
        user = User.objects.create_superuser(username, f'{username}@example.com', 'deploy')
    else:
        user = User.objects.create_user(username, f'{username}@example.com', 'deploy')
    user.save()
EOF"
# Load content providers
# Resets the content_provider table, via psql in the $DB_SERVICE_NAME
# container, to a fixed set of two image and three audio providers.
# ON_ERROR_STOP makes psql exit non-zero when the DELETE or INSERT fails, so
# `set -e` aborts the script instead of carrying on with a stale table (psql
# reading from stdin otherwise continues past errors and exits 0).
# The EOF terminator is kept at column 0 so it matches without tab stripping.
docker-compose exec -T "$DB_SERVICE_NAME" bash -c "psql -v ON_ERROR_STOP=1 <<-EOF
DELETE FROM content_provider;
INSERT INTO content_provider
  (created_on, provider_identifier, provider_name, domain_name, filter_content, media_type)
VALUES
  (now(), 'flickr', 'Flickr', 'https://www.flickr.com', false, 'image'),
  (now(), 'stocksnap', 'StockSnap', 'https://stocksnap.io', false, 'image'),
  (now(), 'freesound', 'Freesound', 'https://freesound.org/', false, 'audio'),
  (now(), 'jamendo', 'Jamendo', 'https://www.jamendo.com', false, 'audio'),
  (now(), 'wikimedia_audio', 'Wikimedia', 'https://commons.wikimedia.org', false, 'audio');
EOF"
#############
# Ingestion #
#############
# Ingest and index the audio data.
# The steps run strictly in order; under `set -e` the first failing recipe
# aborts the script. What each recipe does is defined in the project's
# justfiles, which are not part of this script.
audio_model="audio"
audio_suffix="init"
audio_index="${audio_model}-${audio_suffix}"

just ingestion_server/ingest-upstream "$audio_model" "$audio_suffix"
just docker/es/wait-for-index "$audio_index"
just docker/es/wait-for-count "$audio_index"

just ingestion_server/promote "$audio_model" "$audio_suffix" "$audio_model"
just docker/es/wait-for-index "$audio_model"
just docker/es/wait-for-count "$audio_model"

just ingestion_server/create-and-populate-filtered-index "$audio_model" "$audio_suffix"
just docker/es/wait-for-index "${audio_index}-filtered"

just ingestion_server/point-alias "$audio_model" "${audio_suffix}-filtered" "${audio_model}-filtered"
just docker/es/wait-for-index "${audio_model}-filtered" "${audio_index}-filtered"
# Image ingestion is flaky, but usually works on the next attempt, so the
# ingest + wait-for-index pair is tried up to three times.
#
# errexit is suspended for the duration of the loop so that a failed attempt
# can be retried instead of aborting the script. As before, running out of
# attempts is not fatal here: the script carries on and the following
# wait-for-count step (run under `set -e` again) decides the outcome.
#
# The attempt counter is initialised by the loop itself; the previous
# `((c++)) && ((c == 3)) && break` relied on `c` being unset and would spin
# forever if a value of 3 or more was inherited from the environment.
set +e
image_max_attempts=3
image_index_ready=false
for ((image_attempt = 1; image_attempt <= image_max_attempts; image_attempt++)); do
  just ingestion_server/ingest-upstream "image" "init"
  if just docker/es/wait-for-index "image-init"; then
    image_index_ready=true
    break
  fi
  echo "Warning: image index not ready (attempt ${image_attempt}/${image_max_attempts})." >&2
done
if [ "$image_index_ready" != true ]; then
  echo "Warning: image ingestion did not succeed after ${image_max_attempts} attempts; continuing." >&2
fi
set -e
# Remaining image steps, mirroring the audio sequence from the count check
# onwards. Each recipe must succeed (`set -e` is active again); the recipes
# themselves are defined in the project's justfiles.
image_init_index="image-init"
just docker/es/wait-for-count "$image_init_index"

just ingestion_server/promote "image" "init" "image"
for es_check in wait-for-index wait-for-count; do
  just "docker/es/${es_check}" "image"
done

just ingestion_server/create-and-populate-filtered-index "image" "init"
just docker/es/wait-for-index "${image_init_index}-filtered"

just ingestion_server/point-alias "image" "init-filtered" "image-filtered"
just docker/es/wait-for-index "image-filtered" "${image_init_index}-filtered"
#########
# Redis #
#########
# Clear source cache since it's out of date after data has been loaded.
# One `del` per media type is piped into redis-cli inside the cache container,
# image first and audio second.
for cached_media in image audio; do
  docker-compose exec -T "$CACHE_SERVICE_NAME" bash -c "echo \"del :1:sources-${cached_media}\" | redis-cli"
done