#!/bin/bash
#
# 2018–2019 © Nikolay Samokhvalov [email protected]
# 2018–2019 © Postgres.ai
#
# Perform a single run of a database experiment
# Usage: use 'nancy run help' or see the corresponding code below.
#
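# A hypothetical invocation, for illustration only (option names are taken from
# dbg_cli_parameters() below; the values are placeholders, not a tested command):
#   nancy run \
#     --run-on aws --aws-ec2-type i3.large \
#     --aws-keypair-name my-keypair --aws-ssh-key-path file://./my-keypair.pem \
#     --db-dump file://./dump.sql \
#     --workload-custom-sql file://./workload.sql \
#     --delta-config "shared_buffers = 2GB"
#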
# Globals (some of them can be modified below)
KB=1024
DEBUG=false
NO_OUTPUT=false
CURRENT_TS=$(date +%Y%m%d_%H%M%S%N_%Z)
DOCKER_MACHINE="nancy-$CURRENT_TS"
DOCKER_MACHINE="${DOCKER_MACHINE//_/-}"
KEEP_ALIVE=0
DURATION_WRKLD=""
VERBOSE_OUTPUT_REDIRECT=" > /dev/null"
STDERR_DST="/dev/null"
EBS_SIZE_MULTIPLIER=5
POSTGRES_VERSION_DEFAULT=12
AWS_BLOCK_DURATION=0
MSG_PREFIX=""
declare -a RUNS # for run i: RUNS[3*i] holds delta_config, RUNS[3*i+1] delta_ddl_do, RUNS[3*i+2] delta_ddl_undo
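# A hypothetical layout of RUNS for two runs (illustration only; see the YML
# parsing code in check_cli_parameters() for how the array is actually filled):
#   RUNS=( "run0_config.conf" "run0_ddl_do.sql" "run0_ddl_undo.sql" \
#          "run1_config.conf" "run1_ddl_do.sql" "run1_ddl_undo.sql" )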
#######################################
# Attach an EBS volume containing the database backup (made with pg_basebackup)
# Globals:
# DOCKER_MACHINE, AWS_REGION, DB_EBS_VOLUME_ID
# Arguments:
# None
# Returns:
# None
#######################################
function attach_db_ebs_drive() {
docker-machine ssh $DOCKER_MACHINE "sudo sh -c \"mkdir /home/backup\""
docker-machine ssh $DOCKER_MACHINE "wget http://s3.amazonaws.com/ec2metadata/ec2-metadata"
docker-machine ssh $DOCKER_MACHINE "chmod u+x ec2-metadata"
local instance_id=$(docker-machine ssh $DOCKER_MACHINE ./ec2-metadata -i)
instance_id=${instance_id:13} # strip the "instance-id: " prefix from the ec2-metadata output
local attach_result=$(aws --region=$AWS_REGION ec2 attach-volume \
--device /dev/xvdc --volume-id $DB_EBS_VOLUME_ID --instance-id $instance_id)
sleep 10
docker-machine ssh $DOCKER_MACHINE sudo mount /dev/xvdc /home/backup
dbg $(docker-machine ssh $DOCKER_MACHINE "sudo df -h /dev/xvdc")
}
#######################################
# Print an error/warning/notice message to STDERR
# Globals:
# None
# Arguments:
# (text) Error message
# Returns:
# None
#######################################
function err() {
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')] $@" >&2
}
#######################################
# Print a debug-level message to STDOUT
# Globals:
# DEBUG
# Arguments:
# (text) Message
# Returns:
# None
#######################################
function dbg() {
if $DEBUG ; then
msg "DEBUG: $@"
fi
}
#######################################
# Print the values of CLI parameter variables for debugging
# Globals:
# All CLI parameter variables
# Arguments:
# None
# Returns:
# None
#######################################
function dbg_cli_parameters() {
START_PARAMS="--run-on: ${RUN_ON}
--container-id: ${CONTAINER_ID}
--pg-version: ${PG_VERSION}
--pg-config: ${PG_CONFIG}
--pg-config_auto: ${PG_CONFIG_AUTO}
--db-prepared-snapshot: ${DB_PREPARED_SNAPSHOT}
--db-dump: ${DB_DUMP}
--db-pgbench: '${DB_PGBENCH}'
--db-ebs-volume-id: ${DB_EBS_VOLUME_ID}
--db-local-pgdata: ${DB_LOCAL_PGDATA}
--pgdata-dir: ${PGDATA_DIR}
--db-name: ${DB_NAME}
--db-expose-port: ${DB_EXPOSE_PORT}
--commands-after-container-init: ${COMMANDS_AFTER_CONTAINER_INIT}
--sql-before-db-restore: ${SQL_BEFORE_DB_RESTORE}
--sql-after-db-restore: ${SQL_AFTER_DB_RESTORE}
--workload-custom-sql: ${WORKLOAD_CUSTOM_SQL}
--workload-pgbench: '${WORKLOAD_PGBENCH}'
--workload-real: ${WORKLOAD_REAL}
--workload-real-replay-speed: ${WORKLOAD_REAL_REPLAY_SPEED}
--workload-basis: ${WORKLOAD_BASIS}
--delta-sql_do: ${DELTA_SQL_DO}
--delta-sql_undo: ${DELTA_SQL_UNDO}
--delta-config: ${DELTA_CONFIG}
--aws-ec2-type: ${AWS_EC2_TYPE}
--aws-keypair-name: $AWS_KEYPAIR_NAME
--aws-ssh-key-path: $AWS_SSH_KEY_PATH
--aws-ebs_volume_size: ${AWS_EBS_VOLUME_SIZE}
--aws-region: ${AWS_REGION}
--aws-zone: ${AWS_ZONE}
--aws-block-duration: ${AWS_BLOCK_DURATION}
--aws-zfs: ${AWS_ZFS}
--s3-cfg-path: ${S3_CFG_PATH}
--no-perf: ${NO_PERF}
--debug: ${DEBUG}
--keep-alive: ${KEEP_ALIVE}
--tmp-path: ${TMP_PATH}
--artifacts-destination: ${ARTIFACTS_DESTINATION}
--artifacts-dirname: ${ARTIFACTS_DIRNAME}
"
if $DEBUG ; then
echo -e "Run params:
$START_PARAMS"
fi
}
#######################################
# Print a message to STDOUT
# Globals:
# None
# Arguments:
# (text) Message
# Returns:
# None
#######################################
function msg() {
if ! $NO_OUTPUT; then
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')] $@"
fi
}
#######################################
# Print a message to STDOUT without a timestamp
# Globals:
# None
# Arguments:
# (text) Message
# Returns:
# None
#######################################
function msg_wo_dt() {
if ! $NO_OUTPUT; then
echo "$@"
fi
}
#######################################
# Check path to file/directory.
# Globals:
# None
# Arguments:
# (text) name of the variable holding the
# file path (starts with 'file://' or 's3://') or any string
# Returns:
# (integer) for input starting with 's3://' always returns 0 (presence is not verified),
# for 'file://': 0 if the file exists locally; exits with an error if it does not,
# 1 if the input is empty,
# 255 otherwise (the value is treated as content rather than a path).
#######################################
function check_path() {
if [[ -z $1 ]]; then
return 1
fi
eval path=\$$1
if [[ $path =~ "s3://" ]]; then
dbg "$1 looks like a S3 file path. Warning: Its presence will not be checked!"
return 0 # we do not actually check S3 paths at the moment
elif [[ $path =~ "file://" ]]; then
dbg "$1 looks like a local file path."
path=${path/file:\/\//}
if [[ -f $path ]]; then
dbg "$path found."
eval "$1=\"$path\"" # update original variable
return 0 # file found
else
err "ERROR: File '$path' has not been found locally."
exit 1
fi
else
dbg "Value of $1 is not a file path. Use its value as a content."
return 255
fi
}
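# A minimal usage sketch (hypothetical values, for illustration only):
#   DB_DUMP="file:///tmp/dump.sql"
#   check_path DB_DUMP    # returns 0 and rewrites DB_DUMP to "/tmp/dump.sql" if the file exists
#   PG_CONFIG="shared_buffers = 1GB"
#   check_path PG_CONFIG  # returns 255: the caller then treats the value as inline content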
#######################################
# Validate CLI parameters
# Globals:
# Variables related to all CLI parameters
# Arguments:
# None
# Returns:
# None
#######################################
function check_cli_parameters() {
### Check path|value variables for empty value ###
([[ ! -z ${DELTA_SQL_DO+x} ]] && [[ -z $DELTA_SQL_DO ]]) && unset -v DELTA_SQL_DO
([[ ! -z ${DELTA_SQL_UNDO+x} ]] && [[ -z $DELTA_SQL_UNDO ]]) && unset -v DELTA_SQL_UNDO
([[ ! -z ${DELTA_CONFIG+x} ]] && [[ -z $DELTA_CONFIG ]]) && unset -v DELTA_CONFIG
([[ ! -z ${WORKLOAD_REAL+x} ]] && [[ -z $WORKLOAD_REAL ]]) && unset -v WORKLOAD_REAL
([[ ! -z ${WORKLOAD_BASIS+x} ]] && [[ -z $WORKLOAD_BASIS ]]) && unset -v WORKLOAD_BASIS
([[ ! -z ${WORKLOAD_CUSTOM_SQL+x} ]] && [[ -z $WORKLOAD_CUSTOM_SQL ]]) && unset -v WORKLOAD_CUSTOM_SQL
([[ ! -z ${WORKLOAD_PGBENCH+x} ]] && [[ -z $WORKLOAD_PGBENCH ]]) && unset -v WORKLOAD_PGBENCH
([[ ! -z ${DB_DUMP+x} ]] && [[ -z $DB_DUMP ]]) && unset -v DB_DUMP
([[ ! -z ${DB_PGBENCH+x} ]] && [[ -z $DB_PGBENCH ]]) && unset -v DB_PGBENCH
([[ ! -z ${COMMANDS_AFTER_CONTAINER_INIT+x} ]] && [[ -z $COMMANDS_AFTER_CONTAINER_INIT ]]) && unset -v COMMANDS_AFTER_CONTAINER_INIT
([[ ! -z ${SQL_BEFORE_DB_RESTORE+x} ]] && [[ -z $SQL_BEFORE_DB_RESTORE ]]) && unset -v SQL_BEFORE_DB_RESTORE
([[ ! -z ${SQL_AFTER_DB_RESTORE+x} ]] && [[ -z $SQL_AFTER_DB_RESTORE ]]) && unset -v SQL_AFTER_DB_RESTORE
([[ ! -z ${AWS_ZONE+x} ]] && [[ -z $AWS_ZONE ]]) && unset -v AWS_ZONE
([[ ! -z ${CONFIG+x} ]] && [[ -z $CONFIG ]]) && unset -v CONFIG
### CLI parameters checks ###
if [[ "${RUN_ON}" == "aws" ]]; then
if [ ! -z ${CONTAINER_ID+x} ]; then
err "ERROR: Container ID may be specified only for local runs ('--run-on localhost')."
exit 1
fi
if [[ ! -z ${DB_LOCAL_PGDATA+x} ]]; then
err "ERROR: --db-local-pgdata may be specified only for local runs ('--run-on localhost')."
exit 1
fi
if [[ ! -z ${PGDATA_DIR+x} ]]; then
err "ERROR: --db-local-pgdata may be specified only for local runs ('--run-on localhost')."
exit 1
fi
if [[ -z ${AWS_KEYPAIR_NAME+x} ]] || [[ -z ${AWS_SSH_KEY_PATH+x} ]]; then
err "ERROR: AWS keypair name and SSH key file must be specified to run on AWS EC2."
exit 1
else
check_path AWS_SSH_KEY_PATH
fi
if [[ -z ${AWS_EC2_TYPE+x} ]]; then
err "ERROR: AWS EC2 Instance type is not specified."
exit 1
fi
if [[ -z ${AWS_REGION+x} ]]; then
err "NOTICE: AWS EC2 region is not specified. 'us-east-1' will be used."
AWS_REGION='us-east-1'
fi
if [[ -z ${AWS_ZONE+x} ]]; then
err "NOTICE: AWS EC2 zone is not specified. Will be determined during the price optimization process."
fi
if [[ -z ${AWS_ZFS+x} ]]; then
err "NOTICE: Ext4 will be used for PGDATA."
else
err "NOTICE: ZFS will be used for PGDATA."
fi
if [[ -z ${AWS_BLOCK_DURATION+x} ]]; then
# See https://aws.amazon.com/en/blogs/aws/new-ec2-spot-blocks-for-defined-duration-workloads/
err "NOTICE: EC2 spot block duration is not specified. Will use 60 minutes."
AWS_BLOCK_DURATION=60
else
case $AWS_BLOCK_DURATION in
0|60|120|180|240|300|360)
dbg "Spot block duration is set to $AWS_BLOCK_DURATION minutes."
;;
*)
err "ERROR: The value of '--aws-block-duration' is invalid: $AWS_BLOCK_DURATION. Choose one of the following: 60, 120, 180, 240, 300, or 360."
exit 1
;;
esac
fi
if [[ ! -z ${AWS_EBS_VOLUME_SIZE+x} ]]; then
re='^[0-9]+$'
if ! [[ $AWS_EBS_VOLUME_SIZE =~ $re ]] ; then
err "ERROR: --aws-ebs-volume-size must be integer."
exit 1
fi
else
if [[ ! ${AWS_EC2_TYPE:0:2} == 'i3' ]]; then
err "NOTICE: EBS volume size is not given, will be calculated based on the dump file size (might be not enough)."
msg "It is recommended to specify EBS volume size explicitly (CLI option '--aws-ebs-volume-size')."
fi
fi
elif [[ "${RUN_ON}" == "localhost" ]]; then
if [[ ! -z ${CONTAINER_ID+x} ]] && [[ ! -z ${DB_LOCAL_PGDATA+x} ]]; then
err "ERROR: Both --container-id and --db-local-pgdata are provided. Cannot use --db-local-pgdata with existing container."
exit 1
fi
if [[ ! -z ${PGDATA_DIR+x} ]] && [[ ! -z ${DB_LOCAL_PGDATA+x} ]]; then
err "ERROR: Both --pgdata-dir and --db-local-pgdata are provided. Cannot use --pgdata-dir with existing PGDATA path specified by --db-local-pgdata."
exit 1
fi
if [[ ! -z ${AWS_KEYPAIR_NAME+x} ]] || [[ ! -z ${AWS_SSH_KEY_PATH+x} ]] ; then
err "ERROR: Options '--aws-keypair-name' and '--aws-ssh-key-path' may be used only with '--run-on aws'."
exit 1
fi
if [[ ! -z ${AWS_EC2_TYPE+x} ]]; then
err "ERROR: Option '--aws-ec2-type' may be used only with '--run-on aws'."
exit 1
fi
if [[ ! -z ${AWS_EBS_VOLUME_SIZE+x} ]]; then
err "ERROR: Option '--aws-ebs-volume-size' may be used only with '--run-on aws'."
exit 1
fi
if [[ ! -z ${AWS_REGION+x} ]]; then
err "ERROR: Option '--aws-region' may be used only with '--run-on aws'."
exit 1
fi
if [[ ! -z ${AWS_ZONE+x} ]]; then
err "ERROR: Option '--aws-zone' may be used only with '--run-on aws'."
exit 1
fi
if [[ ! -z ${AWS_ZFS+x} ]]; then
err "ERROR: Option '--aws-zfs' may be used only with '--run-on aws'."
exit 1
fi
if [[ "$AWS_BLOCK_DURATION" != "0" ]]; then
err "ERROR: Option '--aws-block-duration' may be used only with '--run-on aws'."
exit 1
fi
else
err "ERROR: The value for option '--run-on' is invalid: ${RUN_ON}"
exit 1
fi
if [[ -z ${PG_VERSION+x} ]]; then
err "NOTICE: The Postgres version is not specified. The default will be used: ${POSTGRES_VERSION_DEFAULT}."
PG_VERSION="$POSTGRES_VERSION_DEFAULT"
fi
if [[ "$PG_VERSION" = "9.6" ]]; then
CURRENT_LSN_FUNCTION="pg_current_xlog_location()"
else
CURRENT_LSN_FUNCTION="pg_current_wal_lsn()"
fi
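# Illustration only (assumed downstream usage, not shown in this section): the variable
# is meant to be embedded into SQL, e.g. "select $CURRENT_LSN_FUNCTION;", which expands to
# "select pg_current_xlog_location();" on 9.6 and "select pg_current_wal_lsn();" on 10+.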
if [[ -z ${TMP_PATH+x} ]]; then
TMP_PATH="/tmp"
err "NOTICE: The directory for temporary files is not specified. Default will be used: ${TMP_PATH}."
fi
# create $TMP_PATH directory if not found, then create a subdirectory
if [[ ! -d $TMP_PATH ]]; then
mkdir $TMP_PATH
fi
TMP_PATH=$(mktemp -u -d "${TMP_PATH}"/nancy_run_"$(date '+%Y%m%d_%H%M%S')_XXXXX")
if [[ ! -d $TMP_PATH ]]; then
mkdir $TMP_PATH
fi
dbg "NOTICE: Switched to a new sub-directory in the tmp directory: $TMP_PATH"
workloads_count=0
[[ ! -z ${WORKLOAD_BASIS+x} ]] && let workloads_count=$workloads_count+1
[[ ! -z ${WORKLOAD_REAL+x} ]] && let workloads_count=$workloads_count+1
[[ ! -z ${WORKLOAD_CUSTOM_SQL+x} ]] && let workloads_count=$workloads_count+1
[[ ! -z ${WORKLOAD_PGBENCH+x} ]] && let workloads_count=$workloads_count+1
if [[ "$workloads_count" -eq "0" ]]; then
err "ERROR: The workload is not defined."
exit 1
fi
if [[ $workloads_count -gt 1 ]]; then
err "ERROR: Too many kinds of workload are specified. Please specify only one."
exit 1
fi
objects_count=0
[[ ! -z ${DB_PREPARED_SNAPSHOT+x} ]] && let objects_count=$objects_count+1
[[ ! -z ${DB_DUMP+x} ]] && let objects_count=$objects_count+1
[[ ! -z ${DB_PGBENCH+x} ]] && let objects_count=$objects_count+1
[[ ! -z ${DB_EBS_VOLUME_ID+x} ]] && let objects_count=$objects_count+1
[[ ! -z ${DB_LOCAL_PGDATA+x} ]] && let objects_count=$objects_count+1
if [[ "$objects_count" -eq "0" ]]; then
err "ERROR: The object (database) is not defined."
exit 1
fi
if [[ $objects_count -gt 1 ]]; then
err "ERROR: Too many objects (ways to get PGDATA) are specified. Please specify only one."
exit 1
fi
if [[ ! -z ${DB_DUMP+x} ]]; then
check_path DB_DUMP
if [[ "$?" -ne "0" ]]; then
echo "$DB_DUMP" > $TMP_PATH/db_dump_tmp.sql
DB_DUMP="$TMP_PATH/db_dump_tmp.sql"
fi
DB_DUMP_FILENAME=$(basename $DB_DUMP)
DB_DUMP_EXT=${DB_DUMP_FILENAME##*.}
fi
if [[ -z ${DB_NAME+x} ]]; then
dbg "NOTICE: Database name is not given. Will use 'test'"
DB_NAME='test'
fi
if [[ -z ${DB_EXPOSE_PORT+x} ]]; then
DB_EXPOSE_PORT=""
else
DB_EXPOSE_PORT="-p $DB_EXPOSE_PORT:5432"
fi
if [[ -z ${PG_CONFIG+x} ]]; then
if [[ -z ${PG_CONFIG_AUTO+x} ]]; then
err "NOTICE: No PostgreSQL config is provided. Default will be used."
else
msg "Postgres config will be auto-tuned."
fi
else
check_path PG_CONFIG
if [[ "$?" -ne "0" ]]; then # TODO(NikolayS) support file:// and s3://
#err "WARNING: Value given as pg_config: '$PG_CONFIG' not found as file will use as content"
echo "$PG_CONFIG" > $TMP_PATH/pg_config_tmp.conf
PG_CONFIG="$TMP_PATH/pg_config_tmp.conf"
fi
fi
if [[ ! -z ${CONFIG+x} ]]; then # get config options from yml config file
#fill runs config
check_path CONFIG
if [[ "$?" -ne "0" ]]; then
err "ERROR: Runs config YML file not found."
exit 1;
fi
# load and parse file
source ${BASH_SOURCE%/*}/tools/parse_yaml.sh $CONFIG "yml_"
# preload runs config data
i=0
while : ; do
var_name_config="yml_run_"$i"_delta_config"
delta_config=$(eval echo \$$var_name_config)
delta_config=$(echo $delta_config | tr ";" "\n")
var_name_ddl_do="yml_run_"$i"_delta_ddl_do"
delta_ddl_do=$(eval echo \$$var_name_ddl_do)
var_name_ddl_undo="yml_run_"$i"_delta_ddl_undo"
delta_ddl_undo=$(eval echo \$$var_name_ddl_undo)
[[ -z $delta_config ]] && [[ -z $delta_ddl_do ]] && [[ -z $delta_ddl_undo ]] && break;
let j=$i*3
RUNS[$j]="$delta_config"
[[ -z $delta_config ]] && RUNS[$j]=""
RUNS[$j+1]="$delta_ddl_do"
[[ -z $delta_ddl_do ]] && RUNS[$j+1]=""
RUNS[$j+2]="$delta_ddl_undo"
[[ -z $delta_ddl_undo ]] && RUNS[$j+2]=""
let i=i+1
done
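# A hypothetical runs config YML, for illustration only (the structure is inferred
# from the yml_run_<i>_* variable names produced by tools/parse_yaml.sh, not taken
# from the project docs; semicolons in delta_config are split into separate lines above):
#   run:
#     - delta_config: "shared_buffers = 2GB;work_mem = 64MB"
#       delta_ddl_do: "create index i_test on t1 (col1);"
#       delta_ddl_undo: "drop index i_test;"
#     - delta_config: "shared_buffers = 4GB"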
# validate runs config
runs_count=${#RUNS[*]}
let runs_count=runs_count/3
dbg "YML runs config count: $runs_count"
if [[ "$runs_count" -eq "0" ]] ; then
err "ERROR: Runs config YML file do not content valid configs."
exit 1;
fi
i=0
while : ; do
let j=$i*3
let d=$j+1
let u=$j+2
delta_config=${RUNS[$j]}
delta_ddl_do=${RUNS[$d]}
delta_ddl_undo=${RUNS[$u]}
if (\
([[ -z $delta_ddl_do ]] && [[ ! -z $delta_ddl_undo ]]) \
|| ([[ ! -z $delta_ddl_do ]] && [[ -z $delta_ddl_undo ]])
); then
err "ERROR: if 'delta_ddl_do' is specified in YML run config, 'delta_ddl_undo' must be also specified, and vice versa."
exit 1;
fi
if [[ ! -z "$delta_config" ]]; then
check_path delta_config
if [[ "$?" -ne "0" ]]; then
echo "$delta_config" > $TMP_PATH/target_config_tmp_$i.conf
RUNS[$j]="$TMP_PATH/target_config_tmp_$i.conf"
fi
fi
if [[ ! -z "$delta_ddl_do" ]]; then
check_path delta_ddl_do
if [[ "$?" -ne "0" ]]; then
echo "$delta_ddl_do" > $TMP_PATH/target_ddl_do_tmp_$i.sql
RUNS[$d]="$TMP_PATH/target_ddl_do_tmp_$i.sql"
fi
fi
if [[ ! -z "$delta_ddl_undo" ]]; then
check_path delta_ddl_undo
if [[ "$?" -ne "0" ]]; then
echo "$delta_ddl_undo" > $TMP_PATH/target_ddl_undo_tmp_$i.sql
RUNS[$u]="$TMP_PATH/target_ddl_undo_tmp_$i.sql"
fi
fi
let i=$i+1
[[ "$i" -eq "$runs_count" ]] && break;
done
else # get config params from options
if ( \
([[ -z ${DELTA_SQL_UNDO+x} ]] && [[ ! -z ${DELTA_SQL_DO+x} ]]) \
|| ([[ -z ${DELTA_SQL_DO+x} ]] && [[ ! -z ${DELTA_SQL_UNDO+x} ]])
); then
err "ERROR: if '--delta-sql-do' is specified, '--delta-sql-undo' must be also specified, and vice versa."
exit 1;
fi
if [[ ! -z ${DELTA_SQL_DO+x} ]]; then
check_path DELTA_SQL_DO
if [[ "$?" -ne "0" ]]; then
echo "$DELTA_SQL_DO" > $TMP_PATH/target_ddl_do_tmp.sql
DELTA_SQL_DO="$TMP_PATH/target_ddl_do_tmp.sql"
fi
fi
if [[ ! -z ${DELTA_SQL_UNDO+x} ]]; then
check_path DELTA_SQL_UNDO
if [[ "$?" -ne "0" ]]; then
echo "$DELTA_SQL_UNDO" > $TMP_PATH/target_ddl_undo_tmp.sql
DELTA_SQL_UNDO="$TMP_PATH/target_ddl_undo_tmp.sql"
fi
fi
if [[ ! -z ${DELTA_CONFIG+x} ]]; then
check_path DELTA_CONFIG
if [[ "$?" -ne "0" ]]; then
echo "$DELTA_CONFIG" > $TMP_PATH/target_config_tmp.conf
DELTA_CONFIG="$TMP_PATH/target_config_tmp.conf"
fi
fi
RUNS[0]=$DELTA_CONFIG
RUNS[1]=$DELTA_SQL_DO
RUNS[2]=$DELTA_SQL_UNDO
fi
if [[ -z ${ARTIFACTS_DESTINATION+x} ]]; then
dbg "NOTICE: Artifacts destination is not specified. Will use ./"
ARTIFACTS_DESTINATION="."
fi
if [[ -z ${ARTIFACTS_DIRNAME+x} ]]; then
dbg "Artifacts naming is not set. Will use: '$DOCKER_MACHINE'"
ARTIFACTS_DIRNAME=$DOCKER_MACHINE
fi
if [[ ! -z ${WORKLOAD_REAL+x} ]] && ! check_path WORKLOAD_REAL; then
err "ERROR: The workload file '$WORKLOAD_REAL' not found."
exit 1
fi
if [[ ! -z ${WORKLOAD_BASIS+x} ]] && ! check_path WORKLOAD_BASIS; then
err "ERROR: The workload file '$WORKLOAD_BASIS' not found."
exit 1
fi
if [[ ! -z ${WORKLOAD_CUSTOM_SQL+x} ]]; then
check_path WORKLOAD_CUSTOM_SQL
if [[ "$?" -ne "0" ]]; then
dbg "WARNING: Value given as workload-custom-sql: '$WORKLOAD_CUSTOM_SQL' not found as file will use as content"
echo "$WORKLOAD_CUSTOM_SQL" > $TMP_PATH/workload_custom_sql_tmp.sql
WORKLOAD_CUSTOM_SQL="$TMP_PATH/workload_custom_sql_tmp.sql"
fi
fi
if [[ ! -z ${COMMANDS_AFTER_CONTAINER_INIT+x} ]]; then
check_path COMMANDS_AFTER_CONTAINER_INIT
if [[ "$?" -ne "0" ]]; then
dbg "WARNING: Value given as after_db_init_code: '$COMMANDS_AFTER_CONTAINER_INIT' not found as file will use as content"
echo "$COMMANDS_AFTER_CONTAINER_INIT" > $TMP_PATH/after_docker_init_code_tmp.sh
COMMANDS_AFTER_CONTAINER_INIT="$TMP_PATH/after_docker_init_code_tmp.sh"
fi
fi
if [[ ! -z ${SQL_AFTER_DB_RESTORE+x} ]]; then
check_path SQL_AFTER_DB_RESTORE
if [[ "$?" -ne "0" ]]; then
echo "$SQL_AFTER_DB_RESTORE" > $TMP_PATH/after_db_init_code_tmp.sql
SQL_AFTER_DB_RESTORE="$TMP_PATH/after_db_init_code_tmp.sql"
fi
fi
if [[ ! -z ${SQL_BEFORE_DB_RESTORE+x} ]]; then
check_path SQL_BEFORE_DB_RESTORE
if [[ "$?" -ne "0" ]]; then
dbg "WARNING: Value given as before_db_init_code: '$SQL_BEFORE_DB_RESTORE' not found as file will use as content"
echo "$SQL_BEFORE_DB_RESTORE" > $TMP_PATH/before_db_init_code_tmp.sql
SQL_BEFORE_DB_RESTORE="$TMP_PATH/before_db_init_code_tmp.sql"
fi
fi
### End of CLI parameters checks ###
}
### Docker tools ###
#######################################
# Create Docker machine using an AWS EC2 spot instance
# See also: https://docs.docker.com/machine/reference/create/
# Globals:
# None
# Arguments:
# (text) [1] Machine name
# (text) [2] EC2 Instance type
# (text) [3] Spot instance bid price (in dollars)
# (int) [4] AWS spot instance duration in minutes (60, 120, 180, 240, 300,
# or 360)
# (text) [5] AWS keypair to use
# (text) [6] Path to Private Key file to use for instance
# Matching public key with .pub extension should exist
# (text) [7] The AWS region to launch the instance
# (for example us-east-1, eu-central-1)
# (text) [8] The AWS zone to launch the instance in (one of a,b,c,d,e)
# Returns:
# None
#######################################
function create_ec2_docker_machine() {
msg "Attempting to provision a Docker machine in region $7 with price $3..."
docker-machine create --driver=amazonec2 \
--amazonec2-request-spot-instance \
--amazonec2-instance-type=$2 \
--amazonec2-spot-price=$3 \
--amazonec2-block-duration-minutes=$4 \
--amazonec2-keypair-name="$5" \
--amazonec2-ssh-keypath="$6" \
--amazonec2-region="$7" \
--amazonec2-zone="$8" \
$1 2> >(grep -v "failed waiting for successful resource state" >&2) &
}
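# Example call (hypothetical values, for illustration only; argument order as documented above):
#   create_ec2_docker_machine "nancy-20190101-000000-utc" "r4.large" "0.035" 60 \
#     "my-keypair" "/path/to/my-keypair.pem" "us-east-1" "a"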
#######################################
# Request destruction of a Docker machine (any platform)
# See also: https://docs.docker.com/machine/reference/rm/
# Globals:
# None
# Arguments:
# (text) Machine name
# Returns:
# None
#######################################
function destroy_docker_machine() {
# If the spot request wasn't fulfilled, there is no associated instance,
# so "docker-machine rm" will show an error, which is safe to ignore.
# We filter it out to avoid confusion.
# What is used here is called "process substitution",
# see https://www.gnu.org/software/bash/manual/bash.html#Process-Substitution
# The same trick is used in create_ec2_docker_machine() to filter out errors
# from "price-too-low" attempts; such errors arrive a few minutes
# after an attempt and are generally unexpected by the user.
cmdout=$(docker-machine rm --force $1 2> >(grep -v "unknown instance" >&2) )
msg "Termination requested for machine, current status: $cmdout"
}
#######################################
# Wait until the EC2 instance with the Docker machine is up and running
# Globals:
# None
# Arguments:
# (text) Machine name
# Returns:
# None
#######################################
function wait_ec2_docker_machine_ready() {
local machine=$1
local check_price=$2
while true; do
sleep 5
local stop_now=1
ps ax | grep "docker-machine create" | grep "$machine" >/dev/null && stop_now=0
((stop_now==1)) && return 0
if $check_price ; then
status=$( \
aws --region=$AWS_REGION ec2 describe-spot-instance-requests \
--filters="Name=launch.instance-type,Values=$AWS_EC2_TYPE" \
| jq '.SpotInstanceRequests | sort_by(.CreateTime) | .[] | .Status.Code' \
| tail -n 1
)
if [[ "$status" == "\"price-too-low\"" ]]; then
echo "price-too-low"; # this value is result of function (not message for user), to be checked later
return 0
fi
fi
done
}
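# A sketch of how a caller might combine machine creation with the price check
# (illustration only; the actual retry logic presumably lives further down in this script):
#   create_ec2_docker_machine "$DOCKER_MACHINE" "$AWS_EC2_TYPE" "$EC2_PRICE" "$AWS_BLOCK_DURATION" \
#     "$AWS_KEYPAIR_NAME" "$AWS_SSH_KEY_PATH" "$AWS_REGION" "$AWS_ZONE"
#   status=$(wait_ec2_docker_machine_ready "$DOCKER_MACHINE" true)
#   if [[ "$status" == "price-too-low" ]]; then
#     determine_actual_ec2_spot_price  # raise the bid and try again
#   fi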
#######################################
# Determine EC2 spot price from history with multiplier
# Globals:
# AWS_REGION, AWS_EC2_TYPE, EC2_PRICE
# Arguments:
# None
# Returns:
# None
# Result:
# Fill AWS_ZONE and EC2_PRICE variables, update AWS_REGION.
#######################################
function determine_history_ec2_spot_price() {
## Get max price from history and apply multiplier
# TODO detect region and/or allow to choose via options
prices=$(
aws --region=$AWS_REGION ec2 \
describe-spot-price-history --instance-types $AWS_EC2_TYPE --no-paginate \
--start-time=$(date +%s) --product-descriptions="Linux/UNIX (Amazon VPC)" \
--query 'SpotPriceHistory[*].{az:AvailabilityZone, price:SpotPrice}'
)
if [[ ! -z ${AWS_ZONE+x} ]]; then
# zone given by option
price_data=$(echo $prices | jq ".[] | select(.az == \"$AWS_REGION$AWS_ZONE\")")
else
# zone NOT given by options, will be detected from the minimum price
price_data=$(echo $prices | jq 'min_by(.price)')
fi
region=$(echo $price_data | jq '.az')
price=$(echo $price_data | jq '.price')
#region=$(echo $price_data | jq 'min_by(.price) | .az') #TODO(NikolayS) double-check zones/regions
region="${region/\"/}"
region="${region/\"/}"
price="${price/\"/}"
price="${price/\"/}"
AWS_ZONE=${region:$((${#region}-1)):1} # the zone letter is the last character of the AZ name
AWS_REGION=${region:0:$((${#region}-1))} # the region is the AZ name without the zone letter
msg "Min price from history: ${price}/h in $AWS_REGION (zone: $AWS_ZONE)."
multiplier="1.01"
price=$(echo "$price * $multiplier" | bc -l)
msg "Increased price: ${price}/h"
EC2_PRICE=$price
}
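# Worked example with hypothetical numbers: if the cheapest entry in the price history
# is {az: "us-east-1d", price: "0.0334"}, then AWS_ZONE becomes "d", AWS_REGION becomes
# "us-east-1", and EC2_PRICE = 0.0334 * 1.01 = 0.033734 (presumably used later as the spot bid).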
#######################################
# Determine actual EC2 spot price from aws error message
# Globals:
# AWS_REGION, AWS_EC2_TYPE, EC2_PRICE
# Arguments:
# None
# Returns:
# None
# Result:
# Update the EC2_PRICE variable, or stop the script if the price cannot be determined.
#######################################
function determine_actual_ec2_spot_price() {
aws --region=$AWS_REGION ec2 describe-spot-instance-requests \
--filters 'Name=status-code,Values=price-too-low' \
| grep SpotInstanceRequestId | awk '{gsub(/[,"]/, "", $2); print $2}' \
| xargs aws --region=$AWS_REGION ec2 cancel-spot-instance-requests \
--spot-instance-request-ids || true
correctPriceForLastFailedRequest=$( \
aws --region=$AWS_REGION ec2 describe-spot-instance-requests \
--filters="Name=launch.instance-type,Values=$AWS_EC2_TYPE" \
| jq '.SpotInstanceRequests[] | select(.Status.Code == "price-too-low") | .Status.Message' \
| grep -Eo '[0-9]+[.][0-9]+' | tail -n 1 \
)
if [[ ("$correctPriceForLastFailedRequest" != "") && ("$correctPriceForLastFailedRequest" != "null") ]]; then
EC2_PRICE=$correctPriceForLastFailedRequest
else
err "ERROR: Cannot determine actual price for the instance $AWS_EC2_TYPE."
exit 1
fi
}
#######################################
# (AWS only) Use ZFS on local NVMe disk or EBS drive
# Globals:
# DOCKER_MACHINE
# Arguments:
# 1 drive path (For example: /dev/nvme1 or /dev/xvdf)
# Return:
# None
#######################################
function use_aws_zfs_drive (){
drive=$1
options=""
if [[ $drive =~ "xvd" ]]; then
options="-f" # for ebs drives only
fi
# Format volume as ZFS and tune it
docker-machine ssh $DOCKER_MACHINE "sudo apt-get install -y zfsutils-linux"
docker-machine ssh $DOCKER_MACHINE "sudo rm -rf /home/storage >/dev/null 2>&1 || true"
docker-machine ssh $DOCKER_MACHINE "sudo zpool create -O compression=on \
-O atime=off \
-O recordsize=8k \
-O logbias=throughput \
-m /home/storage zpool ${drive} ${options}"
# Set ARC size as 30% of RAM
# get MemTotal (kB)
local memtotal_kb=$(docker-machine ssh $DOCKER_MACHINE "grep MemTotal /proc/meminfo | awk '{print \$2}'")
# Calculate recommended ARC size in bytes.
local arc_size_b=$(( memtotal_kb / 100 * 30 * 1024))
# If the calculated ARC is less than 1 GiB, then set it to 1 GiB.
if [[ "${arc_size_b}" -lt "1073741824" ]]; then
arc_size_b="1073741824" # 1 GiB
fi
# finally, change ARC MAX
docker-machine ssh $DOCKER_MACHINE "echo ${arc_size_b} | sudo tee /sys/module/zfs/parameters/zfs_arc_max"
docker-machine ssh $DOCKER_MACHINE "sudo cat /sys/module/zfs/parameters/zfs_arc_max"
msg "ARC MAX has been set to ${arc_size_b} bytes."
}
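# Worked example with a hypothetical MemTotal of 16384256 kB:
#   arc_size_b = 16384256 / 100 * 30 * 1024 = 5033226240 bytes (~4.7 GiB),
# which is above the 1 GiB floor, so it is written to zfs_arc_max as-is.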
#######################################
# Mount nvme drive for i3 EC2 instances
# Globals:
# DOCKER_MACHINE
# Arguments:
# None
# Returns:
# None
# Result:
# Mount drive to /home/storage of docker machine and output drive size.
#######################################
function use_ec2_nvme_drive() {
# Init i3's NVMe storage, mounting one of the existing volumes to /home/storage
# The following commands are to be executed in the docker machine itself,
# not in the container.
if [[ -z ${AWS_ZFS+x} ]]; then
# Format volume as Ext4 and tune it
docker-machine ssh $DOCKER_MACHINE "sudo mkfs.ext4 /dev/nvme0n1"
docker-machine ssh $DOCKER_MACHINE "sudo mount -o noatime \
-o data=writeback \
-o barrier=0 \
-o nobh \
/dev/nvme0n1 /home/storage || exit 115"
else
use_aws_zfs_drive "/dev/nvme0n1"
fi
docker-machine ssh $DOCKER_MACHINE "sudo df -h /home/storage"
}
#######################################
# Determine the drive size needed to store and use the database (non-i3 EC2 instances).
# Globals:
# RUN_ON, AWS_EC2_TYPE, AWS_EBS_VOLUME_SIZE, DB_DUMP, EBS_SIZE_MULTIPLIER, KB
# Arguments:
# None
# Returns:
# None
# Result:
# Update value of AWS_EBS_VOLUME_SIZE variable
#######################################
function determine_ebs_drive_size() {
# Determine dump file size
if [[ "$RUN_ON" == "aws" ]] && [[ ! ${AWS_EC2_TYPE:0:2} == "i3" ]] \
&& [[ -z ${AWS_EBS_VOLUME_SIZE+x} ]] && [[ ! -z ${DB_DUMP+x} ]]; then
dbg "Calculate EBS volume size."
local dumpFileSize=0
if [[ $DB_DUMP =~ "s3://" ]]; then
dumpFileSize=$(s3cmd info $DB_DUMP | grep "File size:" )
dumpFileSize=${dumpFileSize/File size:/}
dumpFileSize=${dumpFileSize/\t/}
dumpFileSize=${dumpFileSize// /}
dbg "S3 file size: $dumpFileSize"
elif [[ $DB_DUMP =~ "file://" ]]; then
dumpFileSize=$(stat -c%s "$DB_DUMP" | awk '{print $1}') # TODO(NikolayS) MacOS version
else
dumpFileSize=$(echo "$DB_DUMP" | wc -c)
fi
let dumpFileSize=dumpFileSize*$EBS_SIZE_MULTIPLIER
let minSize=50*$KB*$KB*$KB
local ebsSize=$minSize # 50 GB
if [[ "$dumpFileSize" -gt "$minSize" ]]; then
let ebsSize=$dumpFileSize
ebsSize=$(numfmt --to-unit=G $ebsSize) # TODO(NikolayS) coreutils are implicitly required!!
AWS_EBS_VOLUME_SIZE=$ebsSize
dbg "EBS volume size: $AWS_EBS_VOLUME_SIZE GB"
else
msg "EBS volume is not required."
fi
fi
}
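# Worked example with hypothetical numbers: a local dump of 20 GB (2*10^10 bytes)
# multiplied by EBS_SIZE_MULTIPLIER=5 gives 10^11 bytes, which exceeds the ~50 GiB
# minimum, so AWS_EBS_VOLUME_SIZE becomes 100 (GB, after numfmt --to-unit=G).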
#######################################
# Create and mount an EBS drive for non-i3 EC2 instances
# Globals:
# DOCKER_MACHINE, AWS_EBS_VOLUME_SIZE, AWS_REGION, AWS_ZONE, VOLUME_ID
# Arguments:
# None
# Returns:
# None
# Result:
# Create a new EC2 EBS volume of size $AWS_EBS_VOLUME_SIZE GB in the $AWS_REGION region.
# Fill the VOLUME_ID variable, mount the drive to /home/storage of the docker machine
# and output drive size.
#######################################
function use_ec2_ebs_drive() {
msg "Create and attach a new EBS volume (size: $AWS_EBS_VOLUME_SIZE GB)"
VOLUME_ID=$(aws --region=$AWS_REGION ec2 create-volume --size $AWS_EBS_VOLUME_SIZE --availability-zone $AWS_REGION$AWS_ZONE --volume-type gp2 | jq -r .VolumeId)
sleep 10 # wait for the volume to be created
instance_id=$(docker-machine ssh $DOCKER_MACHINE curl -s http://169.254.169.254/latest/meta-data/instance-id)
attachResult=$(aws --region=$AWS_REGION ec2 attach-volume --device /dev/xvdf --volume-id $VOLUME_ID --instance-id $instance_id)
sleep 10 # wait for the volume to be attached
if [[ -z ${AWS_ZFS+x} ]]; then
docker-machine ssh $DOCKER_MACHINE sudo mkfs.ext4 /dev/xvdf
docker-machine ssh $DOCKER_MACHINE "sudo mount -o noatime \
-o data=writeback \
-o barrier=0 \
-o nobh \
/dev/xvdf /home/storage || exit 115"
docker-machine ssh $DOCKER_MACHINE "sudo df -h /dev/xvdf"
else
use_aws_zfs_drive "/dev/xvdf"
fi
docker-machine ssh $DOCKER_MACHINE "sudo df -h /home/storage"
}
#######################################
# Print "How to connect" instructions
# Globals:
# DOCKER_MACHINE, CURRENT_TS, RUN_ON
# Arguments:
# None
# Returns:
# None
#######################################
function print_connection {
msg_wo_dt ""
msg_wo_dt " =========================================================="
if [[ "$RUN_ON" == "aws" ]]; then
msg_wo_dt " How to connect to the Docker machine:"
msg_wo_dt " docker-machine ssh ${DOCKER_MACHINE}"
msg_wo_dt " How to connect directly to the container:"
msg_wo_dt " docker \`docker-machine config ${DOCKER_MACHINE}\` exec -it pg_nancy_${CURRENT_TS} bash"
else
msg_wo_dt " How to connect to the container:"
msg_wo_dt " docker exec -it pg_nancy_${CURRENT_TS} bash"
fi
msg_wo_dt " =========================================================="
msg_wo_dt ""
}
#######################################
# Print the estimated cost of the experiment for a run on AWS
# Globals:
# None
# Arguments:
# None
# Returns:
# None
#######################################
function calc_estimated_cost {
if [[ "$RUN_ON" == "aws" ]]; then
END_TIME=$(date +%s)
DURATION=$(echo $((END_TIME-START_TIME)) | awk '{printf "%d:%02d:%02d", $1/3600, ($1/60)%60, $1%60}')
echo "All runs done for $DURATION"
let SECONDS_DURATION=$END_TIME-$START_TIME
if [[ ! -z ${EC2_PRICE+x} ]]; then
PRICE_PER_SECOND=$(echo "scale=10; $EC2_PRICE / 3600" | bc)
let DURATION_SECONDS=$END_TIME-$START_TIME
ESTIMATE_COST=$(echo "scale=10; $DURATION_SECONDS * $PRICE_PER_SECOND" | bc)
ESTIMATE_COST=$(printf "%02.03f\n" "$ESTIMATE_COST")
fi
if [[ ! -z "${ESTIMATE_COST+x}" ]]; then
echo -e "Estimated AWS cost: \$$ESTIMATE_COST"
fi
fi
}
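# Worked example with hypothetical numbers: EC2_PRICE=0.0337 ($/hour) and a run of
# 1800 seconds give PRICE_PER_SECOND ≈ 0.0000093611 and
# ESTIMATE_COST = 1800 * 0.0000093611 ≈ 0.0169, printed as "Estimated AWS cost: $0.017".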
#######################################
# Wait for the keep-alive period, then stop the container and the EC2 instance.
# Also delete the temporary drive if one was created and attached (non-i3 instances).
# Globals:
# KEEP_ALIVE, MACHINE_HOME, DOCKER_MACHINE, CURRENT_TS, VOLUME_ID, DONE
# Arguments:
# None
# Returns:
# None
#######################################
function cleanup_and_exit {
local exit_code="$?" # we can detect exit code here
if [ "$KEEP_ALIVE" -gt "0" ]; then
msg "According to '--keep-alive', the spot instance with the container will be up for additional ${KEEP_ALIVE} seconds."
print_connection
sleep $KEEP_ALIVE
fi
msg "Removing temporary files..." # if exists
if [[ ! -z "${DOCKER_CONFIG+x}" ]]; then
docker $DOCKER_CONFIG exec -i ${CONTAINER_HASH} bash -c "sudo rm -rf $MACHINE_HOME"
fi
if [[ ! -z "${PGDATA_DIR+x}" ]]; then
docker $DOCKER_CONFIG exec -i ${CONTAINER_HASH} bash -c "sudo /etc/init.d/postgresql stop $VERBOSE_OUTPUT_REDIRECT"
docker $DOCKER_CONFIG exec -i ${CONTAINER_HASH} bash -c "sudo rm -rf /pgdata/* $VERBOSE_OUTPUT_REDIRECT"
fi
rm -rf "$TMP_PATH"
if [[ "$RUN_ON" == "localhost" ]]; then
msg "Remove docker container"
out=$(docker container rm -f $CONTAINER_HASH)
elif [[ "$RUN_ON" == "aws" ]]; then
destroy_docker_machine $DOCKER_MACHINE
if [ ! -z ${VOLUME_ID+x} ]; then
msg "Wait and delete volume $VOLUME_ID"
sleep 60 # wait for the machine to be removed
delvolout=$(aws ec2 delete-volume --volume-id $VOLUME_ID)
msg "Volume $VOLUME_ID deleted"
fi
else
err "ERROR: (ASSERT) must not reach this point."
exit 1