forked from qdrant/vector-db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request qdrant#67 from qdrant/feat/benchmark-automation-script
feat: Scripts to automate benchmarking
- Loading branch information
Showing
13 changed files
with
335 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,4 +4,5 @@ __pycache__ | |
*.pyc | ||
NOTES.md | ||
|
||
results/* | ||
results/* | ||
tools/custom/data.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash
#
# Prepare a "custom cloud" machine for benchmarking.
#
# The machine is looked up by name in the data.json file that sits next to
# this script; its "public_ip" and "user" fields are used to reach it over
# SSH. Once the machine answers, setup_vm.sh (also next to this script) is
# streamed to it and executed with sudo.
#
# Environment:
#   SERVER_NAME  key of the machine in data.json (default: test-server-1)

set -e

# path relative to the script
SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
DATA_FILE="${SCRIPTPATH}/data.json"

SERVER_NAME=${SERVER_NAME:-test-server-1}

# --arg passes the server name to jq as data rather than splicing it into the
# filter text. -e makes jq exit non-zero when the lookup yields null, so an
# unknown server aborts here instead of looping forever on "ssh null@null".
if ! SERVER_IP=$(jq -e -r --arg name "$SERVER_NAME" '.[$name].public_ip' "$DATA_FILE"); then
  echo "No public_ip found for server '${SERVER_NAME}' in ${DATA_FILE}" >&2
  exit 1
fi

if ! SSH_USER=$(jq -e -r --arg name "$SERVER_NAME" '.[$name].user' "$DATA_FILE"); then
  echo "No user found for server '${SERVER_NAME}' in ${DATA_FILE}" >&2
  exit 1
fi

echo "Server IP: ${SERVER_IP}"

# Forget any previously recorded host key for this IP. Best effort: a missing
# known_hosts file or entry must not abort the script.
ssh-keygen -f "$HOME/.ssh/known_hosts" -R "${SERVER_IP}" || true

# Wait for server to be ready
while ! ssh -oStrictHostKeyChecking=no "${SSH_USER}@${SERVER_IP}" echo "Server is ready"; do
  sleep 1
done

# Create and install docker, poetry, etc
ssh "${SSH_USER}@${SERVER_IP}" sudo bash < "${SCRIPTPATH}/setup_vm.sh"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"benchmark-client-glove-100": { | ||
"public_ip": "aa.bb.cc.dd", | ||
"private_ip": "xx.y.z.a", | ||
"user": "root" | ||
}, | ||
"benchmark-server-glove-100": { | ||
"public_ip": "aa.bb.cc.de", | ||
"private_ip": "xx.y.z.b", | ||
"user": "root" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash
#
# Print the private IP of a custom cloud server, taken from the "private_ip"
# field of its entry in the data.json file next to this script.
#
# Usage: ./get_private_ip.sh <server_name>
# Example: ./get_private_ip.sh benchmark-server-1
#
# Exits with status 1 (and a message on stderr) when no server name is given
# or when data.json has no private_ip for that server.

set -e

if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <server_name>" >&2
  exit 1
fi

SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# --arg passes the name as data instead of interpolating it into the jq
# program. -e turns a null result into a non-zero exit, so callers never
# receive the literal string "null" as an address.
if ! SERVER_IP=$(jq -e -r --arg name "$1" '.[$name].private_ip' "${SCRIPTPATH}/data.json"); then
  echo "No private_ip found for server '$1' in ${SCRIPTPATH}/data.json" >&2
  exit 1
fi

echo "${SERVER_IP}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash
#
# Print the public IP of a custom cloud server, taken from the "public_ip"
# field of its entry in the data.json file next to this script.
#
# Usage: ./get_public_ip.sh <server_name>
# Example: ./get_public_ip.sh benchmark-server-1
#
# Exits with status 1 (and a message on stderr) when no server name is given
# or when data.json has no public_ip for that server.

set -e

if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <server_name>" >&2
  exit 1
fi

SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# --arg passes the name as data instead of interpolating it into the jq
# program. -e turns a null result into a non-zero exit, so callers never
# try to connect to the literal string "null".
if ! SERVER_IP=$(jq -e -r --arg name "$1" '.[$name].public_ip' "${SCRIPTPATH}/data.json"); then
  echo "No public_ip found for server '$1' in ${SCRIPTPATH}/data.json" >&2
  exit 1
fi

echo "${SERVER_IP}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash
#
# Print the SSH user name of a custom cloud server, taken from the "user"
# field of its entry in the data.json file next to this script.
#
# Usage: ./get_ssh_user.sh <server_name>
# Example: ./get_ssh_user.sh benchmark-server-1
#
# NOTE: when the server or the field is missing, jq -r prints the literal
# string "null" and still exits 0. That behaviour is kept on purpose so that
# callers which only use this value as a fallback keep working.

set -e

SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

# --arg passes the name as data instead of interpolating it into the jq
# program, so names containing quotes or backslashes cannot alter the filter.
SSH_USER=$(jq -r --arg name "${1}" '.[$name].user' "${SCRIPTPATH}/data.json")

echo "${SSH_USER}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/bash
#
# One-time machine setup: installs Docker (from Docker's Ubuntu apt
# repository), python3-pip and poetry, and creates a ./projects directory.
#
# The script calls apt-get and writes under /etc/apt without sudo, so it
# expects to be run with root privileges. It is safe to run more than once.

set -e
# Without pipefail a failed key download would be masked by the gpg stage.
set -o pipefail

mkdir -p projects

# Install docker

apt-get update
apt-get install -y \
  ca-certificates \
  curl \
  gnupg \
  lsb-release \
  jq

mkdir -p /etc/apt/keyrings

# --batch --yes: overwrite an existing keyring without prompting, so that a
# second run does not abort on "file exists" when no terminal is attached.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
  | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg

ARCH=$(dpkg --print-architecture)
CODENAME=$(lsb_release -cs)

echo "deb [arch=${ARCH} signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu ${CODENAME} stable" \
  | tee /etc/apt/sources.list.d/docker.list > /dev/null

apt-get update

apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin

# Smoke test: fails the script if the Docker daemon cannot run a container.
docker run hello-world

# Install poetry
# apt-get rather than apt: consistent with the rest of the script and meant
# for non-interactive use.
apt-get install -y python3-pip
python3 -m pip install poetry
python3 -m poetry --version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash
#
# Benchmark client driver.
#
# Clones (or updates) the vector-db-benchmark repository, installs its
# dependencies with poetry and runs run.py against the engine listening on
# PRIVATE_SERVER_IP. All benchmark output is appended to <VECTOR_DB>.log
# inside the repository directory.
#
# Environment:
#   DATASET            dataset name passed to run.py --datasets (required)
#   PRIVATE_SERVER_IP  host passed to run.py --host (required)
#   VECTOR_DB          engine to benchmark (default: qdrant)
#   BRANCH             git branch of the benchmark repo (default: master)

set -e
set -x

VECTOR_DB=${VECTOR_DB:-qdrant}
BRANCH=${BRANCH:-master}

# Fail early with a clear message instead of calling run.py with empty args.
: "${DATASET:?DATASET must be set}"
: "${PRIVATE_SERVER_IP:?PRIVATE_SERVER_IP must be set}"

if [[ -d "./vector-db-benchmark" ]]; then
  echo "vector-db-benchmark repo already exists"
else
  git clone https://github.com/qdrant/vector-db-benchmark
fi

cd vector-db-benchmark
git fetch && git checkout "$BRANCH" && git pull

python3 -m poetry install

LOG_FILE="${VECTOR_DB}.log"

# A pid file is only meaningful while a background run exists (non-qdrant
# branch below); remove any stale one from a previous invocation.
rm -f benchmark.pid

# if using qdrant vector db
if [[ "$VECTOR_DB" == "qdrant" ]]; then
  # Names of all single-node qdrant configurations (plain and -rps variants)
  # that match the m/ef naming scheme, one per array element.
  # "|| true": no match simply yields an empty list.
  mapfile -t QDRANT_CONFIGS < <(
    cat experiments/configurations/qdrant-single-node{-rps,}.json \
      | jq -r '.[] | .name' \
      | grep -E 'qdrant(-rps)?-m-.*-ef-.*' || true
  )

  for QDRANT_CONFIG in "${QDRANT_CONFIGS[@]}"; do
    # upload
    python3 -m poetry run python run.py \
      --engines "${QDRANT_CONFIG}" \
      --datasets "$DATASET" \
      --host "$PRIVATE_SERVER_IP" \
      --skip-search >> "$LOG_FILE" 2>&1

    # now run search (retry on errors)
    # The command is the loop condition, so a failure does not trip set -e.
    until python3 -m poetry run python run.py \
      --engines "${QDRANT_CONFIG}" \
      --datasets "$DATASET" \
      --host "$PRIVATE_SERVER_IP" \
      --skip-upload >> "$LOG_FILE" 2>&1; do
      echo "retrying" | tee -a "$LOG_FILE"
      sleep 1
    done
    echo "done" | tee -a "$LOG_FILE"
  done
else
  nohup python3 -m poetry run python run.py \
    --engines "${VECTOR_DB}-m-*-ef-*" \
    --datasets "$DATASET" \
    --host "$PRIVATE_SERVER_IP" >> "$LOG_FILE" 2>&1 &

  # $! is only defined right after a background job was started, so the pid
  # bookkeeping lives in this branch.
  PID_BENCHMARK=$!
  echo "$PID_BENCHMARK" > benchmark.pid
  # wait returns the benchmark's exit status; set -e propagates a failure.
  wait "$PID_BENCHMARK"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
#
# Benchmark server driver.
#
# Clones (or updates) the vector-db-benchmark repository, force-removes every
# running Docker container, starts the single-node compose setup of the
# requested engine and, for engines with a known health URL, polls that URL
# with curl until it responds.
#
# Environment:
#   VECTOR_DB  engine to start (default: qdrant)
#   BRANCH     git branch of the benchmark repo (default: master)

set -e
set -x

VECTOR_DB=${VECTOR_DB:-qdrant}
BRANCH=${BRANCH:-master}

if [[ -d "./vector-db-benchmark" ]]; then
  echo "vector-db-benchmark repo already exists"
else
  git clone https://github.com/qdrant/vector-db-benchmark
fi

cd vector-db-benchmark
git fetch && git checkout "$BRANCH" && git pull

# remove all running containers:
RUNNING_CONTAINERS=$(docker ps -q)
if [[ -n "$RUNNING_CONTAINERS" ]]; then
  # Deliberately unquoted: the list must split into one argument per ID.
  # shellcheck disable=SC2086
  docker container rm -f $RUNNING_CONTAINERS
fi

cd "engine/servers/${VECTOR_DB}-single-node"
docker compose up -d

# if vector DB is milvus or elasticsearch, wait for them to be up
if [[ "$VECTOR_DB" == "milvus" || "$VECTOR_DB" == "elasticsearch" ]]; then
  sleep 30 # Throws connection reset which isn't handled by --retry-connrefused in curl. So we need to wait
fi

# Define a map for database types and their health check URLs
declare -A db_health_urls
db_health_urls["milvus"]="http://localhost:19530/v1/vector/collections"
db_health_urls["qdrant"]="http://localhost:6333"
db_health_urls["elasticsearch"]="http://localhost:9200/_cluster/health"

# Check if the specified database type exists in the map
if [[ -n "${db_health_urls[$VECTOR_DB]}" ]]; then
  url="${db_health_urls[$VECTOR_DB]}"
  # Retry logic for the specified URL
  curl --max-time 120 --retry-connrefused --retry 10 --retry-delay 10 "$url"
else
  echo "Assuming engine $VECTOR_DB is already up"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/bin/bash
#
# Run the benchmark for every engine in VECTOR_DBS against one dataset.
#
# For each engine, remote/setup_benchmark_server.sh is executed on the server
# machine and then remote/setup_benchmark_client.sh on the client machine,
# both through run_remote.sh.
#
# Usage:   tools/run_benchmarks.sh <dataset> <server_name> [client_name]
# Example: tools/run_benchmarks.sh "deep-image-96-angular" "new-benchmark-server"
#
# When client_name is omitted it is derived from server_name by replacing the
# first occurrence of "server" with "client".
#
# Environment:
#   CLOUD_NAME  directory (next to this script) holding the get_*.sh helpers
#               (default: custom); exported for run_remote.sh

set -e
set -x

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <dataset> <server_name> [client_name]" >&2
  exit 1
fi

SCRIPT_PATH="$( cd "$(dirname "$0")" &>/dev/null ; pwd -P )"
export CLOUD_NAME=${CLOUD_NAME:-"custom"}

# Only referenced by the commented-out provisioning loop below.
DATASETS=("glove-100-angular" "deep-image-96" "gist-960-euclidean" "dbpedia-openai-1M-1536-angular")
VECTOR_DBS=("qdrant" "milvus" "elasticsearch" "weaviate" "redis")
BRANCH="master"

# Run only while setting up new benchmark server and client:
# Create different servers and clients for each dataset so benchmarking can be done in parallel
# for dataset in "${DATASETS[@]}"; do
#     SERVER_NAME=benchmark-client-${dataset} bash -x $SCRIPT_PATH/$CLOUD_NAME/create_and_install.sh
#     SERVER_NAME=benchmark-server-${dataset} bash -x $SCRIPT_PATH/$CLOUD_NAME/create_and_install.sh
# done

DATASET=$1
SERVER_NAME=$2

# replace "server" with "client" if 3rd argument is not passed
CLIENT_NAME=${3:-"${SERVER_NAME/server/client}"}
PRIVATE_SERVER_IP=$(bash "$SCRIPT_PATH/$CLOUD_NAME/get_private_ip.sh" "$SERVER_NAME")

# A jq -r lookup of a missing key prints the literal string "null"; do not
# hand that (or an empty value) to the client as a host name.
if [[ -z "$PRIVATE_SERVER_IP" || "$PRIVATE_SERVER_IP" == "null" ]]; then
  echo "Could not resolve private IP of server '${SERVER_NAME}'" >&2
  exit 1
fi

for VECTOR_DB in "${VECTOR_DBS[@]}"; do
  echo "Running benchmark for ${VECTOR_DB} on ${DATASET}"

  # ${VAR@A} expands to an assignment statement that recreates VAR; the
  # resulting line is prepended to the remote script by run_remote.sh.
  RUN_SCRIPT="${SCRIPT_PATH}/remote/setup_benchmark_server.sh" \
    ENV_CONTEXT="${VECTOR_DB@A} ${BRANCH@A}" \
    SERVER_NAME="${SERVER_NAME}" \
    bash -x "$SCRIPT_PATH/run_remote.sh"

  RUN_SCRIPT="${SCRIPT_PATH}/remote/setup_benchmark_client.sh" \
    ENV_CONTEXT="${VECTOR_DB@A} ${BRANCH@A} ${PRIVATE_SERVER_IP@A} ${DATASET@A}" \
    SERVER_NAME="${CLIENT_NAME}" \
    bash -x "$SCRIPT_PATH/run_remote.sh"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash
#
# Execute a local script on a remote machine with sudo.
#
# The content of ENV_CONTEXT is sent as the first line, followed by the
# content of RUN_SCRIPT; the combined stream is piped over SSH into
# "sudo bash -x" on the target machine.
#
# Environment:
#   RUN_SCRIPT   path of the local script to execute remotely (required)
#   SERVER_NAME  name of the target machine (required)
#   CLOUD_NAME   directory (next to this script) holding get_ssh_user.sh and
#                get_public_ip.sh (default: hetzner)
#   SSH_USER     overrides the user reported by get_ssh_user.sh
#   ENV_CONTEXT  variable assignments for the remote script (default: empty)

set -e

SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
CLOUD_NAME=${CLOUD_NAME:-"hetzner"}

RUN_SCRIPT=${RUN_SCRIPT:-""}
SERVER_NAME=${SERVER_NAME:-""}

# List of env variables with values to pass to remote script
# Should be constructed as `${VAR_1@A} ${VAR_2@A}`
ENV_CONTEXT=${ENV_CONTEXT:-""}

# Validate inputs before they are used in any lookup below.
if [[ -z "$RUN_SCRIPT" ]]; then
  echo "Please specify RUN_SCRIPT env variable" >&2
  exit 1
fi

if [[ ! -r "$RUN_SCRIPT" ]]; then
  echo "RUN_SCRIPT '$RUN_SCRIPT' does not exist or is not readable" >&2
  exit 1
fi

if [[ -z "$SERVER_NAME" ]]; then
  echo "Please specify SERVER_NAME env variable" >&2
  exit 1
fi

DEFAULT_SSH_USER=$(bash "$SCRIPT_PATH/$CLOUD_NAME/get_ssh_user.sh" "$SERVER_NAME")

SSH_USER=${SSH_USER:-${DEFAULT_SSH_USER}}

# Get server ip

SERVER_IP=$(bash "$SCRIPT_PATH/$CLOUD_NAME/get_public_ip.sh" "$SERVER_NAME")

# printf with a quoted argument keeps ENV_CONTEXT intact; an unquoted echo
# would word-split it (collapsing whitespace inside values) and glob-expand it.
printf '%s\n' "$ENV_CONTEXT" \
  | cat - "$RUN_SCRIPT" \
  | ssh -oStrictHostKeyChecking=no "$SSH_USER@$SERVER_IP" sudo bash -x
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash
#
# Open an interactive SSH session to a benchmark machine.
#
# Usage: tools/ssh.sh custom|hetzner <server-name>
#
# Environment (each takes precedence over the matching argument):
#   CLOUD_NAME   directory (next to this script) holding get_ssh_user.sh and
#                get_public_ip.sh
#   SERVER_NAME  name of the machine to connect to
#   SSH_USER     overrides the user reported by get_ssh_user.sh

set -e

SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
CLOUD_NAME=${CLOUD_NAME:-$1}

SERVER_NAME=${SERVER_NAME:-$2}

# Validate before use: both values are needed to locate and call the helper
# scripts below.
if [[ -z "$CLOUD_NAME" ]]; then
  echo "Please pass CLOUD_NAME env variable" >&2
  exit 1
fi

if [[ -z "$SERVER_NAME" ]]; then
  echo "Please specify SERVER_NAME env variable" >&2
  exit 1
fi

DEFAULT_SSH_USER=$(bash "$SCRIPT_PATH/$CLOUD_NAME/get_ssh_user.sh" "$SERVER_NAME")
SSH_USER=${SSH_USER:-${DEFAULT_SSH_USER}}

# Get server ip
SERVER_IP=$(bash "$SCRIPT_PATH/$CLOUD_NAME/get_public_ip.sh" "$SERVER_NAME")

ssh "$SSH_USER@$SERVER_IP"