diff --git a/README.md b/README.md
index 3f4163ff..5ec030a2 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,29 @@ Installing latest docker-compose:
> chmod +x docker-compose-linux-x86_64
> sudo mv docker-compose-linux-x86_64 /usr/bin/docker-compose
+### Local Solr & ZooKeeper server
+**Solr** is available at http://localhost:8983, running in cloud mode with a single node.
+The configured collection is `metis_sandbox_publish_local`:
+```
+sandbox:
+ solr:
+ hosts: http://localhost:8983/solr/metis_sandbox_publish_local
+```
+**ZooKeeper** is available at localhost:9983 (the embedded ZooKeeper started by Solr in cloud mode).
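+
+To check that the collection and the embedded ZooKeeper are up, you can query the Solr collections API and list the configuration sets in ZooKeeper. A quick sanity check (assuming the container name `metis-sandbox-solr` from `docker-compose.yml`):
+```
+# List the collections known to the local Solr Cloud node
+curl "http://localhost:8983/solr/admin/collections?action=LIST"
+
+# List the configuration sets stored in the embedded ZooKeeper
+docker exec metis-sandbox-solr solr zk ls /configs -z localhost:9983
+```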
+
+### S3 bucket with LocalStack
+Use the following example configuration for a local S3 bucket:
+```
+sandbox:
+ s3:
+ access-key: bT3iWI27KcAQyLQCIOYT
+ secret-key: pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie
+ endpoint: http://localhost:4566
+ signing-region: eu-west-2
+ thumbnails-bucket: metis-sandbox-bucket
+```
+The above keys are for local development only and should not be used outside the local setup.
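+
+To verify that the bucket was created by the LocalStack init script, you can list it with the AWS CLI pointed at the local endpoint (assuming the AWS CLI is installed on the host; the keys are the local development keys above):
+```
+AWS_ACCESS_KEY_ID=bT3iWI27KcAQyLQCIOYT \
+AWS_SECRET_ACCESS_KEY=pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie \
+aws --endpoint-url http://localhost:4566 --region eu-west-2 s3 ls
+```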
+
## API
Composed by 2 endpoints
diff --git a/docker-compose.yml b/docker-compose.yml
index 3cdb72a7..882a8d4c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,7 @@ services:
- ./src/main/resources/database/schema_lockrepository.sql:/docker-entrypoint-initdb.d/schema_lockrepository.sql
- ./src/main/resources/database/schema_validation.sql:/docker-entrypoint-initdb.d/schema_validation.sql
rabbitmq:
- image: rabbitmq:3.9.12-management
+ image: rabbitmq:3.11.2-management-alpine
container_name: metis-sandbox-rabbitmq
environment:
- RABBIT_DEFAULT_VHOST=/
@@ -28,7 +28,7 @@ services:
- '5672:5672'
- '15672:15672'
mongo:
- image: mongo:4.2.9
+ image: mongo:6.0.12
container_name: metis-sandbox-mongo
environment:
MONGO_INITDB_DATABASE: metis-sandbox
@@ -36,6 +36,32 @@ services:
MONGO_INITDB_ROOT_PASSWORD: guest
ports:
- '27017:27017'
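+  # Local Solr Cloud node with embedded ZooKeeper (port 9983), built from docker/solr/Dockerfile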
+ solr:
+ build:
+ context: docker/solr/
+ dockerfile: Dockerfile
+ container_name: metis-sandbox-solr
+ ports:
+ - "8983:8983"
+ - "9983:9983"
+ entrypoint:
+ - docker-entrypoint.sh
+ - solr
+ - start
+ - -c
+ - -f
+
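+  # LocalStack provides a local S3 endpoint; bucket.sh creates the thumbnails bucket once the service is ready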
+ localstack-s3:
+ image: localstack/localstack:s3-latest
+ environment:
+ - DEBUG=${DEBUG:-0}
+ ports:
+ - "4566:4566"
+ volumes:
+ - "${LOCALSTACK_VOLUME_DIR:-./docker/volume}:/var/lib/localstack"
+ - "/var/run/docker.sock:/var/run/docker.sock"
+ - "./docker/localstack/bucket.sh:/etc/localstack/init/ready.d/bucket.sh"
+
metis-sandbox-local:
image: europeana/metis-sandbox:develop
container_name: metis-sandbox-local
diff --git a/docker/localstack/bucket.sh b/docker/localstack/bucket.sh
new file mode 100755
index 00000000..2d9be342
--- /dev/null
+++ b/docker/localstack/bucket.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
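+# LocalStack "ready" hook (mounted into /etc/localstack/init/ready.d by docker-compose):
+# installs the AWS CLI tooling and creates the S3 bucket used for sandbox thumbnails.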
+pip install --upgrade pip
+# Reinstall the AWS CLI tooling; -y keeps pip uninstall non-interactive inside the hook
+pip uninstall -y awscli
+pip install awscli
+pip uninstall -y awscli-local
+pip install awscli-local
+
+export AWS_ACCESS_KEY_ID=bT3iWI27KcAQyLQCIOYT AWS_SECRET_ACCESS_KEY=pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie
+awslocal s3api create-bucket --bucket metis-sandbox-bucket
diff --git a/docker/solr/Dockerfile b/docker/solr/Dockerfile
new file mode 100644
index 00000000..17d3dd14
--- /dev/null
+++ b/docker/solr/Dockerfile
@@ -0,0 +1,23 @@
+FROM solr:7.7.3-slim
+USER 0
+# Install the tools required by solr-schema.sh and fetch the Europeana search repository
+# containing the Solr configuration.
+RUN apt-get update \
+    && apt-get install -y git rsync curl nano \
+    && apt-get clean \
+    && git clone https://github.com/europeana/search
+COPY solr-schema.sh /opt/solr/search
+COPY europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar /opt/solr/contrib/lib/europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar
+RUN chown -R solr:solr /opt/solr/search \
+ && chown -R solr:solr /opt/solr/contrib/lib \
+ && chmod ug+x /opt/solr/search/solr-schema.sh
+USER solr
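+# Pre-create the publish collection and apply the Europeana search schema at image build
+# time, so the collection is ready as soon as the container starts.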
+RUN solr start -c \
+ && solr create_collection -c metis_sandbox_publish_local -p 8983 \
+ && solr stop \
+ && mkdir -p /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf \
+ && cp /opt/solr/search/solr_confs/metadata/conf/query_aliases.xml /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf/query_aliases.xml \
+ && solr start -c \
+ && cd /opt/solr/search && /opt/solr/search/solr-schema.sh \
+ && solr stop
diff --git a/docker/solr/solr-schema.sh b/docker/solr/solr-schema.sh
new file mode 100755
index 00000000..d8f03d5c
--- /dev/null
+++ b/docker/solr/solr-schema.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
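+# Applies the Europeana Solr configuration to the local publish collection: checks out
+# the requested branch of the search repository, uploads the configuration to the
+# embedded ZooKeeper and points the collection at the new configuration.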
+
+function main(){
+ declare_common_fields
+ declare_multiple_environments_fields
+ set_chosen_environment_fields
+ print_chosen_environment_and_options
+ git_get_selected_branch # Should set the COMMIT_HASH
+ rsync_local_dir_to_remote_dir
+ git_remove_pull_request_branches
+ zookeeper_find_current_and_old_configurations
+ zookeeper_upload_and_apply_new_configuration #Expects COMMIT_HASH
+ zookeeper_remove_current_and_old_configurations
+}
+
+function declare_common_fields() {
+  #We assume git is present in the workspace from the configuration of the job. Ideally the same git directory should not be accessible from multiple jobs, to avoid conflicts.
+ GIT_REPO_DIR=$(pwd)/
+ GIT_SOLR_CONF_SUBDIR=solr_confs/metadata/conf/
+ TARGET_SOLR_CONF_ROOT_DIR=/opt/solr/solr_configurations/
+ PULL_REQUEST_PREFIX=pull_request_
+}
+
+function declare_multiple_environments_fields() {
+ #The server has to have a zookeeper running for uploading the configuration.
+ ENVIRONMENT="LOCAL"
+ INDEX_ENVIRONMENT="PUBLISH"
+ BRANCH_OR_PR_NUMBER="master"
+ LOCAL_SOLR_SERVER=metis-sandbox-solr
+ LOCAL_ZOOKEEPER_SERVER=localhost
+ LOCAL_ZOOKEEPER_PORT="9983"
+ LOCAL_SOLR_PORT="8983"
+ LOCAL_SOLR_BINARIES_DIR=/opt/solr/
+ LOCAL_PUBLISH_COLLECTION=metis_sandbox_publish_local
+ LOCAL_PUBLISH_SOLR_CONF_DIR=local_publishConf
+}
+
+function set_chosen_environment_fields() {
+ #Initialize variables based on the chosen environment
+ TARGET_COMMAND_SERVER=${LOCAL_SOLR_SERVER}
+ ZOOKEEPER_PORT=${LOCAL_ZOOKEEPER_PORT}
+ SOLR_PORT=${LOCAL_SOLR_PORT}
+ SOLR_BINARIES_DIR=${LOCAL_SOLR_BINARIES_DIR}
+
+ COLLECTION_NAME=${LOCAL_PUBLISH_COLLECTION}
+ TARGET_SOLR_CONF_DIR=${LOCAL_PUBLISH_SOLR_CONF_DIR}
+}
+
+function print_chosen_environment_and_options() {
+ printf "%-40s \n" "Selected environment is:"
+ printf "%-40s %s\n" "Environment selected:" "${ENVIRONMENT}"
+ printf "%-40s %s\n" "Index environment selected:" "${INDEX_ENVIRONMENT}"
+ printf "%-40s %s\n" "Branch or PR specified:" "${BRANCH_OR_PR_NUMBER}"
+ printf "%-40s %s\n" "Server to execute update:" "${TARGET_COMMAND_SERVER}"
+ printf "%-40s %s\n" "Collection name chosen:" "${COLLECTION_NAME}"
+ printf "%-40s %s\n" "Target solr configuration directory:" "${TARGET_SOLR_CONF_DIR}"
+ printf "%-40s %s\n" "Zookeeper port:" "${ZOOKEEPER_PORT}"
+ printf "%-40s %s\n" "Solr port:" "${SOLR_PORT}"
+}
+
+function git_get_selected_branch() {
+ #Check first if there is a branch
+ git -C "${GIT_REPO_DIR}" checkout "${BRANCH_OR_PR_NUMBER}"
+ if [ "$?" -ne "0" ]; then
+ printf "WARNING: Branch: %s, could not be found. Trying pull request..\n" "${BRANCH_OR_PR_NUMBER}"
+ #Verify first if the value is actually a number
+ number_regex='^[0-9]+$'
+ if ! [[ ${BRANCH_OR_PR_NUMBER} =~ ${number_regex} ]]; then
+ printf "ERROR: Value %s is not a number. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+ exit 1
+ fi
+ git -C "${GIT_REPO_DIR}" fetch -u origin "pull/${BRANCH_OR_PR_NUMBER}/head:${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+ #Verify if BRANCH_OR_PR_NUMBER specified could be fetched
+ if [ "$?" -ne "0" ]; then
+ printf "ERROR: Could not create branch from PR with number: %s. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+ exit 1
+ fi
+ git -C "${GIT_REPO_DIR}" checkout "${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+ printf "Show git summary of PR:\n"
+ git -C "${GIT_REPO_DIR}" show --summary
+ else
+ git -C "${GIT_REPO_DIR}" pull
+ fi
+  #HEAD points at the branch or pull request branch that was just checked out
+  COMMIT_HASH="$(git -C "${GIT_REPO_DIR}" rev-parse --short HEAD | tr -d '\n')"
+}
+
+function rsync_local_dir_to_remote_dir() {
+  #TODO This could be avoided and the config sent directly to zookeeper. The downside would be that we always send all files.
+ local source="${GIT_REPO_DIR}${GIT_SOLR_CONF_SUBDIR}"
+ #local destination="$TARGET_COMMAND_SERVER:${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+ local destination="${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+ printf "Starting rsync from local directory: %s -> to directory: %s\n" "${source}" "${destination}"
+ mkdir -p "${destination}"
+ rsync --archive --compress --verbose --delete "${source}" "${destination}"
+}
+
+function git_remove_pull_request_branches() {
+  #Delete pull request branches to avoid excessive disk usage
+ #Sed trims leading and trailing spaces
+ git checkout master
+ local pull_request_branches
+ pull_request_branches=$(git branch | sed 's/^ *//;s/ *$//' | grep ${PULL_REQUEST_PREFIX})
+
+ #Set the field separator to new line
+ IFS=$'\n'
+ for pull_request_branch in $pull_request_branches
+ do
+ printf "Delete pull request: %s\n" "${pull_request_branch}"
+ git branch --delete --force "${pull_request_branch}"
+ done
+ #Reset IFS
+ IFS=$' \t\n'
+}
+
+function zookeeper_find_current_and_old_configurations() {
+ printf "Check if there is current and old configuration.\n"
+ local zookeeper_command
+ zookeeper_command=$(zookeeper_create_command "-cmd ls /configs")
+
+  #Finds configurations of format /configs/${COLLECTION_NAME}_ or auto created configurations such as /configs/${COLLECTION_NAME}.AUTOCREATED
+  #Temporarily also remove the /configs/${TARGET_SOLR_CONF_DIR}_ configurations (the first sed group)
+  CURRENT_AND_OLD_CONFIGURATION_PATHS=$(eval "${zookeeper_command}" | sed -n "s/^\s*\(\/configs\/\(${TARGET_SOLR_CONF_DIR}_[^\/]*\|${COLLECTION_NAME}_[^\/]*\|${COLLECTION_NAME}.AUTOCREATED\)\)\s.*$/\1/p")
+ echo "current and old configurations: ${CURRENT_AND_OLD_CONFIGURATION_PATHS}"
+}
+
+function zookeeper_upload_and_apply_new_configuration() {
+ local date_stamp
+ date_stamp=$(date --iso-8601=seconds)
+ local new_configuration_name=${COLLECTION_NAME}_${COMMIT_HASH}_${date_stamp}
+
+ printf "Uploading zookeeper new configuration: %s\n" "${new_configuration_name}"
+ local zookeeper_command
+ zookeeper_command=$(zookeeper_create_command "-cmd upconfig --confdir ${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR} --confname ${new_configuration_name}")
+  eval "${zookeeper_command}"
+ #Update collection to take effect of the new configuration.
+ #Using MODIFYCOLLECTION instead of RELOAD command because MODIFYCOLLECTION will re-apply the mapping between the collection with the configuration name and then reload it. This helps to avoid the collection being mapped to another configuration name.
+ printf "Starting solr MODIFYCOLLECTION command\n"
+  curl -v --get --data-urlencode "collection=${COLLECTION_NAME}" --data-urlencode "collection.configName=${new_configuration_name}" "http://localhost:${SOLR_PORT}/solr/admin/collections?action=MODIFYCOLLECTION"
+}
+
+function zookeeper_remove_current_and_old_configurations() {
+ local zookeeper_command
+ #Set the field separator to new line
+ echo "Remove current and old configuration"
+ IFS=$'\n'
+ for configuration_path in $CURRENT_AND_OLD_CONFIGURATION_PATHS
+ do
+ printf "Removing zookeeper configuration: %s\n" "${configuration_path}"
+    zookeeper_command=$(zookeeper_create_command "-cmd clear ${configuration_path}")
+    eval "${zookeeper_command}"
+ done
+ #Reset IFS
+ IFS=$' \t\n'
+}
+
+function zookeeper_create_command() {
+ #First argument should be the '-cmd' part onwards
+ local common_command_part="java -Dlog4j.configurationFile=file://${SOLR_BINARIES_DIR}server/resources/log4j2.xml -classpath .:${SOLR_BINARIES_DIR}server/lib/ext/*:${SOLR_BINARIES_DIR}server/solr-webapp/webapp/WEB-INF/lib/* org.apache.solr.cloud.ZkCLI -zkhost ${LOCAL_ZOOKEEPER_SERVER}:${ZOOKEEPER_PORT}"
+ local unique_command_part="${1}"
+ echo "${common_command_part} ${unique_command_part}"
+}
+
+function execute_remote_ssh_command(){
+  #Currently unused in the local setup; commands are executed directly instead of over ssh.
+  #ssh ${TARGET_COMMAND_SERVER} "${1}"
+  eval "${1}"
+}
+
+main "$@"
diff --git a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
index 50df7c76..5c69452d 100644
--- a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
+++ b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
@@ -38,6 +38,7 @@ AmazonS3 s3Client() {
return AmazonS3ClientBuilder
.standard()
.withCredentials(new AWSStaticCredentialsProvider(credentials))
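+        // Path-style access (http://host/bucket) is needed for local S3 emulators such as
+        // LocalStack, which typically do not resolve virtual-hosted-style bucket URLs.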
+ .withPathStyleAccessEnabled(true)
.withEndpointConfiguration(
new EndpointConfiguration(endpoint, signingRegion))
.build();