diff --git a/README.md b/README.md
index 3f4163ff..5ec030a2 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,29 @@ Installing latest docker-compose:
 > chmod +x docker-compose-linux-x86_64
 > sudo mv docker-compose-linux-x86_64 /usr/bin/docker-compose
+### Local SOLR & Zookeeper server
+**SOLR** is available at http://localhost:8983, running in cloud mode with a single node.
+The configured collection is `metis_sandbox_publish_local`.
+```
+sandbox:
+  solr:
+    hosts: http://localhost:8983/solr/metis_sandbox_publish_local
+```
+**Zookeeper** is available at localhost:9983
+
+### S3 bucket with localstack
+Use the following example configuration for a local S3 bucket:
+```
+sandbox:
+  s3:
+    access-key: bT3iWI27KcAQyLQCIOYT
+    secret-key: pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie
+    endpoint: http://localhost:4566
+    signing-region: eu-west-2
+    thumbnails-bucket: metis-sandbox-bucket
+```
+The above keys are for local development only.
+
 ## API
 
 Composed by 2 endpoints
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 3cdb72a7..882a8d4c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,7 @@ services:
       - ./src/main/resources/database/schema_lockrepository.sql:/docker-entrypoint-initdb.d/schema_lockrepository.sql
       - ./src/main/resources/database/schema_validation.sql:/docker-entrypoint-initdb.d/schema_validation.sql
   rabbitmq:
-    image: rabbitmq:3.9.12-management
+    image: rabbitmq:3.11.2-management-alpine
     container_name: metis-sandbox-rabbitmq
     environment:
       - RABBIT_DEFAULT_VHOST=/
@@ -28,7 +28,7 @@
       - '5672:5672'
       - '15672:15672'
   mongo:
-    image: mongo:4.2.9
+    image: mongo:6.0.12
     container_name: metis-sandbox-mongo
     environment:
       MONGO_INITDB_DATABASE: metis-sandbox
@@ -36,6 +36,32 @@
       MONGO_INITDB_ROOT_PASSWORD: guest
     ports:
       - '27017:27017'
+  solr:
+    build:
+      context: docker/solr/
+      dockerfile: Dockerfile
+    container_name: metis-sandbox-solr
+    ports:
+      - "8983:8983"
+      - "9983:9983"
+    entrypoint:
+      - docker-entrypoint.sh
+      - solr
+      - start
+      - -c
+      - -f
+
+  localstack-s3:
+    image: localstack/localstack:s3-latest
+    environment:
+      - DEBUG=${DEBUG:-0}
+    ports:
+      - "4566:4566"
+    volumes:
+      - "${LOCALSTACK_VOLUME_DIR:-./docker/volume}:/var/lib/localstack"
+      - "/var/run/docker.sock:/var/run/docker.sock"
+      - "./docker/localstack/bucket.sh:/etc/localstack/init/ready.d/bucket.sh"
+
   metis-sandbox-local:
     image: europeana/metis-sandbox:develop
     container_name: metis-sandbox-local
diff --git a/docker/localstack/bucket.sh b/docker/localstack/bucket.sh
new file mode 100755
index 00000000..2d9be342
--- /dev/null
+++ b/docker/localstack/bucket.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+pip install --upgrade pip
+pip uninstall -y awscli
+pip install awscli
+pip uninstall -y awscli-local
+pip install awscli-local
+
+export AWS_ACCESS_KEY_ID=bT3iWI27KcAQyLQCIOYT AWS_SECRET_ACCESS_KEY=pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie
+awslocal s3api create-bucket --bucket metis-sandbox-bucket
diff --git a/docker/solr/Dockerfile b/docker/solr/Dockerfile
new file mode 100644
index 00000000..17d3dd14
--- /dev/null
+++ b/docker/solr/Dockerfile
@@ -0,0 +1,23 @@
+FROM solr:7.7.3-slim
+USER 0
+RUN apt-get update \
+    && apt-get install git -y \
+    && apt-get install rsync -y \
+    && apt-get install curl -y \
+    && apt-get install nano -y \
+    && apt-get clean \
+    && git clone https://github.com/europeana/search
+COPY solr-schema.sh /opt/solr/search
+COPY europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar /opt/solr/contrib/lib/europeana-collection-aliasing-solr6.6.5-0.0.1-SNAPSHOT.jar
+RUN chown -R solr:solr /opt/solr/search \
+    && chown -R solr:solr /opt/solr/contrib/lib \
+    && chmod ug+x /opt/solr/search/solr-schema.sh
+USER solr
+RUN solr start -c \
+    && solr create_collection -c metis_sandbox_publish_local -p 8983 \
+    && solr stop \
+    && mkdir -p /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf \
+    && cp /opt/solr/search/solr_confs/metadata/conf/query_aliases.xml /opt/solr/server/solr/metis_sandbox_publish_local_shard1_replica_n1/conf/query_aliases.xml \
+    && solr start -c \
+    && cd /opt/solr/search && /opt/solr/search/solr-schema.sh \
+    && solr stop
diff --git a/docker/solr/solr-schema.sh b/docker/solr/solr-schema.sh
new file mode 100755
index 00000000..d8f03d5c
--- /dev/null
+++ b/docker/solr/solr-schema.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+function main(){
+  declare_common_fields
+  declare_multiple_environments_fields
+  set_chosen_environment_fields
+  print_chosen_environment_and_options
+  git_get_selected_branch # Should set the COMMIT_HASH
+  rsync_local_dir_to_remote_dir
+  git_remove_pull_request_branches
+  zookeeper_find_current_and_old_configurations
+  zookeeper_upload_and_apply_new_configuration #Expects COMMIT_HASH
+  zookeeper_remove_current_and_old_configurations
+}
+
+function declare_common_fields() {
+  #We assume git is present on workspace from the configuration of the job. Ideally same git directory should not be accessible from multiple jobs, to avoid conflicts.
+  GIT_REPO_DIR=$(pwd)/
+  GIT_SOLR_CONF_SUBDIR=solr_confs/metadata/conf/
+  TARGET_SOLR_CONF_ROOT_DIR=/opt/solr/solr_configurations/
+  PULL_REQUEST_PREFIX=pull_request_
+}
+
+function declare_multiple_environments_fields() {
+  #The server has to have a zookeeper running for uploading the configuration.
+  ENVIRONMENT="LOCAL"
+  INDEX_ENVIRONMENT="PUBLISH"
+  BRANCH_OR_PR_NUMBER="master"
+  LOCAL_SOLR_SERVER=metis-sandbox-solr
+  LOCAL_ZOOKEEPER_SERVER=localhost
+  LOCAL_ZOOKEEPER_PORT="9983"
+  LOCAL_SOLR_PORT="8983"
+  LOCAL_SOLR_BINARIES_DIR=/opt/solr/
+  LOCAL_PUBLISH_COLLECTION=metis_sandbox_publish_local
+  LOCAL_PUBLISH_SOLR_CONF_DIR=local_publishConf
+}
+
+function set_chosen_environment_fields() {
+  #Initialize variables based on the chosen environment
+  TARGET_COMMAND_SERVER=${LOCAL_SOLR_SERVER}
+  ZOOKEEPER_PORT=${LOCAL_ZOOKEEPER_PORT}
+  SOLR_PORT=${LOCAL_SOLR_PORT}
+  SOLR_BINARIES_DIR=${LOCAL_SOLR_BINARIES_DIR}
+
+  COLLECTION_NAME=${LOCAL_PUBLISH_COLLECTION}
+  TARGET_SOLR_CONF_DIR=${LOCAL_PUBLISH_SOLR_CONF_DIR}
+}
+
+function print_chosen_environment_and_options() {
+  printf "%-40s \n" "Selected environment is:"
+  printf "%-40s %s\n" "Environment selected:" "${ENVIRONMENT}"
+  printf "%-40s %s\n" "Index environment selected:" "${INDEX_ENVIRONMENT}"
+  printf "%-40s %s\n" "Branch or PR specified:" "${BRANCH_OR_PR_NUMBER}"
+  printf "%-40s %s\n" "Server to execute update:" "${TARGET_COMMAND_SERVER}"
+  printf "%-40s %s\n" "Collection name chosen:" "${COLLECTION_NAME}"
+  printf "%-40s %s\n" "Target solr configuration directory:" "${TARGET_SOLR_CONF_DIR}"
+  printf "%-40s %s\n" "Zookeeper port:" "${ZOOKEEPER_PORT}"
+  printf "%-40s %s\n" "Solr port:" "${SOLR_PORT}"
+}
+
+function git_get_selected_branch() {
+  #Check first if there is a branch
+  git -C "${GIT_REPO_DIR}" checkout "${BRANCH_OR_PR_NUMBER}"
+  if [ "$?" -ne "0" ]; then
+    printf "WARNING: Branch: %s, could not be found. Trying pull request..\n" "${BRANCH_OR_PR_NUMBER}"
+    #Verify first if the value is actually a number
+    number_regex='^[0-9]+$'
+    if ! [[ ${BRANCH_OR_PR_NUMBER} =~ ${number_regex} ]]; then
+      printf "ERROR: Value %s is not a number. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+      exit 1
+    fi
+    git -C "${GIT_REPO_DIR}" fetch -u origin "pull/${BRANCH_OR_PR_NUMBER}/head:${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+    #Verify if BRANCH_OR_PR_NUMBER specified could be fetched
+    if [ "$?" -ne "0" ]; then
+      printf "ERROR: Could not create branch from PR with number: %s. Exiting..\n" "${BRANCH_OR_PR_NUMBER}"
+      exit 1
+    fi
+    git -C "${GIT_REPO_DIR}" checkout "${PULL_REQUEST_PREFIX}${BRANCH_OR_PR_NUMBER}"
+    printf "Show git summary of PR:\n"
+    git -C "${GIT_REPO_DIR}" show --summary
+  else
+    git -C "${GIT_REPO_DIR}" pull
+  fi
+  COMMIT_HASH="$(git rev-parse --short "${BRANCH_OR_PR_NUMBER}" | tr -d '\n')"
+}
+
+function rsync_local_dir_to_remote_dir() {
+  #TODO This could be avoided and the config sent directly to zookeeper. The downside would be that we always send all files.
+  local source="${GIT_REPO_DIR}${GIT_SOLR_CONF_SUBDIR}"
+  #local destination="$TARGET_COMMAND_SERVER:${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+  local destination="${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR}"
+  printf "Starting rsync from local directory: %s -> to directory: %s\n" "${source}" "${destination}"
+  mkdir -p "${destination}"
+  rsync --archive --compress --verbose --delete "${source}" "${destination}"
+}
+
+function git_remove_pull_request_branches() {
+  #Delete pull request branches to avoid excessive disk space usage
+  #Sed trims leading and trailing spaces
+  git checkout master
+  local pull_request_branches
+  pull_request_branches=$(git branch | sed 's/^ *//;s/ *$//' | grep ${PULL_REQUEST_PREFIX})
+
+  #Set the field separator to new line
+  IFS=$'\n'
+  for pull_request_branch in $pull_request_branches
+  do
+    printf "Delete pull request: %s\n" "${pull_request_branch}"
+    git branch --delete --force "${pull_request_branch}"
+  done
+  #Reset IFS
+  IFS=$' \t\n'
+}
+
+function zookeeper_find_current_and_old_configurations() {
+  printf "Check if there is current and old configuration.\n"
+  local zookeeper_command
+  zookeeper_command=$(zookeeper_create_command "-cmd ls /configs")
+
+  #Finds configurations of format #/configs/${COLLECTION_NAME}_ or auto created configurations such as /configs/${COLLECTION_NAME}.AUTOCREATED
+  #Temporarily also remove the #/configs/${TARGET_SOLR_CONF_DIR}_ (the first sed group)
+  CURRENT_AND_OLD_CONFIGURATION_PATHS=$(eval "$(echo "${zookeeper_command} | sed -n 's/^\s*\(\/configs\/\(${TARGET_SOLR_CONF_DIR}_[^\/]*\|${COLLECTION_NAME}_[^\/]*\|${COLLECTION_NAME}.AUTOCREATED\)\)\s.*$/\1/p'")")
+  echo "current and old configurations: ${CURRENT_AND_OLD_CONFIGURATION_PATHS}"
+}
+
+function zookeeper_upload_and_apply_new_configuration() {
+  local date_stamp
+  date_stamp=$(date --iso-8601=seconds)
+  local new_configuration_name=${COLLECTION_NAME}_${COMMIT_HASH}_${date_stamp}
+
+  printf "Uploading zookeeper new configuration: %s\n" "${new_configuration_name}"
+  local zookeeper_command
+  zookeeper_command=$(zookeeper_create_command "-cmd upconfig --confdir ${TARGET_SOLR_CONF_ROOT_DIR}${TARGET_SOLR_CONF_DIR} --confname ${new_configuration_name}")
+  $(echo "${zookeeper_command}")
+  #Update the collection so the new configuration takes effect.
+  #Using MODIFYCOLLECTION instead of the RELOAD command because MODIFYCOLLECTION will re-apply the mapping between the collection and the configuration name and then reload it. This helps to avoid the collection being mapped to another configuration name.
+  printf "Starting solr MODIFYCOLLECTION command\n"
+  $(echo "curl -v --get --data-urlencode collection=${COLLECTION_NAME} --data-urlencode collection.configName=${new_configuration_name} http://localhost:${SOLR_PORT}/solr/admin/collections?action=MODIFYCOLLECTION")
+}
+
+function zookeeper_remove_current_and_old_configurations() {
+  local zookeeper_command
+  #Set the field separator to new line
+  echo "Remove current and old configuration"
+  IFS=$'\n'
+  for configuration_path in $CURRENT_AND_OLD_CONFIGURATION_PATHS
+  do
+    printf "Removing zookeeper configuration: %s\n" "${configuration_path}"
+    zookeeper_command=$(zookeeper_create_command "-cmd clear $(echo "${configuration_path}")")
+    $(eval "${zookeeper_command}")
+  done
+  #Reset IFS
+  IFS=$' \t\n'
+}
+
+function zookeeper_create_command() {
+  #First argument should be the '-cmd' part onwards
+  local common_command_part="java -Dlog4j.configurationFile=file://${SOLR_BINARIES_DIR}server/resources/log4j2.xml -classpath .:${SOLR_BINARIES_DIR}server/lib/ext/*:${SOLR_BINARIES_DIR}server/solr-webapp/webapp/WEB-INF/lib/* org.apache.solr.cloud.ZkCLI -zkhost ${LOCAL_ZOOKEEPER_SERVER}:${ZOOKEEPER_PORT}"
+  local unique_command_part="${1}"
+  echo "${common_command_part} ${unique_command_part}"
+}
+
+function execute_remote_ssh_command(){
+  #ssh ${TARGET_COMMAND_SERVER} "${1}"
+  "${1}"
+}
+
+main "$@"
diff --git a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
index 50df7c76..5c69452d 100644
--- a/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
+++ b/src/main/java/eu/europeana/metis/sandbox/config/S3Config.java
@@ -38,6 +38,7 @@ AmazonS3 s3Client() {
     return AmazonS3ClientBuilder
         .standard()
         .withCredentials(new AWSStaticCredentialsProvider(credentials))
+        .withPathStyleAccessEnabled(true)
         .withEndpointConfiguration(
             new EndpointConfiguration(endpoint, signingRegion))
         .build();
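
A quick way to sanity-check the two new services after `docker-compose up` is sketched below. It assumes the names defined in this change (`solr`, `localstack-s3`, `metis_sandbox_publish_local`, `metis-sandbox-bucket`), uses the local-development keys from the README section, and requires the AWS CLI on the host.
```
# Start only the new services; the rest of the stack is not needed for this check.
docker-compose up -d solr localstack-s3

# SOLR cloud mode: the collection created in docker/solr/Dockerfile should be listed.
curl -s "http://localhost:8983/solr/admin/collections?action=CLUSTERSTATUS" \
  | grep -o "metis_sandbox_publish_local"

# localstack: the bucket created by docker/localstack/bucket.sh should exist.
export AWS_ACCESS_KEY_ID=bT3iWI27KcAQyLQCIOYT
export AWS_SECRET_ACCESS_KEY=pMDcycDwMnKbLvkqa2Cxb2KJVeU1u67lE7Fb1Ie
export AWS_DEFAULT_REGION=eu-west-2
aws --endpoint-url=http://localhost:4566 s3api head-bucket --bucket metis-sandbox-bucket \
  && echo "metis-sandbox-bucket is available"
```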
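
On the application side, `withPathStyleAccessEnabled(true)` makes the S3 client address objects as `http://localhost:4566/metis-sandbox-bucket/<key>` rather than the virtual-hosted style `http://metis-sandbox-bucket.localhost:4566/<key>`, which a local emulator such as localstack generally cannot serve. The bucket can be exercised from the host in a similar way; the object key below is only an illustrative example, not something the application writes.
```
# Upload and list a test object against the local endpoint (same keys as above).
echo "dummy thumbnail" > /tmp/thumbnail-test.txt
aws --endpoint-url=http://localhost:4566 s3 cp /tmp/thumbnail-test.txt s3://metis-sandbox-bucket/thumbnail-test.txt
aws --endpoint-url=http://localhost:4566 s3 ls s3://metis-sandbox-bucket/
```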
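
Since `docker/solr/solr-schema.sh` runs against the `europeana/search` checkout baked into the image, picking up newer schema changes normally means rebuilding the `solr` image. Re-running the script inside the running container is an untested alternative sketched here; it assumes the container still has network access and the checkout at `/opt/solr/search`.
```
# Rebuild route: fetch the latest europeana/search schema at image build time.
docker-compose build --no-cache solr
docker-compose up -d solr

# In-place route (hypothetical): re-run the schema script inside the running container.
docker exec -w /opt/solr/search metis-sandbox-solr ./solr-schema.sh
```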