From 009e3afe4aa383cb0ecebe1bb2826444f1bd6889 Mon Sep 17 00:00:00 2001
From: untergeek
Date: Mon, 26 Aug 2024 11:53:23 -0600
Subject: [PATCH] Major-minor-major changes

What looks like a lot of changes here is mostly cosmetic.

Release bump to `1.1.2`
Dependency bump to `es_client==8.15.1`
Updated `docker_test` contents to follow https://github.com/untergeek/es-docker-test-scripts
Updated Dockerfile & `post4docker.py` to be more generic for easy sharing
Updated Dockerfile to use more recent versions of Python and cx_Freeze
Updated `pyproject.toml` to use the new cx_Freeze methodology
Updated dates in `LICENSE.txt`
Updated `cli.py` to import `show_all_options` from `es_client`
Updated `README.md` to correct several typos
Removed `setup.py` as it was only needed by the older cx_Freeze version
Removed `src/es_fieldusage/helpers/client.py` and `logging.py`, as `es_client` now provides that functionality
Removed older files in `docker_test` that were superseded by the new version

All other changes are pylint/black/mypy formatting updates.
---
 .gitignore                           |   7 +
 Dockerfile                           |  48 +++--
 LICENSE.txt                          |   2 +-
 README.md                            |  32 ++--
 docker_test/VERSION                  |   4 +
 docker_test/ansi_clean.bash          |   7 +
 docker_test/common.bash              | 243 ++++++++++++++++++++++++++
 docker_test/create.sh                | 181 +++++++++++++++++++
 docker_test/destroy.sh               |  30 ++++
 docker_test/env_var.yaml             |   8 +
 docker_test/scripts/Dockerfile.tmpl  |   7 -
 docker_test/scripts/create.sh        |  73 --------
 docker_test/scripts/destroy.sh       |  22 ---
 docker_test/scripts/small.options    |   2 -
 post4docker.py                       |  12 +-
 pyproject.toml                       |  78 ++------
 run_script.py                        |  10 +-
 setup.py                             |  11 --
 src/es_fieldusage/cli.py             | 135 ++++++--------
 src/es_fieldusage/commands.py        | 251 ++++++++++++++++++++-------
 src/es_fieldusage/defaults.py        |  88 +++-------
 src/es_fieldusage/exceptions.py      |  23 ++-
 src/es_fieldusage/helpers/client.py  | 189 --------------------
 src/es_fieldusage/helpers/logging.py | 114 ------------
 src/es_fieldusage/helpers/utils.py   |  52 ++----
 src/es_fieldusage/main.py            |  60 ++++---
 src/es_fieldusage/version.py         |   3 +-
 27 files changed, 882 insertions(+), 810 deletions(-)
 create mode 100644 docker_test/VERSION
 create mode 100644 docker_test/ansi_clean.bash
 create mode 100644 docker_test/common.bash
 create mode 100755 docker_test/create.sh
 create mode 100755 docker_test/destroy.sh
 create mode 100644 docker_test/env_var.yaml
 delete mode 100755 docker_test/scripts/Dockerfile.tmpl
 delete mode 100755 docker_test/scripts/create.sh
 delete mode 100755 docker_test/scripts/destroy.sh
 delete mode 100644 docker_test/scripts/small.options
 delete mode 100644 setup.py
 delete mode 100644 src/es_fieldusage/helpers/client.py
 delete mode 100644 src/es_fieldusage/helpers/logging.py

diff --git a/.gitignore b/.gitignore
index 68bc17f..7c4ef7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
+.vscode/
+.flake8
+mypy.ini
+pylintrc.toml
+pytest.ini
+src/testing
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/Dockerfile b/Dockerfile
index 77baf57..664d1a0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,19 @@
 # syntax=docker/dockerfile:1
-ARG PYVER=3.11.5
-ARG ALPTAG=3.17
-FROM python:${PYVER}-alpine${ALPTAG} as builder
+ARG EXENAME=es-fieldusage
+ARG EXEPATH=/exe_path
+ARG EXECUTABLE=${EXEPATH}/${EXENAME}
+ARG LDPATH=${EXEPATH}/lib
+ARG CONFIGPATH=/.config
+ARG PYVER=3.12.5
+ARG ALPTAG=3.20
+FROM python:${PYVER}-alpine${ALPTAG} AS builder
 # Add the community repo for access to patchelf binary package
 ARG ALPTAG
 RUN echo "https://dl-cdn.alpinelinux.org/alpine/v${ALPTAG}/community/" >> /etc/apk/repositories
 RUN apk --no-cache upgrade && apk --no-cache add build-base tar musl-utils openssl-dev patchelf
 # patchelf-wrapper is necessary now for cx_Freeze, but not for Curator itself.
-RUN pip3 install setuptools cx_Freeze patchelf-wrapper
+RUN pip3 install cx_Freeze patchelf-wrapper
 COPY . .
 
 # alpine4docker.sh does some link magic necessary for cx_Freeze execution
@@ -19,14 +24,13 @@ COPY . .
 # ln -s /lib /lib64
 RUN /bin/sh alpine4docker.sh
 
-# Install Curator locally
+# Install project locally
 RUN pip3 install .
 
-# Build (or rather Freeze) Curator
-RUN python3 setup.py build_exe
+# Build (or rather Freeze) the project
+RUN cxfreeze build
 
-# This will add the cacert.pem from certifi to the default location Curator will look
-# and also move 'build/exe.{system().lower()}-{machine()}-{MAJOR}.{MINOR}' to fieldusage_build
+# Rename 'build/exe.{system().lower()}-{machine()}-{MAJOR}.{MINOR}' to executable_build
 RUN python3 post4docker.py
 ### End `builder` segment
@@ -35,12 +39,28 @@ RUN python3 post4docker.py
 ARG ALPTAG
 FROM alpine:${ALPTAG}
 RUN apk --no-cache upgrade && apk --no-cache add openssl-dev expat
-# The path `fieldusage_build` is from `builder` and `post4docker.py`
-COPY --from=builder fieldusage_build /esfieldusage/
-RUN mkdir /.esfieldusage
+
+# The path `executable_build` is from `builder` and `post4docker.py`
+ARG EXEPATH
+COPY --from=builder executable_build ${EXEPATH}/
+
 # This is for the Docker default filepath override
 RUN mkdir /fileoutput
+RUN chown nobody:nobody /fileoutput
+
+ARG CONFIGPATH
+RUN mkdir ${CONFIGPATH}
+
+ARG LDPATH
+ENV LD_LIBRARY_PATH=${LDPATH}
+
+# COPY entrypoint.sh /
+
+ARG EXECUTABLE
+RUN echo '#!/bin/sh' > /entrypoint.sh
+RUN echo >> /entrypoint.sh
+RUN echo "${EXECUTABLE} \"\$@\"" >> /entrypoint.sh
+RUN chmod +x /entrypoint.sh
 
 USER nobody:nobody
-ENV LD_LIBRARY_PATH /esfieldusage/lib:$LD_LIBRARY_PATH
-ENTRYPOINT ["/esfieldusage/es-fieldusage"]
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/LICENSE.txt b/LICENSE.txt
index 1c1147c..d7154bd 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright 2023 Elasticsearch and contributors.
+Copyright 2023-2024 Elasticsearch and contributors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
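For reference: the 'build/exe.{system().lower()}-{machine()}-{MAJOR}.{MINOR}' directory named in the Dockerfile comments above is what cx_Freeze writes, and `post4docker.py` (diff further below) renames it. A standalone sketch of that path computation, mirroring the logic in the patched `post4docker.py`:

    from platform import machine, system, python_version

    # python_version() returns e.g. '3.12.5'; keep only MAJOR and MINOR
    MAJOR, MINOR = tuple(python_version().split('.')[:-1])

    # cx_Freeze writes its frozen output to build/exe.<system>-<machine>-<major>.<minor>
    BUILD = f'build/exe.{system().lower()}-{machine()}-{MAJOR}.{MINOR}'
    print(BUILD)  # e.g. 'build/exe.linux-x86_64-3.12' inside the builder stage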
diff --git a/README.md b/README.md index e691818..3acf467 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,17 @@ **Table of Contents** -- [Installation](#installation) -- [Description](#description) - - [Options](#top-level-help-output) - - [Command: stdout](#command-stdout-help-output) - - [Command: file](#command-file-help-output) - - [Command: show-indices](#command-show-indices-help-output) -- [Docker Usage](#docker-usage) -- [License](#license) +- [es-fieldusage](#es-fieldusage) + - [Installation](#installation) + - [Description](#description) + - [Top-level help output](#top-level-help-output) + - [Command: `stdout` help output](#command-stdout-help-output) + - [Command `file` help output](#command-file-help-output) + - [Command `show-indices` help output](#command-show-indices-help-output) + - [Docker usage](#docker-usage) + - [Docker build](#docker-build) + - [Docker run](#docker-run) + - [License](#license) ## Installation @@ -55,7 +58,7 @@ Options: --password TEXT Elasticsearch password --request_timeout FLOAT Request timeout in seconds --verify_certs / --no-verify_certs - Verify SSL/TLS certificate(s) [default: verify_certs] + Verify SSL/TLS certificate(s) --ca_certs TEXT Path to CA certificate file or directory --client_cert TEXT Path to client certificate file --client_key TEXT Path to client key file @@ -63,6 +66,7 @@ Options: Log level --logfile TEXT Log file --logformat [default|ecs] Log output format + --blacklist TEXT Named entities will not be logged -v, --version Show the version and exit. -h, --help Show this message and exit. @@ -70,7 +74,7 @@ Commands: show-all-options Show all configuration options stdout Output field usage information to the console - Learn more at https://github.com/untergeek/elastic-grab-bag/es_fieldusage + Learn more at https://github.com/untergeek/es-fieldusage ``` ### Command: `stdout` help output @@ -92,10 +96,10 @@ Options: --show-unaccessed / --hide-unaccessed Show unaccessed fields [default: hide-unaccessed] --show-counts / --hide-counts Show field access counts [default: hide-counts] - --delimiter TEXT Value delimiter if access counts are shown [default: :] + --delimiter TEXT Value delimiter if access counts are shown [default: ,] -h, --help Show this message and exit. - Learn more at https://github.com/untergeek/elastic-grab-bag/es_fieldusage + Learn more at https://github.com/untergeek/es-fieldusage ``` ### Command `file` help output @@ -121,7 +125,7 @@ Options: --delimiter TEXT Value delimiter if access counts are shown [default: ,] -h, --help Show this message and exit. - Learn more at https://github.com/untergeek/elastic-grab-bag/es_fieldusage + Learn more at https://github.com/untergeek/es-fieldusage ``` ### Command `show-indices` help output @@ -139,7 +143,7 @@ Usage: es-fieldusage show-indices SEARCH_PATTERN Options: -h, --help Show this message and exit. 
-  Learn more at https://github.com/untergeek/elastic-grab-bag/es-fieldusage
+  Learn more at https://github.com/untergeek/es-fieldusage
 ```
 
 ## Docker usage
diff --git a/docker_test/VERSION b/docker_test/VERSION
new file mode 100644
index 0000000..d091155
--- /dev/null
+++ b/docker_test/VERSION
@@ -0,0 +1,4 @@
+Version: 1.1.1
+Released: 24 August 2024
+
+# License and Changelog at https://github.com/untergeek/es-docker-test-scripts
diff --git a/docker_test/ansi_clean.bash b/docker_test/ansi_clean.bash
new file mode 100644
index 0000000..9d131f9
--- /dev/null
+++ b/docker_test/ansi_clean.bash
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+ansi_clean () {
+  # This function is separate so nobody touches the control-M sequence
+  # in the second sed stream filter
+  echo ${1} | sed -e 's/\x1b\[[0-9;]*m//g' -e 's/^M//g'
+}
diff --git a/docker_test/common.bash b/docker_test/common.bash
new file mode 100644
index 0000000..6dd4bed
--- /dev/null
+++ b/docker_test/common.bash
@@ -0,0 +1,243 @@
+# Common variables and functions
+
+# Source the ansi_clean.bash file from the same path as this script
+source $(dirname "$0")/ansi_clean.bash
+
+#MANUAL_PROJECT_NAME=project_name
+DOCKER_PORT=9200
+LOCAL_PORT=9200
+URL_HOST=127.0.0.1
+ESUSR=elastic
+ENVFILE=.env
+CURLFILE=.kurl
+REPODOCKER=/media
+REPOJSON=createrepo.json
+REPONAME=testing
+LIMIT=30  # How many seconds to wait to obtain the credentials
+IMAGE=docker.elastic.co/elasticsearch/elasticsearch
+MEMORY=1GB  # The heap will be half of this
+
+
+#############################
+### Function declarations ###
+#############################
+
+docker_logline () {
+  # Return the line number that contains "${1}"
+  echo $(docker logs ${NAME} | grep -n "${1}" | awk -F\: '{print $1}')
+}
+
+get_espw () {
+  # Start with an empty value
+  linenum=''
+
+  # Make a pretty spinner
+  spin='-\|/'
+  # spin modulo tracker
+  s=0
+  # tenths incrementer (of a second)
+  tenths=0
+  # tenths modulo tracker
+  t=0
+  # seconds incrementer
+  seconds=0
+
+  # Loop until we get a valid line number, or LIMIT tries
+  while [ "x${linenum}" == "x" ] && [ $seconds -lt $LIMIT ]; do
+
+    # increment $s and modulo 4
+    s=$(( (s+1) %4 ))
+    # increment $tenths
+    ((++tenths))
+    # increment $t and modulo 10
+    t=$(( (t+1) %10 ))
+
+    # if $t is 0 (it was evenly divisible by 10)
+    if [ $t -eq 0 ]; then
+      # we increment seconds, because 1 second has elapsed
+      ((++seconds))
+      # Get the docker log line associated with elasticsearch-reset-password
+      linenum=$(docker_logline "elasticsearch-reset-password")
+    fi
+
+    # Print the spinner to stderr (so it shows up)
+    printf "\r${spin:$s:1} ${seconds}s elapsed (typically 15s - 25s)..." >&2
+
+    # wait 1/10th of a second before looping again
+    sleep 0.1
+  done
+  # end while loop
+
+  # Error out if we didn't get it
+  if [ "x${linenum}" == "x" ] || [ $seconds -ge $LIMIT ]; then
+    echo "ERROR: Unable to get password for user ${ESUSR}. Unable to continue. Exiting..."
+    exit 1
+  fi
+
+  # Increment the linenum (because we want the next line)
+  ((++linenum))
+
+  # Get the (next) line, i.e. incremented and tailed to isolate
+  retval=$(docker logs ${NAME} | head -n ${linenum} | tail -1 | awk '{print $1}')
+
+  # Strip the ANSI color/bold here. External function because of the
+  # control-M sequence
+  ESPWD=$(ansi_clean "${retval}")
+}
+
+change_espw () {
+
+  # To shorten the command-line, we put this as a variable
+  exec_cmd=/usr/share/elasticsearch/bin/elasticsearch-reset-password
+
+  #################################################
+  # The change password command:                  #
+  # docker exec -it ${1} ${exec_cmd} -b -u $ESUSR #
+  #################################################
+  #############################################################################
+  # Output 1: Not ready response:                                             #
+  # ERROR: Failed to determine the health of the cluster. , with exit code 69 #
+  #############################################################################
+  #######################################################
+  # Output 2: Successful response:                      #
+  # Password for the [elastic] user successfully reset. #
+  # New value: NEW_PASSWORD                             #
+  #######################################################
+
+  # awk '{print $3}' of the "Not ready response" is "to"
+  # So we start with retval='to'
+  retval='to'
+
+  # We're only going to try this up to the $LIMIT
+  count=0
+
+  # Loop until we get the expected response, or LIMIT tries
+  while [ "x$retval" == "xto" ] && [ $count -lt $LIMIT ]; do
+    retval=$(docker exec -it ${NAME} $exec_cmd -b -u ${ESUSR} | tail -1 | awk '{print $3}')
+    ((++count))
+    sleep 1
+  done
+
+  # If we still don't have a value, send an empty response back, rather than "to"
+  if [ "x${retval}" == "xto" ]; then
+    echo ''
+  else
+    echo ${retval}
+  fi
+}
+
+xpack_fork () {
+
+  echo
+  echo "Getting Elasticsearch credentials from container \"${NAME}\"..."
+  echo
+
+  # Get the password from the get_espw function. It sets ESPWD
+  get_espw
+
+  # If we have an empty value, that's a problem
+  if [ "x${ESPWD}" == "x" ]; then
+    echo "ERROR: Unable to get password for user ${ESUSR}. Unable to continue. Exiting..."
+    exit 1
+  fi
+
+  # Put envvars in ${ENVCFG}
+  echo "export ESCLIENT_USERNAME=${ESUSR}" >> ${ENVCFG}
+  echo "export TEST_USER=${ESUSR}" >> ${ENVCFG}
+  # We escape the quotes so we can include them in case of special characters
+  echo "export ESCLIENT_PASSWORD=\"${ESPWD}\"" >> ${ENVCFG}
+  echo "export TEST_PASS=\"${ESPWD}\"" >> ${ENVCFG}
+
+
+  # Get the CA certificate and copy it to the PROJECT_ROOT
+  docker cp -q ${NAME}:/usr/share/elasticsearch/config/certs/http_ca.crt ${PROJECT_ROOT}
+
+  # Put the credentials into ${CURLCFG}
+  echo "-u ${ESUSR}:${ESPWD}" >> ${CURLCFG}
+  echo "--cacert ${CACRT}" >> ${CURLCFG}
+
+  # Complete
+  echo "Credentials captured!"
+}
+
+# Save original execution path
+EXECPATH=$(pwd)
+
+# Extract the path for the script
+SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
+
+# Ensure we are in the script path
+cd ${SCRIPTPATH}
+
+# Get the directory name
+SCRIPTPATH_NAME=$(pwd | awk -F\/ '{print $NF}')
+
+# Go up a level
+cd ../
+
+# Find out what the last part of this directory is called
+PROJECT_NAME=$(pwd | awk -F\/ '{print $NF}')
+
+# Manually override the project name, if specified
+if [ "x${MANUAL_PROJECT_NAME}" != "x" ]; then
+  PROJECT_NAME=${MANUAL_PROJECT_NAME}
+fi
+
+# We should be at the project root dir now
+PROJECT_ROOT=$(pwd)
+
+if [ "${SCRIPTPATH_NAME}" != "docker_test" ]; then
+  echo "$0 is not in parent directory 'docker_test'"
+  echo "This could cause issues, as that location is expected."
+ echo "PROJECT_ROOT is now set to ${SCRIPTPATH}" + echo "You may want to set MANUAL_PROJECT_NAME in common.bash" + PROJECT_ROOT=${SCRIPTPATH} +fi + +# If we have a tests/integration path, then we'll use that +if [ -d "tests/integration" ]; then + TESTPATH=${PROJECT_ROOT}/tests/integration +else + # Otherwise we will just dump it into the $SCRIPTPATH + TESTPATH=${SCRIPTPATH} +fi + +# Set the CACRT var +CACRT=${PROJECT_ROOT}/http_ca.crt + +# Set the .env file +ENVCFG=${PROJECT_ROOT}/${ENVFILE} +rm -rf ${ENVCFG} + +# Set the curl config file and ensure we're not reusing an old one +CURLCFG=${SCRIPTPATH}/${CURLFILE} +rm -rf ${CURLCFG} + +# Determine local IPs +OS=$(uname -a | awk '{print $1}') +if [[ "$OS" = "Linux" ]]; then + IPLIST=$(ip -4 -o addr show scope global | grep -v docker |awk '{gsub(/\/.*/,"",$4); print $4}') +elif [[ "$OS" = "Darwin" ]]; then + IPLIST=$(ifconfig | awk -F "[: ]+" '/inet / { if ($2 != "127.0.0.1") print $2 }') +else + echo "Could not determine local IPs for assigning environment variables..." + echo "Please manually determine your local non-loopback IP address and assign it," + echo "e.g. TEST_ES_SERVER=https://A.B.C.D:${LOCAL_PORT}" + exit 0 +fi + +####################### +### Set Docker vars ### +####################### + +# Set the Docker container name +NAME=${PROJECT_NAME}-test + +# Set the bind mount path for the snapshot repository +REPOLOCAL=${SCRIPTPATH}/repo + +# Navigate back to the script path +cd ${SCRIPTPATH} + +################### +### END COMMON ### +################### diff --git a/docker_test/create.sh b/docker_test/create.sh new file mode 100755 index 0000000..cfcd7ad --- /dev/null +++ b/docker_test/create.sh @@ -0,0 +1,181 @@ +#!/bin/bash + + +# Source the common.bash file from the same path as the script +source $(dirname "$0")/common.bash + +echo + +# Test to see if we were passed a VERSION +if [ "x${1}" == "x" ]; then + echo "Error! No Elasticsearch version provided." + echo "VERSION must be in Semver format, e.g. 
X.Y.Z, 8.6.0"
+  echo "USAGE: ${0} VERSION"
+  exit 1
+fi
+
+# Set the version
+VERSION=${1}
+
+######################################
+### Setup snapshot repository path ###
+######################################
+
+# Nuke it from orbit, just to be sure
+rm -rf ${REPOLOCAL}
+mkdir -p ${REPOLOCAL}
+
+#####################
+### Run Container ###
+#####################
+
+docker network rm -f ${NAME}-net > /dev/null 2>&1
+docker network create ${NAME}-net > /dev/null 2>&1
+
+# Start the container
+echo "Starting container \"${NAME}\" from ${IMAGE}:${VERSION}"
+echo -en "Container ID: "
+docker run -q -d -it --name ${NAME} --network ${NAME}-net -m ${MEMORY} \
+  -p ${LOCAL_PORT}:${DOCKER_PORT} \
+  -v ${REPOLOCAL}:${REPODOCKER} \
+  -e "discovery.type=single-node" \
+  -e "cluster.name=local-cluster" \
+  -e "node.name=local-node" \
+  -e "xpack.monitoring.templates.enabled=false" \
+  -e "xpack.searchable.snapshot.shared_cache.size=50M" \
+  -e "path.repo=${REPODOCKER}" \
+${IMAGE}:${VERSION}
+
+# Set the URL
+URL=https://${URL_HOST}:${LOCAL_PORT}
+
+# Add TESTPATH to ${ENVCFG}, creating it or overwriting it
+echo "export CA_CRT=${PROJECT_ROOT}/http_ca.crt" >> ${ENVCFG}
+echo "export TEST_PATH=${TESTPATH}" >> ${ENVCFG}
+echo "export TEST_ES_SERVER=${URL}" >> ${ENVCFG}
+echo "export TEST_ES_REPO=${REPONAME}" >> ${ENVCFG}
+
+# Write some ESCLIENT_ environment variables to the .env file
+echo "export ESCLIENT_CA_CERTS=${CACRT}" >> ${ENVCFG}
+echo "export ESCLIENT_HOSTS=${URL}" >> ${ENVCFG}
+
+# Set up the curl config file, first line creates a new file, all others append
+echo "-o /dev/null" > ${CURLCFG}
+echo "-s" >> ${CURLCFG}
+echo '-w "%{http_code}\n"' >> ${CURLCFG}
+
+# Call the xpack_fork function, passing the container name and the .env file path
+xpack_fork "${NAME}" "${ENVCFG}"
+
+# Did we get a bad return code?
+if [ $? -eq 1 ]; then
+
+  # That's an error, and we need to exit
+  echo "ERROR! Unable to get/reset elastic user password. Unable to continue. Exiting..."
+  exit 1
+fi
+
+# We expect a 200 HTTP response
+EXPECTED=200
+
+# Set the NODE var
+NODE="${NAME} instance"
+
+# Start with an empty value
+ACTUAL=0
+
+# Initialize loop counter
+COUNTER=0
+
+# Loop until we get our 200 code
+echo
+while [ "${ACTUAL}" != "${EXPECTED}" ] && [ ${COUNTER} -lt ${LIMIT} ]; do
+
+  # Get our actual response
+  ACTUAL=$(curl -K ${CURLCFG} ${URL})
+
+  # Report what we received
+  echo -en "\rHTTP status code for ${NODE} is: ${ACTUAL}"
+
+  # If we got what we expected, we're great!
+  if [ "${ACTUAL}" == "${EXPECTED}" ]; then
+    echo " --- ${NODE} is ready!"
+
+  else
+    # Otherwise sleep and try again
+    sleep 1
+    ((++COUNTER))
+  fi
+
+done
+# End while loop
+
+# If we still don't have what we expected, we hit the LIMIT
+if [ "${ACTUAL}" != "${EXPECTED}" ]; then
+
+  echo "Unable to connect to ${URL} in ${LIMIT} seconds. Unable to continue. Exiting..."
+  exit 1
+
+fi
+
+# Initialize trial license
+echo
+response=$(curl -s \
+  --cacert ${CACRT} -u "${ESUSR}:${ESPWD}" \
+  -XPOST "${URL}/_license/start_trial?acknowledge=true")
+
+expected='{"acknowledged":true,"trial_was_started":true,"type":"trial"}'
+if [ "$response" != "$expected" ]; then
+  echo "ERROR! Unable to start trial license!"
+else
+  echo -n "Trial license started and acknowledged. "
+fi
+
+# Set up snapshot repository.
The following will create a JSON file suitable for use with +# curl -d @filename + +rm -f ${REPOJSON} + +# Build a pretty JSON object defining the repository settings +echo '{' >> $REPOJSON +echo ' "type": "fs",' >> $REPOJSON +echo ' "settings": {' >> $REPOJSON +echo -n ' "location": "' >> $REPOJSON +echo -n "${REPODOCKER}" >> $REPOJSON +echo '"' >> $REPOJSON +echo ' }' >> $REPOJSON +echo '}' >> $REPOJSON + +# Create snapshot repository +response=$(curl -s \ + --cacert ${CACRT} -u "${ESUSR}:${ESPWD}" \ + -H 'Content-Type: application/json' \ + -XPOST "${URL}/_snapshot/${REPONAME}?verify=false" \ + --json \@${REPOJSON}) + +expected='{"acknowledged":true}' +if [ "$response" != "$expected" ]; then + echo "ERROR! Unable to create snapshot repository" +else + echo "Snapshot repository \"${REPONAME}\" created." + rm -f ${REPOJSON} +fi + + +################## +### Wrap it up ### +################## + +echo +echo "${NAME} container is up using image elasticsearch:${VERSION}" +echo "Ready to test!" +echo + +if [ "$EXECPATH" == "$PROJECT_ROOT" ]; then + echo "Environment variables are in .env" +elif [ "$EXECPATH" == "$SCRIPTPATH" ]; then + echo "\$PWD is $SCRIPTPATH." + echo "Environment variables are in ../.env" +else + echo "Environment variables are in ${PROJECT_ROOT}/.env" +fi diff --git a/docker_test/destroy.sh b/docker_test/destroy.sh new file mode 100755 index 0000000..c54859b --- /dev/null +++ b/docker_test/destroy.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Source the common.bash file from the same path as the script +source $(dirname "$0")/common.bash + +echo + +# Stop and remove the docker container +RUNNING=$(docker ps -f name=${NAME} | grep -v NAMES | awk '{print $NF}') +EXISTS=$(docker ps -af name=${NAME} | grep -v NAMES | awk '{print $NF}') +if [ "${RUNNING}" == "${NAME}" ]; then + echo "Stopping container ${NAME}..." + echo "$(docker stop ${NAME}) stopped." +fi +if [ "${EXISTS}" == "${NAME}" ]; then + echo "Removing container ${NAME}..." + echo "$(docker rm -f ${NAME}) deleted." +fi + +# Delete Docker network +docker network rm -f ${NAME}-net > /dev/null 2>&1 + +# Delete .env file and curl config file +echo "Deleting remaining files and directories" +rm -rf ${REPOLOCAL} +rm -f ${ENVCFG} +rm -f ${CURLCFG} +rm -f ${PROJECT_ROOT}/http_ca.crt + +echo "Cleanup complete." diff --git a/docker_test/env_var.yaml b/docker_test/env_var.yaml new file mode 100644 index 0000000..e2a17f1 --- /dev/null +++ b/docker_test/env_var.yaml @@ -0,0 +1,8 @@ +--- +elasticsearch: + client: + hosts: ${ESCLIENT_HOSTS} + ca_certs: ${ESCLIENT_CA_CERTS} + other_settings: + username: ${ESCLIENT_USERNAME} + password: ${ESCLIENT_PASSWORD} diff --git a/docker_test/scripts/Dockerfile.tmpl b/docker_test/scripts/Dockerfile.tmpl deleted file mode 100755 index 7675629..0000000 --- a/docker_test/scripts/Dockerfile.tmpl +++ /dev/null @@ -1,7 +0,0 @@ -# syntax=docker/dockerfile:experimental -ARG VERSION=ES_VERSION -FROM elasticsearch:${VERSION} - -COPY --chown=1000:0 small.options /usr/share/elasticsearch/config/jvm.options.d -ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"] -CMD ["eswrapper"] diff --git a/docker_test/scripts/create.sh b/docker_test/scripts/create.sh deleted file mode 100755 index e6cd2ae..0000000 --- a/docker_test/scripts/create.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -IMAGE=es_fieldusage_test -RUNNAME=es_fieldusage_test8 -LOCAL_PORT=9200 -URL=http://127.0.0.1:${LOCAL_PORT} - -if [ "x$1" == "x" ]; then - echo "Error! No Elasticsearch version provided." - echo "VERSION must be in Semver format, e.g. 
X.Y.Z, 8.6.0" - echo "USAGE: $0 VERSION" - exit 1 -fi - -VERSION=$1 - -# Save original execution path -EXECPATH=$(pwd) - -# Extract the path for the script -SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - -# Navigate to the script, regardless of whether we were there -cd $SCRIPTPATH - -# Go up one directory -cd .. - -# Find out what the last part of this directory is called -UPONE=$(pwd | awk -F\/ '{print $NF}') - -# Check if the image has been built. If not, build it. -if [[ "$(docker images -q ${IMAGE}:${VERSION} 2> /dev/null)" == "" ]]; then - echo "Docker image ${IMAGE}:${VERSION} not found. Building from Dockerfile..." - cd $SCRIPTPATH - # Create a Dockerfile from the template - cat Dockerfile.tmpl | sed -e "s/ES_VERSION/${VERSION}/" > Dockerfile - docker build . -t ${IMAGE}:${VERSION} -fi - -### Launch the containers (plural, in 8.x) -echo -en "\rStarting ${RUNNAME} container... " -docker run -d --name ${RUNNAME} -p ${LOCAL_PORT}:9200 \ --e "discovery.type=single-node" \ --e "cluster.name=local-cluster" \ --e "node.name=local" \ --e "xpack.monitoring.templates.enabled=false" \ --e "path.repo=/media" \ --e "xpack.security.enabled=false" \ -${IMAGE}:${VERSION} - -### Check to make sure the ES instances are up and running -echo -echo "Waiting for Elasticsearch instance to become available..." -echo -EXPECTED=200 -NODE="${RUNNAME} instance" -ACTUAL=0 -while [ $ACTUAL -ne $EXPECTED ]; do - ACTUAL=$(curl -o /dev/null -s -w "%{http_code}\n" $URL) - echo -en "\rHTTP status code for $NODE is: $ACTUAL" - if [ $EXPECTED -eq $ACTUAL ]; then - echo " --- $NODE is ready!" - fi - sleep 1 -done - -# Done -echo -echo "Creation complete. ${RUNNAME} container is up using image ${IMAGE}:${VERSION}" - -echo -echo "Ready to test!" diff --git a/docker_test/scripts/destroy.sh b/docker_test/scripts/destroy.sh deleted file mode 100755 index c93c9d6..0000000 --- a/docker_test/scripts/destroy.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Stop and remove the docker container -docker stop es_fieldusage_test8 -docker rm es_fieldusage_test8 - -### Now begins the Dockerfile cleanup phase - -# Save original execution path -EXECPATH=$(pwd) - -# Extract the path for the script -SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - -# Navigate to the script, regardless of whether we were there -cd $SCRIPTPATH - -# Remove the created Dockerfile -rm -f Dockerfile - -echo "Cleanup complete." 
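-

The ANSI-stripping filter added above in docker_test/ansi_clean.bash (sed: strip SGR color/bold escape sequences, then drop the literal control-M) has a direct Python equivalent; a minimal sketch, illustrative only and not part of this patch:

    import re

    # Same pattern the sed expression uses: ESC [ ... m (SGR sequences)
    ANSI_SGR = re.compile(r'\x1b\[[0-9;]*m')

    def ansi_clean(text: str) -> str:
        # Strip color/bold escapes, then remove carriage returns (control-M)
        return ANSI_SGR.sub('', text).replace('\r', '')

    print(ansi_clean('\x1b[1;32mpassword\x1b[0m\r'))  # -> password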
diff --git a/docker_test/scripts/small.options b/docker_test/scripts/small.options
deleted file mode 100644
index 7297a3b..0000000
--- a/docker_test/scripts/small.options
+++ /dev/null
@@ -1,2 +0,0 @@
--Xms512m
--Xmx512m
diff --git a/post4docker.py b/post4docker.py
index 987408b..b0ec4b7 100644
--- a/post4docker.py
+++ b/post4docker.py
@@ -1,15 +1,13 @@
 #!/usr/bin/env python3
+"""Post Docker 'build' phase script"""
 import shutil
 from platform import machine, system, python_version
-import certifi
+
 MAJOR, MINOR = tuple(python_version().split('.')[:-1])
 SYSTEM = system().lower()
 BUILD = f'build/exe.{system().lower()}-{machine()}-{MAJOR}.{MINOR}'
-CERT = certifi.where()
-TARGET = 'fieldusage_build'
-
-# First copy the cert to BUILD
-shutil.copy(CERT, BUILD)
+TARGET = 'executable_build'
 
-# Then rename the path of BUILD itself
+# Rename the path of BUILD to be generic enough for the Dockerfile to grab
+# In other words, rename it to 'executable_build'
 shutil.move(BUILD, TARGET)
diff --git a/pyproject.toml b/pyproject.toml
index 93f90a7..408538a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 keywords = [
     'elasticsearch',
@@ -29,9 +30,7 @@ keywords = [
     'usage',
 ]
 dependencies = [
-    "es_client==8.10.3",
-    "ecs-logging==2.1.0",
-    "six>=1.16.0",
+    "es_client==8.15.1"
 ]
 
 [project.optional-dependencies]
@@ -76,24 +75,21 @@ cov = [
 ]
 
 [[tool.hatch.envs.all.matrix]]
-python = ["3.8", "3.9", "3.10", "3.11"]
+python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
 [tool.hatch.envs.lint]
 detached = true
 dependencies = [
   "black>=23.1.0",
   "mypy>=1.0.0",
-  "ruff>=0.0.243",
 ]
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive {args:src/es_fieldusage tests}"
 style = [
-  "ruff {args:.}",
   "black --check --diff {args:.}",
 ]
 fmt = [
   "black {args:.}",
-  "ruff --fix {args:.}",
   "style",
 ]
 all = [
@@ -103,64 +99,9 @@ all = [
 
 [tool.black]
 target-version = ["py38"]
-line-length = 120
+line-length = 88
 skip-string-normalization = true
 
-[tool.ruff]
-target-version = "py38"
-line-length = 120
-select = [
-  "A",
-  "ARG",
-  "B",
-  "C",
-  "DTZ",
-  "E",
-  "EM",
-  "F",
-  "FBT",
-  "I",
-  "ICN",
-  "ISC",
-  "N",
-  "PLC",
-  "PLE",
-  "PLR",
-  "PLW",
-  "Q",
-  "RUF",
-  "S",
-  "T",
-  "TID",
-  "UP",
-  "W",
-  "YTT",
-]
-ignore = [
-  # Allow non-abstract empty methods in abstract base classes
-  "B027",
-  # Allow boolean positional values in function calls, like `dict.get(...
True)` - "FBT003", - # Ignore checks for possible passwords - "S105", "S106", "S107", - # Ignore complexity - "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", -] -unfixable = [ - # Don't touch unused imports - "F401", -] - -[tool.ruff.isort] -known-first-party = ["es_fieldusage"] - -[tool.ruff.flake8-tidy-imports] -ban-relative-imports = "all" - -[tool.ruff.per-file-ignores] -# Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252"] - [tool.coverage.run] source_pkgs = ["es_fieldusage", "tests"] branch = true @@ -199,4 +140,13 @@ dependencies = [ [tool.distutils.build_exe] excludes = ["tcltk", "tkinter", "unittest"] -zip_include_packages = ["certifi"] \ No newline at end of file +zip_include_packages = ["certifi"] + +[tool.cxfreeze] +executables = [ + {script="run_script.py", target_name="es-fieldusage"} +] + +[tool.cxfreeze.build_exe] +excludes = ["tcltk", "tkinter", "unittest"] +zip_include_packages = ["encodings", "certifi"] \ No newline at end of file diff --git a/run_script.py b/run_script.py index 410c26b..f140420 100755 --- a/run_script.py +++ b/run_script.py @@ -1,12 +1,14 @@ #!/usr/bin/env python # pylint: disable=broad-except, no-value-for-parameter """ -Wrapper for running the command-line script from an installed module. +Wrapper for running the command-line script from an installed module. -Because this script is up one level from src, it has to find the module es_fieldusage.cli from -installed modules. This makes it a good way to ensure everything will work when installed. +Because this script is up one level from src, it has to find the module +es_fieldusage.cli from installed modules. This makes it a good way to ensure +everything will work when installed. -To test development in progress, use the local_test.py script in src (and you must be in src to execute) +To test development in progress, use the local_test.py script in src (and you +must be in src to execute) """ import sys import click diff --git a/setup.py b/setup.py deleted file mode 100644 index d26dc5a..0000000 --- a/setup.py +++ /dev/null @@ -1,11 +0,0 @@ -import sys -from cx_Freeze import setup, Executable - -# base="Win32GUI" should be used only for Windows GUI app -base = "Win32GUI" if sys.platform == "win32" else None - -setup( - executables=[ - Executable("run_script.py", base=base, target_name="es-fieldusage"), - ] -) diff --git a/src/es_fieldusage/cli.py b/src/es_fieldusage/cli.py index f50d449..dd19bad 100644 --- a/src/es_fieldusage/cli.py +++ b/src/es_fieldusage/cli.py @@ -1,54 +1,57 @@ """Command-line interface""" + import click -from es_client.helpers import utils as escl -from es_fieldusage.defaults import EPILOG, get_context_settings -from es_fieldusage.helpers.utils import cli_opts +from es_client.commands import show_all_options +from es_client.defaults import OPTION_DEFAULTS +from es_client.helpers import config as escl +from es_client.helpers.logging import configure_logging +from es_fieldusage.defaults import EPILOG from es_fieldusage.commands import file, show_indices, stdout from es_fieldusage.version import __version__ -ONOFF = {'on': '', 'off': 'no-'} -click_opt_wrap = escl.option_wrapper() -# pylint: disable=unused-argument, redefined-builtin -@click.group(context_settings=get_context_settings(), epilog=EPILOG) -@click_opt_wrap(*escl.cli_opts('config')) -@click_opt_wrap(*escl.cli_opts('hosts')) -@click_opt_wrap(*escl.cli_opts('cloud_id')) -@click_opt_wrap(*escl.cli_opts('api_token')) -@click_opt_wrap(*escl.cli_opts('id')) 
-@click_opt_wrap(*escl.cli_opts('api_key')) -@click_opt_wrap(*escl.cli_opts('username')) -@click_opt_wrap(*escl.cli_opts('password')) -@click_opt_wrap(*escl.cli_opts('bearer_auth')) -@click_opt_wrap(*escl.cli_opts('opaque_id')) -@click_opt_wrap(*escl.cli_opts('request_timeout')) -@click_opt_wrap(*escl.cli_opts('http_compress', onoff=ONOFF)) -@click_opt_wrap(*escl.cli_opts('verify_certs', onoff=ONOFF)) -@click_opt_wrap(*escl.cli_opts('ca_certs')) -@click_opt_wrap(*escl.cli_opts('client_cert')) -@click_opt_wrap(*escl.cli_opts('client_key')) -@click_opt_wrap(*escl.cli_opts('ssl_assert_hostname')) -@click_opt_wrap(*escl.cli_opts('ssl_assert_fingerprint')) -@click_opt_wrap(*escl.cli_opts('ssl_version')) -@click_opt_wrap(*escl.cli_opts('master-only', onoff=ONOFF)) -@click_opt_wrap(*escl.cli_opts('skip_version_test', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('loglevel')) -@click_opt_wrap(*cli_opts('logfile')) -@click_opt_wrap(*cli_opts('logformat')) +# pylint: disable=R0913,R0914,W0613,W0622 + + +@click.group(context_settings=escl.context_settings(), epilog=EPILOG) +@escl.options_from_dict(OPTION_DEFAULTS) @click.version_option(__version__, '-v', '--version', prog_name="es-fieldusage") @click.pass_context def run( - ctx, config, hosts, cloud_id, api_token, id, api_key, username, password, bearer_auth, - opaque_id, request_timeout, http_compress, verify_certs, ca_certs, client_cert, client_key, - ssl_assert_hostname, ssl_assert_fingerprint, ssl_version, master_only, skip_version_test, - loglevel, logfile, logformat + ctx, + config, + hosts, + cloud_id, + api_token, + id, + api_key, + username, + password, + bearer_auth, + opaque_id, + request_timeout, + http_compress, + verify_certs, + ca_certs, + client_cert, + client_key, + ssl_assert_hostname, + ssl_assert_fingerprint, + ssl_version, + master_only, + skip_version_test, + loglevel, + logfile, + logformat, + blacklist, ): """Elasticsearch Index Field Usage Reporting Tool - - Sum all field query/request access for one or more indices using the Elastic Field Usage API - (https://ela.st/usagestats) - Generate a report at the command-line with the stdout command for all indices in INDEX_PATTERN: + Sum all field query/request access for one or more indices using the Elastic + Field Usage API (https://ela.st/usagestats) + + Generate a report at the command-line with the stdout command for all indices + in INDEX_PATTERN: $ es-fieldusage stdout INDEX_PATTERN @@ -56,54 +59,16 @@ def run( $ es-fieldusage stdout 'index-*' """ - ctx.obj = {} + escl.get_config(ctx, quiet=False) + configure_logging(ctx) + escl.generate_configdict(ctx) -# Here is the ``show-all-options`` command, which does nothing other than set ``show=True`` for -# the hidden options in the top-level menu so they are exposed for the --help output. 
-@run.command(context_settings=get_context_settings(), short_help='Show all configuration options') -@click_opt_wrap(*escl.cli_opts('config')) -@click_opt_wrap(*escl.cli_opts('hosts')) -@click_opt_wrap(*escl.cli_opts('cloud_id')) -@click_opt_wrap(*escl.cli_opts('api_token')) -@click_opt_wrap(*escl.cli_opts('id')) -@click_opt_wrap(*escl.cli_opts('api_key')) -@click_opt_wrap(*escl.cli_opts('username')) -@click_opt_wrap(*escl.cli_opts('password')) -@click_opt_wrap(*escl.cli_opts('bearer_auth', show=True)) -@click_opt_wrap(*escl.cli_opts('opaque_id', show=True)) -@click_opt_wrap(*escl.cli_opts('request_timeout')) -@click_opt_wrap(*escl.cli_opts('http_compress', onoff=ONOFF, show=True)) -@click_opt_wrap(*escl.cli_opts('verify_certs', onoff=ONOFF)) -@click_opt_wrap(*escl.cli_opts('ca_certs')) -@click_opt_wrap(*escl.cli_opts('client_cert')) -@click_opt_wrap(*escl.cli_opts('client_key')) -@click_opt_wrap(*escl.cli_opts('ssl_assert_hostname', show=True)) -@click_opt_wrap(*escl.cli_opts('ssl_assert_fingerprint', show=True)) -@click_opt_wrap(*escl.cli_opts('ssl_version', show=True)) -@click_opt_wrap(*escl.cli_opts('master-only', onoff=ONOFF, show=True)) -@click_opt_wrap(*escl.cli_opts('skip_version_test', onoff=ONOFF, show=True)) -@click_opt_wrap(*cli_opts('loglevel')) -@click_opt_wrap(*cli_opts('logfile')) -@click_opt_wrap(*cli_opts('logformat')) -@click.version_option(__version__, '-v', '--version', prog_name="es-fieldusage") -@click.pass_context -def show_all_options( - ctx, config, hosts, cloud_id, api_token, id, api_key, username, password, bearer_auth, - opaque_id, request_timeout, http_compress, verify_certs, ca_certs, client_cert, client_key, - ssl_assert_hostname, ssl_assert_fingerprint, ssl_version, master_only, skip_version_test, - loglevel, logfile, logformat -): - """ - ALL CLIENT OPTIONS - - The following is the full list of settings available for configuring a connection using - command-line options. - """ - ctx = click.get_current_context() - click.echo(ctx.get_help()) - ctx.exit() -# Add the subcommands +# This is now included with es_client. 
It works, so ignore weird typing issues +run.add_command(show_all_options) # type: ignore + +# Add the local subcommands run.add_command(show_indices) run.add_command(file) +# run.add_command(index) # Not ready yet run.add_command(stdout) diff --git a/src/es_fieldusage/commands.py b/src/es_fieldusage/commands.py index 4d4e6b4..2dd79c8 100644 --- a/src/es_fieldusage/commands.py +++ b/src/es_fieldusage/commands.py @@ -1,26 +1,33 @@ """Sub-commands for Click CLI""" + import os +from datetime import datetime, timezone +import json import logging import click -from es_client.helpers import utils as escl -from es_fieldusage.defaults import FILEPATH_OVERRIDE, EPILOG, get_context_settings +from es_client.helpers import config as escl +from es_client.helpers.logging import is_docker +from es_client.helpers.utils import option_wrapper +from es_fieldusage.defaults import OPTS, FILEPATH_OVERRIDE, EPILOG from es_fieldusage.exceptions import FatalException -from es_fieldusage.helpers.client import get_args, get_client -from es_fieldusage.helpers.utils import cli_opts, is_docker, output_report +from es_fieldusage.helpers.utils import output_report from es_fieldusage.main import FieldUsage -LOGGER = logging.getLogger(__name__) +SHW = {'on': 'show-', 'off': 'hide-'} +TRU = {'default': True} +WRP = option_wrapper() + +# pylint: disable=R0913,R0914 -ONOFF = {'on': 'show-', 'off': 'hide-'} -click_opt_wrap = escl.option_wrapper() def get_per_index(field_usage, per_index): """Return the per_index data set for reporting""" + logger = logging.getLogger(__name__) if per_index: try: all_data = field_usage.per_index_report except Exception as exc: - LOGGER.critical('Unable to get per_index_report data: %s', exc) + logger.critical('Unable to get per_index_report data: %s', exc) raise FatalException from exc else: all_data = { @@ -31,6 +38,7 @@ def get_per_index(field_usage, per_index): } return all_data + def format_delimiter(value): """Return a formatted delimiter""" delimiter = '' @@ -42,18 +50,21 @@ def format_delimiter(value): delimiter = value return delimiter + def header_msg(msg, show): """Return the message to show if show is True""" if not show: msg = '' return msg + def printout(data, show_counts, raw_delimiter): """Print output to stdout based on the provided values""" for line in output_generator(data, show_counts, raw_delimiter): # Since the generator is adding newlines, we set nl=False here click.secho(line, nl=False) + def output_generator(data, show_counts, raw_delimiter): """Generate output iterator based on the provided values""" delimiter = format_delimiter(raw_delimiter) @@ -63,42 +74,53 @@ def output_generator(data, show_counts, raw_delimiter): line = f'{key}{delimiter}{value}' else: line = f'{key}' - # In order to write newlines to a file descriptor, they must be part of the line + # In order to write newlines to a file descriptor, they must be part of + # the line yield f'{line}\n' + def override_filepath(): """Override the default filepath if we're running Docker""" if is_docker(): return {'default': FILEPATH_OVERRIDE} return {} -@click.command(context_settings=get_context_settings(), epilog=EPILOG) -@click_opt_wrap(*cli_opts('report', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('headers', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('accessed', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('unaccessed', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('counts', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('delimiter')) + +@click.command(epilog=EPILOG) +@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW)) 
+@WRP(*escl.cli_opts('headers', settings=OPTS, onoff=SHW)) +@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW)) +@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW)) +@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW)) +@WRP(*escl.cli_opts('delimiter', settings=OPTS)) @click.argument('search_pattern', type=str, nargs=1) @click.pass_context def stdout( - ctx, show_report, show_headers, show_accessed, show_unaccessed, show_counts, delimiter, - search_pattern): + ctx, + show_report, + show_headers, + show_accessed, + show_unaccessed, + show_counts, + delimiter, + search_pattern, +): """ Display field usage information on the console for SEARCH_PATTERN $ es-fieldusage stdout [OPTIONS] SEARCH_PATTERN - This is powerful if you want to pipe the output through grep for only certain fields or - patterns: + This is powerful if you want to pipe the output through grep for only certain + fields or patterns: - $ es-fieldusage stdout --hide-report --hide-headers --show-unaccessed 'index-*' | grep process + $ es-fieldusage stdout --hide-report --hide-headers --show-unaccessed 'index-*' \ + | grep process """ - client_args, other_args = get_args(ctx.parent.params) + logger = logging.getLogger(__name__) try: - field_usage = FieldUsage(client_args, other_args, search_pattern) + field_usage = FieldUsage(ctx.obj['configdict'], search_pattern) except Exception as exc: - LOGGER.critical('Exception encountered: %s', exc) + logger.critical('Exception encountered: %s', exc) raise FatalException from exc if show_report: output_report(search_pattern, field_usage.report) @@ -111,36 +133,49 @@ def stdout( click.secho(msg, overline=show_headers, underline=show_headers, bold=True) printout(field_usage.report['unaccessed'], show_counts, delimiter) -@click.command(context_settings=get_context_settings(), epilog=EPILOG) -@click_opt_wrap(*cli_opts('report', onoff=ONOFF)) -@click_opt_wrap(*cli_opts('accessed', onoff=ONOFF, override={'default': True})) -@click_opt_wrap(*cli_opts('unaccessed', onoff=ONOFF, override={'default': True})) -@click_opt_wrap(*cli_opts('counts', onoff=ONOFF, override={'default': True})) -@click_opt_wrap(*cli_opts('index', {'on': 'per-', 'off': 'not-per-'})) -@click_opt_wrap(*cli_opts('filepath', override=override_filepath())) -@click_opt_wrap(*cli_opts('prefix')) -@click_opt_wrap(*cli_opts('suffix')) -@click_opt_wrap(*cli_opts('delimiter')) + +@click.command(epilog=EPILOG) +@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW)) +@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU)) +@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU)) +@WRP(*escl.cli_opts('counts', settings=OPTS, onoff=SHW, override=TRU)) +@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'})) +@WRP(*escl.cli_opts('filepath', settings=OPTS, override=override_filepath())) +@WRP(*escl.cli_opts('prefix', settings=OPTS)) +@WRP(*escl.cli_opts('suffix', settings=OPTS)) +@WRP(*escl.cli_opts('delimiter', settings=OPTS)) @click.argument('search_pattern', type=str, nargs=1) @click.pass_context def file( - ctx, show_report, show_accessed, show_unaccessed, show_counts, per_index, filepath, prefix, - suffix, delimiter, search_pattern): + ctx, + show_report, + show_accessed, + show_unaccessed, + show_counts, + per_index, + filepath, + prefix, + suffix, + delimiter, + search_pattern, +): """ Write field usage information to file for SEARCH_PATTERN $ es_fieldusage file [OPTIONS] SEARCH_PATTERN - When writing to file, the filename will be 
{prefix}-{INDEXNAME}.{suffix} where INDEXNAME will
-    be the name of the index if the --per-index option is used, or 'all_indices' if not.
+    When writing to file, the filename will be {prefix}-{INDEXNAME}.{suffix}
+    where INDEXNAME will be the name of the index if the --per-index option is
+    used, or 'all_indices' if not.
 
-    This allows you to write to one file per index automatically, should that be your desire.
+    This allows you to write to one file per index automatically, should that
+    be your desire.
     """
-    client_args, other_args = get_args(ctx.parent.params)
+    logger = logging.getLogger(__name__)
     try:
-        field_usage = FieldUsage(client_args, other_args, search_pattern)
+        field_usage = FieldUsage(ctx.obj['configdict'], search_pattern)
     except Exception as exc:
-        LOGGER.critical('Exception encountered: %s', exc)
+        logger.critical('Exception encountered: %s', exc)
         raise FatalException from exc
     if show_report:
         output_report(search_pattern, field_usage.report)
@@ -153,16 +188,32 @@ def file(
         fname = f'{prefix}-{idx}.{suffix}'
         filename = os.path.join(filepath, fname)
 
-        # if the file already exists, remove it first so we don't append to old data below
+        # if the file already exists, remove it first so we don't append to old
+        # data below
         if os.path.exists(filename):
             os.remove(filename)
+        # JSON output can be done from a dictionary. In order to preserve the
+        # ability to show/hide accessed & unaccessed, I need a clean dictionary
+        output = {}
         files_written.append(fname)
-        for key, boolval in {'accessed': show_accessed, 'unaccessed': show_unaccessed}.items():
+        for key, boolval in {
+            'accessed': show_accessed,
+            'unaccessed': show_unaccessed,
+        }.items():
             if boolval:
-                generator = output_generator(all_data[idx][key], show_counts, delimiter)
-                with open(filename, 'a', encoding='utf-8') as fdesc:
-                    fdesc.writelines(generator)
+                output.update(all_data[idx][key])
+                if not suffix == 'json':
+                    generator = output_generator(
+                        all_data[idx][key], show_counts, delimiter
+                    )
+                    with open(filename, 'a', encoding='utf-8') as fdesc:
+                        fdesc.writelines(generator)
+        # Now we write output as a JSON object, if we selected that
+        if suffix == 'json':
+            with open(filename, 'a', encoding='utf-8') as fdesc:
+                json.dump(output, fdesc, indent=2)
+                fdesc.write('\n')
     click.secho('Number of files written: ', nl=False)
     click.secho(len(files_written), bold=True)
     click.secho('Filenames: ', nl=False)
@@ -172,7 +223,88 @@ def file(
     else:
         click.secho(files_written, bold=True)
 
+
+@click.command(epilog=EPILOG)
+@WRP(*escl.cli_opts('report', settings=OPTS, onoff=SHW))
+@WRP(*escl.cli_opts('accessed', settings=OPTS, onoff=SHW, override=TRU))
+@WRP(*escl.cli_opts('unaccessed', settings=OPTS, onoff=SHW, override=TRU))
+@WRP(*escl.cli_opts('index', settings=OPTS, onoff={'on': 'per-', 'off': 'not-per-'}))
+@WRP(*escl.cli_opts('indexname', settings=OPTS))
+@click.argument('search_pattern', type=str, nargs=1)
+@click.pass_context
+def index(
+    ctx,
+    show_report,
+    show_accessed,
+    show_unaccessed,
+    per_index,
+    indexname,
+    search_pattern,
+):
+    """
+    Write field usage information to index INDEXNAME for SEARCH_PATTERN
+
+    $ es_fieldusage index [OPTIONS] SEARCH_PATTERN
+
+    This will write a document per fieldname per index found in SEARCH_PATTERN
+    to INDEXNAME, where the JSON structure is:
+
+    {
+        "index": SOURCEINDEXNAME,
+        "field": {
+            "name": "FIELDNAME",
+            "count": COUNT
+        }
+    }
+    """
+    logger = logging.getLogger(__name__)
+    logger.debug('indexname = %s', indexname)
+
timestamp = f"{datetime.now(timezone.utc).isoformat().split('.')[0]}.000Z" + try: + field_usage = FieldUsage(ctx.obj['configdict'], search_pattern) + except Exception as exc: + logger.critical('Exception encountered: %s', exc) + raise FatalException from exc + # client = field_usage.client + if show_report: + output_report(search_pattern, field_usage.report) + click.secho() + + all_data = get_per_index(field_usage, per_index) + + # TESTING + fname = 'testing' + filepath = os.getcwd() + filename = os.path.join(filepath, fname) + + # If the file already exists, remove it so we don't append to old data + if os.path.exists(filename): + os.remove(filename) + # END TESTING + + output = [] + for idx in list(all_data.keys()): + for key, boolval in { + 'accessed': show_accessed, + 'unaccessed': show_unaccessed, + }.items(): + if boolval: + for fieldname, value in all_data[idx][key].items(): + obj = { + '@timestamp': timestamp, + 'index': idx, + 'field': {'name': fieldname, 'count': value}, + } + output.append(obj) + + # TESTING + with open(filename, 'a', encoding='utf-8') as fdesc: + json.dump(output, fdesc, indent=2) + fdesc.write('\n') + # END TESTING + + +@click.command(epilog=EPILOG) @click.argument('search_pattern', type=str, nargs=1) @click.pass_context def show_indices(ctx, search_pattern): @@ -181,19 +313,14 @@ def show_indices(ctx, search_pattern): $ es-fieldusage show_indices SEARCH_PATTERN - This is included as a way to ensure you are seeing the indices you expect before using the file - or stdout commands. + This is included as a way to ensure you are seeing the indices you expect + before using the file or stdout commands. """ - client_args, other_args = get_args(ctx.parent.params) + logger = logging.getLogger(__name__) try: - client = get_client(configdict={ - 'elasticsearch': { - 'client': escl.prune_nones(client_args.asdict()), - 'other_settings': escl.prune_nones(other_args.asdict()) - } - }) + client = escl.get_client(configdict=ctx.obj['configdict']) except Exception as exc: - LOGGER.critical('Exception encountered: %s', exc) + logger.critical('Exception encountered: %s', exc) raise FatalException from exc cat = client.cat.indices(index=search_pattern, h='index', format='json') indices = [] @@ -201,15 +328,17 @@ def show_indices(ctx, search_pattern): indices.append(item['index']) indices.sort() # Output - ## Search Pattern + # Search Pattern click.secho('\nSearch Pattern', nl=False, overline=True, underline=True, bold=True) click.secho(f': {search_pattern}', bold=True) - ## Indices Found + # Indices Found if len(indices) == 1: click.secho('\nIndex Found', nl=False, overline=True, underline=True, bold=True) click.secho(f': {indices[0]}', bold=True) else: - click.secho(f'\n{len(indices)} ', overline=True, underline=True, bold=True, nl=False) + click.secho( + f'\n{len(indices)} ', overline=True, underline=True, bold=True, nl=False + ) click.secho('Indices Found', overline=True, underline=True, bold=True, nl=False) click.secho(': ') for idx in indices: diff --git a/src/es_fieldusage/defaults.py b/src/es_fieldusage/defaults.py index 23f00db..0bcd579 100644 --- a/src/es_fieldusage/defaults.py +++ b/src/es_fieldusage/defaults.py @@ -1,122 +1,74 @@ """Default values and constants""" + import os -from shutil import get_terminal_size -import click -from six import string_types -from voluptuous import All, Any, Coerce, Optional, Schema # pylint: disable=E1120 # This value is hard-coded in the Dockerfile, so don't change it - FILEPATH_OVERRIDE = '/fileoutput' +INDEXNAME = 'es-fieldusage' + 
EPILOG = 'Learn more at https://github.com/untergeek/es-fieldusage' HELP_OPTIONS = {'help_option_names': ['-h', '--help']} -CLI_OPTIONS = { - 'loglevel': { - 'help': 'Log level', - "type": click.Choice(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']) - }, - 'logfile': {'help': 'Log file', 'type': str}, - 'logformat': { - 'help': 'Log output format', - "type": click.Choice(['default', 'ecs']) - }, - 'report':{ +OPTS = { + 'report': { 'help': 'Show a summary report', 'default': True, 'show_default': True, }, - 'headers':{ + 'headers': { 'help': 'Show block headers for un|accessed fields', 'default': True, 'show_default': True, }, - 'accessed':{ + 'accessed': { 'help': 'Show accessed fields', 'default': False, 'show_default': True, }, - 'unaccessed':{ + 'unaccessed': { 'help': 'Show unaccessed fields', 'default': False, 'show_default': True, }, - 'counts':{ + 'counts': { 'help': 'Show field access counts', 'default': False, 'show_default': True, }, - 'delimiter':{ + 'delimiter': { 'help': 'Value delimiter if access counts are shown', 'type': str, 'default': ',', 'show_default': True, }, - 'index':{ + 'index': { 'help': 'Create one file per index found', 'default': False, 'show_default': True, }, - 'filepath':{ + 'indexname': { + 'help': 'Write results to named ES index', + 'default': INDEXNAME, + 'show_default': True, + }, + 'filepath': { 'help': 'Path where files will be written', 'default': os.getcwd(), 'show_default': True, }, - 'prefix':{ + 'prefix': { 'help': 'Filename prefix', 'default': 'es_fieldusage', 'show_default': True, }, - 'suffix':{ + 'suffix': { 'help': 'Filename suffix', 'default': 'csv', 'show_default': True, }, - 'show_hidden': {'help': 'Show all options', 'is_flag': True, 'default': False} + 'show_hidden': {'help': 'Show all options', 'is_flag': True, 'default': False}, } - -def click_options(): - """Return the max version""" - return CLI_OPTIONS - -# Configuration file: logging -def config_logging(): - """ - Logging schema with defaults: - - .. code-block:: yaml - - logging: - loglevel: INFO - logfile: None - logformat: default - blacklist: ['elastic_transport', 'urllib3'] - - :returns: A valid :py:class:`~.voluptuous.schema_builder.Schema` of all acceptable values with - the default values set. - :rtype: :py:class:`~.voluptuous.schema_builder.Schema` - """ - return Schema( - { - Optional('loglevel', default='INFO'): - Any(None, 'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL', - All(Coerce(int), Any(0, 10, 20, 30, 40, 50)) - ), - Optional('logfile', default=None): Any(None, *string_types), - Optional('logformat', default='default'): - Any(None, All(Any(*string_types), Any('default', 'ecs'))), - Optional('blacklist', default=['elastic_transport', 'urllib3']): Any(None, list), - } - ) - -def get_context_settings(): - """Return Click context settings dictionary""" - return {**get_width(), **HELP_OPTIONS} - -def get_width(): - """Determine terminal width""" - return {"max_content_width": get_terminal_size()[0]} diff --git a/src/es_fieldusage/exceptions.py b/src/es_fieldusage/exceptions.py index 834185a..39bf5e2 100644 --- a/src/es_fieldusage/exceptions.py +++ b/src/es_fieldusage/exceptions.py @@ -1,43 +1,56 @@ """es-fieldusage Exceptions""" -### Parent exception + +# Parent exception + + class FieldUsageException(Exception): """ Base class for all exceptions raised by the tool which are not Elasticsearch exceptions. 
""" -### + + +# Child exceptions + class ClientException(FieldUsageException): """ - Exception raised when the Elasticsearch client and/or connection is the source of the problem. + Exception raised when the Elasticsearch client and/or connection is the + source of the problem. """ + class ConfigurationException(FieldUsageException): """ Exception raised when there is a configuration error """ + class MissingArgument(ConfigurationException): """ Exception raised when a required argument or parameter is missing """ + class ResultNotExpected(ClientException): """ - Exception raised when return value from Elasticsearch API call is not or does not contain the - expected result. + Exception raised when return value from Elasticsearch API call is not or does + not contain the expected result. """ + class TimeoutException(FieldUsageException): """ Exception raised when a task has failed because the allotted time ran out """ + class ValueMismatch(ConfigurationException): """ Exception raised when a received value does not match what was expected. """ + class FatalException(FieldUsageException): """ Exception raised when the program should be halted. diff --git a/src/es_fieldusage/helpers/client.py b/src/es_fieldusage/helpers/client.py deleted file mode 100644 index 45d84a8..0000000 --- a/src/es_fieldusage/helpers/client.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Client builder helper functions""" -import logging -from es_client.builder import Builder, ClientArgs, OtherArgs -from es_client.defaults import CLIENT_SETTINGS, VERSION_MAX, VERSION_MIN -from es_client.exceptions import ConfigurationError -from es_client.helpers import utils as escl -from es_fieldusage.exceptions import ClientException, ConfigurationException -from es_fieldusage.helpers.logging import check_logging_config, set_logging - -def cloud_id_override(args, params, client_args): - """ - If hosts are in the config file, but cloud_id is specified at the command-line, - we need to remove the hosts parameter as cloud_id and hosts are mutually exclusive - """ - logger = logging.getLogger(__name__) - if params['cloud_id']: - logger.info('cloud_id from command-line superseding configuration file settings') - client_args.hosts = None - args.pop('hosts', None) - return args - -def hosts_override(args, params, client_args): - """ - If hosts are provided at the command-line, but cloud_id was in the config file, we need to - remove the cloud_id parameter from the config file-based dictionary before merging - """ - logger = logging.getLogger(__name__) - if params['hosts']: - logger.info('hosts from command-line superseding configuration file settings') - client_args.hosts = None - client_args.cloud_id = None - args.pop('cloud_id', None) - return args - -def configure_logging(params, config): - """Configure logging based on params and config - - Values in params will override anything set in config - """ - # Check for log settings from config file - init_logcfg = check_logging_config(config) - - # Override anything with options from the command-line - if params['loglevel']: - init_logcfg['loglevel'] = params['loglevel'] - if params['logfile']: - init_logcfg['logfile'] = params['logfile'] - if params['logformat']: - init_logcfg['logformat'] = params['logformat'] - # Now enable logging with the merged settings - set_logging(check_logging_config({'logging': init_logcfg})) - -def get_arg_objects(config): - """Return initial tuple of ClientArgs, OtherArgs - - They will be either empty, or with values from config - """ - client_args = ClientArgs() - 
diff --git a/src/es_fieldusage/helpers/client.py b/src/es_fieldusage/helpers/client.py
deleted file mode 100644
index 45d84a8..0000000
--- a/src/es_fieldusage/helpers/client.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Client builder helper functions"""
-import logging
-from es_client.builder import Builder, ClientArgs, OtherArgs
-from es_client.defaults import CLIENT_SETTINGS, VERSION_MAX, VERSION_MIN
-from es_client.exceptions import ConfigurationError
-from es_client.helpers import utils as escl
-from es_fieldusage.exceptions import ClientException, ConfigurationException
-from es_fieldusage.helpers.logging import check_logging_config, set_logging
-
-def cloud_id_override(args, params, client_args):
-    """
-    If hosts are in the config file, but cloud_id is specified at the command-line,
-    we need to remove the hosts parameter as cloud_id and hosts are mutually exclusive
-    """
-    logger = logging.getLogger(__name__)
-    if params['cloud_id']:
-        logger.info('cloud_id from command-line superseding configuration file settings')
-        client_args.hosts = None
-        args.pop('hosts', None)
-    return args
-
-def hosts_override(args, params, client_args):
-    """
-    If hosts are provided at the command-line, but cloud_id was in the config file, we need to
-    remove the cloud_id parameter from the config file-based dictionary before merging
-    """
-    logger = logging.getLogger(__name__)
-    if params['hosts']:
-        logger.info('hosts from command-line superseding configuration file settings')
-        client_args.hosts = None
-        client_args.cloud_id = None
-        args.pop('cloud_id', None)
-    return args
-
-def configure_logging(params, config):
-    """Configure logging based on params and config
-
-    Values in params will override anything set in config
-    """
-    # Check for log settings from config file
-    init_logcfg = check_logging_config(config)
-
-    # Override anything with options from the command-line
-    if params['loglevel']:
-        init_logcfg['loglevel'] = params['loglevel']
-    if params['logfile']:
-        init_logcfg['logfile'] = params['logfile']
-    if params['logformat']:
-        init_logcfg['logformat'] = params['logformat']
-    # Now enable logging with the merged settings
-    set_logging(check_logging_config({'logging': init_logcfg}))
-
-def get_arg_objects(config):
-    """Return initial tuple of ClientArgs, OtherArgs
-
-    They will be either empty, or with values from config
-    """
-    client_args = ClientArgs()
-    other_args = OtherArgs()
-    if config:
-        validated_config = escl.check_config(config)
-        client_args.update_settings(validated_config['client'])
-        other_args.update_settings(validated_config['other_settings'])
-    return client_args, other_args
-
-def get_client(
-        configdict=None, configfile=None, autoconnect=False, version_min=VERSION_MIN,
-        version_max=VERSION_MAX):
-    """Get an Elasticsearch Client using :py:class:`es_client.Builder`
-
-    Build a client out of settings from `configfile` or `configdict`
-    If neither `configfile` nor `configdict` is provided, empty defaults will be used.
-    If both are provided, `configdict` will be used, and `configfile` ignored.
-
-    :param configdict: A configuration dictionary
-    :param configfile: A configuration file
-    :param autoconnect: Connect to client automatically
-    :param verion_min: Minimum acceptable version of Elasticsearch (major, minor, patch)
-    :param verion_max: Maximum acceptable version of Elasticsearch (major, minor, patch)
-
-    :type configdict: dict
-    :type configfile: str
-    :type autoconnect: bool
-    :type version_min: tuple
-    :type version_max: tuple
-
-    :returns: A client connection object
-    :rtype: :py:class:`~.elasticsearch.Elasticsearch`
-    """
-    logger = logging.getLogger(__name__)
-    logger.debug('Creating client object and testing connection')
-
-    builder = Builder(
-        configdict=configdict, configfile=configfile, autoconnect=autoconnect,
-        version_min=version_min, version_max=version_max
-    )
-
-    try:
-        builder.connect()
-    except Exception as exc:
-        logger.critical('Unable to establish client connection to Elasticsearch!')
-        logger.critical('Exception encountered: %s', exc)
-        raise ClientException from exc
-
-    return builder.client
-
-def get_config(params):
-    """If params['config'] is a valid path, return the validated dictionary from the YAML"""
-    config = {'config':{}}  # Set a default empty value
-    if params['config']:
-        config = escl.get_yaml(params['config'])
-    return config
-
-def get_hosts(params):
-    """Return hostlist for client object"""
-    logger = logging.getLogger(__name__)
-    hostslist = []
-    if params['hosts']:
-        for host in list(params['hosts']):
-            try:
-                hostslist.append(escl.verify_url_schema(host))
-            except ConfigurationError as err:
-                logger.error('Incorrect URL Schema: %s', err)
-                raise ConfigurationException from err
-    else:
-        hostslist = None
-    return hostslist
-
-def override_client_args(params, client_args):
-    """Override client_args settings with values from params"""
-    # cli_client = escl.prune_nones({
-    #     'hosts': get_hosts(params),
-    #     'cloud_id': params['cloud_id'],
-    #     'bearer_auth': params['bearer_auth'],
-    #     'opaque_id': params['opaque_id'],
-    #     'request_timeout': params['request_timeout'],
-    #     'http_compress': params['http_compress'],
-    #     'verify_certs': params['verify_certs'],
-    #     'ca_certs': params['ca_certs'],
-    #     'client_cert': params['client_cert'],
-    #     'client_key': params['client_key'],
-    #     'ssl_assert_hostname': params['ssl_assert_hostname'],
-    #     'ssl_assert_fingerprint': params['ssl_assert_fingerprint'],
-    #     'ssl_version': params['ssl_version']
-    # })
-    args = {}
-    for key, value in params.items():
-        if key in CLIENT_SETTINGS:
-            if key == 'hosts':
-                args[key] = get_hosts(params)
-            elif value is not None:
-                args[key] = value
-    args = cloud_id_override(args, params, client_args)
-    args = hosts_override(args, params, client_args)
-    args = escl.prune_nones(args)
-    # Update the object if we have settings to override after pruning None values
-    if args:
-        client_args.update_settings(args)
-
-def override_other_args(params, other_args):
-    """Override other_args settings with values from params"""
-    args = escl.prune_nones({
-        'master_only': params['master_only'],
-        'skip_version_test': params['skip_version_test'],
-        'username': params['username'],
-        'password': params['password'],
-        'api_key': {
-            'id': params['id'],
-            'api_key': params['api_key'],
-            'token': params['api_token'],
-        }
-    })
-
-    # Remove `api_key` root key if `id` and `api_key` and `token` are all None
-    if params['id'] is None and params['api_key'] is None and params['api_token'] is None:
-        del args['api_key']
-
-    if args:
-        other_args.update_settings(args)
-
-def get_args(params):
-    """Return ClientArgs, OtherArgs tuple from params"""
-    config = get_config(params)
-    configure_logging(params, config)
-    client_args, other_args = get_arg_objects(config)
-    override_client_args(params, client_args)
-    override_other_args(params, other_args)
-
-    return client_args, other_args
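
Everything this module did is now handled by es_client. A sketch of the equivalent call, using the configdict shape the deleted get_client() assembled internally (hosts and credentials are illustrative):

from es_client.helpers.config import get_client

configdict = {
    'elasticsearch': {
        'client': {'hosts': ['http://127.0.0.1:9200']},
        'other_settings': {'username': 'elastic', 'password': 'changeme'},
    }
}
client = get_client(configdict=configdict)  # connection errors raise here
print(client.info())
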
diff --git a/src/es_fieldusage/helpers/logging.py b/src/es_fieldusage/helpers/logging.py
deleted file mode 100644
index a29afb0..0000000
--- a/src/es_fieldusage/helpers/logging.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Logging helpers"""
-import sys
-import logging
-import click
-import ecs_logging
-from es_client.helpers.schemacheck import SchemaCheck
-from es_client.helpers.utils import ensure_list, prune_nones
-from es_fieldusage.defaults import config_logging
-from es_fieldusage.helpers.utils import is_docker
-
-class Whitelist(logging.Filter):
-    """How to whitelist logs"""
-    # pylint: disable=super-init-not-called
-    def __init__(self, *whitelist):
-        self.whitelist = [logging.Filter(name) for name in whitelist]
-
-    def filter(self, record):
-        return any(f.filter(record) for f in self.whitelist)
-
-class Blacklist(Whitelist):
-    """Blacklist monkey-patch of Whitelist"""
-    def filter(self, record):
-        return not Whitelist.filter(self, record)
-
-class LogInfo:
-    """Logging Class"""
-    def __init__(self, cfg):
-        """Class Setup
-
-        :param cfg: The logging configuration
-        :type: cfg: dict
-        """
-        cfg['loglevel'] = 'INFO' if not 'loglevel' in cfg else cfg['loglevel']
-        cfg['logfile'] = None if not 'logfile' in cfg else cfg['logfile']
-        cfg['logformat'] = 'default' if not 'logformat' in cfg else cfg['logformat']
-        #: Attribute. The numeric equivalent of ``cfg['loglevel']``
-        self.numeric_log_level = getattr(logging, cfg['loglevel'].upper(), None)
-        #: Attribute. The logging format string to use.
-        self.format_string = '%(asctime)s %(levelname)-9s %(message)s'
-
-        if not isinstance(self.numeric_log_level, int):
-            msg = f"Invalid log level: {cfg['loglevel']}"
-            print(msg)
-            raise ValueError(msg)
-
-        #: Attribute. Which logging handler to use
-        if is_docker():
-            self.handler = logging.FileHandler('/proc/1/fd/1')
-        else:
-            self.handler = logging.StreamHandler(stream=sys.stdout)
-        if cfg['logfile']:
-            self.handler = logging.FileHandler(cfg['logfile'])
-
-        if self.numeric_log_level == 10:  # DEBUG
-            self.format_string = (
-                '%(asctime)s %(levelname)-9s %(name)22s %(funcName)22s:%(lineno)-4d %(message)s')
-
-        if cfg['logformat'] == 'ecs':
-            self.handler.setFormatter(ecs_logging.StdlibFormatter())
-        else:
-            self.handler.setFormatter(logging.Formatter(self.format_string))
-
-def check_logging_config(config):
-    """
-    Ensure that the top-level key ``logging`` is in ``config`` before passing it to
-    :py:class:`~.es_client.helpers.schemacheck.SchemaCheck` for value validation.
-
-    :param config: Logging configuration data
-
-    :type config: dict
-
-    :returns: :py:class:`~.es_client.helpers.schemacheck.SchemaCheck` validated logging
-        configuration.
-    """
-
-    if not isinstance(config, dict):
-        click.echo(
-            f'Must supply logging information as a dictionary. '
-            f'You supplied: "{config}" which is "{type(config)}"'
-            f'Using default logging values.'
-        )
-        log_settings = {}
-    elif not 'logging' in config:
-        # None provided. Use defaults.
-        log_settings = {}
-    else:
-        if config['logging']:
-            log_settings = prune_nones(config['logging'])
-        else:
-            log_settings = {}
-    return SchemaCheck(
-        log_settings, config_logging(), 'Logging Configuration', 'logging').result()
-
-def set_logging(log_opts):
-    """Configure global logging options
-
-    :param log_opts: Logging configuration data
-
-    :type log_opts: dict
-
-    :rtype: None
-    """
-    # Set up logging
-    loginfo = LogInfo(log_opts)
-    logging.root.addHandler(loginfo.handler)
-    logging.root.setLevel(loginfo.numeric_log_level)
-    _ = logging.getLogger('redacter.cli')
-    # Set up NullHandler() to handle nested elasticsearch8.trace Logger
-    # instance in elasticsearch python client
-    logging.getLogger('elasticsearch8.trace').addHandler(logging.NullHandler())
-    if log_opts['blacklist']:
-        for bl_entry in ensure_list(log_opts['blacklist']):
-            for handler in logging.root.handlers:
-                handler.addFilter(Blacklist(bl_entry))
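
Logging configuration likewise moves to es_client. For reference, the blacklist behavior this module implemented can be reproduced with stdlib logging alone (handler wiring below is illustrative):

import logging


class Blacklist(logging.Filter):
    """Inverted logging.Filter: drop records from the named logger subtree."""
    def filter(self, record):
        return not logging.Filter.filter(self, record)


handler = logging.StreamHandler()
for noisy in ('elastic_transport', 'urllib3'):
    handler.addFilter(Blacklist(noisy))
logging.basicConfig(level=logging.DEBUG, handlers=[handler])

logging.getLogger('urllib3.connectionpool').debug('dropped')
logging.getLogger('es_fieldusage').debug('emitted')
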
diff --git a/src/es_fieldusage/helpers/utils.py b/src/es_fieldusage/helpers/utils.py
index ffbe5c6..6dfe592 100644
--- a/src/es_fieldusage/helpers/utils.py
+++ b/src/es_fieldusage/helpers/utils.py
@@ -1,49 +1,22 @@
 """Utility helper functions"""
-import logging
-from pathlib import Path
 from collections import defaultdict
 from functools import reduce
 from itertools import chain
 from operator import getitem, itemgetter
 import click
-from es_fieldusage.defaults import click_options
 from es_fieldusage.exceptions import ConfigurationException
-LOGGER = logging.getLogger(__name__)
-NOPE = 'DONOTUSE'
-
-def cli_opts(value, onoff=None, override=None):
-    """
-    In order to make building a Click interface more cleanly, this function returns all Click
-    option settings indicated by ``value``, both forming the lone argument (e.g. ``--option``),
-    and all key word arguments as a dict.
-
-    The single arg is rendered as ``f'--{value}'``. Likewise, ``value`` is the key to extract
-    all keyword args from the supplied dictionary.
-    The facilities to override default values and show hidden values is added here.
-    For default value overriding, the NOPE constant is used as None and False are valid default
-    values
-    """
-    if override is None:
-        override = {}
-    argval = f'--{value}'
-    if isinstance(onoff, dict):
-        try:
-            argval = f'--{onoff["on"]}{value}/--{onoff["off"]}{value}'
-        except KeyError as exc:
-            raise ConfigurationException from exc
-    # return (argval,), override_hidden(retval, show=show)
-    return (argval,), override_settings(click_options()[value], override)

 def convert_mapping(data, new_dict=None):
     """
-    Convert an Elasticsearch mapping into a dictionary more closely approximating the one coming
-    from the field usage API.
+    Convert an Elasticsearch mapping into a dictionary more closely approximating
+    the one coming from the field usage API.

     Receive the mapping dict as ``data``
     Strip out "properties" keys. They are not in the field_usage stats paths.
-    Set the value at the end of each dict path to 0 (we merge counts from field usage later)
+    Set the value at the end of each dict path to 0 (we merge counts from field
+    usage later)
     """
     if new_dict is None:
         new_dict = {}
@@ -58,23 +31,20 @@ def convert_mapping(data, new_dict=None):
             retval[key] = 0
     return retval

+
 def detuple(path):
     """If we used a tuple to access a dict path, we fix it to be a list again here"""
     if len(path) == 1 and isinstance(path[0], tuple):
         return list(path[0])
     return path

+
 def get_value_from_path(data, path):
     """
     Return value from dict ``data``. Recreate all keys from list ``path``
     """
     return reduce(getitem, path, data)

-def is_docker():
-    """Check if we're running in a docker container"""
-    cgroup = Path('/proc/self/cgroup')
-    return Path('/.dockerenv').is_file() or (
-        cgroup.is_file() and 'docker' in cgroup.read_text(encoding='utf-8'))

 def iterate_paths(data, path=None):
     """Recursively extract all paths from a dictionary"""
@@ -88,9 +58,6 @@ def iterate_paths(data, path=None):
     else:
         yield newpath

-def option_wrapper():
-    """Return the click decorator passthrough function"""
-    return passthrough(click.option)

 def output_report(search_pattern, report):
     """Output summary report data to command-line/console"""
@@ -98,7 +65,7 @@ def output_report(search_pattern, report):
     click.secho('\nSummary Report', overline=True, underline=True, bold=True)
     click.secho('\nSearch Pattern: ', nl=False)
     # Search Pattern
-    click.secho(search_pattern, bold=True)
+    click.secho(search_pattern, bold=True)
     # Indices Found
     if not isinstance(report['indices'], list):
         click.secho('Index Found: ', nl=False)
@@ -120,6 +87,7 @@ def output_report(search_pattern, report):
         click.secho('Unaccessed Fields: ', nl=False)
         click.secho(len(report['unaccessed'].keys()), bold=True)

+
 def override_settings(data, new_data):
     """Override keys in data with values matching in new_data"""
     if not isinstance(new_data, dict):
@@ -129,18 +97,22 @@ def override_settings(data, new_data):
             data[key] = new_data[key]
     return data

+
 def passthrough(func):
     """Wrapper to make it easy to store click configuration elsewhere"""
     return lambda a, k: func(*a, **k)

+
 def sort_by_name(data):
     """Sort dictionary by key alphabetically"""
     return dict(sorted(data.items(), key=itemgetter(0)))

+
 def sort_by_value(data):
     """Sort dictionary by root key value, descending"""
     return dict(sorted(data.items(), key=itemgetter(1), reverse=True))

+
 def sum_dict_values(data):
     """Sum the values of data dict(s) into a new defaultdict"""
     # Sets up result to have every dictionary key be an integer by default
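
Together, the path helpers above turn a nested mapping into dotted field names with zeroed counts. An indicative walk-through (the sample mapping and printed shapes are illustrative):

from es_fieldusage.helpers.utils import (
    convert_mapping, detuple, get_value_from_path, iterate_paths)

mapping = {
    'host': {'properties': {'name': {'type': 'keyword'}}},
    'message': {'type': 'text'},
}
zeroed = convert_mapping(mapping)  # 'properties' keys stripped, leaves -> 0
for path in iterate_paths(zeroed):
    key = '.'.join(detuple(path))  # e.g. 'host.name'
    print(key, get_value_from_path(zeroed, path))  # 0 until usage is merged
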
diff --git a/src/es_fieldusage/main.py b/src/es_fieldusage/main.py
index ba50fba..290f518 100644
--- a/src/es_fieldusage/main.py
+++ b/src/es_fieldusage/main.py
@@ -1,22 +1,19 @@
 """Main app definition"""
-# pylint: disable=broad-exception-caught
+
 import logging
-from es_client.helpers.utils import prune_nones
-from es_fieldusage.helpers.client import get_client
-from es_fieldusage.helpers import utils
+from es_client.helpers.config import get_client
+from es_fieldusage.helpers import utils as u
 from es_fieldusage.exceptions import ResultNotExpected, ValueMismatch

+# pylint: disable=R0902
+
+
 class FieldUsage:
-    """It's the main class"""
+    """Main Class"""

-    def __init__(self, client_args, other_args, search_pattern):
+    def __init__(self, configdict, search_pattern):
         self.logger = logging.getLogger(__name__)
-        self.client = get_client(configdict={
-            'elasticsearch': {
-                'client': prune_nones(client_args.asdict()),
-                'other_settings': prune_nones(other_args.asdict())
-            }
-        })
+        self.client = get_client(configdict=configdict)
         self.usage_stats = {}
         self.indices_data = []
         self.per_index_data = {}
@@ -27,8 +24,8 @@ def __init__(self, client_args, other_args, search_pattern):

     def get(self, search_pattern):
         """
-        Get ``raw_data`` from the field_usage_stats API for all indices in ``search_pattern``
-        Iterate over ``raw_data`` to build ``self.usage_stats``
+        Get ``raw_data`` from the field_usage_stats API for all indices in
+        ``search_pattern``. Iterate over ``raw_data`` to build ``self.usage_stats``
         """
         try:
             field_usage = self.client.indices.field_usage_stats(index=search_pattern)
@@ -41,7 +38,10 @@ def get(self, search_pattern):
         self.usage_stats[index] = self.sum_index_stats(field_usage, index)

     def get_field_mappings(self, idx):
-        """Return only the field mappings for index ``idx`` (not the entire index mapping)"""
+        """
+        Return only the field mappings for index ``idx`` (not the entire index
+        mapping)
+        """
         return self.client.indices.get_mapping(index=idx)[idx]['mappings']['properties']

     def populate_values(self, idx, data):
@@ -58,7 +58,7 @@ def get_resultset(self, idx):
         """Populate a result set with the fields in the index mapping"""
         result = {}
         if idx in self.usage_stats:
-            allfields = utils.convert_mapping(self.get_field_mappings(idx))
+            allfields = u.convert_mapping(self.get_field_mappings(idx))
             result = self.populate_values(idx, allfields)
         return result

@@ -66,9 +66,9 @@ def merge_results(self, idx):
         """Merge field usage data with index mapping"""
         retval = {}
         data = self.get_resultset(idx)
-        for path in utils.iterate_paths(data):
-            value = utils.get_value_from_path(data, path)
-            key = '.'.join(utils.detuple(path))
+        for path in u.iterate_paths(data):
+            value = u.get_value_from_path(data, path)
+            key = '.'.join(u.detuple(path))
             retval[key] = value
         return retval

@@ -81,8 +81,8 @@ def verify_single_index(self, index=None):
         if isinstance(self.indices, list):
             if len(self.indices) > 1:
                 msg = (
-                    f'Too many indices found. Indicate single index for result, or use '
-                    f'results for all indices. Found: {self.indices}'
+                    f'Too many indices found. Indicate single index for result, '
+                    f'or use results for all indices. Found: {self.indices}'
                 )
                 raise ValueMismatch(msg)
             if len(self.indices) < 1:
@@ -126,13 +126,14 @@ def report(self):
     def result(self, idx=None):
         """Return a single index result as a dictionary"""
         idx = self.verify_single_index(index=idx)
-        return utils.sort_by_value(self.merge_results(idx))
+        return u.sort_by_value(self.merge_results(idx))

     @property
     def results_by_index(self):
         """
-        Return all results as a dictionary, with the index name as the root key, and all stats for
-        that index as the value, which is a dictionary generated by ``self.result()``.
+        Return all results as a dictionary, with the index name as the root key,
+        and all stats for that index as the value, which is a dictionary generated
+        by ``self.result()``.
""" if not self.per_index_data: if not isinstance(self.indices, list): @@ -148,8 +149,8 @@ def results(self): """Return results for all indices found with values summed per mapping leaf""" # The summing re-orders things so it needs to be re-sorted if not self.results_data: - self.results_data = dict( - utils.sort_by_value(utils.sum_dict_values(self.results_by_index))) + _ = u.sort_by_value(u.sum_dict_values(self.results_by_index)) + self.results_data = dict(_) return self.results_data @property @@ -163,17 +164,20 @@ def indices(self): def sum_index_stats(self, field_usage, idx): """Per field, sum all of the usage stats for all shards in ``idx``""" + def appender(result, field, value): - if not field in result: + if field not in result: result[field] = value else: result[field] += value return result + result = {} for shard in field_usage[idx]['shards']: for field in list(shard['stats']['fields'].keys()): if field in ['_id', '_source']: - # We don't care about these because these can be used by runtime queries + # We don't care about these because these can be used by + # runtime queries continue result = appender(result, field, shard['stats']['fields'][field]['any']) return result diff --git a/src/es_fieldusage/version.py b/src/es_fieldusage/version.py index 200044c..8d404f9 100644 --- a/src/es_fieldusage/version.py +++ b/src/es_fieldusage/version.py @@ -1,2 +1,3 @@ """es-fieldusage Version""" -__version__ = '1.1.1' + +__version__ = '1.1.2'