diff --git a/scripts/performance/README.md b/scripts/performance/README.md
new file mode 100644
index 000000000000..d0740b379f1a
--- /dev/null
+++ b/scripts/performance/README.md
@@ -0,0 +1,288 @@
+# AWS CLI Performance Benchmarks
+
+This document describes the AWS CLI performance benchmarks, including
+how to run them and how to add your own.
+
+## Running the Benchmarks
+
+Our benchmark executor runs every benchmark defined in
+`benchmarks.json`. For each definition, it executes the command for a
+configurable number of iterations (default: 1) and records metrics
+such as memory usage, CPU utilization, and timings.
+
+The benchmark executor also stubs an HTTP client with mock responses
+defined in `benchmarks.json`. This ensures the timings produced in
+the results reflect only the AWS CLI and **not** external factors
+such as service latency or network throughput.
+
+### Example
+
+The following example command runs the benchmarks defined in `benchmarks.json`,
+executing each command twice.
+
+`./run-benchmarks --result-dir ./results --num-iterations 2`
+
+An example output for this command is shown below.
+
+```json
+{
+ "results":[
+ {
+ "name":"s3.cp.upload",
+ "dimensions":[
+ {
+ "FileSize":"32MB"
+ },
+ {
+ "S3TransferClient":"Classic"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":0.2531106472015381,
+ "max_memory":76791808.0,
+ "max_cpu":5.0,
+ "p50_memory":51412992.0,
+ "p95_memory":75235328.0,
+ "p50_cpu":1.5,
+ "p95_cpu":2.4,
+ "first_client_invocation_time":0.24789667129516602
+ },
+ {
+ "total_time":0.17595314979553223,
+ "max_memory":76939264.0,
+ "max_cpu":6.2,
+ "p50_memory":52297728.0,
+ "p95_memory":75710464.0,
+ "p50_cpu":2.1,
+ "p95_cpu":2.5,
+ "first_client_invocation_time":0.17173004150390625
+ }
+ ]
+ },
+ {
+ "name":"s3.cp.upload",
+ "dimensions":[
+ {
+ "FileSize":"32MB"
+ },
+ {
+ "S3TransferClient":"CRT"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":0.7724411487579346,
+ "max_memory":81002496.0,
+ "max_cpu":4.1,
+ "p50_memory":78479360.0,
+ "p95_memory":80822272.0,
+ "p50_cpu":0.0,
+ "p95_cpu":2.4,
+ "first_client_invocation_time":0.17360806465148926
+ },
+ {
+ "total_time":0.6735439300537109,
+ "max_memory":80658432.0,
+ "max_cpu":5.2,
+ "p50_memory":78495744.0,
+ "p95_memory":80412672.0,
+ "p50_cpu":0.0,
+ "p95_cpu":2.4,
+ "first_client_invocation_time":0.17362713813781738
+ }
+ ]
+ },
+ {
+ "name":"s3.mv.upload",
+ "dimensions":[
+ {
+ "FileSize":"32MB"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":0.17440271377563477,
+ "max_memory":76972032.0,
+ "max_cpu":4.6,
+ "p50_memory":52166656.0,
+ "p95_memory":75776000.0,
+ "p50_cpu":2.1,
+ "p95_cpu":2.5,
+ "first_client_invocation_time":0.16981887817382812
+ },
+ {
+ "total_time":0.17231082916259766,
+ "max_memory":75825152.0,
+ "max_cpu":6.1,
+ "p50_memory":52199424.0,
+ "p95_memory":74842112.0,
+ "p50_cpu":2.1,
+ "p95_cpu":2.5,
+ "first_client_invocation_time":0.16803598403930664
+ }
+ ]
+ },
+ {
+ "name":"s3.mv.download",
+ "dimensions":[
+ {
+ "FileSize":"32MB"
+ },
+ {
+ "S3TransferClient":"Classic"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":0.17304229736328125,
+ "max_memory":76152832.0,
+ "max_cpu":4.0,
+ "p50_memory":52674560.0,
+ "p95_memory":74907648.0,
+ "p50_cpu":2.1,
+ "p95_cpu":2.4,
+ "first_client_invocation_time":0.16739511489868164
+ },
+ {
+ "total_time":0.16962409019470215,
+ "max_memory":76693504.0,
+ "max_cpu":4.9,
+ "p50_memory":52314112.0,
+ "p95_memory":75431936.0,
+ "p50_cpu":2.1,
+ "p95_cpu":2.6,
+ "first_client_invocation_time":0.16400408744812012
+ }
+ ]
+ },
+ {
+ "name":"s3.sync.upload",
+ "dimensions":[
+ {
+ "FileCount":"5,000"
+ },
+ {
+ "FileSize":"4KB"
+ },
+ {
+ "S3TransferClient":"Classic"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":11.370934963226318,
+ "max_memory":134578176.0,
+ "max_cpu":20.7,
+ "p50_memory":106397696.0,
+ "p95_memory":132235264.0,
+ "p50_cpu":2.4,
+ "p95_cpu":2.7,
+ "first_client_invocation_time":0.6362888813018799
+ },
+ {
+ "total_time":12.029011964797974,
+ "max_memory":134676480.0,
+ "max_cpu":18.6,
+ "p50_memory":105955328.0,
+ "p95_memory":131727360.0,
+ "p50_cpu":2.4,
+ "p95_cpu":2.7,
+ "first_client_invocation_time":0.6395571231842041
+ }
+ ]
+ },
+ {
+ "name":"s3.sync.upload",
+ "dimensions":[
+ {
+ "FileCount":"5,000"
+ },
+ {
+ "FileSize":"4KB"
+ },
+ {
+ "S3TransferClient":"CRT"
+ }
+ ],
+ "measurements":[
+ {
+ "total_time":90.28388690948486,
+ "max_memory":188809216.0,
+ "max_cpu":17.9,
+ "p50_memory":144375808.0,
+ "p95_memory":188792832.0,
+ "p50_cpu":0.0,
+ "p95_cpu":3.4,
+ "first_client_invocation_time":0.656865119934082
+ },
+ {
+ "total_time":84.99997591972351,
+ "max_memory":190808064.0,
+ "max_cpu":20.7,
+ "p50_memory":143917056.0,
+ "p95_memory":186728448.0,
+ "p50_cpu":0.0,
+ "p95_cpu":3.5,
+ "first_client_invocation_time":0.7549021244049072
+ }
+ ]
+ }
+ ]
+}
+```
+
+## Defining Your Own Benchmarks for Local Performance Testing
+
+To create your own benchmarks, create a file on your machine containing a
+JSON-formatted list of benchmark definitions. Each definition supports the
+keys below; each key is required unless marked **(optional)**. A complete
+example definition is shown after the list.
+
+- `name` (string): The name of the benchmark.
+- `command` (list): The AWS CLI command to benchmark, including arguments.
+ - Each element of the list is a string component of the command.
+ - Example value: `["s3", "cp", "test_file", "s3://bucket/test_file", "--quiet"]`.
+- `dimensions` (list) **(optional)**: Additional dimensions to associate with
+this benchmark's results, used when interpreting its metrics.
+ - Each element in the list is an object with a single key-value pair.
+The key is the name of the dimension (e.g. `FileSize`), and the value
+is the value of the dimension (e.g. `32MB`).
+- `environment` (object) **(optional)**: Specifies settings for the environment to run
+the command in.
+ - The environment object supports the following keys:
+ - `files` (list) **(optional)**: Specifies the files that must be
+created before executing the benchmark. The files created will be filled with
+null bytes to achieve the specified size.
+ - Each element is an object with the following keys:
+ - `name` (string): Name of the file to create
+ - `size` (int): The size of the file to create in bytes.
+ - `file_dirs` (list) **(optional)**: Specifies the directories that must
+be created before executing the benchmark. The directories will be created
+and filled with the specified number of files, each of which will be filled
+with null bytes to achieve the specified file size.
+ - Each element is an object with the following keys:
+ - `name` (string): Name of the directory
+ - `file_count` (int): The number of files to create in the directory.
+ - `file_size` (int): The size of each file in the directory, in bytes.
+ - `config` (string) **(optional)**: The contents of the AWS config
+file to use for the benchmark execution.
+ - Default: `"[default]"`.
+ - Example value: `"[default]\ns3 =\n preferred_transfer_client = crt"`
+- `responses` (list) **(optional)**: A list of HTTP responses to stub from
+the service for each request made during command execution.
+  - Default: `[{"headers": {}, "body": ""}]`
+  - Each element of the list is an object with the following keys:
+    - `status_code` (int) **(optional)**: The status code of the response.
+      - Default: `200`
+    - `headers` (object) **(optional)**: Used to specify the HTTP headers of
+the response. Each key-value pair corresponds to a single header name (key)
+and its value.
+      - Default: `{}`
+    - `body` (string) **(optional)**: The raw HTTP response body.
+      - Default: `""`
+    - `instances` (int) **(optional)**: The total number of times to stub
+this response; this prevents the need to repeat the same response many times.
+      - Default: `1`
+      - This is useful for commands such as `aws s3 sync`, which may execute
+many HTTP requests with similar responses.
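+
+### Example Definition
+
+For reference, the definition below is adapted from the first entry in
+`benchmarks.json`: a 32MB `s3 cp` upload using the classic transfer client,
+with the HTTP responses the command expects stubbed out.
+
+```json
+[
+  {
+    "name": "s3.cp.upload",
+    "command": ["s3", "cp", "test_file", "s3://bucket/test_file", "--quiet"],
+    "dimensions": [
+      {"FileSize": "32MB"},
+      {"S3TransferClient": "Classic"}
+    ],
+    "environment": {
+      "files": [
+        {"name": "test_file", "size": 3.2e7}
+      ],
+      "config": "[default]\ns3 =\n preferred_transfer_client = classic"
+    },
+    "responses": [
+      {"body": "bucket key upload-id"},
+      {"headers": {"ETag": "etag"}, "instances": 4},
+      {"body": "bucket key etag-123"}
+    ]
+  }
+]
+```
+
+To run definitions from your own file, point the runner at it (the path shown
+here is a placeholder): `./run-benchmarks --benchmark-definitions /path/to/definitions.json`.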
\ No newline at end of file
diff --git a/scripts/performance/benchmark-cp b/scripts/performance/benchmark-cp
deleted file mode 100755
index e63ae7cd8d56..000000000000
--- a/scripts/performance/benchmark-cp
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-from benchmark_utils import summarize, clean
-from benchmark_utils import get_default_argparser, get_transfer_command
-from benchmark_utils import create_random_subfolder, benchmark_command
-
-
-def benchmark_cp(args):
- destination = args.destination
- if args.recursive:
- destination = create_random_subfolder(destination)
- command = 'cp %s %s' % (args.source, destination)
- command = get_transfer_command(command, args.recursive, args.quiet)
-
- def cleanup():
- if not args.no_cleanup:
- clean(destination, args.recursive)
-
- benchmark_command(
- command, args.benchmark_script, args.summarize_script,
- args.result_dir, args.num_iterations, args.dry_run,
- cleanup=cleanup
- )
-
-
-if __name__ == "__main__":
- parser = get_default_argparser()
- parser.add_argument(
- '-s', '--source', required=True,
- help='A local path or s3 path.'
- )
- parser.add_argument(
- '-d', '--destination', required=True,
- help='A local path or s3 path. A directory will be created in this '
- 'location to copy to in the case of a recursive transfer.'
- )
- benchmark_cp(parser.parse_args())
diff --git a/scripts/performance/benchmark-mv b/scripts/performance/benchmark-mv
deleted file mode 100755
index b6e679425edd..000000000000
--- a/scripts/performance/benchmark-mv
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env python
-from benchmark_utils import backup, copy, clean, get_default_argparser
-from benchmark_utils import create_random_subfolder, benchmark_command
-from benchmark_utils import get_transfer_command
-
-
-def benchmark_mv(args):
- destination = args.destination
- if args.recursive:
- destination = create_random_subfolder(destination)
- command = 'mv %s %s' % (args.source, destination)
- command = get_transfer_command(command, args.recursive, args.quiet)
- backup_path = backup(args.source, args.recursive)
-
- def cleanup():
- if not args.no_cleanup:
- clean(destination, args.recursive)
- clean(backup_path, args.recursive)
-
- def upkeep():
- clean(args.source, args.recursive)
- copy(backup_path, args.source, args.recursive)
-
- benchmark_command(
- command, args.benchmark_script, args.summarize_script,
- args.result_dir, args.num_iterations, args.dry_run,
- upkeep=upkeep,
- cleanup=cleanup
- )
-
-
-if __name__ == "__main__":
- parser = get_default_argparser()
- parser.add_argument(
- '-s', '--source', required=True,
- help='A local path or s3 path.'
- )
- parser.add_argument(
- '-d', '--destination', required=True,
- help='A local path or s3 path. A directory will be created in this '
- 'location to move to in the case of a recursive transfer.'
- )
- benchmark_mv(parser.parse_args())
diff --git a/scripts/performance/benchmark-rm b/scripts/performance/benchmark-rm
deleted file mode 100755
index 16009c696cda..000000000000
--- a/scripts/performance/benchmark-rm
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-from benchmark_utils import benchmark_command, get_transfer_command
-from benchmark_utils import backup, copy, clean, get_default_argparser
-
-
-def benchmark_rm(args):
- command = get_transfer_command(
- 'rm %s' % args.target, args.recursive, args.quiet)
- backup_path = backup(args.target, args.recursive)
-
- benchmark_command(
- command, args.benchmark_script, args.summarize_script,
- args.result_dir, args.num_iterations, args.dry_run,
- upkeep=lambda: copy(backup_path, args.target, args.recursive),
- cleanup=lambda: clean(backup_path, args.recursive)
- )
-
-
-if __name__ == "__main__":
- parser = get_default_argparser()
- parser.add_argument('-t', '--target', required=True, help='An S3 path.')
- benchmark_rm(parser.parse_args())
diff --git a/scripts/performance/benchmark_utils.py b/scripts/performance/benchmark_utils.py
index da48ae372d81..f9b69d8ade0c 100644
--- a/scripts/performance/benchmark_utils.py
+++ b/scripts/performance/benchmark_utils.py
@@ -1,251 +1,396 @@
-import s3transfer
+import json
+import math
+import time
+import psutil
+
import os
-import subprocess
-import uuid
import shutil
-import argparse
-import tempfile
-
-
-def summarize(script, result_dir, summary_dir):
- """Run the given summary script on every file in the given directory.
-
- :param script: A summarization script that takes a list of csv files.
- :param result_dir: A directory containing csv performance result files.
- :param summary_dir: The directory to put the summary file in.
+from awscli.botocore.awsrequest import AWSResponse
+
+from unittest import mock
+from awscli.clidriver import AWSCLIEntryPoint, create_clidriver
+from awscli.compat import BytesIO
+
+
+class Summarizer:
+ DATA_INDEX_IN_ROW = {'time': 0, 'memory': 1, 'cpu': 2}
+
+ def __init__(self):
+ self._start_time = None
+ self._end_time = None
+ self._samples = []
+ self._sums = {
+ 'memory': 0.0,
+ 'cpu': 0.0,
+ }
+
+ def summarize(self, samples):
+ """Processes benchmark data from a dictionary."""
+ self._samples = samples
+ self._validate_samples(samples)
+ for idx, sample in enumerate(samples):
+ # If the sample is the first one, collect the start time.
+ if idx == 0:
+ self._start_time = self._get_time(sample)
+ self.process_data_sample(sample)
+ self._end_time = self._get_time(samples[-1])
+ metrics = self._finalize_processed_data_for_file(samples)
+ return metrics
+
+ def _validate_samples(self, samples):
+ if not samples:
+ raise RuntimeError(
+ 'Benchmark samples could not be processed. '
+ 'The samples list is empty'
+ )
+
+ def process_data_sample(self, sample):
+ self._add_to_sums('memory', sample['memory'])
+ self._add_to_sums('cpu', sample['cpu'])
+
+ def _finalize_processed_data_for_file(self, samples):
+ # compute percentiles
+ self._samples.sort(key=self._get_memory)
+ memory_p50 = self._compute_metric_percentile(50, 'memory')
+ memory_p95 = self._compute_metric_percentile(95, 'memory')
+ self._samples.sort(key=self._get_cpu)
+ cpu_p50 = self._compute_metric_percentile(50, 'cpu')
+ cpu_p95 = self._compute_metric_percentile(95, 'cpu')
+ max_memory = max(samples, key=self._get_memory)['memory']
+ max_cpu = max(samples, key=self._get_cpu)['cpu']
+ # format computed statistics
+ metrics = {
+ 'average_memory': self._sums['memory'] / len(samples),
+ 'average_cpu': self._sums['cpu'] / len(samples),
+ 'max_memory': max_memory,
+ 'max_cpu': max_cpu,
+ 'memory_p50': memory_p50,
+ 'memory_p95': memory_p95,
+ 'cpu_p50': cpu_p50,
+ 'cpu_p95': cpu_p95,
+ }
+ # reset samples array
+ self._samples.clear()
+ return metrics
+
+ def _compute_metric_percentile(self, percentile, name):
+ num_samples = len(self._samples)
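+        # Nearest-rank percentile over the pre-sorted samples; e.g. the p95 of
+        # 200 samples is the value at index ceil(95 * 200 / 100) - 1 = 189.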
+ p_idx = math.ceil(percentile*num_samples/100) - 1
+ return self._samples[p_idx][name]
+
+ def _get_time(self, sample):
+ return sample['time']
+
+ def _get_memory(self, sample):
+ return sample['memory']
+
+ def _get_cpu(self, sample):
+ return sample['cpu']
+
+ def _add_to_sums(self, name, data_point):
+ self._sums[name] += data_point
+
+
+class RawResponse(BytesIO):
"""
- summarize_args = [script]
- for f in os.listdir(result_dir):
- path = os.path.join(result_dir, f)
- if os.path.isfile(path):
- summarize_args.append(path)
-
- with open(os.path.join(summary_dir, 'summary.txt'), 'wb') as f:
- subprocess.check_call(summarize_args, stdout=f)
- with open(os.path.join(summary_dir, 'summary.json'), 'wb') as f:
- summarize_args.extend(['--output-format', 'json'])
- subprocess.check_call(summarize_args, stdout=f)
-
-
-def _get_s3transfer_performance_script(script_name):
- """Retrieves an s3transfer performance script if available."""
- s3transfer_directory = os.path.dirname(s3transfer.__file__)
- s3transfer_directory = os.path.dirname(s3transfer_directory)
- scripts_directory = 'scripts/performance'
- scripts_directory = os.path.join(s3transfer_directory, scripts_directory)
- script = os.path.join(scripts_directory, script_name)
-
- if os.path.isfile(script):
- return script
- else:
- return None
-
-
-def get_benchmark_script():
- return _get_s3transfer_performance_script('benchmark')
-
-
-def get_summarize_script():
- return _get_s3transfer_performance_script('summarize')
-
-
-def backup(source, recursive):
- """Backup a given source to a temporary location.
-
- :type source: str
- :param source: A local path or s3 path to backup.
-
- :type recursive: bool
- :param recursive: if True, the source will be treated as a directory.
+ A bytes-like streamable HTTP response representation.
"""
- if source[:5] == 's3://':
- parts = source.split('/')
- parts.insert(3, str(uuid.uuid4()))
- backup_path = '/'.join(parts)
- else:
- name = os.path.split(source)[-1]
- temp_dir = tempfile.mkdtemp()
- backup_path = os.path.join(temp_dir, name)
-
- copy(source, backup_path, recursive)
- return backup_path
-
+ def stream(self, **kwargs):
+ contents = self.read()
+ while contents:
+ yield contents
+ contents = self.read()
-def copy(source, destination, recursive):
- """Copy files from one location to another.
- The source and destination must both be s3 paths or both be local paths.
-
- :type source: str
- :param source: A local path or s3 path to backup.
-
- :type destination: str
- :param destination: A local path or s3 path to backup the source to.
-
- :type recursive: bool
- :param recursive: if True, the source will be treated as a directory.
+class StubbedHTTPClient(object):
"""
- if 's3://' in [source[:5], destination[:5]]:
- cp_args = ['aws', 's3', 'cp', source, destination, '--quiet']
- if recursive:
- cp_args.append('--recursive')
- subprocess.check_call(cp_args)
- return
-
- if recursive:
- shutil.copytree(source, destination)
- else:
- shutil.copy(source, destination)
-
-
-def clean(destination, recursive):
- """Delete a file or directory either locally or on S3."""
- if destination[:5] == 's3://':
- rm_args = ['aws', 's3', 'rm', '--quiet', destination]
- if recursive:
- rm_args.append('--recursive')
- subprocess.check_call(rm_args)
- else:
- if recursive:
- shutil.rmtree(destination)
- else:
- os.remove(destination)
-
-
-def create_random_subfolder(destination):
- """Create a random subdirectory in a given directory."""
- folder_name = str(uuid.uuid4())
- if destination.startswith('s3://'):
- parts = destination.split('/')
- parts.append(folder_name)
- return '/'.join(parts)
- else:
- parts = list(os.path.split(destination))
- parts.append(folder_name)
- path = os.path.join(*parts)
- os.makedirs(path)
- return path
-
-
-def get_transfer_command(command, recursive, quiet):
- """Get a full cli transfer command.
-
- Performs common transformations, e.g. adding --quiet
+ A generic stubbed HTTP client.
"""
- cli_command = 'aws s3 ' + command
-
- if recursive:
- cli_command += ' --recursive'
-
- if quiet:
- cli_command += ' --quiet'
- else:
- print(cli_command)
-
- return cli_command
-
-
-def benchmark_command(command, benchmark_script, summarize_script,
- output_dir, num_iterations, dry_run, upkeep=None,
- cleanup=None):
- """Benchmark several runs of a long-running command.
-
- :type command: str
- :param command: The full aws cli command to benchmark
-
- :type benchmark_script: str
- :param benchmark_script: A benchmark script that takes a command to run
- and outputs performance data to a file. This should be from s3transfer.
-
- :type summarize_script: str
- :param summarize_script: A summarization script that the output of the
- benchmark script. This should be from s3transfer.
-
- :type output_dir: str
- :param output_dir: The directory to output performance results to.
-
- :type num_iterations: int
- :param num_iterations: The number of times to run the benchmark on the
- command.
-
- :type dry_run: bool
- :param dry_run: Whether or not to actually run the benchmarks.
-
- :type upkeep: function that takes no arguments
- :param upkeep: A function that is run after every iteration of the
- benchmark process. This should be used for upkeep, such as restoring
- files that were deleted as part of the command executing.
+ def setup(self):
+ urllib3_session_send = 'botocore.httpsession.URLLib3Session.send'
+ self._urllib3_patch = mock.patch(urllib3_session_send)
+ self._send = self._urllib3_patch.start()
+ self._send.side_effect = self.get_response
+ self._responses = []
+
+ def tearDown(self):
+ self._urllib3_patch.stop()
+
+ def get_response(self, request):
+ response = self._responses.pop(0)
+ if isinstance(response, Exception):
+ raise response
+ return response
+
+ def add_response(self, body, headers, status_code):
+ response = AWSResponse(
+ url='http://169.254.169.254/',
+ status_code=status_code,
+ headers=headers,
+ raw=RawResponse(body.encode())
+ )
+ self._responses.append(response)
+
+
+class ProcessBenchmarker(object):
+ """
+ Periodically samples CPU and memory usage of a process given its pid.
+ """
+ def benchmark_process(self, pid, data_interval):
+ parent_pid = os.getpid()
+ try:
+            # Sample the process with the supplied pid until it exits.
+ return self._run_benchmark(pid, data_interval)
+ except KeyboardInterrupt:
+ # If there is an interrupt, then try to clean everything up.
+ proc = psutil.Process(parent_pid)
+ procs = proc.children(recursive=True)
+
+ for child in procs:
+ child.terminate()
+
+ gone, alive = psutil.wait_procs(procs, timeout=1)
+ for child in alive:
+ child.kill()
+ raise
+
+ def _run_benchmark(self, pid, data_interval):
+ process_to_measure = psutil.Process(pid)
+ samples = []
+
+ while process_to_measure.is_running():
+ if process_to_measure.status() == psutil.STATUS_ZOMBIE:
+ process_to_measure.kill()
+ break
+ time.sleep(data_interval)
+ try:
+ # Collect the memory and cpu usage.
+ memory_used = process_to_measure.memory_info().rss
+ cpu_percent = process_to_measure.cpu_percent()
+ except (psutil.AccessDenied, psutil.ZombieProcess):
+ # Trying to get process information from a closed or
+ # zombie process will result in corresponding exceptions.
+ break
+            # Record the timestamp of this sample for bookkeeping.
+ current_time = time.time()
+ samples.append({
+ "time": current_time, "memory": memory_used, "cpu": cpu_percent
+ })
+ return samples
+
+
+class BenchmarkHarness(object):
+    """
+    Orchestrates running benchmarks in isolated, configurable
+    environments defined via a specified JSON file.
- :type cleanup: function that takes no arguments
- :param cleanup: A function that is run at the end of the benchmark
- process or if there are any problems during the benchmark process.
- It should be uses for the final cleanup, such as deleting files that
- were created at some destination.
"""
- performance_dir = os.path.join(output_dir, 'performance')
- if os.path.exists(performance_dir):
- shutil.rmtree(performance_dir)
- os.makedirs(performance_dir)
-
- try:
- for i in range(num_iterations):
- out_file = 'performance%s.csv' % i
- out_file = os.path.join(performance_dir, out_file)
- benchmark_args = [
- benchmark_script, command, '--output-file', out_file
- ]
- if not dry_run:
- subprocess.check_call(benchmark_args)
- if upkeep is not None:
- upkeep()
-
- if not dry_run:
- summarize(summarize_script, performance_dir, output_dir)
- finally:
- if not dry_run and cleanup is not None:
- cleanup()
-
-
-def get_default_argparser():
- """Get an ArgumentParser with all the base benchmark arguments added in."""
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '--no-cleanup', action='store_true', default=False,
- help='Do not remove the destination after the tests complete.'
- )
- parser.add_argument(
- '--recursive', action='store_true', default=False,
- help='Indicates that this is a recursive transfer.'
- )
- benchmark_script = get_benchmark_script()
- parser.add_argument(
- '--benchmark-script', default=benchmark_script,
- required=benchmark_script is None,
- help=('The benchmark script to run the commands with. This should be '
- 'from s3transfer.')
- )
- summarize_script = get_summarize_script()
- parser.add_argument(
- '--summarize-script', default=summarize_script,
- required=summarize_script is None,
- help=('The summarize script to run the commands with. This should be '
- 'from s3transfer.')
- )
- parser.add_argument(
- '-o', '--result-dir', default='results',
- help='The directory to output performance results to. Existing '
- 'results will be deleted.'
- )
- parser.add_argument(
- '--dry-run', default=False, action='store_true',
- help='If set, commands will only be printed out, not executed.'
- )
- parser.add_argument(
- '--quiet', default=False, action='store_true',
- help='If set, output is suppressed.'
- )
- parser.add_argument(
- '-n', '--num-iterations', default=1, type=int,
- help='The number of times to run the test.'
- )
- return parser
+    _DEFAULT_FILE_CONFIG_CONTENTS = "[default]"
+ def __init__(self):
+ self._summarizer = Summarizer()
+
+ def _get_default_env(self, config_file):
+ return {
+ 'AWS_CONFIG_FILE': config_file,
+ 'AWS_DEFAULT_REGION': 'us-west-2',
+ 'AWS_ACCESS_KEY_ID': 'access_key',
+ 'AWS_SECRET_ACCESS_KEY': 'secret_key'
+ }
+
+ def _create_file_with_size(self, path, size):
+ """
+ Creates a full-access file in the given directory with the
+ specified name and size. The created file will be full of
+ null bytes to achieve the specified size.
+ """
+ f = open(path, 'wb')
+ os.chmod(path, 0o777)
+ size = int(size)
+ f.truncate(size)
+ f.close()
+
+ def _create_file_dir(self, dir_path, file_count, size):
+ """
+ Creates a directory with the specified name. Also creates identical files
+ with the given size in the created directory. The number of identical files
+ to be created is specified by file_count. Each file will be full of
+ null bytes to achieve the specified size.
+ """
+ os.mkdir(dir_path, 0o777)
+ for i in range(int(file_count)):
+ file_path = os.path.join(dir_path, f'{i}')
+ self._create_file_with_size(file_path, size)
+
+ def _setup_iteration(
+ self,
+ benchmark,
+ client,
+ result_dir,
+ config_file
+ ):
+ """
+ Performs the environment setup for a single iteration of a
+ benchmark. This includes creating the files used by a
+ command and stubbing the HTTP client to use during execution.
+ """
+ # create necessary files for iteration
+ env = benchmark.get('environment', {})
+ if "files" in env:
+ for file_def in env['files']:
+ path = os.path.join(result_dir, file_def['name'])
+ self._create_file_with_size(path, file_def['size'])
+ if "file_dirs" in env:
+ for file_dir_def in env['file_dirs']:
+ dir_path = os.path.join(result_dir, file_dir_def['name'])
+ self._create_file_dir(
+ dir_path,
+ file_dir_def['file_count'],
+ file_dir_def['file_size']
+ )
+ # create config file at specified path
+ with open(config_file, 'w') as f:
+ f.write(env.get('config', self._DEFAULT_FILE_CONFIG_CONTENTS))
+ f.flush()
+ # setup and stub HTTP client
+ client.setup()
+ self._stub_responses(
+ benchmark.get('responses', [{"headers": {}, "body": ""}]),
+ client
+ )
+
+ def _stub_responses(self, responses, client):
+ """
+ Stubs the supplied HTTP client using the response instructions in the supplied
+ responses struct. Each instruction will generate one or more stubbed responses.
+ """
+ for response in responses:
+ body = response.get("body", "")
+ headers = response.get("headers", {})
+ status_code = response.get("status_code", 200)
+ # use the instances key to support duplicating responses a configured number of times
+ if "instances" in response:
+ for _ in range(int(response['instances'])):
+ client.add_response(body, headers, status_code)
+ else:
+ client.add_response(body, headers, status_code)
+
+ def _run_command_with_metric_hooks(self, cmd, result_dir):
+ """
+ Runs a CLI command and logs CLI-specific metrics to a file.
+ """
+ first_client_invocation_time = None
+ start_time = time.time()
+ driver = create_clidriver()
+ event_emitter = driver.session.get_component('event_emitter')
+
+ def _log_invocation_time(params, request_signer, model, **kwargs):
+ nonlocal first_client_invocation_time
+ if first_client_invocation_time is None:
+ first_client_invocation_time = time.time()
+
+ event_emitter.register_last(
+ 'before-call',
+ _log_invocation_time,
+ 'benchmarks.log-invocation-time'
+ )
+ AWSCLIEntryPoint(driver).main(cmd)
+ end_time = time.time()
+
+ # write the collected metrics to a file
+ metrics_f = open(os.path.join(result_dir, 'metrics.json'), 'w')
+ metrics_f.write(json.dumps(
+ {
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'first_client_invocation_time': first_client_invocation_time
+ }
+ ))
+ metrics_f.close()
+ # terminate the process
+ os._exit(0)
+
+ def _run_isolated_benchmark(
+ self,
+ result_dir,
+ benchmark,
+ client,
+ process_benchmarker,
+ args
+ ):
+ """
+ Runs a single iteration of one benchmark execution. Includes setting up
+ the environment, running the benchmarked execution, formatting
+ the results, and cleaning up the environment.
+ """
+ assets_dir = os.path.join(result_dir, 'assets')
+ config_file = os.path.join(assets_dir, 'config')
+ os.makedirs(assets_dir, 0o777)
+ # setup for iteration of benchmark
+ self._setup_iteration(benchmark, client, result_dir, config_file)
+ os.chdir(result_dir)
+ # patch the OS environment with our supplied defaults
+ env_patch = mock.patch.dict('os.environ', self._get_default_env(config_file))
+ env_patch.start()
+ # fork a child process to run the command on.
+ # the parent process benchmarks the child process until the child terminates.
+ pid = os.fork()
+
+ try:
+ # execute command on child process
+ if pid == 0:
+ self._run_command_with_metric_hooks(benchmark['command'], result_dir)
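+                # _run_command_with_metric_hooks does not return; the child
+                # process exits via os._exit(0) after writing its metrics.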
+ # benchmark child process from parent process until child terminates
+ samples = process_benchmarker.benchmark_process(
+ pid,
+ args.data_interval
+ )
+ # summarize benchmark results and process summary
+ summary = self._summarizer.summarize(samples)
+ # load the child-collected metrics and append to the summary
+            with open(os.path.join(result_dir, 'metrics.json'), 'r') as f:
+                metrics = json.load(f)
+            summary['total_time'] = metrics['end_time'] - metrics['start_time']
+            summary['first_client_invocation_time'] = (
+                metrics['first_client_invocation_time'] - metrics['start_time']
+            )
+ finally:
+ # cleanup iteration of benchmark
+ client.tearDown()
+ shutil.rmtree(result_dir, ignore_errors=True)
+ os.makedirs(result_dir, 0o777)
+ env_patch.stop()
+ return summary
+
+ def run_benchmarks(self, args):
+ """
+ Orchestrates benchmarking via the benchmark definitions in
+ the arguments.
+ """
+ summaries = {'results': []}
+ result_dir = args.result_dir
+ client = StubbedHTTPClient()
+ process_benchmarker = ProcessBenchmarker()
+        with open(args.benchmark_definitions, 'r') as f:
+            definitions = json.load(f)
+ if os.path.exists(result_dir):
+ shutil.rmtree(result_dir)
+ os.makedirs(result_dir, 0o777)
+
+ try:
+ for benchmark in definitions:
+ benchmark_result = {
+ 'name': benchmark['name'],
+ 'dimensions': benchmark['dimensions'],
+ 'measurements': []
+ }
+ for _ in range(args.num_iterations):
+ measurements = self._run_isolated_benchmark(
+ result_dir,
+ benchmark,
+ client,
+ process_benchmarker,
+ args
+ )
+ benchmark_result['measurements'].append(measurements)
+ summaries['results'].append(benchmark_result)
+ finally:
+ # final cleanup
+ shutil.rmtree(result_dir, ignore_errors=True)
+ print(json.dumps(summaries, indent=2))
diff --git a/scripts/performance/benchmarks.json b/scripts/performance/benchmarks.json
new file mode 100644
index 000000000000..e23afb2a7dd4
--- /dev/null
+++ b/scripts/performance/benchmarks.json
@@ -0,0 +1,166 @@
+[
+ {
+ "name": "s3.cp.upload",
+ "command": ["s3", "cp", "test_file", "s3://bucket/test_file", "--quiet"],
+ "dimensions": [
+ {"FileSize": "32MB"},
+ {"S3TransferClient": "Classic"}
+ ],
+ "environment": {
+ "files": [
+ {
+ "name": "test_file",
+ "size": 3.2e7
+ }
+ ],
+ "config": "[default]\ns3 =\n preferred_transfer_client = classic"
+ },
+ "responses": [
+ {
+ "body": "bucket key upload-id"
+ },
+ {
+ "headers": {"ETag": "etag"},
+ "instances": 4
+ },
+ {
+ "body": "bucket key etag-123"
+ }
+ ]
+ },
+ {
+ "name": "s3.cp.upload",
+ "command": ["s3", "cp", "test_file", "s3://bucket/test_file", "--quiet"],
+ "dimensions": [
+ {"FileSize": "32MB"},
+ {"S3TransferClient": "CRT"}
+ ],
+ "environment": {
+ "files": [
+ {
+ "name": "test_file",
+ "size": 3.2e7
+ }
+ ],
+ "config": "[default]\ns3 =\n preferred_transfer_client = crt"
+ },
+ "responses": [
+ {
+ "body": "bucket key upload-id"
+ },
+ {
+ "headers": {"ETag": "etag"},
+ "instances": 4
+ },
+ {
+ "body": "bucket key etag-123"
+ }
+ ]
+ },
+ {
+ "name": "s3.mv.upload",
+ "command": ["s3", "mv", "test_file", "s3://bucket/test_file", "--quiet"],
+ "dimensions": [
+ {"FileSize": "32MB"}
+ ],
+ "environment": {
+ "files": [
+ {
+ "name": "test_file",
+ "size": 3.2e7
+ }
+ ]
+ },
+ "responses": [
+ {
+ "headers": {},
+ "body": "bucket key upload-id"
+ },
+ {
+ "headers": {"ETag": "etag"},
+ "instances": 4
+ },
+ {
+ "headers": {},
+ "body": "bucket key etag-123"
+ }
+ ]
+ },
+ {
+ "name": "s3.mv.download",
+ "command": ["s3", "mv", "s3://bucket/test_file123", "./test_file123", "--quiet"],
+ "dimensions": [
+ {"FileSize": "32MB"},
+ {"S3TransferClient": "Classic"}
+ ],
+ "environment": {
+ "config": "[default]\ns3 =\n preferred_transfer_client = classic"
+ },
+ "responses": [
+ {
+ "headers": {"Content-Length": "0", "Last-Modified": "Thu, 18 Oct 2018 23:00:00 GMT", "ETag": "etag-1"}
+ },
+ {
+ "headers": {"Content-Length": "7", "Last-Modified": "Thu, 18 Oct 2018 23:00:00 GMT", "ETag": "etag-1"},
+ "body": "content"
+ },
+ {}
+ ]
+ },
+ {
+ "name": "s3.sync.upload",
+ "command": ["s3", "sync", "./test_dir", "s3://bucket/test_dir", "--quiet"],
+ "dimensions": [
+ {"FileCount": "5,000"},
+ {"FileSize": "4KB"},
+ {"S3TransferClient": "Classic"}
+ ],
+ "environment": {
+ "file_dirs": [
+ {
+ "name": "test_dir",
+ "file_count": 5e3,
+ "file_size": 4e3
+ }
+ ],
+ "config": "[default]\ns3 =\n preferred_transfer_client = classic"
+ },
+ "responses": [
+ {
+ "body": "2015-12-08T18:26:43.000Z key 100 "
+ },
+ {
+ "headers": {"ETag": "etag"},
+ "instances": 5e3
+ }
+ ]
+ },
+ {
+ "name": "s3.sync.upload",
+ "command": ["s3", "sync", "./test_dir", "s3://bucket/test_dir", "--quiet"],
+ "dimensions": [
+ {"FileCount": "5,000"},
+ {"FileSize": "4KB"},
+ {"S3TransferClient": "CRT"}
+ ],
+ "environment": {
+ "file_dirs": [
+ {
+ "name": "test_dir",
+ "file_count": 5e3,
+ "file_size": 4e3
+ }
+ ],
+ "config": "[default]\ns3 =\n preferred_transfer_client = crt"
+ },
+ "responses": [
+ {
+ "body": "2015-12-08T18:26:43.000Z key 100 "
+ },
+ {
+ "headers": {"ETag": "etag"},
+ "instances": 5e3
+ }
+ ]
+ }
+]
diff --git a/scripts/performance/perfcmp b/scripts/performance/perfcmp
deleted file mode 100755
index d2b2c8378e87..000000000000
--- a/scripts/performance/perfcmp
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/env python
-"""Compare 2 perf runs.
-
-To use, specify the local directories that contain
-the run information::
-
- $ ./perfcmp /results/2016-01-01-1111/ /results/2016-01-01-2222/
-
-"""
-import os
-import json
-import argparse
-
-from colorama import Fore, Style
-from tabulate import tabulate
-
-
-class RunComparison(object):
-
- MEMORY_FIELDS = ['average_memory', 'max_memory']
- TIME_FIELDS = ['total_time']
- # Fields that aren't memory or time fields, they require
- # no special formatting.
- OTHER_FIELDS = ['average_cpu']
-
- def __init__(self, old_summary, new_summary):
- self.old_summary = old_summary
- self.new_summary = new_summary
-
- def iter_field_names(self):
- for field in self.TIME_FIELDS + self.MEMORY_FIELDS + self.OTHER_FIELDS:
- yield field
-
- def old(self, field):
- value = self.old_summary[field]
- return self._format(field, value)
-
- def old_suffix(self, field):
- value = self.old_summary[field]
- return self._format_suffix(field, value)
-
- def new_suffix(self, field):
- value = self.new_summary[field]
- return self._format_suffix(field, value)
-
- def _format_suffix(self, field, value):
- if field in self.TIME_FIELDS:
- return 'sec'
- elif field in self.OTHER_FIELDS:
- return ''
- else:
- # The suffix depends on the actual value.
- return self._human_readable_size(value)[1]
-
- def old_stddev(self, field):
- real_field = 'std_dev_%s' % field
- return self.old(real_field)
-
- def new(self, field):
- value = self.new_summary[field]
- return self._format(field, value)
-
- def new_stddev(self, field):
- real_field = 'std_dev_%s' % field
- return self.new(real_field)
-
- def _format(self, field, value):
- if field.startswith('std_dev_'):
- field = field[len('std_dev_'):]
- if field in self.MEMORY_FIELDS:
- return self._human_readable_size(value)[0]
- elif field in self.TIME_FIELDS:
- return '%-3.2f' % value
- else:
- return '%.2f' % value
-
- def _human_readable_size(self, value):
- hummanize_suffixes = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')
- base = 1024
- bytes_int = float(value)
-
- if bytes_int == 1:
- return '1 Byte'
- elif bytes_int < base:
- return '%d Bytes' % bytes_int
-
- for i, suffix in enumerate(hummanize_suffixes):
- unit = base ** (i+2)
- if round((bytes_int / unit) * base) < base:
- return ['%.2f' % (base * bytes_int / unit), suffix]
-
- def diff_percent(self, field):
- diff_percent = (
- (self.new_summary[field] - self.old_summary[field]) /
- float(self.old_summary[field])) * 100
- return diff_percent
-
-
-def compare_runs(old_dir, new_dir):
- for dirname in os.listdir(old_dir):
- old_run_dir = os.path.join(old_dir, dirname)
- new_run_dir = os.path.join(new_dir, dirname)
- if not os.path.isdir(old_run_dir):
- continue
- old_summary = get_summary(old_run_dir)
- new_summary = get_summary(new_run_dir)
- comp = RunComparison(old_summary, new_summary)
- header = [Style.BRIGHT + dirname + Style.RESET_ALL,
- Style.BRIGHT + 'old' + Style.RESET_ALL,
- # Numeric suffix (MiB, GiB, sec).
- '',
- 'std_dev',
- Style.BRIGHT + 'new' + Style.RESET_ALL,
- # Numeric suffix (MiB, GiB, sec).
- '',
- 'std_dev',
- Style.BRIGHT + 'delta' + Style.RESET_ALL]
- rows = []
- for field in comp.iter_field_names():
- row = [field, comp.old(field), comp.old_suffix(field),
- comp.old_stddev(field), comp.new(field),
- comp.new_suffix(field), comp.new_stddev(field)]
- diff_percent = comp.diff_percent(field)
- diff_percent_str = '%.2f%%' % diff_percent
- if diff_percent < 0:
- diff_percent_str = (
- Fore.GREEN + diff_percent_str + Style.RESET_ALL)
- else:
- diff_percent_str = (
- Fore.RED + diff_percent_str + Style.RESET_ALL)
- row.append(diff_percent_str)
- rows.append(row)
- print(tabulate(rows, headers=header, tablefmt='plain'))
- print('')
-
-
-def get_summary(benchmark_dir):
- summary_json = os.path.join(benchmark_dir, 'summary.json')
- with open(summary_json) as f:
- return json.load(f)
-
-
-def main():
- parser = argparse.ArgumentParser(description='__doc__')
- parser.add_argument('oldrunid', help='Path to old run idir')
- parser.add_argument('newrunid', help='Local to new run dir')
- args = parser.parse_args()
- compare_runs(args.oldrunid, args.newrunid)
-
-
-if __name__ == '__main__':
- main()
diff --git a/scripts/performance/run-benchmarks b/scripts/performance/run-benchmarks
new file mode 100755
index 000000000000..bce6f614929f
--- /dev/null
+++ b/scripts/performance/run-benchmarks
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+import argparse
+import os
+
+from scripts.performance.benchmark_utils import BenchmarkHarness
+
+
+_BENCHMARK_DEFINITIONS = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)),
+ 'benchmarks.json'
+)
+
+
+if __name__ == "__main__":
+ harness = BenchmarkHarness()
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--benchmark-definitions', default=_BENCHMARK_DEFINITIONS,
+ help=('The JSON file defining the commands to benchmark.')
+ )
+ parser.add_argument(
+ '-o', '--result-dir', default='results',
+ help='The directory to output performance results to. Existing '
+ 'results will be deleted.'
+ )
+ parser.add_argument(
+ '--data-interval',
+ default=0.001,
+ type=float,
+ help='The interval in seconds to poll for data points.',
+ )
+ parser.add_argument(
+ '--num-iterations',
+ default=1,
+ type=int,
+ help='The number of iterations to repeat the benchmark for.',
+ )
+ harness.run_benchmarks(parser.parse_args())