Skip to content

Commit

Permalink
Initial Import of Cryptofuzz from AWS-LC
Browse files Browse the repository at this point in the history
This directly imports the related CI source for Cryptofuzz from AWS-LC so that the ACCP-specific changes show up more clearly in PRs and in the source history.
  • Loading branch information
geedo0 committed Nov 10, 2023
1 parent 0baa47b commit 430b607
Show file tree
Hide file tree
Showing 8 changed files with 698 additions and 0 deletions.
131 changes: 131 additions & 0 deletions tests/ci/cdk/cdk/accp_github_fuzz_ci_stack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 OR ISC

from aws_cdk import Duration, Size, Stack, aws_codebuild as codebuild, aws_iam as iam, aws_ec2 as ec2, aws_efs as efs
from constructs import Construct

from cdk.components import PruneStaleGitHubBuilds
from util.ecr_util import ecr_arn
from util.iam_policies import code_build_batch_policy_in_json, \
code_build_publish_metrics_in_json
from util.metadata import AWS_ACCOUNT, AWS_REGION, GITHUB_PUSH_CI_BRANCH_TARGETS, GITHUB_REPO_OWNER, GITHUB_REPO_NAME
from util.build_spec_loader import BuildSpecLoader


class AwsLcGitHubFuzzCIStack(Stack):
    """Stack that batch-executes the fuzzing CI for the GitHub repository.

    It creates a webhook-driven CodeBuild project, the IAM role the project
    runs under, a single-AZ VPC with a shared security group, and a
    provisioned-throughput EFS filesystem that is attached to the project.
    """

    def __init__(self,
                 scope: Construct,
                 id: str,
                 spec_file_path: str,
                 **kwargs) -> None:
        """Create every resource that belongs to the fuzzing stack.

        Args:
            scope: Parent construct this stack is attached to.
            id: Stack identifier. It is also used as the CodeBuild project name
                and as the prefix of several child construct ids.
            spec_file_path: Path passed to ``BuildSpecLoader.load`` to obtain
                the build spec of the CodeBuild project.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, id, **kwargs)

        # GitHub source: build on pull request activity and on pushes to the CI target branches.
        pull_request_events = codebuild.FilterGroup.in_event_of(
            codebuild.EventAction.PULL_REQUEST_CREATED,
            codebuild.EventAction.PULL_REQUEST_UPDATED,
            codebuild.EventAction.PULL_REQUEST_REOPENED)
        push_events = codebuild.FilterGroup.in_event_of(codebuild.EventAction.PUSH).and_branch_is(
            GITHUB_PUSH_CI_BRANCH_TARGETS)
        github_source = codebuild.Source.git_hub(
            owner=GITHUB_REPO_OWNER,
            repo=GITHUB_REPO_NAME,
            webhook=True,
            webhook_filters=[pull_request_events, push_events],
            webhook_triggers_batch_build=True)

        # IAM role assumed by CodeBuild, carrying the batch-build policy and the metrics publishing policy.
        build_role = iam.Role(
            scope=self,
            id=f"{id}-role",
            assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"),
            inline_policies={
                "code_build_batch_policy": iam.PolicyDocument.from_json(code_build_batch_policy_in_json([id])),
                "fuzz_policy": iam.PolicyDocument.from_json(code_build_publish_metrics_in_json()),
            })

        # VPC shared by EFS and CodeBuild. It has one public and one private subnet in a single AZ so that idle
        # NAT gateways do not use up the elastic IP limit.
        vpc = ec2.Vpc(
            scope=self,
            id=f"{id}-FuzzingVPC",
            subnet_configuration=[
                ec2.SubnetConfiguration(name="PublicFuzzingSubnet", subnet_type=ec2.SubnetType.PUBLIC),
                ec2.SubnetConfiguration(name="PrivateFuzzingSubnet",
                                        subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS),
            ],
            max_azs=1)

        # One security group for both the builds and the filesystem; members may talk to each other freely.
        security_group = ec2.SecurityGroup(
            scope=self,
            id=f"{id}-FuzzingSecurityGroup",
            vpc=vpc)
        security_group.add_ingress_rule(
            peer=security_group,
            connection=ec2.Port.all_traffic(),
            description="Allow all traffic inside security group")

        # EFS filesystem holding the corpus and logs. New filesystems may burst to 100 MB/s for the first 2 TB of
        # data read/written; afterwards throughput is limited by filesystem size. As of late 2021 the corpus was
        # below one GB, which throttles all reads and writes to the minimum 1 MB/s. Provisioned throughput keeps
        # fuzz runs finishing in a reasonable time. 100 MB/s matches the performance used for 2021; EFS metrics
        # from late 2021 showed 4-22 MB/s of transfers during fuzz runs, so this leaves plenty of buffer and lets
        # ~4-5 fuzz runs start at the same time without issue.
        # https://docs.aws.amazon.com/efs/latest/ug/performance.html
        corpus_filesystem = efs.FileSystem(
            scope=self,
            id=f"{id}-FuzzingEFS",
            file_system_name="AWS-LC-Fuzz-Corpus",
            enable_automatic_backups=True,
            encrypted=True,
            security_group=security_group,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS),
            performance_mode=efs.PerformanceMode.GENERAL_PURPOSE,
            throughput_mode=efs.ThroughputMode.PROVISIONED,
            provisioned_throughput_per_second=Size.mebibytes(100),
        )

        # The CodeBuild project itself, placed inside the VPC so it can reach the filesystem.
        build_environment = codebuild.BuildEnvironment(
            compute_type=codebuild.ComputeType.LARGE,
            privileged=True,
            build_image=codebuild.LinuxBuildImage.STANDARD_4_0)
        project = codebuild.Project(
            scope=self,
            id="FuzzingCodeBuild",
            project_name=id,
            source=github_source,
            role=build_role,
            timeout=Duration.minutes(120),
            environment=build_environment,
            build_spec=BuildSpecLoader.load(spec_file_path),
            vpc=vpc,
            security_groups=[security_group])
        project.enable_batch_builds()

        # Attach the filesystem through a raw CloudFormation override.
        # CDK raw overrides: https://docs.aws.amazon.com/cdk/latest/guide/cfn_layer.html#cfn_layer_raw
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-codebuild-project.html#aws-resource-codebuild-project-properties
        # The EFS identifier needs to match tests/ci/common_fuzz.sh, CodeBuild defines an environment variable
        # named codebuild_$identifier.
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-codebuild-project-projectfilesystemlocation.html
        #
        # TODO: add this to the CDK project above when it supports EfsFileSystemLocation
        efs_location = {
            "Identifier": "fuzzing_root",
            "Location": f"{corpus_filesystem.file_system_id}.efs.{AWS_REGION}.amazonaws.com:/",
            "MountPoint": "/efs_fuzzing_root",
            "Type": "EFS"
        }
        project.node.default_child.add_override("Properties.FileSystemLocations", [efs_location])

        PruneStaleGitHubBuilds(scope=self, id="PruneStaleGitHubBuilds", project=project)
144 changes: 144 additions & 0 deletions tests/ci/common_fuzz.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 OR ISC

source tests/ci/common_posix_setup.sh

# Corpus root: use the directory named by CODEBUILD_FUZZING_ROOT when that variable is set, otherwise fall back to
# a mock directory below BUILD_ROOT.
if [[ ! -v CODEBUILD_FUZZING_ROOT ]]; then
  CORPUS_ROOT="${BUILD_ROOT}/mock_efs/fuzzing"
else
  CORPUS_ROOT="${CODEBUILD_FUZZING_ROOT}/fuzzing"
fi
echo "$CORPUS_ROOT"

# Build id: reuse CODEBUILD_BUILD_ID when it is set, otherwise make up a 13 character alphanumeric id.
if [[ ! -v CODEBUILD_BUILD_ID ]]; then
  # Generate a random string in bash https://unix.stackexchange.com/questions/230673/how-to-generate-a-random-string
  # NOTE(review): the trailing echo makes the substitution end with a successful command; presumably this guards
  # against tr being terminated once head exits - confirm before removing it.
  BUILD_ID=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 13 ; echo '')
else
  BUILD_ID="${CODEBUILD_BUILD_ID}"
fi
echo "$BUILD_ID"

# Failures are stored under a per-day, per-build directory in the corpus root; the local run directory always
# starts out empty.
DATE_NOW="$(date +%Y-%m-%d)"
SHARED_FAILURE_ROOT="${CORPUS_ROOT}/runs/${DATE_NOW}/${BUILD_ID}"
LOCAL_RUN_ROOT="${BUILD_ROOT}/fuzz_run_root"
rm -rf "${LOCAL_RUN_ROOT}"

# Publish a metric whose unit is Count. Every argument is forwarded unchanged to put_metric.
put_metric_count() {
  put_metric --unit Count "$@"
}

# Publish a metric to the AWS-LC-Fuzz CloudWatch namespace. Every argument is appended to the
# "aws cloudwatch put-metric-data" call. A failed publish is reported but never fails the build.
put_metric() {
  # Publishing may fail; exit-on-error is switched off (+e) so that this cannot abort the build
  set +e
  if ! aws cloudwatch put-metric-data --namespace AWS-LC-Fuzz "$@"; then
    echo "Publishing metric failed, continuing with the rest of the build"
  fi
  # Exit-on-error is switched back on for the rest of the build
  set -e
}

# Run one libFuzzer based fuzz test, publish its metrics and merge new inputs into the shared corpus.
#
# Takes no positional arguments; it reads these variables, which the caller must set beforehand:
#   FUZZ_NAME           name of the fuzz test, used for directory names and metric dimensions
#   FUZZ_TEST_PATH      path of the fuzzer executable
#   SRC_CORPUS          corpus directory checked into the repository
#   FUZZ_TEST_TIMEOUT   value passed to the fuzzer as -timeout
#   TIME_FOR_EACH_FUZZ  value passed to the fuzzer as -max_total_time
#   NUM_CPU_THREADS     number of jobs/workers (overridden to 6 for cryptofuzz on aarch64)
#   PLATFORM            metric dimension value
#   CORPUS_ROOT, LOCAL_RUN_ROOT, SHARED_FAILURE_ROOT  directories computed at the top of this file
#
# When the fuzz run fails, the run directory and the fuzzer binary are copied below SHARED_FAILURE_ROOT and the
# script exits with status 1. The variables assigned here are intentionally not declared local.
function run_fuzz_test {
  SHARED_FUZZ_TEST_CORPUS="${CORPUS_ROOT}/shared_corpus/${FUZZ_NAME}/shared_corpus"
  LOCAL_FUZZ_TEST_ROOT="${LOCAL_RUN_ROOT}/${FUZZ_NAME}"
  LOCAL_SHARED_CORPUS="${LOCAL_FUZZ_TEST_ROOT}/local_shared_corpus"
  LOCAL_RUN_CORPUS="${LOCAL_FUZZ_TEST_ROOT}/run_corpus"
  LOCAL_ARTIFACTS_FOLDER="${LOCAL_FUZZ_TEST_ROOT}/artifacts"
  LOCAL_FUZZ_RUN_LOGS="${LOCAL_FUZZ_TEST_ROOT}/logs"
  SUMMARY_LOG="${LOCAL_FUZZ_RUN_LOGS}/summary.log"
  mkdir -p "$SHARED_FUZZ_TEST_CORPUS" "$LOCAL_FUZZ_TEST_ROOT" "$LOCAL_RUN_CORPUS" "$LOCAL_ARTIFACTS_FOLDER" "$LOCAL_FUZZ_RUN_LOGS"

  # To avoid having each libfuzzer thread read from the shared corpus copy it to the local CodeBuild directory one time
  cp -r "$SHARED_FUZZ_TEST_CORPUS" "$LOCAL_SHARED_CORPUS"

  # Calculate starting metrics and post to CloudWatch. This counts the files in LOCAL_SHARED_CORPUS but publishes
  # them as the SharedCorpusFileCount, which is basically the same thing because everything in
  # SHARED_FUZZ_TEST_CORPUS was just copied to LOCAL_SHARED_CORPUS
  ORIGINAL_CORPUS_FILE_COUNT=$(find "$LOCAL_SHARED_CORPUS" -type f | wc -l)
  put_metric_count --metric-name SharedCorpusFileCount --value "$ORIGINAL_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME"

  # Perform the actual fuzzing!
  # Step 1 run each fuzz test for the determined time. This will use the existing shared corpus copied from EFS to
  # LOCAL_SHARED_CORPUS and any files checked into the GitHub SRC_CORPUS. This runs the fuzzer with three
  # folders: the first folder is where new inputs will go (LOCAL_RUN_CORPUS), all other folders will be used as input
  # for fuzzing (LOCAL_SHARED_CORPUS and SRC_CORPUS).
  # https://llvm.org/docs/LibFuzzer.html#options
  #
  # Run with NUM_CPU_THREADS which will be physical cores on ARM and virtualized cores on x86 with hyper threading.
  # Looking at the overall system fuzz rate running 1:1 with virtualized cores provides a noticeable speed up. This
  # is slightly different than libfuzzer's recommendation of #cores/2.
  # This could fail and we want to capture that so we can publish metrics and save logs (+e)
  set +e
  FUZZ_RUN_FAILURE=0
  # 2048 is the default memory usage. https://llvm.org/docs/LibFuzzer.html
  MEM_USAGE_LIMIT=2048
  if [[ "${FUZZ_NAME}" == *"cryptofuzz"* && "$(uname -p)" == 'aarch64' ]]; then
    # On arm, libFuzzer: out-of-memory (used: 2063Mb; limit: 2048Mb)
    # Below is set based on ARM BUILD_GENERAL1_LARGE(8vCPU, 16 GB memory).
    # 2500MB x 6 / 1024 = 14.6GB
    MEM_USAGE_LIMIT=2500
    NUM_CPU_THREADS=6
  fi
  time "${FUZZ_TEST_PATH}" -rss_limit_mb="${MEM_USAGE_LIMIT}" -print_final_stats=1 -timeout="$FUZZ_TEST_TIMEOUT" -max_total_time="$TIME_FOR_EACH_FUZZ" \
    -jobs="$NUM_CPU_THREADS" -workers="$NUM_CPU_THREADS" \
    -artifact_prefix="$LOCAL_ARTIFACTS_FOLDER/" \
    "$LOCAL_RUN_CORPUS" "$LOCAL_SHARED_CORPUS" "$SRC_CORPUS" 2>&1 | tee "$SUMMARY_LOG"
  # This gets the status of the fuzz run which determines if we want to fail the build or not, otherwise we'd get
  # the results of tee. Any non-zero status is treated as a failure so that a fuzzer that could not be started or
  # was killed by a signal is not mistaken for a successful run.
  if [ "${PIPESTATUS[0]}" != 0 ]; then
    FUZZ_RUN_FAILURE=1
  fi

  # The libfuzzer logs are written to the current working directory and need to be moved after the test is done
  mv ./*.log "${LOCAL_FUZZ_RUN_LOGS}/."

  if [ "$FUZZ_RUN_FAILURE" == 1 ]; then
    FUZZ_TEST_FAILURE_ROOT="${SHARED_FAILURE_ROOT}/${FUZZ_NAME}"
    mkdir -p "$FUZZ_TEST_FAILURE_ROOT"

    if [[ "$FUZZ_NAME" == "cryptofuzz" ]]; then
      # Print every artifact base64 encoded and replay it through the fuzzer in debug mode, keeping the output
      for ARTIFACT in "$LOCAL_ARTIFACTS_FOLDER"/*; do
        # An empty artifacts folder leaves the glob unexpanded, skip that literal pattern
        [ -e "$ARTIFACT" ] || continue
        base64 "$ARTIFACT"
        ARTIFACT_NAME=$(basename "$ARTIFACT")
        "${FUZZ_TEST_PATH}" --debug "$ARTIFACT" | tee "${LOCAL_FUZZ_RUN_LOGS}/${ARTIFACT_NAME}.log"
      done
    fi

    cp -r "$LOCAL_FUZZ_TEST_ROOT" "$SHARED_FAILURE_ROOT"
    cp "$FUZZ_TEST_PATH" "${FUZZ_TEST_FAILURE_ROOT}/${FUZZ_NAME}"

    # If this fuzz run has failed the below metrics won't make a lot of sense, it could fail on the first input and
    # publish a TestCount of 1 which makes all the metrics look weird
    echo "${FUZZ_NAME} failed, see the above output for details. For all the logs see ${SHARED_FAILURE_ROOT} in EFS"
    exit 1
  else
    echo "Fuzz test ${FUZZ_NAME} finished successfully, not copying run logs and run corpus"
  fi

  set -e

  # Step 2 merge any new files from the run corpus and GitHub src corpus into the shared corpus, the first folder is
  # where to merge the new corpus (SHARED_FUZZ_TEST_CORPUS), the second two are where to read new inputs from
  # (LOCAL_RUN_CORPUS and SRC_CORPUS).
  time "${FUZZ_TEST_PATH}" -merge=1 "$SHARED_FUZZ_TEST_CORPUS" "$LOCAL_RUN_CORPUS" "$SRC_CORPUS"

  # Calculate interesting metrics and post results to CloudWatch, this checks the shared (EFS) corpus after the new test
  # run corpus has been merged in
  FINAL_SHARED_CORPUS_FILE_COUNT=$(find "$SHARED_FUZZ_TEST_CORPUS" -type f | wc -l)
  put_metric_count --metric-name SharedCorpusFileCount --value "$FINAL_SHARED_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME"

  RUN_CORPUS_FILE_COUNT=$(find "$LOCAL_RUN_CORPUS" -type f | wc -l)
  put_metric_count --metric-name RunCorpusFileCount --value "$RUN_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"

  # Sum the executed unit counts of all "stat::number_of_executed_units" lines in the summary log
  TEST_COUNT=$(grep -o "stat::number_of_executed_units: [0-9]*" "$SUMMARY_LOG" | awk '{test_count += $2} END {print test_count}')
  put_metric_count --metric-name TestCount --value "$TEST_COUNT" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"

  TESTS_PER_SECOND=$((TEST_COUNT/TIME_FOR_EACH_FUZZ))
  put_metric --metric-name TestRate --value "$TESTS_PER_SECOND" --unit Count/Second --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"

  # Highest "ft:" value seen in the summary log
  FEATURE_COVERAGE=$(grep -o "ft: [0-9]*" "$SUMMARY_LOG" | awk '{print $2}' | sort -n | tail -1)
  put_metric_count --metric-name FeatureCoverage --value "$FEATURE_COVERAGE" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"

  # Highest "cov:" value seen in the summary log
  BLOCK_COVERAGE=$(grep -o "cov: [0-9]*" "$SUMMARY_LOG" | awk '{print $2}' | sort -n | tail -1)
  put_metric_count --metric-name BlockCoverage --value "$BLOCK_COVERAGE" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"

  echo "${FUZZ_NAME} starting shared ${ORIGINAL_CORPUS_FILE_COUNT} final shared ${FINAL_SHARED_CORPUS_FILE_COUNT} new files ${RUN_CORPUS_FILE_COUNT} total test count ${TEST_COUNT} test rate ${TESTS_PER_SECOND} code coverage ${BLOCK_COVERAGE} feature coverage ${FEATURE_COVERAGE}"
}
Loading

0 comments on commit 430b607

Please sign in to comment.