diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ca31f0..2a4e4dd 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,12 +5,27 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.4.0] - 2021-09-28
+
+### Added
+
+- A new pipeline to deploy [Amazon SageMaker Model Quality Monitor](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality.html). The new pipeline monitors the performance of a deployed model by comparing the
+  predictions that the model makes with the actual ground truth labels that the model attempts to predict.
+
+### Updated
+
+- The Model Monitor pipeline's API call. The Model Monitor pipeline is now split into two pipelines: the Data Quality Monitor pipeline and the Model Quality Monitor pipeline.
+- The format of CloudFormation template parameter names from `PARAMETERNAME` to `ParameterName`.
+- The APIs of the Realtime Inference pipeline to support passing an optional custom endpoint name.
+- The data quality baseline's Lambda function to use the Amazon SageMaker SDK to create the baseline, instead of Boto3.
+- AWS Cloud Development Kit (AWS CDK) and AWS Solutions Constructs to version 1.117.0.
+
 ## [1.3.0] - 2021-06-24
 
 ### Added
 
 - The option to use [Amazon SageMaker Model Registry](https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry.html) to deploy versioned models. The model registry allows you to catalog models for production, manage model versions, associate metadata with models, manage the approval status of a model, deploy models to production, and automate model deployment with CI/CD.
-- The option to use an [AWS Organizations delegated administrator account](https://docs.amazonaws.cn/en_us/AWSCloudFormation/latest/UserGuide/stacksets-orgs-delegated-admin.html) to orchestrate the deployment of Machine Learning (ML) workloads across the AWS Organizations accounts using AWS CloudFormation StackSets.
+- The option to use an [AWS Organizations delegated administrator account](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/stacksets-orgs-delegated-admin.html) to orchestrate the deployment of Machine Learning (ML) workloads across the AWS Organizations accounts using AWS CloudFormation StackSets.
 
 ### Updated
 
diff --git a/README.md b/README.md
index f3a7758..c0b006e 100755
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ pipeline for building and registering Docker images for custom algorithms that c
 deployment on an [Amazon SageMaker](https://aws.amazon.com/sagemaker/) endpoint. You can use batch and real-time data
 inferences to configure the pipeline for your business context.
 
-You can also provision multiple Model Monitor pipelines to periodically monitor the quality of deployed
+You can also provision multiple data quality and model quality monitor pipelines to periodically monitor the quality of deployed
 Amazon SageMaker ML models. This solution increases your team’s agility and efficiency by allowing them to repeat
 successful processes at scale.
 
@@ -119,7 +119,7 @@ chmod +x ./build-s3-dist.sh
 ./build-s3-dist.sh $DIST_OUTPUT_BUCKET $SOLUTION_NAME $VERSION
 ```
 
-- Upload the distributable assets to your Amazon S3 bucket in your account. Note: Ensure that you own the Amazon S3 bucket before uploading the assets. To upload the assets to the S3 bucket, you can use the AWS Console or the AWS CLI as shown below.
+- Upload the distributable assets to your Amazon S3 bucket in your account. Note: ensure that you own the Amazon S3 bucket before uploading the assets. To upload the assets to the S3 bucket, you can use the AWS Console or the AWS CLI as shown below.
 
 ```
 aws s3 cp ./global-s3-assets/ s3://my-bucket-name-/aws-mlops-framework// --recursive --acl bucket-owner-full-control --profile aws-cred-profile-name
diff --git a/deployment/build-s3-dist.sh b/deployment/build-s3-dist.sh
index 0ff64b2..756dfaf 100755
--- a/deployment/build-s3-dist.sh
+++ b/deployment/build-s3-dist.sh
@@ -28,7 +28,7 @@ set -e
 
 # Important: CDK global version number
-cdk_version=1.96.0
+cdk_version=1.117.0
 
 # Check to see if the required parameters have been provided:
 if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
@@ -111,23 +111,27 @@ echo "npm install -g aws-cdk@$cdk_version"
 npm install -g aws-cdk@$cdk_version
 
 #Run 'cdk synth for BYOM blueprints
-echo "cdk synth ModelMonitorStack > lib/blueprints/byom/byom_model_monitor.yaml"
-cdk synth ModelMonitorStack > lib/blueprints/byom/byom_model_monitor.yaml
-echo "cdk synth SingleAccountCodePipelineStack > lib/blueprints/byom/single_account_codepipeline.yaml"
-cdk synth SingleAccountCodePipelineStack > lib/blueprints/byom/single_account_codepipeline.yaml
-echo "cdk synth MultiAccountCodePipelineStack > lib/blueprints/byom/multi_account_codepipeline.yaml"
-cdk synth MultiAccountCodePipelineStack > lib/blueprints/byom/multi_account_codepipeline.yaml
-echo "cdk synth BYOMRealtimePipelineStack > lib/blueprints/byom/byom_realtime_inference_pipeline.yaml"
-cdk synth BYOMRealtimePipelineStack > lib/blueprints/byom/byom_realtime_inference_pipeline.yaml
-echo "cdk synth BYOMCustomAlgorithmImageBuilderStack > lib/blueprints/byom/byom_custom_algorithm_image_builder.yaml"
-cdk synth BYOMCustomAlgorithmImageBuilderStack > lib/blueprints/byom/byom_custom_algorithm_image_builder.yaml
-echo "cdk synth BYOMBatchStack > lib/blueprints/byom/byom_batch_pipeline.yaml"
-cdk synth BYOMBatchStack > lib/blueprints/byom/byom_batch_pipeline.yaml
+echo "cdk synth DataQualityModelMonitorStack > lib/blueprints/byom/byom_data_quality_monitor.yaml --path-metadata false --version-reporting false"
+cdk synth DataQualityModelMonitorStack > lib/blueprints/byom/byom_data_quality_monitor.yaml --path-metadata false --version-reporting false
+echo "cdk synth ModelQualityModelMonitorStack > lib/blueprints/byom/byom_model_quality_monitor.yaml --path-metadata false --version-reporting false"
+cdk synth ModelQualityModelMonitorStack > lib/blueprints/byom/byom_model_quality_monitor.yaml --path-metadata false --version-reporting false
+echo "cdk synth SingleAccountCodePipelineStack > lib/blueprints/byom/single_account_codepipeline.yaml --path-metadata false --version-reporting false"
+cdk synth SingleAccountCodePipelineStack > lib/blueprints/byom/single_account_codepipeline.yaml --path-metadata false --version-reporting false
+echo "cdk synth MultiAccountCodePipelineStack > lib/blueprints/byom/multi_account_codepipeline.yaml --path-metadata false --version-reporting false"
+cdk synth MultiAccountCodePipelineStack > lib/blueprints/byom/multi_account_codepipeline.yaml --path-metadata false --version-reporting false
+echo "cdk synth BYOMRealtimePipelineStack > lib/blueprints/byom/byom_realtime_inference_pipeline.yaml --path-metadata false --version-reporting false"
+cdk synth BYOMRealtimePipelineStack > lib/blueprints/byom/byom_realtime_inference_pipeline.yaml --path-metadata false --version-reporting false
+echo "cdk synth
BYOMCustomAlgorithmImageBuilderStack > lib/blueprints/byom/byom_custom_algorithm_image_builder.yaml --path-metadata false --version-reporting false" +cdk synth BYOMCustomAlgorithmImageBuilderStack > lib/blueprints/byom/byom_custom_algorithm_image_builder.yaml --path-metadata false --version-reporting false +echo "cdk synth BYOMBatchStack > lib/blueprints/byom/byom_batch_pipeline.yaml --path-metadata false --version-reporting false" +cdk synth BYOMBatchStack > lib/blueprints/byom/byom_batch_pipeline.yaml --path-metadata false --version-reporting false # Replace %%VERSION%% in other templates replace="s/%%VERSION%%/$3/g" -echo "sed -i -e $replace lib/blueprints/byom/byom_model_monitor.yaml" -sed -i -e $replace lib/blueprints/byom/byom_model_monitor.yaml +echo "sed -i -e $replace lib/blueprints/byom/byom_data_quality_monitor.yaml" +sed -i -e $replace lib/blueprints/byom/byom_data_quality_monitor.yaml +echo "sed -i -e $replace lib/blueprints/byom/byom_model_quality_monitor.yaml" +sed -i -e $replace lib/blueprints/byom/byom_model_quality_monitor.yaml echo "sed -i -e $replace lib/blueprints/byom/byom_realtime_inference_pipeline.yaml" sed -i -e $replace lib/blueprints/byom/byom_realtime_inference_pipeline.yaml echo "sed -i -e $replace lib/blueprints/byom/single_account_codepipeline.yaml" @@ -140,10 +144,10 @@ echo "sed -i -e $replace lib/blueprints/byom/byom_batch_pipeline.yaml" sed -i -e $replace lib/blueprints/byom/byom_batch_pipeline.yaml # Run 'cdk synth' for main templates to generate raw solution outputs -echo "cdk synth aws-mlops-single-account-framework --output=$staging_dist_dir" -cdk synth aws-mlops-single-account-framework --output=$staging_dist_dir -echo "cdk synth aws-mlops-multi-account-framework --output=$staging_dist_dir" -cdk synth aws-mlops-multi-account-framework --output=$staging_dist_dir +echo "cdk synth aws-mlops-single-account-framework --path-metadata false --version-reporting false --output=$staging_dist_dir" +cdk synth aws-mlops-single-account-framework --path-metadata false --version-reporting false --output=$staging_dist_dir +echo "cdk synth aws-mlops-multi-account-framework --path-metadata false --version-reporting false --output=$staging_dist_dir" +cdk synth aws-mlops-multi-account-framework --path-metadata false --version-reporting false --output=$staging_dist_dir # Remove unnecessary output files echo "cd $staging_dist_dir" diff --git a/source/app.py b/source/app.py index bf3ef76..a229fc2 100644 --- a/source/app.py +++ b/source/app.py @@ -56,26 +56,38 @@ batch_stack = BYOMBatchStack( app, "BYOMBatchStack", - description=( - f"({solution_id}byom-bt) - BYOM Batch Transform pipeline" f"in AWS MLOps Framework. Version {version}" - ), + description=(f"({solution_id}byom-bt) - BYOM Batch Transform pipeline in AWS MLOps Framework. Version {version}"), ) core.Aspects.of(batch_stack).add(AwsSDKConfigAspect(app, "SDKUserAgentBatch", solution_id, version)) -model_monitor_stack = ModelMonitorStack( +data_quality_monitor_stack = ModelMonitorStack( + app, + "DataQualityModelMonitorStack", + monitoring_type="DataQuality", + description=(f"({solution_id}byom-dqmm) - DataQuality Model Monitor pipeline. Version {version}"), +) + +core.Aspects.of(data_quality_monitor_stack).add( + AwsSDKConfigAspect(app, "SDKUserAgentDataMonitor", solution_id, version) +) + +model_quality_monitor_stack = ModelMonitorStack( app, - "ModelMonitorStack", - description=(f"({solution_id}byom-mm) - Model Monitor pipeline. 
Version {version}"), + "ModelQualityModelMonitorStack", + monitoring_type="ModelQuality", + description=(f"({solution_id}byom-mqmm) - ModelQuality Model Monitor pipeline. Version {version}"), ) -core.Aspects.of(model_monitor_stack).add(AwsSDKConfigAspect(app, "SDKUserAgentMonitor", solution_id, version)) +core.Aspects.of(model_quality_monitor_stack).add( + AwsSDKConfigAspect(app, "SDKUserAgentModelMonitor", solution_id, version) +) realtime_stack = BYOMRealtimePipelineStack( app, "BYOMRealtimePipelineStack", - description=(f"({solution_id}byom-rip) - BYOM Realtime Inference Pipleline. Version {version}"), + description=(f"({solution_id}byom-rip) - BYOM Realtime Inference Pipeline. Version {version}"), ) core.Aspects.of(realtime_stack).add(AwsSDKConfigAspect(app, "SDKUserAgentRealtime", solution_id, version)) diff --git a/source/architecture-option-2.png b/source/architecture-option-2.png index 967396f..f4013f9 100644 Binary files a/source/architecture-option-2.png and b/source/architecture-option-2.png differ diff --git a/source/lambdas/pipeline_orchestration/index.py b/source/lambdas/pipeline_orchestration/index.py index c757871..cdd4d62 100644 --- a/source/lambdas/pipeline_orchestration/index.py +++ b/source/lambdas/pipeline_orchestration/index.py @@ -14,6 +14,8 @@ from json import JSONEncoder import os import datetime +from botocore.client import BaseClient +from typing import Dict, Any, List, Union from shared.wrappers import BadRequest, api_exception_handler from shared.logger import get_logger from shared.helper import get_client @@ -45,7 +47,7 @@ def default(self, obj): @api_exception_handler -def handler(event, context): +def handler(event: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: if "httpMethod" in event and event["httpMethod"] == "POST": # Lambda is being invoked from API Gateway if event["path"] == "/provisionpipeline": return provision_pipeline(json.loads(event["body"])) @@ -57,12 +59,16 @@ def handler(event, context): return provision_pipeline(event) else: raise BadRequest( - "Bad request format. Expected httpMethod or pipeline_type, recevied none. Check documentation " + "Bad request format. Expected httpMethod or pipeline_type, received none. Check documentation " + "for API & config formats." 
) -def provision_pipeline(event, client=cloudformation_client, s3_client=s3_client): +def provision_pipeline( + event: Dict[str, Any], + client: BaseClient = cloudformation_client, + s3_client: BaseClient = s3_client, +) -> Dict[str, Any]: """ provision_pipeline takes the lambda event object and creates a cloudformation stack @@ -109,7 +115,7 @@ def provision_pipeline(event, client=cloudformation_client, s3_client=s3_client) codepipeline_params = get_codepipeline_params( is_multi_account, provisioned_pipeline_stack_name, template_zip_name, template_file_name ) - # format the params (the format is the same for multi-accouunt parameters) + # format the params (the format is the same for multi-account parameters) formatted_codepipeline_params = format_template_parameters(codepipeline_params, "True") # create the codepipeline stack_response = create_codepipeline_stack( @@ -143,7 +149,12 @@ def provision_pipeline(event, client=cloudformation_client, s3_client=s3_client) return response -def update_stack(codepipeline_stack_name, pipeline_template_url, template_parameters, client): +def update_stack( + codepipeline_stack_name: str, + pipeline_template_url: str, + template_parameters: List[Dict[str, str]], + client: BaseClient, +) -> Dict[str, str]: try: update_response = client.update_stack( StackName=codepipeline_stack_name, @@ -171,8 +182,11 @@ def update_stack(codepipeline_stack_name, pipeline_template_url, template_parame def create_codepipeline_stack( - codepipeline_stack_name, pipeline_template_url, template_parameters, client=cloudformation_client -): + codepipeline_stack_name: str, + pipeline_template_url: str, + template_parameters: List[Dict[str, str]], + client: BaseClient = cloudformation_client, +) -> Dict[str, str]: try: stack_response = client.create_stack( StackName=codepipeline_stack_name, @@ -204,7 +218,9 @@ def create_codepipeline_stack( raise e -def pipeline_status(event, cfn_client=cloudformation_client, cp_client=codepipeline_client): +def pipeline_status( + event: Dict[str, Any], cfn_client: BaseClient = cloudformation_client, cp_client: BaseClient = codepipeline_client +) -> Dict[str, Any]: """ pipeline_status takes the lambda event object and returns the status of codepipeline project that's running the pipeline diff --git a/source/lambdas/pipeline_orchestration/lambda_helpers.py b/source/lambdas/pipeline_orchestration/lambda_helpers.py index 5aaba8c..b57c516 100644 --- a/source/lambdas/pipeline_orchestration/lambda_helpers.py +++ b/source/lambdas/pipeline_orchestration/lambda_helpers.py @@ -16,28 +16,30 @@ import shutil import tempfile import uuid +from typing import Dict, List, Tuple, Union, Any +from botocore.client import BaseClient from shared.wrappers import BadRequest -from shared.helper import get_built_in_model_monitor_container_uri from shared.logger import get_logger logger = get_logger(__name__) -def template_url(pipeline_type): +def template_url(pipeline_type: str) -> str: """ template_url is a helper function that determines the cloudformation stack's file name based on inputs :pipeline_type: type of pipeline. 
Supported values: "byom_realtime_builtin"|"byom_realtime_custom"|"byom_batch_builtin"|"byom_batch_custom"| - "byom_model_monitor"|"byom_image_builder"|"single_account_codepipeline"| + "byom_data_quality_monitor"|"byom_model_quality_monitor"|"byom_image_builder"|"single_account_codepipeline"| "multi_account_codepipeline" - :return: returns a link to the appropriate coudformation template files which can be one of these values: + :return: returns a link to the appropriate cloudformation template files which can be one of these values: byom_realtime_inference_pipeline.yaml byom_batch_pipeline.yaml - byom_model_monitor.yaml + byom_data_quality_monitor.yaml + byom_model_quality_monitor.yaml byom_custom_algorithm_image_builder.yaml single_account_codepipeline.yaml multi_account_codepipeline.yaml @@ -51,7 +53,8 @@ def template_url(pipeline_type): "byom_realtime_custom": realtime_inference_template, "byom_batch_builtin": batch_inference_template, "byom_batch_custom": batch_inference_template, - "byom_model_monitor": "blueprints/byom/byom_model_monitor.yaml", + "byom_data_quality_monitor": "blueprints/byom/byom_data_quality_monitor.yaml", + "byom_model_quality_monitor": "blueprints/byom/byom_model_quality_monitor.yaml", "byom_image_builder": f"{url}/byom_custom_algorithm_image_builder.yaml", "single_account_codepipeline": f"{url}/single_account_codepipeline.yaml", "multi_account_codepipeline": f"{url}/multi_account_codepipeline.yaml", @@ -64,7 +67,7 @@ def template_url(pipeline_type): raise BadRequest(f"Bad request. Pipeline type: {pipeline_type} is not supported.") -def get_stage_param(event, api_key, stage): +def get_stage_param(event: Dict[str, Any], api_key: str, stage: str) -> str: api_key_value = event.get(api_key, "") if isinstance(api_key_value, dict) and stage in list(api_key_value.keys()): api_key_value = api_key_value[stage] @@ -72,7 +75,7 @@ def get_stage_param(event, api_key, stage): return api_key_value -def get_stack_name(event): +def get_stack_name(event: Dict[str, Any]) -> str: pipeline_type = event.get("pipeline_type") pipeline_stack_name = os.environ["PIPELINE_STACK_NAME"] model_name = event.get("model_name", "").lower().strip() @@ -92,8 +95,11 @@ def get_stack_name(event): # name of stack provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}-{postfix[pipeline_type]}" - elif pipeline_type == "byom_model_monitor": - provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}-BYOMModelMonitor" + elif pipeline_type == "byom_data_quality_monitor": + provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}-BYOMDataQualityMonitor" + + elif pipeline_type == "byom_model_quality_monitor": + provisioned_pipeline_stack_name = f"{pipeline_stack_name}-{model_name}-BYOMModelQualityMonitor" elif pipeline_type == "byom_image_builder": image_tag = event.get("image_tag") @@ -102,15 +108,15 @@ def get_stack_name(event): return provisioned_pipeline_stack_name.lower() -def get_template_parameters(event, is_multi_account, stage=None): +def get_template_parameters(event: Dict[str, Any], is_multi_account: bool, stage: str = None) -> List[Tuple[str, str]]: pipeline_type = event.get("pipeline_type") region = os.environ["REGION"] kms_key_arn = get_stage_param(event, "kms_key_arn", stage) common_params = [ - ("ASSETSBUCKET", os.environ["ASSETS_BUCKET"]), - ("KMSKEYARN", kms_key_arn), - ("BLUEPRINTBUCKET", os.environ["BLUEPRINT_BUCKET"]), + ("AssetsBucket", os.environ["ASSETS_BUCKET"]), + ("KmsKeyArn", kms_key_arn), + ("BlueprintBucket", 
os.environ["BLUEPRINT_BUCKET"]), ] if pipeline_type in [ "byom_realtime_builtin", @@ -121,18 +127,20 @@ def get_template_parameters(event, is_multi_account, stage=None): common_params.extend(get_common_realtime_batch_params(event, region, stage)) - # add realtime specfic parameters + # add realtime specific parameters if pipeline_type in ["byom_realtime_builtin", "byom_realtime_custom"]: common_params.extend(get_realtime_specific_params(event, stage)) # else add batch params else: - common_params.extend(get_bacth_specific_params(event, stage)) + common_params.extend(get_batch_specific_params(event, stage)) return common_params - elif pipeline_type == "byom_model_monitor": - common_params.extend(get_model_monitor_params(event, region, stage)) - return common_params + elif pipeline_type == "byom_data_quality_monitor": + return [*common_params, *get_model_monitor_params(event, region, stage)] + + elif pipeline_type == "byom_model_quality_monitor": + return [*common_params, *get_model_monitor_params(event, region, stage, monitoring_type="ModelQuality")] elif pipeline_type == "byom_image_builder": return get_image_builder_params(event) @@ -141,40 +149,42 @@ def get_template_parameters(event, is_multi_account, stage=None): raise BadRequest("Bad request format. Please provide a supported pipeline") -def get_codepipeline_params(is_multi_account, stack_name, template_zip_name, template_file_name): +def get_codepipeline_params( + is_multi_account: str, stack_name: str, template_zip_name: str, template_file_name: str +) -> List[Tuple[str, str]]: single_account_params = [ - ("NOTIFICATIONEMAIL", os.environ["NOTIFICATION_EMAIL"]), - ("TEMPLATEZIPNAME", template_zip_name), - ("TEMPLATEFILENAME", template_file_name), - ("ASSETSBUCKET", os.environ["ASSETS_BUCKET"]), - ("STACKNAME", stack_name), + ("NotificationEmail", os.environ["NOTIFICATION_EMAIL"]), + ("TemplateZipFileName", template_zip_name), + ("TemplateFileName", template_file_name), + ("AssetsBucket", os.environ["ASSETS_BUCKET"]), + ("StackName", stack_name), ] if is_multi_account == "False": - single_account_params.extend([("TEMPLATEPARAMSNAME", "template_params.json")]) + single_account_params.extend([("TemplateParamsName", "template_params.json")]) return single_account_params else: single_account_params.extend( [ - ("DEVPARAMSNAME", "dev_template_params.json"), - ("STAGINGPARAMSNAME", "staging_template_params.json"), - ("PRODPARAMSNAME", "prod_template_params.json"), - ("DEVACCOUNTID", os.environ["DEV_ACCOUNT_ID"]), - ("DEVORGID", os.environ["DEV_ORG_ID"]), - ("STAGINGACCOUNTID", os.environ["STAGING_ACCOUNT_ID"]), - ("STAGINGORGID", os.environ["STAGING_ORG_ID"]), - ("PRODACCOUNTID", os.environ["PROD_ACCOUNT_ID"]), - ("PRODORGID", os.environ["PROD_ORG_ID"]), - ("BLUEPRINTBUCKET", os.environ["BLUEPRINT_BUCKET"]), - ("DELEGATEDADMINACCOUNT", os.environ["IS_DELEGATED_ADMIN"]), + ("DevParamsName", "dev_template_params.json"), + ("StagingParamsName", "staging_template_params.json"), + ("ProdParamsName", "prod_template_params.json"), + ("DevAccountId", os.environ["DEV_ACCOUNT_ID"]), + ("DevOrgId", os.environ["DEV_ORG_ID"]), + ("StagingAccountId", os.environ["STAGING_ACCOUNT_ID"]), + ("StagingOrgId", os.environ["STAGING_ORG_ID"]), + ("ProdAccountId", os.environ["PROD_ACCOUNT_ID"]), + ("ProdOrgId", os.environ["PROD_ORG_ID"]), + ("BlueprintBucket", os.environ["BLUEPRINT_BUCKET"]), + ("DelegatedAdminAccount", os.environ["IS_DELEGATED_ADMIN"]), ] ) return single_account_params -def get_common_realtime_batch_params(event, region, stage): +def 
get_common_realtime_batch_params(event: Dict[str, Any], region: str, stage: str) -> List[Tuple[str, str]]: inference_instance = get_stage_param(event, "inference_instance", stage) image_uri = ( get_image_uri(event.get("pipeline_type"), event, region) if os.environ["USE_MODEL_REGISTRY"] == "No" else "" @@ -187,84 +197,119 @@ def get_common_realtime_batch_params(event, region, stage): else "" ) return [ - ("MODELNAME", event.get("model_name")), - ("MODELARTIFACTLOCATION", event.get("model_artifact_location", "")), - ("INFERENCEINSTANCE", inference_instance), - ("CUSTOMALGORITHMSECRREPOARN", os.environ["ECR_REPO_ARN"]), - ("IMAGEURI", image_uri), - ("MODELPACKAGEGROUPNAME", model_package_group_name), - ("MODELPACKAGENAME", event.get("model_package_name", "")), + ("ModelName", event.get("model_name")), + ("ModelArtifactLocation", event.get("model_artifact_location", "")), + ("InferenceInstance", inference_instance), + ("CustomAlgorithmsECRRepoArn", os.environ["ECR_REPO_ARN"]), + ("ImageUri", image_uri), + ("ModelPackageGroupName", model_package_group_name), + ("ModelPackageName", event.get("model_package_name", "")), ] -def clean_param(param): +def clean_param(param: str) -> str: + # if the paramter's value ends with '/', remove it if param.endswith("/"): return param[:-1] else: return param -def get_realtime_specific_params(event, stage): +def get_realtime_specific_params(event: Dict[str, Any], stage: str) -> List[Tuple[str, str]]: data_capture_location = clean_param(get_stage_param(event, "data_capture_location", stage)) - return [("DATACAPTURELOCATION", data_capture_location)] + endpoint_name = get_stage_param(event, "endpoint_name", stage).lower().strip() + return [("DataCaptureLocation", data_capture_location), ("EndpointName", endpoint_name)] -def get_bacth_specific_params(event, stage): +def get_batch_specific_params(event: Dict[str, Any], stage: str) -> List[Tuple[str, str]]: batch_inference_data = get_stage_param(event, "batch_inference_data", stage) batch_job_output_location = clean_param(get_stage_param(event, "batch_job_output_location", stage)) return [ - ("BATCHINPUTBUCKET", batch_inference_data.split("/")[0]), - ("BATCHINFERENCEDATA", batch_inference_data), - ("BATCHOUTPUTLOCATION", batch_job_output_location), + ("BatchInputBucket", batch_inference_data.split("/")[0]), + ("BatchInferenceData", batch_inference_data), + ("BatchOutputLocation", batch_job_output_location), ] -def get_model_monitor_params(event, region, stage): +def get_built_in_model_monitor_image_uri(region, image_name="model-monitor"): + model_monitor_image_uri = sagemaker.image_uris.retrieve( + framework=image_name, + region=region, + ) + + return model_monitor_image_uri + + +def get_model_monitor_params( + event: Dict[str, Any], region: str, stage: str, monitoring_type: str = "DataQuality" +) -> List[Tuple[str, str]]: endpoint_name = get_stage_param(event, "endpoint_name", stage).lower().strip() - monitoring_type = event.get("monitoring_type", "dataquality") # generate jobs names - baseline_job_name = f"{endpoint_name}-baseline-job-{str(uuid.uuid4())[:4]}" - monitoring_schedule_name = f"{endpoint_name}-monitor-{str(uuid.uuid4())[:4]}" + # make sure baseline_job_name and monitoring_schedule_name are <= 63 characters long, especially + # if endpoint_name was dynamically generated by AWS CDK. 
+ baseline_job_name = f"{endpoint_name}-{monitoring_type.lower()}-baseline-{str(uuid.uuid4())[:4]}" + monitoring_schedule_name = f"{endpoint_name}-{monitoring_type.lower()}-monitor-{str(uuid.uuid4())[:4]}" baseline_job_output_location = clean_param(get_stage_param(event, "baseline_job_output_location", stage)) - data_capture_location = clean_param(get_stage_param(event, "baseline_job_output_location", stage)) + data_capture_location = clean_param(get_stage_param(event, "data_capture_location", stage)) instance_type = get_stage_param(event, "instance_type", stage) instance_volume_size = get_stage_param(event, "instance_volume_size", stage) - max_runtime_seconds = get_stage_param(event, "max_runtime_seconds", stage) + baseline_max_runtime_seconds = get_stage_param(event, "baseline_max_runtime_seconds", stage) + monitor_max_runtime_seconds = get_stage_param(event, "monitor_max_runtime_seconds", stage) monitoring_output_location = clean_param(get_stage_param(event, "monitoring_output_location", stage)) schedule_expression = get_stage_param(event, "schedule_expression", stage) - - return [ - ("BASELINEJOBNAME", baseline_job_name), - ("BASELINEOUTPUTBUCKET", baseline_job_output_location.split("/")[0]), - ("BASELINEJOBOUTPUTLOCATION", baseline_job_output_location), - ("DATACAPTUREBUCKET", data_capture_location.split("/")[0]), - ("DATACAPTURELOCATION", data_capture_location), - ("ENDPOINTNAME", endpoint_name), - ("IMAGEURI", get_built_in_model_monitor_container_uri(region)), - ("INSTANCETYPE", instance_type), - ("INSTANCEVOLUMESIZE", instance_volume_size), - ("MAXRUNTIMESECONDS", max_runtime_seconds), - ("MONITORINGOUTPUTLOCATION", monitoring_output_location), - ("MONITORINGSCHEDULENAME", monitoring_schedule_name), - ("MONITORINGTYPE", monitoring_type), - ("SCHEDULEEXPRESSION", schedule_expression), - ("TRAININGDATA", event.get("training_data")), + monitor_ground_truth_input = get_stage_param(event, "monitor_ground_truth_input", stage) + + monitor_params = [ + ("BaselineJobName", baseline_job_name), + ("BaselineOutputBucket", baseline_job_output_location.split("/")[0]), + ("BaselineJobOutputLocation", f"{baseline_job_output_location}/{baseline_job_name}"), + ("DataCaptureBucket", data_capture_location.split("/")[0]), + ("DataCaptureLocation", data_capture_location), + ("EndpointName", endpoint_name), + ("ImageUri", get_built_in_model_monitor_image_uri(region)), + ("InstanceType", instance_type), + ("InstanceVolumeSize", instance_volume_size), + ("BaselineMaxRuntimeSeconds", baseline_max_runtime_seconds), + ("MonitorMaxRuntimeSeconds", monitor_max_runtime_seconds), + ("MonitoringOutputLocation", monitoring_output_location), + ("MonitoringScheduleName", monitoring_schedule_name), + ("ScheduleExpression", schedule_expression), + ("BaselineData", event.get("baseline_data")), ] + # add ModelQuality parameters + if monitoring_type == "ModelQuality": + monitor_params.extend( + [ + ("BaselineInferenceAttribute", event.get("baseline_inference_attribute", "").strip()), + ("BaselineProbabilityAttribute", event.get("baseline_probability_attribute", "").strip()), + ("BaselineGroundTruthAttribute", event.get("baseline_ground_truth_attribute", "").strip()), + ("ProblemType", event.get("problem_type", "").strip()), + ("MonitorInferenceAttribute", event.get("monitor_inference_attribute", "").strip()), + ("MonitorProbabilityAttribute", event.get("monitor_probability_attribute", "").strip()), + ("ProbabilityThresholdAttribute", event.get("probability_threshold_attribute", "").strip()), + ("MonitorGroundTruthInput", 
monitor_ground_truth_input), + ] + ) + + return monitor_params -def get_image_builder_params(event): + +def get_image_builder_params(event: Dict[str, Any]) -> List[Tuple[str, str]]: return [ - ("NOTIFICATIONEMAIL", os.environ["NOTIFICATION_EMAIL"]), - ("ASSETSBUCKET", os.environ["ASSETS_BUCKET"]), - ("CUSTOMCONTAINER", event.get("custom_algorithm_docker")), - ("ECRREPONAME", event.get("ecr_repo_name")), - ("IMAGETAG", event.get("image_tag")), + ("NotificationEmail", os.environ["NOTIFICATION_EMAIL"]), + ("AssetsBucket", os.environ["ASSETS_BUCKET"]), + ("CustomImage", event.get("custom_algorithm_docker")), + ("ECRRepoName", event.get("ecr_repo_name")), + ("ImageTag", event.get("image_tag")), ] -def format_template_parameters(key_value_list, is_multi_account): +def format_template_parameters( + key_value_list: List[str], is_multi_account: str +) -> Union[List[Dict[str, str]], Dict[str, Dict[str, str]]]: if is_multi_account == "True": # for the multi-account option, the StackSet action, used by multi-account codepipeline, # requires this parameters format @@ -275,36 +320,42 @@ def format_template_parameters(key_value_list, is_multi_account): return {"Parameters": {param[0]: param[1] for param in key_value_list}} -def write_params_to_json(params, file_path): +def write_params_to_json(params: Union[List[Dict[str, str]], Dict[str, Dict[str, str]]], file_path: str) -> None: with open(file_path, "w") as fp: json.dump(params, fp, indent=4) -def upload_file_to_s3(local_file_path, s3_bucket_name, s3_file_key, s3_client): +def upload_file_to_s3(local_file_path: str, s3_bucket_name: str, s3_file_key: str, s3_client: BaseClient) -> None: s3_client.upload_file(local_file_path, s3_bucket_name, s3_file_key) -def download_file_from_s3(s3_bucket_name, file_key, local_file_path, s3_client): +def download_file_from_s3(s3_bucket_name: str, file_key: str, local_file_path: str, s3_client: BaseClient) -> None: s3_client.download_file(s3_bucket_name, file_key, local_file_path) def create_template_zip_file( - event, blueprint_bucket, assets_bucket, template_url, template_zip_name, is_multi_account, s3_client -): + event: Dict[str, Any], + blueprint_bucket: str, + assets_bucket: str, + template_url: str, + template_zip_name: str, + is_multi_account: str, + s3_client: BaseClient, +) -> None: zip_output_filename = "template" - # create a tmpdir for the zip file to downlaod + # create a tmpdir for the zip file to download local_directory = tempfile.mkdtemp() local_file_path = os.path.join(local_directory, template_url.split("/")[-1]) - # downloawd the template from the blueprints bucket + # download the template from the blueprints bucket download_file_from_s3(blueprint_bucket, template_url, local_file_path, s3_client) # create tmpdir to zip clodformation and stages parameters zip_local_directory = tempfile.mkdtemp() zip_file_path = os.path.join(zip_local_directory, zip_output_filename) - # downloawd the template from the blueprints bucket + # download the template from the blueprints bucket download_file_from_s3(blueprint_bucket, template_url, f"{local_directory}/{template_url.split('/')[-1]}", s3_client) # write the params to json file(s) @@ -326,7 +377,7 @@ def create_template_zip_file( local_directory, ) - # uploda file + # upload file upload_file_to_s3( f"{zip_file_path}.zip", assets_bucket, @@ -335,7 +386,7 @@ def create_template_zip_file( ) -def get_image_uri(pipeline_type, event, region): +def get_image_uri(pipeline_type: str, event: Dict[str, Any], region: str) -> str: if pipeline_type in 
["byom_realtime_custom", "byom_batch_custom"]: return event.get("custom_image_uri") elif pipeline_type in ["byom_realtime_builtin", "byom_batch_builtin"]: @@ -343,10 +394,24 @@ def get_image_uri(pipeline_type, event, region): framework=event.get("model_framework"), region=region, version=event.get("model_framework_version") ) else: - raise Exception("Unsupported pipeline by get_image_uri function") - - -def get_required_keys(pipeline_type, use_model_registry): + raise ValueError("Unsupported pipeline by get_image_uri function") + + +def get_required_keys(pipeline_type: str, use_model_registry: str, problem_type: str = None) -> List[str]: + # common required keys between model monitor types + common_monitor_keys = [ + "pipeline_type", + "model_name", + "endpoint_name", + "baseline_data", + "baseline_job_output_location", + "data_capture_location", + "monitoring_output_location", + "schedule_expression", + "monitor_max_runtime_seconds", + "instance_type", + "instance_volume_size", + ] # Realtime/batch pipelines if pipeline_type in [ "byom_realtime_builtin", @@ -374,19 +439,31 @@ def get_required_keys(pipeline_type, use_model_registry): return keys_map[pipeline_type] - # Model Monitor pipeline - elif pipeline_type == "byom_model_monitor": + # Data Quality Monitor pipeline + elif pipeline_type == "byom_data_quality_monitor": + return common_monitor_keys + # Model Quality Monitor pipeline + elif pipeline_type == "byom_model_quality_monitor": + common_model_keys = [ + "baseline_inference_attribute", + "baseline_ground_truth_attribute", + "problem_type", + "monitor_ground_truth_input", + ] + if problem_type in ["Regression", "MulticlassClassification"]: + common_model_keys.append("monitor_inference_attribute") + + elif problem_type == "BinaryClassification": + common_model_keys.extend( + ["monitor_probability_attribute", "probability_threshold_attribute", "baseline_probability_attribute"] + ) + + else: + raise BadRequest("Bad request format. 
Unsupported problem_type in byom_model_quality_monitor pipeline") + return [ - "pipeline_type", - "model_name", - "endpoint_name", - "training_data", - "baseline_job_output_location", - "data_capture_location", - "monitoring_output_location", - "schedule_expression", - "instance_type", - "instance_volume_size", + *common_monitor_keys, + *common_model_keys, ] # Image Builder pipeline elif pipeline_type == "byom_image_builder": @@ -403,17 +480,19 @@ def get_required_keys(pipeline_type, use_model_registry): ) -def validate(event): +def validate(event: Dict[str, Any]) -> Dict[str, Any]: """ validate is a helper function that checks if all required input parameters are present in the handler's event object :event: Lambda function's event object - :return: returns the event back if it passes the validation othewise it raises a bad request exception + :return: returns the event back if it passes the validation otherwise it raises a bad request exception :raises: BadRequest Exception """ # get the required keys to validate the event - required_keys = get_required_keys(event.get("pipeline_type", ""), os.environ["USE_MODEL_REGISTRY"]) + required_keys = get_required_keys( + event.get("pipeline_type", "").strip(), os.environ["USE_MODEL_REGISTRY"], event.get("problem_type", "").strip() + ) for key in required_keys: if key not in event: logger.error(f"Request event did not have parameter: {key}") diff --git a/source/lambdas/pipeline_orchestration/shared/helper.py b/source/lambdas/pipeline_orchestration/shared/helper.py index 04f70ce..4de616e 100644 --- a/source/lambdas/pipeline_orchestration/shared/helper.py +++ b/source/lambdas/pipeline_orchestration/shared/helper.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. A copy of the License is located at # @@ -38,40 +38,3 @@ def get_client(service_name, config=CLIENT_CONFIG): def reset_client(): global _helpers_service_clients _helpers_service_clients = dict() - - -# Currently, retriving the sagemaker-model-monitor-analyzer image url is not supported by sagemaker.image_uris.retrieve -# For the latest images per region, see https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-pre-built-container.html -# These are SageMaker service account numbers for the built-in SageMaker containers. 
-def get_built_in_model_monitor_container_uri(region): - regions_to_accounts = { - "us-east-1": "156813124566", - "us-east-2": "777275614652", - "us-west-1": "890145073186", - "us-west-2": "159807026194", - "af-south-1": "875698925577", - "ap-east-1": "001633400207", - "ap-northeast-1": "574779866223", - "ap-northeast-2": "709848358524", - "ap-south-1": "126357580389", - "ap-southeast-1": "245545462676", - "ap-southeast-2": "563025443158", - "ca-central-1": "536280801234", - "cn-north-1": "453000072557", - "cn-northwest-1": "453252182341", - "eu-central-1": "048819808253", - "eu-north-1": "895015795356", - "eu-south-1": "933208885752", - "eu-west-1": "468650794304", - "eu-west-2": "749857270468", - "eu-west-3": "680080141114", - "me-south-1": "607024016150", - "sa-east-1": "539772159869", - "us-gov-west-1": "362178532790", - } - - container_uri = ( - f"{regions_to_accounts[region]}.dkr.ecr.{region}.amazonaws.com/sagemaker-model-monitor-analyzer:latest" - ) - - return container_uri diff --git a/source/lambdas/pipeline_orchestration/tests/fixtures/orchestrator_fixtures.py b/source/lambdas/pipeline_orchestration/tests/fixtures/orchestrator_fixtures.py index e10bf16..d70fb9e 100644 --- a/source/lambdas/pipeline_orchestration/tests/fixtures/orchestrator_fixtures.py +++ b/source/lambdas/pipeline_orchestration/tests/fixtures/orchestrator_fixtures.py @@ -51,40 +51,55 @@ def mock_env_variables(): @pytest.fixture def api_byom_event(): - def _api_byom_event(pipeline_type, is_multi=False): + def _api_byom_event(pipeline_type, is_multi=False, endpint_name_provided=False): + # create a map {: {is_multi (True/False): }} + maping = dict( + inference_instance={ + "True": { + "dev": os.environ["INSTANCETYPE"], + "staging": os.environ["INSTANCETYPE"], + "prod": os.environ["INSTANCETYPE"], + }, + "False": os.environ["INSTANCETYPE"], + }, + batch_job_output_location={ + "True": {"dev": "bucket/dev_output", "staging": "bucket/staging_output", "prod": "bucket/prod_output"}, + "False": os.environ["BATCHOUTPUT"], + }, + data_capture_location={ + "True": { + "dev": "bucket/dev_datacapture", + "staging": "bucket/staging_datacapture", + "prod": "bucket/prod_datacapture", + }, + "False": os.environ["DATACAPTURE"], + }, + endpoint_name={ + "True": { + "dev": "dev-endpoint", + "staging": "staging-endpoint", + "prod": "prod-endpoint", + }, + "False": "test-endpoint", + }, + ) event = { "pipeline_type": pipeline_type, "model_name": "testmodel", "model_artifact_location": os.environ["MODELARTIFACTLOCATION"], "model_package_name": os.environ["MODEL_PACKAGE_NAME"], } - if is_multi: - event["inference_instance"] = { - "dev": os.environ["INSTANCETYPE"], - "staging": os.environ["INSTANCETYPE"], - "prod": os.environ["INSTANCETYPE"], - } - else: - event["inference_instance"] = os.environ["INSTANCETYPE"] + event["inference_instance"] = maping["inference_instance"][str(is_multi)] + if pipeline_type in ["byom_batch_builtin", "byom_batch_custom"]: event["batch_inference_data"] = os.environ["INFERENCEDATA"] - if is_multi: - event["batch_job_output_location"] = { - "dev": "bucket/dev_output", - "staging": "bucket/staging_output", - "prod": "bucket/prod_output", - } - else: - event["batch_job_output_location"] = os.environ["BATCHOUTPUT"] + event["batch_job_output_location"] = maping["batch_job_output_location"][str(is_multi)] + if pipeline_type in ["byom_realtime_builtin", "byom_realtime_custom"]: - if is_multi: - event["data_capture_location"] = { - "dev": "bucket/dev_datacapture", - "staging": "bucket/staging_datacapture", - "prod": 
"bucket/prod_datacapture", - } - else: - event["data_capture_location"] = os.environ["DATACAPTURE"] + event["data_capture_location"] = maping["data_capture_location"][str(is_multi)] + # add optional endpoint_name + if endpint_name_provided: + event["endpoint_name"] = maping["endpoint_name"][str(is_multi)] if pipeline_type in ["byom_realtime_builtin", "byom_batch_builtin"]: event["model_framework"] = "xgboost" @@ -98,22 +113,39 @@ def _api_byom_event(pipeline_type, is_multi=False): @pytest.fixture -def api_monitor_event(): +def api_data_quality_event(): return { - "pipeline_type": "byom_model_monitor", + "pipeline_type": "byom_data_quality_monitor", "model_name": "testmodel", "endpoint_name": "test_endpoint", - "training_data": os.environ["TRAININGDATA"], + "baseline_data": os.environ["TRAININGDATA"], "baseline_job_output_location": os.environ["BASELINEOUTPUT"], "monitoring_output_location": "testbucket/model_monitor/monitor_output", "data_capture_location": "testbucket/xgboost/datacapture", "schedule_expression": os.environ["SCHEDULEEXP"], "instance_type": os.environ["INSTANCETYPE"], "instance_volume_size": "20", - "max_runtime_seconds": "3600", + "baseline_max_runtime_seconds": "3600", + "monitor_max_runtime_seconds": "1800", } +@pytest.fixture +def api_model_quality_event(api_data_quality_event): + model_quality_event = api_data_quality_event.copy() + model_quality_event.update( + { + "pipeline_type": "byom_model_quality_monitor", + "baseline_inference_attribute": "0", + "problem_type": "Regression", + "baseline_ground_truth_attribute": "label", + "monitor_inference_attribute": "0", + "monitor_ground_truth_input": "s3://test-bucket/groundtruth", + } + ) + return model_quality_event + + @pytest.fixture def api_image_builder_event(): return { @@ -126,80 +158,111 @@ def api_image_builder_event(): @pytest.fixture def expected_params_realtime_custom(): - return [ - ("ASSETSBUCKET", "testassetsbucket"), - ("KMSKEYARN", ""), - ("BLUEPRINTBUCKET", "testbucket"), - ("MODELNAME", "testmodel"), - ("MODELARTIFACTLOCATION", os.environ["MODELARTIFACTLOCATION"]), - ("INFERENCEINSTANCE", os.environ["INSTANCETYPE"]), - ("CUSTOMALGORITHMSECRREPOARN", "test-ecr-repo"), - ("IMAGEURI", "custom-image-uri"), - ("MODELPACKAGEGROUPNAME", ""), - ("MODELPACKAGENAME", os.environ["MODEL_PACKAGE_NAME"]), - ("DATACAPTURELOCATION", os.environ["DATACAPTURE"]), - ] + def _expected_params_realtime_custom(endpoint_name_provided=False): + endpoint_name = "test-endpoint" if endpoint_name_provided else "" + expected_params = [ + ("AssetsBucket", "testassetsbucket"), + ("KmsKeyArn", ""), + ("BlueprintBucket", "testbucket"), + ("ModelName", "testmodel"), + ("ModelArtifactLocation", os.environ["MODELARTIFACTLOCATION"]), + ("InferenceInstance", os.environ["INSTANCETYPE"]), + ("CustomAlgorithmsECRRepoArn", "test-ecr-repo"), + ("ImageUri", "custom-image-uri"), + ("ModelPackageGroupName", ""), + ("ModelPackageName", os.environ["MODEL_PACKAGE_NAME"]), + ("DataCaptureLocation", os.environ["DATACAPTURE"]), + ("EndpointName", endpoint_name), + ] + + return expected_params + + return _expected_params_realtime_custom @pytest.fixture -def expected_model_monitor_params(): +def expected_data_quality_monitor_params(): return [ - ("BASELINEJOBNAME", "test_endpoint-baseline-job-ec3a"), - ("BASELINEOUTPUTBUCKET", "testbucket"), - ("BASELINEJOBOUTPUTLOCATION", os.environ["BASELINEOUTPUT"]), - ("DATACAPTUREBUCKET", "testbucket"), - ("DATACAPTURELOCATION", os.environ["BASELINEOUTPUT"]), - ("ENDPOINTNAME", "test_endpoint"), - ("IMAGEURI", 
"156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer:latest"), - ("INSTANCETYPE", os.environ["INSTANCETYPE"]), - ("INSTANCEVOLUMESIZE", "20"), - ("MAXRUNTIMESECONDS", "3600"), - ("MONITORINGOUTPUTLOCATION", "testbucket/model_monitor/monitor_output"), - ("MONITORINGSCHEDULENAME", "test_endpoint-monitor-2a87"), - ("MONITORINGTYPE", "dataquality"), - ("SCHEDULEEXPRESSION", os.environ["SCHEDULEEXP"]), - ("TRAININGDATA", os.environ["TRAININGDATA"]), + ("BaselineJobName", "test_endpoint-baseline-job-ec3a"), + ("BaselineOutputBucket", "testbucket"), + ("BaselineJobOutputLocation", os.environ["BASELINEOUTPUT"]), + ("DataCaptureBucket", "testbucket"), + ("DataCaptureLocation", os.environ["BASELINEOUTPUT"]), + ("EndpointName", "test_endpoint"), + ("ImageUri", "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer"), + ("InstanceType", os.environ["INSTANCETYPE"]), + ("InstanceVolumeSize", "20"), + ("BaselineMaxRuntimeSeconds", "3600"), + ("MonitorMaxRuntimeSeconds", "1800"), + ("MonitoringOutputLocation", "testbucket/model_monitor/monitor_output"), + ("MonitoringScheduleName", "test_endpoint-monitor-2a87"), + ("ScheduleExpression", os.environ["SCHEDULEEXP"]), + ("BaselineData", os.environ["TRAININGDATA"]), ] +@pytest.fixture +def expected_model_quality_monitor_params(expected_data_quality_monitor_params): + expected_model_quality = expected_data_quality_monitor_params.copy() + + expected_model_quality.extend( + [ + ("BaselineInferenceAttribute", "prediction"), + ("BaselineProbabilityAttribute", "probability"), + ("BaselineGroundTruthAttribute", "label"), + ("ProblemType", "Regression"), + ("MonitorInferenceAttribute", "0"), + ("MonitorProbabilityAttribute", "0"), + ("ProbabilityThresholdAttribute", "0.5"), + ("MonitorGroundTruthInput", "s3://test-bucket/groundtruth"), + ] + ) + + return expected_model_quality + + @pytest.fixture def expected_common_realtime_batch_params(): return [ - ("MODELNAME", "testmodel"), - ("MODELARTIFACTLOCATION", os.environ["MODELARTIFACTLOCATION"]), - ("INFERENCEINSTANCE", os.environ["INSTANCETYPE"]), - ("CUSTOMALGORITHMSECRREPOARN", "test-ecr-repo"), - ("IMAGEURI", "custom-image-uri"), - ("MODELPACKAGEGROUPNAME", ""), - ("MODELPACKAGENAME", os.environ["MODEL_PACKAGE_NAME"]), + ("ModelName", "testmodel"), + ("ModelArtifactLocation", os.environ["MODELARTIFACTLOCATION"]), + ("InferenceInstance", os.environ["INSTANCETYPE"]), + ("CustomAlgorithmsECRRepoArn", "test-ecr-repo"), + ("ImageUri", "custom-image-uri"), + ("ModelPackageGroupName", ""), + ("ModelPackageName", os.environ["MODEL_PACKAGE_NAME"]), ] @pytest.fixture def expected_image_builder_params(): return [ - ("NOTIFICATIONEMAIL", os.environ["NOTIFICATION_EMAIL"]), - ("ASSETSBUCKET", "testassetsbucket"), - ("CUSTOMCONTAINER", os.environ["CUSTOMIMAGE"]), - ("ECRREPONAME", "mlops-ecrrep"), - ("IMAGETAG", "tree"), + ("NotificationEmail", os.environ["NOTIFICATION_EMAIL"]), + ("AssetsBucket", "testassetsbucket"), + ("CustomImage", os.environ["CUSTOMIMAGE"]), + ("ECRRepoName", "mlops-ecrrep"), + ("ImageTag", "tree"), ] @pytest.fixture def expected_realtime_specific_params(): - return [("DATACAPTURELOCATION", os.environ["DATACAPTURE"])] + def _expected_realtime_specific_params(endpoint_name_provided=False): + endpoint_name = "test-endpoint" if endpoint_name_provided else "" + return [("DataCaptureLocation", os.environ["DATACAPTURE"]), ("EndpointName", endpoint_name)] + + return _expected_realtime_specific_params @pytest.fixture def expect_single_account_params_format(): return { 
"Parameters": { - "NOTIFICATIONEMAIL": os.environ["NOTIFICATION_EMAIL"], - "ASSETSBUCKET": "testassetsbucket", - "CUSTOMCONTAINER": os.environ["CUSTOMIMAGE"], - "ECRREPONAME": "mlops-ecrrep", - "IMAGETAG": "tree", + "NotificationEmail": os.environ["NOTIFICATION_EMAIL"], + "AssetsBucket": "testassetsbucket", + "CustomImage": os.environ["CUSTOMIMAGE"], + "ECRRepoName": "mlops-ecrrep", + "ImageTag": "tree", } } @@ -212,39 +275,39 @@ def stack_name(): @pytest.fixture def expected_multi_account_params_format(): return [ - {"ParameterKey": "NOTIFICATIONEMAIL", "ParameterValue": os.environ["NOTIFICATION_EMAIL"]}, - {"ParameterKey": "ASSETSBUCKET", "ParameterValue": "testassetsbucket"}, - {"ParameterKey": "CUSTOMCONTAINER", "ParameterValue": os.environ["CUSTOMIMAGE"]}, - {"ParameterKey": "ECRREPONAME", "ParameterValue": "mlops-ecrrep"}, - {"ParameterKey": "IMAGETAG", "ParameterValue": "tree"}, + {"ParameterKey": "NotificationEmail", "ParameterValue": os.environ["NOTIFICATION_EMAIL"]}, + {"ParameterKey": "AssetsBucket", "ParameterValue": "testassetsbucket"}, + {"ParameterKey": "CustomImage", "ParameterValue": os.environ["CUSTOMIMAGE"]}, + {"ParameterKey": "ECRRepoName", "ParameterValue": "mlops-ecrrep"}, + {"ParameterKey": "ImageTag", "ParameterValue": "tree"}, ] @pytest.fixture def expected_batch_specific_params(): return [ - ("BATCHINPUTBUCKET", "inference"), - ("BATCHINFERENCEDATA", os.environ["INFERENCEDATA"]), - ("BATCHOUTPUTLOCATION", os.environ["BATCHOUTPUT"]), + ("BatchInputBucket", "inference"), + ("BatchInferenceData", os.environ["INFERENCEDATA"]), + ("BatchOutputLocation", os.environ["BATCHOUTPUT"]), ] @pytest.fixture def expected_batch_params(): return [ - ("ASSETSBUCKET", "testassetsbucket"), - ("KMSKEYARN", ""), - ("BLUEPRINTBUCKET", "testbucket"), - ("MODELNAME", "testmodel"), - ("MODELARTIFACTLOCATION", os.environ["MODELARTIFACTLOCATION"]), - ("INFERENCEINSTANCE", os.environ["INSTANCETYPE"]), - ("CUSTOMALGORITHMSECRREPOARN", "test-ecr-repo"), - ("IMAGEURI", "custom-image-uri"), - ("MODELPACKAGEGROUPNAME", ""), - ("MODELPACKAGENAME", os.environ["MODEL_PACKAGE_NAME"]), - ("BATCHINPUTBUCKET", "inference"), - ("BATCHINFERENCEDATA", os.environ["INFERENCEDATA"]), - ("BATCHOUTPUTLOCATION", os.environ["BATCHOUTPUT"]), + ("AssetsBucket", "testassetsbucket"), + ("KmsKeyArn", ""), + ("BlueprintBucket", "testbucket"), + ("ModelName", "testmodel"), + ("ModelArtifactLocation", os.environ["MODELARTIFACTLOCATION"]), + ("InferenceInstance", os.environ["INSTANCETYPE"]), + ("CustomAlgorithmsECRRepoArn", "test-ecr-repo"), + ("ImageUri", "custom-image-uri"), + ("ModelPackageGroupName", ""), + ("ModelPackageName", os.environ["MODEL_PACKAGE_NAME"]), + ("BatchInputBucket", "inference"), + ("BatchInferenceData", os.environ["INFERENCEDATA"]), + ("BatchOutputLocation", os.environ["BATCHOUTPUT"]), ] @@ -349,28 +412,42 @@ def _api_model_monitor_event(monitoring_type=""): @pytest.fixture def required_api_keys_model_monitor(): - def _required_api_keys_model_monitor(default=True): - default_keys = [ + def _required_api_keys_model_monitor(monitoring_type, problem_type=None): + common_keys = [ "pipeline_type", "model_name", "endpoint_name", + "baseline_data", "baseline_job_output_location", + "data_capture_location", "monitoring_output_location", "schedule_expression", - "training_data", + "monitor_max_runtime_seconds", "instance_type", - "data_capture_location", "instance_volume_size", ] - if default: - return default_keys + if monitoring_type != "ModelQuality": + return common_keys else: - return default_keys + [ 
- "features_attribute", - "inference_attribute", - "probability_attribute", - "probability_threshold_attribute", + common_model_keys = [ + "baseline_inference_attribute", + "baseline_ground_truth_attribute", + "problem_type", + "monitor_ground_truth_input", ] + if problem_type in ["Regression", "MulticlassClassification"]: + common_model_keys.append("monitor_inference_attribute") + + # BinaryClassification problem + else: + common_model_keys.extend( + [ + "monitor_probability_attribute", + "probability_threshold_attribute", + "baseline_probability_attribute", + ] + ) + return [*common_keys, *common_model_keys] return _required_api_keys_model_monitor @@ -613,5 +690,5 @@ def cf_client_params(api_byom_event, template_parameters_realtime_builtin): @pytest.fixture -def expcted_update_response(stack_name): +def expected_update_response(stack_name): return {"StackId": f"Pipeline {stack_name} is already provisioned. No updates are to be performed."} \ No newline at end of file diff --git a/source/lambdas/pipeline_orchestration/tests/test_helper.py b/source/lambdas/pipeline_orchestration/tests/test_helper.py index 6056725..eac8138 100644 --- a/source/lambdas/pipeline_orchestration/tests/test_helper.py +++ b/source/lambdas/pipeline_orchestration/tests/test_helper.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. A copy of the License is located at # @@ -11,19 +11,12 @@ # and limitations under the License. 
# # ##################################################################################################################### import pytest -from shared.helper import get_built_in_model_monitor_container_uri, get_client, reset_client +from shared.helper import get_client, reset_client _helpers_service_clients = dict() -def test_get_built_in_model_monitor_container_uri(): - assert ( - get_built_in_model_monitor_container_uri("us-east-1") - == "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer:latest" - ) - - @pytest.mark.parametrize("service,enpoint_url", [("s3", "https://s3"), ("cloudformation", "https://cloudformation")]) def test_get_client(service, enpoint_url): client = get_client(service) diff --git a/source/lambdas/pipeline_orchestration/tests/test_pipeline_orchestration.py b/source/lambdas/pipeline_orchestration/tests/test_pipeline_orchestration.py index 1843bd3..843c4be 100644 --- a/source/lambdas/pipeline_orchestration/tests/test_pipeline_orchestration.py +++ b/source/lambdas/pipeline_orchestration/tests/test_pipeline_orchestration.py @@ -25,7 +25,7 @@ clean_param, get_stack_name, get_common_realtime_batch_params, - get_bacth_specific_params, + get_batch_specific_params, get_model_monitor_params, get_image_builder_params, format_template_parameters, @@ -40,6 +40,7 @@ get_template_parameters, get_required_keys, validate, + get_built_in_model_monitor_image_uri, ) from pipeline_orchestration.index import ( handler, @@ -58,9 +59,11 @@ expected_realtime_specific_params, expected_batch_specific_params, stack_name, - api_monitor_event, - expcted_update_response, - expected_model_monitor_params, + api_data_quality_event, + api_model_quality_event, + expected_update_response, + expected_data_quality_monitor_params, + expected_model_quality_monitor_params, required_api_image_builder, expected_batch_params, api_image_builder_event, @@ -110,7 +113,7 @@ def test_handler(): "isBase64Encoded": False, "body": json.dumps( { - "message": "Bad request format. Expected httpMethod or pipeline_type, recevied none. " + "message": "Bad request format. Expected httpMethod or pipeline_type, received none. " + "Check documentation for API & config formats." 
} ), @@ -197,24 +200,24 @@ def test_provision_pipeline(api_image_builder_event, api_byom_event): @mock_s3 def test_upload_file_to_s3(): - s3_clinet = boto3.client("s3", region_name="us-east-1") + s3_client = boto3.client("s3", region_name="us-east-1") testfile = tempfile.NamedTemporaryFile() - s3_clinet.create_bucket(Bucket="assetsbucket") - upload_file_to_s3(testfile.name, "assetsbucket", os.environ["TESTFILE"], s3_clinet) + s3_client.create_bucket(Bucket="assetsbucket") + upload_file_to_s3(testfile.name, "assetsbucket", os.environ["TESTFILE"], s3_client) @mock_s3 def test_download_file_from_s3(): - s3_clinet = boto3.client("s3", region_name="us-east-1") + s3_client = boto3.client("s3", region_name="us-east-1") testfile = tempfile.NamedTemporaryFile() - s3_clinet.create_bucket(Bucket="assetsbucket") - upload_file_to_s3(testfile.name, "assetsbucket", os.environ["TESTFILE"], s3_clinet) - download_file_from_s3("assetsbucket", os.environ["TESTFILE"], testfile.name, s3_clinet) + s3_client.create_bucket(Bucket="assetsbucket") + upload_file_to_s3(testfile.name, "assetsbucket", os.environ["TESTFILE"], s3_client) + download_file_from_s3("assetsbucket", os.environ["TESTFILE"], testfile.name, s3_client) -def test_create_codepipeline_stack(cf_client_params, stack_name, expcted_update_response): +def test_create_codepipeline_stack(cf_client_params, stack_name, expected_update_response): cf_client = botocore.session.get_session().create_client("cloudformation") - not_image_satck = "teststack-testmodel-BYOMPipelineReatimeBuiltIn" + not_image_stack = "teststack-testmodel-BYOMPipelineReatimeBuiltIn" stubber = Stubber(cf_client) expected_params = cf_client_params cfn_response = {"StackId": "1234"} @@ -222,7 +225,7 @@ def test_create_codepipeline_stack(cf_client_params, stack_name, expcted_update_ stubber.add_response("create_stack", cfn_response, expected_params) with stubber: response = create_codepipeline_stack( - not_image_satck, + not_image_stack, expected_params["TemplateURL"], expected_params["Parameters"], cf_client, @@ -234,16 +237,16 @@ def test_create_codepipeline_stack(cf_client_params, stack_name, expcted_update_ with stubber: with pytest.raises(Exception): create_codepipeline_stack( - not_image_satck, + not_image_stack, expected_params["TemplateURL"], expected_params["Parameters"], cf_client, ) stubber.add_client_error("create_stack", service_message="already exists") - expected_response = {"StackId": f"Pipeline {not_image_satck} is already provisioned. Updating template parameters."} + expected_response = {"StackId": f"Pipeline {not_image_stack} is already provisioned. 
Updating template parameters."} with stubber: response = create_codepipeline_stack( - not_image_satck, + not_image_stack, expected_params["TemplateURL"], expected_params["Parameters"], cf_client, @@ -254,7 +257,7 @@ def test_create_codepipeline_stack(cf_client_params, stack_name, expcted_update_ # Test if the stack is image builder stubber.add_client_error("create_stack", service_message="already exists") stubber.add_client_error("update_stack", service_message="No updates are to be performed") - expected_response = expcted_update_response + expected_response = expected_update_response with stubber: response = create_codepipeline_stack( stack_name, @@ -266,7 +269,7 @@ def test_create_codepipeline_stack(cf_client_params, stack_name, expcted_update_ assert response == expected_response -def test_update_stack(cf_client_params, stack_name, expcted_update_response): +def test_update_stack(cf_client_params, stack_name, expected_update_response): cf_client = botocore.session.get_session().create_client("cloudformation") expected_params = cf_client_params @@ -289,7 +292,7 @@ def test_update_stack(cf_client_params, stack_name, expcted_update_response): # Test for no update error stubber.add_client_error("update_stack", service_message="No updates are to be performed") - expected_response = expcted_update_response + expected_response = expected_update_response with stubber: response = update_stack( stack_name, @@ -424,7 +427,7 @@ def test_pipeline_status(): assert response == expected_response_no_cp -def test_get_stack_name(api_byom_event, api_monitor_event, api_image_builder_event): +def test_get_stack_name(api_byom_event, api_data_quality_event, api_model_quality_event, api_image_builder_event): # realtime builtin pipeline realtime_builtin = api_byom_event("byom_realtime_builtin") assert ( @@ -435,8 +438,17 @@ def test_get_stack_name(api_byom_event, api_monitor_event, api_image_builder_eve batch_builtin = api_byom_event("byom_batch_builtin") assert get_stack_name(batch_builtin) == f"mlops-pipeline-{batch_builtin['model_name']}-byompipelinebatchbuiltin" - # model monitor pipeline - assert get_stack_name(api_monitor_event) == f"mlops-pipeline-{api_monitor_event['model_name']}-byommodelmonitor" + # data quality monitor pipeline + assert ( + get_stack_name(api_data_quality_event) + == f"mlops-pipeline-{api_data_quality_event['model_name']}-byomdataqualitymonitor" + ) + + # model quality monitor pipeline + assert ( + get_stack_name(api_model_quality_event) + == f"mlops-pipeline-{api_model_quality_event['model_name']}-byommodelqualitymonitor" + ) # image builder pipeline assert ( @@ -447,7 +459,8 @@ def test_get_stack_name(api_byom_event, api_monitor_event, api_image_builder_eve def test_get_required_keys( api_byom_event, # NOSONAR:S107 this test function is designed to take many fixtures - api_monitor_event, + api_data_quality_event, + api_model_quality_event, required_api_byom_realtime_builtin, required_api_byom_batch_builtin, required_api_byom_realtime_custom, @@ -471,21 +484,29 @@ def test_get_required_keys( returned_keys = get_required_keys("byom_batch_custom", "No") expected_keys = required_api_byom_batch_custom TestCase().assertCountEqual(expected_keys, returned_keys) - # Required keys in model_monitor, default (no monitoring_type provided) - returned_keys = get_required_keys("byom_model_monitor", "No") - expected_keys = required_api_keys_model_monitor() + # Required keys in data quality monitor + returned_keys = get_required_keys("byom_data_quality_monitor", "No") + expected_keys = 
required_api_keys_model_monitor("DataQuality") + TestCase().assertCountEqual(expected_keys, returned_keys) + # Required keys in model quality monitor, problem type Regression + returned_keys = get_required_keys("byom_model_quality_monitor", "No", "Regression") + expected_keys = required_api_keys_model_monitor("ModelQuality", "Regression") TestCase().assertCountEqual(expected_keys, returned_keys) - # Required keys in model_monitor, with monitoring_type provided - returned_keys = get_required_keys("byom_model_monitor", "No") - expected_keys = required_api_keys_model_monitor(True) + # Required keys in model quality monitor, problem type BinaryClassification + returned_keys = get_required_keys("byom_model_quality_monitor", "No", "BinaryClassification") + expected_keys = required_api_keys_model_monitor("ModelQuality", "BinaryClassification") TestCase().assertCountEqual(expected_keys, returned_keys) + # test exception for unsupported problem type + with pytest.raises(BadRequest) as error: + get_required_keys("byom_model_quality_monitor", "No", "UnsupportedProblemType") + assert str(error.value) == "Bad request format. Unsupported problem_type in byom_model_quality_monitor pipeline" # Required keys in image builder returned_keys = get_required_keys("byom_image_builder", "No") expected_keys = required_api_image_builder TestCase().assertCountEqual(expected_keys, returned_keys) # assert for exceptions with pytest.raises(BadRequest) as exceinfo: - get_required_keys({"pipeline_type": "not_supported"}, "No") + get_required_keys("not_supported", "No") assert ( str(exceinfo.value) == "Bad request format. Pipeline type not supported. Check documentation for API & config formats" @@ -498,7 +519,7 @@ def test_get_required_keys( def test_get_stage_param(api_byom_event): single_event = api_byom_event("byom_realtime_custom", False) - TestCase().assertEqual(get_stage_param(single_event, "data_capture_location", None), "bucket/datacapture") + TestCase().assertEqual(get_stage_param(single_event, "data_capture_location", "None"), "bucket/datacapture") multi_event = api_byom_event("byom_realtime_custom", True) TestCase().assertEqual(get_stage_param(multi_event, "data_capture_location", "dev"), "bucket/dev_datacapture") @@ -506,18 +527,40 @@ def test_get_stage_param(api_byom_event): def test_get_template_parameters( api_byom_event, api_image_builder_event, + api_data_quality_event, + api_model_quality_event, expected_params_realtime_custom, expected_image_builder_params, expected_batch_params, + expected_data_quality_monitor_params, + expected_model_quality_monitor_params, ): single_event = api_byom_event("byom_realtime_custom", False) - TestCase().assertEqual(get_template_parameters(single_event, False), expected_params_realtime_custom) + # realtime pipeline + TestCase().assertEqual(get_template_parameters(single_event, False), expected_params_realtime_custom()) + # image builder pipeline TestCase().assertEqual(get_template_parameters(api_image_builder_event, False), expected_image_builder_params) + # batch pipeline TestCase().assertEqual( get_template_parameters(api_byom_event("byom_batch_custom", False), False), expected_batch_params, ) + # data quality pipeline + assert len(get_template_parameters(api_data_quality_event, False)) == len( + [ + *expected_data_quality_monitor_params, + *[("AssetsBucket", "testassetsbucket"), ("KmsKeyArn", ""), ("BlueprintBucket", "testbucket")], + ] + ) + + # model quality pipeline + assert len(get_template_parameters(api_model_quality_event, False)) == len( + [ + 
*expected_model_quality_monitor_params, + *[("AssetsBucket", "testassetsbucket"), ("KmsKeyArn", ""), ("BlueprintBucket", "testbucket")], + ] + ) # test for exception with pytest.raises(BadRequest): get_template_parameters({"pipeline_type": "unsupported"}, False) @@ -528,23 +571,56 @@ def test_get_common_realtime_batch_params(api_byom_event, expected_common_realti batch_event = api_byom_event("byom_batch_custom", False) realtime_event.update(batch_event) TestCase().assertEqual( - get_common_realtime_batch_params(realtime_event, False, None), expected_common_realtime_batch_params + get_common_realtime_batch_params(realtime_event, "us-east-1", "None"), expected_common_realtime_batch_params ) def test_get_realtime_specific_params(api_byom_event, expected_realtime_specific_params): + # test with endpoint_name not provided realtime_event = api_byom_event("byom_realtime_builtin", False) - TestCase().assertEqual(get_realtime_specific_params(realtime_event, None), expected_realtime_specific_params) + TestCase().assertEqual(get_realtime_specific_params(realtime_event, "None"), expected_realtime_specific_params()) + # test with endpoint_name provided + realtime_event = api_byom_event("byom_realtime_builtin", False, True) + TestCase().assertEqual( + get_realtime_specific_params(realtime_event, "None"), expected_realtime_specific_params(True) + ) + # test with endpoint_name provided for multi-account + realtime_event = api_byom_event("byom_realtime_builtin", False, True) + TestCase().assertEqual(get_realtime_specific_params(realtime_event, "dev"), expected_realtime_specific_params(True)) -def test_get_bacth_specific_params(api_byom_event, expected_batch_specific_params): +def test_get_batch_specific_params(api_byom_event, expected_batch_specific_params): batch_event = api_byom_event("byom_batch_custom", False) - TestCase().assertEqual(get_bacth_specific_params(batch_event, None), expected_batch_specific_params) + TestCase().assertEqual(get_batch_specific_params(batch_event, "None"), expected_batch_specific_params) + + +def test_get_built_in_model_monitor_container_uri(): + # assert the returned value by an actual Model Monitor Image URI for the region. 
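+    # note: the account id in the expected URI (156813124566) is the AWS-hosted account that publishes the sagemaker-model-monitor-analyzer image for us-east-1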
+ assert ( + get_built_in_model_monitor_image_uri("us-east-1") + == "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer" + ) -def test_get_model_monitor_params(api_monitor_event, expected_model_monitor_params): +@patch("lambda_helpers.sagemaker.image_uris.retrieve") +def test_get_model_monitor_params( + mocked_image_retrieve, + api_data_quality_event, + api_model_quality_event, + expected_data_quality_monitor_params, + expected_model_quality_monitor_params, +): + # provide an actual Model Monitor image URI (us-east-1) as the return value + mocked_image_retrieve.return_value = "156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer" + # data quality monitor + TestCase().assertEqual( + len(get_model_monitor_params(api_data_quality_event, "us-east-1", "None")), + len(expected_data_quality_monitor_params), + ) + # model quality monitor TestCase().assertEqual( - len(get_model_monitor_params(api_monitor_event, "us-east-1", None)), len(expected_model_monitor_params) + len(get_model_monitor_params(api_model_quality_event, "us-east-1", "None", monitoring_type="ModelQuality")), + len(expected_model_quality_monitor_params), ) @@ -567,22 +643,22 @@ def test_format_template_parameters( def test_get_image_uri(mocked_sm, api_byom_event): custom_event = api_byom_event("byom_realtime_custom", False) TestCase().assertEqual(get_image_uri("byom_realtime_custom", custom_event, "us-east-1"), "custom-image-uri") - mocked_sm.return_value = "test-imge-uri" + mocked_sm.return_value = "test-image-uri" builtin_event = api_byom_event("byom_realtime_builtin", False) - TestCase().assertEqual(get_image_uri("byom_realtime_builtin", builtin_event, "us-east-1"), "test-imge-uri") + TestCase().assertEqual(get_image_uri("byom_realtime_builtin", builtin_event, "us-east-1"), "test-image-uri") mocked_sm.assert_called_with( framework=builtin_event.get("model_framework"), region="us-east-1", version=builtin_event.get("model_framework_version"), ) - # assert exception for an unspported pipeline + # assert exception for an unsupported pipeline with pytest.raises(Exception) as exc: get_image_uri("not_spoorted_pipeline", builtin_event, "us-east-1") assert str(exc.value) == "Unsupported pipeline by get_image_uri function" +@patch("lambda_helpers.sagemaker.image_uris.retrieve") @patch("boto3.client") -@patch("builtins.open") @patch("lambda_helpers.shutil.make_archive") @patch("lambda_helpers.write_params_to_json") @patch("lambda_helpers.format_template_parameters") @@ -594,54 +670,54 @@ def test_create_template_zip_file( mocked_mkdir, mocked_get_template, mocked_format, - mocked_wrire, + mocked_write, mocked_shutil, - mocked_open, mocked_client, - api_monitor_event, + mocked_get_image, + api_image_builder_event, ): mocked_path.return_value = False - s3_clinet = boto3.client("s3", region_name="us-east-1") + s3_client = boto3.client("s3", region_name="us-east-1") # multi account create_template_zip_file( - api_monitor_event, "blueprint", "assets_bucket", "byom/template.yaml", "zipfile", "True", s3_clinet + api_image_builder_event, "blueprint", "assets_bucket", "byom/template.yaml", "zipfile", "True", s3_client ) # single account create_template_zip_file( - api_monitor_event, "blueprint", "assets_bucket", "byom/template.yaml", "zipfile", "False", s3_clinet + api_image_builder_event, "blueprint", "assets_bucket", "byom/template.yaml", "zipfile", "False", s3_client ) def test_get_codepipeline_params(): common_params = [ - ("NOTIFICATIONEMAIL", "test@example.com"), - ("TEMPLATEZIPNAME", 
"template_zip_name"), - ("TEMPLATEFILENAME", "template_file_name"), - ("ASSETSBUCKET", "testassetsbucket"), - ("STACKNAME", "stack_name"), + ("NotificationEmail", "test@example.com"), + ("TemplateZipFileName", "template_zip_name"), + ("TemplateFileName", "template_file_name"), + ("AssetsBucket", "testassetsbucket"), + ("StackName", "stack_name"), ] # multi account codepipeline TestCase().assertEqual( get_codepipeline_params("True", "stack_name", "template_zip_name", "template_file_name"), common_params + [ - ("DEVPARAMSNAME", "dev_template_params.json"), - ("STAGINGPARAMSNAME", "staging_template_params.json"), - ("PRODPARAMSNAME", "prod_template_params.json"), - ("DEVACCOUNTID", "dev_account_id"), - ("DEVORGID", "dev_org_id"), - ("STAGINGACCOUNTID", "staging_account_id"), - ("STAGINGORGID", "staging_org_id"), - ("PRODACCOUNTID", "prod_account_id"), - ("PRODORGID", "prod_org_id"), - ("BLUEPRINTBUCKET", "testbucket"), - ("DELEGATEDADMINACCOUNT", "No"), + ("DevParamsName", "dev_template_params.json"), + ("StagingParamsName", "staging_template_params.json"), + ("ProdParamsName", "prod_template_params.json"), + ("DevAccountId", "dev_account_id"), + ("DevOrgId", "dev_org_id"), + ("StagingAccountId", "staging_account_id"), + ("StagingOrgId", "staging_org_id"), + ("ProdAccountId", "prod_account_id"), + ("ProdOrgId", "prod_org_id"), + ("BlueprintBucket", "testbucket"), + ("DelegatedAdminAccount", "No"), ], ) # single account codepipeline TestCase().assertEqual( get_codepipeline_params("False", "stack_name", "template_zip_name", "template_file_name"), - common_params + [("TEMPLATEPARAMSNAME", "template_params.json")], + common_params + [("TemplateParamsName", "template_params.json")], ) diff --git a/source/lambdas/solution_helper/lambda_function.py b/source/lambdas/solution_helper/lambda_function.py index 5298217..1e79ae2 100644 --- a/source/lambdas/solution_helper/lambda_function.py +++ b/source/lambdas/solution_helper/lambda_function.py @@ -21,6 +21,18 @@ def _sanitize_data(resource_properties): + # Define allowed keys. You need to update this list with new metrics + main_keys = [ + "bucketSelected", + "gitSelected", + "Region", + "IsMultiAccount", + "UseModelRegistry", + "Version", + ] + optional_keys = ["IsDelegatedAccount"] + allowed_keys = main_keys + optional_keys + # Remove ServiceToken (lambda arn) to avoid sending AccountId resource_properties.pop("ServiceToken", None) resource_properties.pop("Resource", None) @@ -29,7 +41,37 @@ def _sanitize_data(resource_properties): resource_properties.pop("SolutionId", None) resource_properties.pop("UUID", None) - return resource_properties + # send only allowed metrics + sanitized_data = {key: resource_properties[key] for key in allowed_keys if key in resource_properties} + + return sanitized_data + + +def _send_anonymous_metrics(request_type, resource_properties): + try: + metrics_data = _sanitize_data(copy(resource_properties)) + metrics_data["RequestType"] = request_type + + headers = {"Content-Type": "application/json"} + + # create the payload + payload = { + "Solution": resource_properties["SolutionId"], + "UUID": resource_properties["UUID"], + "TimeStamp": datetime.utcnow().isoformat(), + "Data": metrics_data, + } + + logger.info(f"Sending payload: {payload}") + response = requests.post("https://metrics.awssolutionsbuilder.com/generic", json=payload, headers=headers) + # log the response + logger.info(f"Response from the metrics endpoint: {response.status_code} {response.reason}") + # raise error if response is an 404, 503, 500, 403 etc. 
+ response.raise_for_status() + return response + except Exception as e: + logger.exception(f"Error when trying to send anonymous_metrics: {str(e)}") + return None @helper.create @@ -44,25 +86,8 @@ def custom_resource(event, _): random_id = str(uuid.uuid4()) helper.Data.update({"UUID": random_id}) elif resource == "AnonymousMetric": - try: - metrics_data = _sanitize_data(copy(resource_properties)) - metrics_data["RequestType"] = request_type - - headers = {"Content-Type": "application/json"} - - # create the payload - payload = { - "Solution": resource_properties["SolutionId"], - "UUID": resource_properties["UUID"], - "TimeStamp": datetime.utcnow().isoformat(), - "Data": metrics_data, - } - - logger.info(f"Sending payload: {payload}") - response = requests.post("https://metrics.awssolutionsbuilder.com/generic", json=payload, headers=headers) - logger.info(f"Response from metrics endpoint: {response.status_code} {response.reason}") - except Exception as e: - logger.exception(f"Error when trying to send usage data: {str(e)}") + # send Anonymous Metrics to AWS + _send_anonymous_metrics(request_type, resource_properties) def handler(event, context): diff --git a/source/lambdas/solution_helper/test_lambda_function.py b/source/lambdas/solution_helper/test_lambda_function.py index e4ccab3..5ad7b42 100644 --- a/source/lambdas/solution_helper/test_lambda_function.py +++ b/source/lambdas/solution_helper/test_lambda_function.py @@ -11,9 +11,9 @@ # and limitations under the License. # ###################################################################################################################### -import unittest, requests +import unittest +import requests from unittest import mock -import pytest from lambda_function import handler @@ -23,20 +23,53 @@ def __init__(self, status_code, reason): self.status_code = status_code self.reason = reason + def raise_for_status(self): + pass # NOSONAR this is just used as a mocked response object + return MockResponse(200, "OK") class LambdaTest(unittest.TestCase): - def test_create_unique_id(self): + exception_message = "Exception should not be raised when metrics cannot be sent" + + def test_custom_resource(self): import lambda_function + # test resource == "UUID" event = {"RequestType": "Create", "ResourceProperties": {"Resource": "UUID"}} lambda_function.custom_resource(event, None) self.assertIsNotNone(lambda_function.helper.Data.get("UUID")) + # test resource == "AnonymousMetric" + with mock.patch("requests.post", side_effect=mocked_requests_post) as mock_post: + event = { + "RequestType": "Create", + "ResourceProperties": { + "Resource": "AnonymousMetric", + "SolutionId": "SO1234", + "gitSelected": "True", + "bucketSelected": "False", + "IsMultiAccount": "True", + "IsDelegatedAccount": "True", + "UUID": "some-uuid", + }, + } + lambda_function.custom_resource(event, None) + actual_payload = mock_post.call_args.kwargs["json"] + self.assertEqual( + actual_payload["Data"], + { + "RequestType": "Create", + "gitSelected": "True", + "bucketSelected": "False", + "IsMultiAccount": "True", + "IsDelegatedAccount": "True", + }, + ) + @mock.patch("requests.post", side_effect=mocked_requests_post) - def test_send_metrics_successful(self, mock_post): + def test_send_anonymous_metrics_successful(self, mock_post): event = { "RequestType": "Create", "ResourceProperties": { @@ -49,9 +82,11 @@ def test_send_metrics_successful(self, mock_post): }, } - from lambda_function import custom_resource + from lambda_function import _send_anonymous_metrics + + response = 
_send_anonymous_metrics(event["RequestType"], event["ResourceProperties"]) - custom_resource(event, None) + self.assertIsNotNone(response) expected_metrics_endpoint = "https://metrics.awssolutionsbuilder.com/generic" actual_metrics_endpoint = mock_post.call_args.args[0] @@ -69,47 +104,69 @@ def test_send_metrics_successful(self, mock_post): self.assertIn("Data", actual_payload) self.assertEqual( actual_payload["Data"], - {"Foo": "Bar", "RequestType": "Create", "gitSelected": "True", "bucketSelected": "False"}, + {"RequestType": "Create", "gitSelected": "True", "bucketSelected": "False"}, ) # delete a key from the resource properties. It should send data with no errors del event["ResourceProperties"]["bucketSelected"] - custom_resource(event, None) + response = _send_anonymous_metrics(event["RequestType"], event["ResourceProperties"]) + self.assertIsNotNone(response) actual_payload = mock_post.call_args.kwargs["json"] self.assertEqual( actual_payload["Data"], - {"Foo": "Bar", "RequestType": "Create", "gitSelected": "True"}, + {"RequestType": "Create", "gitSelected": "True"}, ) - @mock.patch("requests.post") - def test_send_metrics_connection_error(self, mock_post): - mock_post.side_effect = requests.exceptions.ConnectionError() + @mock.patch("requests.post", side_effect=mocked_requests_post(404, "HTTPError")) + def test_send_anonymous_metrics_http_error(self, mock_post): + event = { + "RequestType": "Create", + "ResourceProperties": {"Resource": "AnonymousMetric", "SolutionId": "SO1234", "UUID": "some-uuid"}, + } + + try: + from lambda_function import _send_anonymous_metrics + + response = _send_anonymous_metrics(event["RequestType"], event["ResourceProperties"]) + # the function shouldn't throw an exception, and return None + self.assertIsNone(response) + + except AssertionError as e: + self.fail(str(e)) + @mock.patch("requests.post", side_effect=mocked_requests_post) + def test_send_anonymous_metrics_connection_error(self, mock_post): + mock_post.side_effect = requests.exceptions.ConnectionError() event = { "RequestType": "Update", "ResourceProperties": {"Resource": "AnonymousMetric", "SolutionId": "SO1234", "UUID": "some-uuid"}, } try: - from lambda_function import custom_resource + from lambda_function import _send_anonymous_metrics + + response = _send_anonymous_metrics(event["RequestType"], event["ResourceProperties"]) + # the function shouldn't throw an exception, and return None + self.assertIsNone(response) - custom_resource(event, None) - except: - self.fail("Exception should not be raised when metrics cannot be sent") + except AssertionError as e: + self.fail(str(e)) @mock.patch("requests.post") - def test_send_metrics_other_error(self, mock_post): + def test_send_anonymous_metrics_other_error(self, mock_post): try: invalid_event = { "RequestType": "Delete", "ResourceProperties": {"Resource": "AnonymousMetric", "UUID": "some-uuid"}, } - from lambda_function import custom_resource + from lambda_function import _send_anonymous_metrics - custom_resource(invalid_event, None) - except: - self.fail("Exception should not be raised when metrics cannot be sent") + response = _send_anonymous_metrics(invalid_event["RequestType"], invalid_event["ResourceProperties"]) + # the function shouldn't throw an exception, and return None + assert response is None + except AssertionError as e: + self.fail(str(e)) def test_sanitize_data(self): from lambda_function import _sanitize_data @@ -120,10 +177,16 @@ def test_sanitize_data(self): "SolutionId": "SO1234", "UUID": "some-uuid", "Region": 
"us-east-1", + "gitSelected": "True", + "bucketSelected": "False", "Foo": "Bar", } - expected_response = {"Region": "us-east-1", "Foo": "Bar"} + expected_response = { + "Region": "us-east-1", + "gitSelected": "True", + "bucketSelected": "False", + } actual_response = _sanitize_data(resource_properties) self.assertCountEqual(expected_response, actual_response) diff --git a/source/lib/aws_mlops_stack.py b/source/lib/aws_mlops_stack.py index b80efce..239fa80 100644 --- a/source/lib/aws_mlops_stack.py +++ b/source/lib/aws_mlops_stack.py @@ -31,18 +31,8 @@ suppress_lambda_policies, ) from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_notification_email_parameter, - create_git_address_parameter, - create_existing_bucket_parameter, - create_existing_ecr_repo_parameter, - create_git_address_provided_condition, - create_existing_bucket_provided_condition, - create_existing_ecr_provided_condition, - create_new_bucket_condition, - create_new_ecr_repo_condition, - create_use_model_registry_parameter, - create_model_registry_parameter, - create_model_registry_condition, + ParameteresFactory as pf, + ConditionsFactory as cf, ) from lib.blueprints.byom.pipeline_definitions.deploy_actions import ( sagemaker_layer, @@ -52,7 +42,7 @@ create_copy_assets_lambda, ) from lib.blueprints.byom.pipeline_definitions.iam_policies import ( - create_inovoke_lambda_policy, + create_invoke_lambda_policy, create_orchestrator_policy, ) from lib.blueprints.byom.pipeline_definitions.configure_multi_account import ( @@ -67,34 +57,34 @@ def __init__(self, scope: core.Construct, id: str, *, multi_account=False, **kwa super().__init__(scope, id, **kwargs) # Get stack parameters: - notification_email = create_notification_email_parameter(self) - git_address = create_git_address_parameter(self) + notification_email = pf.create_notification_email_parameter(self) + git_address = pf.create_git_address_parameter(self) # Get the optional S3 assets bucket to use - existing_bucket = create_existing_bucket_parameter(self) + existing_bucket = pf.create_existing_bucket_parameter(self) # Get the optional S3 assets bucket to use - existing_ecr_repo = create_existing_ecr_repo_parameter(self) + existing_ecr_repo = pf.create_existing_ecr_repo_parameter(self) # Will SageMaker's Model Registry be used to provision models - use_model_registry = create_use_model_registry_parameter(self) + use_model_registry = pf.create_use_model_registry_parameter(self) # Does the user want the solution to create model registry - create_model_registry = create_model_registry_parameter(self) + create_model_registry = pf.create_model_registry_parameter(self) # Conditions - git_address_provided = create_git_address_provided_condition(self, git_address) + git_address_provided = cf.create_git_address_provided_condition(self, git_address) # client provided an existing S3 bucket name, to be used for assets - existing_bucket_provided = create_existing_bucket_provided_condition(self, existing_bucket) + existing_bucket_provided = cf.create_existing_bucket_provided_condition(self, existing_bucket) # client provided an existing Amazon ECR name - existing_ecr_provided = create_existing_ecr_provided_condition(self, existing_ecr_repo) + existing_ecr_provided = cf.create_existing_ecr_provided_condition(self, existing_ecr_repo) # client wants the solution to create model registry - model_registry_condition = create_model_registry_condition(self, create_model_registry) + model_registry_condition = cf.create_model_registry_condition(self, 
create_model_registry) # S3 bucket needs to be created for assets - create_new_bucket = create_new_bucket_condition(self, existing_bucket) + create_new_bucket = cf.create_new_bucket_condition(self, existing_bucket) # Amazon ECR repo needs too be created for custom Algorithms - create_new_ecr_repo = create_new_ecr_repo_condition(self, existing_ecr_repo) + create_new_ecr_repo = cf.create_new_ecr_repo_condition(self, existing_ecr_repo) # Constants pipeline_stack_name = "mlops-pipeline" @@ -259,7 +249,7 @@ def __init__(self, scope: core.Construct, id: str, *, multi_account=False, **kwa }, ) - # add lambda supressions + # add lambda suppressions provisioner_apigw_lambda.lambda_function.node.default_child.cfn_options.metadata = suppress_lambda_policies() provision_resource = provisioner_apigw_lambda.api_gateway.root.add_resource("provisionpipeline") @@ -370,10 +360,10 @@ def __init__(self, scope: core.Construct, id: str, *, multi_account=False, **kwa cross_account_keys=False, ) codecommit_pipeline.add_to_role_policy( - create_inovoke_lambda_policy([provisioner_apigw_lambda.lambda_function.function_arn]) + create_invoke_lambda_policy([provisioner_apigw_lambda.lambda_function.function_arn]) ) codebuild_project.add_to_role_policy( - create_inovoke_lambda_policy([provisioner_apigw_lambda.lambda_function.function_arn]) + create_invoke_lambda_policy([provisioner_apigw_lambda.lambda_function.function_arn]) ) pipeline_child_nodes = codecommit_pipeline.node.find_all() pipeline_child_nodes[1].node.default_child.cfn_options.metadata = { @@ -425,7 +415,23 @@ def __init__(self, scope: core.Construct, id: str, *, multi_account=False, **kwa }, } + # configure mutli-account parameters and permissions + is_delegated_admin = None + if multi_account: + paramaters_list, paramaters_labels, is_delegated_admin = configure_multi_account_parameters_permissions( + self, + assets_bucket, + blueprint_repository_bucket, + ecr_repo, + model_registry, + provisioner_apigw_lambda.lambda_function, + paramaters_list, + paramaters_labels, + ) + # properties of send data custom resource + # if you add new metrics to the cr properties, make sure to updated the allowed keys + # to send in the "_sanitize_data" function in source/lambdas/solution_helper/lambda_function.py send_data_cr_properties = { "Resource": "AnonymousMetric", "UUID": create_id_function.get_att_string("UUID"), @@ -441,29 +447,12 @@ def __init__(self, scope: core.Construct, id: str, *, multi_account=False, **kwa ).to_string(), "Region": core.Aws.REGION, "IsMultiAccount": str(multi_account), + "IsDelegatedAccount": is_delegated_admin if multi_account else core.Aws.NO_VALUE, "UseModelRegistry": use_model_registry.value_as_string, "SolutionId": get_cdk_context_value(self, "SolutionId"), "Version": get_cdk_context_value(self, "Version"), } - # configure mutli-account parameters and permissions - if multi_account: - ( - paramaters_list, - paramaters_labels, - send_data_cr_properties, - ) = configure_multi_account_parameters_permissions( - self, - assets_bucket, - blueprint_repository_bucket, - ecr_repo, - model_registry, - provisioner_apigw_lambda.lambda_function, - paramaters_list, - paramaters_labels, - send_data_cr_properties, - ) - # create send data custom resource send_data_function = create_send_data_custom_resource( self, helper_function.function_arn, send_data_cr_properties diff --git a/source/lib/blueprints/byom/byom_batch_pipeline.py b/source/lib/blueprints/byom/byom_batch_pipeline.py index 7571fcb..8b3a3e7 100644 --- 
a/source/lib/blueprints/byom/byom_batch_pipeline.py +++ b/source/lib/blueprints/byom/byom_batch_pipeline.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. A copy of the License is located at # @@ -22,22 +22,8 @@ from lib.blueprints.byom.pipeline_definitions.sagemaker_role import create_sagemaker_role from lib.blueprints.byom.pipeline_definitions.sagemaker_model import create_sagemaker_model from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_blueprint_bucket_name_parameter, - create_assets_bucket_name_parameter, - create_algorithm_image_uri_parameter, - create_batch_input_bucket_name_parameter, - create_batch_inference_data_parameter, - create_batch_job_output_location_parameter, - create_custom_algorithms_ecr_repo_arn_parameter, - create_inference_instance_parameter, - create_kms_key_arn_parameter, - create_model_artifact_location_parameter, - create_model_name_parameter, - create_custom_algorithms_ecr_repo_arn_provided_condition, - create_kms_key_arn_provided_condition, - create_model_package_name_parameter, - create_model_registry_provided_condition, - create_model_package_group_name_parameter, + ParameteresFactory as pf, + ConditionsFactory as cf, ) @@ -46,31 +32,33 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Parameteres # - blueprint_bucket_name = create_blueprint_bucket_name_parameter(self) - assets_bucket_name = create_assets_bucket_name_parameter(self) - custom_algorithms_ecr_repo_arn = create_custom_algorithms_ecr_repo_arn_parameter(self) - kms_key_arn = create_kms_key_arn_parameter(self) - algorithm_image_uri = create_algorithm_image_uri_parameter(self) - model_name = create_model_name_parameter(self) - model_artifact_location = create_model_artifact_location_parameter(self) - inference_instance = create_inference_instance_parameter(self) - batch_input_bucket = create_batch_input_bucket_name_parameter(self) - batch_inference_data = create_batch_inference_data_parameter(self) - batch_job_output_location = create_batch_job_output_location_parameter(self) - model_package_group_name = create_model_package_group_name_parameter(self) - model_package_name = create_model_package_name_parameter(self) + blueprint_bucket_name = pf.create_blueprint_bucket_name_parameter(self) + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + custom_algorithms_ecr_repo_arn = pf.create_custom_algorithms_ecr_repo_arn_parameter(self) + kms_key_arn = pf.create_kms_key_arn_parameter(self) + algorithm_image_uri = pf.create_algorithm_image_uri_parameter(self) + model_name = pf.create_model_name_parameter(self) + model_artifact_location = pf.create_model_artifact_location_parameter(self) + inference_instance = pf.create_inference_instance_parameter(self) + batch_input_bucket = pf.create_batch_input_bucket_name_parameter(self) + batch_inference_data = pf.create_batch_inference_data_parameter(self) + batch_job_output_location = pf.create_batch_job_output_location_parameter(self) + model_package_group_name = pf.create_model_package_group_name_parameter(self) + model_package_name = pf.create_model_package_name_parameter(self) 
# Conditions - custom_algorithms_ecr_repo_arn_provided = create_custom_algorithms_ecr_repo_arn_provided_condition( + custom_algorithms_ecr_repo_arn_provided = cf.create_custom_algorithms_ecr_repo_arn_provided_condition( self, custom_algorithms_ecr_repo_arn ) - kms_key_arn_provided = create_kms_key_arn_provided_condition(self, kms_key_arn) - model_registry_provided = create_model_registry_provided_condition(self, model_package_name) + kms_key_arn_provided = cf.create_kms_key_arn_provided_condition(self, kms_key_arn) + model_registry_provided = cf.create_model_registry_provided_condition(self, model_package_name) # Resources # - assets_bucket = s3.Bucket.from_bucket_name(self, "AssetsBucket", assets_bucket_name.value_as_string) + assets_bucket = s3.Bucket.from_bucket_name(self, "ImportedAssetsBucket", assets_bucket_name.value_as_string) # getting blueprint bucket object from its name - will be used later in the stack - blueprint_bucket = s3.Bucket.from_bucket_name(self, "BlueprintBucket", blueprint_bucket_name.value_as_string) + blueprint_bucket = s3.Bucket.from_bucket_name( + self, "ImportedBlueprintBucket", blueprint_bucket_name.value_as_string + ) sm_layer = sagemaker_layer(self, blueprint_bucket) # creating a sagemaker model @@ -147,7 +135,7 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: core.CfnOutput( self, - id="ModelName", + id="SageMakerModelName", value=sagemaker_model.attr_model_name, description="The name of the SageMaker model used by the batch transform job", ) diff --git a/source/lib/blueprints/byom/byom_custom_algorithm_image_builder.py b/source/lib/blueprints/byom/byom_custom_algorithm_image_builder.py index 29d6b1f..594f943 100644 --- a/source/lib/blueprints/byom/byom_custom_algorithm_image_builder.py +++ b/source/lib/blueprints/byom/byom_custom_algorithm_image_builder.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. 
A copy of the License is located at # @@ -28,13 +28,7 @@ suppress_iam_complex, suppress_sns, ) -from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_notification_email_parameter, - create_assets_bucket_name_parameter, - create_custom_container_parameter, - create_ecr_repo_name_parameter, - create_image_tag_parameter, -) +from lib.blueprints.byom.pipeline_definitions.templates_parameters import ParameteresFactory as pf class BYOMCustomAlgorithmImageBuilderStack(core.Stack): @@ -42,14 +36,14 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Parameteres # - notification_email = create_notification_email_parameter(self) - assets_bucket_name = create_assets_bucket_name_parameter(self) - custom_container = create_custom_container_parameter(self) - ecr_repo_name = create_ecr_repo_name_parameter(self) - image_tag = create_image_tag_parameter(self) + notification_email = pf.create_notification_email_parameter(self) + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + custom_container = pf.create_custom_container_parameter(self) + ecr_repo_name = pf.create_ecr_repo_name_parameter(self) + image_tag = pf.create_image_tag_parameter(self) # Resources # - assets_bucket = s3.Bucket.from_bucket_name(self, "AssetsBucket", assets_bucket_name.value_as_string) + assets_bucket = s3.Bucket.from_bucket_name(self, "ImportedAssetsBucket", assets_bucket_name.value_as_string) # Defining pipeline stages # source stage @@ -75,7 +69,7 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: image_builder_pipeline = codepipeline.Pipeline( self, - "BYOMPipelineReatimeBuild", + "BYOMPipelineRealtimeBuild", stages=[source_stage, build_stage], cross_account_keys=False, ) @@ -103,7 +97,7 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: ) ) - # add cfn nag supressions + # add cfn nag suppressions pipeline_child_nodes = image_builder_pipeline.node.find_all() pipeline_child_nodes[1].node.default_child.cfn_options.metadata = suppress_pipeline_bucket() pipeline_child_nodes[6].node.default_child.cfn_options.metadata = suppress_iam_complex() diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/.coveragerc b/source/lib/blueprints/byom/lambdas/create_baseline_job/.coveragerc similarity index 100% rename from source/lib/blueprints/byom/lambdas/create_data_baseline_job/.coveragerc rename to source/lib/blueprints/byom/lambdas/create_baseline_job/.coveragerc diff --git a/source/lib/blueprints/byom/lambdas/create_baseline_job/baselines_helper.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/baselines_helper.py new file mode 100644 index 0000000..779c1aa --- /dev/null +++ b/source/lib/blueprints/byom/lambdas/create_baseline_job/baselines_helper.py @@ -0,0 +1,324 @@ +# ##################################################################################################################### +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# # +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # +# with the License. A copy of the License is located at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # +# and limitations under the License. 
# +# ##################################################################################################################### +from typing import Callable, Any, Dict, List, Optional +import logging +import sagemaker +from sagemaker.model_monitor import DefaultModelMonitor +from sagemaker.model_monitor import ModelQualityMonitor +from sagemaker.model_monitor.dataset_format import DatasetFormat + +logger = logging.getLogger(__name__) + + +def exception_handler(func: Callable[..., Any]) -> Any: + """ + Decorator function to handle exceptions + + Args: + func (object): function to be decorated + + Returns: + func's return value + + Raises: + Exception thrown by the decorated function + """ + + def wrapper_function(*args, **kwargs): + try: + return func(*args, **kwargs) + + except Exception as e: + logger.error(f"Error in {func.__name__}: {str(e)}") + raise e + + return wrapper_function + + +class SolutionSageMakerBaselines: + """ + Creates Amazon SageMaker (DataQuality or ModelQuality) Baseline Jobs + + Attributes: + monitoring_type (str): type of SageMaker Model Monitor. Supported values ['DataQuality'|'ModelQuality'] + instance_type (str): compute instance type for the baseline job, in the form of a CDK CfnParameter object + instance_count (int): number of EC2 instances + instance_volume_size (int): volume size of the EC2 instance + role_arn (str): SageMaker role ARN to be used to create the baseline job + baseline_job_name (str): name of the baseline job to be created + baseline_dataset (str): S3 URI location of the baseline data (file's format: csv) + output_s3_uri (str): S3 prefix of the baseline job's output + max_runtime_in_seconds (int): optional max time the job is allowed to run + kms_key_arn (str): optional ARN of the KMS key used to encrypt data capture and + to encrypt the job's output + problem_type (str): used with ModelQuality baseline. Type of Machine Learning problem. Valid values are + ['Regression'|'BinaryClassification'|'MulticlassClassification'] (default: None). + ground_truth_attribute (str): index or JSONpath to locate actual label(s) (used with ModelQuality baseline). + (default: None). + inference_attribute (str): index or JSONpath to locate predicted label(s) (used with ModelQuality baseline). + Required for 'Regression'|'MulticlassClassification' problems, + and not required for 'BinaryClassification' if 'probability_attribute' and + 'probability_threshold_attribute' are provided (default: None). + probability_attribute (str): index or JSONpath to locate probabilities (used with ModelQuality baseline). + Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). + probability_threshold_attribute (float): threshold to convert probabilities to binaries (used with ModelQuality baseline). + Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). + sagemaker_session (sagemaker.session.Session): Session object which manages interactions with Amazon SageMaker + APIs and any other AWS services needed. If not specified, one is created using the default AWS configuration + chain (default: None). + tags (list[dict[str, str]]): resource tags (default: None).
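+ + Example (an illustrative sketch of creating a DataQuality baseline job; the role, bucket, and job name values below are placeholders, not values used by the solution): + + >>> baselines = SolutionSageMakerBaselines( + ... monitoring_type="DataQuality", + ... instance_type="ml.m5.large", + ... instance_count=1, + ... instance_volume_size=30, + ... role_arn="arn:aws:iam::<account-id>:role/<sagemaker-role>", + ... baseline_job_name="my-data-quality-baseline-job", + ... baseline_dataset="s3://<assets-bucket>/baseline_data.csv", + ... output_s3_uri="s3://<assets-bucket>/baseline_output", + ... ) + >>> baseline_job = baselines.create_baseline_job()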
+ """ + + @exception_handler + def __init__( + self, # NOSONAR:S107 the class is designed to take many attributes + monitoring_type: str, + instance_type: str, + instance_count: int, + instance_volume_size: int, + role_arn: str, + baseline_job_name: str, + baseline_dataset: str, + output_s3_uri: str, + max_runtime_in_seconds: Optional[int] = None, + kms_key_arn: Optional[str] = None, + problem_type: Optional[str] = None, + ground_truth_attribute: Optional[str] = None, + inference_attribute: Optional[str] = None, + probability_attribute: Optional[str] = None, + probability_threshold_attribute: Optional[float] = None, + sagemaker_session: Optional[sagemaker.session.Session] = None, + tags: Optional[List[Dict[str, str]]] = None, + ) -> None: + # validate the provided monitoring_type + if monitoring_type not in ["DataQuality", "ModelQuality"]: + raise ValueError( + ( + f"The provided monitoring type: {monitoring_type} is not valid. " + + "It must be 'DataQuality'|'ModelQuality'" + ) + ) + self.monitoring_type = monitoring_type + self.instance_type = instance_type + self.instance_count = instance_count + self.instance_volume_size = instance_volume_size + self.role_arn = role_arn + self.baseline_job_name = baseline_job_name + self.baseline_dataset = baseline_dataset + self.output_s3_uri = output_s3_uri + self.max_runtime_in_seconds = max_runtime_in_seconds + self.kms_key_arn = kms_key_arn + self.problem_type = problem_type + self.ground_truth_attribute = ground_truth_attribute + self.inference_attribute = inference_attribute + self.probability_attribute = probability_attribute + self.probability_threshold_attribute = probability_threshold_attribute + self.sagemaker_session = sagemaker_session + self.tags = tags + + @exception_handler + def create_baseline_job(self) -> sagemaker.processing.ProcessingJob: + """ + Gets the *BaselineJob based on the monitoring_type + + Returns: + sagemaker.processing.ProcessingJob object + """ + # create *Baseline Job MonitoringType->function_name map + type_function_map = dict( + DataQuality="_create_data_quality_baseline", ModelQuality="_create_model_quality_baseline" + ) + + # get the formated baseline job arguments + baseline_job_args = self._get_baseline_job_args() + + # call the right function to create the *Baseline Job + baseline_processing_job = getattr(self, type_function_map[self.monitoring_type])(baseline_job_args) + + return baseline_processing_job + + @exception_handler + def _get_baseline_job_args( + self, + ) -> Dict[str, Dict[str, str]]: + """ + Gets the baseline job arguments to create the *baseline job + + Returns: + dict[str, dict[str, str]]: the arguments to create the *baseline job + """ + # validate baseline_dataset + if not self._is_valid_argument_value(self.baseline_dataset): + raise ValueError( + f"BaselineDataset S3Uri must be provided to create the {self.monitoring_type} baseline job" + ) + + baseline_args = dict( + # args passed to the Monitor class's construct + class_args=dict( + instance_type=self.instance_type, + instance_count=self.instance_count, + volume_size_in_gb=self.instance_volume_size, + role=self.role_arn, + ), + # args passed to the Monitor class's suggest_baseline function + suggest_args=dict( + job_name=self.baseline_job_name, + dataset_format=DatasetFormat.csv(header=True), + baseline_dataset=self.baseline_dataset, + output_s3_uri=self.output_s3_uri, + ), + ) + + # add max_runtime_in_seconds if provided + if self.max_runtime_in_seconds: + baseline_args["class_args"].update({"max_runtime_in_seconds": 
self.max_runtime_in_seconds}) + + # add sagemaker session if provided + if self.sagemaker_session: + baseline_args["class_args"].update({"sagemaker_session": self.sagemaker_session}) + + # add tags if provided + if self.tags: + baseline_args["class_args"].update({"tags": self.tags}) + + # add kms key if provided + if self.kms_key_arn: + baseline_args["class_args"].update({"output_kms_key": self.kms_key_arn}) + baseline_args["class_args"].update({"volume_kms_key": self.kms_key_arn}) + + # add ModelQuality args + if self.monitoring_type == "ModelQuality": + baseline_args = self._add_model_quality_args(baseline_args) + + return baseline_args + + @exception_handler + def _add_model_quality_args( + self, + baseline_args: Dict[str, Dict[str, str]], + ) -> Dict[str, Dict[str, str]]: + """ + Adds ModelQuality's specific arguments to the passed baseline_args + + Args: + baseline_args (dict[str, dict[str, str]]): arguments to create the baseline job + + Returns: + dict[str, dict[str, str]]: The combined arguments to create the baseline job + """ + # validate the problem_type + if self.problem_type not in ["Regression", "BinaryClassification", "MulticlassClassification"]: + raise ValueError( + ( + f"The {self.problem_type} is not valid. ProblemType must be " + + "['Regression'|'BinaryClassification'|'MulticlassClassification']" + ) + ) + baseline_args["suggest_args"].update({"problem_type": self.problem_type}) + + # For Regression or MulticlassClassification, inference_attribute is required + if self.problem_type in ["Regression", "MulticlassClassification"]: + # validate InferenceAttribute value + if not self._is_valid_argument_value(self.inference_attribute): + raise ValueError( + "InferenceAttribute must be provided for ProblemType: Regression or MulticlassClassification" + ) + # add to args dict + baseline_args["suggest_args"].update({"inference_attribute": self.inference_attribute}) + # For BinaryClassification, use probability_attribute and probability_threshold_attribute or inference_attribute + else: + if self._is_valid_argument_value(self.probability_attribute) and self._is_valid_argument_value( + self.probability_threshold_attribute + ): + baseline_args["suggest_args"].update({"probability_attribute": self.probability_attribute}) + baseline_args["suggest_args"].update( + {"probability_threshold_attribute": self.probability_threshold_attribute} + ) + + elif self._is_valid_argument_value(self.inference_attribute): + baseline_args["suggest_args"].update({"inference_attribute": self.inference_attribute}) + else: + raise ValueError( + ( + "InferenceAttribute or (ProbabilityAttribute/ProbabilityThresholdAttribute) must be provided " + + "for ProblemType: BinaryClassification" + ) + ) + + # validate ground_truth_attribute + if not self._is_valid_argument_value(self.ground_truth_attribute): + raise ValueError("GroundTruthAttribute must be provided") + + baseline_args["suggest_args"].update({"ground_truth_attribute": self.ground_truth_attribute}) + + return baseline_args + + @exception_handler + def _create_data_quality_baseline( + self, data_quality_baseline_job_args: Dict[str, Dict[str, str]] + ) -> sagemaker.processing.ProcessingJob: + """ + Creates SageMaker DataQuality baseline job + + Args: + data_quality_baseline_job_args (dict[str, dict[str, str]]): The DataQuality baseline job arguments + + Returns: + sagemaker.processing.ProcessingJob object + """ + logger.info( + f"Creating DataQuality baseline job {data_quality_baseline_job_args['suggest_args']['job_name']} ..." 
+ ) + + # create DefaultModelMonitor + data_quality_monitor = DefaultModelMonitor(**data_quality_baseline_job_args["class_args"]) + + # create the DataQuality baseline job + data_baseline_job = data_quality_monitor.suggest_baseline( + **data_quality_baseline_job_args["suggest_args"], + ) + + return data_baseline_job + + @exception_handler + def _create_model_quality_baseline( + self, + model_quality_baseline_job_args: Dict[str, Dict[str, str]], + ) -> sagemaker.processing.ProcessingJob: + """ + Creates SageMaker ModelQuality baseline job + + Args: + model_quality_baseline_job_args (dict[str, dict[str, str]]): The ModelQuality baseline job arguments + + Returns: + sagemaker.processing.ProcessingJob object + """ + logger.info( + f"Creating ModelQuality baseline job {model_quality_baseline_job_args['suggest_args']['job_name']} ..." + ) + + # create ModelQualityMonitor + model_quality_monitor = ModelQualityMonitor(**model_quality_baseline_job_args["class_args"]) + + # create the ModelQuality baseline job + model_baseline_job = model_quality_monitor.suggest_baseline( + **model_quality_baseline_job_args["suggest_args"], + ) + + return model_baseline_job + + def _is_valid_argument_value(self, value: str) -> bool: + # validate the argument's value is not None or empty string + return True if value else False diff --git a/source/lib/blueprints/byom/lambdas/create_baseline_job/main.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/main.py new file mode 100644 index 0000000..80fb55d --- /dev/null +++ b/source/lib/blueprints/byom/lambdas/create_baseline_job/main.py @@ -0,0 +1,57 @@ +# ##################################################################################################################### +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# # +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # +# with the License. A copy of the License is located at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # +# and limitations under the License. 
# +# ##################################################################################################################### +import os +import sagemaker +from shared.logger import get_logger +from baselines_helper import SolutionSageMakerBaselines, exception_handler + + +logger = get_logger(__name__) +sagemaker_session = sagemaker.session.Session() + + +@exception_handler +def handler(event, context): + # get some environment variables + assets_bucket = os.environ["ASSETS_BUCKET"] + monitoring_type = os.environ.get("MONITORING_TYPE") + baseline_job_name = os.environ["BASELINE_JOB_NAME"] + max_runtime_seconds = os.environ.get("MAX_RUNTIME_SECONDS") + + logger.info(f"Creating {monitoring_type} baseline processing job {baseline_job_name} ...") + + # create a SageMakerBaselines instance + sagemaker_baseline = SolutionSageMakerBaselines( + monitoring_type=os.environ.get("MONITORING_TYPE"), + instance_type=os.environ.get("INSTANCE_TYPE", "ml.m5.large"), + instance_count=int(os.environ.get("INSTANCE_COUNT", "1")), + instance_volume_size=int(os.environ.get("INSTANCE_VOLUME_SIZE", "30")), + role_arn=os.environ["ROLE_ARN"], + baseline_job_name=os.environ["BASELINE_JOB_NAME"], + baseline_dataset=f"s3://{assets_bucket}/{os.environ['BASELINE_DATA_LOCATION']}", + output_s3_uri=f"s3://{os.environ['BASELINE_JOB_OUTPUT_LOCATION']}", + max_runtime_in_seconds=int(max_runtime_seconds) if max_runtime_seconds else None, + kms_key_arn=os.environ.get("KMS_KEY_ARN"), + problem_type=os.environ.get("PROBLEM_TYPE"), + ground_truth_attribute=os.environ.get("GROUND_TRUTH_ATTRIBUTE"), + inference_attribute=os.environ.get("INFERENCE_ATTRIBUTE"), + probability_attribute=os.environ.get("PROBABILITY_ATTRIBUTE"), + probability_threshold_attribute=os.environ.get("PROBABILITY_THRESHOLD_ATTRIBUTE"), + sagemaker_session=sagemaker_session, + tags=[{"Key": "stack_name", "Value": os.environ["STACK_NAME"]}], + ) + + # create the SageMaker Baseline Job + baseline_job = sagemaker_baseline.create_baseline_job() + + logger.info(f"Started {monitoring_type} baseline processing job. 
Job info: {baseline_job.describe()}") diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/requirements-test.txt b/source/lib/blueprints/byom/lambdas/create_baseline_job/requirements-test.txt similarity index 100% rename from source/lib/blueprints/byom/lambdas/create_data_baseline_job/requirements-test.txt rename to source/lib/blueprints/byom/lambdas/create_baseline_job/requirements-test.txt diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/setup.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/setup.py similarity index 100% rename from source/lib/blueprints/byom/lambdas/create_data_baseline_job/setup.py rename to source/lib/blueprints/byom/lambdas/create_baseline_job/setup.py diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/__init__.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/__init__.py similarity index 100% rename from source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/__init__.py rename to source/lib/blueprints/byom/lambdas/create_baseline_job/tests/__init__.py diff --git a/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/fixtures/baseline_fixtures.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/fixtures/baseline_fixtures.py new file mode 100644 index 0000000..b035a6f --- /dev/null +++ b/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/fixtures/baseline_fixtures.py @@ -0,0 +1,175 @@ +####################################################################################################################### +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# # +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # +# with the License. A copy of the License is located at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # +# and limitations under the License. 
# +# ##################################################################################################################### +import os +import pytest +from baselines_helper import SolutionSageMakerBaselines +from sagemaker.model_monitor.dataset_format import DatasetFormat +import sagemaker + +# create sagemaker session +sagemaker_session = sagemaker.session.Session() + + +@pytest.fixture +def mock_basic_data_quality_env(): + data_quality_env = { + "MONITORING_TYPE": "DataQuality", + "BASELINE_JOB_NAME": "test-baseline-job", + "ASSETS_BUCKET": "testbucket", + "SAGEMAKER_ENDPOINT_NAME": "Sagemaker-test-endpoint", + "BASELINE_DATA_LOCATION": "baseline_data.csv", + "BASELINE_JOB_OUTPUT_LOCATION": "s3://testbucket/baseline_output", + "INSTANCE_TYPE": "ml.m5.4xlarge", + "INSTANCE_VOLUME_SIZE": "20", + "ROLE_ARN": "arn:aws:iam::account:role/myrole", + "STACK_NAME": "test_stack", + "LOG_LEVEL": "INFO", + } + + return data_quality_env + + +@pytest.fixture +def mock_data_quality_env_with_optional_vars(mock_basic_data_quality_env): + data_quality_env = mock_basic_data_quality_env.copy() + data_quality_env.update( + { + "MAX_RUNTIME_SECONDS": "3300", + "KMS_KEY_ARN": "arn:aws:kms:region:accountid:key/mykey", + } + ) + + return data_quality_env + + +@pytest.fixture +def mock_model_quality_env_with_optional_vars(mock_data_quality_env_with_optional_vars): + model_quality_env = mock_data_quality_env_with_optional_vars.copy() + model_quality_env.update( + { + "MONITORING_TYPE": "ModelQuality", + "PROBLEM_TYPE": "BinaryClassification", + "GROUND_TRUTH_ATTRIBUTE": "label", + "INFERENCE_ATTRIBUTE": "prediction", + "PROBABILITY_ATTRIBUTE": "probability", + "PROBABILITY_THRESHOLD_ATTRIBUTE": "0.5", + } + ) + + return model_quality_env + + +@pytest.fixture +def mocked_sagemaker_baseline_attributes( + monkeypatch, + mock_basic_data_quality_env, + mock_data_quality_env_with_optional_vars, + mock_model_quality_env_with_optional_vars, +): + def _mocked_sagemaker_baseline_attributes(monitoring_type, with_optional=False): + # set the env variables based on monitoring_type, with_optional + if monitoring_type == "DataQuality": + if with_optional: + envs = mock_data_quality_env_with_optional_vars + else: + envs = mock_basic_data_quality_env + else: + envs = mock_model_quality_env_with_optional_vars + + monkeypatch.setattr(os, "environ", envs) + max_runtime_seconds = os.environ.get("MAX_RUNTIME_SECONDS") + + return { + "monitoring_type": monitoring_type, + "instance_type": os.environ.get("INSTANCE_TYPE", "ml.m5.large"), + "instance_count": int(os.environ.get("INSTANCE_COUNT", "1")), + "instance_volume_size": int(os.environ.get("INSTANCE_VOLUME_SIZE", "30")), + "role_arn": os.environ["ROLE_ARN"], + "baseline_job_name": os.environ["BASELINE_JOB_NAME"], + "baseline_dataset": f"s3://{os.environ['ASSETS_BUCKET']}/{os.environ['BASELINE_DATA_LOCATION']}", + "output_s3_uri": os.environ["BASELINE_JOB_OUTPUT_LOCATION"], + "max_runtime_in_seconds": int(max_runtime_seconds) if max_runtime_seconds else None, + "kms_key_arn": os.environ.get("KMS_KEY_ARN"), + "problem_type": os.environ.get("PROBLEM_TYPE"), + "ground_truth_attribute": os.environ.get("GROUND_TRUTH_ATTRIBUTE"), + "inference_attribute": os.environ.get("INFERENCE_ATTRIBUTE"), + "probability_attribute": os.environ.get("PROBABILITY_ATTRIBUTE"), + "probability_threshold_attribute": os.environ.get("PROBABILITY_THRESHOLD_ATTRIBUTE"), + "sagemaker_session": sagemaker_session, + "tags": [{"Key": "stack_name", "Value": os.environ["STACK_NAME"]}], + } + + return 
_mocked_sagemaker_baseline_attributes + + +@pytest.fixture +def mocked_sagemaker_baselines_instance(mocked_sagemaker_baseline_attributes): + def _mocked_sagemaker_baselines_instance(monitoring_type, with_optional=True): + return SolutionSageMakerBaselines(**mocked_sagemaker_baseline_attributes(monitoring_type, with_optional)) + + return _mocked_sagemaker_baselines_instance + + +@pytest.fixture +def mocked_expected_baseline_args(mocked_sagemaker_baselines_instance): + def _mocked_expected_baseline_args(monitoring_type): + sagemaker_baselines_instance = mocked_sagemaker_baselines_instance(monitoring_type) + baseline_args = dict( + # args passed to the Monitor class's construct + class_args=dict( + instance_type=sagemaker_baselines_instance.instance_type, + instance_count=sagemaker_baselines_instance.instance_count, + volume_size_in_gb=sagemaker_baselines_instance.instance_volume_size, + role=sagemaker_baselines_instance.role_arn, + max_runtime_in_seconds=sagemaker_baselines_instance.max_runtime_in_seconds, + output_kms_key=sagemaker_baselines_instance.kms_key_arn, + volume_kms_key=sagemaker_baselines_instance.kms_key_arn, + sagemaker_session=sagemaker_baselines_instance.sagemaker_session, + tags=sagemaker_baselines_instance.tags, + ), + # args passed to the Monitor class's suggest_baseline function + suggest_args=dict( + job_name=sagemaker_baselines_instance.baseline_job_name, + dataset_format=DatasetFormat.csv(header=True), + baseline_dataset=sagemaker_baselines_instance.baseline_dataset, + output_s3_uri=sagemaker_baselines_instance.output_s3_uri, + ), + ) + + # add ModelQuality + if monitoring_type == "ModelQuality": + baseline_args["suggest_args"].update({"problem_type": sagemaker_baselines_instance.problem_type}) + if sagemaker_baselines_instance.problem_type in ["Regression", "MulticlassClassification"]: + baseline_args["suggest_args"].update( + {"inference_attribute": sagemaker_baselines_instance.inference_attribute} + ) + else: + baseline_args["suggest_args"].update( + {"probability_attribute": sagemaker_baselines_instance.probability_attribute} + ) + baseline_args["suggest_args"].update( + {"probability_threshold_attribute": sagemaker_baselines_instance.probability_threshold_attribute} + ) + baseline_args["suggest_args"].update( + {"ground_truth_attribute": sagemaker_baselines_instance.ground_truth_attribute} + ) + return baseline_args + + return _mocked_expected_baseline_args + + +@pytest.fixture() +def event(): + return { + "message": "Start data baseline job", + } diff --git a/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/test_create_data_baseline.py b/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/test_create_data_baseline.py new file mode 100644 index 0000000..764115d --- /dev/null +++ b/source/lib/blueprints/byom/lambdas/create_baseline_job/tests/test_create_data_baseline.py @@ -0,0 +1,195 @@ +####################################################################################################################### +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# # +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # +# with the License. A copy of the License is located at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # +# OR CONDITIONS OF ANY KIND, express or implied. 
See the License for the specific language governing permissions # +# and limitations under the License. # +# ##################################################################################################################### +from unittest.mock import patch +from unittest import TestCase +import pytest +import os +from main import handler +from tests.fixtures.baseline_fixtures import ( + mock_basic_data_quality_env, + mock_data_quality_env_with_optional_vars, + mock_model_quality_env_with_optional_vars, + mocked_sagemaker_baseline_attributes, + mocked_sagemaker_baselines_instance, + mocked_expected_baseline_args, + event, +) +from baselines_helper import SolutionSageMakerBaselines + + +def test_init(mocked_sagemaker_baseline_attributes): + # test object creation based on MonitoringType and env variables + # test DataQuality with Optional parameters (max_run_time and kms_key_arn) are not provided + baselines = SolutionSageMakerBaselines(**mocked_sagemaker_baseline_attributes("DataQuality")) + assert baselines.monitoring_type == os.environ["MONITORING_TYPE"] + assert baselines.problem_type is None + assert baselines.ground_truth_attribute is None + assert baselines.inference_attribute is None + assert baselines.probability_attribute is None + assert baselines.probability_threshold_attribute is None + assert baselines.max_runtime_in_seconds is None + assert baselines.kms_key_arn is None + + # test DataQuality with Optional parameters are provided + baselines = SolutionSageMakerBaselines(**mocked_sagemaker_baseline_attributes("DataQuality", with_optional=True)) + assert baselines.monitoring_type == os.environ["MONITORING_TYPE"] + assert baselines.max_runtime_in_seconds == int(os.environ["MAX_RUNTIME_SECONDS"]) + assert baselines.kms_key_arn == os.environ["KMS_KEY_ARN"] + + # test ModelQuality with Optional parameters (max_run_time and kms_key_arn) are not provided + baselines = SolutionSageMakerBaselines(**mocked_sagemaker_baseline_attributes("ModelQuality")) + assert baselines.monitoring_type == os.environ["MONITORING_TYPE"] + assert baselines.problem_type == os.environ["PROBLEM_TYPE"] + assert baselines.ground_truth_attribute == os.environ["GROUND_TRUTH_ATTRIBUTE"] + assert baselines.inference_attribute == os.environ["INFERENCE_ATTRIBUTE"] + assert baselines.probability_attribute == os.environ["PROBABILITY_ATTRIBUTE"] + assert baselines.probability_threshold_attribute == os.environ["PROBABILITY_THRESHOLD_ATTRIBUTE"] + + # test exception if non-supported monitoring type is passed + with pytest.raises(ValueError) as error: + SolutionSageMakerBaselines(**mocked_sagemaker_baseline_attributes("NotSupported")) + assert str(error.value) == ( + "The provided monitoring type: NotSupported is not valid. 
It must be 'DataQuality'|'ModelQuality'" + ) + + +def test_get_baseline_job_args( + mocked_sagemaker_baselines_instance, mocked_expected_baseline_args, mocked_sagemaker_baseline_attributes +): + sagemaker_baselines = mocked_sagemaker_baselines_instance("DataQuality") + # assert the returned baseline args for DataQuality baseline + TestCase().assertDictEqual( + sagemaker_baselines._get_baseline_job_args(), mocked_expected_baseline_args("DataQuality") + ) + + # assert the returned baseline args for ModelQuality baseline + sagemaker_baselines = mocked_sagemaker_baselines_instance("ModelQuality") + TestCase().assertDictEqual( + sagemaker_baselines._get_baseline_job_args(), mocked_expected_baseline_args("ModelQuality") + ) + + # test BinaryClassification with only inference_attribute provided + baseline_attributes = mocked_sagemaker_baseline_attributes("ModelQuality") + baseline_attributes["probability_attribute"] = "" + baseline_attributes["probability_threshold_attribute"] = None + baseline_instance = SolutionSageMakerBaselines(**baseline_attributes) + baseline_args = baseline_instance._get_baseline_job_args() + assert baseline_args["suggest_args"]["problem_type"] == "BinaryClassification" + assert baseline_args["suggest_args"]["inference_attribute"] == baseline_attributes["inference_attribute"] + assert baseline_args["suggest_args"].get("probability_attribute") is None + assert baseline_args["suggest_args"].get("probability_threshold_attribute") is None + + # test problem_type = "Regression"|"MulticlassClassification" + baseline_attributes = mocked_sagemaker_baseline_attributes("ModelQuality") + baseline_attributes["problem_type"] = "Regression" + baseline_instance = SolutionSageMakerBaselines(**baseline_attributes) + baseline_args = baseline_instance._get_baseline_job_args() + assert baseline_args["suggest_args"]["problem_type"] == "Regression" + assert baseline_args["suggest_args"]["inference_attribute"] == baseline_attributes["inference_attribute"] + assert baseline_args["suggest_args"].get("probability_attribute") is None + assert baseline_args["suggest_args"].get("probability_threshold_attribute") is None + + +def test_get_baseline_job_args_exceptions(mocked_sagemaker_baseline_attributes): + # test exception if baseline_dataset is not provided + baseline_attributes = mocked_sagemaker_baseline_attributes("ModelQuality") + # provide an empty baseline_dataset + baseline_attributes["baseline_dataset"] = "" + with pytest.raises(ValueError) as error: + baseline = SolutionSageMakerBaselines(**baseline_attributes) + baseline._get_baseline_job_args() + assert str(error.value) == "BaselineDataset S3Uri must be provided to create the ModelQuality baseline job" + # reset value + baseline_attributes["baseline_dataset"] = os.environ["BASELINE_DATA_LOCATION"] + + # test exception for unsupported problem_type + baseline_attributes["problem_type"] = "Unsupported" + with pytest.raises(ValueError) as error: + baseline = SolutionSageMakerBaselines(**baseline_attributes) + baseline._get_baseline_job_args() + assert str(error.value) == ( + "The Unsupported is not valid. 
ProblemType must be " + + "['Regression'|'BinaryClassification'|'MulticlassClassification']" + ) + + # test exception if inference_attribute not provided + baseline_attributes["problem_type"] = "Regression" + baseline_attributes["inference_attribute"] = "" + with pytest.raises(ValueError) as error: + baseline = SolutionSageMakerBaselines(**baseline_attributes) + baseline._get_baseline_job_args() + assert str(error.value) == ( + "InferenceAttribute must not be provided for ProblemType: Regression or MulticlassClassification" + ) + + # test exception if none of inference_attribute, probability_attribute and probability_threshold_attribute + # in not provided for BinaryClassification problem + baseline_attributes["problem_type"] = "BinaryClassification" + baseline_attributes["inference_attribute"] = "" + baseline_attributes["probability_attribute"] = "" + baseline_attributes["probability_threshold_attribute"] = None + with pytest.raises(ValueError) as error: + baseline = SolutionSageMakerBaselines(**baseline_attributes) + baseline._get_baseline_job_args() + assert str(error.value) == ( + "InferenceAttribute or (ProbabilityAttribute/ProbabilityThresholdAttribute) must be provided " + + "for ProblemType: BinaryClassification" + ) + # reset values + baseline_attributes["inference_attribute"] = os.environ["INFERENCE_ATTRIBUTE"] + baseline_attributes["probability_attribute"] = os.environ["PROBABILITY_ATTRIBUTE"] + baseline_attributes["probability_threshold_attribute"] = os.environ["PROBABILITY_THRESHOLD_ATTRIBUTE"] + + # test exception if ground_truth_attribute is not provides + baseline_attributes["ground_truth_attribute"] = "" + with pytest.raises(ValueError) as error: + baseline = SolutionSageMakerBaselines(**baseline_attributes) + baseline._get_baseline_job_args() + assert str(error.value) == "GroundTruthAttribute must be provided" + + +@patch("baselines_helper.SolutionSageMakerBaselines._create_model_quality_baseline") +@patch("baselines_helper.SolutionSageMakerBaselines._create_data_quality_baseline") +def test_create_baseline_job( + mocked_create_data_quality_baseline, mocked_create_model_quality_baseline, mocked_sagemaker_baselines_instance +): + sagemaker_baselines = mocked_sagemaker_baselines_instance("DataQuality") + sagemaker_baselines.create_baseline_job() + baseline_args = sagemaker_baselines._get_baseline_job_args() + mocked_create_data_quality_baseline.assert_called_with(baseline_args) + + +@patch("baselines_helper.DefaultModelMonitor.suggest_baseline") +def test_create_data_quality_baseline(mocked_default_monitor_suggest_baseline, mocked_sagemaker_baselines_instance): + sagemaker_baselines = mocked_sagemaker_baselines_instance("DataQuality") + expected_baseline_args = sagemaker_baselines._get_baseline_job_args() + sagemaker_baselines._create_data_quality_baseline(expected_baseline_args) + mocked_default_monitor_suggest_baseline.assert_called_with(**expected_baseline_args["suggest_args"]) + + +@patch("baselines_helper.ModelQualityMonitor.suggest_baseline") +def test_create_model_quality_baseline(mocked_model_monitor_suggest_baseline, mocked_sagemaker_baselines_instance): + sagemaker_baselines = mocked_sagemaker_baselines_instance("ModelQuality") + expected_baseline_args = sagemaker_baselines._get_baseline_job_args() + sagemaker_baselines._create_model_quality_baseline(expected_baseline_args) + mocked_model_monitor_suggest_baseline.assert_called_with(**expected_baseline_args["suggest_args"]) + + +@patch("baselines_helper.SolutionSageMakerBaselines.create_baseline_job") +def 
test_handler(mocked_create_baseline_job, event, mocked_sagemaker_baseline_attributes): + # set the environment variables + mocked_sagemaker_baseline_attributes("ModelQuality") + # calling the handler function should create the SolutionSageMakerBaselines object + # and call the create_baseline_job function + handler(event, {}) + mocked_create_baseline_job.assert_called() diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/main.py b/source/lib/blueprints/byom/lambdas/create_data_baseline_job/main.py deleted file mode 100644 index eb733f4..0000000 --- a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/main.py +++ /dev/null @@ -1,102 +0,0 @@ -# ##################################################################################################################### -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # -# # -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # -# with the License. A copy of the License is located at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # -# and limitations under the License. # -# ##################################################################################################################### -import os -import botocore -import boto3 -from shared.logger import get_logger -from shared.helper import get_client, get_built_in_model_monitor_container_uri - -logger = get_logger(__name__) -sm_client = get_client("sagemaker") - - -def handler(event, context): - baseline_job_name = os.environ["BASELINE_JOB_NAME"] - assets_bucket = os.environ["ASSETS_BUCKET"] - training_data_location = os.environ["TRAINING_DATA_LOCATION"] - baseline_job_output_location = os.environ["BASELINE_JOB_OUTPUT_LOCATION"] - instance_type = os.environ["INSTANCE_TYPE"] - instance_volume_size = int(os.environ["INSTANCE_VOLUME_SIZE"]) - role_arn = os.environ["ROLE_ARN"] - kms_key_arn = os.environ.get("KMS_KEY_ARN") - stack_name = os.environ["STACK_NAME"] - max_runtime_seconds = int(os.environ["MAX_RUNTIME_SECONDS"]) - - try: - logger.info(f"Creating data baseline processing job {baseline_job_name} ...") - request = { - "ProcessingJobName": baseline_job_name, - "ProcessingInputs": [ - { - "InputName": "baseline_dataset_input", - "S3Input": { - "S3Uri": f"s3://{assets_bucket}/{training_data_location}", - "LocalPath": "/opt/ml/processing/input/baseline_dataset_input", - "S3DataType": "S3Prefix", - "S3InputMode": "File", - "S3DataDistributionType": "FullyReplicated", - "S3CompressionType": "None", - }, - } - ], - "ProcessingOutputConfig": { - "Outputs": [ - { - "OutputName": "baseline_dataset_output", - "S3Output": { - "S3Uri": f"s3://{baseline_job_output_location}/{baseline_job_name}", - "LocalPath": "/opt/ml/processing/output", - "S3UploadMode": "EndOfJob", - }, - }, - ], - }, - "ProcessingResources": { - "ClusterConfig": { - "InstanceCount": 1, - "InstanceType": instance_type, - "VolumeSizeInGB": instance_volume_size, - } - }, - "AppSpecification": { - "ImageUri": get_built_in_model_monitor_container_uri(boto3.session.Session().region_name), - }, - "Environment": { - "dataset_format": '{"csv": {"header": true, "output_columns_position": "START"}}', - "dataset_source": "/opt/ml/processing/input/baseline_dataset_input", - "output_path": 
"/opt/ml/processing/output", - "publish_cloudwatch_metrics": "Disabled", - }, - "RoleArn": role_arn, - "Tags": [ - {"Key": "stack_name", "Value": stack_name}, - ], - } - - # optional value, if the client did not provide a value, the orchestraion lambda sets it to -1 - if max_runtime_seconds != -1: - request.update({"StoppingCondition": {"MaxRuntimeInSeconds": max_runtime_seconds}}) - # add kms key if provided - if kms_key_arn: - request["ProcessingOutputConfig"].update({"KmsKeyId": kms_key_arn}) - request["ProcessingResources"]["ClusterConfig"].update({"VolumeKmsKeyId": kms_key_arn}) - - # Sending request to create data baseline processing job - response = sm_client.create_processing_job(**request) - - logger.info(f"Finished creating data baseline processing job. respons: {response}") - logger.info("Data Baseline Processing JobArn: " + response["ProcessingJobArn"]) - - except botocore.exceptions.ClientError as error: - logger.info(str(error)) - logger.info(f"Creation of baseline processing job: {baseline_job_name} faild.") diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/fixtures/baseline_fixtures.py b/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/fixtures/baseline_fixtures.py deleted file mode 100644 index ed4af2e..0000000 --- a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/fixtures/baseline_fixtures.py +++ /dev/null @@ -1,115 +0,0 @@ -################################################################################################################## -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # -# # -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # -# with the License. A copy of the License is located at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # -# and limitations under the License. 
# -# ##################################################################################################################### -import os -import boto3 -import pytest -from shared.helper import get_built_in_model_monitor_container_uri - - -@pytest.fixture(autouse=True) -def mock_env_variables(): - new_env = { - "BASELINE_JOB_NAME": "test-baseline-job", - "ASSETS_BUCKET": "testbucket", - "TRAINING_DATA_LOCATION": "training_data.csv", - "BASELINE_JOB_OUTPUT_LOCATION": "baseline_output", - "INSTANCE_TYPE": "ml.m5.4xlarge", - "INSTANCE_VOLUME_SIZE": "20", - "ROLE_ARN": "arn:aws:iam::account:role/myrole", - "STACK_NAME": "test-stack", - "KMS_KEY_ARN": "mykey", - "MAX_RUNTIME_SECONDS": "3600", - } - os.environ = {**os.environ, **new_env} - - -@pytest.fixture -def sm_describe_processing_job_params(): - return {"ProcessingJobName": os.environ["BASELINE_JOB_NAME"]} - - -local_path = "/opt/ml/processing/input/baseline_dataset_input" -output_path = "/opt/ml/processing/output" - - -@pytest.fixture -def sm_create_baseline_expected_params(): - return { - "ProcessingJobName": os.environ["BASELINE_JOB_NAME"], - "ProcessingInputs": [ - { - "InputName": "baseline_dataset_input", - "S3Input": { - "S3Uri": "s3://" + os.environ["ASSETS_BUCKET"] + "/" + os.environ["TRAINING_DATA_LOCATION"], - "LocalPath": local_path, - "S3DataType": "S3Prefix", - "S3InputMode": "File", - "S3DataDistributionType": "FullyReplicated", - "S3CompressionType": "None", - }, - } - ], - "ProcessingOutputConfig": { - "Outputs": [ - { - "OutputName": "baseline_dataset_output", - "S3Output": { - "S3Uri": "s3://" - + os.environ["BASELINE_JOB_OUTPUT_LOCATION"] - + "/" - + os.environ["BASELINE_JOB_NAME"], - "LocalPath": output_path, - "S3UploadMode": "EndOfJob", - }, - }, - ], - "KmsKeyId": "mykey", - }, - "ProcessingResources": { - "ClusterConfig": { - "InstanceCount": 1, - "InstanceType": os.environ["INSTANCE_TYPE"], - "VolumeSizeInGB": int(os.environ["INSTANCE_VOLUME_SIZE"]), - "VolumeKmsKeyId": "mykey", - } - }, - "StoppingCondition": {"MaxRuntimeInSeconds": int(os.environ["MAX_RUNTIME_SECONDS"])}, - "AppSpecification": { - "ImageUri": get_built_in_model_monitor_container_uri(boto3.session.Session().region_name), - }, - "Environment": { - "dataset_format": '{"csv": {"header": true, "output_columns_position": "START"}}', - "dataset_source": local_path, - "output_path": output_path, - "publish_cloudwatch_metrics": "Disabled", - }, - "RoleArn": os.environ["ROLE_ARN"], - "Tags": [ - {"Key": "stack_name", "Value": os.environ["STACK_NAME"]}, - ], - } - - -@pytest.fixture -def sm_create_job_response_200(): - return { - "ResponseMetadata": {"HTTPStatusCode": 200}, - "ProcessingJobArn": "arn:aws:sagemaker:region:account:processing-job/name", - } - - -@pytest.fixture() -def event(): - return { - "message": "Start data baseline job", - } diff --git a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/test_create_data_baseline.py b/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/test_create_data_baseline.py deleted file mode 100644 index 26014ee..0000000 --- a/source/lib/blueprints/byom/lambdas/create_data_baseline_job/tests/test_create_data_baseline.py +++ /dev/null @@ -1,47 +0,0 @@ -################################################################################################################## -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # -# # -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # -# with the License. 
A copy of the License is located at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # -# and limitations under the License. # -# ##################################################################################################################### -from unittest.mock import patch -from moto import mock_sts -from botocore.stub import Stubber -from main import handler -from shared.helper import get_client, reset_client, get_built_in_model_monitor_container_uri -from tests.fixtures.baseline_fixtures import ( - mock_env_variables, - sm_create_baseline_expected_params, - sm_create_job_response_200, - event, -) - - -@mock_sts -def test_handler_success( - sm_create_baseline_expected_params, - sm_create_job_response_200, - event, -): - - sm_client = get_client("sagemaker") - sm_stubber = Stubber(sm_client) - - # success path - sm_stubber.add_response("create_processing_job", sm_create_job_response_200, sm_create_baseline_expected_params) - - with sm_stubber: - handler(event, {}) - reset_client() - - -def test_handler_exception(event): - with patch("boto3.client"): - handler(event, context={}) - reset_client() diff --git a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/main.py b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/main.py index 7b2ff6c..7ae467c 100644 --- a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/main.py +++ b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/main.py @@ -59,7 +59,7 @@ def lambda_handler(event, context): # {"stackset_name": "model2", "artifact":"SourceArtifact", # "template_file":"realtime-inference-pipeline.yaml", # "stage_params_file":"staging-config.json", - # "accound_ids":[""], "org_ids":[""], + # "account_ids":[""], "org_ids":[""], # "regions":["us-east-1"]} params = get_user_params(job_data) @@ -70,15 +70,15 @@ def lambda_handler(event, context): artifact = params["artifact"] template_file = params["template_file"] stage_params_file = params["stage_params_file"] - accound_ids = params["accound_ids"] + account_ids = params["account_ids"] org_ids = params["org_ids"] regions = params["regions"] if "continuationToken" in job_data: - logger.info(f"Ckecking the status of {stackset_name}") + logger.info(f"Checking the status of {stackset_name}") # If we're continuing then the create/update has already been triggered # we just need to check if it has finished. 
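Editor's note: the `continuationToken` check above follows the standard pattern for long-running AWS CodePipeline Lambda actions — the first invocation starts the StackSet create/update and reports success with a continuation token, and CodePipeline keeps re-invoking the function with that token until the operation finishes. A minimal sketch of that reporting step, assuming a boto3 `codepipeline` client and a caller-supplied `operation_done` flag (the function name below is illustrative, not one of the solution's helpers):

```python
import boto3

cp_client = boto3.client("codepipeline")


def report_stackset_job_status(job_id, operation_done, continuation_token=None):
    """Illustrative sketch of the CodePipeline continuation pattern (not the solution's helper)."""
    if operation_done:
        # The StackSet operation finished; let the pipeline move on to the next action.
        cp_client.put_job_success_result(jobId=job_id)
    else:
        # Report success with a continuation token so CodePipeline re-invokes the
        # Lambda later; the token then shows up as job_data["continuationToken"].
        cp_client.put_job_success_result(
            jobId=job_id,
            continuationToken=continuation_token or job_id,
        )
```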
- check_stackset_update_status(job_id, stackset_name, accound_ids[0], regions[0], cf_client, cp_client) + check_stackset_update_status(job_id, stackset_name, account_ids[0], regions[0], cf_client, cp_client) else: logger.info(f"Creating StackSet {stackset_name} and its instances") @@ -95,7 +95,7 @@ def lambda_handler(event, context): stackset_name, template, json.loads(stage_params), - accound_ids, + account_ids, org_ids, regions, cf_client, diff --git a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/stackset_helpers.py b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/stackset_helpers.py index 4984ee9..4b7bc90 100644 --- a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/stackset_helpers.py +++ b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/stackset_helpers.py @@ -42,7 +42,7 @@ def find_artifact(artifacts, name): if artifact["name"] == name: return artifact - raise Exception(f"Input artifact named {name} not found in lambda's event") + raise ValueError(f"Input artifact named {name} not found in lambda's event") def get_template(s3_client, artifact, template_file_in_zip, params_file_in_zip): @@ -377,14 +377,14 @@ def validate_user_params(decoded_params, list_of_required_params): Args: decoded_params: json object of user parameters passed via codepipline's event - list_of_required_params: list of reqyured parameters + list_of_required_params: list of required parameters Raises: Your UserParameters JSON must include """ for param in list_of_required_params: if param not in decoded_params: - raise Exception(f"Your UserParameters JSON must include {param}") + raise ValueError(f"Your UserParameters JSON must include {param}") def get_user_params(job_data): @@ -405,13 +405,13 @@ def get_user_params(job_data): "artifact", "template_file", "stage_params_file", - "accound_ids", + "account_ids", "org_ids", "regions", ] try: # Get the user parameters which contain the stackset_name, artifact, template_name, - # stage_params, accound_ids, org_ids, and regions + # stage_params, account_ids, org_ids, and regions user_parameters = job_data["actionConfiguration"]["configuration"]["UserParameters"] decoded_parameters = json.loads(user_parameters) @@ -419,7 +419,7 @@ def get_user_params(job_data): # We're expecting the user parameters to be encoded as JSON # so we can pass multiple values. If the JSON can't be decoded # then fail the job with a helpful message. 
- raise Exception("UserParameters could not be decoded as JSON", e) + raise ValueError("UserParameters could not be decoded as JSON", e) # Validate required params were provided validate_user_params( diff --git a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/fixtures/stackset_fixtures.py b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/fixtures/stackset_fixtures.py index febd2a3..374d214 100644 --- a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/fixtures/stackset_fixtures.py +++ b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/fixtures/stackset_fixtures.py @@ -61,7 +61,7 @@ def required_user_params(): "artifact", "template_file", "stage_params_file", - "accound_ids", + "account_ids", "org_ids", "regions", ] @@ -74,7 +74,7 @@ def mocked_decoded_parameters(): "artifact": "SourceArtifact", "template_file": "template.yaml", "stage_params_file": "staging-config-test.json", - "accound_ids": ["moceked_account_id"], + "account_ids": ["mocked_account_id"], "org_ids": ["mocked_org_unit_id"], "regions": ["us-east-1"], } @@ -178,4 +178,4 @@ def mocked_stackset(cf_client, stackset_name, mocked_template_parameters): StackSetName=stackset_name, TemplateBody=stackset_name, Parameters=mocked_template_parameters, - ) \ No newline at end of file + ) diff --git a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/test_create_update_cf_stackset.py b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/test_create_update_cf_stackset.py index 13add37..5f3aa01 100644 --- a/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/test_create_update_cf_stackset.py +++ b/source/lib/blueprints/byom/lambdas/create_update_cf_stackset/tests/test_create_update_cf_stackset.py @@ -297,7 +297,7 @@ def test_check_stackset_update_status( ) # assert the put_job_success is called mocked_put_job_success.assert_called_once() - # assert it was called with the exoected arguments + # assert it was called with the expected arguments mocked_put_job_success.assert_called_with(mocked_job_id, "StackSet and its instance update complete", None) # Case 2: asserting for the second branch status in ["RUNNING","PENDING"]: @@ -311,7 +311,7 @@ def test_check_stackset_update_status( ) # assert the put_job_continuation is called mocked_put_job_continuation.assert_called_once() - # assert it was called with the exoected arguments + # assert it was called with the expected arguments mocked_put_job_continuation.assert_called_with(mocked_job_id, "StackSet update still in progress", None) # Case 3: asserting for the last branch status not one of ["RUNNING","PENDING", "SUCCEEDED"]: @@ -325,7 +325,7 @@ def test_check_stackset_update_status( ) # assert the put_job_continuation is called mocked_put_job_failure.assert_called_once() - # assert it was called with the exoected arguments + # assert it was called with the expected arguments mocked_put_job_failure.assert_called_with(mocked_job_id, "Update failed: FAILED", None) diff --git a/source/lib/blueprints/byom/lambdas/invoke_lambda_custom_resource/index.py b/source/lib/blueprints/byom/lambdas/invoke_lambda_custom_resource/index.py index f47f467..315f2c7 100644 --- a/source/lib/blueprints/byom/lambdas/invoke_lambda_custom_resource/index.py +++ b/source/lib/blueprints/byom/lambdas/invoke_lambda_custom_resource/index.py @@ -46,7 +46,7 @@ def invoke_lambda(event, _, lm_client=lambda_client): return resource_id else: - raise Exception(f"The Resource {resource} is unsupported by the Invoke 
Lambda custom resource.") + raise ValueError(f"The Resource {resource} is unsupported by the Invoke Lambda custom resource.") except Exception as e: logger.error(f"Custom resource failed: {str(e)}") diff --git a/source/lib/blueprints/byom/model_monitor.py b/source/lib/blueprints/byom/model_monitor.py index 937bb10..159c3e2 100644 --- a/source/lib/blueprints/byom/model_monitor.py +++ b/source/lib/blueprints/byom/model_monitor.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. A copy of the License is located at # @@ -15,110 +15,117 @@ core, ) from lib.blueprints.byom.pipeline_definitions.deploy_actions import ( - create_data_baseline_job, + create_baseline_job_lambda, + sagemaker_layer, create_invoke_lambda_custom_resource, ) from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_blueprint_bucket_name_parameter, - create_assets_bucket_name_parameter, - create_baseline_job_name_parameter, - create_monitoring_schedule_name_parameter, - create_endpoint_name_parameter, - create_baseline_job_output_location_parameter, - create_monitoring_output_location_parameter, - create_instance_type_parameter, - create_training_data_parameter, - create_monitoring_type_parameter, - create_instance_volume_size_parameter, - create_max_runtime_seconds_parameter, - create_kms_key_arn_parameter, - create_kms_key_arn_provided_condition, - create_data_capture_bucket_name_parameter, - create_data_capture_location_parameter, - create_schedule_expression_parameter, - create_algorithm_image_uri_parameter, - create_baseline_output_bucket_name_parameter, + ParameteresFactory as pf, + ConditionsFactory as cf, ) from lib.blueprints.byom.pipeline_definitions.sagemaker_monitor_role import create_sagemaker_monitor_role -from lib.blueprints.byom.pipeline_definitions.sagemaker_monitoring_schedule import create_sagemaker_monitoring_scheduale +from lib.blueprints.byom.pipeline_definitions.sagemaker_model_monitor_construct import SageMakerModelMonitor class ModelMonitorStack(core.Stack): - def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: + def __init__(self, scope: core.Construct, id: str, monitoring_type: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) + # validate the provided monitoring_type + if monitoring_type not in ["DataQuality", "ModelQuality"]: + raise ValueError( + ( + f"The {monitoring_type} is not valid. 
Currently supported Monitoring Types are: " + f"['DataQuality'|'ModelQuality']" + ) + ) + + # Baseline/Monitor attributes, this will be updated based on the monitoring_type + self.baseline_attributes = dict() + self.monitor_attributes = dict() + # Parameteres # - blueprint_bucket_name = create_blueprint_bucket_name_parameter(self) - assets_bucket_name = create_assets_bucket_name_parameter(self) - endpoint_name = create_endpoint_name_parameter(self) - baseline_job_output_location = create_baseline_job_output_location_parameter(self) - training_data = create_training_data_parameter(self) - instance_type = create_instance_type_parameter(self) - instance_volume_size = create_instance_volume_size_parameter(self) - monitoring_type = create_monitoring_type_parameter(self) - max_runtime_seconds = create_max_runtime_seconds_parameter(self) - kms_key_arn = create_kms_key_arn_parameter(self) - baseline_job_name = create_baseline_job_name_parameter(self) - monitoring_schedule_name = create_monitoring_schedule_name_parameter(self) - data_capture_bucket = create_data_capture_bucket_name_parameter(self) - baseline_output_bucket = create_baseline_output_bucket_name_parameter(self) - data_capture_s3_location = create_data_capture_location_parameter(self) - monitoring_output_location = create_monitoring_output_location_parameter(self) - schedule_expression = create_schedule_expression_parameter(self) - image_uri = create_algorithm_image_uri_parameter(self) + blueprint_bucket_name = pf.create_blueprint_bucket_name_parameter(self) + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + endpoint_name = pf.create_endpoint_name_parameter(self) + baseline_job_output_location = pf.create_baseline_job_output_location_parameter(self) + baseline_data = pf.create_baseline_data_parameter(self) + instance_type = pf.create_instance_type_parameter(self) + instance_count = pf.create_instance_count_parameter(self) + instance_volume_size = pf.create_instance_volume_size_parameter(self) + baseline_max_runtime_seconds = pf.create_baseline_max_runtime_seconds_parameter(self) + monitor_max_runtime_seconds = pf.create_monitor_max_runtime_seconds_parameter(self, "ModelQuality") + kms_key_arn = pf.create_kms_key_arn_parameter(self) + baseline_job_name = pf.create_baseline_job_name_parameter(self) + monitoring_schedule_name = pf.create_monitoring_schedule_name_parameter(self) + data_capture_bucket = pf.create_data_capture_bucket_name_parameter(self) + baseline_output_bucket = pf.create_baseline_output_bucket_name_parameter(self) + data_capture_s3_location = pf.create_data_capture_location_parameter(self) + monitoring_output_location = pf.create_monitoring_output_location_parameter(self) + schedule_expression = pf.create_schedule_expression_parameter(self) + image_uri = pf.create_algorithm_image_uri_parameter(self) + + # add ModelQuality specific parameters/conditions, and update self.baseline_attributes/self.monitor_attributes + if monitoring_type == "ModelQuality": + self._add_model_quality_resources() # conditions - kms_key_arn_provided = create_kms_key_arn_provided_condition(self, kms_key_arn) + kms_key_arn_provided = cf.create_kms_key_arn_provided_condition(self, kms_key_arn) # Resources # - assets_bucket = s3.Bucket.from_bucket_name(self, "AssetsBucket", assets_bucket_name.value_as_string) + assets_bucket = s3.Bucket.from_bucket_name(self, "ImportedAssetsBucket", assets_bucket_name.value_as_string) # getting blueprint bucket object from its name - will be used later in the stack - blueprint_bucket = 
s3.Bucket.from_bucket_name(self, "BlueprintBucket", blueprint_bucket_name.value_as_string) + blueprint_bucket = s3.Bucket.from_bucket_name( + self, "ImportedBlueprintBucket", blueprint_bucket_name.value_as_string + ) + + # create sagemaker layer + sm_layer = sagemaker_layer(self, blueprint_bucket) - # creating data baseline job - baseline_job_lambda = create_data_baseline_job( + # update Baseline attributes + self.baseline_attributes.update( + dict( + monitoring_type=monitoring_type, + baseline_job_name=baseline_job_name.value_as_string, + baseline_data_location=baseline_data.value_as_string, + baseline_job_output_location=baseline_job_output_location.value_as_string, + endpoint_name=endpoint_name.value_as_string, + instance_type=instance_type.value_as_string, + instance_volume_size=instance_volume_size.value_as_string, + max_runtime_seconds=baseline_max_runtime_seconds.value_as_string, + kms_key_arn=core.Fn.condition_if( + kms_key_arn_provided.logical_id, kms_key_arn.value_as_string, core.Aws.NO_VALUE + ).to_string(), + kms_key_arn_provided_condition=kms_key_arn_provided, + stack_name=core.Aws.STACK_NAME, + ) + ) + # create baseline job lambda action + baseline_job_lambda = create_baseline_job_lambda( self, - blueprint_bucket, - assets_bucket, - baseline_job_name.value_as_string, - training_data.value_as_string, - baseline_job_output_location.value_as_string, - endpoint_name.value_as_string, - instance_type.value_as_string, - instance_volume_size.value_as_string, - max_runtime_seconds.value_as_string, - core.Fn.condition_if( - kms_key_arn_provided.logical_id, kms_key_arn.value_as_string, core.Aws.NO_VALUE - ).to_string(), - kms_key_arn_provided, - core.Aws.STACK_NAME, - ) - - # create custom resource to invoke the batch transform lambda + blueprint_bucket=blueprint_bucket, + assets_bucket=assets_bucket, + sm_layer=sm_layer, + **self.baseline_attributes, + ) + + # create custom resource to invoke the baseline job lambda + # remove the condition from the custom resource properties. 
Otherwise, CFN will give an error + del self.baseline_attributes["kms_key_arn_provided_condition"] invoke_lambda_custom_resource = create_invoke_lambda_custom_resource( - self, - "InvokeBaselineLambda", - baseline_job_lambda.function_arn, - baseline_job_lambda.function_name, - blueprint_bucket, - { + scope=self, + id="InvokeBaselineLambda", + lambda_function_arn=baseline_job_lambda.function_arn, + lambda_function_name=baseline_job_lambda.function_name, + blueprint_bucket=blueprint_bucket, + # add baseline attributes to the invoke lambda custom resource, so any change to these attributes + # (via template update) will re-invoke the baseline lambda and re-calculate the baseline + custom_resource_properties={ "Resource": "InvokeLambda", "function_name": baseline_job_lambda.function_name, "assets_bucket_name": assets_bucket_name.value_as_string, - "endpoint_name": endpoint_name.value_as_string, - "instance_type": instance_type.value_as_string, - "baseline_job_output_location": baseline_job_output_location.value_as_string, - "training_data": training_data.value_as_string, - "instance_volume_size": instance_volume_size.value_as_string, - "monitoring_schedule_name": monitoring_schedule_name.value_as_string, - "baseline_job_name": baseline_job_name.value_as_string, - "max_runtime_seconds": max_runtime_seconds.value_as_string, - "data_capture_s3_location": data_capture_s3_location.value_as_string, - "monitoring_output_location": monitoring_output_location.value_as_string, - "schedule_expression": schedule_expression.value_as_string, - "image_uri": image_uri.value_as_string, - "kms_key_arn": kms_key_arn.value_as_string, + **self.baseline_attributes, }, ) @@ -127,8 +134,8 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: # creating monitoring schedule sagemaker_role = create_sagemaker_monitor_role( - self, - "MLOpsSagemakerMonitorRole", + scope=self, + id="MLOpsSagemakerMonitorRole", kms_key_arn=kms_key_arn.value_as_string, assets_bucket_name=assets_bucket_name.value_as_string, data_capture_bucket=data_capture_bucket.value_as_string, @@ -138,37 +145,49 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: output_s3_location=monitoring_output_location.value_as_string, kms_key_arn_provided_condition=kms_key_arn_provided, baseline_job_name=baseline_job_name.value_as_string, - monitoring_schedual_name=monitoring_schedule_name.value_as_string, + monitoring_schedule_name=monitoring_schedule_name.value_as_string, + endpoint_name=endpoint_name.value_as_string, + model_monitor_ground_truth_input=None + if monitoring_type == "DataQuality" + else self.monitor_attributes["ground_truth_s3_uri"], ) + # resource tags + resource_tags = [{"key": "stack-name", "value": core.Aws.STACK_NAME}] + + # update attributes + self.monitor_attributes.update( + dict( + monitoring_schedule_name=monitoring_schedule_name.value_as_string, + endpoint_name=endpoint_name.value_as_string, + baseline_job_name=baseline_job_name.value_as_string, + baseline_job_output_location=baseline_job_output_location.value_as_string, + schedule_expression=schedule_expression.value_as_string, + monitoring_output_location=monitoring_output_location.value_as_string, + instance_type=instance_type.value_as_string, + instance_count=instance_count.value_as_string, + instance_volume_size=instance_volume_size.value_as_string, + max_runtime_seconds=monitor_max_runtime_seconds.value_as_string, + kms_key_arn=core.Fn.condition_if( + kms_key_arn_provided.logical_id, kms_key_arn.value_as_string, core.Aws.NO_VALUE + 
).to_string(), + role_arn=sagemaker_role.role_arn, + image_uri=image_uri.value_as_string, + monitoring_type=monitoring_type, + tags=resource_tags, + ) + ) # create Sagemaker monitoring Schedule - sagemaker_monitoring_scheduale = create_sagemaker_monitoring_scheduale( - self, - "MonitoringSchedule", - monitoring_schedule_name.value_as_string, - endpoint_name.value_as_string, - baseline_job_name.value_as_string, - baseline_job_output_location.value_as_string, - schedule_expression.value_as_string, - monitoring_output_location.value_as_string, - instance_type.value_as_string, - instance_volume_size.value_as_number, - max_runtime_seconds.value_as_number, - core.Fn.condition_if( - kms_key_arn_provided.logical_id, kms_key_arn.value_as_string, core.Aws.NO_VALUE - ).to_string(), - sagemaker_role.role_arn, - image_uri.value_as_string, - core.Aws.STACK_NAME, - ) - - # add dependency on invoke_lambda_custom_resource - sagemaker_monitoring_scheduale.node.add_dependency(invoke_lambda_custom_resource) + sagemaker_monitor = SageMakerModelMonitor(self, f"{monitoring_type}Monitor", **self.monitor_attributes) + + # add job definition dependency on sagemaker role and invoke_lambda_custom_resource (so, the baseline job is created) + sagemaker_monitor.job_definition.node.add_dependency(sagemaker_role) + sagemaker_monitor.job_definition.node.add_dependency(invoke_lambda_custom_resource) # Outputs # core.CfnOutput( self, - id="DataBaselineJobName", + id="BaselineName", value=baseline_job_name.value_as_string, ) core.CfnOutput( @@ -179,19 +198,16 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: core.CfnOutput( self, id="MonitoringScheduleType", - value=monitoring_type.value_as_string, + value=monitoring_type, ) core.CfnOutput( self, - id="BaselineJobOutputLocation", - value=( - f"https://s3.console.aws.amazon.com/s3/buckets/{baseline_job_output_location.value_as_string}" - f"/{baseline_job_name.value_as_string}/" - ), + id="BaselineJobOutput", + value=f"https://s3.console.aws.amazon.com/s3/buckets/{baseline_job_output_location.value_as_string}/", ) core.CfnOutput( self, - id="MonitoringScheduleOutputLocation", + id="MonitoringScheduleOutput", value=( f"https://s3.console.aws.amazon.com/s3/buckets/{monitoring_output_location.value_as_string}/" f"{endpoint_name.value_as_string}/{monitoring_schedule_name.value_as_string}/" @@ -204,9 +220,70 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: ) core.CfnOutput( self, - id="DataCaptureLocation", + id="DataCaptureS3Location", value=( f"https://s3.console.aws.amazon.com/s3/buckets/{data_capture_s3_location.value_as_string}" f"/{endpoint_name.value_as_string}/" ), ) + + def _add_model_quality_resources(self): + """ + Adds ModelQuality specific parameters/conditions and updates self.baseline_attributes/self.monitor_attributes + """ + # add baseline job attributes (they are different from Monitor attributes) + baseline_inference_attribute = pf.create_inference_attribute_parameter(self, "Baseline") + baseline_probability_attribute = pf.create_probability_attribute_parameter(self, "Baseline") + ground_truth_attribute = pf.create_ground_truth_attribute_parameter(self) + # add monitor attributes + monitor_inference_attribute = pf.create_inference_attribute_parameter(self, "Monitor") + monitor_probability_attribute = pf.create_probability_attribute_parameter(self, "Monitor") + ground_truth_s3_uri = pf.create_ground_truth_s3_uri_parameter(self) + # problem_type and probability_threshold_attribute are the same for both + problem_type = 
pf.create_problem_type_parameter(self) + probability_threshold_attribute = pf.create_probability_threshold_attribute_parameter(self) + + # add conditions (used by monitor) + is_regression_or_multiclass_classification_problem = ( + cf.create_problem_type_regression_or_multiclass_classification_condition(self, problem_type) + ) + is_binary_classification_problem = cf.create_problem_type_binary_classification_condition(self, problem_type) + + # add ModelQuality Baseline attributes + self.baseline_attributes.update( + dict( + problem_type=problem_type.value_as_string, + ground_truth_attribute=ground_truth_attribute.value_as_string, + inference_attribute=baseline_inference_attribute.value_as_string, + probability_attribute=baseline_probability_attribute.value_as_string, + probability_threshold_attribute=probability_threshold_attribute.value_as_string, + ) + ) + + # add ModelQuality Monitor attributes + self.monitor_attributes.update( + dict( + problem_type=problem_type.value_as_string, + ground_truth_s3_uri=ground_truth_s3_uri.value_as_string, + # inference_attribute is required for Regression/Multiclass Classification problems + # probability_attribute/probability_threshold_attribute are not used + inference_attribute=core.Fn.condition_if( + is_regression_or_multiclass_classification_problem.logical_id, + monitor_inference_attribute.value_as_string, + core.Aws.NO_VALUE, + ).to_string(), + # for a Binary Classification problem, we use probability_attribute and probability_threshold_attribute. + # note: probability_attribute is the index of the predicted probability in the captured data by the + # SageMaker endpoint. Tepically, probability_attribute="0" and probability_threshold_attribute="0.5" + probability_attribute=core.Fn.condition_if( + is_binary_classification_problem.logical_id, + monitor_probability_attribute.value_as_string, + core.Aws.NO_VALUE, + ).to_string(), + probability_threshold_attribute=core.Fn.condition_if( + is_binary_classification_problem.logical_id, + probability_threshold_attribute.value_as_string, + core.Aws.NO_VALUE, + ).to_string(), + ) + ) diff --git a/source/lib/blueprints/byom/multi_account_codepipeline.py b/source/lib/blueprints/byom/multi_account_codepipeline.py index b93689e..2037529 100644 --- a/source/lib/blueprints/byom/multi_account_codepipeline.py +++ b/source/lib/blueprints/byom/multi_account_codepipeline.py @@ -32,17 +32,8 @@ suppress_sns, ) from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_notification_email_parameter, - create_template_zip_name_parameter, - create_template_file_name_parameter, - create_stage_params_file_name_parameter, - create_blueprint_bucket_name_parameter, - create_assets_bucket_name_parameter, - create_stack_name_parameter, - create_account_id_parameter, - create_org_id_parameter, - create_delegated_admin_parameter, - create_delegated_admin_condition, + ParameteresFactory as pf, + ConditionsFactory as cf, ) @@ -51,38 +42,40 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Parameteres # - notification_email = create_notification_email_parameter(self) - template_zip_name = create_template_zip_name_parameter(self) - template_file_name = create_template_file_name_parameter(self) - dev_params_file_name = create_stage_params_file_name_parameter(self, "DEV_PARAMS_NAME", "development") - staging_params_file_name = create_stage_params_file_name_parameter(self, "STAGING_PARAMS_NAME", "staging") - prod_params_file_name = 
create_stage_params_file_name_parameter(self, "PROD_PARAMS_NAME", "production") + notification_email = pf.create_notification_email_parameter(self) + template_zip_name = pf.create_template_zip_name_parameter(self) + template_file_name = pf.create_template_file_name_parameter(self) + dev_params_file_name = pf.create_stage_params_file_name_parameter(self, "DevParamsName", "development") + staging_params_file_name = pf.create_stage_params_file_name_parameter(self, "StagingParamsName", "staging") + prod_params_file_name = pf.create_stage_params_file_name_parameter(self, "ProdParamsName", "production") # create development parameters account_type = "development" - dev_account_id = create_account_id_parameter(self, "DEV_ACCOUNT_ID", account_type) - dev_org_id = create_org_id_parameter(self, "DEV_ORG_ID", account_type) + dev_account_id = pf.create_account_id_parameter(self, "DevAccountId", account_type) + dev_org_id = pf.create_org_id_parameter(self, "DevOrgId", account_type) # create staging parameters account_type = "staging" - staging_account_id = create_account_id_parameter(self, "STAGING_ACCOUNT_ID", account_type) - staging_org_id = create_org_id_parameter(self, "STAGING_ORG_ID", account_type) + staging_account_id = pf.create_account_id_parameter(self, "StagingAccountId", account_type) + staging_org_id = pf.create_org_id_parameter(self, "StagingOrgId", account_type) # create production parameters account_type = "production" - prod_account_id = create_account_id_parameter(self, "PROD_ACCOUNT_ID", account_type) - prod_org_id = create_org_id_parameter(self, "PROD_ORG_ID", account_type) + prod_account_id = pf.create_account_id_parameter(self, "ProdAccountId", account_type) + prod_org_id = pf.create_org_id_parameter(self, "ProdOrgId", account_type) # assets parameters - blueprint_bucket_name = create_blueprint_bucket_name_parameter(self) - assets_bucket_name = create_assets_bucket_name_parameter(self) - stack_name = create_stack_name_parameter(self) + blueprint_bucket_name = pf.create_blueprint_bucket_name_parameter(self) + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + stack_name = pf.create_stack_name_parameter(self) # delegated admin account - is_delegated_admin = create_delegated_admin_parameter(self) + is_delegated_admin = pf.create_delegated_admin_parameter(self) # create use delegated admin account condition - delegated_admin_account_condition = create_delegated_admin_condition(self, is_delegated_admin) + delegated_admin_account_condition = cf.create_delegated_admin_condition(self, is_delegated_admin) # Resources # - assets_bucket = s3.Bucket.from_bucket_name(self, "AssetsBucket", assets_bucket_name.value_as_string) + assets_bucket = s3.Bucket.from_bucket_name(self, "ImportedAssetsBucket", assets_bucket_name.value_as_string) # getting blueprint bucket object from its name - will be used later in the stack - blueprint_bucket = s3.Bucket.from_bucket_name(self, "BlueprintBucket", blueprint_bucket_name.value_as_string) + blueprint_bucket = s3.Bucket.from_bucket_name( + self, "ImportedBlueprintBucket", blueprint_bucket_name.value_as_string + ) # create sns topic and subscription pipeline_notification_topic = sns.Topic( diff --git a/source/lib/blueprints/byom/pipeline_definitions/configure_multi_account.py b/source/lib/blueprints/byom/pipeline_definitions/configure_multi_account.py index b8bed0b..c210679 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/configure_multi_account.py +++ b/source/lib/blueprints/byom/pipeline_definitions/configure_multi_account.py @@ 
-18,11 +18,7 @@ create_ecr_repo_policy, model_package_group_policy, ) -from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_account_id_parameter, - create_org_id_parameter, - create_delegated_admin_parameter, -) +from lib.blueprints.byom.pipeline_definitions.templates_parameters import ParameteresFactory as pf def configure_multi_account_parameters_permissions( @@ -34,7 +30,6 @@ def configure_multi_account_parameters_permissions( orchestrator_lambda_function, paramaters_list, paramaters_labels, - send_data_cr_properties, ): """ configure_multi_account_parameters_permissions creates parameters and permissions for the multi-account option @@ -47,25 +42,24 @@ def configure_multi_account_parameters_permissions( :orchestrator_lambda_function: orchestrator lambda function CDK object :paramaters_list: list parameters' logical ids :paramaters_labels: dictionary of paramaters labels - :send_data_cr_properties: custom resource properties (dictionary) :return: (paramaters_list, paramaters_labels, send_data_cr_properties) """ # add parameters # delegated admin account - is_delegated_admin = create_delegated_admin_parameter(scope) + is_delegated_admin = pf.create_delegated_admin_parameter(scope) # create development parameters account_type = "development" - dev_account_id = create_account_id_parameter(scope, "DEV_ACCOUNT_ID", account_type) - dev_org_id = create_org_id_parameter(scope, "DEV_ORG_ID", account_type) + dev_account_id = pf.create_account_id_parameter(scope, "DevAccountId", account_type) + dev_org_id = pf.create_org_id_parameter(scope, "DevOrgId", account_type) # create staging parameters account_type = "staging" - staging_account_id = create_account_id_parameter(scope, "STAGING_ACCOUNT_ID", account_type) - staging_org_id = create_org_id_parameter(scope, "STAGING_ORG_ID", account_type) + staging_account_id = pf.create_account_id_parameter(scope, "StagingAccountId", account_type) + staging_org_id = pf.create_org_id_parameter(scope, "StagingOrgId", account_type) # create production parameters account_type = "production" - prod_account_id = create_account_id_parameter(scope, "PROD_ACCOUNT_ID", account_type) - prod_org_id = create_org_id_parameter(scope, "PROD_ORG_ID", account_type) + prod_account_id = pf.create_account_id_parameter(scope, "ProdAccountId", account_type) + prod_org_id = pf.create_org_id_parameter(scope, "ProdOrgId", account_type) principals = [ iam.AccountPrincipal(dev_account_id.value_as_string), @@ -73,7 +67,7 @@ def configure_multi_account_parameters_permissions( iam.AccountPrincipal(prod_account_id.value_as_string), ] - # add permission to access the assets bicket + # add permission to access the assets bucket assets_bucket.add_to_resource_policy( s3_policy_read( [assets_bucket.bucket_arn, f"{assets_bucket.bucket_arn}/*"], @@ -89,7 +83,7 @@ def configure_multi_account_parameters_permissions( ) ) - # add permissios to other account to pull images + # add permissions to other account to pull images ecr_repo.add_to_resource_policy(create_ecr_repo_policy(principals)) # give other accounts permissions to use the model registry @@ -137,8 +131,5 @@ def configure_multi_account_parameters_permissions( } ) - # add is_delegated_admin_account to the collected data - send_data_cr_properties.update({"IsDelegatedAccount": is_delegated_admin.value_as_string}) - - # return parameters, labels, and send_data_cr_properties - return (paramaters_list, paramaters_labels, send_data_cr_properties) + # return parameters, labels, and is_delegated_admin + return 
(paramaters_list, paramaters_labels, is_delegated_admin.value_as_string) diff --git a/source/lib/blueprints/byom/pipeline_definitions/deploy_actions.py b/source/lib/blueprints/byom/pipeline_definitions/deploy_actions.py index 03afdf2..ec5dbdd 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/deploy_actions.py +++ b/source/lib/blueprints/byom/pipeline_definitions/deploy_actions.py @@ -86,7 +86,7 @@ def batch_transform( :batch_inference_data: location of the batch inference data in assets bucket, in the form of a CDK CfnParameter object :batch_job_output_location: S3 bucket location where the result of the batch job will be stored - :kms_key_arn: optionl kmsKeyArn used to encrypt job's output and instance volume. + :kms_key_arn: optional kmsKeyArn used to encrypt job's output and instance volume. :sm_layer: sagemaker lambda layer :return: Lambda function """ @@ -149,12 +149,13 @@ def batch_transform( return batch_transform_lambda -def create_data_baseline_job( +def create_baseline_job_lambda( scope, # NOSONAR:S107 this function is designed to take many arguments blueprint_bucket, assets_bucket, + monitoring_type, baseline_job_name, - training_data_location, + baseline_data_location, baseline_job_output_location, endpoint_name, instance_type, @@ -163,28 +164,48 @@ def create_data_baseline_job( kms_key_arn, kms_key_arn_provided_condition, stack_name, + sm_layer, + problem_type=None, + ground_truth_attribute=None, + inference_attribute=None, + probability_attribute=None, + probability_threshold_attribute=None, ): """ - create_baseline_job creates a data baseline processing job in a lambda invoked codepipeline action + create_baseline_job_lambda creates a data/model baseline processing job in a lambda invoked codepipeline action :scope: CDK Construct scope that's needed to create CDK resources :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline :assets_bucket: the bucket cdk object where pipeline assets are stored + :monitoring_type: SageMaker's monitoring type. Currently supported types (DataQualit/ModelQuality) :baseline_job_name: name of the baseline job to be created - :training_data_location: location of the training data used to train the deployed model + :baseline_data_location: location of the baseline data to create the SageMaker Model Monitor baseline :baseline_job_output_location: S3 prefix in the S3 assets bucket to store the output of the job :endpoint_name: name of the deployed SageMaker endpoint to be monitored :instance_type: compute instance type for the baseline job, in the form of a CDK CfnParameter object :instance_volume_size: volume size of the EC2 instance - :max_runtime_seconds: max time the job is allowd to run + :max_runtime_seconds: max time the job is allowed to run :kms_key_arn: kms key arn to encrypt the baseline job's output :stack_name: model monitor stack name + :sm_layer: sagemaker lambda layer + :problem_type: used with ModelQuality baseline. Type of Machine Learning problem. Valid values are + ['Regression'|'BinaryClassification'|'MulticlassClassification'] (default: None) + :ground_truth_attribute: index or JSONpath to locate actual label(s) (used with ModelQuality baseline). + (default: None). + :inference_attribute: index or JSONpath to locate predicted label(s) (used with ModelQuality baseline). + Required for 'Regression'|'MulticlassClassification' problems, + and not required for 'BinaryClassification' if 'probability_attribute' and + 'probability_threshold_attribute' are provided (default: None). 
+ :probability_attribute: index or JSONpath to locate probabilities(used with ModelQuality baseline). + Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). + :probability_threshold_attribute: threshold to convert probabilities to binaries (used with ModelQuality baseline). + Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage """ s3_read = s3_policy_read( [ f"arn:aws:s3:::{assets_bucket.bucket_name}", - f"arn:aws:s3:::{assets_bucket.bucket_name}/{training_data_location}", + f"arn:aws:s3:::{assets_bucket.bucket_name}/{baseline_data_location}", ] ) s3_write = s3_policy_write( @@ -194,7 +215,7 @@ def create_data_baseline_job( ) create_baseline_job_policy = sagemaker_baseline_job_policy(baseline_job_name) - sagemaker_logs_policy = sagemaker_logs_metrics_policy_document(scope, "BaselineLogsMetrcis") + sagemaker_logs_policy = sagemaker_logs_metrics_policy_document(scope, "BaselineLogsMetrics") # Kms Key permissions kms_policy = kms_policy_document(scope, "BaselineKmsPolicy", kms_key_arn) @@ -234,27 +255,43 @@ def create_data_baseline_job( add_logs_policy(lambda_role) # defining the lambda function that gets invoked in this stage + # create environment variabes + lambda_environment_variables = { + "MONITORING_TYPE": monitoring_type, + "BASELINE_JOB_NAME": baseline_job_name, + "ASSETS_BUCKET": assets_bucket.bucket_name, + "SAGEMAKER_ENDPOINT_NAME": endpoint_name, + "BASELINE_DATA_LOCATION": baseline_data_location, + "BASELINE_JOB_OUTPUT_LOCATION": baseline_job_output_location, + "INSTANCE_TYPE": instance_type, + "INSTANCE_VOLUME_SIZE": instance_volume_size, + "MAX_RUNTIME_SECONDS": max_runtime_seconds, + "ROLE_ARN": sagemaker_role.role_arn, + "KMS_KEY_ARN": kms_key_arn, + "STACK_NAME": stack_name, + "LOG_LEVEL": "INFO", + } + + # add ModelQuality related variables (they will be passed by the Model Monitor stack) + if monitoring_type == "ModelQuality": + lambda_environment_variables.update( + { + "PROBLEM_TYPE": problem_type, + "GROUND_TRUTH_ATTRIBUTE": ground_truth_attribute, + "INFERENCE_ATTRIBUTE": inference_attribute, + "PROBABILITY_ATTRIBUTE": probability_attribute, + "PROBABILITY_THRESHOLD_ATTRIBUTE": probability_threshold_attribute, + } + ) create_baseline_job_lambda = lambda_.Function( scope, "create_data_baseline_job", runtime=lambda_.Runtime.PYTHON_3_8, handler=lambda_handler, role=lambda_role, - code=lambda_.Code.from_bucket(blueprint_bucket, "blueprints/byom/lambdas/create_data_baseline_job.zip"), - environment={ - "BASELINE_JOB_NAME": baseline_job_name, - "ASSETS_BUCKET": assets_bucket.bucket_name, - "SAGEMAKER_ENDPOINT_NAME": endpoint_name, - "TRAINING_DATA_LOCATION": training_data_location, - "BASELINE_JOB_OUTPUT_LOCATION": baseline_job_output_location, - "INSTANCE_TYPE": instance_type, - "INSTANCE_VOLUME_SIZE": instance_volume_size, - "MAX_RUNTIME_SECONDS": max_runtime_seconds, - "ROLE_ARN": sagemaker_role.role_arn, - "KMS_KEY_ARN": kms_key_arn, - "STACK_NAME": stack_name, - "LOG_LEVEL": "INFO", - }, + code=lambda_.Code.from_bucket(blueprint_bucket, "blueprints/byom/lambdas/create_baseline_job.zip"), + layers=[sm_layer], + environment=lambda_environment_variables, timeout=core.Duration.minutes(10), ) @@ -273,7 +310,7 @@ def create_stackset_action( artifact, template_file, stage_params_file, - accound_ids, + account_ids, org_ids, regions, assets_bucket, @@ -287,11 +324,11 @@ def 
create_stackset_action( :action_name: name of the StackSet action :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline :source_output: CDK object of the Source action's output - :artifact: name of the input aritifcat to the StackSet action + :artifact: name of the input artifact to the StackSet action :template_file: name of the Cloudformation template to be deployed - :stage_params_file: name of the template parameters for the satge - :accound_ids: list of AWS acounts where the stack with be deployed - :org_ids: list of AWS orginizational ids where the stack with be deployed + :stage_params_file: name of the template parameters for the stage + :account_ids: list of AWS accounts where the stack will be deployed + :org_ids: list of AWS organizational ids where the stack will be deployed :regions: list of regions where the stack with be deployed :assets_bucket: the bucket cdk object where pipeline assets are stored :stack_name: name of the stack to be deployed @@ -357,7 +394,7 @@ def create_stackset_action( "artifact": artifact, "template_file": template_file, "stage_params_file": stage_params_file, - "accound_ids": accound_ids, + "account_ids": account_ids, "org_ids": org_ids, "regions": regions, }, @@ -370,7 +407,7 @@ def create_cloudformation_action( scope, action_name, stack_name, source_output, template_file, template_parameters_file, run_order=1 ): """ - create_cloudformation_actio a CloudFormation action to be added to AWS Codepipeline stage + create_cloudformation_action creates a CloudFormation action to be added to AWS Codepipeline stage :scope: CDK Construct scope that's needed to create CDK resources :action_name: name of the StackSet action @@ -387,7 +424,7 @@ def create_cloudformation_action( stack_name=stack_name, capabilities=[cloudformation.CloudFormationCapabilities.NAMED_IAM], template_path=source_output.at_path(template_file), - # Admin permissions are added to the deployement role used by the CF action for simplicity + # Admin permissions are added to the deployment role used by the CF action for simplicity # and deploy different resources by different MLOps pipelines. Roles are defined by the # pipelines' cloudformation templates. 
admin_permissions=True, @@ -441,7 +478,7 @@ def create_invoke_lambda_custom_resource( invoke_lambda_custom_resource = core.CustomResource( scope, - f"{id}CustomeResource", + f"{id}CustomResource", service_token=custom_resource_lambda_fn.function_arn, properties={ "function_name": lambda_function_name, @@ -533,7 +570,7 @@ def create_uuid_custom_resource(scope, create_model_registry, helper_function_ar service_token=helper_function_arn, # add the template's paramater "create_model_registry" to the custom resource properties # so that a new UUID is generated when this value is updated - # the generated UUID is appeneded to the name of the model registry to be created + # the generated UUID is appended to the name of the model registry to be created properties={"Resource": "UUID", "CreateModelRegistry": create_model_registry}, resource_type="Custom::CreateUUID", ) diff --git a/source/lib/blueprints/byom/pipeline_definitions/iam_policies.py b/source/lib/blueprints/byom/pipeline_definitions/iam_policies.py index 0fd2864..b281a3a 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/iam_policies.py +++ b/source/lib/blueprints/byom/pipeline_definitions/iam_policies.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. A copy of the License is located at # @@ -17,34 +17,41 @@ suppress_delegated_admin_policy, ) - -def sagemaker_policiy_statement(): +sagemaker_arn_prefix = f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}" + + +def sagemaker_policy_statement(is_realtime_pipeline, endpoint_name, endpoint_name_provided): + actions = ["sagemaker:CreateModel", "sagemaker:DescribeModel", "sagemaker:DeleteModel"] + resources = [f"{sagemaker_arn_prefix}:model/mlopssagemakermodel*"] + + if is_realtime_pipeline: + # extend actions + actions.extend( + [ + "sagemaker:CreateEndpointConfig", + "sagemaker:DescribeEndpointConfig", + "sagemaker:DeleteEndpointConfig", + "sagemaker:CreateEndpoint", + "sagemaker:DescribeEndpoint", + "sagemaker:DeleteEndpoint", + ] + ) + + # if a custom endpoint_name is provided, use it. 
Otherwise, use the generated name + endpoint = core.Fn.condition_if( + endpoint_name_provided.logical_id, endpoint_name.value_as_string, "mlopssagemakerendpoint*" + ).to_string() + + # extend resources and add + resources.extend( + [ + f"{sagemaker_arn_prefix}:endpoint-config/mlopssagemakerendpointconfig*", + f"{sagemaker_arn_prefix}:endpoint/{endpoint}", + ] + ) return iam.PolicyStatement( - actions=[ - "sagemaker:CreateModel", # NOSONAR: permission needs to be repeated for clarity - "sagemaker:DescribeModel", - "sagemaker:DeleteModel", - "sagemaker:CreateEndpointConfig", - "sagemaker:DescribeEndpointConfig", - "sagemaker:DeleteEndpointConfig", - "sagemaker:CreateEndpoint", - "sagemaker:DescribeEndpoint", - "sagemaker:DeleteEndpoint", - ], - resources=[ - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:model/" - f"mlopssagemakermodel*" - ), - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:endpoint-config/" - f"mlopssagemakerendpointconfig*" - ), - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:endpoint/" - f"mlopssagemakerendpoint*" - ), - ], + actions=actions, + resources=resources, ) @@ -56,12 +63,7 @@ def sagemaker_baseline_job_policy(baseline_job_name): "sagemaker:StopProcessingJob", "sagemaker:DeleteProcessingJob", ], - resources=[ - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:" - f"processing-job/{baseline_job_name}" - ), - ], + resources=[f"{sagemaker_arn_prefix}:processing-job/{baseline_job_name}"], ) @@ -70,12 +72,7 @@ def batch_transform_policy(): actions=[ "sagemaker:CreateTransformJob", ], - resources=[ - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:" - f"transform-job/mlopssagemakermodel-*-batch-transform-*" - ), - ], + resources=[f"{sagemaker_arn_prefix}:transform-job/mlopssagemakermodel-*-batch-transform-*"], ) @@ -88,7 +85,7 @@ def create_service_role(scope, id, service, description): ) -def sagemaker_monitor_policiy_statement(baseline_job_name, monitoring_schedual_name): +def sagemaker_monitor_policy_statement(baseline_job_name, monitoring_schedule_name, endpoint_name): return iam.PolicyStatement( actions=[ "sagemaker:DescribeEndpointConfig", @@ -98,24 +95,20 @@ def sagemaker_monitor_policiy_statement(baseline_job_name, monitoring_schedual_n "sagemaker:StopMonitoringSchedule", "sagemaker:DeleteMonitoringSchedule", "sagemaker:DescribeProcessingJob", + "sagemaker:CreateDataQualityJobDefinition", + "sagemaker:DescribeDataQualityJobDefinition", + "sagemaker:DeleteDataQualityJobDefinition", + "sagemaker:CreateModelQualityJobDefinition", + "sagemaker:DescribeModelQualityJobDefinition", + "sagemaker:DeleteModelQualityJobDefinition", ], resources=[ - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:endpoint-config/" - f"mlopssagemakerendpointconfig*" - ), - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:endpoint/" - f"mlopssagemakerendpoint*" - ), - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:" - f"monitoring-schedule/{monitoring_schedual_name}" - ), - ( - f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:" - f"processing-job/{baseline_job_name}" - ), + f"{sagemaker_arn_prefix}:endpoint-config/mlopssagemakerendpointconfig*", + f"{sagemaker_arn_prefix}:endpoint/{endpoint_name}", + f"{sagemaker_arn_prefix}:monitoring-schedule/{monitoring_schedule_name}", + 
f"{sagemaker_arn_prefix}:processing-job/{baseline_job_name}", + f"{sagemaker_arn_prefix}:data-quality-job-definition/*", + f"{sagemaker_arn_prefix}:model-quality-job-definition/*", ], ) @@ -126,7 +119,7 @@ def sagemaker_tags_policy_statement(): "sagemaker:AddTags", "sagemaker:DeleteTags", ], - resources=[f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:*"], + resources=[f"{sagemaker_arn_prefix}:*"], ) @@ -283,8 +276,8 @@ def get_model_registry_actions_resources(model_package_group_name): ] resources = [ - f"arn:aws:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:model-package-group/{model_package_group_name}", - f"arn:aws:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:model-package/{model_package_group_name}/*", + f"{sagemaker_arn_prefix}:model-package-group/{model_package_group_name}", + f"{sagemaker_arn_prefix}:model-package/{model_package_group_name}/*", ] return (actions, resources) @@ -442,7 +435,7 @@ def create_orchestrator_policy( f"arn:{core.Aws.PARTITION}:codebuild:{core.Aws.REGION}:" f"{core.Aws.ACCOUNT_ID}:project/VerifySagemaker*" ), - (f"arn:{core.Aws.PARTITION}:codebuild:{core.Aws.REGION}:" f"{core.Aws.ACCOUNT_ID}:report-group/*"), + f"arn:{core.Aws.PARTITION}:codebuild:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:report-group/*", ], ), iam.PolicyStatement( @@ -562,7 +555,7 @@ def create_orchestrator_policy( ) -def create_inovoke_lambda_policy(lambda_functions_list): +def create_invoke_lambda_policy(lambda_functions_list): return iam.PolicyStatement( actions=["lambda:InvokeFunction"], # NOSONAR: permission needs to be repeated for clarity resources=lambda_functions_list, diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint.py index d524c8b..597677c 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint.py +++ b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint.py @@ -15,12 +15,13 @@ ) -def create_sagemaker_endpoint(scope, id, endpoint_config_name, model_name, **kwargs): +def create_sagemaker_endpoint(scope, id, endpoint_config_name, endpoint_name, model_name, **kwargs): # create Sagemaker endpoint sagemaker_endpoint = sagemaker.CfnEndpoint( scope, id, endpoint_config_name=endpoint_config_name, + endpoint_name=endpoint_name, tags=[{"key": "endpoint-name", "value": f"{model_name}-endpoint"}], **kwargs, ) diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint_config.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint_config.py index 473662f..a5724f3 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint_config.py +++ b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_endpoint_config.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. 
A copy of the License is located at # @@ -44,7 +44,7 @@ def create_sagemaker_endpoint_config( "destinationS3Uri": f"s3://{data_capture_location}", "captureOptions": [{"captureMode": "Output"}, {"captureMode": "Input"}], "captureContentTypeHeader": {"csvContentTypes": ["text/csv"]}, - # The key specfied here is used to encrypt data on S3 captured by the endpoint. If you don't provide + # The key specified here is used to encrypt data on S3 captured by the endpoint. If you don't provide # a KMS key ID, Amazon SageMaker uses the default KMS key for Amazon S3 for your role's account. # for more info see DataCaptureConfig # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-sagemaker-endpointconfig.html diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_model_monitor_construct.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_model_monitor_construct.py new file mode 100644 index 0000000..7f9cef3 --- /dev/null +++ b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_model_monitor_construct.py @@ -0,0 +1,322 @@ +# ##################################################################################################################### +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# # +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # +# with the License. A copy of the License is located at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # +# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # +# and limitations under the License. # +# ##################################################################################################################### +from typing import List, Dict, Union, Optional +from aws_cdk import aws_sagemaker as sagemaker, core + + +class SageMakerModelMonitor(core.Construct): + """ + Creates Amazon SageMaker Model Monitor (DataQuality or ModelQuality) + + Attributes: + scope (CDK Construct scope): that's needed to create CDK resources + id (str): CDK resource's logical id + monitoring_schedule_name (str): name of the monitoring job to be created + endpoint_name (str): name of the deployed SageMaker endpoint to be monitored + baseline_job_name (str): name of the baseline job + schedule_expression (str): cron job expression + monitoring_output_location (str): S3 location where the output will be stored + instance_type (str): compute instance type for the baseline job, in the form of a CDK CfnParameter object + instance_volume_size (str): volume size of the EC2 instance + instance_count (str): number of EC2 instances + max_runtime_seconds (str): max time the job is allowed to run + kms_key_arn (str): optional arn of the kms key used to encrypt datacapture and + to encrypt job's output + role_arn (str): Sagemaker role's arn to be used to create the monitoring schedule + image_uri (str): the Model Monitor's Docker image URI + monitoring_type (str): type of SageMaker Model Monitor. Supported values ['DataQuality'|'ModelQuality'] + tags (list[dict[str, str]]): resource tags + ground_truth_s3_uri (str): used with ModelQuality monitor. Location of the ground truth labels (default: None) + problem_type (str): used with ModelQuality monitor. Type of Machine Learning problem. 
Valid values are + ['Regression'|'BinaryClassification'|'MulticlassClassification'] (default: None). + inference_attribute (str): used with ModelQuality monitor. Index or JSONpath to locate predicted label(s). + Required for 'Regression'|'MulticlassClassification' problems, + and not required for 'BinaryClassification' if 'probability_attribute' and + 'probability_threshold_attribute' are provided (default: None). + probability_attribute (str): used with ModelQuality monitor. index or JSONpath to locate probabilities. + Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). + probability_threshold_attribute (str): used with ModelQuality monitor. Threshold to convert probabilities to + binaries. Used only with 'BinaryClassification' problem if 'inference_attribute' is not provided (default: None). + """ + + def __init__( + self, # NOSONAR:S107 the class is designed to take many attributes + scope: core.Construct, + id: str, + monitoring_schedule_name: str, + endpoint_name: str, + baseline_job_name: str, + baseline_job_output_location: str, + schedule_expression: str, + monitoring_output_location: str, + instance_type: str, + instance_count: str, + instance_volume_size: str, + max_runtime_seconds: str, + kms_key_arn: str, + role_arn: str, + image_uri: str, + monitoring_type: str, + tags: List[Dict[str, str]], + ground_truth_s3_uri: Optional[str] = None, + problem_type: Optional[str] = None, + inference_attribute: Optional[str] = None, + probability_attribute: Optional[str] = None, + probability_threshold_attribute: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__(scope, id, **kwargs) + self.scope = scope + self.id = id + self.monitoring_schedule_name = monitoring_schedule_name + self.endpoint_name = endpoint_name + self.baseline_job_name = baseline_job_name + self.baseline_job_output_location = baseline_job_output_location + self.schedule_expression = schedule_expression + self.monitoring_output_location = monitoring_output_location + self.instance_type = instance_type + self.instance_count = instance_count + self.instance_volume_size = instance_volume_size + self.max_runtime_seconds = max_runtime_seconds + self.kms_key_arn = kms_key_arn + self.role_arn = role_arn + self.image_uri = image_uri + self.monitoring_type = monitoring_type + self.tags = tags + self.ground_truth_s3_uri = ground_truth_s3_uri + self.problem_type = problem_type + self.inference_attribute = inference_attribute + self.probability_attribute = probability_attribute + self.probability_threshold_attribute = probability_threshold_attribute + + # validate the provided monitoring_type + if monitoring_type not in ["DataQuality", "ModelQuality"]: + raise ValueError( + ( + f"The provided monitoring type: {monitoring_type} is not valid. 
" + + "It must be 'DataQuality'|'ModelQuality'" + ) + ) + + # create the [DataQuality|ModelQuality]JobDefinition + self.__job_definition = self._get_job_definition( + monitoring_type=monitoring_type, id=f"{monitoring_type}JobDefinition" + ) + + # create the monitoring schedule + self.__monitoring_schedule = self._create_sagemaker_monitoring_schedule( + monitoring_schedule_name=self.monitoring_schedule_name, + monitor_job_definition=self.__job_definition, + ) + + def _get_job_definition( + self, monitoring_type: str, id: str + ) -> Union[sagemaker.CfnDataQualityJobDefinition, sagemaker.CfnModelQualityJobDefinition]: + """ + Gets the *JobDefinition based on the monitoring_type + + Args: + monitoring_type (str): possible values [DataQuality, ModelQuality] + id (str): CDK resource's logical id + + Returns: + sagemaker.CfnDataQualityJobDefinition or sagemaker.CfnModelQualityJobDefinition object + """ + # create *JobDefinition MonitoringType->function_name map + type_function_map = dict( + DataQuality="_create_data_quality_job_definition", ModelQuality="_create_model_quality_job_definition" + ) + + # call the right function to create the *JobDefinition + job_definition = getattr(self, type_function_map[monitoring_type])(id) + + return job_definition + + def _create_data_quality_job_definition( + self, + id: str, + ) -> sagemaker.CfnDataQualityJobDefinition: + """ + Creates Amazon SageMaker's Data Quality Job Definition + + Args: + id (str): CDK resource's logical id + + Returns: + sagemaker.CfnDataQualityJobDefinition object + """ + data_quality_job_definition = sagemaker.CfnDataQualityJobDefinition( + self.scope, + id, + data_quality_app_specification=sagemaker.CfnDataQualityJobDefinition.DataQualityAppSpecificationProperty( + image_uri=self.image_uri + ), + data_quality_baseline_config=sagemaker.CfnDataQualityJobDefinition.DataQualityBaselineConfigProperty( + constraints_resource=sagemaker.CfnDataQualityJobDefinition.ConstraintsResourceProperty( + s3_uri=f"s3://{self.baseline_job_output_location}/constraints.json" + ), + statistics_resource=sagemaker.CfnDataQualityJobDefinition.StatisticsResourceProperty( + s3_uri=f"s3://{self.baseline_job_output_location}/statistics.json" + ), + ), + data_quality_job_input=sagemaker.CfnDataQualityJobDefinition.DataQualityJobInputProperty( + endpoint_input=sagemaker.CfnDataQualityJobDefinition.EndpointInputProperty( + endpoint_name=self.endpoint_name, + local_path="/opt/ml/processing/input/data_quality_input", + ) + ), + data_quality_job_output_config=sagemaker.CfnDataQualityJobDefinition.MonitoringOutputConfigProperty( + monitoring_outputs=[ + sagemaker.CfnDataQualityJobDefinition.MonitoringOutputProperty( + s3_output=sagemaker.CfnDataQualityJobDefinition.S3OutputProperty( + s3_uri=f"s3://{self.monitoring_output_location}", + local_path="/opt/ml/processing/output/data_quality_output", + s3_upload_mode="EndOfJob", + ) + ) + ], + kms_key_id=self.kms_key_arn, + ), + job_resources=sagemaker.CfnDataQualityJobDefinition.MonitoringResourcesProperty( + cluster_config=sagemaker.CfnDataQualityJobDefinition.ClusterConfigProperty( + instance_count=core.Token.as_number(self.instance_count), + instance_type=self.instance_type, + volume_size_in_gb=core.Token.as_number(self.instance_volume_size), + volume_kms_key_id=self.kms_key_arn, + ) + ), + stopping_condition=sagemaker.CfnDataQualityJobDefinition.StoppingConditionProperty( + max_runtime_in_seconds=core.Token.as_number(self.max_runtime_seconds) + ), + role_arn=self.role_arn, + tags=self.tags, + ) + + return 
data_quality_job_definition + + def _create_model_quality_job_definition( + self, + id: str, + ) -> sagemaker.CfnModelQualityJobDefinition: + """ + Creates Amazon SageMaker's Model Quality Job Definition + + Args: + id (str): CDK resource's logical id + + Returns: + sagemaker.CfnModelQualityJobDefinition object + """ + + # create the ModelQualityJobDefinition + model_quality_job_definition = sagemaker.CfnModelQualityJobDefinition( + self.scope, + id, + model_quality_app_specification=sagemaker.CfnModelQualityJobDefinition.ModelQualityAppSpecificationProperty( + problem_type=self.problem_type, image_uri=self.image_uri + ), + model_quality_baseline_config=sagemaker.CfnModelQualityJobDefinition.ModelQualityBaselineConfigProperty( + constraints_resource=sagemaker.CfnModelQualityJobDefinition.ConstraintsResourceProperty( + s3_uri=f"s3://{self.baseline_job_output_location}/constraints.json" + ), + ), + model_quality_job_input=sagemaker.CfnModelQualityJobDefinition.ModelQualityJobInputProperty( + endpoint_input=sagemaker.CfnModelQualityJobDefinition.EndpointInputProperty( + endpoint_name=self.endpoint_name, + local_path="/opt/ml/processing/input/model_quality_input", + inference_attribute=self.inference_attribute, + probability_attribute=self.probability_attribute, + probability_threshold_attribute=core.Token.as_number(self.probability_threshold_attribute), + ), + ground_truth_s3_input=sagemaker.CfnModelQualityJobDefinition.MonitoringGroundTruthS3InputProperty( + s3_uri=f"s3://{self.ground_truth_s3_uri}" + ), + ), + model_quality_job_output_config=sagemaker.CfnModelQualityJobDefinition.MonitoringOutputConfigProperty( + monitoring_outputs=[ + sagemaker.CfnModelQualityJobDefinition.MonitoringOutputProperty( + s3_output=sagemaker.CfnModelQualityJobDefinition.S3OutputProperty( + s3_uri=f"s3://{self.monitoring_output_location}", + local_path="/opt/ml/processing/output/model_quality_output", + s3_upload_mode="EndOfJob", + ) + ) + ], + kms_key_id=self.kms_key_arn, + ), + job_resources=sagemaker.CfnModelQualityJobDefinition.MonitoringResourcesProperty( + cluster_config=sagemaker.CfnModelQualityJobDefinition.ClusterConfigProperty( + instance_count=core.Token.as_number(self.instance_count), + instance_type=self.instance_type, + volume_size_in_gb=core.Token.as_number(self.instance_volume_size), + volume_kms_key_id=self.kms_key_arn, + ) + ), + stopping_condition=sagemaker.CfnModelQualityJobDefinition.StoppingConditionProperty( + max_runtime_in_seconds=core.Token.as_number(self.max_runtime_seconds) + ), + role_arn=self.role_arn, + tags=self.tags, + ) + + return model_quality_job_definition + + def _create_sagemaker_monitoring_schedule( + self, + monitoring_schedule_name: str, + monitor_job_definition: Union[sagemaker.CfnDataQualityJobDefinition, sagemaker.CfnModelQualityJobDefinition], + ) -> sagemaker.CfnMonitoringSchedule: + """ + Creates Amazon SageMaker's Monitoring Schedule object + + Args: + monitoring_schedule_name (str): name of the monitoring job to be created + monitor_job_definition (sagemaker.CfnDataQualityJobDefinition or sagemaker.CfnModelQualityJobDefinition): + monitor job definition + + Returns: + sagemaker.CfnMonitoringSchedule object + """ + + # create the monitoring schedule + schedule = sagemaker.CfnMonitoringSchedule( + self.scope, + f"{self.id}Schedule", + monitoring_schedule_name=monitoring_schedule_name, + monitoring_schedule_config=sagemaker.CfnMonitoringSchedule.MonitoringScheduleConfigProperty( + schedule_config=sagemaker.CfnMonitoringSchedule.ScheduleConfigProperty( + 
schedule_expression=self.schedule_expression + ), + # *JobDefinition's name is not specified, so stack updates won't fail + # hence, "monitor_job_definition.job_definition_name" has no value. + # The get_att is used to get the generated *JobDefinition's name + monitoring_job_definition_name=core.Fn.get_att( + monitor_job_definition.logical_id, "JobDefinitionName" + ).to_string(), + monitoring_type=self.monitoring_type, + ), + tags=self.tags, + ) + + # add dependency on the monitor job definition + schedule.add_depends_on(monitor_job_definition) + + return schedule + + @property + def job_definition(self) -> Union[sagemaker.CfnDataQualityJobDefinition, sagemaker.CfnModelQualityJobDefinition]: + return self.__job_definition + + @property + def monitoring_schedule(self) -> sagemaker.CfnMonitoringSchedule: + return self.__monitoring_schedule diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitor_role.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitor_role.py index d933bfa..824a7dd 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitor_role.py +++ b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitor_role.py @@ -18,7 +18,7 @@ from lib.blueprints.byom.pipeline_definitions.iam_policies import ( kms_policy_document, - sagemaker_monitor_policiy_statement, + sagemaker_monitor_policy_statement, sagemaker_tags_policy_statement, sagemaker_logs_metrics_policy_document, s3_policy_read, @@ -40,9 +40,11 @@ def create_sagemaker_monitor_role( output_s3_location, kms_key_arn_provided_condition, baseline_job_name, - monitoring_schedual_name, + monitoring_schedule_name, + endpoint_name, + model_monitor_ground_truth_input, ): - # create optional polocies + # create optional policies kms_policy = kms_policy_document(scope, "MLOpsKmsPolicy", kms_key_arn) # add conditions to KMS and ECR policies @@ -52,27 +54,32 @@ def create_sagemaker_monitor_role( role = iam.Role(scope, id, assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com")) # permissions to create sagemaker resources - sagemaker_policy = sagemaker_monitor_policiy_statement(baseline_job_name, monitoring_schedual_name) + sagemaker_policy = sagemaker_monitor_policy_statement(baseline_job_name, monitoring_schedule_name, endpoint_name) # sagemaker tags permissions sagemaker_tags_policy = sagemaker_tags_policy_statement() # logs/metrics permissions logs_metrics_policy = sagemaker_logs_metrics_policy_document(scope, "SagemakerLogsMetricsPolicy") # S3 permissions - s3_read = s3_policy_read( - list( - set( - [ - f"arn:aws:s3:::{assets_bucket_name}", - f"arn:aws:s3:::{assets_bucket_name}/*", - f"arn:aws:s3:::{data_capture_bucket}", - f"arn:aws:s3:::{data_capture_s3_location}/*", - f"arn:aws:s3:::{baseline_output_bucket}", - f"arn:aws:s3:::{baseline_job_output_location}/*", - ] - ) + s3_read_resources = list( + set( # set is used since the same bucket can be used more than once + [ + f"arn:aws:s3:::{assets_bucket_name}", + f"arn:aws:s3:::{assets_bucket_name}/*", + f"arn:aws:s3:::{data_capture_bucket}", + f"arn:aws:s3:::{data_capture_s3_location}/*", + f"arn:aws:s3:::{baseline_output_bucket}", + f"arn:aws:s3:::{baseline_job_output_location}/*", + ] ) ) + + # add permissions to read ground truth data (only for ModelQuality monitor) + if model_monitor_ground_truth_input: + s3_read_resources.extend( + [f"arn:aws:s3:::{model_monitor_ground_truth_input}", f"arn:aws:s3:::{model_monitor_ground_truth_input}/*"] + ) + s3_read = s3_policy_read(s3_read_resources) s3_write = s3_policy_write( [ 
f"arn:aws:s3:::{output_s3_location}/*", @@ -83,7 +90,7 @@ def create_sagemaker_monitor_role( # IAM GetRole permission get_role_policy = get_role_policy_statement(role) - # add policy statments + # add policy statements role.add_to_policy(sagemaker_policy) role.add_to_policy(sagemaker_tags_policy) role.add_to_policy(s3_read) diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitoring_schedule.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitoring_schedule.py deleted file mode 100644 index 812b6c7..0000000 --- a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_monitoring_schedule.py +++ /dev/null @@ -1,131 +0,0 @@ -# ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # -# # -# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # -# with the License. A copy of the License is located at # -# # -# http://www.apache.org/licenses/LICENSE-2.0 # -# # -# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES # -# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions # -# and limitations under the License. # -# ##################################################################################################################### -from aws_cdk import aws_sagemaker as sagemaker, core - - -def create_sagemaker_monitoring_scheduale( - scope, # NOSONAR:S107 this function is designed to take many arguments - id, - monitoring_schedule_name, - endpoint_name, - baseline_job_name, - baseline_job_output_location, - schedule_expression, - monitoring_output_location, - instance_type, - instance_volume_size, - max_runtime_seconds, - kms_key_arn, - role_arn, - image_uri, - stack_name, -): - """ - create_sagemaker_monitoring_scheduale creates a monitoring schedule using CDK - - :scope: CDK Construct scope that's needed to create CDK resources - :monitoring_schedual_name: name of the monitoring job to be created - :endpoint_name: name of the deployed SageMaker endpoint to be monitored - :baseline_job_name: name of the baseline job - :baseline_job_output_location: S3 prefix in the S3 assets bucket to store the output of the job - :schedule_expression: cron job expression - :monitoring_output_location: S3 location where the output will be stored - :instance_type: compute instance type for the baseline job, in the form of a CDK CfnParameter object - :instance_volume_size: volume size of the EC2 instance - :max_runtime_seconds: max time the job is allowd to run - :kms_key_arn": optional arn of the kms key used to encrypt datacapture and to encrypt job's output - :role_arn: Sagemaker role's arn to be used to create the monitoring schedule - :image_uri: the name of the stack where the schedule will be created - :return: return an sagemaker.CfnMonitoringSchedule object - - """ - schedule = sagemaker.CfnMonitoringSchedule( - scope, - id, - monitoring_schedule_name=monitoring_schedule_name, - monitoring_schedule_config=sagemaker.CfnMonitoringSchedule.MonitoringScheduleConfigProperty( - schedule_config=sagemaker.CfnMonitoringSchedule.ScheduleConfigProperty( - schedule_expression=schedule_expression - ), - monitoring_job_definition=sagemaker.CfnMonitoringSchedule.MonitoringJobDefinitionProperty( - baseline_config=sagemaker.CfnMonitoringSchedule.BaselineConfigProperty( 
- constraints_resource=sagemaker.CfnMonitoringSchedule.ConstraintsResourceProperty( - s3_uri=f"s3://{baseline_job_output_location}/{baseline_job_name}/constraints.json" - ), - statistics_resource=sagemaker.CfnMonitoringSchedule.StatisticsResourceProperty( - s3_uri=f"s3://{baseline_job_output_location}/{baseline_job_name}/statistics.json" - ), - ), - monitoring_inputs=sagemaker.CfnMonitoringSchedule.MonitoringInputsProperty( - monitoring_inputs=[ - sagemaker.CfnMonitoringSchedule.MonitoringInputProperty( - endpoint_input=sagemaker.CfnMonitoringSchedule.EndpointInputProperty( - endpoint_name=endpoint_name, - local_path="/opt/ml/processing/input/monitoring_dataset_input", - s3_input_mode="File", - s3_data_distribution_type="FullyReplicated", - ) - ) - ] - ), - monitoring_output_config=sagemaker.CfnMonitoringSchedule.MonitoringOutputConfigProperty( - monitoring_outputs=[ - sagemaker.CfnMonitoringSchedule.MonitoringOutputProperty( - s3_output=sagemaker.CfnMonitoringSchedule.S3OutputProperty( - s3_uri=f"s3://{monitoring_output_location}", - local_path="/opt/ml/processing/output", - s3_upload_mode="EndOfJob", - ) - ) - ], - kms_key_id=kms_key_arn, - ), - monitoring_resources=sagemaker.CfnMonitoringSchedule.MonitoringResourcesProperty( - cluster_config=sagemaker.CfnMonitoringSchedule.ClusterConfigProperty( - instance_count=1.0, - instance_type=instance_type, - volume_size_in_gb=core.Token.as_number(instance_volume_size), - volume_kms_key_id=kms_key_arn, - ) - ), - monitoring_app_specification=sagemaker.CfnMonitoringSchedule.MonitoringAppSpecificationProperty( - image_uri=image_uri - ), - stopping_condition=sagemaker.CfnMonitoringSchedule.StoppingConditionProperty( - max_runtime_in_seconds=core.Token.as_number(max_runtime_seconds) - ), - role_arn=role_arn, - ), - ), - tags=[ - {"key": "stack_name", "value": stack_name}, - ], - ) - - # This is a workaround the current bug in CDK aws-sagemaker, where the MonitoringInputs property - # is duplicated. 
link to the bug https://github.com/aws/aws-cdk/issues/12208 - schedule.add_property_override( - "MonitoringScheduleConfig.MonitoringJobDefinition.MonitoringInputs", - [ - { - "EndpointInput": { - "EndpointName": {"Ref": "ENDPOINTNAME"}, - "LocalPath": "/opt/ml/processing/input/monitoring_dataset_input", - "S3DataDistributionType": "FullyReplicated", - "S3InputMode": "File", - } - } - ], - ) - - return schedule diff --git a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_role.py b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_role.py index a448f77..246e605 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/sagemaker_role.py +++ b/source/lib/blueprints/byom/pipeline_definitions/sagemaker_role.py @@ -19,8 +19,8 @@ from lib.blueprints.byom.pipeline_definitions.iam_policies import ( ecr_policy_document, kms_policy_document, - sagemaker_policiy_statement, - sagemaker_monitor_policiy_statement, + sagemaker_policy_statement, + sagemaker_monitor_policy_statement, sagemaker_tags_policy_statement, sagemaker_logs_metrics_policy_document, s3_policy_read, @@ -44,8 +44,11 @@ def create_sagemaker_role( ecr_repo_arn_provided_condition, kms_key_arn_provided_condition, model_registry_provided_condition, + is_realtime_pipeline=False, + endpoint_name=None, + endpoint_name_provided=None, ): - # create optional polocies + # create optional policies ecr_policy = ecr_policy_document(scope, "MLOpsECRPolicy", custom_algorithms_ecr_arn) kms_policy = kms_policy_document(scope, "MLOpsKmsPolicy", kms_key_arn) model_registry = model_registry_policy_document(scope, "ModelRegistryPolicy", model_package_group_name) @@ -59,7 +62,7 @@ def create_sagemaker_role( role = iam.Role(scope, id, assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com")) # permissions to create sagemaker resources - sagemaker_policy = sagemaker_policiy_statement() + sagemaker_policy = sagemaker_policy_statement(is_realtime_pipeline, endpoint_name, endpoint_name_provided) # sagemaker tags permissions sagemaker_tags_policy = sagemaker_tags_policy_statement() @@ -88,7 +91,7 @@ def create_sagemaker_role( # IAM GetRole permission get_role_policy = get_role_policy_statement(role) - # add policy statments + # add policy statements role.add_to_policy(sagemaker_policy) role.add_to_policy(sagemaker_tags_policy) logs_policy.attach_to_role(role) diff --git a/source/lib/blueprints/byom/pipeline_definitions/templates_parameters.py b/source/lib/blueprints/byom/pipeline_definitions/templates_parameters.py index a4aaea6..86124a8 100644 --- a/source/lib/blueprints/byom/pipeline_definitions/templates_parameters.py +++ b/source/lib/blueprints/byom/pipeline_definitions/templates_parameters.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. 
A copy of the License is located at # @@ -13,518 +13,655 @@ from aws_cdk import core -def create_notification_email_parameter(scope): - return core.CfnParameter( - scope, - "NOTIFICATION_EMAIL", - type="String", - description="email for pipeline outcome notifications", - allowed_pattern="^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", - constraint_description="Please enter an email address with correct format (example@exmaple.com)", - min_length=5, - max_length=320, - ) - - -def create_git_address_parameter(scope): - return core.CfnParameter( - scope, - "CodeCommit Repo Address", - type="String", - description="AWS CodeCommit repository clone URL to connect to the framework.", - allowed_pattern=( - "^(((https:\/\/|ssh:\/\/)(git\-codecommit)\.[a-zA-Z0-9_.+-]+(amazonaws\.com\/)[a-zA-Z0-9-.]" - "+(\/)[a-zA-Z0-9-.]+(\/)[a-zA-Z0-9-.]+$)|^$)" - ), - min_length=0, - max_length=320, - constraint_description=( - "CodeCommit address must follow the pattern: ssh or " - "https://git-codecommit.REGION.amazonaws.com/version/repos/REPONAME" - ), - ) - - -def create_existing_bucket_parameter(scope): - return core.CfnParameter( - scope, - "ExistingS3Bucket", - type="String", - description="Name of existing S3 bucket to be used for ML assests. S3 Bucket must be in the same region as the deployed stack, and has versioning enabled. If not provided, a new S3 bucket will be created.", - allowed_pattern="((?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$)|^$)", - min_length=0, - max_length=63, - ) - - -def create_existing_ecr_repo_parameter(scope): - return core.CfnParameter( - scope, - "ExistingECRRepo", - type="String", - description="Name of existing Amazom ECR repository for custom algorithms. If not provided, a new ECR repo will be created.", - allowed_pattern="((?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*|^$)", - min_length=0, - max_length=63, - ) - - -def create_account_id_parameter(scope, id, account_type): - return core.CfnParameter( - scope, - id, - type="String", - description=f"AWS {account_type} account number where the CF template will be deployed", - allowed_pattern="^\d{12}$", - ) - - -def create_org_id_parameter(scope, id, account_type): - return core.CfnParameter( - scope, - id, - type="String", - description=f"AWS {account_type} organizational unit id where the CF template will be deployed", - allowed_pattern="^ou-[0-9a-z]{4,32}-[a-z0-9]{8,32}$", - ) - - -def create_blueprint_bucket_name_parameter(scope): - return core.CfnParameter( - scope, - "BLUEPRINT_BUCKET", - type="String", - description="Bucket name for blueprints of different types of ML Pipelines.", - min_length=3, - ) - - -def create_data_capture_bucket_name_parameter(scope): - return core.CfnParameter( - scope, - "DATA_CAPTURE_BUCKET", - type="String", - description="Bucket name where the data captured from SageMaker endpoint will be stored.", - min_length=3, - ) - - -def create_baseline_output_bucket_name_parameter(scope): - return core.CfnParameter( - scope, - "BASELINE_OUTPUT_BUCKET", - type="String", - description="Bucket name where the output of the baseline job will be stored.", - min_length=3, - ) - - -def create_batch_input_bucket_name_parameter(scope): - return core.CfnParameter( - scope, - "BATCH_INPUT_BUCKET", - type="String", - description="Bucket name where the data input of the bact transform is stored.", - min_length=3, - ) - - -def create_assets_bucket_name_parameter(scope): - return core.CfnParameter( - scope, - "ASSETS_BUCKET", - 
type="String", - description="Bucket name where the model and training data are stored.", - min_length=3, - ) - - -def create_custom_algorithms_ecr_repo_arn_parameter(scope): - return core.CfnParameter( - scope, - "CUSTOM_ALGORITHMS_ECR_REPO_ARN", - type="String", - description="The arn of the Amazon ECR repository where custom algorithm image is stored (optional)", - allowed_pattern="(^arn:aws:ecr:(us(-gov)?|ap|ca|cn|eu|sa)-(central|(north|south)?(east|west)?)-\\d:\\d{12}:repository/.+|^$)", - constraint_description="Please enter valid ECR repo ARN", - min_length=0, - max_length=2048, - ) - - -def create_kms_key_arn_parameter(scope): - return core.CfnParameter( - scope, - "KMS_KEY_ARN", - type="String", - description="The KMS ARN to encrypt the output of the batch transform job and instance volume (optional).", - allowed_pattern="(^arn:aws:kms:(us(-gov)?|ap|ca|cn|eu|sa)-(central|(north|south)?(east|west)?)-\d:\d{12}:key/.+|^$)", - constraint_description="Please enter kmsKey ARN", - min_length=0, - max_length=2048, - ) - - -def create_algorithm_image_uri_parameter(scope): - return core.CfnParameter( - scope, - "IMAGE_URI", - type="String", - description="The algorithm image uri (build-in or custom)", - ) - - -def create_model_name_parameter(scope): - return core.CfnParameter( - scope, "MODEL_NAME", type="String", description="An arbitrary name for the model.", min_length=1 - ) - - -def create_stack_name_parameter(scope): - return core.CfnParameter( - scope, "STACK_NAME", type="String", description="The name to assign to the deployed CF stack.", min_length=1 - ) - - -def create_endpoint_name_parameter(scope): - return core.CfnParameter( - scope, "ENDPOINT_NAME", type="String", description="The name of the ednpoint to monitor", min_length=1 - ) - - -def create_model_artifact_location_parameter(scope): - return core.CfnParameter( - scope, - "MODEL_ARTIFACT_LOCATION", - type="String", - description="Path to model artifact inside assets bucket.", - ) - - -def create_inference_instance_parameter(scope): - return core.CfnParameter( - scope, - "INFERENCE_INSTANCE", - type="String", - description="Inference instance that inference requests will be running on. 
E.g., ml.m5.large", - allowed_pattern="^[a-zA-Z0-9_.+-]+\.[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", - min_length=7, - ) - - -def create_batch_inference_data_parameter(scope): - return core.CfnParameter( - scope, - "BATCH_INFERENCE_DATA", - type="String", - description="S3 bukcet path (including bucket name) to batch inference data file.", - ) - - -def create_batch_job_output_location_parameter(scope): - return core.CfnParameter( - scope, - "BATCH_OUTPUT_LOCATION", - type="String", - description="S3 path (including bucket name) to store the results of the batch job.", - ) - - -def create_data_capture_location_parameter(scope): - return core.CfnParameter( - scope, - "DATA_CAPTURE_LOCATION", - type="String", - description="S3 path (including bucket name) to store captured data from the Sagemaker endpoint.", - min_length=3, - ) - - -def create_baseline_job_output_location_parameter(scope): - return core.CfnParameter( - scope, - "BASELINE_JOB_OUTPUT_LOCATION", - type="String", - description="S3 path (including bucket name) to store the Data Baseline Job's output.", - min_length=3, - ) - - -def create_monitoring_output_location_parameter(scope): - return core.CfnParameter( - scope, - "MONITORING_OUTPUT_LOCATION", - type="String", - description="S3 path (including bucket name) to store the output of the Monitoring Schedule.", - min_length=3, - ) - - -def create_schedule_expression_parameter(scope): - return core.CfnParameter( - scope, - "SCHEDULE_EXPRESSION", - type="String", - description="cron expression to run the monitoring schedule. E.g., cron(0 * ? * * *), cron(0 0 ? * * *), etc.", - allowed_pattern="^cron(\\S+\\s){5}\\S+$", - ) - - -def create_training_data_parameter(scope): - return core.CfnParameter( - scope, - "TRAINING_DATA", - type="String", - description="Location of the training data in Assets S3 Bucket.", - ) - - -def create_instance_type_parameter(scope): - return core.CfnParameter( - scope, - "INSTANCE_TYPE", - type="String", - description="EC2 instance type that model moniroing jobs will be running on. E.g., ml.m5.large", - allowed_pattern="^[a-zA-Z0-9_.+-]+\.[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", - min_length=7, - ) - - -def create_instance_volume_size_parameter(scope): - return core.CfnParameter( - scope, - "INSTANCE_VOLUME_SIZE", - type="Number", - description="Instance volume size used in model moniroing jobs. E.g., 20", - ) - - -def create_monitoring_type_parameter(scope): - return core.CfnParameter( - scope, - "MONITORING_TYPE", - type="String", - allowed_values=["dataquality", "modelquality", "modelbias", "modelexplainability"], - default="dataquality", - description="Type of model monitoring. Possible values: DataQuality | ModelQuality | ModelBias | ModelExplainability ", - ) - - -def create_max_runtime_seconds_parameter(scope): - return core.CfnParameter( - scope, - "MAX_RUNTIME_SECONDS", - type="Number", - description="Max runtime in secodns the job is allowed to run. 
E.g., 3600", - ) - - -def create_baseline_job_name_parameter(scope): - return core.CfnParameter( - scope, - "BASELINE_JOB_NAME", - type="String", - description="Unique name of the data baseline job", - min_length=3, - max_length=63, - ) - - -def create_monitoring_schedule_name_parameter(scope): - return core.CfnParameter( - scope, - "MONITORING_SCHEDULE_NAME", - type="String", - description="Unique name of the monitoring schedule job", - min_length=3, - max_length=63, - ) - - -def create_template_zip_name_parameter(scope): - return core.CfnParameter( - scope, - "TEMPLATE_ZIP_NAME", - type="String", - allowed_pattern="^.*\.zip$", - description="The zip file's name containing the CloudFormation template and its parameters files", - ) - - -def create_template_file_name_parameter(scope): - return core.CfnParameter( - scope, - "TEMPLATE_FILE_NAME", - type="String", - allowed_pattern="^.*\.yaml$", - description="CloudFormation template's file name", - ) - - -def create_stage_params_file_name_parameter(scope, id, stage_type): - return core.CfnParameter( - scope, - id, - type="String", - allowed_pattern="^.*\.json$", - description=f"parameters json file's name for the {stage_type} stage", - ) - - -def create_custom_container_parameter(scope): - return core.CfnParameter( - scope, - "CUSTOM_CONTAINER", - default="", - type="String", - description=( - "Should point to a zip file containing dockerfile and assets for building a custom model. " - "If empty it will beusing containers from SageMaker Registry" - ), - ) - - -def create_ecr_repo_name_parameter(scope): - return core.CfnParameter( - scope, - "ECR_REPO_NAME", - type="String", - description="Name of the Amazon ECR repository. This repo will be useed to store custom algorithms images.", - allowed_pattern="(?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*", - min_length=1, - ) - - -def create_image_tag_parameter(scope): - return core.CfnParameter( - scope, "IMAGE_TAG", type="String", description="Docker image tag for the custom algorithm", min_length=1 - ) - - -def create_custom_algorithms_ecr_repo_arn_provided_condition(scope, custom_algorithms_ecr_repo_arn): - return core.CfnCondition( - scope, - "CustomECRRepoProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(custom_algorithms_ecr_repo_arn, "")), - ) - - -def create_kms_key_arn_provided_condition(scope, kms_key_arn): - return core.CfnCondition( - scope, - "KMSKeyProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(kms_key_arn, "")), - ) - - -def create_git_address_provided_condition(scope, git_address): - return core.CfnCondition( - scope, - "GitAddressProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(git_address, "")), - ) - - -def create_existing_bucket_provided_condition(scope, existing_bucket): - return core.CfnCondition( - scope, - "S3BucketProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(existing_bucket.value_as_string, "")), - ) - - -def create_existing_ecr_provided_condition(scope, existing_ecr_repo): - return core.CfnCondition( - scope, - "ECRProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(existing_ecr_repo.value_as_string, "")), - ) - - -def create_new_bucket_condition(scope, existing_bucket): - return core.CfnCondition( - scope, - "CreateS3Bucket", - expression=core.Fn.condition_equals(existing_bucket.value_as_string, ""), - ) - - -def create_new_ecr_repo_condition(scope, existing_ecr_repo): - return core.CfnCondition( - scope, - "CreateECRRepo", - 
expression=core.Fn.condition_equals(existing_ecr_repo.value_as_string, ""), - ) - - -def create_delegated_admin_parameter(scope): - return core.CfnParameter( - scope, - "DELEGATED_ADMIN_ACCOUNT", - type="String", - allowed_values=["Yes", "No"], - default="Yes", - description="Is a delegated administrator account used to deploy accross account", - ) - - -def create_delegated_admin_condition(scope, delegated_admin_parameter): - return core.CfnCondition( - scope, - "UseDelegatedAdmin", - expression=core.Fn.condition_equals(delegated_admin_parameter.value_as_string, "Yes"), - ) - - -def create_use_model_registry_parameter(scope): - return core.CfnParameter( - scope, - "USE_MODEL_REGISTRY", - type="String", - allowed_values=["Yes", "No"], - default="No", - description="Will Amazon SageMaker's Model Registry be used to provision models?", - ) - - -def create_model_registry_parameter(scope): - return core.CfnParameter( - scope, - "CREATE_MODEL_REGISTRY", - type="String", - allowed_values=["Yes", "No"], - default="No", - description="Do you want the solution to create the SageMaker Model Package Group Name (i.e., Model Registry)", - ) - - -def create_model_registry_condition(scope, create_model_registry): - return core.CfnCondition( - scope, - "CreateModelRegistryCondition", - expression=core.Fn.condition_equals(create_model_registry.value_as_string, "Yes"), - ) - - -def create_model_package_group_name_parameter(scope): - return core.CfnParameter( - scope, "MODEL_PACKAGE_GROUP_NAME", type="String", description="SageMaker model package group name", min_length=0 - ) - - -def create_model_package_name_parameter(scope): - return core.CfnParameter( - scope, - "MODEL_PACKAGE_NAME", - allowed_pattern="(^arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:model-package/.*|^$)", - type="String", - description="The model name (version arn) in SageMaker's model package name group", - ) - - -def create_model_registry_provided_condition(scope, model_package_name): - return core.CfnCondition( - scope, - "ModelRegistryProvided", - expression=core.Fn.condition_not(core.Fn.condition_equals(model_package_name, "")), - ) \ No newline at end of file +class ParameteresFactory: + @staticmethod + def create_notification_email_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "NotificationEmail", + type="String", + description="email for pipeline outcome notifications", + allowed_pattern="^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", + constraint_description="Please enter an email address with correct format (example@exmaple.com)", + min_length=5, + max_length=320, + ) + + @staticmethod + def create_git_address_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "CodeCommitRepoAddress", + type="String", + description="AWS CodeCommit repository clone URL to connect to the framework.", + allowed_pattern=( + "^(((https:\/\/|ssh:\/\/)(git\-codecommit)\.[a-zA-Z0-9_.+-]+(amazonaws\.com\/)[a-zA-Z0-9-.]" + "+(\/)[a-zA-Z0-9-.]+(\/)[a-zA-Z0-9-.]+$)|^$)" + ), + min_length=0, + max_length=320, + constraint_description=( + "CodeCommit address must follow the pattern: ssh or " + "https://git-codecommit.REGION.amazonaws.com/version/repos/REPONAME" + ), + ) + + @staticmethod + def create_existing_bucket_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ExistingS3Bucket", + type="String", + description="Name of existing S3 bucket to be used for ML assets. 
S3 Bucket must be in the same region as the deployed stack, and has versioning enabled. If not provided, a new S3 bucket will be created.", + allowed_pattern="((?=^.{3,63}$)(?!^(\d+\.)+\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])\.)*([a-z0-9]|[a-z0-9][a-z0-9\-]*[a-z0-9])$)|^$)", + min_length=0, + max_length=63, + ) + + @staticmethod + def create_existing_ecr_repo_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ExistingECRRepo", + type="String", + description="Name of existing Amazon ECR repository for custom algorithms. If not provided, a new ECR repo will be created.", + allowed_pattern="((?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*|^$)", + min_length=0, + max_length=63, + ) + + @staticmethod + def create_account_id_parameter(scope: core.Construct, id: str, account_type: str) -> core.CfnParameter: + return core.CfnParameter( + scope, + id, + type="String", + description=f"AWS {account_type} account number where the CF template will be deployed", + allowed_pattern="^\d{12}$", + ) + + @staticmethod + def create_org_id_parameter(scope: core.Construct, id: str, account_type: str) -> core.CfnParameter: + return core.CfnParameter( + scope, + id, + type="String", + description=f"AWS {account_type} organizational unit id where the CF template will be deployed", + allowed_pattern="^ou-[0-9a-z]{4,32}-[a-z0-9]{8,32}$", + ) + + @staticmethod + def create_blueprint_bucket_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BlueprintBucket", + type="String", + description="Bucket name for blueprints of different types of ML Pipelines.", + min_length=3, + ) + + @staticmethod + def create_data_capture_bucket_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "DataCaptureBucket", + type="String", + description="Bucket name where the data captured from SageMaker endpoint will be stored.", + min_length=3, + ) + + @staticmethod + def create_baseline_output_bucket_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineOutputBucket", + type="String", + description="Bucket name where the output of the baseline job will be stored.", + min_length=3, + ) + + @staticmethod + def create_batch_input_bucket_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BatchInputBucket", + type="String", + description="Bucket name where the data input of the bact transform is stored.", + min_length=3, + ) + + @staticmethod + def create_assets_bucket_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "AssetsBucket", + type="String", + description="Bucket name where the model and baselines data are stored.", + min_length=3, + ) + + @staticmethod + def create_custom_algorithms_ecr_repo_arn_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "CustomAlgorithmsECRRepoArn", + type="String", + description="The arn of the Amazon ECR repository where custom algorithm image is stored (optional)", + allowed_pattern="(^arn:aws:ecr:(us(-gov)?|ap|ca|cn|eu|sa)-(central|(north|south)?(east|west)?)-\\d:\\d{12}:repository/.+|^$)", + constraint_description="Please enter valid ECR repo ARN", + min_length=0, + max_length=2048, + ) + + @staticmethod + def create_kms_key_arn_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "KmsKeyArn", + type="String", + 
description="The KMS ARN to encrypt the output of the batch transform job and instance volume (optional).", + allowed_pattern="(^arn:aws:kms:(us(-gov)?|ap|ca|cn|eu|sa)-(central|(north|south)?(east|west)?)-\d:\d{12}:key/.+|^$)", + constraint_description="Please enter kmsKey ARN", + min_length=0, + max_length=2048, + ) + + @staticmethod + def create_algorithm_image_uri_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ImageUri", + type="String", + description="The algorithm image uri (build-in or custom)", + ) + + @staticmethod + def create_model_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, "ModelName", type="String", description="An arbitrary name for the model.", min_length=1 + ) + + @staticmethod + def create_stack_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, "StackName", type="String", description="The name to assign to the deployed CF stack.", min_length=1 + ) + + @staticmethod + def create_endpoint_name_parameter(scope: core.Construct, optional=False) -> core.CfnParameter: + return core.CfnParameter( + scope, + "EndpointName", + type="String", + description="The name of the AWS SageMaker's endpoint", + min_length=0 if optional else 1, + ) + + @staticmethod + def create_model_artifact_location_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ModelArtifactLocation", + type="String", + description="Path to model artifact inside assets bucket.", + ) + + @staticmethod + def create_inference_instance_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "InferenceInstance", + type="String", + description="Inference instance that inference requests will be running on. 
E.g., ml.m5.large", + allowed_pattern="^[a-zA-Z0-9_.+-]+\.[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", + min_length=7, + ) + + @staticmethod + def create_batch_inference_data_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BatchInferenceData", + type="String", + description="S3 bucket path (including bucket name) to batch inference data file.", + ) + + @staticmethod + def create_batch_job_output_location_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BatchOutputLocation", + type="String", + description="S3 path (including bucket name) to store the results of the batch job.", + ) + + @staticmethod + def create_data_capture_location_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "DataCaptureLocation", + type="String", + description="S3 path (including bucket name) to store captured data from the Sagemaker endpoint.", + min_length=3, + ) + + @staticmethod + def create_baseline_job_output_location_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineJobOutputLocation", + type="String", + description="S3 path (including bucket name) to store the Data Baseline Job's output.", + min_length=3, + ) + + @staticmethod + def create_monitoring_output_location_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "MonitoringOutputLocation", + type="String", + description="S3 path (including bucket name) to store the output of the Monitoring Schedule.", + min_length=3, + ) + + @staticmethod + def create_schedule_expression_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ScheduleExpression", + type="String", + description="cron expression to run the monitoring schedule. E.g., cron(0 * ? * * *), cron(0 0 ? * * *), etc.", + allowed_pattern="^cron(\\S+\\s){5}\\S+$", + ) + + @staticmethod + def create_baseline_data_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineData", + type="String", + description="Location of the Baseline data in Assets S3 Bucket.", + ) + + @staticmethod + def create_instance_type_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "InstanceType", + type="String", + description="EC2 instance type that model monitoring jobs will be running on. E.g., ml.m5.large", + allowed_pattern="^[a-zA-Z0-9_.+-]+\.[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", + min_length=7, + ) + + @staticmethod + def create_instance_volume_size_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "InstanceVolumeSize", + type="Number", + description="Instance volume size used in model monitoring jobs. E.g., 20", + ) + + @staticmethod + def create_baseline_max_runtime_seconds_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineMaxRuntimeSeconds", + type="String", + default="", + description="Optional Maximum runtime in seconds the baseline job is allowed to run. 
E.g., 3600", + ) + + @staticmethod + def create_monitor_max_runtime_seconds_parameter(scope: core.Construct, monitoring_type: str) -> core.CfnParameter: + max_default = "1800" if monitoring_type in ["ModelQuality", "ModelBias"] else "3600" + return core.CfnParameter( + scope, + "MonitorMaxRuntimeSeconds", + type="Number", + default=max_default, + description=( + f" Required Maximum runtime in seconds the job is allowed to run the {monitoring_type} baseline job. " + + "For data quality and model explainability, this can be up to 3600 seconds for an hourly schedule. " + + "For model bias and model quality hourly schedules, this can be up to 1800 seconds." + ), + min_value=1, + max_value=86400, + ) + + @staticmethod + def create_baseline_job_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineJobName", + type="String", + description="Unique name of the data baseline job", + min_length=3, + max_length=63, + ) + + @staticmethod + def create_monitoring_schedule_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "MonitoringScheduleName", + type="String", + description="Unique name of the monitoring schedule job", + min_length=3, + max_length=63, + ) + + @staticmethod + def create_template_zip_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "TemplateZipFileName", + type="String", + allowed_pattern="^.*\.zip$", + description="The zip file's name containing the CloudFormation template and its parameters files", + ) + + @staticmethod + def create_template_file_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "TemplateFileName", + type="String", + allowed_pattern="^.*\.yaml$", + description="CloudFormation template's file name", + ) + + @staticmethod + def create_stage_params_file_name_parameter(scope: core.Construct, id: str, stage_type: str) -> core.CfnParameter: + return core.CfnParameter( + scope, + id, + type="String", + allowed_pattern="^.*\.json$", + description=f"parameters json file's name for the {stage_type} stage", + ) + + @staticmethod + def create_custom_container_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "CustomImage", + default="", + type="String", + description=( + "Should point to a zip file containing dockerfile and assets for building a custom model. " + "If empty it will be using containers from SageMaker Registry" + ), + ) + + @staticmethod + def create_ecr_repo_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ECRRepoName", + type="String", + description="Name of the Amazon ECR repository. 
This repo will be used to store custom algorithm images.", + allowed_pattern="(?:[a-z0-9]+(?:[._-][a-z0-9]+)*/)*[a-z0-9]+(?:[._-][a-z0-9]+)*", + min_length=1, + ) + + @staticmethod + def create_image_tag_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, "ImageTag", type="String", description="Docker image tag for the custom algorithm", min_length=1 + ) + + @staticmethod + def create_delegated_admin_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "DelegatedAdminAccount", + type="String", + allowed_values=["Yes", "No"], + default="Yes", + description="Is a delegated administrator account used to deploy across accounts?", + ) + + @staticmethod + def create_use_model_registry_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "UseModelRegistry", + type="String", + allowed_values=["Yes", "No"], + default="No", + description="Will Amazon SageMaker's Model Registry be used to provision models?", + ) + + @staticmethod + def create_model_registry_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "CreateModelRegistry", + type="String", + allowed_values=["Yes", "No"], + default="No", + description="Do you want the solution to create the SageMaker Model Package Group Name (i.e., Model Registry)?", + ) + + @staticmethod + def create_model_package_group_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ModelPackageGroupName", + type="String", + description="SageMaker model package group name", + min_length=0, + ) + + @staticmethod + def create_model_package_name_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ModelPackageName", + allowed_pattern="(^arn:aws[a-z\-]*:sagemaker:[a-z0-9\-]*:[0-9]{12}:model-package/.*|^$)", + type="String", + description="The model name (version ARN) in the SageMaker model package group", + ) + + @staticmethod + def create_instance_count_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "MonitoringJobInstanceCount", + type="Number", + default="1", + description="Instance count used by model monitoring job. For example, 1", + ) + + @staticmethod + def create_ground_truth_s3_uri_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "MonitorGroundTruthInput", + type="String", + description="Amazon S3 prefix that contains the ground truth data", + min_length=3, + ) + + @staticmethod + def create_problem_type_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ProblemType", + type="String", + allowed_values=["Regression", "BinaryClassification", "MulticlassClassification"], + description="Problem type.
Possible values: Regression | BinaryClassification | MulticlassClassification", + ) + + @staticmethod + def create_inference_attribute_parameter(scope: core.Construct, job_type: str) -> core.CfnParameter: + return core.CfnParameter( + scope, + f"{job_type}InferenceAttribute", + type="String", + description="Index or JSONpath to locate predicted label(s)", + ) + + @staticmethod + def create_probability_attribute_parameter(scope: core.Construct, job_type: str) -> core.CfnParameter: + return core.CfnParameter( + scope, + f"{job_type}ProbabilityAttribute", + type="String", + description="Index or JSONpath to locate probabilities.", + ) + + @staticmethod + def create_ground_truth_attribute_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "BaselineGroundTruthAttribute", + type="String", + description="Index or JSONpath to locate ground truth label.", + ) + + @staticmethod + def create_probability_threshold_attribute_parameter(scope: core.Construct) -> core.CfnParameter: + return core.CfnParameter( + scope, + "ProbabilityThresholdAttribute", + default="0.5", + type="Number", + description="Threshold to convert probabilities to binaries", + ) + + +class ConditionsFactory: + @staticmethod + def create_custom_algorithms_ecr_repo_arn_provided_condition( + scope: core.Construct, custom_algorithms_ecr_repo_arn: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "CustomECRRepoProvided", + expression=core.Fn.condition_not( + core.Fn.condition_equals(custom_algorithms_ecr_repo_arn.value_as_string, "") + ), + ) + + @staticmethod + def create_kms_key_arn_provided_condition( + scope: core.Construct, kms_key_arn: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "KmsKeyProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(kms_key_arn.value_as_string, "")), + ) + + @staticmethod + def create_git_address_provided_condition( + scope: core.Construct, git_address: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "GitAddressProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(git_address.value_as_string, "")), + ) + + @staticmethod + def create_existing_bucket_provided_condition( + scope: core.Construct, existing_bucket: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "S3BucketProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(existing_bucket.value_as_string, "")), + ) + + @staticmethod + def create_existing_ecr_provided_condition( + scope: core.Construct, existing_ecr_repo: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "ECRProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(existing_ecr_repo.value_as_string, "")), + ) + + @staticmethod + def create_new_bucket_condition(scope: core.Construct, existing_bucket: core.CfnParameter) -> core.CfnCondition: + return core.CfnCondition( + scope, + "CreateS3Bucket", + expression=core.Fn.condition_equals(existing_bucket.value_as_string, ""), + ) + + @staticmethod + def create_new_ecr_repo_condition(scope: core.Construct, existing_ecr_repo: core.CfnParameter) -> core.CfnCondition: + return core.CfnCondition( + scope, + "CreateECRRepo", + expression=core.Fn.condition_equals(existing_ecr_repo.value_as_string, ""), + ) + + @staticmethod + def create_delegated_admin_condition( + scope: core.Construct, delegated_admin_parameter: core.CfnParameter + ) -> core.CfnCondition: + return 
core.CfnCondition( + scope, + "UseDelegatedAdmin", + expression=core.Fn.condition_equals(delegated_admin_parameter.value_as_string, "Yes"), + ) + + @staticmethod + def create_model_registry_condition( + scope: core.Construct, create_model_registry: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "CreateModelRegistryCondition", + expression=core.Fn.condition_equals(create_model_registry.value_as_string, "Yes"), + ) + + @staticmethod + def create_model_registry_provided_condition( + scope: core.Construct, model_package_name: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "ModelRegistryProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(model_package_name.value_as_string, "")), + ) + + @staticmethod + def create_endpoint_name_provided_condition( + scope: core.Construct, endpoint_name: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "EndpointNameProvided", + expression=core.Fn.condition_not(core.Fn.condition_equals(endpoint_name.value_as_string, "")), + ) + + @staticmethod + def create_problem_type_regression_or_multiclass_classification_condition( + scope: core.Construct, problem_type: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "ProblemTypeRegressionOrMulticlassClassification", + expression=core.Fn.condition_or( + core.Fn.condition_equals(problem_type.value_as_string, "Regression"), + core.Fn.condition_equals(problem_type.value_as_string, "MulticlassClassification"), + ), + ) + + @staticmethod + def create_problem_type_binary_classification_condition( + scope: core.Construct, problem_type: core.CfnParameter + ) -> core.CfnCondition: + return core.CfnCondition( + scope, + "ProblemTypeBinaryClassification", + expression=core.Fn.condition_equals(problem_type.value_as_string, "BinaryClassification"), + ) diff --git a/source/lib/blueprints/byom/realtime_inference_pipeline.py b/source/lib/blueprints/byom/realtime_inference_pipeline.py index f490846..fd0b5df 100644 --- a/source/lib/blueprints/byom/realtime_inference_pipeline.py +++ b/source/lib/blueprints/byom/realtime_inference_pipeline.py @@ -1,5 +1,5 @@ # ##################################################################################################################### -# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # # # Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance # # with the License. 
A copy of the License is located at # @@ -25,20 +25,8 @@ from lib.blueprints.byom.pipeline_definitions.sagemaker_endpoint import create_sagemaker_endpoint from lib.blueprints.byom.pipeline_definitions.helpers import suppress_lambda_policies from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_blueprint_bucket_name_parameter, - create_assets_bucket_name_parameter, - create_algorithm_image_uri_parameter, - create_custom_algorithms_ecr_repo_arn_parameter, - create_inference_instance_parameter, - create_kms_key_arn_parameter, - create_model_artifact_location_parameter, - create_model_name_parameter, - create_data_capture_location_parameter, - create_custom_algorithms_ecr_repo_arn_provided_condition, - create_kms_key_arn_provided_condition, - create_model_package_name_parameter, - create_model_registry_provided_condition, - create_model_package_group_name_parameter, + ParameteresFactory as pf, + ConditionsFactory as cf, ) @@ -47,28 +35,33 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Parameteres # - assets_bucket_name = create_assets_bucket_name_parameter(self) - blueprint_bucket_name = create_blueprint_bucket_name_parameter(self) - custom_algorithms_ecr_repo_arn = create_custom_algorithms_ecr_repo_arn_parameter(self) - kms_key_arn = create_kms_key_arn_parameter(self) - algorithm_image_uri = create_algorithm_image_uri_parameter(self) - model_name = create_model_name_parameter(self) - model_artifact_location = create_model_artifact_location_parameter(self) - data_capture_location = create_data_capture_location_parameter(self) - inference_instance = create_inference_instance_parameter(self) - model_package_group_name = create_model_package_group_name_parameter(self) - model_package_name = create_model_package_name_parameter(self) + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + blueprint_bucket_name = pf.create_blueprint_bucket_name_parameter(self) + custom_algorithms_ecr_repo_arn = pf.create_custom_algorithms_ecr_repo_arn_parameter(self) + kms_key_arn = pf.create_kms_key_arn_parameter(self) + algorithm_image_uri = pf.create_algorithm_image_uri_parameter(self) + model_name = pf.create_model_name_parameter(self) + model_artifact_location = pf.create_model_artifact_location_parameter(self) + data_capture_location = pf.create_data_capture_location_parameter(self) + inference_instance = pf.create_inference_instance_parameter(self) + model_package_group_name = pf.create_model_package_group_name_parameter(self) + model_package_name = pf.create_model_package_name_parameter(self) + # add the optional endpoint_name + endpoint_name = pf.create_endpoint_name_parameter(self, optional=True) # Conditions - custom_algorithms_ecr_repo_arn_provided = create_custom_algorithms_ecr_repo_arn_provided_condition( + custom_algorithms_ecr_repo_arn_provided = cf.create_custom_algorithms_ecr_repo_arn_provided_condition( self, custom_algorithms_ecr_repo_arn ) - kms_key_arn_provided = create_kms_key_arn_provided_condition(self, kms_key_arn) - model_registry_provided = create_model_registry_provided_condition(self, model_package_name) + kms_key_arn_provided = cf.create_kms_key_arn_provided_condition(self, kms_key_arn) + model_registry_provided = cf.create_model_registry_provided_condition(self, model_package_name) + endpoint_name_provided = cf.create_endpoint_name_provided_condition(self, endpoint_name) # Resources # # getting blueprint bucket object from its name - will be used later in the stack - 
blueprint_bucket = s3.Bucket.from_bucket_name(self, "BlueprintBucket", blueprint_bucket_name.value_as_string) + blueprint_bucket = s3.Bucket.from_bucket_name( + self, "ImportedBlueprintBucket", blueprint_bucket_name.value_as_string + ) # provision api gateway and lambda for inference using solution constructs inference_api_gateway = aws_apigateway_lambda.ApiGatewayToLambda( @@ -87,7 +80,7 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: "proxy": False, }, ) - # add supressions + # add suppressions inference_api_gateway.lambda_function.node.default_child.cfn_options.metadata = suppress_lambda_policies() provision_resource = inference_api_gateway.api_gateway.root.add_resource("inference") provision_resource.add_method("POST") @@ -106,6 +99,9 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: ecr_repo_arn_provided_condition=custom_algorithms_ecr_repo_arn_provided, kms_key_arn_provided_condition=kms_key_arn_provided, model_registry_provided_condition=model_registry_provided, + is_realtime_pipeline=True, + endpoint_name=endpoint_name, + endpoint_name_provided=endpoint_name_provided, ) # create sagemaker model @@ -121,15 +117,15 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: model_name=model_name.value_as_string, ) - # Create Sagemaker EndpointConfg + # Create Sagemaker EndpointConfig sagemaker_endpoint_config = create_sagemaker_endpoint_config( - self, - "MLOpsSagemakerEndpointConfig", - sagemaker_model.attr_model_name, - model_name.value_as_string, - inference_instance.value_as_string, - data_capture_location.value_as_string, - core.Fn.condition_if( + scope=self, + id="MLOpsSagemakerEndpointConfig", + sagemaker_model_name=sagemaker_model.attr_model_name, + model_name=model_name.value_as_string, + inference_instance=inference_instance.value_as_string, + data_capture_location=data_capture_location.value_as_string, + kms_key_arn=core.Fn.condition_if( kms_key_arn_provided.logical_id, kms_key_arn.value_as_string, core.Aws.NO_VALUE ).to_string(), ) @@ -139,10 +135,15 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: # create Sagemaker endpoint sagemaker_endpoint = create_sagemaker_endpoint( - self, - "MLOpsSagemakerEndpoint", - sagemaker_endpoint_config.attr_endpoint_config_name, - model_name.value_as_string, + scope=self, + id="MLOpsSagemakerEndpoint", + endpoint_config_name=sagemaker_endpoint_config.attr_endpoint_config_name, + endpoint_name=core.Fn.condition_if( + endpoint_name_provided.logical_id, + endpoint_name.value_as_string, + core.Aws.NO_VALUE, + ).to_string(), + model_name=model_name.value_as_string, ) # add dependency on endpoint config diff --git a/source/lib/blueprints/byom/single_account_codepipeline.py b/source/lib/blueprints/byom/single_account_codepipeline.py index 4d3dc01..b16c5de 100644 --- a/source/lib/blueprints/byom/single_account_codepipeline.py +++ b/source/lib/blueprints/byom/single_account_codepipeline.py @@ -29,14 +29,7 @@ suppress_sns, suppress_cloudformation_action, ) -from lib.blueprints.byom.pipeline_definitions.templates_parameters import ( - create_notification_email_parameter, - create_template_zip_name_parameter, - create_template_file_name_parameter, - create_stage_params_file_name_parameter, - create_assets_bucket_name_parameter, - create_stack_name_parameter, -) +from lib.blueprints.byom.pipeline_definitions.templates_parameters import ParameteresFactory as pf class SingleAccountCodePipelineStack(core.Stack): @@ -44,15 +37,15 @@ def __init__(self, scope: 
core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Parameteres # - notification_email = create_notification_email_parameter(self) - template_zip_name = create_template_zip_name_parameter(self) - template_file_name = create_template_file_name_parameter(self) - template_params_file_name = create_stage_params_file_name_parameter(self, "TEMPLATE_PARAMS_NAME", "main") - assets_bucket_name = create_assets_bucket_name_parameter(self) - stack_name = create_stack_name_parameter(self) + notification_email = pf.create_notification_email_parameter(self) + template_zip_name = pf.create_template_zip_name_parameter(self) + template_file_name = pf.create_template_file_name_parameter(self) + template_params_file_name = pf.create_stage_params_file_name_parameter(self, "TemplateParamsName", "main") + assets_bucket_name = pf.create_assets_bucket_name_parameter(self) + stack_name = pf.create_stack_name_parameter(self) # Resources # - assets_bucket = s3.Bucket.from_bucket_name(self, "AssetsBucket", assets_bucket_name.value_as_string) + assets_bucket = s3.Bucket.from_bucket_name(self, "ImportedAssetsBucket", assets_bucket_name.value_as_string) # create sns topic and subscription pipeline_notification_topic = sns.Topic( @@ -119,7 +112,7 @@ def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: ) ) - # add cfn supressions + # add cfn suppressions pipeline_child_nodes = single_account_pipeline.node.find_all() pipeline_child_nodes[1].node.default_child.cfn_options.metadata = suppress_pipeline_bucket() pipeline_child_nodes[6].node.default_child.cfn_options.metadata = suppress_iam_complex() diff --git a/source/requirements.txt b/source/requirements.txt index 09c336b..7fea932 100644 --- a/source/requirements.txt +++ b/source/requirements.txt @@ -1,34 +1,34 @@ -aws-cdk.assets==1.96.0 -aws-cdk.aws-apigateway==1.96.0 -aws-cdk.aws-cloudformation==1.96.0 -aws-cdk.aws-cloudwatch==1.96.0 -aws-cdk.aws-codebuild==1.96.0 -aws-cdk.aws-codecommit==1.96.0 -aws-cdk.aws-codedeploy==1.96.0 -aws-cdk.aws-codepipeline==1.96.0 -aws-cdk.aws-codepipeline-actions==1.96.0 -aws-cdk.core==1.96.0 -aws-cdk.aws-ecr==1.96.0 -aws-cdk.aws-ecr-assets==1.96.0 -aws-cdk.aws-events==1.96.0 -aws-cdk.aws-events-targets==1.96.0 -aws-cdk.aws-iam==1.96.0 -aws-cdk.aws-kms==1.96.0 -aws-cdk.aws-lambda==1.96.0 -aws-cdk.aws-lambda-event-sources==1.96.0 -aws-cdk.aws-logs==1.96.0 -aws-cdk.aws-s3==1.96.0 -aws-cdk.aws-s3-assets==1.96.0 -aws-cdk.aws-s3-deployment==1.96.0 -aws-cdk.aws-s3-notifications==1.96.0 -aws-cdk.aws-sagemaker==1.96.0 -aws-cdk.aws-sns==1.96.0 -aws-cdk.aws-sns-subscriptions==1.96.0 -aws-cdk.core==1.96.0 -aws-cdk.custom-resources==1.96.0 -aws-cdk.region-info==1.96.0 -aws-solutions-constructs.aws-apigateway-lambda==1.96.0 -aws-solutions-constructs.aws-lambda-sagemakerendpoint==1.96.0 -aws-solutions-constructs.core==1.96.0 -aws-cdk.cloudformation-include==1.96.0 -aws-cdk.aws-cloudformation==1.96.0 +aws-cdk.assets==1.117.0 +aws-cdk.aws-apigateway==1.117.0 +aws-cdk.aws-cloudformation==1.117.0 +aws-cdk.aws-cloudwatch==1.117.0 +aws-cdk.aws-codebuild==1.117.0 +aws-cdk.aws-codecommit==1.117.0 +aws-cdk.aws-codedeploy==1.117.0 +aws-cdk.aws-codepipeline==1.117.0 +aws-cdk.aws-codepipeline-actions==1.117.0 +aws-cdk.core==1.117.0 +aws-cdk.aws-ecr==1.117.0 +aws-cdk.aws-ecr-assets==1.117.0 +aws-cdk.aws-events==1.117.0 +aws-cdk.aws-events-targets==1.117.0 +aws-cdk.aws-iam==1.117.0 +aws-cdk.aws-kms==1.117.0 +aws-cdk.aws-lambda==1.117.0 +aws-cdk.aws-lambda-event-sources==1.117.0 +aws-cdk.aws-logs==1.117.0 
+aws-cdk.aws-s3==1.117.0 +aws-cdk.aws-s3-assets==1.117.0 +aws-cdk.aws-s3-deployment==1.117.0 +aws-cdk.aws-s3-notifications==1.117.0 +aws-cdk.aws-sagemaker==1.117.0 +aws-cdk.aws-sns==1.117.0 +aws-cdk.aws-sns-subscriptions==1.117.0 +aws-cdk.core==1.117.0 +aws-cdk.custom-resources==1.117.0 +aws-cdk.region-info==1.117.0 +aws-solutions-constructs.aws-apigateway-lambda==1.117.0 +aws-solutions-constructs.aws-lambda-sagemakerendpoint==1.117.0 +aws-solutions-constructs.core==1.117.0 +aws-cdk.cloudformation-include==1.117.0 +aws-cdk.aws-cloudformation==1.117.0
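
The refactored `templates_parameters.py` exposes the former helper functions as static methods on `ParameteresFactory` and `ConditionsFactory`, and the real-time inference pipeline uses them to make the endpoint name optional. The sketch below is illustrative only and not part of this changeset: it assumes `aws-cdk.core==1.117.0` is installed, that the repository's `source/` directory is on `PYTHONPATH` so the import resolves, and the stack name `ExampleStack` is made up for the example.

```
# Minimal sketch of how a blueprint stack might consume the new factories.
from aws_cdk import core

from lib.blueprints.byom.pipeline_definitions.templates_parameters import (
    ParameteresFactory as pf,
    ConditionsFactory as cf,
)


class ExampleStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # CloudFormation parameters come from the factory's static methods
        endpoint_name = pf.create_endpoint_name_parameter(self, optional=True)
        # Conditions test whether the optional parameter was actually provided
        endpoint_name_provided = cf.create_endpoint_name_provided_condition(self, endpoint_name)
        # Fn::If resolves to the user-supplied name, or to AWS::NoValue when the
        # parameter was left empty; the resulting token is what the pipeline passes
        # to the SageMaker endpoint resource so the endpoint name stays optional.
        resolved_endpoint_name = core.Fn.condition_if(
            endpoint_name_provided.logical_id,
            endpoint_name.value_as_string,
            core.Aws.NO_VALUE,
        ).to_string()


app = core.App()
ExampleStack(app, "ExampleStack")
app.synth()
```

At synth time `resolved_endpoint_name` is an unresolved `Fn::If` token; CloudFormation later substitutes either the `EndpointName` parameter value or drops the property entirely, which is why a single template can serve both the named and the auto-named endpoint cases.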