diff --git a/infrastructure/terraform/per_account/modules/vpc/lambda-connectivity-vpc.tf b/infrastructure/terraform/per_account/modules/vpc/lambda-connectivity-vpc.tf index f62f3652b..acd080111 100644 --- a/infrastructure/terraform/per_account/modules/vpc/lambda-connectivity-vpc.tf +++ b/infrastructure/terraform/per_account/modules/vpc/lambda-connectivity-vpc.tf @@ -137,3 +137,50 @@ resource "aws_vpc_endpoint_security_group_association" "ldap_endpoint" { vpc_endpoint_id = aws_vpc_endpoint.ldap_endpoint.id security_group_id = aws_security_group.sds-ldap.id } + +resource "aws_vpc_endpoint" "state_machine" { + vpc_id = aws_vpc.lambda-connectivity.id + vpc_endpoint_type = "Interface" + service_name = "com.amazonaws.eu-west-2.states" + private_dns_enabled = true + + tags = { + Name = "${var.prefix}-state-machine-vpc-endpoint-${var.environment}" + Environment = var.environment + } +} + +resource "aws_vpc_endpoint_subnet_association" "state_machine" { + count = var.subnet_count.private + vpc_endpoint_id = aws_vpc_endpoint.state_machine.id + subnet_id = element(aws_subnet.lambda-connectivity-private, count.index).id +} + +resource "aws_vpc_endpoint_security_group_association" "state_machine" { + vpc_endpoint_id = aws_vpc_endpoint.state_machine.id + security_group_id = aws_security_group.sds-ldap.id +} + + +resource "aws_vpc_endpoint" "notify_lambda" { + vpc_id = aws_vpc.lambda-connectivity.id + vpc_endpoint_type = "Interface" + service_name = "com.amazonaws.eu-west-2.lambda" + private_dns_enabled = true + + tags = { + Name = "${var.prefix}-notify-lambda-vpc-endpoint-${var.environment}" + Environment = var.environment + } +} + +resource "aws_vpc_endpoint_subnet_association" "notify_lambda" { + count = var.subnet_count.private + vpc_endpoint_id = aws_vpc_endpoint.notify_lambda.id + subnet_id = element(aws_subnet.lambda-connectivity-private, count.index).id +} + +resource "aws_vpc_endpoint_security_group_association" "notify_lambda" { + vpc_endpoint_id = aws_vpc_endpoint.notify_lambda.id + security_group_id = aws_security_group.sds-ldap.id +} diff --git a/infrastructure/terraform/per_workspace/modules/etl/sds/trigger/main.tf b/infrastructure/terraform/per_workspace/modules/etl/sds/trigger/main.tf index e1862558f..e6345a275 100644 --- a/infrastructure/terraform/per_workspace/modules/etl/sds/trigger/main.tf +++ b/infrastructure/terraform/per_workspace/modules/etl/sds/trigger/main.tf @@ -9,7 +9,7 @@ module "lambda_function" { description = "${replace(var.workspace_prefix, "_", "-")} ${var.etl_name} (${var.trigger_name}) trigger lambda function" handler = "etl.sds.trigger.${var.trigger_name}.${var.trigger_name}.handler" runtime = var.python_version - timeout = 120 + timeout = 10 timeouts = { create = "5m" diff --git a/scripts/etl/clear_state_inputs.py b/scripts/etl/clear_state_inputs.py index cf0766f55..ed3daa5ef 100644 --- a/scripts/etl/clear_state_inputs.py +++ b/scripts/etl/clear_state_inputs.py @@ -5,7 +5,7 @@ """ from collections import deque - +import os import boto3 from etl_utils.constants import CHANGELOG_NUMBER, WorkerKey from etl_utils.io import pkl_dumps_lz4 @@ -34,6 +34,11 @@ def main(): ) s3_client.delete_object(Bucket=etl_bucket, Key=CHANGELOG_NUMBER) + if os.environ.get("SET_CHANGELOG_NUMBER"): + s3_client.put_object( + Bucket=etl_bucket, Key=CHANGELOG_NUMBER, Body=EXPECTED_CHANGELOG_NUMBER + ) + if __name__ == "__main__": main() diff --git a/scripts/etl/etl.mk b/scripts/etl/etl.mk index 225d45339..7a8f42d6f 100644 --- a/scripts/etl/etl.mk +++ b/scripts/etl/etl.mk @@ -1,2 +1,4 @@ +SET_CHANGELOG_NUMBER := + etl--clear-state: aws--login ## Clear the ETL state - AWS_DEFAULT_REGION=$(AWS_DEFAULT_REGION) AWS_ACCESS_KEY_ID=$(AWS_ACCESS_KEY_ID) AWS_SECRET_ACCESS_KEY=$(AWS_SECRET_ACCESS_KEY) AWS_SESSION_TOKEN=$(AWS_SESSION_TOKEN) poetry run python scripts/etl/clear_state_inputs.py + AWS_DEFAULT_REGION=$(AWS_DEFAULT_REGION) AWS_ACCESS_KEY_ID=$(AWS_ACCESS_KEY_ID) AWS_SECRET_ACCESS_KEY=$(AWS_SECRET_ACCESS_KEY) AWS_SESSION_TOKEN=$(AWS_SESSION_TOKEN) SET_CHANGELOG_NUMBER=$(SET_CHANGELOG_NUMBER) poetry run python scripts/etl/clear_state_inputs.py diff --git a/src/etl/sds/trigger/update/operations.py b/src/etl/sds/trigger/update/operations.py index efa5ce67e..75177a1a2 100644 --- a/src/etl/sds/trigger/update/operations.py +++ b/src/etl/sds/trigger/update/operations.py @@ -6,6 +6,7 @@ from etl_utils.ldif.model import DistinguishedName from etl_utils.trigger.operations import object_exists from sds.domain.changelog import ChangelogRecord +from nhs_context_logging import log_action if TYPE_CHECKING: from mypy_boto3_s3 import S3Client @@ -19,17 +20,6 @@ class BadChangeLogNumber(Exception): pass -def prepare_ldap_client( - ldap_client: LdapClientProtocol, ldap: LdapModuleProtocol, cert_file, key_file -): - ldap_client.set_option(ldap.OPT_X_TLS_CERTFILE, str(cert_file)) - ldap_client.set_option(ldap.OPT_X_TLS_KEYFILE, str(key_file)) - ldap_client.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) - ldap_client.set_option(ldap.OPT_X_TLS_NEWCTX, 0) - ldap_client.simple_bind_s() - return ldap_client - - def get_certs_from_s3_truststore( s3_client: "S3Client", truststore_bucket: str, cert_file: Path, key_file: Path ): @@ -37,6 +27,18 @@ def get_certs_from_s3_truststore( s3_client.download_file(truststore_bucket, key_path.name, key_path) +def prepare_ldap_client( + ldap: LdapModuleProtocol, ldap_host: str, cert_file: str, key_file: str +) -> LdapClientProtocol: + ldap_client = ldap.initialize(ldap_host) + ldap_client.set_option(ldap.OPT_X_TLS_CERTFILE, cert_file) + ldap_client.set_option(ldap.OPT_X_TLS_KEYFILE, key_file) + ldap_client.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) + ldap_client.set_option(ldap.OPT_X_TLS_NEWCTX, 0) + ldap_client.simple_bind_s() + return ldap_client + + def get_current_changelog_number_from_s3(s3_client: "S3Client", bucket: str) -> int: if not object_exists(s3_client=s3_client, bucket=bucket, key=CHANGELOG_NUMBER): raise NoExistingChangeLogNumber( @@ -49,6 +51,18 @@ def get_current_changelog_number_from_s3(s3_client: "S3Client", bucket: str) -> return int(changelog_number) +@log_action(log_args=["base", "scope", "filterstr", "attrlist"], log_result=True) +def _ldap_search( + ldap_client: LdapClientProtocol, + base: str, + scope: str, + filterstr: str, + attrlist: list[str] = None, +): + ldap_client.search(base=base, scope=scope, filterstr=filterstr, attrlist=attrlist) + return ldap_client.result() + + def get_latest_changelog_number_from_ldap( ldap_client: LdapClientProtocol, ldap: LdapModuleProtocol ) -> int: @@ -57,17 +71,15 @@ def get_latest_changelog_number_from_ldap( but currently doesn't. Speak with Arvind to understand why not. In the meanwhile, we return the value int(2) as a placeholder. """ - ldap_client.search( + _, (record,) = _ldap_search( + ldap_client=ldap_client, base="cn=Changelog,o=nhs", scope=ldap.SCOPE_BASE, filterstr="(objectClass=*)", attrlist=["firstchangenumber", "lastchangenumber"], ) - _, (record,) = ldap_client.result() - # return record["lastchangenumber"] <-- think this is what we need to return, but currently empty - print(record) # noqa - return 2 + return 0 def get_changelog_entries_from_ldap( @@ -81,12 +93,12 @@ def get_changelog_entries_from_ldap( for changelog_number in range( current_changelog_number + 1, latest_changelog_number + 1 ): - ldap_client.search( + _, (record,) = _ldap_search( + ldap_client=ldap_client, base="cn=Changelog,o=nhs", scope=ldap.SCOPE_ONELEVEL, filterstr=f"(changenumber={changelog_number})", ) - _, (record,) = ldap_client.result() changelog_records.append(record) return changelog_records diff --git a/src/etl/sds/trigger/update/steps.py b/src/etl/sds/trigger/update/steps.py index 42c6ccd93..d5e7c3c10 100644 --- a/src/etl/sds/trigger/update/steps.py +++ b/src/etl/sds/trigger/update/steps.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, TypedDict from etl_utils.constants import LDIF_RECORD_DELIMITER -from etl_utils.ldap_typing import LdapClientProtocol, LdapModuleProtocol +from etl_utils.ldap_typing import LdapModuleProtocol from etl_utils.trigger.model import StateMachineInput from etl_utils.trigger.operations import start_execution, validate_state_keys_are_empty @@ -26,8 +26,8 @@ class Cache(TypedDict): step_functions_client: "SFNClient" state_machine_arn: str truststore_bucket: str - ldap_client: LdapClientProtocol ldap: LdapModuleProtocol + ldap_host: str cert_file: Path key_file: Path etl_bucket: str @@ -38,15 +38,6 @@ class CorruptChangelogNumber(Exception): pass -def _prepare_ldap_client(data, cache: Cache): - return prepare_ldap_client( - ldap_client=cache["ldap_client"], - ldap=cache["ldap"], - cert_file=cache["cert_file"], - key_file=cache["key_file"], - ) - - def _get_certs_from_s3_truststore(data, cache: Cache): return get_certs_from_s3_truststore( s3_client=cache["s3_client"], @@ -56,6 +47,15 @@ def _get_certs_from_s3_truststore(data, cache: Cache): ) +def _prepare_ldap_client(data, cache: Cache): + return prepare_ldap_client( + ldap=cache["ldap"], + ldap_host=cache["ldap_host"], + cert_file=str(cache["cert_file"]), + key_file=str(cache["key_file"]), + ) + + def _get_current_changelog_number_from_s3(data, cache: Cache): return get_current_changelog_number_from_s3( s3_client=cache["s3_client"], bucket=cache["etl_bucket"] @@ -142,8 +142,8 @@ def _put_to_history(data, cache: Cache): steps = [ - _prepare_ldap_client, _get_certs_from_s3_truststore, + _prepare_ldap_client, _get_current_changelog_number_from_s3, _get_latest_changelog_number_from_ldap, _create_state_machine_input, diff --git a/src/etl/sds/trigger/update/update.py b/src/etl/sds/trigger/update/update.py index 0cc245f14..e5bc8010d 100644 --- a/src/etl/sds/trigger/update/update.py +++ b/src/etl/sds/trigger/update/update.py @@ -32,6 +32,7 @@ class ChangelogTriggerEnvironment(BaseEnvironment): "cert_file": Path(f"/tmp/{ENVIRONMENT.CPM_FQDN}.crt"), "key_file": Path(f"/tmp/{ENVIRONMENT.CPM_FQDN}.key"), "etl_bucket": ENVIRONMENT.ETL_BUCKET, + "ldap_host": ENVIRONMENT.LDAP_HOST, } @@ -40,7 +41,6 @@ def handler(event={}, context=None): import ldap CACHE["ldap"] = ldap - CACHE["ldap_client"] = ldap.initialize(ENVIRONMENT.LDAP_HOST) step_chain = StepChain(step_chain=steps, step_decorators=[log_action]) step_chain.run(cache=CACHE) diff --git a/src/layers/etl_utils/ldap_typing/__init__.py b/src/layers/etl_utils/ldap_typing/__init__.py index 2deccd30b..07c70c501 100644 --- a/src/layers/etl_utils/ldap_typing/__init__.py +++ b/src/layers/etl_utils/ldap_typing/__init__.py @@ -20,3 +20,5 @@ class LdapModuleProtocol(Protocol): SCOPE_BASE: str SCOPE_ONELEVEL: str + + def initialize(self, host: str) -> LdapClientProtocol: ...