From 672b5fe4918fef562d7816d8e9833269c801156f Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 3 Apr 2023 14:18:50 -0700 Subject: [PATCH] Remove duplicate EC2 cleanup jobs (#1151) Fast follow of https://github.com/lyft/cartography/pull/1146 - removes duplicate calls to cleanup jobs. All these manual jobs will soon be deprecated as we refactor modules to the new data model. --- .../aws_import_ec2_key_pairs_cleanup.json | 20 ------------------- ...import_ec2_security_groupinfo_cleanup.json | 5 ----- .../cleanup/aws_import_volumes_cleanup.json | 20 ------------------- ...aws_ingest_network_interfaces_cleanup.json | 15 -------------- cartography/intel/aws/ec2/instances.py | 5 ----- cartography/intel/aws/ec2/key_pairs.py | 5 +++-- .../intel/aws/ec2/network_interfaces.py | 3 +++ cartography/intel/aws/ec2/security_groups.py | 3 +++ cartography/intel/aws/ec2/subnets.py | 3 +++ cartography/intel/aws/ec2/volumes.py | 9 +++------ 10 files changed, 15 insertions(+), 73 deletions(-) delete mode 100644 cartography/data/jobs/cleanup/aws_import_ec2_key_pairs_cleanup.json delete mode 100644 cartography/data/jobs/cleanup/aws_import_volumes_cleanup.json diff --git a/cartography/data/jobs/cleanup/aws_import_ec2_key_pairs_cleanup.json b/cartography/data/jobs/cleanup/aws_import_ec2_key_pairs_cleanup.json deleted file mode 100644 index f7065396b..000000000 --- a/cartography/data/jobs/cleanup/aws_import_ec2_key_pairs_cleanup.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "statements": [ - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(n:EC2KeyPair) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)<-[r:SSH_LOGIN_TO]-(:EC2KeyPair) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[r:RESOURCE]->(:EC2KeyPair) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - } - ], - "name": "cleanup EC2KeyPair" -} diff --git a/cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json b/cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json index 89f1addee..dd9344afb 100644 --- a/cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +++ b/cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json @@ -1,10 +1,5 @@ { "statements": [ - { - "query": "MATCH (n:EC2SecurityGroup)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, { "query": "MATCH (n:IpRule)-[:MEMBER_OF_EC2_SECURITY_GROUP]->(:EC2SecurityGroup)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", "iterative": true, diff --git a/cartography/data/jobs/cleanup/aws_import_volumes_cleanup.json b/cartography/data/jobs/cleanup/aws_import_volumes_cleanup.json deleted file mode 100644 index d3c59e718..000000000 --- a/cartography/data/jobs/cleanup/aws_import_volumes_cleanup.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "statements": [ - { - "query": "MATCH (n:EBSVolume)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:EBSVolume)<-[r:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:EBSVolume)-[r:ATTACHED_TO_EC2_INSTANCE]->(:EC2Instance)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - } - ], - "name": "cleanup EC2 Volumes" -} diff --git a/cartography/data/jobs/cleanup/aws_ingest_network_interfaces_cleanup.json b/cartography/data/jobs/cleanup/aws_ingest_network_interfaces_cleanup.json index cc4a411aa..9eed8cab1 100644 --- a/cartography/data/jobs/cleanup/aws_ingest_network_interfaces_cleanup.json +++ b/cartography/data/jobs/cleanup/aws_ingest_network_interfaces_cleanup.json @@ -20,25 +20,10 @@ "iterationsize": 100, "iterative": true }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[r:PART_OF_SUBNET]->(:EC2Subnet) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterationsize": 100, - "iterative": true - }, { "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:LoadBalancer)-[r:PART_OF_SUBNET]->(:EC2Subnet) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", "iterationsize": 100, "iterative": true - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(n:NetworkInterface) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:AWSAccount{id: $AWS_ID})-[r:RESOURCE]->(:NetworkInterface) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 } ], "name": "cleanup NetworkInterface" diff --git a/cartography/intel/aws/ec2/instances.py b/cartography/intel/aws/ec2/instances.py index 869585677..11bac4a38 100644 --- a/cartography/intel/aws/ec2/instances.py +++ b/cartography/intel/aws/ec2/instances.py @@ -308,11 +308,6 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) logger.debug("Running EC2 instance cleanup") GraphJob.from_node_schema(EC2ReservationSchema(), common_job_parameters).run(neo4j_session) GraphJob.from_node_schema(EC2InstanceSchema(), common_job_parameters).run(neo4j_session) - GraphJob.from_node_schema(EC2SubnetSchema(), common_job_parameters).run(neo4j_session) - GraphJob.from_node_schema(EC2SecurityGroupSchema(), common_job_parameters).run(neo4j_session) - GraphJob.from_node_schema(EC2KeyPairSchema(), common_job_parameters).run(neo4j_session) - GraphJob.from_node_schema(EC2NetworkInterfaceSchema(), common_job_parameters).run(neo4j_session) - GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session) @timeit diff --git a/cartography/intel/aws/ec2/key_pairs.py b/cartography/intel/aws/ec2/key_pairs.py index ede4f46af..df6dd4621 100644 --- a/cartography/intel/aws/ec2/key_pairs.py +++ b/cartography/intel/aws/ec2/key_pairs.py @@ -6,8 +6,9 @@ import neo4j from .util import get_botocore_config +from cartography.graph.job import GraphJob +from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema from cartography.util import aws_handle_regions -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -55,7 +56,7 @@ def load_ec2_key_pairs( @timeit def cleanup_ec2_key_pairs(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job('aws_import_ec2_key_pairs_cleanup.json', neo4j_session, common_job_parameters) + GraphJob.from_node_schema(EC2KeyPairSchema(), common_job_parameters).run(neo4j_session) @timeit diff --git a/cartography/intel/aws/ec2/network_interfaces.py b/cartography/intel/aws/ec2/network_interfaces.py index bb68a615f..612d93586 100644 --- a/cartography/intel/aws/ec2/network_interfaces.py +++ b/cartography/intel/aws/ec2/network_interfaces.py @@ -7,6 +7,8 @@ import neo4j from .util import get_botocore_config +from cartography.graph.job import GraphJob +from cartography.models.aws.ec2.networkinterfaces import EC2NetworkInterfaceSchema from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job from cartography.util import timeit @@ -265,6 +267,7 @@ def load(neo4j_session: neo4j.Session, data: List[Dict], region: str, aws_accoun @timeit def cleanup_network_interfaces(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: run_cleanup_job('aws_ingest_network_interfaces_cleanup.json', neo4j_session, common_job_parameters) + GraphJob.from_node_schema(EC2NetworkInterfaceSchema(), common_job_parameters).run(neo4j_session) @timeit diff --git a/cartography/intel/aws/ec2/security_groups.py b/cartography/intel/aws/ec2/security_groups.py index 8b102a9eb..9819f6490 100644 --- a/cartography/intel/aws/ec2/security_groups.py +++ b/cartography/intel/aws/ec2/security_groups.py @@ -7,6 +7,8 @@ import neo4j from .util import get_botocore_config +from cartography.graph.job import GraphJob +from cartography.models.aws.ec2.securitygroups import EC2SecurityGroupSchema from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job from cartography.util import timeit @@ -146,6 +148,7 @@ def cleanup_ec2_security_groupinfo(neo4j_session: neo4j.Session, common_job_para neo4j_session, common_job_parameters, ) + GraphJob.from_node_schema(EC2SecurityGroupSchema(), common_job_parameters).run(neo4j_session) @timeit diff --git a/cartography/intel/aws/ec2/subnets.py b/cartography/intel/aws/ec2/subnets.py index 52a93b898..a46b39c14 100644 --- a/cartography/intel/aws/ec2/subnets.py +++ b/cartography/intel/aws/ec2/subnets.py @@ -6,6 +6,8 @@ import neo4j from .util import get_botocore_config +from cartography.graph.job import GraphJob +from cartography.models.aws.ec2.subnets import EC2SubnetSchema from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job from cartography.util import timeit @@ -76,6 +78,7 @@ def load_subnets( @timeit def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: run_cleanup_job('aws_ingest_subnets_cleanup.json', neo4j_session, common_job_parameters) + GraphJob.from_node_schema(EC2SubnetSchema(), common_job_parameters).run(neo4j_session) @timeit diff --git a/cartography/intel/aws/ec2/volumes.py b/cartography/intel/aws/ec2/volumes.py index 7fb5dcf73..6de03c7d4 100644 --- a/cartography/intel/aws/ec2/volumes.py +++ b/cartography/intel/aws/ec2/volumes.py @@ -6,8 +6,9 @@ import boto3 import neo4j +from cartography.graph.job import GraphJob +from cartography.models.aws.ec2.volumes import EBSVolumeSchema from cartography.util import aws_handle_regions -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -97,11 +98,7 @@ def load_volume_relationships( @timeit def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job( - 'aws_import_volumes_cleanup.json', - neo4j_session, - common_job_parameters, - ) + GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session) @timeit