From 36abe8257070bd77e58333ae9e125d21f523d237 Mon Sep 17 00:00:00 2001
From: Noel Merket <noel.merket@nrel.gov>
Date: Tue, 24 Oct 2023 12:35:45 -0600
Subject: [PATCH 1/7] Adding black formatter pre-commit

---
 .pre-commit-config.yaml | 6 +
 buildstockbatch/__version__.py | 20 +-
 buildstockbatch/aws/aws.py | 1435 +++++++++--------
 buildstockbatch/aws/awsbase.py | 205 +--
 buildstockbatch/aws/s3_assets/bsb_post.py | 58 +-
 .../aws/s3_assets/lambda_function.py | 6 +-
 .../aws/s3_assets/setup_postprocessing.py | 22 +-
 buildstockbatch/base.py | 637 +++++---
 buildstockbatch/eagle.py | 709 ++++----
 buildstockbatch/exc.py | 1 -
 buildstockbatch/local.py | 304 ++--
 buildstockbatch/postprocessing.py | 588 ++++---
 buildstockbatch/sampler/__init__.py | 5 +-
 buildstockbatch/sampler/base.py | 18 +-
 buildstockbatch/sampler/commercial_sobol.py | 126 +-
 buildstockbatch/sampler/downselect.py | 48 +-
 buildstockbatch/sampler/precomputed.py | 11 +-
 buildstockbatch/sampler/residential_quota.py | 89 +-
 buildstockbatch/sampler/sobol_lib.py | 474 ++++--
 buildstockbatch/test/conftest.py | 114 +-
 buildstockbatch/test/shared_testing_stuff.py | 8 +-
 buildstockbatch/test/test_base.py | 318 ++--
 buildstockbatch/test/test_docker.py | 25 +-
 buildstockbatch/test/test_eagle.py | 493 +++---
 buildstockbatch/test/test_local.py | 77 +-
 buildstockbatch/test/test_postprocessing.py | 104 +-
 buildstockbatch/test/test_utils.py | 29 +-
 buildstockbatch/test/test_validation.py | 360 +++--
 buildstockbatch/utils.py | 52 +-
 .../workflow_generator/__init__.py | 2 +-
 buildstockbatch/workflow_generator/base.py | 15 +-
 .../workflow_generator/commercial.py | 150 +-
 .../workflow_generator/residential_hpxml.py | 572 +++----
 .../test_workflow_generator.py | 465 +++---
 docs/conf.py | 103 +-
 setup.py | 116 +-
 36 files changed, 4525 insertions(+), 3240 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..489940ae
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 23.10.1
+    hooks:
+      - id: black
+        language_version: python3.11
\ No newline at end of file
diff --git a/buildstockbatch/__version__.py b/buildstockbatch/__version__.py
index cfab5a93..b5750e31 100644
--- a/buildstockbatch/__version__.py
+++ b/buildstockbatch/__version__.py
@@ -1,12 +1,14 @@
 import datetime as dt
 
-__title__ = 'buildstockbatch'
-__description__ = 'Executing BuildStock projects on batch infrastructure.'
-__url__ = 'http://github.com/NREL/buildstockbatch'
-__version__ = '2023.10.0'
-__schema_version__ = '0.3'
-__author__ = 'Noel Merket'
-__author_email__ = 'noel.merket@nrel.gov'
-__license__ = 'BSD-3'
-__copyright__ = 'Copyright {} The Alliance for Sustainable Energy'.format(dt.date.today().year)
+__title__ = "buildstockbatch"
+__description__ = "Executing BuildStock projects on batch infrastructure."
+__url__ = "http://github.com/NREL/buildstockbatch" +__version__ = "2023.10.0" +__schema_version__ = "0.3" +__author__ = "Noel Merket" +__author_email__ = "noel.merket@nrel.gov" +__license__ = "BSD-3" +__copyright__ = "Copyright {} The Alliance for Sustainable Energy".format( + dt.date.today().year +) diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index c9064303..5b8c7b04 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -41,13 +41,18 @@ from buildstockbatch.base import ValidationError, BuildStockBatchBase from buildstockbatch.aws.awsbase import AwsJobBase from buildstockbatch import postprocessing -from buildstockbatch.utils import ContainerRuntime, log_error_details, get_project_configuration, read_csv +from buildstockbatch.utils import ( + ContainerRuntime, + log_error_details, + get_project_configuration, + read_csv, +) logger = logging.getLogger(__name__) def upload_file_to_s3(*args, **kwargs): - s3 = boto3.client('s3') + s3 = boto3.client("s3") s3.upload_file(*args, **kwargs) @@ -57,42 +62,36 @@ def upload_directory_to_s3(local_directory, bucket, prefix): def filename_generator(): for dirpath, dirnames, filenames in os.walk(local_dir_abs): for filename in filenames: - if filename.startswith('.'): + if filename.startswith("."): continue local_filepath = pathlib.Path(dirpath, filename) s3_key = pathlib.PurePosixPath( - prefix, - local_filepath.relative_to(local_dir_abs) + prefix, local_filepath.relative_to(local_dir_abs) ) yield local_filepath, s3_key - logger.debug('Uploading {} => {}/{}'.format(local_dir_abs, bucket, prefix)) + logger.debug("Uploading {} => {}/{}".format(local_dir_abs, bucket, prefix)) Parallel(n_jobs=-1, verbose=9)( delayed(upload_file_to_s3)(str(local_file), bucket, s3_key.as_posix()) - for local_file, s3_key - in filename_generator() + for local_file, s3_key in filename_generator() ) def compress_file(in_filename, out_filename): - with gzip.open(str(out_filename), 'wb') as f_out: - with open(str(in_filename), 'rb') as f_in: + with gzip.open(str(out_filename), "wb") as f_out: + with open(str(in_filename), "rb") as f_in: shutil.copyfileobj(f_in, f_out) def calc_hash_for_file(filename): - with open(filename, 'rb') as f: + with open(filename, "rb") as f: return hashlib.sha256(f.read()).hexdigest() def copy_s3_file(src_bucket, src_key, dest_bucket, dest_key): - s3 = boto3.client('s3') - s3.copy( - {'Bucket': src_bucket, 'Key': src_key}, - dest_bucket, - dest_key - ) + s3 = boto3.client("s3") + s3.copy({"Bucket": src_bucket, "Key": src_key}, dest_bucket, dest_key) class AwsBatchEnv(AwsJobBase): @@ -108,14 +107,14 @@ def __init__(self, job_name, aws_config, boto3_session): """ super().__init__(job_name, aws_config, boto3_session) - self.batch = self.session.client('batch') - self.ec2 = self.session.client('ec2') - self.ec2r = self.session.resource('ec2') - self.emr = self.session.client('emr') - self.step_functions = self.session.client('stepfunctions') - self.aws_lambda = self.session.client('lambda') - self.s3 = self.session.client('s3') - self.s3_res = self.session.resource('s3') + self.batch = self.session.client("batch") + self.ec2 = self.session.client("ec2") + self.ec2r = self.session.resource("ec2") + self.emr = self.session.client("emr") + self.step_functions = self.session.client("stepfunctions") + self.aws_lambda = self.session.client("lambda") + self.s3 = self.session.client("s3") + self.s3_res = self.session.resource("s3") self.task_role_arn = None self.job_definition_arn = None @@ -128,7 +127,6 @@ def 
__init__(self, job_name, aws_config, boto3_session): logger.propagate = False def __repr__(self): - return super().__repr__() def create_emr_lambda_roles(self): @@ -144,25 +142,21 @@ def create_emr_lambda_roles(self): { "Effect": "Allow", "Action": "logs:CreateLogGroup", - "Resource": f"arn:aws:logs:{self.region}:{self.account}:*" + "Resource": f"arn:aws:logs:{self.region}:{self.account}:*", }, { "Effect": "Allow", - "Action": [ - "logs:CreateLogStream", - "logs:PutLogEvents" - ], + "Action": ["logs:CreateLogStream", "logs:PutLogEvents"], "Resource": [ f"arn:aws:logs:{self.region}:{self.account}:log-group:/aws/lambda/launchemr:*" - ] + ], }, { "Effect": "Allow", "Action": "elasticmapreduce:RunJobFlow", - "Resource": "*" + "Resource": "*", }, { - "Effect": "Allow", "Action": "iam:PassRole", "Resource": [ @@ -170,69 +164,60 @@ def create_emr_lambda_roles(self): f"arn:aws:iam::{self.account}:role/EMR_EC2_DefaultRole", f"arn:aws:iam::{self.account}:role/EMR_AutoScaling_DefaultRole", self.emr_job_flow_role_arn, - self.emr_service_role_arn - ] + self.emr_service_role_arn, + ], }, { "Effect": "Allow", "Action": "s3:GetObject", - "Resource": [ - f"arn:aws:s3:::{self.s3_bucket}/*" - ] - } - ] + "Resource": [f"arn:aws:s3:::{self.s3_bucket}/*"], + }, + ], } self.lambda_emr_job_step_execution_role_arn = self.iam_helper.role_stitcher( self.lambda_emr_job_step_execution_role, - 'lambda', - f'Lambda execution role for {self.lambda_emr_job_step_function_name}', - policies_list=[json.dumps(lambda_policy, indent=4)] + "lambda", + f"Lambda execution role for {self.lambda_emr_job_step_function_name}", + policies_list=[json.dumps(lambda_policy, indent=4)], ) def create_vpc(self): cidrs_in_use = set() vpc_response = AWSRetry.backoff()(self.ec2.describe_vpcs)() - for vpc in vpc_response['Vpcs']: - cidrs_in_use.add(vpc['CidrBlock']) - for cidr_assoc in vpc['CidrBlockAssociationSet']: - cidrs_in_use.add(cidr_assoc['CidrBlock']) + for vpc in vpc_response["Vpcs"]: + cidrs_in_use.add(vpc["CidrBlock"]) + for cidr_assoc in vpc["CidrBlockAssociationSet"]: + cidrs_in_use.add(cidr_assoc["CidrBlock"]) need_to_find_cidr = True while need_to_find_cidr: - self.vpc_cidr = '172.{}.0.0/16'.format(random.randrange(100, 200)) + self.vpc_cidr = "172.{}.0.0/16".format(random.randrange(100, 200)) need_to_find_cidr = self.vpc_cidr in cidrs_in_use - self.pub_subnet_cidr = self.vpc_cidr.replace('/16', '/17') - self.priv_subnet_cidr_1 = self.vpc_cidr.replace('.0.0/16', '.128.0/18') - self.priv_subnet_cidr_2 = self.vpc_cidr.replace('.0.0/16', '.192.0/18') + self.pub_subnet_cidr = self.vpc_cidr.replace("/16", "/17") + self.priv_subnet_cidr_1 = self.vpc_cidr.replace(".0.0/16", ".128.0/18") + self.priv_subnet_cidr_2 = self.vpc_cidr.replace(".0.0/16", ".192.0/18") # Create the VPC response = self.ec2.create_vpc( CidrBlock=self.vpc_cidr, AmazonProvidedIpv6CidrBlock=False, - InstanceTenancy='default' + InstanceTenancy="default", ) - self.vpc_id = response['Vpc']['VpcId'] + self.vpc_id = response["Vpc"]["VpcId"] logger.info(f"VPC {self.vpc_id} created") while True: try: self.ec2.create_tags( - Resources=[ - self.vpc_id - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.vpc_id], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) break except Exception as e: - if 'InvalidVpcID.NotFound' in str(e): + if "InvalidVpcID.NotFound" in str(e): logger.info("Cannot tag VPC. VPC not yet created. 
Sleeping...") time.sleep(5) else: @@ -242,35 +227,26 @@ def create_vpc(self): sec_response = self.ec2.describe_security_groups( Filters=[ - { - 'Name': 'vpc-id', - 'Values': [ - self.vpc_id - ] - }, + {"Name": "vpc-id", "Values": [self.vpc_id]}, ] ) - self.batch_security_group = sec_response['SecurityGroups'][0]['GroupId'] + self.batch_security_group = sec_response["SecurityGroups"][0]["GroupId"] - logger.info(f'Security group {self.batch_security_group} created for vpc/job.') + logger.info(f"Security group {self.batch_security_group} created for vpc/job.") response = self.ec2.authorize_security_group_ingress( - GroupId=self.batch_security_group, IpPermissions=[ { - 'FromPort': 0, - 'IpProtocol': 'tcp', - 'IpRanges': [ - { - 'CidrIp': '0.0.0.0/0' - }, + "FromPort": 0, + "IpProtocol": "tcp", + "IpRanges": [ + {"CidrIp": "0.0.0.0/0"}, ], - - 'ToPort': 65535 + "ToPort": 65535, }, - ] + ], ) # Create the private subnets @@ -278,121 +254,81 @@ def create_vpc(self): priv_response_1 = self.ec2.create_subnet( CidrBlock=self.priv_subnet_cidr_1, AvailabilityZone=f"{self.region}a", - VpcId=self.vpc_id + VpcId=self.vpc_id, ) - self.priv_vpc_subnet_id_1 = priv_response_1['Subnet']['SubnetId'] + self.priv_vpc_subnet_id_1 = priv_response_1["Subnet"]["SubnetId"] logger.info("Private subnet created.") priv_response_2 = self.ec2.create_subnet( CidrBlock=self.priv_subnet_cidr_2, AvailabilityZone=f"{self.region}b", - VpcId=self.vpc_id + VpcId=self.vpc_id, ) - self.priv_vpc_subnet_id_2 = priv_response_2['Subnet']['SubnetId'] + self.priv_vpc_subnet_id_2 = priv_response_2["Subnet"]["SubnetId"] logger.info("Private subnet created.") self.ec2.create_tags( - Resources=[ - self.priv_vpc_subnet_id_1 - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.priv_vpc_subnet_id_1], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) self.ec2.create_tags( - Resources=[ - self.priv_vpc_subnet_id_2 - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.priv_vpc_subnet_id_2], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) ig_response = self.ec2.create_internet_gateway() - self.internet_gateway_id = ig_response['InternetGateway']['InternetGatewayId'] + self.internet_gateway_id = ig_response["InternetGateway"]["InternetGatewayId"] AWSRetry.backoff()(self.ec2.create_tags)( - Resources=[ - self.internet_gateway_id - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.internet_gateway_id], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) - logger.info(f'Internet gateway {self.internet_gateway_id} created.') + logger.info(f"Internet gateway {self.internet_gateway_id} created.") # Create the public subnet pub_response = self.ec2.create_subnet( - CidrBlock=self.pub_subnet_cidr, - VpcId=self.vpc_id + CidrBlock=self.pub_subnet_cidr, VpcId=self.vpc_id ) logger.info("EIP allocated.") - self.pub_vpc_subnet_id = pub_response['Subnet']['SubnetId'] + self.pub_vpc_subnet_id = pub_response["Subnet"]["SubnetId"] self.ec2.create_tags( - Resources=[ - self.pub_vpc_subnet_id - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.pub_vpc_subnet_id], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) # Create and elastic IP for the NAT Gateway try: + ip_response = self.ec2.allocate_address(Domain="vpc") - ip_response = self.ec2.allocate_address( - Domain='vpc' - ) - - self.nat_ip_allocation = ip_response['AllocationId'] + self.nat_ip_allocation = 
ip_response["AllocationId"] logger.info("EIP allocated.") self.ec2.create_tags( - Resources=[ - self.nat_ip_allocation - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.nat_ip_allocation], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) except Exception as e: - if 'AddressLimitExceeded' in str(e): + if "AddressLimitExceeded" in str(e): raise # Create an internet gateway self.ec2.attach_internet_gateway( - InternetGatewayId=self.internet_gateway_id, - VpcId=self.vpc_id + InternetGatewayId=self.internet_gateway_id, VpcId=self.vpc_id ) logger.info("Internet Gateway attached.") @@ -401,31 +337,26 @@ def create_vpc(self): drt_response = self.ec2.describe_route_tables( Filters=[ - { - 'Name': 'vpc-id', - 'Values': [ - self.vpc_id - ] - }, + {"Name": "vpc-id", "Values": [self.vpc_id]}, ] ) - self.pub_route_table_id = drt_response['RouteTables'][0]['RouteTableId'] + self.pub_route_table_id = drt_response["RouteTables"][0]["RouteTableId"] # Modify the default route table to be used as the public route while True: try: self.ec2.create_route( - DestinationCidrBlock='0.0.0.0/0', + DestinationCidrBlock="0.0.0.0/0", GatewayId=self.internet_gateway_id, - RouteTableId=self.pub_route_table_id + RouteTableId=self.pub_route_table_id, ) logger.info("Route created for Internet Gateway.") break except Exception as e: - if 'NotFound' in str(e): + if "NotFound" in str(e): time.sleep(5) logger.info("Internet Gateway not yet created. Sleeping...") else: @@ -434,47 +365,35 @@ def create_vpc(self): # Create a NAT Gateway nat_response = self.ec2.create_nat_gateway( - AllocationId=self.nat_ip_allocation, - SubnetId=self.pub_vpc_subnet_id + AllocationId=self.nat_ip_allocation, SubnetId=self.pub_vpc_subnet_id ) - self.nat_gateway_id = nat_response['NatGateway']['NatGatewayId'] + self.nat_gateway_id = nat_response["NatGateway"]["NatGatewayId"] logger.info("NAT Gateway created.") # Create a new private route table - prt_response = self.ec2.create_route_table( - VpcId=self.vpc_id - ) + prt_response = self.ec2.create_route_table(VpcId=self.vpc_id) - self.priv_route_table_id = prt_response['RouteTable']['RouteTableId'] + self.priv_route_table_id = prt_response["RouteTable"]["RouteTableId"] logger.info("Route table created.") AWSRetry.backoff()(self.ec2.create_tags)( - Resources=[ - self.priv_route_table_id - ], - Tags=[ - { - 'Key': 'Name', - 'Value': self.job_identifier - } - ] + Resources=[self.priv_route_table_id], + Tags=[{"Key": "Name", "Value": self.job_identifier}], ) # Associate the private route to the private subnet self.ec2.associate_route_table( - RouteTableId=self.priv_route_table_id, - SubnetId=self.priv_vpc_subnet_id_1 + RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_1 ) logger.info("Route table associated with subnet.") self.ec2.associate_route_table( - RouteTableId=self.priv_route_table_id, - SubnetId=self.priv_vpc_subnet_id_2 + RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_2 ) logger.info("Route table associated with subnet.") @@ -483,14 +402,14 @@ def create_vpc(self): while True: try: self.ec2.create_route( - DestinationCidrBlock='0.0.0.0/0', + DestinationCidrBlock="0.0.0.0/0", NatGatewayId=self.nat_gateway_id, - RouteTableId=self.priv_route_table_id + RouteTableId=self.priv_route_table_id, ) logger.info("Route created for subnet.") break except Exception as e: - if 'InvalidNatGatewayID.NotFound' in str(e): + if "InvalidNatGatewayID.NotFound" in str(e): time.sleep(5) logger.info("Nat Gateway not yet 
created. Sleeping...") else: @@ -516,7 +435,9 @@ def create_batch_service_roles(self): self.batch_service_role_name, "batch", f"Service role for Batch environment {self.job_identifier}", - managed_policie_arns=['arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole'] + managed_policie_arns=[ + "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole" + ], ) # Instance Role for Batch compute environment @@ -525,7 +446,9 @@ def create_batch_service_roles(self): self.batch_instance_role_name, "ec2", f"Instance role for Batch compute environment {self.job_identifier}", - managed_policie_arns=['arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role'] + managed_policie_arns=[ + "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" + ], ) # Instance Profile @@ -535,26 +458,26 @@ def create_batch_service_roles(self): InstanceProfileName=self.batch_instance_profile_name ) - self.instance_profile_arn = response['InstanceProfile']['Arn'] + self.instance_profile_arn = response["InstanceProfile"]["Arn"] logger.info("Instance Profile created") response = self.iam.add_role_to_instance_profile( InstanceProfileName=self.batch_instance_profile_name, - RoleName=self.batch_instance_role_name + RoleName=self.batch_instance_role_name, ) except Exception as e: - if 'EntityAlreadyExists' in str(e): - logger.info('ECS Instance Profile not created - already exists') + if "EntityAlreadyExists" in str(e): + logger.info("ECS Instance Profile not created - already exists") response = self.iam.get_instance_profile( InstanceProfileName=self.batch_instance_profile_name ) - self.instance_profile_arn = response['InstanceProfile']['Arn'] + self.instance_profile_arn = response["InstanceProfile"]["Arn"] # ECS Task Policy - task_permissions_policy = f'''{{ + task_permissions_policy = f"""{{ "Version": "2012-10-17", "Statement": [ {{ @@ -639,12 +562,14 @@ def create_batch_service_roles(self): "Resource": "*" }} ] - }}''' + }}""" - self.task_role_arn = self.iam_helper.role_stitcher(self.batch_ecs_task_role_name, - "ecs-tasks", - f"Task role for Batch job {self.job_identifier}", - policies_list=[task_permissions_policy]) + self.task_role_arn = self.iam_helper.role_stitcher( + self.batch_ecs_task_role_name, + "ecs-tasks", + f"Task role for Batch job {self.job_identifier}", + policies_list=[task_permissions_policy], + ) if self.batch_use_spot: # Spot Fleet Role @@ -652,7 +577,9 @@ def create_batch_service_roles(self): self.batch_spot_service_role_name, "spotfleet", f"Spot Fleet role for Batch compute environment {self.job_identifier}", - managed_policie_arns=['arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole'] + managed_policie_arns=[ + "arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole" + ], ) def create_compute_environment(self, maxCPUs=10000): @@ -665,41 +592,46 @@ def create_compute_environment(self, maxCPUs=10000): try: compute_resources = { - 'minvCpus': 0, - 'maxvCpus': maxCPUs, - 'desiredvCpus': 0, - 'instanceTypes': [ - 'optimal', + "minvCpus": 0, + "maxvCpus": maxCPUs, + "desiredvCpus": 0, + "instanceTypes": [ + "optimal", ], - 'imageId': self.batch_compute_environment_ami, - 'subnets': [self.priv_vpc_subnet_id_1, self.priv_vpc_subnet_id_2], - 'securityGroupIds': [self.batch_security_group], - 'instanceRole': self.instance_profile_arn + "imageId": self.batch_compute_environment_ami, + "subnets": [self.priv_vpc_subnet_id_1, self.priv_vpc_subnet_id_2], + "securityGroupIds": [self.batch_security_group], + "instanceRole": self.instance_profile_arn, } 
if self.batch_use_spot: - compute_resources.update({ - 'type': 'SPOT', - 'bidPercentage': 100, - 'spotIamFleetRole': self.spot_service_role_arn - }) + compute_resources.update( + { + "type": "SPOT", + "bidPercentage": 100, + "spotIamFleetRole": self.spot_service_role_arn, + } + ) else: - compute_resources['type'] = 'EC2' + compute_resources["type"] = "EC2" self.batch.create_compute_environment( computeEnvironmentName=self.batch_compute_environment_name, - type='MANAGED', - state='ENABLED', + type="MANAGED", + state="ENABLED", computeResources=compute_resources, - serviceRole=self.service_role_arn + serviceRole=self.service_role_arn, ) - logger.info(f'Compute environment {self.batch_compute_environment_name} created.') + logger.info( + f"Compute environment {self.batch_compute_environment_name} created." + ) except Exception as e: - if 'Object already exists' in str(e): + if "Object already exists" in str(e): logger.info( - f'Compute environment {self.batch_compute_environment_name} not created - already exists') + f"Compute environment {self.batch_compute_environment_name} not created - already exists" + ) else: raise @@ -712,37 +644,41 @@ def create_job_queue(self): try: response = self.batch.create_job_queue( jobQueueName=self.batch_job_queue_name, - state='ENABLED', + state="ENABLED", priority=1, computeEnvironmentOrder=[ { - 'order': 1, - 'computeEnvironment': self.batch_compute_environment_name + "order": 1, + "computeEnvironment": self.batch_compute_environment_name, }, - ] + ], ) # print("JOB QUEUE") # print(response) - self.job_queue_arn = response['jobQueueArn'] - logger.info(f'Job queue {self.batch_job_queue_name} created') + self.job_queue_arn = response["jobQueueArn"] + logger.info(f"Job queue {self.batch_job_queue_name} created") break except Exception as e: - if 'Object already exists' in str(e): - logger.info(f'Job queue {self.batch_job_queue_name} not created - already exists') + if "Object already exists" in str(e): + logger.info( + f"Job queue {self.batch_job_queue_name} not created - already exists" + ) response = self.batch.describe_job_queues( jobQueues=[ self.batch_job_queue_name, ] ) - self.job_queue_arn = response['jobQueues'][0]['jobQueueArn'] + self.job_queue_arn = response["jobQueues"][0]["jobQueueArn"] break - elif 'is not valid' in str(e): + elif "is not valid" in str(e): # Need to wait a second for the compute environment to complete registration logger.warning( - '5 second sleep initiated to wait for compute environment creation due to error: ' + str(e)) + "5 second sleep initiated to wait for compute environment creation due to error: " + + str(e) + ) time.sleep(5) else: @@ -759,24 +695,22 @@ def create_job_definition(self, docker_image, vcpus, memory, command, env_vars): """ response = self.batch.register_job_definition( jobDefinitionName=self.job_identifier, - type='container', + type="container", # parameters={ # 'string': 'string' # }, containerProperties={ - 'image': docker_image, - 'vcpus': vcpus, - 'memory': memory, - 'command': command, - 'jobRoleArn': self.task_role_arn, - 'environment': self.generate_name_value_inputs(env_vars) + "image": docker_image, + "vcpus": vcpus, + "memory": memory, + "command": command, + "jobRoleArn": self.task_role_arn, + "environment": self.generate_name_value_inputs(env_vars), }, - retryStrategy={ - 'attempts': 2 - } + retryStrategy={"attempts": 2}, ) - self.job_definition_arn = response['jobDefinitionArn'] + self.job_definition_arn = response["jobDefinitionArn"] def submit_job(self, array_size=4): """ @@ -788,27 
+722,26 @@ def submit_job(self, array_size=4): self.batch.submit_job( jobName=self.job_identifier, jobQueue=self.batch_job_queue_name, - arrayProperties={ - 'size': array_size - }, - jobDefinition=self.job_definition_arn + arrayProperties={"size": array_size}, + jobDefinition=self.job_definition_arn, ) logger.info(f"Job {self.job_identifier} submitted.") break except Exception as e: - - if 'not in VALID state' in str(e): + if "not in VALID state" in str(e): # Need to wait a second for the compute environment to complete registration - logger.warning('5 second sleep initiated to wait for job queue creation due to error: ' + str(e)) + logger.warning( + "5 second sleep initiated to wait for job queue creation due to error: " + + str(e) + ) time.sleep(5) else: raise def create_state_machine_roles(self): - - lambda_policy = f'''{{ + lambda_policy = f"""{{ "Version": "2012-10-17", "Statement": [ {{ @@ -823,9 +756,9 @@ def create_state_machine_roles(self): ] }} - ''' + """ - batch_policy = '''{ + batch_policy = """{ "Version": "2012-10-17", "Statement": [ { @@ -851,9 +784,9 @@ def create_state_machine_roles(self): ] } - ''' + """ - sns_policy = f'''{{ + sns_policy = f"""{{ "Version": "2012-10-17", "Statement": [ {{ @@ -865,17 +798,19 @@ def create_state_machine_roles(self): }} ] }} - ''' + """ policies_list = [lambda_policy, batch_policy, sns_policy] - self.state_machine_role_arn = self.iam_helper.role_stitcher(self.state_machine_role_name, 'states', - 'Permissions for statemachine to run jobs', - policies_list=policies_list) + self.state_machine_role_arn = self.iam_helper.role_stitcher( + self.state_machine_role_name, + "states", + "Permissions for statemachine to run jobs", + policies_list=policies_list, + ) def create_state_machine(self): - - job_definition = f'''{{ + job_definition = f"""{{ "Comment": "An example of the Amazon States Language for notification on an AWS Batch job completion", "StartAt": "Submit Batch Job", "States": {{ @@ -951,19 +886,18 @@ def create_state_machine(self): }} }} - ''' + """ while True: - try: response = self.step_functions.create_state_machine( name=self.state_machine_name, definition=job_definition, - roleArn=self.state_machine_role_arn + roleArn=self.state_machine_role_arn, ) # print(response) - self.state_machine_arn = response['stateMachineArn'] + self.state_machine_arn = response["stateMachineArn"] logger.info(f"State machine {self.state_machine_name} created.") break except Exception as e: @@ -979,49 +913,47 @@ def create_state_machine(self): raise def start_state_machine_execution(self, array_size): - self.step_functions.start_execution( stateMachineArn=self.state_machine_arn, - name=f'{self.state_machine_name}_execution_{int(time.time())}', - input=f'{{"array_size": {array_size}}}' + name=f"{self.state_machine_name}_execution_{int(time.time())}", + input=f'{{"array_size": {array_size}}}', ) logger.info(f"Starting state machine {self.state_machine_name}.") def clean(self): - # Get our vpc: response = self.ec2.describe_vpcs( Filters=[ { - 'Name': 'tag:Name', - 'Values': [ + "Name": "tag:Name", + "Values": [ self.vpc_name, - ] + ], }, ] ) try: - self.vpc_id = response['Vpcs'][0]['VpcId'] + self.vpc_id = response["Vpcs"][0]["VpcId"] except (KeyError, IndexError): self.vpc_id = None logger.info("Cleaning up EMR.") try: - self.emr.terminate_job_flows( - JobFlowIds=[ - self.emr_cluster_name - ] - ) + self.emr.terminate_job_flows(JobFlowIds=[self.emr_cluster_name]) logger.info(f"EMR cluster {self.emr_cluster_name} deleted.") except Exception as e: - if 
'ResourceNotFoundException' in str(e): - logger.info(f"EMR cluster {self.emr_cluster_name} already MIA - skipping...") + if "ResourceNotFoundException" in str(e): + logger.info( + f"EMR cluster {self.emr_cluster_name} already MIA - skipping..." + ) - self.iam_helper.remove_role_from_instance_profile(self.emr_instance_profile_name) + self.iam_helper.remove_role_from_instance_profile( + self.emr_instance_profile_name + ) self.iam_helper.delete_instance_profile(self.emr_instance_profile_name) self.iam_helper.delete_role(self.emr_job_flow_role_name) self.iam_helper.delete_role(self.emr_service_role_name) @@ -1030,57 +962,63 @@ def clean(self): f"EMR clean complete. Results bucket and data {self.s3_bucket} have not been deleted." ) - logger.info(f'Deleting Security group {self.emr_cluster_security_group_name}.') + logger.info(f"Deleting Security group {self.emr_cluster_security_group_name}.") default_sg_response = self.ec2.describe_security_groups( Filters=[ { - 'Name': 'group-name', - 'Values': [ - 'default', - ] + "Name": "group-name", + "Values": [ + "default", + ], }, ] ) logger.info("Removing egress from default security group.") - for group in default_sg_response['SecurityGroups']: - if group['VpcId'] == self.vpc_id: - default_group_id = group['GroupId'] + for group in default_sg_response["SecurityGroups"]: + if group["VpcId"] == self.vpc_id: + default_group_id = group["GroupId"] dsg = self.ec2r.SecurityGroup(default_group_id) if len(dsg.ip_permissions_egress): - response = dsg.revoke_egress(IpPermissions=dsg.ip_permissions_egress) + response = dsg.revoke_egress( + IpPermissions=dsg.ip_permissions_egress + ) sg_response = AWSRetry.backoff()(self.ec2.describe_security_groups)( Filters=[ { - 'Name': 'group-name', - 'Values': [ + "Name": "group-name", + "Values": [ self.emr_cluster_security_group_name, - ] + ], }, ] ) try: - group_id = sg_response['SecurityGroups'][0]['GroupId'] + group_id = sg_response["SecurityGroups"][0]["GroupId"] sg = self.ec2r.SecurityGroup(group_id) if len(sg.ip_permissions): sg.revoke_ingress(IpPermissions=sg.ip_permissions) while True: try: - self.ec2.delete_security_group( - GroupId=group_id - ) + self.ec2.delete_security_group(GroupId=group_id) break except ClientError: - logger.info("Waiting for security group ingress rules to be removed ...") + logger.info( + "Waiting for security group ingress rules to be removed ..." + ) time.sleep(5) - logger.info(f"Deleted security group {self.emr_cluster_security_group_name}.") + logger.info( + f"Deleted security group {self.emr_cluster_security_group_name}." + ) except Exception as e: - if 'does not exist' in str(e) or 'list index out of range' in str(e): - logger.info(f'Security group {self.emr_cluster_security_group_name} does not exist - skipping...') + if "does not exist" in str(e) or "list index out of range" in str(e): + logger.info( + f"Security group {self.emr_cluster_security_group_name} does not exist - skipping..." + ) else: raise @@ -1089,18 +1027,22 @@ def clean(self): FunctionName=self.lambda_emr_job_step_function_name ) except Exception as e: - if 'Function not found' in str(e): - logger.info(f"Function {self.lambda_emr_job_step_function_name} not found, skipping...") + if "Function not found" in str(e): + logger.info( + f"Function {self.lambda_emr_job_step_function_name} not found, skipping..." 
+ ) else: raise try: - self.s3.delete_object(Bucket=self.s3_bucket, Key=self.s3_lambda_code_emr_cluster_key) + self.s3.delete_object( + Bucket=self.s3_bucket, Key=self.s3_lambda_code_emr_cluster_key + ) logger.info( f"S3 object {self.s3_lambda_code_emr_cluster_key} for bucket {self.s3_bucket} deleted." # noqa E501 ) except Exception as e: - if 'NoSuchBucket' in str(e): + if "NoSuchBucket" in str(e): logger.info( f"S3 object {self.s3_lambda_code_emr_cluster_key} for bucket {self.s3_bucket} missing - not deleted." # noqa E501 ) @@ -1111,9 +1053,9 @@ def clean(self): state_machines = self.step_functions.list_state_machines() - for sm in state_machines['stateMachines']: - if sm['name'] == self.state_machine_name: - self.state_machine_arn = sm['stateMachineArn'] + for sm in state_machines["stateMachines"]: + if sm["name"] == self.state_machine_name: + self.state_machine_arn = sm["stateMachineArn"] self.step_functions.delete_state_machine( stateMachineArn=self.state_machine_arn ) @@ -1123,10 +1065,8 @@ def clean(self): self.iam_helper.delete_role(self.state_machine_role_name) try: - self.batch.update_job_queue( - jobQueue=self.batch_job_queue_name, - state='DISABLED' + jobQueue=self.batch_job_queue_name, state="DISABLED" ) while True: @@ -1137,22 +1077,24 @@ def clean(self): logger.info(f"Job queue {self.batch_job_queue_name} deleted.") break except Exception as e: - if 'Cannot delete, resource is being modified' in str(e): - logger.info("Job queue being modified - sleeping until ready...") + if "Cannot delete, resource is being modified" in str(e): + logger.info( + "Job queue being modified - sleeping until ready..." + ) time.sleep(5) else: raise except Exception as e: - if 'does not exist' in str(e): - logger.info(f"Job queue {self.batch_job_queue_name} missing, skipping...") + if "does not exist" in str(e): + logger.info( + f"Job queue {self.batch_job_queue_name} missing, skipping..." + ) # Delete compute enviornment try: - self.batch.update_compute_environment( - computeEnvironment=self.batch_compute_environment_name, - state='DISABLED' + computeEnvironment=self.batch_compute_environment_name, state="DISABLED" ) while True: @@ -1160,17 +1102,25 @@ def clean(self): response = self.batch.delete_compute_environment( computeEnvironment=self.batch_compute_environment_name ) - logger.info(f"Compute environment {self.batch_compute_environment_name} deleted.") + logger.info( + f"Compute environment {self.batch_compute_environment_name} deleted." + ) break except Exception as e: - if 'Cannot delete, resource is being modified' in str(e) or 'found existing JobQueue' in str(e): - logger.info("Compute environment being modified - sleeping until ready...") + if "Cannot delete, resource is being modified" in str( + e + ) or "found existing JobQueue" in str(e): + logger.info( + "Compute environment being modified - sleeping until ready..." + ) time.sleep(5) else: raise except Exception as e: - if 'does not exist' in str(e): - logger.info(f"Compute environment {self.batch_compute_environment_name} missing, skipping...") + if "does not exist" in str(e): + logger.info( + f"Compute environment {self.batch_compute_environment_name} missing, skipping..." 
+ ) else: raise @@ -1178,7 +1128,9 @@ def clean(self): self.iam_helper.delete_role(self.batch_spot_service_role_name) self.iam_helper.delete_role(self.batch_ecs_task_role_name) # Instance profile order of removal - self.iam_helper.remove_role_from_instance_profile(self.batch_instance_profile_name) + self.iam_helper.remove_role_from_instance_profile( + self.batch_instance_profile_name + ) self.iam_helper.delete_role(self.batch_instance_role_name) self.iam_helper.delete_instance_profile(self.batch_instance_profile_name) @@ -1187,55 +1139,37 @@ def clean(self): response = AWSRetry.backoff()(self.ec2.describe_vpcs)( Filters=[ { - 'Name': 'tag:Name', - 'Values': [ + "Name": "tag:Name", + "Values": [ self.job_identifier, - ] + ], }, ], - ) - for vpc in response['Vpcs']: - this_vpc = vpc['VpcId'] + for vpc in response["Vpcs"]: + this_vpc = vpc["VpcId"] ng_response = AWSRetry.backoff()(self.ec2.describe_nat_gateways)( - Filters=[ - { - 'Name': 'vpc-id', - 'Values': [ - this_vpc - ] - } - ] + Filters=[{"Name": "vpc-id", "Values": [this_vpc]}] ) - for natgw in ng_response['NatGateways']: - this_natgw = natgw['NatGatewayId'] + for natgw in ng_response["NatGateways"]: + this_natgw = natgw["NatGatewayId"] - if natgw['State'] != 'deleted': - self.ec2.delete_nat_gateway( - NatGatewayId=this_natgw - ) + if natgw["State"] != "deleted": + self.ec2.delete_nat_gateway(NatGatewayId=this_natgw) rtas_response = AWSRetry.backoff()(self.ec2.describe_route_tables)( - Filters=[ - { - 'Name': 'vpc-id', - 'Values': [ - this_vpc - ] - } - ] - + Filters=[{"Name": "vpc-id", "Values": [this_vpc]}] ) - for route_table in rtas_response['RouteTables']: - route_table_id = route_table['RouteTableId'] - for association in route_table['Associations']: - if not association['Main']: + for route_table in rtas_response["RouteTables"]: + route_table_id = route_table["RouteTableId"] + for association in route_table["Associations"]: + if not association["Main"]: response = self.ec2.disassociate_route_table( - AssociationId=association['RouteTableAssociationId'] + AssociationId=association["RouteTableAssociationId"] ) rt_counter = 10 while rt_counter: @@ -1247,153 +1181,151 @@ def clean(self): break except Exception as e: rt_counter = rt_counter - 1 - if 'DependencyViolation' in str(e): - logger.info("Waiting for association to be released before deleting route table. Sleeping...") # noqa E501 + if "DependencyViolation" in str(e): + logger.info( + "Waiting for association to be released before deleting route table. Sleeping..." + ) # noqa E501 time.sleep(5) else: raise igw_response = AWSRetry.backoff()(self.ec2.describe_internet_gateways)( - Filters=[ - { - 'Name': 'tag:Name', - 'Values': [ - self.job_identifier - ] - } - ] + Filters=[{"Name": "tag:Name", "Values": [self.job_identifier]}] ) - for internet_gateway in igw_response['InternetGateways']: - for attachment in internet_gateway['Attachments']: - if attachment['VpcId'] == this_vpc: + for internet_gateway in igw_response["InternetGateways"]: + for attachment in internet_gateway["Attachments"]: + if attachment["VpcId"] == this_vpc: while True: try: try: self.ec2.detach_internet_gateway( - InternetGatewayId=internet_gateway['InternetGatewayId'], - VpcId=attachment['VpcId'] + InternetGatewayId=internet_gateway[ + "InternetGatewayId" + ], + VpcId=attachment["VpcId"], ) except Exception as e: - logger.info(f"Error on Internet Gateway disassociation - ignoring... {str(e)}") + logger.info( + f"Error on Internet Gateway disassociation - ignoring... 
{str(e)}" + ) self.ec2.delete_internet_gateway( - InternetGatewayId=internet_gateway['InternetGatewayId'] + InternetGatewayId=internet_gateway[ + "InternetGatewayId" + ] ) logger.info("Internet Gateway deleted.") break except Exception as e: - if 'DependencyViolation' in str(e): + if "DependencyViolation" in str(e): logger.info( - "Waiting for IPs to be released before deleting Internet Gateway. Sleeping...") + "Waiting for IPs to be released before deleting Internet Gateway. Sleeping..." + ) time.sleep(5) else: raise subn_response = self.ec2.describe_subnets( - Filters=[ - { - 'Name': 'vpc-id', - 'Values': [ - this_vpc - ] - } - ] + Filters=[{"Name": "vpc-id", "Values": [this_vpc]}] ) - for subnet in subn_response['Subnets']: + for subnet in subn_response["Subnets"]: while True: try: - self.ec2.delete_subnet( - SubnetId=subnet['SubnetId'] - ) + self.ec2.delete_subnet(SubnetId=subnet["SubnetId"]) break except Exception as e: - if 'DependencyViolation' in str(e): - logger.info('Subnet cannot be deleted as dependencies are still being deleted. Sleeping...') + if "DependencyViolation" in str(e): + logger.info( + "Subnet cannot be deleted as dependencies are still being deleted. Sleeping..." + ) time.sleep(10) else: raise - AWSRetry.backoff()(self.ec2.delete_vpc)( - VpcId=this_vpc - ) + AWSRetry.backoff()(self.ec2.delete_vpc)(VpcId=this_vpc) # Find the Elastic IP from the NAT response = self.ec2.describe_addresses( Filters=[ { - 'Name': 'tag:Name', - 'Values': [ + "Name": "tag:Name", + "Values": [ self.job_identifier, - ] + ], }, ], - ) - for address in response['Addresses']: - this_address = address['AllocationId'] + for address in response["Addresses"]: + this_address = address["AllocationId"] - response = self.ec2.release_address( - AllocationId=this_address - ) + response = self.ec2.release_address(AllocationId=this_address) def create_emr_security_groups(self): - try: response = self.ec2.create_security_group( - Description='EMR Job Flow Security Group (full cluster access)', + Description="EMR Job Flow Security Group (full cluster access)", GroupName=self.emr_cluster_security_group_name, - VpcId=self.vpc_id + VpcId=self.vpc_id, ) - self.emr_cluster_security_group_id = response['GroupId'] + self.emr_cluster_security_group_id = response["GroupId"] except Exception as e: - if 'already exists for VPC' in str(e): + if "already exists for VPC" in str(e): logger.info("Security group for EMR already exists, skipping ...") response = self.ec2.describe_security_groups( Filters=[ { - 'Name': 'group-name', - 'Values': [ + "Name": "group-name", + "Values": [ self.emr_cluster_security_group_name, - ] + ], }, ] ) - self.emr_cluster_security_group_id = response['SecurityGroups'][0]['GroupId'] + self.emr_cluster_security_group_id = response["SecurityGroups"][0][ + "GroupId" + ] else: raise try: response = self.ec2.authorize_security_group_ingress( GroupId=self.emr_cluster_security_group_id, - IpPermissions=[dict( - IpProtocol='-1', - UserIdGroupPairs=[dict( - GroupId=self.emr_cluster_security_group_id, - UserId=self.account - )] - )] + IpPermissions=[ + dict( + IpProtocol="-1", + UserIdGroupPairs=[ + dict( + GroupId=self.emr_cluster_security_group_id, + UserId=self.account, + ) + ], + ) + ], ) except Exception as e: - if 'already exists' in str(e): - logger.info("Security group egress rule for EMR already exists, skipping ...") + if "already exists" in str(e): + logger.info( + "Security group egress rule for EMR already exists, skipping ..." 
+ ) else: raise def create_emr_iam_roles(self): - self.emr_service_role_arn = self.iam_helper.role_stitcher( self.emr_service_role_name, "elasticmapreduce", f"EMR Service Role {self.job_identifier}", - managed_policie_arns=['arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole'] + managed_policie_arns=[ + "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole" + ], ) - emr_policy = '''{ + emr_policy = """{ "Version": "2012-10-17", "Statement": [ { @@ -1423,14 +1355,16 @@ def create_emr_iam_roles(self): "Resource": "arn:aws:iam::*:role/service-role/AWSGlueServiceRole-default" } ] -}''' +}""" self.emr_job_flow_role_arn = self.iam_helper.role_stitcher( self.emr_job_flow_role_name, "ec2", f"EMR Job Flow Role {self.job_identifier}", - managed_policie_arns=['arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role'], - policies_list=[emr_policy] + managed_policie_arns=[ + "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role" + ], + policies_list=[emr_policy], ) try: @@ -1438,126 +1372,129 @@ def create_emr_iam_roles(self): InstanceProfileName=self.emr_instance_profile_name ) - self.emr_instance_profile_arn = response['InstanceProfile']['Arn'] + self.emr_instance_profile_arn = response["InstanceProfile"]["Arn"] logger.info("EMR Instance Profile created") response = self.iam.add_role_to_instance_profile( InstanceProfileName=self.emr_instance_profile_name, - RoleName=self.emr_job_flow_role_name + RoleName=self.emr_job_flow_role_name, ) except Exception as e: - if 'EntityAlreadyExists' in str(e): - logger.info('EMR Instance Profile not created - already exists') + if "EntityAlreadyExists" in str(e): + logger.info("EMR Instance Profile not created - already exists") response = self.iam.get_instance_profile( InstanceProfileName=self.emr_instance_profile_name ) - self.emr_instance_profile_arn = response['InstanceProfile']['Arn'] + self.emr_instance_profile_arn = response["InstanceProfile"]["Arn"] def upload_assets(self): - - logger.info('Uploading EMR support assets...') + logger.info("Uploading EMR support assets...") fs = S3FileSystem() here = os.path.dirname(os.path.abspath(__file__)) - emr_folder = f"{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}" + emr_folder = ( + f"{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}" + ) fs.makedirs(emr_folder) # bsb_post.sh - bsb_post_bash = f'''#!/bin/bash + bsb_post_bash = f"""#!/bin/bash aws s3 cp "s3://{self.s3_bucket}/{self.s3_bucket_prefix}/emr/bsb_post.py" bsb_post.py /home/hadoop/miniconda/bin/python bsb_post.py "{self.s3_bucket}" "{self.s3_bucket_prefix}" - ''' - with fs.open(f'{emr_folder}/bsb_post.sh', 'w', encoding='utf-8') as f: + """ + with fs.open(f"{emr_folder}/bsb_post.sh", "w", encoding="utf-8") as f: f.write(bsb_post_bash) # bsb_post.py - fs.put(os.path.join(here, 's3_assets', 'bsb_post.py'), f'{emr_folder}/bsb_post.py') + fs.put( + os.path.join(here, "s3_assets", "bsb_post.py"), f"{emr_folder}/bsb_post.py" + ) # bootstrap-dask-custom - fs.put(os.path.join(here, 's3_assets', 'bootstrap-dask-custom'), f'{emr_folder}/bootstrap-dask-custom') + fs.put( + os.path.join(here, "s3_assets", "bootstrap-dask-custom"), + f"{emr_folder}/bootstrap-dask-custom", + ) # postprocessing.py - with fs.open(f'{emr_folder}/postprocessing.tar.gz', 'wb') as f: - with tarfile.open(fileobj=f, mode='w:gz') as tarf: - tarf.add(os.path.join(here, '..', 'postprocessing.py'), arcname='postprocessing.py') - tarf.add(os.path.join(here, 's3_assets', 'setup_postprocessing.py'), 
arcname='setup.py') + with fs.open(f"{emr_folder}/postprocessing.tar.gz", "wb") as f: + with tarfile.open(fileobj=f, mode="w:gz") as tarf: + tarf.add( + os.path.join(here, "..", "postprocessing.py"), + arcname="postprocessing.py", + ) + tarf.add( + os.path.join(here, "s3_assets", "setup_postprocessing.py"), + arcname="setup.py", + ) - logger.info('EMR support assets uploaded.') + logger.info("EMR support assets uploaded.") def create_emr_cluster_function(self): script_name = f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}/bsb_post.sh" - bootstrap_action = f's3://{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}/bootstrap-dask-custom' # noqa E501 + bootstrap_action = f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}/bootstrap-dask-custom" # noqa E501 run_job_flow_args = dict( Name=self.emr_cluster_name, LogUri=self.emr_log_uri, - - ReleaseLabel='emr-5.23.0', + ReleaseLabel="emr-5.23.0", Instances={ - 'InstanceGroups': [ + "InstanceGroups": [ { - 'Market': 'SPOT' if self.batch_use_spot else 'ON_DEMAND', - 'InstanceRole': 'MASTER', - 'InstanceType': self.emr_manager_instance_type, - 'InstanceCount': 1 + "Market": "SPOT" if self.batch_use_spot else "ON_DEMAND", + "InstanceRole": "MASTER", + "InstanceType": self.emr_manager_instance_type, + "InstanceCount": 1, }, { - 'Market': 'SPOT' if self.batch_use_spot else 'ON_DEMAND', - 'InstanceRole': 'CORE', - 'InstanceType': self.emr_worker_instance_type, - 'InstanceCount': self.emr_worker_instance_count + "Market": "SPOT" if self.batch_use_spot else "ON_DEMAND", + "InstanceRole": "CORE", + "InstanceType": self.emr_worker_instance_type, + "InstanceCount": self.emr_worker_instance_count, }, ], - 'Ec2SubnetId': self.priv_vpc_subnet_id_1, - 'KeepJobFlowAliveWhenNoSteps': False, - 'EmrManagedMasterSecurityGroup': self.emr_cluster_security_group_id, - 'EmrManagedSlaveSecurityGroup': self.emr_cluster_security_group_id, - 'ServiceAccessSecurityGroup': self.batch_security_group + "Ec2SubnetId": self.priv_vpc_subnet_id_1, + "KeepJobFlowAliveWhenNoSteps": False, + "EmrManagedMasterSecurityGroup": self.emr_cluster_security_group_id, + "EmrManagedSlaveSecurityGroup": self.emr_cluster_security_group_id, + "ServiceAccessSecurityGroup": self.batch_security_group, }, - Applications=[ - { - 'Name': 'Hadoop' - }, + {"Name": "Hadoop"}, ], - BootstrapActions=[ { - 'Name': 'launchFromS3', - 'ScriptBootstrapAction': { - 'Path': bootstrap_action, - 'Args': [f's3://{self.s3_bucket}/{self.s3_bucket_prefix}/emr/postprocessing.tar.gz'] - } + "Name": "launchFromS3", + "ScriptBootstrapAction": { + "Path": bootstrap_action, + "Args": [ + f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/emr/postprocessing.tar.gz" + ], + }, }, ], - Steps=[ { - 'Name': 'Dask', - 'ActionOnFailure': 'TERMINATE_CLUSTER', - - 'HadoopJarStep': { - 'Jar': 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar', - 'Args': [script_name] - } + "Name": "Dask", + "ActionOnFailure": "TERMINATE_CLUSTER", + "HadoopJarStep": { + "Jar": "s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar", + "Args": [script_name], + }, }, ], - VisibleToAllUsers=True, JobFlowRole=self.emr_instance_profile_name, ServiceRole=self.emr_service_role_name, Tags=[ - { - 'Key': 'org', - 'Value': 'ops' - }, + {"Key": "org", "Value": "ops"}, ], - AutoScalingRole='EMR_AutoScaling_DefaultRole', - ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION', - EbsRootVolumeSize=100 + AutoScalingRole="EMR_AutoScaling_DefaultRole", + 
ScaleDownBehavior="TERMINATE_AT_TASK_COMPLETION", + EbsRootVolumeSize=100, ) with io.BytesIO() as f: @@ -1565,84 +1502,94 @@ def create_emr_cluster_function(self): f.seek(0) self.s3.upload_fileobj(f, self.s3_bucket, self.s3_lambda_emr_config_key) - lambda_filename = os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3_assets', 'lambda_function.py') - with open(lambda_filename, 'r') as f: + lambda_filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "s3_assets", + "lambda_function.py", + ) + with open(lambda_filename, "r") as f: function_script = f.read() with io.BytesIO() as f: - with zipfile.ZipFile(f, mode='w', compression=zipfile.ZIP_STORED) as zf: - zi = zipfile.ZipInfo('emr_function.py') + with zipfile.ZipFile(f, mode="w", compression=zipfile.ZIP_STORED) as zf: + zi = zipfile.ZipInfo("emr_function.py") zi.date_time = time.localtime() zi.external_attr = 0o100755 << 16 zf.writestr(zi, function_script, zipfile.ZIP_DEFLATED) f.seek(0) - self.s3.upload_fileobj(f, self.s3_bucket, self.s3_lambda_code_emr_cluster_key) + self.s3.upload_fileobj( + f, self.s3_bucket, self.s3_lambda_code_emr_cluster_key + ) while True: try: self.aws_lambda.create_function( FunctionName=self.lambda_emr_job_step_function_name, - Runtime='python3.7', + Runtime="python3.7", Role=self.lambda_emr_job_step_execution_role_arn, - Handler='emr_function.lambda_handler', + Handler="emr_function.lambda_handler", Code={ - 'S3Bucket': self.s3_bucket, - 'S3Key': self.s3_lambda_code_emr_cluster_key + "S3Bucket": self.s3_bucket, + "S3Key": self.s3_lambda_code_emr_cluster_key, }, - Description=f'Lambda for emr cluster execution on job {self.job_identifier}', + Description=f"Lambda for emr cluster execution on job {self.job_identifier}", Timeout=900, MemorySize=128, Publish=True, Environment={ - 'Variables': { - 'REGION': self.region, - 'BUCKET': self.s3_bucket, - 'EMR_CONFIG_JSON_KEY': self.s3_lambda_emr_config_key + "Variables": { + "REGION": self.region, + "BUCKET": self.s3_bucket, + "EMR_CONFIG_JSON_KEY": self.s3_lambda_emr_config_key, } }, - Tags={ - 'job': self.job_identifier - } + Tags={"job": self.job_identifier}, ) - logger.info(f"Lambda function {self.lambda_emr_job_step_function_name} created.") + logger.info( + f"Lambda function {self.lambda_emr_job_step_function_name} created." + ) break except Exception as e: - if 'role defined for the function cannot be assumed' in str(e): + if "role defined for the function cannot be assumed" in str(e): logger.info( - f"Lambda role not registered for {self.lambda_emr_job_step_function_name} - sleeping ...") + f"Lambda role not registered for {self.lambda_emr_job_step_function_name} - sleeping ..." + ) time.sleep(5) - elif 'Function already exist' in str(e): - logger.info(f'Lambda function {self.lambda_emr_job_step_function_name} exists, skipping...') + elif "Function already exist" in str(e): + logger.info( + f"Lambda function {self.lambda_emr_job_step_function_name} exists, skipping..." + ) break - elif 'ARN does not refer to a valid principal' in str(e): - logger.info('Waiting for roles/permissions to propagate to allow Lambda function creation ...') + elif "ARN does not refer to a valid principal" in str(e): + logger.info( + "Waiting for roles/permissions to propagate to allow Lambda function creation ..." 
+ ) time.sleep(5) else: raise class AwsSNS(AwsJobBase): - def __init__(self, job_name, aws_config, boto3_session): super().__init__(job_name, aws_config, boto3_session) self.sns = self.session.client("sns") self.sns_state_machine_topic_arn = None def create_topic(self): - response = self.sns.create_topic( - Name=self.sns_state_machine_topic - ) + response = self.sns.create_topic(Name=self.sns_state_machine_topic) - logger.info(f"Simple notifications topic {self.sns_state_machine_topic} created.") + logger.info( + f"Simple notifications topic {self.sns_state_machine_topic} created." + ) - self.sns_state_machine_topic_arn = response['TopicArn'] + self.sns_state_machine_topic_arn = response["TopicArn"] def subscribe_to_topic(self): self.sns.subscribe( TopicArn=self.sns_state_machine_topic_arn, - Protocol='email', - Endpoint=self.operator_email + Protocol="email", + Endpoint=self.operator_email, ) logger.info( @@ -1654,11 +1601,12 @@ def clean(self): TopicArn=f"arn:aws:sns:{self.region}:{self.account}:{self.sns_state_machine_topic}" ) - logger.info(f"Simple notifications topic {self.sns_state_machine_topic} deleted.") + logger.info( + f"Simple notifications topic {self.sns_state_machine_topic} deleted." + ) class DockerBatchBase(BuildStockBatchBase): - CONTAINER_RUNTIME = ContainerRuntime.DOCKER def __init__(self, project_filename): @@ -1668,8 +1616,12 @@ def __init__(self, project_filename): try: self.docker_client.ping() except: # noqa: E722 (allow bare except in this case because error can be a weird non-class Windows API error) - logger.error('The docker server did not respond, make sure Docker Desktop is started then retry.') - raise RuntimeError('The docker server did not respond, make sure Docker Desktop is started then retry.') + logger.error( + "The docker server did not respond, make sure Docker Desktop is started then retry." + ) + raise RuntimeError( + "The docker server did not respond, make sure Docker Desktop is started then retry." 
+ ) @staticmethod def validate_project(project_file): @@ -1677,43 +1629,49 @@ def validate_project(project_file): @property def docker_image(self): - return 'nrel/openstudio:{}'.format(self.os_version) + return "nrel/openstudio:{}".format(self.os_version) class AwsBatch(DockerBatchBase): - def __init__(self, project_filename): super().__init__(project_filename) - self.job_identifier = re.sub('[^0-9a-zA-Z]+', '_', self.cfg['aws']['job_identifier'])[:10] + self.job_identifier = re.sub( + "[^0-9a-zA-Z]+", "_", self.cfg["aws"]["job_identifier"] + )[:10] self.project_filename = project_filename - self.region = self.cfg['aws']['region'] - self.ecr = boto3.client('ecr', region_name=self.region) - self.s3 = boto3.client('s3', region_name=self.region) - self.s3_bucket = self.cfg['aws']['s3']['bucket'] - self.s3_bucket_prefix = self.cfg['aws']['s3']['prefix'].rstrip('/') - self.batch_env_use_spot = self.cfg['aws']['use_spot'] - self.batch_array_size = self.cfg['aws']['batch_array_size'] + self.region = self.cfg["aws"]["region"] + self.ecr = boto3.client("ecr", region_name=self.region) + self.s3 = boto3.client("s3", region_name=self.region) + self.s3_bucket = self.cfg["aws"]["s3"]["bucket"] + self.s3_bucket_prefix = self.cfg["aws"]["s3"]["prefix"].rstrip("/") + self.batch_env_use_spot = self.cfg["aws"]["use_spot"] + self.batch_array_size = self.cfg["aws"]["batch_array_size"] self.boto3_session = boto3.Session(region_name=self.region) @staticmethod def validate_instance_types(project_file): cfg = get_project_configuration(project_file) - aws_config = cfg['aws'] - boto3_session = boto3.Session(region_name=aws_config['region']) - ec2 = boto3_session.client('ec2') - job_base = AwsJobBase('genericjobid', aws_config, boto3_session) + aws_config = cfg["aws"] + boto3_session = boto3.Session(region_name=aws_config["region"]) + ec2 = boto3_session.client("ec2") + job_base = AwsJobBase("genericjobid", aws_config, boto3_session) instance_types_requested = set() instance_types_requested.add(job_base.emr_manager_instance_type) instance_types_requested.add(job_base.emr_worker_instance_type) - inst_type_resp = ec2.describe_instance_type_offerings(Filters=[{ - 'Name': 'instance-type', - 'Values': list(instance_types_requested) - }]) - instance_types_available = set([x['InstanceType'] for x in inst_type_resp['InstanceTypeOfferings']]) + inst_type_resp = ec2.describe_instance_type_offerings( + Filters=[ + {"Name": "instance-type", "Values": list(instance_types_requested)} + ] + ) + instance_types_available = set( + [x["InstanceType"] for x in inst_type_resp["InstanceTypeOfferings"]] + ) if not instance_types_requested == instance_types_available: - instance_types_not_available = instance_types_requested - instance_types_available + instance_types_not_available = ( + instance_types_requested - instance_types_available + ) raise ValidationError( f"The instance type(s) {', '.join(instance_types_not_available)} are not available in region {aws_config['region']}." 
# noqa E501 ) @@ -1725,7 +1683,7 @@ def validate_project(project_file): @property def docker_image(self): - return 'nrel/buildstockbatch' + return "nrel/buildstockbatch" @property def weather_dir(self): @@ -1736,12 +1694,12 @@ def container_repo(self): repo_name = self.docker_image repos = self.ecr.describe_repositories() repo = None - for repo in repos['repositories']: - if repo['repositoryName'] == repo_name: + for repo in repos["repositories"]: + if repo["repositoryName"] == repo_name: break if repo is None: resp = self.ecr.create_repository(repositoryName=repo_name) - repo = resp['repository'] + repo = resp["repository"] return repo def build_image(self): @@ -1749,13 +1707,13 @@ def build_image(self): Build the docker image to use in the batch simulation """ root_path = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent - if not (root_path / 'Dockerfile').exists(): - raise RuntimeError(f'The needs to be run from the root of the repo, found {root_path}') - logger.debug('Building docker image') + if not (root_path / "Dockerfile").exists(): + raise RuntimeError( + f"The needs to be run from the root of the repo, found {root_path}" + ) + logger.debug("Building docker image") self.docker_client.images.build( - path=str(root_path), - tag=self.docker_image, - rm=True + path=str(root_path), tag=self.docker_image, rm=True ) def push_image(self): @@ -1763,28 +1721,31 @@ def push_image(self): Push the locally built docker image to the AWS docker repo """ auth_token = self.ecr.get_authorization_token() - dkr_user, dkr_pass = base64.b64decode(auth_token['authorizationData'][0]['authorizationToken']). \ - decode('ascii').split(':') - repo_url = self.container_repo['repositoryUri'] - registry_url = 'https://' + repo_url.split('/')[0] + dkr_user, dkr_pass = ( + base64.b64decode(auth_token["authorizationData"][0]["authorizationToken"]) + .decode("ascii") + .split(":") + ) + repo_url = self.container_repo["repositoryUri"] + registry_url = "https://" + repo_url.split("/")[0] resp = self.docker_client.login( - username=dkr_user, - password=dkr_pass, - registry=registry_url + username=dkr_user, password=dkr_pass, registry=registry_url ) logger.debug(resp) image = self.docker_client.images.get(self.docker_image) image.tag(repo_url, tag=self.job_identifier) last_status = None - for x in self.docker_client.images.push(repo_url, tag=self.job_identifier, stream=True): + for x in self.docker_client.images.push( + repo_url, tag=self.job_identifier, stream=True + ): try: y = json.loads(x) except json.JSONDecodeError: continue else: - if y.get('status') is not None and y.get('status') != last_status: - logger.debug(y['status']) - last_status = y['status'] + if y.get("status") is not None and y.get("status") != last_status: + logger.debug(y["status"]) + last_status = y["status"] def clean(self): """ @@ -1793,10 +1754,12 @@ def clean(self): """ logger.info("Beginning cleanup of AWS resources...") - batch_env = AwsBatchEnv(self.job_identifier, self.cfg['aws'], self.boto3_session) + batch_env = AwsBatchEnv( + self.job_identifier, self.cfg["aws"], self.boto3_session + ) batch_env.clean() - sns_env = AwsSNS(self.job_identifier, self.cfg['aws'], self.boto3_session) + sns_env = AwsSNS(self.job_identifier, self.cfg["aws"], self.boto3_session) sns_env.clean() def run_batch(self): @@ -1813,29 +1776,43 @@ def run_batch(self): buildstock_csv_filename = self.sampler.run_sampling() # Compress and upload assets to S3 - with tempfile.TemporaryDirectory(prefix='bsb_') as tmpdir, tempfile.TemporaryDirectory(prefix='bsb_') 
as tmp_weather_dir: # noqa: E501 + with tempfile.TemporaryDirectory( + prefix="bsb_" + ) as tmpdir, tempfile.TemporaryDirectory( + prefix="bsb_" + ) as tmp_weather_dir: # noqa: E501 self._weather_dir = tmp_weather_dir self._get_weather_files() tmppath = pathlib.Path(tmpdir) - logger.debug('Creating assets tarfile') - with tarfile.open(tmppath / 'assets.tar.gz', 'x:gz') as tar_f: + logger.debug("Creating assets tarfile") + with tarfile.open(tmppath / "assets.tar.gz", "x:gz") as tar_f: project_path = pathlib.Path(self.project_dir) buildstock_path = pathlib.Path(self.buildstock_dir) - tar_f.add(buildstock_path / 'measures', 'measures') - if os.path.exists(buildstock_path / 'resources/hpxml-measures'): - tar_f.add(buildstock_path / 'resources/hpxml-measures', 'resources/hpxml-measures') - tar_f.add(buildstock_path / 'resources', 'lib/resources') - tar_f.add(project_path / 'housing_characteristics', 'lib/housing_characteristics') + tar_f.add(buildstock_path / "measures", "measures") + if os.path.exists(buildstock_path / "resources/hpxml-measures"): + tar_f.add( + buildstock_path / "resources/hpxml-measures", + "resources/hpxml-measures", + ) + tar_f.add(buildstock_path / "resources", "lib/resources") + tar_f.add( + project_path / "housing_characteristics", + "lib/housing_characteristics", + ) # Weather files - weather_path = tmppath / 'weather' + weather_path = tmppath / "weather" os.makedirs(weather_path) # Determine the unique weather files - epw_filenames = list(filter(lambda x: x.endswith('.epw'), os.listdir(self.weather_dir))) - logger.debug('Calculating hashes for weather files') + epw_filenames = list( + filter(lambda x: x.endswith(".epw"), os.listdir(self.weather_dir)) + ) + logger.debug("Calculating hashes for weather files") epw_hashes = Parallel(n_jobs=-1, verbose=9)( - delayed(calc_hash_for_file)(pathlib.Path(self.weather_dir) / epw_filename) + delayed(calc_hash_for_file)( + pathlib.Path(self.weather_dir) / epw_filename + ) for epw_filename in epw_filenames ) unique_epws = collections.defaultdict(list) @@ -1843,17 +1820,17 @@ def run_batch(self): unique_epws[epw_hash].append(epw_filename) # Compress unique weather files - logger.debug('Compressing weather files') + logger.debug("Compressing weather files") Parallel(n_jobs=-1, verbose=9)( delayed(compress_file)( pathlib.Path(self.weather_dir) / x[0], - str(weather_path / x[0]) + '.gz' + str(weather_path / x[0]) + ".gz", ) for x in unique_epws.values() ) - logger.debug('Writing project configuration for upload') - with open(tmppath / 'config.json', 'wt', encoding='utf-8') as f: + logger.debug("Writing project configuration for upload") + with open(tmppath / "config.json", "wt", encoding="utf-8") as f: json.dump(self.cfg, f) # Collect simulations to queue @@ -1861,8 +1838,8 @@ def run_batch(self): self.validate_buildstock_csv(self.project_filename, df) building_ids = df.index.tolist() n_datapoints = len(building_ids) - n_sims = n_datapoints * (len(self.cfg.get('upgrades', [])) + 1) - logger.debug('Total number of simulations = {}'.format(n_sims)) + n_sims = n_datapoints * (len(self.cfg.get("upgrades", [])) + 1) + logger.debug("Total number of simulations = {}".format(n_sims)) # This is the maximum number of jobs that can be in an array if self.batch_array_size <= 10000: @@ -1871,69 +1848,90 @@ def run_batch(self): max_array_size = 10000 n_sims_per_job = math.ceil(n_sims / max_array_size) n_sims_per_job = max(n_sims_per_job, 2) - logger.debug('Number of simulations per array job = {}'.format(n_sims_per_job)) + logger.debug( + "Number of 
simulations per array job = {}".format(n_sims_per_job) + ) baseline_sims = zip(building_ids, itertools.repeat(None)) - upgrade_sims = itertools.product(building_ids, range(len(self.cfg.get('upgrades', [])))) + upgrade_sims = itertools.product( + building_ids, range(len(self.cfg.get("upgrades", []))) + ) all_sims = list(itertools.chain(baseline_sims, upgrade_sims)) random.shuffle(all_sims) all_sims_iter = iter(all_sims) - os.makedirs(tmppath / 'jobs') + os.makedirs(tmppath / "jobs") - logger.info('Queueing jobs') + logger.info("Queueing jobs") for i in itertools.count(0): batch = list(itertools.islice(all_sims_iter, n_sims_per_job)) if not batch: break - job_json_filename = tmppath / 'jobs' / 'job{:05d}.json'.format(i) - with open(job_json_filename, 'w') as f: - json.dump({ - 'job_num': i, - 'n_datapoints': n_datapoints, - 'batch': batch, - }, f, indent=4) + job_json_filename = tmppath / "jobs" / "job{:05d}.json".format(i) + with open(job_json_filename, "w") as f: + json.dump( + { + "job_num": i, + "n_datapoints": n_datapoints, + "batch": batch, + }, + f, + indent=4, + ) array_size = i - logger.debug('Array size = {}'.format(array_size)) + logger.debug("Array size = {}".format(array_size)) # Compress job jsons - jobs_dir = tmppath / 'jobs' - logger.debug('Compressing job jsons using gz') + jobs_dir = tmppath / "jobs" + logger.debug("Compressing job jsons using gz") tick = time.time() - with tarfile.open(tmppath / 'jobs.tar.gz', 'w:gz') as tf: - tf.add(jobs_dir, arcname='jobs') + with tarfile.open(tmppath / "jobs.tar.gz", "w:gz") as tf: + tf.add(jobs_dir, arcname="jobs") tick = time.time() - tick - logger.debug('Done compressing job jsons using gz {:.1f} seconds'.format(tick)) + logger.debug( + "Done compressing job jsons using gz {:.1f} seconds".format(tick) + ) shutil.rmtree(jobs_dir) - os.makedirs(tmppath / 'results' / 'simulation_output') + os.makedirs(tmppath / "results" / "simulation_output") - logger.debug('Uploading files to S3') - upload_directory_to_s3(tmppath, self.cfg['aws']['s3']['bucket'], self.cfg['aws']['s3']['prefix']) + logger.debug("Uploading files to S3") + upload_directory_to_s3( + tmppath, + self.cfg["aws"]["s3"]["bucket"], + self.cfg["aws"]["s3"]["prefix"], + ) # Copy the non-unique weather files on S3 epws_to_copy = [] for epws in unique_epws.values(): # The first in the list is already up there, copy the rest for filename in epws[1:]: - epws_to_copy.append(( - f"{self.cfg['aws']['s3']['prefix']}/weather/{epws[0]}.gz", - f"{self.cfg['aws']['s3']['prefix']}/weather/{filename}.gz" - )) + epws_to_copy.append( + ( + f"{self.cfg['aws']['s3']['prefix']}/weather/{epws[0]}.gz", + f"{self.cfg['aws']['s3']['prefix']}/weather/{filename}.gz", + ) + ) - logger.debug('Copying weather files on S3') - bucket = self.cfg['aws']['s3']['bucket'] + logger.debug("Copying weather files on S3") + bucket = self.cfg["aws"]["s3"]["bucket"] Parallel(n_jobs=-1, verbose=9)( - delayed(copy_s3_file)(bucket, src, bucket, dest) for src, dest in epws_to_copy + delayed(copy_s3_file)(bucket, src, bucket, dest) + for src, dest in epws_to_copy ) # Create the output directories fs = S3FileSystem() - for upgrade_id in range(len(self.cfg.get('upgrades', [])) + 1): - fs.makedirs(f"{self.cfg['aws']['s3']['bucket']}/{self.cfg['aws']['s3']['prefix']}/results/simulation_output/timeseries/up{upgrade_id:02d}") # noqa E501 + for upgrade_id in range(len(self.cfg.get("upgrades", [])) + 1): + fs.makedirs( + 
f"{self.cfg['aws']['s3']['bucket']}/{self.cfg['aws']['s3']['prefix']}/results/simulation_output/timeseries/up{upgrade_id:02d}" + ) # noqa E501 # Define the batch environment - batch_env = AwsBatchEnv(self.job_identifier, self.cfg['aws'], self.boto3_session) + batch_env = AwsBatchEnv( + self.job_identifier, self.cfg["aws"], self.boto3_session + ) logger.info( "Launching Batch environment - (resource configs will not be updated on subsequent executions, but new job revisions will be created):" # noqa 501 ) @@ -1944,25 +1942,28 @@ def run_batch(self): batch_env.create_job_queue() # Pass through config for the Docker containers - env_vars = dict(S3_BUCKET=self.s3_bucket, S3_PREFIX=self.s3_bucket_prefix, JOB_NAME=self.job_identifier, - REGION=self.region) + env_vars = dict( + S3_BUCKET=self.s3_bucket, + S3_PREFIX=self.s3_bucket_prefix, + JOB_NAME=self.job_identifier, + REGION=self.region, + ) - image_url = '{}:{}'.format( - self.container_repo['repositoryUri'], - self.job_identifier + image_url = "{}:{}".format( + self.container_repo["repositoryUri"], self.job_identifier ) - job_env_cfg = self.cfg['aws'].get('job_environment', {}) + job_env_cfg = self.cfg["aws"].get("job_environment", {}) batch_env.create_job_definition( image_url, - command=['python3.8', '-m', 'buildstockbatch.aws.aws'], - vcpus=job_env_cfg.get('vcpus', 1), - memory=job_env_cfg.get('memory', 1024), - env_vars=env_vars + command=["python3.8", "-m", "buildstockbatch.aws.aws"], + vcpus=job_env_cfg.get("vcpus", 1), + memory=job_env_cfg.get("memory", 1024), + env_vars=env_vars, ) # SNS Topic - sns_env = AwsSNS(self.job_identifier, self.cfg['aws'], self.boto3_session) + sns_env = AwsSNS(self.job_identifier, self.cfg["aws"], self.boto3_session) sns_env.create_topic() sns_env.subscribe_to_topic() @@ -1980,7 +1981,9 @@ def run_batch(self): # start job batch_env.start_state_machine_execution(array_size) - logger.info('Batch job submitted. Check your email to subscribe to notifications.') + logger.info( + "Batch job submitted. Check your email to subscribe to notifications." 
+ ) @classmethod def run_job(cls, job_id, bucket, prefix, job_name, region): @@ -1993,65 +1996,75 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): """ logger.debug(f"region: {region}") - s3 = boto3.client('s3') + s3 = boto3.client("s3") - sim_dir = pathlib.Path('/var/simdata/openstudio') + sim_dir = pathlib.Path("/var/simdata/openstudio") - logger.debug('Downloading assets') - assets_file_path = sim_dir.parent / 'assets.tar.gz' - s3.download_file(bucket, f'{prefix}/assets.tar.gz', str(assets_file_path)) - with tarfile.open(assets_file_path, 'r') as tar_f: + logger.debug("Downloading assets") + assets_file_path = sim_dir.parent / "assets.tar.gz" + s3.download_file(bucket, f"{prefix}/assets.tar.gz", str(assets_file_path)) + with tarfile.open(assets_file_path, "r") as tar_f: tar_f.extractall(sim_dir) os.remove(assets_file_path) - logger.debug('Reading config') + logger.debug("Reading config") with io.BytesIO() as f: - s3.download_fileobj(bucket, f'{prefix}/config.json', f) + s3.download_fileobj(bucket, f"{prefix}/config.json", f) cfg = json.loads(f.getvalue()) - logger.debug('Getting job information') - jobs_file_path = sim_dir.parent / 'jobs.tar.gz' - s3.download_file(bucket, f'{prefix}/jobs.tar.gz', str(jobs_file_path)) - with tarfile.open(jobs_file_path, 'r') as tar_f: - jobs_d = json.load(tar_f.extractfile(f'jobs/job{job_id:05d}.json'), encoding='utf-8') - logger.debug('Number of simulations = {}'.format(len(jobs_d['batch']))) + logger.debug("Getting job information") + jobs_file_path = sim_dir.parent / "jobs.tar.gz" + s3.download_file(bucket, f"{prefix}/jobs.tar.gz", str(jobs_file_path)) + with tarfile.open(jobs_file_path, "r") as tar_f: + jobs_d = json.load( + tar_f.extractfile(f"jobs/job{job_id:05d}.json"), encoding="utf-8" + ) + logger.debug("Number of simulations = {}".format(len(jobs_d["batch"]))) - logger.debug('Getting weather files') - weather_dir = sim_dir / 'weather' + logger.debug("Getting weather files") + weather_dir = sim_dir / "weather" os.makedirs(weather_dir, exist_ok=True) # Make a lookup of which parameter points to the weather file from options_lookup.tsv - with open(sim_dir / 'lib' / 'resources' / 'options_lookup.tsv', 'r', encoding='utf-8') as f: - tsv_reader = csv.reader(f, delimiter='\t') + with open( + sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8" + ) as f: + tsv_reader = csv.reader(f, delimiter="\t") next(tsv_reader) # skip headers param_name = None epws_by_option = {} for row in tsv_reader: - row_has_epw = [x.endswith('.epw') for x in row[2:]] + row_has_epw = [x.endswith(".epw") for x in row[2:]] if sum(row_has_epw): if row[0] != param_name and param_name is not None: - raise RuntimeError(f'The epw files are specified in options_lookup.tsv under more than one parameter type: {param_name}, {row[0]}') # noqa: E501 - epw_filename = row[row_has_epw.index(True) + 2].split('=')[1] + raise RuntimeError( + f"The epw files are specified in options_lookup.tsv under more than one parameter type: {param_name}, {row[0]}" + ) # noqa: E501 + epw_filename = row[row_has_epw.index(True) + 2].split("=")[1] param_name = row[0] option_name = row[1] epws_by_option[option_name] = epw_filename # Look through the buildstock.csv to find the appropriate location and epw epws_to_download = set() - building_ids = [x[0] for x in jobs_d['batch']] - with open(sim_dir / 'lib' / 'housing_characteristics' / 'buildstock.csv', 'r', encoding='utf-8') as f: + building_ids = [x[0] for x in jobs_d["batch"]] + with open( + sim_dir / "lib" / 
"housing_characteristics" / "buildstock.csv", + "r", + encoding="utf-8", + ) as f: csv_reader = csv.DictReader(f) for row in csv_reader: - if int(row['Building']) in building_ids: + if int(row["Building"]) in building_ids: epws_to_download.add(epws_by_option[row[param_name]]) # Download the epws needed for these simulations for epw_filename in epws_to_download: with io.BytesIO() as f_gz: - logger.debug('Downloading {}.gz'.format(epw_filename)) - s3.download_fileobj(bucket, f'{prefix}/weather/{epw_filename}.gz', f_gz) - with open(weather_dir / epw_filename, 'wb') as f_out: - logger.debug('Extracting {}'.format(epw_filename)) + logger.debug("Downloading {}.gz".format(epw_filename)) + s3.download_fileobj(bucket, f"{prefix}/weather/{epw_filename}.gz", f_gz) + with open(weather_dir / epw_filename, "wb") as f_out: + logger.debug("Extracting {}".format(epw_filename)) f_out.write(gzip.decompress(f_gz.getvalue())) asset_dirs = os.listdir(sim_dir) @@ -2059,30 +2072,32 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): local_fs = LocalFileSystem() reporting_measures = cls.get_reporting_measures(cfg) dpouts = [] - simulation_output_tar_filename = sim_dir.parent / 'simulation_outputs.tar.gz' - with tarfile.open(str(simulation_output_tar_filename), 'w:gz') as simout_tar: - for building_id, upgrade_idx in jobs_d['batch']: + simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz" + with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar: + for building_id, upgrade_idx in jobs_d["batch"]: upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = f'bldg{building_id:07d}up{upgrade_id:02d}' + sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" # Create OSW - osw = cls.create_osw(cfg, jobs_d['n_datapoints'], sim_id, building_id, upgrade_idx) - with open(os.path.join(sim_dir, 'in.osw'), 'w') as f: + osw = cls.create_osw( + cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx + ) + with open(os.path.join(sim_dir, "in.osw"), "w") as f: json.dump(osw, f, indent=4) # Run Simulation - with open(sim_dir / 'os_stdout.log', 'w') as f_out: + with open(sim_dir / "os_stdout.log", "w") as f_out: try: - logger.debug('Running {}'.format(sim_id)) + logger.debug("Running {}".format(sim_id)) subprocess.run( - ['openstudio', 'run', '-w', 'in.osw'], + ["openstudio", "run", "-w", "in.osw"], check=True, stdout=f_out, stderr=subprocess.STDOUT, - cwd=str(sim_dir) + cwd=str(sim_dir), ) except subprocess.CalledProcessError: - logger.debug(f'Simulation failed: see {sim_id}/os_stdout.log') + logger.debug(f"Simulation failed: see {sim_id}/os_stdout.log") # Clean Up simulation directory cls.cleanup_sim_dir( @@ -2090,7 +2105,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): fs, f"{bucket}/{prefix}/results/simulation_output/timeseries", upgrade_id, - building_id + building_id, ) # Read data_point_out.json @@ -2100,7 +2115,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): dpouts.append(dpout) # Add the rest of the simulation outputs to the tar archive - logger.info('Archiving simulation outputs') + logger.info("Archiving simulation outputs") for dirpath, dirnames, filenames in os.walk(sim_dir): if dirpath == str(sim_dir): for dirname in set(dirnames).intersection(asset_dirs): @@ -2111,7 +2126,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): simout_tar.add(abspath, os.path.join(sim_id, relpath)) # Clear directory for next simulation - logger.debug('Clearing out simulation directory') + logger.debug("Clearing out simulation 
directory") for item in set(os.listdir(sim_dir)).difference(asset_dirs): if os.path.isdir(item): shutil.rmtree(item) @@ -2121,12 +2136,15 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): # Upload simulation outputs tarfile to s3 fs.put( str(simulation_output_tar_filename), - f'{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz' + f"{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz", ) # Upload aggregated dpouts as a json file - with fs.open(f'{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz', 'wb') as f1: - with gzip.open(f1, 'wt', encoding='utf-8') as f2: + with fs.open( + f"{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz", + "wb", + ) as f1: + with gzip.open(f1, "wt", encoding="utf-8") as f2: json.dump(dpouts, f2) # Remove files (it helps docker if we don't leave a bunch of files laying around) @@ -2140,56 +2158,59 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): @log_error_details() def main(): - logging.config.dictConfig({ - 'version': 1, - 'disable_existing_loggers': True, - 'formatters': { - 'defaultfmt': { - 'format': '%(levelname)s:%(asctime)s:%(name)s:%(message)s', - 'datefmt': '%Y-%m-%d %H:%M:%S' - } - }, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'formatter': 'defaultfmt', - 'level': 'DEBUG', - 'stream': 'ext://sys.stdout', - } - }, - 'loggers': { - '__main__': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] + logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "defaultfmt": { + "format": "%(levelname)s:%(asctime)s:%(name)s:%(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + } }, - 'buildstockbatch': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] - } - }, - }) + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "defaultfmt", + "level": "DEBUG", + "stream": "ext://sys.stdout", + } + }, + "loggers": { + "__main__": { + "level": "DEBUG", + "propagate": True, + "handlers": ["console"], + }, + "buildstockbatch": { + "level": "DEBUG", + "propagate": True, + "handlers": ["console"], + }, + }, + } + ) print(AwsBatch.LOGO) - if 'AWS_BATCH_JOB_ARRAY_INDEX' in os.environ: - job_id = int(os.environ['AWS_BATCH_JOB_ARRAY_INDEX']) - s3_bucket = os.environ['S3_BUCKET'] - s3_prefix = os.environ['S3_PREFIX'] - job_name = os.environ['JOB_NAME'] - region = os.environ['REGION'] + if "AWS_BATCH_JOB_ARRAY_INDEX" in os.environ: + job_id = int(os.environ["AWS_BATCH_JOB_ARRAY_INDEX"]) + s3_bucket = os.environ["S3_BUCKET"] + s3_prefix = os.environ["S3_PREFIX"] + job_name = os.environ["JOB_NAME"] + region = os.environ["REGION"] AwsBatch.run_job(job_id, s3_bucket, s3_prefix, job_name, region) else: parser = argparse.ArgumentParser() - parser.add_argument('project_filename') + parser.add_argument("project_filename") parser.add_argument( - '-c', '--clean', - action='store_true', - help='After the simulation is done, run with --clean to clean up AWS environment' + "-c", + "--clean", + action="store_true", + help="After the simulation is done, run with --clean to clean up AWS environment", ) parser.add_argument( - '--validateonly', - help='Only validate the project YAML file and references. Nothing is executed', - action='store_true' + "--validateonly", + help="Only validate the project YAML file and references. 
Nothing is executed", + action="store_true", ) args = parser.parse_args() @@ -2207,5 +2228,5 @@ def main(): batch.run_batch() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/buildstockbatch/aws/awsbase.py b/buildstockbatch/aws/awsbase.py index 8884e761..e4de45b7 100644 --- a/buildstockbatch/aws/awsbase.py +++ b/buildstockbatch/aws/awsbase.py @@ -4,20 +4,26 @@ logger = logging.getLogger(__name__) -class AWSIAMHelper(): - +class AWSIAMHelper: logger.propagate = False def __init__(self, session): - ''' + """ Initialize the AWSIAM class with a boto3 Session :param session: boto3 Session from 'parent' job base class - ''' + """ self.session = session - self.iam = self.session.client('iam') - - def role_stitcher(self, role_name, trust_service, description, policies_list=[], managed_policie_arns=[]): - ''' + self.iam = self.session.client("iam") + + def role_stitcher( + self, + role_name, + trust_service, + description, + policies_list=[], + managed_policie_arns=[], + ): + """ Creates a role and attached the policies - will catch errors and skip if role already exists :param role_name: Name of service role to create :param trust_service: Trusted service to associate with the service role @@ -25,9 +31,9 @@ def role_stitcher(self, role_name, trust_service, description, policies_list=[], :param policies_list: List of JSON policies (optional) :param managed_policie_arns: Managed policies to attach (optional) :return: Role ARN is returned - ''' + """ role_arn = None - trust_policy = f'''{{ + trust_policy = f"""{{ "Version": "2012-10-17", "Statement": [{{ "Effect": "Allow", @@ -37,99 +43,83 @@ def role_stitcher(self, role_name, trust_service, description, policies_list=[], "Action": "sts:AssumeRole" }}] }} - ''' + """ try: response = self.iam.create_role( - Path='/', + Path="/", RoleName=role_name, AssumeRolePolicyDocument=trust_policy, - Description=description + Description=description, ) - role_arn = response['Role']['Arn'] + role_arn = response["Role"]["Arn"] p_counter = 1 for policy in policies_list: - response = self.iam.put_role_policy( RoleName=role_name, - PolicyName=f'{role_name}_policy_{p_counter}', - PolicyDocument=policy + PolicyName=f"{role_name}_policy_{p_counter}", + PolicyDocument=policy, ) p_counter = p_counter + 1 for managed_policy_arn in managed_policie_arns: - response = self.iam.attach_role_policy( - PolicyArn=managed_policy_arn, - RoleName=role_name + PolicyArn=managed_policy_arn, RoleName=role_name ) - logger.info(f'Role {role_name} created') + logger.info(f"Role {role_name} created") return role_arn except Exception as e: - if 'EntityAlreadyExists' in str(e): - logger.info(f'Role {role_name} not created - already exists') - response = self.iam.get_role( - RoleName=role_name - ) - role_arn = response['Role']['Arn'] + if "EntityAlreadyExists" in str(e): + logger.info(f"Role {role_name} not created - already exists") + response = self.iam.get_role(RoleName=role_name) + role_arn = response["Role"]["Arn"] return role_arn else: raise def delete_role(self, role_name): - ''' + """ Delete a role :param role_name: name of the role to delete :return: None - ''' + """ try: - response = self.iam.list_role_policies( - RoleName=role_name - ) + response = self.iam.list_role_policies(RoleName=role_name) - for policy in response['PolicyNames']: - self.iam.delete_role_policy( - RoleName=role_name, - PolicyName=policy - ) + for policy in response["PolicyNames"]: + self.iam.delete_role_policy(RoleName=role_name, PolicyName=policy) - response = 
self.iam.list_attached_role_policies( - RoleName=role_name - ) + response = self.iam.list_attached_role_policies(RoleName=role_name) - for policy in response['AttachedPolicies']: + for policy in response["AttachedPolicies"]: self.iam.detach_role_policy( - RoleName=role_name, - PolicyArn=policy['PolicyArn'] - ) + RoleName=role_name, PolicyArn=policy["PolicyArn"] + ) - logger.info(f'Policies detached from role {role_name}.') + logger.info(f"Policies detached from role {role_name}.") - response = self.iam.delete_role( - RoleName=role_name - ) - logger.info(f'Role {role_name} deleted.') + response = self.iam.delete_role(RoleName=role_name) + logger.info(f"Role {role_name} deleted.") except Exception as e: - if 'NoSuchEntity' in str(e): - logger.info(f'Role {role_name} missing, skipping...') + if "NoSuchEntity" in str(e): + logger.info(f"Role {role_name} missing, skipping...") else: raise def delete_instance_profile(self, instance_profile_name): - try: - self.iam.delete_instance_profile( - InstanceProfileName=instance_profile_name - ) + self.iam.delete_instance_profile(InstanceProfileName=instance_profile_name) logger.info(f"Instance profile {instance_profile_name} deleted.") except Exception as e: - if 'NoSuchEntity' in str(e): - logger.info(f"Instance profile {instance_profile_name} missing, skipping...") + if "NoSuchEntity" in str(e): + logger.info( + f"Instance profile {instance_profile_name} missing, skipping..." + ) else: raise @@ -139,21 +129,21 @@ def remove_role_from_instance_profile(self, instance_profile_name): InstanceProfileName=instance_profile_name ) - for role in response['InstanceProfile']['Roles']: + for role in response["InstanceProfile"]["Roles"]: response = self.iam.remove_role_from_instance_profile( - InstanceProfileName=instance_profile_name, - RoleName=role['RoleName'] + InstanceProfileName=instance_profile_name, RoleName=role["RoleName"] ) logger.info(f"Roles removed from instance profile {instance_profile_name}") except Exception as e: - if 'NoSuchEntity' in str(e): - logger.info(f"Instance profile {instance_profile_name} does not exist. Skipping...") + if "NoSuchEntity" in str(e): + logger.info( + f"Instance profile {instance_profile_name} does not exist. Skipping..." 
+ ) else: raise -class AwsJobBase(): - +class AwsJobBase: logger.propagate = False def __init__(self, job_identifier, aws_config, boto3_session): @@ -161,69 +151,86 @@ def __init__(self, job_identifier, aws_config, boto3_session): self.session = boto3_session self.iam_helper = AWSIAMHelper(self.session) self.iam = self.iam_helper.iam - self.s3 = self.session.client('s3') + self.s3 = self.session.client("s3") self.job_identifier = job_identifier - self.account = self.session.client('sts').get_caller_identity().get('Account') - self.region = aws_config['region'] - self.operator_email = aws_config['notifications_email'] + self.account = self.session.client("sts").get_caller_identity().get("Account") + self.region = aws_config["region"] + self.operator_email = aws_config["notifications_email"] # S3 - self.s3_bucket = aws_config['s3']['bucket'] + self.s3_bucket = aws_config["s3"]["bucket"] self.s3_bucket_arn = f"arn:aws:s3:::{self.s3_bucket}" - self.s3_bucket_prefix = aws_config['s3']['prefix'].rstrip('/') - self.s3_lambda_code_emr_cluster_key = f'{self.s3_bucket_prefix}/lambda_functions/emr_function.py.zip' - self.s3_lambda_emr_config_key = f'{self.s3_bucket_prefix}/lambda_functions/emr_config.json' - self.s3_emr_folder_name = 'emr' + self.s3_bucket_prefix = aws_config["s3"]["prefix"].rstrip("/") + self.s3_lambda_code_emr_cluster_key = ( + f"{self.s3_bucket_prefix}/lambda_functions/emr_function.py.zip" + ) + self.s3_lambda_emr_config_key = ( + f"{self.s3_bucket_prefix}/lambda_functions/emr_config.json" + ) + self.s3_emr_folder_name = "emr" # EMR - emr_config = aws_config.get('emr', {}) - self.emr_manager_instance_type = emr_config.get('manager_instance_type', 'm5.4xlarge') - self.emr_worker_instance_type = emr_config.get('worker_instance_type', 'r5.4xlarge') - self.emr_worker_instance_count = emr_config.get('worker_instance_count', 4) - self.emr_cluster_security_group_name = f'{self.job_identifier}_emr_security_group' - self.emr_cluster_name = f'{self.job_identifier}_emr_dask_cluster' - self.emr_job_flow_role_name = f'{self.job_identifier}_emr_job_flow_role' - self.emr_job_flow_role_arn = '' - self.emr_service_role_name = f'{self.job_identifier}_emr_service_role' - self.emr_service_role_arn = '' - self.emr_cluster_security_group_id = '' - self.emr_log_uri = f's3://{self.s3_bucket}/{self.s3_bucket_prefix}/emrlogs/' - self.emr_instance_profile_name = f'{self.job_identifier}_emr_instance_profile' + emr_config = aws_config.get("emr", {}) + self.emr_manager_instance_type = emr_config.get( + "manager_instance_type", "m5.4xlarge" + ) + self.emr_worker_instance_type = emr_config.get( + "worker_instance_type", "r5.4xlarge" + ) + self.emr_worker_instance_count = emr_config.get("worker_instance_count", 4) + self.emr_cluster_security_group_name = ( + f"{self.job_identifier}_emr_security_group" + ) + self.emr_cluster_name = f"{self.job_identifier}_emr_dask_cluster" + self.emr_job_flow_role_name = f"{self.job_identifier}_emr_job_flow_role" + self.emr_job_flow_role_arn = "" + self.emr_service_role_name = f"{self.job_identifier}_emr_service_role" + self.emr_service_role_arn = "" + self.emr_cluster_security_group_id = "" + self.emr_log_uri = f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/emrlogs/" + self.emr_instance_profile_name = f"{self.job_identifier}_emr_instance_profile" # Lambda - self.lambda_emr_job_step_execution_role = f'{self.job_identifier}_emr_job_step_execution_role' - self.lambda_emr_job_step_function_name = f'{self.job_identifier}_emr_job_step_submission' - 
self.lambda_emr_job_step_execution_role_arn = '' + self.lambda_emr_job_step_execution_role = ( + f"{self.job_identifier}_emr_job_step_execution_role" + ) + self.lambda_emr_job_step_function_name = ( + f"{self.job_identifier}_emr_job_step_submission" + ) + self.lambda_emr_job_step_execution_role_arn = "" # Batch self.batch_compute_environment_name = f"computeenvionment_{self.job_identifier}" - self.batch_compute_environment_ami = 'ami-0184013939261b626' + self.batch_compute_environment_ami = "ami-0184013939261b626" self.batch_job_queue_name = f"job_queue_{self.job_identifier}" self.batch_service_role_name = f"batch_service_role_{self.job_identifier}" self.batch_instance_role_name = f"batch_instance_role_{self.job_identifier}" - self.batch_instance_profile_name = f"batch_instance_profile_{self.job_identifier}" + self.batch_instance_profile_name = ( + f"batch_instance_profile_{self.job_identifier}" + ) self.batch_spot_service_role_name = f"spot_fleet_role_{self.job_identifier}" self.batch_ecs_task_role_name = f"ecs_task_role_{self.job_identifier}" self.batch_task_policy_name = f"ecs_task_policy_{self.job_identifier}" - self.batch_use_spot = aws_config.get('use_spot', True) - self.batch_spot_bid_percent = aws_config.get('spot_bid_percent', 100) + self.batch_use_spot = aws_config.get("use_spot", True) + self.batch_spot_bid_percent = aws_config.get("spot_bid_percent", 100) # Step Functions self.state_machine_name = f"{self.job_identifier}_state_machine" self.state_machine_role_name = f"{self.job_identifier}_state_machine_role" # SNS - self.sns_state_machine_topic = f"{self.job_identifier}_state_machine_notifications" + self.sns_state_machine_topic = ( + f"{self.job_identifier}_state_machine_notifications" + ) # VPC self.vpc_name = self.job_identifier - self.vpc_id = '' # will be available after VPC creation - self.priv_subnet_cidr_1 = '' # will be available after VPC creation - self.priv_vpc_subnet_id_1 = 'REPL' # will be available after VPC creation - self.priv_vpc_subnet_id_2 = 'REPL' # will be available after VPC creation + self.vpc_id = "" # will be available after VPC creation + self.priv_subnet_cidr_1 = "" # will be available after VPC creation + self.priv_vpc_subnet_id_1 = "REPL" # will be available after VPC creation + self.priv_vpc_subnet_id_2 = "REPL" # will be available after VPC creation def __repr__(self): - return f""" Job Identifier: {self.job_identifier} S3 Bucket for Source Data: {self.s3_bucket} diff --git a/buildstockbatch/aws/s3_assets/bsb_post.py b/buildstockbatch/aws/s3_assets/bsb_post.py index 9d2c8b3d..5449dada 100644 --- a/buildstockbatch/aws/s3_assets/bsb_post.py +++ b/buildstockbatch/aws/s3_assets/bsb_post.py @@ -5,65 +5,67 @@ import json from s3fs import S3FileSystem -from postprocessing import combine_results, create_athena_tables, remove_intermediate_files +from postprocessing import ( + combine_results, + create_athena_tables, + remove_intermediate_files, +) def do_postprocessing(s3_bucket, s3_bucket_prefix): - fs = S3FileSystem() - with fs.open(f'{s3_bucket}/{s3_bucket_prefix}/config.json', 'r') as f: + with fs.open(f"{s3_bucket}/{s3_bucket_prefix}/config.json", "r") as f: cfg = json.load(f) - ec2 = boto3.client('ec2') + ec2 = boto3.client("ec2") - with open('/mnt/var/lib/info/job-flow.json', 'r') as f: + with open("/mnt/var/lib/info/job-flow.json", "r") as f: job_flow_info = json.load(f) - for instance_group in job_flow_info['instanceGroups']: - if instance_group['instanceRole'].lower() == 'core': - instance_type = instance_group['instanceType'] - instance_count 
= instance_group['requestedInstanceCount'] + for instance_group in job_flow_info["instanceGroups"]: + if instance_group["instanceRole"].lower() == "core": + instance_type = instance_group["instanceType"] + instance_count = instance_group["requestedInstanceCount"] instance_info = ec2.describe_instance_types(InstanceTypes=[instance_type]) - dask_worker_vcores = cfg['aws'].get('emr', {}).get('dask_worker_vcores', 2) - instance_memory = instance_info['InstanceTypes'][0]['MemoryInfo']['SizeInMiB'] - instance_ncpus = instance_info['InstanceTypes'][0]['VCpuInfo']['DefaultVCpus'] + dask_worker_vcores = cfg["aws"].get("emr", {}).get("dask_worker_vcores", 2) + instance_memory = instance_info["InstanceTypes"][0]["MemoryInfo"]["SizeInMiB"] + instance_ncpus = instance_info["InstanceTypes"][0]["VCpuInfo"]["DefaultVCpus"] n_dask_workers = instance_count * instance_ncpus // dask_worker_vcores worker_memory = round(instance_memory / instance_ncpus * dask_worker_vcores * 0.95) cluster = YarnCluster( - deploy_mode='local', + deploy_mode="local", worker_vcores=dask_worker_vcores, - worker_memory='{} MiB'.format(worker_memory), - n_workers=n_dask_workers + worker_memory="{} MiB".format(worker_memory), + n_workers=n_dask_workers, ) client = Client(cluster) # noqa E841 - results_s3_loc = f'{s3_bucket}/{s3_bucket_prefix}/results' + results_s3_loc = f"{s3_bucket}/{s3_bucket_prefix}/results" combine_results(fs, results_s3_loc, cfg) - aws_conf = cfg.get('postprocessing', {}).get('aws', {}) - if 'athena' in aws_conf: - tbl_prefix = s3_bucket_prefix.split('/')[-1] + aws_conf = cfg.get("postprocessing", {}).get("aws", {}) + if "athena" in aws_conf: + tbl_prefix = s3_bucket_prefix.split("/")[-1] if not tbl_prefix: - tbl_prefix = cfg['aws']['job_identifier'] + tbl_prefix = cfg["aws"]["job_identifier"] create_athena_tables( - aws_conf, - tbl_prefix, - s3_bucket, - f'{s3_bucket_prefix}/results/parquet' + aws_conf, tbl_prefix, s3_bucket, f"{s3_bucket_prefix}/results/parquet" ) - keep_individual_timeseries = cfg.get('postprocessing', {}).get('keep_individual_timeseries', False) + keep_individual_timeseries = cfg.get("postprocessing", {}).get( + "keep_individual_timeseries", False + ) remove_intermediate_files(fs, results_s3_loc, keep_individual_timeseries) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('s3_bucket') - parser.add_argument('s3_bucket_prefix') + parser.add_argument("s3_bucket") + parser.add_argument("s3_bucket_prefix") args = parser.parse_args() do_postprocessing(args.s3_bucket, args.s3_bucket_prefix) diff --git a/buildstockbatch/aws/s3_assets/lambda_function.py b/buildstockbatch/aws/s3_assets/lambda_function.py index d8c9c3d5..8b742846 100644 --- a/buildstockbatch/aws/s3_assets/lambda_function.py +++ b/buildstockbatch/aws/s3_assets/lambda_function.py @@ -11,11 +11,11 @@ def lambda_handler(event, context): # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr.html - session = boto3.Session(region_name=os.environ['REGION']) + session = boto3.Session(region_name=os.environ["REGION"]) - s3 = session.client('s3') + s3 = session.client("s3") with io.BytesIO() as f: - s3.download_fileobj(os.environ['BUCKET'], os.environ['EMR_CONFIG_JSON_KEY'], f) + s3.download_fileobj(os.environ["BUCKET"], os.environ["EMR_CONFIG_JSON_KEY"], f) args = json.loads(f.getvalue()) emr = session.client("emr") diff --git a/buildstockbatch/aws/s3_assets/setup_postprocessing.py b/buildstockbatch/aws/s3_assets/setup_postprocessing.py index 
9fbbc703..6700198a 100644 --- a/buildstockbatch/aws/s3_assets/setup_postprocessing.py +++ b/buildstockbatch/aws/s3_assets/setup_postprocessing.py @@ -1,16 +1,16 @@ from setuptools import setup setup( - name='buildstockbatch-postprocessing', - version='0.1', - description='Just the stand alone postprocessing functions from Buildstock-Batch', - py_modules=['postprocessing'], + name="buildstockbatch-postprocessing", + version="0.1", + description="Just the stand alone postprocessing functions from Buildstock-Batch", + py_modules=["postprocessing"], install_requires=[ - 'dask[complete]>=2022.10.0', - 's3fs>=0.4.2,<0.5.0', - 'boto3', - 'pandas>=1.0.0,!=1.0.4', - 'pyarrow>=3.0.0', - 'numpy>=1.20.0' - ] + "dask[complete]>=2022.10.0", + "s3fs>=0.4.2,<0.5.0", + "boto3", + "pandas>=1.0.0,!=1.0.4", + "pyarrow>=3.0.0", + "numpy>=1.20.0", + ], ) diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index 99d44a1d..97fe0d66 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -30,11 +30,7 @@ import pprint from buildstockbatch.__version__ import __schema_version__ -from buildstockbatch import ( - sampler, - workflow_generator, - postprocessing -) +from buildstockbatch import sampler, workflow_generator, postprocessing from buildstockbatch.exc import SimulationExists, ValidationError from buildstockbatch.utils import path_rel_to_file, get_project_configuration, read_csv from buildstockbatch.__version__ import __version__ as bsb_version @@ -43,19 +39,18 @@ class BuildStockBatchBase(object): - # http://openstudio-builds.s3-website-us-east-1.amazonaws.com - DEFAULT_OS_VERSION = '3.6.1' - DEFAULT_OS_SHA = 'bb9481519e' + DEFAULT_OS_VERSION = "3.6.1" + DEFAULT_OS_SHA = "bb9481519e" CONTAINER_RUNTIME = None - LOGO = ''' + LOGO = """ _ __ _ __, _ __ ( / ) o // /( _/_ / ( / ) _/_ / /--< , ,, // __/ `. / __ _, /< /--< __, / _, / /___/(_/_(_(/_(_/_(___)(__(_)(__/ |_/___/(_/(_(__(__/ /_ Executing BuildStock projects with grace since 2018 -''' +""" def __init__(self, project_filename): self.project_filename = os.path.abspath(project_filename) @@ -63,35 +58,47 @@ def __init__(self, project_filename): # Load project file to self.cfg self.cfg = get_project_configuration(project_filename) - self.buildstock_dir = self.cfg['buildstock_directory'] + self.buildstock_dir = self.cfg["buildstock_directory"] if not os.path.isdir(self.buildstock_dir): - raise FileNotFoundError(f'buildstock_directory = {self.buildstock_dir} is not a directory.') - self.project_dir = os.path.join(self.buildstock_dir, self.cfg['project_directory']) + raise FileNotFoundError( + f"buildstock_directory = {self.buildstock_dir} is not a directory." + ) + self.project_dir = os.path.join( + self.buildstock_dir, self.cfg["project_directory"] + ) if not os.path.isdir(self.project_dir): - raise FileNotFoundError(f'project_directory = {self.project_dir} is not a directory.') + raise FileNotFoundError( + f"project_directory = {self.project_dir} is not a directory." + ) # Load in OS_VERSION and OS_SHA arguments if they exist in the YAML, # otherwise use defaults specified here. 
- self.os_version = self.cfg.get('os_version', self.DEFAULT_OS_VERSION) - self.os_sha = self.cfg.get('os_sha', self.DEFAULT_OS_SHA) - logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}") + self.os_version = self.cfg.get("os_version", self.DEFAULT_OS_VERSION) + self.os_sha = self.cfg.get("os_sha", self.DEFAULT_OS_SHA) + logger.debug( + f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}" + ) @staticmethod def get_sampler_class(sampler_name): - sampler_class_name = ''.join(x.capitalize() for x in sampler_name.strip().split('_')) + 'Sampler' + sampler_class_name = ( + "".join(x.capitalize() for x in sampler_name.strip().split("_")) + "Sampler" + ) return getattr(sampler, sampler_class_name) @staticmethod def get_workflow_generator_class(workflow_generator_name): - workflow_generator_class_name = \ - ''.join(x.capitalize() for x in workflow_generator_name.strip().split('_')) + 'WorkflowGenerator' + workflow_generator_class_name = ( + "".join(x.capitalize() for x in workflow_generator_name.strip().split("_")) + + "WorkflowGenerator" + ) return getattr(workflow_generator, workflow_generator_class_name) @property def sampler(self): # Select a sampler - Sampler = self.get_sampler_class(self.cfg['sampler']['type']) - return Sampler(self, **self.cfg['sampler'].get('args', {})) + Sampler = self.get_sampler_class(self.cfg["sampler"]["type"]) + return Sampler(self, **self.cfg["sampler"].get("args", {})) @staticmethod def openstudio_exe(): @@ -101,22 +108,24 @@ def path_rel_to_projectfile(self, x): return path_rel_to_file(self.project_filename, x) def _get_weather_files(self): - if 'weather_files_path' in self.cfg: - logger.debug('Copying weather files') - weather_file_path = self.cfg['weather_files_path'] - with zipfile.ZipFile(weather_file_path, 'r') as zf: - logger.debug('Extracting weather files to: {}'.format(self.weather_dir)) + if "weather_files_path" in self.cfg: + logger.debug("Copying weather files") + weather_file_path = self.cfg["weather_files_path"] + with zipfile.ZipFile(weather_file_path, "r") as zf: + logger.debug("Extracting weather files to: {}".format(self.weather_dir)) zf.extractall(self.weather_dir) else: - logger.debug('Downloading weather files') - r = requests.get(self.cfg['weather_files_url'], stream=True) - with tempfile.TemporaryFile(dir=os.environ.get('LOCAL_SCRATCH')) as f: + logger.debug("Downloading weather files") + r = requests.get(self.cfg["weather_files_url"], stream=True) + with tempfile.TemporaryFile(dir=os.environ.get("LOCAL_SCRATCH")) as f: for chunk in r.iter_content(chunk_size=1024): if chunk: f.write(chunk) f.seek(0) - with zipfile.ZipFile(f, 'r') as zf: - logger.debug('Extracting weather files to: {}'.format(self.weather_dir)) + with zipfile.ZipFile(f, "r") as zf: + logger.debug( + "Extracting weather files to: {}".format(self.weather_dir) + ) zf.extractall(self.weather_dir) @property @@ -133,13 +142,17 @@ def output_dir(self): @property def skip_baseline_sims(self): - baseline_skip = self.cfg['baseline'].get('skip_sims', False) + baseline_skip = self.cfg["baseline"].get("skip_sims", False) return baseline_skip @classmethod def get_reporting_measures(cls, cfg): - WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) - wg = WorkflowGenerator(cfg, 1) # Number of datapoints doesn't really matter here + WorkflowGenerator = cls.get_workflow_generator_class( + cfg["workflow_generator"]["type"] + ) + wg = WorkflowGenerator( + cfg, 1 + ) # Number of datapoints doesn't really matter here 
return wg.reporting_measures() def run_batch(self): @@ -147,22 +160,34 @@ def run_batch(self): @classmethod def create_osw(cls, cfg, n_datapoints, *args, **kwargs): - WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) + WorkflowGenerator = cls.get_workflow_generator_class( + cfg["workflow_generator"]["type"] + ) osw_generator = WorkflowGenerator(cfg, n_datapoints) return osw_generator.create_osw(*args, **kwargs) @staticmethod def make_sim_dir(building_id, upgrade_idx, base_dir, overwrite_existing=False): real_upgrade_idx = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = 'bldg{:07d}up{:02d}'.format(building_id, real_upgrade_idx) + sim_id = "bldg{:07d}up{:02d}".format(building_id, real_upgrade_idx) # Check to see if the simulation is done already and skip it if so. - sim_dir = os.path.join(base_dir, 'up{:02d}'.format(real_upgrade_idx), 'bldg{:07d}'.format(building_id)) + sim_dir = os.path.join( + base_dir, + "up{:02d}".format(real_upgrade_idx), + "bldg{:07d}".format(building_id), + ) if os.path.exists(sim_dir) and not overwrite_existing: - if os.path.exists(os.path.join(sim_dir, 'run', 'finished.job')): - raise SimulationExists('{} exists and finished successfully'.format(sim_id), sim_id, sim_dir) - elif os.path.exists(os.path.join(sim_dir, 'run', 'failed.job')): - raise SimulationExists('{} exists and failed'.format(sim_id), sim_id, sim_dir) + if os.path.exists(os.path.join(sim_dir, "run", "finished.job")): + raise SimulationExists( + "{} exists and finished successfully".format(sim_id), + sim_id, + sim_dir, + ) + elif os.path.exists(os.path.join(sim_dir, "run", "failed.job")): + raise SimulationExists( + "{} exists and failed".format(sim_id), sim_id, sim_dir + ) else: shutil.rmtree(sim_dir) @@ -189,48 +214,56 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id): # Convert the timeseries data to parquet # and copy it to the results directory - timeseries_filepath = os.path.join(sim_dir, 'run', 'results_timeseries.csv') + timeseries_filepath = os.path.join(sim_dir, "run", "results_timeseries.csv") # FIXME: Allowing both names here for compatibility. Should consolidate on one timeseries filename. 
if os.path.isfile(timeseries_filepath): units_dict = read_csv(timeseries_filepath, nrows=1).transpose().to_dict()[0] skiprows = [1] else: - timeseries_filepath = os.path.join(sim_dir, 'run', 'enduse_timeseries.csv') + timeseries_filepath = os.path.join(sim_dir, "run", "enduse_timeseries.csv") units_dict = {} skiprows = [] - schedules_filepath = '' - if os.path.isdir(os.path.join(sim_dir, 'generated_files')): - for file in os.listdir(os.path.join(sim_dir, 'generated_files')): + schedules_filepath = "" + if os.path.isdir(os.path.join(sim_dir, "generated_files")): + for file in os.listdir(os.path.join(sim_dir, "generated_files")): if re.match(r".*schedules.*\.csv", file): - schedules_filepath = os.path.join(sim_dir, 'generated_files', file) + schedules_filepath = os.path.join(sim_dir, "generated_files", file) if os.path.isfile(timeseries_filepath): # Find the time columns present in the enduse_timeseries file - possible_time_cols = ['time', 'Time', 'TimeDST', 'TimeUTC'] - cols = read_csv(timeseries_filepath, index_col=False, nrows=0).columns.tolist() + possible_time_cols = ["time", "Time", "TimeDST", "TimeUTC"] + cols = read_csv( + timeseries_filepath, index_col=False, nrows=0 + ).columns.tolist() actual_time_cols = [c for c in cols if c in possible_time_cols] if not actual_time_cols: - logger.error(f'Did not find any time column ({possible_time_cols}) in {timeseries_filepath}.') - raise RuntimeError(f'Did not find any time column ({possible_time_cols}) in {timeseries_filepath}.') + logger.error( + f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}." + ) + raise RuntimeError( + f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}." + ) - tsdf = read_csv(timeseries_filepath, parse_dates=actual_time_cols, skiprows=skiprows) + tsdf = read_csv( + timeseries_filepath, parse_dates=actual_time_cols, skiprows=skiprows + ) if os.path.isfile(schedules_filepath): schedules = read_csv(schedules_filepath, dtype=np.float64) - schedules.rename(columns=lambda x: f'schedules_{x}', inplace=True) - schedules['TimeDST'] = tsdf['Time'] - tsdf = tsdf.merge(schedules, how='left', on='TimeDST') + schedules.rename(columns=lambda x: f"schedules_{x}", inplace=True) + schedules["TimeDST"] = tsdf["Time"] + tsdf = tsdf.merge(schedules, how="left", on="TimeDST") def get_clean_column_name(x): - """" + """ " Will rename column names like End Use: Natural Gas: Range/Oven to end_use__natural_gas__range_oven__kbtu to play nice with Athena """ unit = units_dict.get(x) # missing units (e.g. 
for time) gets nan - unit = unit if isinstance(unit, str) else '' - sepecial_characters = [':', ' ', '/'] + unit = unit if isinstance(unit, str) else "" + sepecial_characters = [":", " ", "/"] for char in sepecial_characters: - x = x.replace(char, '_') + x = x.replace(char, "_") x = x + "__" + unit if unit else x return x.lower() @@ -238,20 +271,23 @@ def get_clean_column_name(x): postprocessing.write_dataframe_as_parquet( tsdf, dest_fs, - f'{simout_ts_dir}/up{upgrade_id:02d}/bldg{building_id:07d}.parquet' + f"{simout_ts_dir}/up{upgrade_id:02d}/bldg{building_id:07d}.parquet", ) # Remove files already in data_point.zip - zipfilename = os.path.join(sim_dir, 'run', 'data_point.zip') + zipfilename = os.path.join(sim_dir, "run", "data_point.zip") if os.path.isfile(zipfilename): - with zipfile.ZipFile(zipfilename, 'r') as zf: + with zipfile.ZipFile(zipfilename, "r") as zf: for filename in zf.namelist(): - for filepath in (os.path.join(sim_dir, 'run', filename), os.path.join(sim_dir, filename)): + for filepath in ( + os.path.join(sim_dir, "run", filename), + os.path.join(sim_dir, filename), + ): if os.path.exists(filepath): os.remove(filepath) # Remove reports dir - reports_dir = os.path.join(sim_dir, 'reports') + reports_dir = os.path.join(sim_dir, "reports") if os.path.isdir(reports_dir): shutil.rmtree(reports_dir, ignore_errors=True) @@ -270,7 +306,7 @@ def validate_project(cls, project_file): assert cls.validate_resstock_or_comstock_version(project_file) assert cls.validate_openstudio_version(project_file) assert cls.validate_number_of_options(project_file) - logger.info('Base Validation Successful') + logger.info("Base Validation Successful") return True @staticmethod @@ -279,26 +315,32 @@ def get_buildstock_dir(project_file, cfg): if os.path.isabs(buildstock_dir): return os.path.abspath(buildstock_dir) else: - return os.path.abspath(os.path.join(os.path.dirname(project_file), buildstock_dir)) + return os.path.abspath( + os.path.join(os.path.dirname(project_file), buildstock_dir) + ) @classmethod def validate_openstudio_path(cls, project_file): cfg = get_project_configuration(project_file) - os_version = cfg.get('os_version', cls.DEFAULT_OS_VERSION) - os_sha = cfg.get('os_sha', cls.DEFAULT_OS_SHA) + os_version = cfg.get("os_version", cls.DEFAULT_OS_VERSION) + os_sha = cfg.get("os_sha", cls.DEFAULT_OS_SHA) try: proc_out = subprocess.run( [cls.openstudio_exe(), "openstudio_version"], capture_output=True, - text=True + text=True, ) except FileNotFoundError: raise ValidationError(f"Cannot find openstudio at `{cls.openstudio_exe()}`") if proc_out.returncode != 0: - raise ValidationError(f"OpenStudio failed with the following error {proc_out.stderr}") + raise ValidationError( + f"OpenStudio failed with the following error {proc_out.stderr}" + ) actual_os_version, actual_os_sha = proc_out.stdout.strip().split("+") if os_version != actual_os_version: - raise ValidationError(f"OpenStudio version is {actual_os_version}, expected is {os_version}") + raise ValidationError( + f"OpenStudio version is {actual_os_version}, expected is {os_version}" + ) if os_sha != actual_os_sha: raise ValidationError( f"OpenStudio version is correct at {os_version}, but the shas don't match. 
" @@ -309,21 +351,23 @@ def validate_openstudio_path(cls, project_file): @staticmethod def validate_sampler(project_file): cfg = get_project_configuration(project_file) - sampler_name = cfg['sampler']['type'] + sampler_name = cfg["sampler"]["type"] try: Sampler = BuildStockBatchBase.get_sampler_class(sampler_name) except AttributeError: - raise ValidationError(f'Sampler class `{sampler_name}` is not available.') - args = cfg['sampler']['args'] + raise ValidationError(f"Sampler class `{sampler_name}` is not available.") + args = cfg["sampler"]["args"] Sampler.validate_args(project_file, **args) if issubclass(Sampler, sampler.PrecomputedSampler): - sample_file = cfg['sampler']['args']['sample_file'] + sample_file = cfg["sampler"]["args"]["sample_file"] if not os.path.isabs(sample_file): sample_file = os.path.join(os.path.dirname(project_file), sample_file) else: sample_file = os.path.abspath(sample_file) buildstock_df = read_csv(sample_file, dtype=str) - return BuildStockBatchBase.validate_buildstock_csv(project_file, buildstock_df) + return BuildStockBatchBase.validate_buildstock_csv( + project_file, buildstock_df + ) return True @staticmethod @@ -333,38 +377,48 @@ def validate_buildstock_csv(project_file, buildstock_df): # param_option_dict has format: {column_name: [valid_option1, valid_option2, ...], ...} errors = [] for column in buildstock_df.columns: - if column in {'Building'}: + if column in {"Building"}: continue if column not in param_option_dict: - errors.append(f'Column {column} in buildstock_csv is not available in options_lookup.tsv') + errors.append( + f"Column {column} in buildstock_csv is not available in options_lookup.tsv" + ) continue if "*" in param_option_dict[column]: continue # skip validating options when wildcard is present for option in buildstock_df[column].unique(): if option not in param_option_dict[column]: - errors.append(f'Option {option} in column {column} of buildstock_csv is not available ' - 'in options_lookup.tsv') + errors.append( + f"Option {option} in column {column} of buildstock_csv is not available " + "in options_lookup.tsv" + ) if errors: - raise ValidationError('\n'.join(errors)) + raise ValidationError("\n".join(errors)) return True @classmethod def validate_workflow_generator(cls, project_file): cfg = get_project_configuration(project_file) - WorkflowGenerator = cls.get_workflow_generator_class(cfg['workflow_generator']['type']) + WorkflowGenerator = cls.get_workflow_generator_class( + cfg["workflow_generator"]["type"] + ) return WorkflowGenerator.validate(cfg) @staticmethod def validate_project_schema(project_file): cfg = get_project_configuration(project_file) - schema_version = cfg.get('schema_version') - version_schema = os.path.join(os.path.dirname(__file__), 'schemas', f'v{schema_version}.yaml') + schema_version = cfg.get("schema_version") + version_schema = os.path.join( + os.path.dirname(__file__), "schemas", f"v{schema_version}.yaml" + ) if not os.path.isfile(version_schema): - logger.error(f'Could not find validation schema for YAML version {schema_version}') + logger.error( + f"Could not find validation schema for YAML version {schema_version}" + ) raise FileNotFoundError(version_schema) schema = yamale.make_schema(version_schema) - data = yamale.make_data(project_file, parser='ruamel') + data = yamale.make_data(project_file, parser="ruamel") return yamale.validate(schema, data, strict=True) @staticmethod @@ -378,22 +432,27 @@ def validate_misc_constraints(project_file): def validate_postprocessing_spec(project_file): cfg = 
get_project_configuration(project_file) # noqa F841 param_option_dict, _ = BuildStockBatchBase.get_param_option_dict(project_file) - partition_cols = cfg.get('postprocessing', {}).get("partition_columns", []) + partition_cols = cfg.get("postprocessing", {}).get("partition_columns", []) invalid_cols = [c for c in partition_cols if c not in param_option_dict.keys()] if invalid_cols: - raise ValidationError(f"The following partition columns are not valid: {invalid_cols}") + raise ValidationError( + f"The following partition columns are not valid: {invalid_cols}" + ) return True @staticmethod def validate_xor_nor_schema_keys(project_file): cfg = get_project_configuration(project_file) - major, minor = cfg.get('version', __schema_version__).split('.') + major, minor = cfg.get("version", __schema_version__).split(".") if int(major) >= 0: if int(minor) >= 0: # xor - if ('weather_files_url' in cfg.keys()) is \ - ('weather_files_path' in cfg.keys()): - raise ValidationError('Both/neither weather_files_url and weather_files_path found in yaml root') + if ("weather_files_url" in cfg.keys()) is ( + "weather_files_path" in cfg.keys() + ): + raise ValidationError( + "Both/neither weather_files_url and weather_files_path found in yaml root" + ) return True @@ -402,25 +461,34 @@ def get_param_option_dict(project_file): cfg = get_project_configuration(project_file) param_option_dict = defaultdict(set) buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg) - options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv' + options_lookup_path = f"{buildstock_dir}/resources/options_lookup.tsv" # fill in the param_option_dict with {'param1':['valid_option1','valid_option2' ...]} from options_lookup.tsv try: - with open(options_lookup_path, 'r') as f: - options = csv.DictReader(f, delimiter='\t') - invalid_options_lookup_str = '' # Holds option/parameter names with invalid characters + with open(options_lookup_path, "r") as f: + options = csv.DictReader(f, delimiter="\t") + invalid_options_lookup_str = ( + "" # Holds option/parameter names with invalid characters + ) for row in options: - for col in ['Parameter Name', 'Option Name']: - invalid_chars = set(row[col]).intersection(set('|&()')) - invalid_chars = ''.join(invalid_chars) + for col in ["Parameter Name", "Option Name"]: + invalid_chars = set(row[col]).intersection(set("|&()")) + invalid_chars = "".join(invalid_chars) if invalid_chars: invalid_options_lookup_str += f"{col}: '{row[col]}', Invalid chars: '{invalid_chars}' \n" - param_name, opt_name = row['Parameter Name'], row['Option Name'] - param_option_dict[row['Parameter Name']].add(row['Option Name']) - if opt_name == '*' and row['Measure Dir']: - invalid_options_lookup_str += f"{param_name}: '*' cannot pass arguments to measure.\n" - if "*" in param_option_dict[param_name] and len(param_option_dict[param_name]) > 1: - invalid_options_lookup_str += f"{param_name}: '*' cannot be mixed with other options\n" + param_name, opt_name = row["Parameter Name"], row["Option Name"] + param_option_dict[row["Parameter Name"]].add(row["Option Name"]) + if opt_name == "*" and row["Measure Dir"]: + invalid_options_lookup_str += ( + f"{param_name}: '*' cannot pass arguments to measure.\n" + ) + if ( + "*" in param_option_dict[param_name] + and len(param_option_dict[param_name]) > 1 + ): + invalid_options_lookup_str += ( + f"{param_name}: '*' cannot be mixed with other options\n" + ) except FileNotFoundError as err: logger.error(f"Options lookup file not found at: '{options_lookup_path}'") 
raise err @@ -432,7 +500,10 @@ def validate_options_lookup(project_file): Validates that the parameter|options specified in the project yaml file is available in the options_lookup.tsv """ cfg = get_project_configuration(project_file) - param_option_dict, invalid_options_lookup_str = BuildStockBatchBase.get_param_option_dict(project_file) + ( + param_option_dict, + invalid_options_lookup_str, + ) = BuildStockBatchBase.get_param_option_dict(project_file) invalid_option_spec_counter = Counter() invalid_param_counter = Counter() invalid_option_counter_dict = defaultdict(Counter) @@ -446,44 +517,59 @@ def get_errors(source_str, option_str): :return: returns empty string if the param|option is valid i.e. they are found in options_lookup.tsv if not returns error message, close matches, and specifies where the error occurred (source_str) """ - if '||' in option_str and '&&' in option_str: - invalid_option_spec_counter[(option_str, "has both || and && (not supported)")] += 1 + if "||" in option_str and "&&" in option_str: + invalid_option_spec_counter[ + (option_str, "has both || and && (not supported)") + ] += 1 return "" - if '||' in option_str or '&&' in option_str: - splitter = '||' if '||' in option_str else '&&' - errors = '' + if "||" in option_str or "&&" in option_str: + splitter = "||" if "||" in option_str else "&&" + errors = "" broken_options = option_str.split(splitter) - if broken_options[-1] == '': - invalid_option_spec_counter[(option_str, "has trailing 'splitter'")] += 1 + if broken_options[-1] == "": + invalid_option_spec_counter[ + (option_str, "has trailing 'splitter'") + ] += 1 return "" for broken_option_str in broken_options: new_source_str = source_str + f" in composite option '{option_str}'" errors += get_errors(new_source_str, broken_option_str) return errors - if not option_str or '|' == option_str: + if not option_str or "|" == option_str: return f"* Option name empty. 
{source_str}\n" try: - parameter_name, option_name = option_str.split('|') + parameter_name, option_name = option_str.split("|") except ValueError: - invalid_option_spec_counter[(option_str, "has has too many or too few '|' (exactly 1 required).")] += 1 + invalid_option_spec_counter[ + ( + option_str, + "has too many or too few '|' (exactly 1 required).", + ) + ] += 1 return "" if parameter_name not in param_option_dict: - close_match = difflib.get_close_matches(parameter_name, param_option_dict.keys(), 1) + close_match = difflib.get_close_matches( + parameter_name, param_option_dict.keys(), 1 + ) close_match = close_match[0] if close_match else "" invalid_param_counter[(parameter_name, close_match)] += 1 return "" if not option_name or option_name not in param_option_dict[parameter_name]: - close_match = difflib.get_close_matches(option_name, list(param_option_dict[parameter_name]), 1) + close_match = difflib.get_close_matches( + option_name, list(param_option_dict[parameter_name]), 1 + ) close_match = close_match[0] if close_match else "" - invalid_option_counter_dict[parameter_name][(option_name, close_match)] += 1 + invalid_option_counter_dict[parameter_name][ + (option_name, close_match) + ] += 1 return "" - return '' + return "" def get_all_option_str(source_str, inp): """ @@ -499,46 +585,76 @@ def get_all_option_str(source_str, inp): if type(inp) == str: return [(source_str, inp)] elif type(inp) == list: - return sum([get_all_option_str(source_str + f", in entry {count}", entry) for count, entry - in enumerate(inp)], []) + return sum( + [ + get_all_option_str(source_str + f", in entry {count}", entry) + for count, entry in enumerate(inp) + ], + [], + ) elif type(inp) == dict: if len(inp) > 1: - raise ValidationError(f"{source_str} the logic is malformed. Dict can't have more than one entry") + raise ValidationError( + f"{source_str} the logic is malformed. 
Dict can't have more than one entry" + ) source_str += f", in {list(inp.keys())[0]}" - return sum([get_all_option_str(source_str, i) for i in inp.values()], []) + return sum( + [get_all_option_str(source_str, i) for i in inp.values()], [] + ) # store all of the option_str in the project file as a list of (source_str, option_str) tuple source_option_str_list = [] - if 'upgrades' in cfg: - for upgrade_count, upgrade in enumerate(cfg['upgrades']): - upgrade_name = upgrade.get('upgrade_name', '') + f' (Upgrade Number: {upgrade_count})' + if "upgrades" in cfg: + for upgrade_count, upgrade in enumerate(cfg["upgrades"]): + upgrade_name = ( + upgrade.get("upgrade_name", "") + + f" (Upgrade Number: {upgrade_count})" + ) source_str_upgrade = f"In upgrade '{upgrade_name}'" - for option_count, option in enumerate(upgrade['options']): - option_name = option.get('option', '') + f' (Option Number: {option_count})' - source_str_option = source_str_upgrade + f", in option '{option_name}'" - source_option_str_list.append((source_str_option, option.get('option'))) - if 'apply_logic' in option: + for option_count, option in enumerate(upgrade["options"]): + option_name = ( + option.get("option", "") + f" (Option Number: {option_count})" + ) + source_str_option = ( + source_str_upgrade + f", in option '{option_name}'" + ) + source_option_str_list.append( + (source_str_option, option.get("option")) + ) + if "apply_logic" in option: source_str_logic = source_str_option + ", in apply_logic" - source_option_str_list += get_all_option_str(source_str_logic, option['apply_logic']) + source_option_str_list += get_all_option_str( + source_str_logic, option["apply_logic"] + ) - if 'package_apply_logic' in upgrade: + if "package_apply_logic" in upgrade: source_str_package = source_str_upgrade + ", in package_apply_logic" - source_option_str_list += get_all_option_str(source_str_package, upgrade['package_apply_logic']) + source_option_str_list += get_all_option_str( + source_str_package, upgrade["package_apply_logic"] + ) # TODO: refactor this into Sampler.validate_args - if 'downselect' in cfg or "downselect" in cfg.get('sampler', {}).get('type'): + if "downselect" in cfg or "downselect" in cfg.get("sampler", {}).get("type"): source_str = "In downselect" - logic = cfg['downselect']['logic'] if 'downselect' in cfg else cfg['sampler']['args']['logic'] + logic = ( + cfg["downselect"]["logic"] + if "downselect" in cfg + else cfg["sampler"]["args"]["logic"] + ) source_option_str_list += get_all_option_str(source_str, logic) # Gather all the errors in the option_str, if any - error_message = '' + error_message = "" for source_str, option_str in source_option_str_list: error_message += get_errors(source_str, option_str) if error_message: - error_message = "Following option/parameter entries have problem:\n" + error_message + "\n" + error_message = ( + "Following option/parameter entries have problems:\n" + + error_message + + "\n" + ) if invalid_option_spec_counter: error_message += "* Following option/parameter entries have problems:\n" @@ -546,7 +662,9 @@ def get_all_option_str(source_str, inp): error_message += f" '{invalid_entry}' {error} - used '{count}' times\n" if invalid_param_counter: - error_message += "* Following parameters do not exist in options_lookup.tsv\n" + error_message += ( + "* Following parameters do not exist in options_lookup.tsv\n" + ) for (param, close_match), count in invalid_param_counter.items(): error_message += f" '{param}' - used '{count}' times." 
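# For reference, get_errors() above accepts entries of the form "Parameter Name|Option Name" (exactly one '|'), or composites joined by a single operator, e.g. "Vintage|1980s||Vintage|1990s" (or) and "Vintage|1980s&&Location Region|CR02" (and); mixing '||' with '&&', leaving a trailing operator, or referencing an unknown parameter/option are the cases tallied by the counters above. The parameter and option names in these examples are hypothetical.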
if close_match: @@ -565,8 +683,13 @@ def get_all_option_str(source_str, inp): error_message += "\n" if invalid_options_lookup_str: - error_message = "Following option/parameter names(s) have invalid characters in the options_lookup.tsv\n" +\ - invalid_options_lookup_str + "*"*80 + "\n" + error_message + error_message = ( + "Following option/parameter name(s) have invalid characters in the options_lookup.tsv\n" + + invalid_options_lookup_str + + "*" * 80 + + "\n" + + error_message + ) if not error_message: return True else: @@ -589,18 +712,20 @@ def validate_logic(project_file): printer = pprint.PrettyPrinter() def get_option(element): - return element.split('|')[0] if isinstance(element, str) else None + return element.split("|")[0] if isinstance(element, str) else None def get_logic_problems(logic, parent=None): if isinstance(logic, list): all_options = [opt for el in logic if (opt := get_option(el))] problems = [] - if parent in ['not', 'and', None, '&&']: + if parent in ["not", "and", None, "&&"]: for opt, count in Counter(all_options).items(): if count > 1: - parent_name = parent or 'and' - problem_text = f"Option '{opt}' occurs {count} times in a '{parent_name}' block. "\ + parent_name = parent or "and" + problem_text = ( + f"Option '{opt}' occurs {count} times in a '{parent_name}' block. " f"It should occur at most once. This is the block:\n{printer.pformat(logic)}" + ) if parent is None: problem_text += "\nRemember a list without a parent is considered an 'and' block." problems.append(problem_text) @@ -610,43 +735,64 @@ def get_logic_problems(logic, parent=None): elif isinstance(logic, dict): assert len(logic) == 1 for key, val in logic.items(): - if key not in ['or', 'and', 'not']: - raise ValidationError(f"Invalid key {key}. Only 'or', 'and' and 'not' is allowed.") + if key not in ["or", "and", "not"]: + raise ValidationError( + f"Invalid key {key}. Only 'or', 'and' and 'not' are allowed." 
+ ) return get_logic_problems(val, parent=key) elif isinstance(logic, str): - if '&&' not in logic: + if "&&" not in logic: return [] - entries = logic.split('&&') + entries = logic.split("&&") return get_logic_problems(entries, parent="&&") else: - raise ValidationError(f"Invalid logic element {logic} with type {type(logic)}") + raise ValidationError( + f"Invalid logic element {logic} with type {type(logic)}" + ) all_problems = [] - if 'upgrades' in cfg: - for upgrade_count, upgrade in enumerate(cfg['upgrades']): - upgrade_name = upgrade.get('upgrade_name', '') - source_str_upgrade = f"upgrade '{upgrade_name}' (Upgrade Number:{upgrade_count})" - for option_count, option in enumerate(upgrade['options']): - option_name = option.get('option', '') - source_str_option = source_str_upgrade + f", option '{option_name}' (Option Number:{option_count})" - if 'apply_logic' in option: - if problems := get_logic_problems(option['apply_logic']): - all_problems.append((source_str_option, problems, option['apply_logic'])) - - if 'package_apply_logic' in upgrade: + if "upgrades" in cfg: + for upgrade_count, upgrade in enumerate(cfg["upgrades"]): + upgrade_name = upgrade.get("upgrade_name", "") + source_str_upgrade = ( + f"upgrade '{upgrade_name}' (Upgrade Number:{upgrade_count})" + ) + for option_count, option in enumerate(upgrade["options"]): + option_name = option.get("option", "") + source_str_option = ( + source_str_upgrade + + f", option '{option_name}' (Option Number:{option_count})" + ) + if "apply_logic" in option: + if problems := get_logic_problems(option["apply_logic"]): + all_problems.append( + (source_str_option, problems, option["apply_logic"]) + ) + + if "package_apply_logic" in upgrade: source_str_package = source_str_upgrade + ", in package_apply_logic" - if problems := get_logic_problems(upgrade['package_apply_logic']): - all_problems.append((source_str_package, problems, upgrade['package_apply_logic'])) + if problems := get_logic_problems(upgrade["package_apply_logic"]): + all_problems.append( + ( + source_str_package, + problems, + upgrade["package_apply_logic"], + ) + ) # TODO: refactor this into Sampler.validate_args - if 'downselect' in cfg or "downselect" in cfg.get('sampler', {}).get('type'): + if "downselect" in cfg or "downselect" in cfg.get("sampler", {}).get("type"): source_str = "in downselect logic" - logic = cfg['downselect']['logic'] if 'downselect' in cfg else cfg['sampler']['args']['logic'] + logic = ( + cfg["downselect"]["logic"] + if "downselect" in cfg + else cfg["sampler"]["args"]["logic"] + ) if problems := get_logic_problems(logic): all_problems.append((source_str, problems, logic)) if all_problems: - error_str = '' + error_str = "" for location, problems, logic in all_problems: error_str += f"There are following problems in {location} with this logic\n{printer.pformat(logic)}\n" problem_str = "\n".join(problems) @@ -664,15 +810,15 @@ def validate_measure_references(project_file): cfg = get_project_configuration(project_file) measure_dirs = set() buildstock_dir = BuildStockBatchBase.get_buildstock_dir(project_file, cfg) - options_lookup_path = f'{buildstock_dir}/resources/options_lookup.tsv' + options_lookup_path = f"{buildstock_dir}/resources/options_lookup.tsv" # fill in the param_option_dict with {'param1':['valid_option1','valid_option2' ...]} from options_lookup.tsv try: - with open(options_lookup_path, 'r') as f: - options = csv.DictReader(f, delimiter='\t') + with open(options_lookup_path, "r") as f: + options = csv.DictReader(f, delimiter="\t") for row in 
options: - if row['Measure Dir']: - measure_dirs.add(row['Measure Dir']) + if row["Measure Dir"]: + measure_dirs.add(row["Measure Dir"]) except FileNotFoundError as err: logger.error(f"Options lookup file not found at: '{options_lookup_path}'") raise err @@ -689,25 +835,29 @@ def get_errors(source_str, measure_str): """ if measure_str not in measure_dirs: closest = difflib.get_close_matches(measure_str, list(measure_dirs)) - return f"Measure directory {measure_str} not found. Closest matches: {closest}" \ + return ( + f"Measure directory {measure_str} not found. Closest matches: {closest}" f" {source_str}\n" - return '' + ) + return "" source_measures_str_list = [] - if 'measures_to_ignore' in cfg['baseline']: + if "measures_to_ignore" in cfg["baseline"]: source_str = "In baseline 'measures_to_ignore'" - for measure_str in cfg['baseline']['measures_to_ignore']: + for measure_str in cfg["baseline"]["measures_to_ignore"]: source_measures_str_list.append((source_str, measure_str)) - error_message = '' + error_message = "" for source_str, measure_str in source_measures_str_list: error_message += get_errors(source_str, measure_str) if not error_message: return True else: - error_message = 'Measure name(s)/directory(ies) is(are) invalid. \n' + error_message + error_message = ( + "Measure name(s)/directory(ies) is(are) invalid. \n" + error_message + ) logger.error(error_message) raise ValidationError(error_message) @@ -720,19 +870,23 @@ def validate_reference_scenario(project_file): # collect all upgrade_names upgrade_names = set() - for upgrade_count, upgrade in enumerate(cfg.get('upgrades', [])): - upgrade_names.add(upgrade.get('upgrade_name', '')) + for upgrade_count, upgrade in enumerate(cfg.get("upgrades", [])): + upgrade_names.add(upgrade.get("upgrade_name", "")) warning_string = "" # check if the reference_scenario matches with any upgrade_names - for upgrade_count, upgrade in enumerate(cfg.get('upgrades', [])): - if 'reference_scenario' in upgrade: - if upgrade['reference_scenario'] not in upgrade_names: - warning_string += f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " \ + for upgrade_count, upgrade in enumerate(cfg.get("upgrades", [])): + if "reference_scenario" in upgrade: + if upgrade["reference_scenario"] not in upgrade_names: + warning_string += ( + f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " f"'{upgrade['reference_scenario']}' does not match any existing upgrade names \n" - elif upgrade['reference_scenario'] == upgrade.get('upgrade_name', ''): - warning_string += f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " \ + ) + elif upgrade["reference_scenario"] == upgrade.get("upgrade_name", ""): + warning_string += ( + f"* In Upgrade '{upgrade.get('upgrade_name', '')}', reference_scenario: " f"'{upgrade['reference_scenario']}' points to the same upgrade \n" + ) if warning_string: logger.warning(warning_string) @@ -746,22 +900,30 @@ def validate_resstock_or_comstock_version(project_file): """ cfg = get_project_configuration(project_file) - buildstock_rb = os.path.join(cfg['buildstock_directory'], 'resources/buildstock.rb') + buildstock_rb = os.path.join( + cfg["buildstock_directory"], "resources/buildstock.rb" + ) if os.path.exists(buildstock_rb): - with open(buildstock_rb, 'r') as f: + with open(buildstock_rb, "r") as f: versions = dict( - re.findall(r"^\s*(ResStock|ComStock|BuildStockBatch)_Version\s*=\s*'(.+)'", f.read(), re.MULTILINE) + re.findall( + 
r"^\s*(ResStock|ComStock|BuildStockBatch)_Version\s*=\s*'(.+)'", + f.read(), + re.MULTILINE, + ) ) - BuildStockBatch_Version = semver.Version.parse(versions['BuildStockBatch']) + BuildStockBatch_Version = semver.Version.parse(versions["BuildStockBatch"]) if bsb_version < BuildStockBatch_Version: - if 'ResStock' in versions.keys(): - stock_version = versions['ResStock'] - elif 'ComStock' in versions.keys(): - stock_version = versions['ComStock'] + if "ResStock" in versions.keys(): + stock_version = versions["ResStock"] + elif "ComStock" in versions.keys(): + stock_version = versions["ComStock"] else: - stock_version = 'Unknown' - val_err = f"BuildStockBatch version {BuildStockBatch_Version} or above is required" \ + stock_version = "Unknown" + val_err = ( + f"BuildStockBatch version {BuildStockBatch_Version} or above is required" f" for ResStock or ComStock version {stock_version}. Found {bsb_version}" + ) raise ValidationError(val_err) return True @@ -790,10 +952,14 @@ def validate_number_of_options(project_file): if m_option: option_number = int(m_option.group(1)) n_options_in_measure = max(option_number, n_options_in_measure) - m_costs = re.match(r"^option_(\d+)_cost_(\d+)_value", str(argument.name)) + m_costs = re.match( + r"^option_(\d+)_cost_(\d+)_value", str(argument.name) + ) if m_costs: cost_number = int(m_costs.group(2)) - n_costs_per_option_in_measure = max(cost_number, n_costs_per_option_in_measure) + n_costs_per_option_in_measure = max( + cost_number, n_costs_per_option_in_measure + ) n_options_in_cfg = 0 n_costs_in_cfg = 0 for upgrade in cfg.get("upgrades", []): @@ -828,24 +994,26 @@ def validate_openstudio_version(project_file): """ cfg = get_project_configuration(project_file) - os_version = cfg.get('os_version', BuildStockBatchBase.DEFAULT_OS_VERSION) - version_path = 'resources/hpxml-measures/HPXMLtoOpenStudio/resources/version.rb' - version_rb = os.path.join(cfg['buildstock_directory'], version_path) + os_version = cfg.get("os_version", BuildStockBatchBase.DEFAULT_OS_VERSION) + version_path = "resources/hpxml-measures/HPXMLtoOpenStudio/resources/version.rb" + version_rb = os.path.join(cfg["buildstock_directory"], version_path) if os.path.exists(version_rb): versions = {} - with open(version_rb, 'r') as f: + with open(version_rb, "r") as f: for line in f: line = line.strip() - for tool in ['OS_HPXML_Version', 'OS_Version']: + for tool in ["OS_HPXML_Version", "OS_Version"]: if line.startswith(tool): - lhs, rhs = line.split('=') - version, _ = rhs.split('#') + lhs, rhs = line.split("=") + version, _ = rhs.split("#") versions[tool] = eval(version.strip()) - OS_HPXML_Version = versions['OS_HPXML_Version'] - OS_Version = versions['OS_Version'] + OS_HPXML_Version = versions["OS_HPXML_Version"] + OS_Version = versions["OS_Version"] if not os_version.startswith(OS_Version): - val_err = f"OS version {OS_Version} is required" \ + val_err = ( + f"OS version {OS_Version} is required" f" for OS-HPXML version {OS_HPXML_Version}. 
Found {os_version}" + ) raise ValidationError(val_err) return True @@ -856,26 +1024,47 @@ def get_dask_client(self): def process_results(self, skip_combine=False, force_upload=False): self.get_dask_client() # noqa: F841 - if self.cfg['workflow_generator']['type'] == 'residential_hpxml': - if 'simulation_output_report' in self.cfg['workflow_generator']['args'].keys(): - if 'timeseries_frequency' in self.cfg['workflow_generator']['args']['simulation_output_report'].keys(): - do_timeseries = \ - (self.cfg['workflow_generator']['args']['simulation_output_report']['timeseries_frequency'] != - 'none') + if self.cfg["workflow_generator"]["type"] == "residential_hpxml": + if ( + "simulation_output_report" + in self.cfg["workflow_generator"]["args"].keys() + ): + if ( + "timeseries_frequency" + in self.cfg["workflow_generator"]["args"][ + "simulation_output_report" + ].keys() + ): + do_timeseries = ( + self.cfg["workflow_generator"]["args"][ + "simulation_output_report" + ]["timeseries_frequency"] + != "none" + ) else: - do_timeseries = 'timeseries_csv_export' in self.cfg['workflow_generator']['args'].keys() + do_timeseries = ( + "timeseries_csv_export" in self.cfg["workflow_generator"]["args"].keys() + ) fs = LocalFileSystem() if not skip_combine: - postprocessing.combine_results(fs, self.results_dir, self.cfg, do_timeseries=do_timeseries) + postprocessing.combine_results( + fs, self.results_dir, self.cfg, do_timeseries=do_timeseries + ) - aws_conf = self.cfg.get('postprocessing', {}).get('aws', {}) - if 's3' in aws_conf or force_upload: + aws_conf = self.cfg.get("postprocessing", {}).get("aws", {}) + if "s3" in aws_conf or force_upload: s3_bucket, s3_prefix = postprocessing.upload_results( aws_conf, self.output_dir, self.results_dir, self.sampler.csv_path ) - if 'athena' in aws_conf: - postprocessing.create_athena_tables(aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix) + if "athena" in aws_conf: + postprocessing.create_athena_tables( + aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix + ) - keep_individual_timeseries = self.cfg.get('postprocessing', {}).get('keep_individual_timeseries', False) - postprocessing.remove_intermediate_files(fs, self.results_dir, keep_individual_timeseries) + keep_individual_timeseries = self.cfg.get("postprocessing", {}).get( + "keep_individual_timeseries", False + ) + postprocessing.remove_intermediate_files( + fs, self.results_dir, keep_individual_timeseries + ) diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index 3a833c2a..50d4e1e8 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -42,7 +42,7 @@ ContainerRuntime, path_rel_to_file, get_project_configuration, - read_csv + read_csv, ) from buildstockbatch import postprocessing from buildstockbatch.__version__ import __version__ as bsb_version @@ -52,34 +52,34 @@ def get_bool_env_var(varname): - return os.environ.get(varname, '0').lower() in ('true', 't', '1', 'y', 'yes') + return os.environ.get(varname, "0").lower() in ("true", "t", "1", "y", "yes") class EagleBatch(BuildStockBatchBase): - CONTAINER_RUNTIME = ContainerRuntime.SINGULARITY - DEFAULT_SYS_IMAGE_DIR = '/shared-projects/buildstock/singularity_images' - hpc_name = 'eagle' + DEFAULT_SYS_IMAGE_DIR = "/shared-projects/buildstock/singularity_images" + hpc_name = "eagle" min_sims_per_job = 36 * 2 - local_scratch = pathlib.Path(os.environ.get('LOCAL_SCRATCH', '/tmp/scratch')) - local_project_dir = local_scratch / 'project' - local_buildstock_dir = local_scratch / 'buildstock' - 
local_weather_dir = local_scratch / 'weather' - local_output_dir = local_scratch / 'output' - local_singularity_img = local_scratch / 'openstudio.simg' - local_housing_characteristics_dir = local_scratch / 'housing_characteristics' + local_scratch = pathlib.Path(os.environ.get("LOCAL_SCRATCH", "/tmp/scratch")) + local_project_dir = local_scratch / "project" + local_buildstock_dir = local_scratch / "buildstock" + local_weather_dir = local_scratch / "weather" + local_output_dir = local_scratch / "output" + local_singularity_img = local_scratch / "openstudio.simg" + local_housing_characteristics_dir = local_scratch / "housing_characteristics" def __init__(self, project_filename): super().__init__(project_filename) output_dir = pathlib.Path(self.output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) - logger.debug('Output directory = {}'.format(output_dir)) + logger.debug("Output directory = {}".format(output_dir)) weather_dir = self.weather_dir # noqa E841 - self.singularity_image = self.get_singularity_image(self.cfg, self.os_version, self.os_sha) - + self.singularity_image = self.get_singularity_image( + self.cfg, self.os_version, self.os_sha + ) @classmethod def validate_project(cls, project_file): @@ -93,18 +93,20 @@ def validate_project(cls, project_file): @classmethod def validate_output_directory_eagle(cls, project_file): cfg = get_project_configuration(project_file) - output_dir = path_rel_to_file(project_file, cfg['output_directory']) + output_dir = path_rel_to_file(project_file, cfg["output_directory"]) if not re.match(r"/(lustre/eaglefs/)?(scratch|projects)", output_dir): - raise ValidationError(f"`output_directory` must be in /scratch or /projects," - f" `output_directory` = {output_dir}") + raise ValidationError( + f"`output_directory` must be in /scratch or /projects," + f" `output_directory` = {output_dir}" + ) @classmethod def validate_singularity_image_eagle(cls, project_file): cfg = get_project_configuration(project_file) singularity_image = cls.get_singularity_image( cfg, - cfg.get('os_version', cls.DEFAULT_OS_VERSION), - cfg.get('os_sha', cls.DEFAULT_OS_SHA) + cfg.get("os_version", cls.DEFAULT_OS_VERSION), + cfg.get("os_sha", cls.DEFAULT_OS_SHA), ) if not os.path.exists(singularity_image): raise ValidationError( @@ -113,12 +115,14 @@ def validate_singularity_image_eagle(cls, project_file): @property def output_dir(self): - output_dir = path_rel_to_file(self.project_filename, self.cfg['output_directory']) + output_dir = path_rel_to_file( + self.project_filename, self.cfg["output_directory"] + ) return output_dir @property def results_dir(self): - results_dir = os.path.join(self.output_dir, 'results') + results_dir = os.path.join(self.output_dir, "results") assert os.path.isdir(results_dir) return results_dir @@ -131,28 +135,31 @@ def clear_and_copy_dir(src, dst): @classmethod def get_singularity_image(cls, cfg, os_version, os_sha): return os.path.join( - cfg.get('sys_image_dir', cls.DEFAULT_SYS_IMAGE_DIR), - 'OpenStudio-{ver}.{sha}-Singularity.simg'.format( - ver=os_version, - sha=os_sha - ) + cfg.get("sys_image_dir", cls.DEFAULT_SYS_IMAGE_DIR), + "OpenStudio-{ver}.{sha}-Singularity.simg".format( + ver=os_version, sha=os_sha + ), ) @property def weather_dir(self): - weather_dir = os.path.join(self.output_dir, 'weather') + weather_dir = os.path.join(self.output_dir, "weather") if not os.path.exists(weather_dir): os.makedirs(weather_dir) self._get_weather_files() return weather_dir def run_batch(self, sampling_only=False): - # Create simulation_output dir - 
sim_out_ts_dir = pathlib.Path(self.output_dir) / 'results' / 'simulation_output' / 'timeseries' + sim_out_ts_dir = ( + pathlib.Path(self.output_dir) + / "results" + / "simulation_output" + / "timeseries" + ) os.makedirs(sim_out_ts_dir, exist_ok=True) - for i in range(0, len(self.cfg.get('upgrades', [])) + 1): - os.makedirs(sim_out_ts_dir / f'up{i:02d}') + for i in range(0, len(self.cfg.get("upgrades", [])) + 1): + os.makedirs(sim_out_ts_dir / f"up{i:02d}") # create destination_dir and copy housing_characteristics into it logger.debug("Copying housing characteristics") @@ -160,8 +167,7 @@ def run_batch(self, sampling_only=False): if os.path.exists(destination_dir): shutil.rmtree(destination_dir) shutil.copytree( - os.path.join(self.project_dir, 'housing_characteristics'), - destination_dir + os.path.join(self.project_dir, "housing_characteristics"), destination_dir ) logger.debug("Housing characteristics copied.") @@ -183,16 +189,18 @@ def run_batch(self, sampling_only=False): building_ids = df.index.tolist() n_datapoints = len(building_ids) # number of simulations is number of buildings * number of upgrades - n_sims = n_datapoints * (len(self.cfg.get('upgrades', [])) + 1) + n_sims = n_datapoints * (len(self.cfg.get("upgrades", [])) + 1) # this is the number of simulations defined for this run as a "full job" # number of simulations per job if we believe the .yml file n_jobs - n_sims_per_job = math.ceil(n_sims / self.cfg[self.hpc_name]['n_jobs']) + n_sims_per_job = math.ceil(n_sims / self.cfg[self.hpc_name]["n_jobs"]) # use more appropriate batch size in the case of n_jobs being much # larger than we need, now that we know n_sims n_sims_per_job = max(n_sims_per_job, self.min_sims_per_job) - upgrade_sims = itertools.product(building_ids, range(len(self.cfg.get('upgrades', [])))) + upgrade_sims = itertools.product( + building_ids, range(len(self.cfg.get("upgrades", []))) + ) if not self.skip_baseline_sims: # create batches of simulations baseline_sims = zip(building_ids, itertools.repeat(None)) @@ -206,76 +214,88 @@ def run_batch(self, sampling_only=False): batch = list(itertools.islice(all_sims_iter, n_sims_per_job)) if not batch: break - logger.info('Queueing job {} ({} simulations)'.format(i, len(batch))) - job_json_filename = os.path.join(self.output_dir, 'job{:03d}.json'.format(i)) - with open(job_json_filename, 'w') as f: - json.dump({ - 'job_num': i, - 'n_datapoints': n_datapoints, - 'batch': batch, - }, f, indent=4) + logger.info("Queueing job {} ({} simulations)".format(i, len(batch))) + job_json_filename = os.path.join( + self.output_dir, "job{:03d}.json".format(i) + ) + with open(job_json_filename, "w") as f: + json.dump( + { + "job_num": i, + "n_datapoints": n_datapoints, + "batch": batch, + }, + f, + indent=4, + ) # now queue them jobids = self.queue_jobs() # queue up post-processing to run after all the simulation jobs are complete - if not get_bool_env_var('MEASURESONLY'): + if not get_bool_env_var("MEASURESONLY"): self.queue_post_processing(jobids) def run_job_batch(self, job_array_number): - self.clear_and_copy_dir( - pathlib.Path(self.buildstock_dir) / 'resources', - self.local_buildstock_dir / 'resources' + pathlib.Path(self.buildstock_dir) / "resources", + self.local_buildstock_dir / "resources", ) self.clear_and_copy_dir( - pathlib.Path(self.buildstock_dir) / 'measures', - self.local_buildstock_dir / 'measures' + pathlib.Path(self.buildstock_dir) / "measures", + self.local_buildstock_dir / "measures", ) - if os.path.exists(pathlib.Path(self.buildstock_dir) / 
'resources/hpxml-measures'): + if os.path.exists( + pathlib.Path(self.buildstock_dir) / "resources/hpxml-measures" + ): self.clear_and_copy_dir( - pathlib.Path(self.buildstock_dir) / 'resources/hpxml-measures', - self.local_buildstock_dir / 'resources/hpxml-measures' + pathlib.Path(self.buildstock_dir) / "resources/hpxml-measures", + self.local_buildstock_dir / "resources/hpxml-measures", ) + self.clear_and_copy_dir(self.weather_dir, self.local_weather_dir) self.clear_and_copy_dir( - self.weather_dir, - self.local_weather_dir - ) - self.clear_and_copy_dir( - pathlib.Path(self.output_dir) / 'housing_characteristics', - self.local_housing_characteristics_dir + pathlib.Path(self.output_dir) / "housing_characteristics", + self.local_housing_characteristics_dir, ) if os.path.exists(self.local_singularity_img): os.remove(self.local_singularity_img) shutil.copy2(self.singularity_image, self.local_singularity_img) # Run the job batch as normal - job_json_filename = os.path.join(self.output_dir, 'job{:03d}.json'.format(job_array_number)) - with open(job_json_filename, 'r') as f: + job_json_filename = os.path.join( + self.output_dir, "job{:03d}.json".format(job_array_number) + ) + with open(job_json_filename, "r") as f: args = json.load(f) # trim the buildstock.csv file to only include rows for current batch. Helps speed up simulation logger.debug("Trimming buildstock.csv") - building_ids = {x[0] for x in args['batch']} - buildstock_csv_path = self.local_housing_characteristics_dir / 'buildstock.csv' + building_ids = {x[0] for x in args["batch"]} + buildstock_csv_path = self.local_housing_characteristics_dir / "buildstock.csv" valid_rows = [] - with open(buildstock_csv_path, 'r', encoding='utf-8') as f: + with open(buildstock_csv_path, "r", encoding="utf-8") as f: csv_reader = csv.DictReader(f) for row in csv_reader: - if int(row['Building']) in building_ids: + if int(row["Building"]) in building_ids: valid_rows.append(row) df = pd.DataFrame.from_records(valid_rows) df.to_csv(buildstock_csv_path, index=False) logger.debug(f"Buildstock.csv trimmed to {len(df)} rows.") - traceback_file_path = self.local_output_dir / 'simulation_output' / f'traceback{job_array_number}.out' + traceback_file_path = ( + self.local_output_dir + / "simulation_output" + / f"traceback{job_array_number}.out" + ) @delayed def run_building_d(i, upgrade_idx): try: - return self.run_building(self.output_dir, self.cfg, args['n_datapoints'], i, upgrade_idx) + return self.run_building( + self.output_dir, self.cfg, args["n_datapoints"], i, upgrade_idx + ) except Exception: - with open(traceback_file_path, 'a') as f: + with open(traceback_file_path, "a") as f: txt = get_error_details() txt = "\n" + "#" * 20 + "\n" + f"Traceback for building{i}\n" + txt f.write(txt) @@ -286,155 +306,189 @@ def run_building_d(i, upgrade_idx): # Run the simulations, get the data_point_out.json info from each tick = time.time() with Parallel(n_jobs=-1, verbose=9) as parallel: - dpouts = parallel(itertools.starmap(run_building_d, args['batch'])) + dpouts = parallel(itertools.starmap(run_building_d, args["batch"])) tick = time.time() - tick - logger.info('Simulation time: {:.2f} minutes'.format(tick / 60.)) + logger.info("Simulation time: {:.2f} minutes".format(tick / 60.0)) # Save the aggregated dpouts as a json file - lustre_sim_out_dir = pathlib.Path(self.results_dir) / 'simulation_output' - results_json = lustre_sim_out_dir / f'results_job{job_array_number}.json.gz' - logger.info(f'Writing results to {results_json}') - with gzip.open(results_json, 'wt', 
encoding='utf-8') as f: + lustre_sim_out_dir = pathlib.Path(self.results_dir) / "simulation_output" + results_json = lustre_sim_out_dir / f"results_job{job_array_number}.json.gz" + logger.info(f"Writing results to {results_json}") + with gzip.open(results_json, "wt", encoding="utf-8") as f: json.dump(dpouts, f) # Compress simulation results - if self.cfg.get('max_minutes_per_sim') is not None: + if self.cfg.get("max_minutes_per_sim") is not None: time.sleep(60) # Allow results JSON to finish writing - simout_filename = lustre_sim_out_dir / f'simulations_job{job_array_number}.tar.gz' - logger.info(f'Compressing simulation outputs to {simout_filename}') - local_sim_out_dir = self.local_output_dir / 'simulation_output' + simout_filename = ( + lustre_sim_out_dir / f"simulations_job{job_array_number}.tar.gz" + ) + logger.info(f"Compressing simulation outputs to {simout_filename}") + local_sim_out_dir = self.local_output_dir / "simulation_output" subprocess.run( [ - 'tar', - 'cf', str(simout_filename), - '-I', 'pigz', - '-C', str(local_sim_out_dir), - '.' + "tar", + "cf", + str(simout_filename), + "-I", + "pigz", + "-C", + str(local_sim_out_dir), + ".", ], - check=True + check=True, ) # copy the tracebacks if it exists if os.path.exists(traceback_file_path): shutil.copy2(traceback_file_path, lustre_sim_out_dir) - logger.info('batch complete') + logger.info("batch complete") @classmethod def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): - fs = LocalFileSystem() upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 try: - sim_id, sim_dir = cls.make_sim_dir(i, upgrade_idx, os.path.join(cls.local_output_dir, 'simulation_output')) + sim_id, sim_dir = cls.make_sim_dir( + i, upgrade_idx, os.path.join(cls.local_output_dir, "simulation_output") + ) except SimulationExists as ex: sim_dir = ex.sim_dir else: # Generate the osw for this simulation - osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) - with open(os.path.join(sim_dir, 'in.osw'), 'w') as f: + osw = cls.create_osw( + cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx + ) + with open(os.path.join(sim_dir, "in.osw"), "w") as f: json.dump(osw, f, indent=4) # Copy other necessary stuff into the simulation directory dirs_to_mount = [ - os.path.join(cls.local_buildstock_dir, 'measures'), + os.path.join(cls.local_buildstock_dir, "measures"), cls.local_weather_dir, ] # Create a temporary directory for the simulation to use - with tempfile.TemporaryDirectory(dir=cls.local_scratch, prefix=f"{sim_id}_") as tmpdir: - + with tempfile.TemporaryDirectory( + dir=cls.local_scratch, prefix=f"{sim_id}_" + ) as tmpdir: # Build the command to instantiate and configure the singularity container the simulation is run inside - local_resources_dir = cls.local_buildstock_dir / 'resources' + local_resources_dir = cls.local_buildstock_dir / "resources" args = [ - 'singularity', 'exec', - '--contain', - '-e', - '--pwd', '/var/simdata/openstudio', - '-B', f'{sim_dir}:/var/simdata/openstudio', - '-B', f'{local_resources_dir}:/lib/resources', - '-B', f'{cls.local_housing_characteristics_dir}:/lib/housing_characteristics', - '-B', f'{tmpdir}:/tmp' - ] - runscript = [ - 'ln -s /lib /var/simdata/openstudio/lib' + "singularity", + "exec", + "--contain", + "-e", + "--pwd", + "/var/simdata/openstudio", + "-B", + f"{sim_dir}:/var/simdata/openstudio", + "-B", + f"{local_resources_dir}:/lib/resources", + "-B", + f"{cls.local_housing_characteristics_dir}:/lib/housing_characteristics", + "-B", + 
f"{tmpdir}:/tmp", ] + runscript = ["ln -s /lib /var/simdata/openstudio/lib"] for src in dirs_to_mount: - container_mount = '/' + os.path.basename(src) - args.extend(['-B', '{}:{}:ro'.format(src, container_mount)]) - container_symlink = os.path.join('/var/simdata/openstudio', os.path.basename(src)) - runscript.append('ln -s {} {}'.format(*map(shlex.quote, (container_mount, container_symlink)))) + container_mount = "/" + os.path.basename(src) + args.extend(["-B", "{}:{}:ro".format(src, container_mount)]) + container_symlink = os.path.join( + "/var/simdata/openstudio", os.path.basename(src) + ) + runscript.append( + "ln -s {} {}".format( + *map(shlex.quote, (container_mount, container_symlink)) + ) + ) - if os.path.exists(os.path.join(cls.local_buildstock_dir, 'resources/hpxml-measures')): - runscript.append('ln -s /resources /var/simdata/openstudio/resources') - src = os.path.join(cls.local_buildstock_dir, 'resources/hpxml-measures') - container_mount = '/resources/hpxml-measures' - args.extend(['-B', '{}:{}:ro'.format(src, container_mount)]) + if os.path.exists( + os.path.join(cls.local_buildstock_dir, "resources/hpxml-measures") + ): + runscript.append( + "ln -s /resources /var/simdata/openstudio/resources" + ) + src = os.path.join( + cls.local_buildstock_dir, "resources/hpxml-measures" + ) + container_mount = "/resources/hpxml-measures" + args.extend(["-B", "{}:{}:ro".format(src, container_mount)]) # Build the openstudio command that will be issued within the # singularity container If custom gems are to be used in the # singularity container add extra bundle arguments to the cli # command - cli_cmd = 'openstudio run -w in.osw' - if cfg.get('baseline', dict()).get('custom_gems', False): + cli_cmd = "openstudio run -w in.osw" + if cfg.get("baseline", dict()).get("custom_gems", False): cli_cmd = ( - 'openstudio --bundle /var/oscli/Gemfile --bundle_path /var/oscli/gems ' - '--bundle_without native_ext run -w in.osw --debug' + "openstudio --bundle /var/oscli/Gemfile --bundle_path /var/oscli/gems " + "--bundle_without native_ext run -w in.osw --debug" ) - if get_bool_env_var('MEASURESONLY'): - cli_cmd += ' --measures_only' + if get_bool_env_var("MEASURESONLY"): + cli_cmd += " --measures_only" runscript.append(cli_cmd) - args.extend([ - str(cls.local_singularity_img), - 'bash', '-x' - ]) + args.extend([str(cls.local_singularity_img), "bash", "-x"]) env_vars = dict(os.environ) - env_vars['SINGULARITYENV_BUILDSTOCKBATCH_VERSION'] = bsb_version - logger.debug('\n'.join(map(str, args))) - max_time_min = cfg.get('max_minutes_per_sim') + env_vars["SINGULARITYENV_BUILDSTOCKBATCH_VERSION"] = bsb_version + logger.debug("\n".join(map(str, args))) + max_time_min = cfg.get("max_minutes_per_sim") if max_time_min is not None: subprocess_kw = {"timeout": max_time_min * 60} else: subprocess_kw = {} start_time = dt.datetime.now() - with open(os.path.join(sim_dir, 'openstudio_output.log'), 'w') as f_out: + with open(os.path.join(sim_dir, "openstudio_output.log"), "w") as f_out: try: subprocess.run( args, check=True, - input='\n'.join(runscript).encode('utf-8'), + input="\n".join(runscript).encode("utf-8"), stdout=f_out, stderr=subprocess.STDOUT, cwd=cls.local_output_dir, env=env_vars, - **subprocess_kw + **subprocess_kw, ) except subprocess.TimeoutExpired: end_time = dt.datetime.now() - msg = f'Terminated {sim_id} after reaching max time of {max_time_min} minutes' - f_out.write(f'[{end_time.now()} ERROR] {msg}') + msg = f"Terminated {sim_id} after reaching max time of {max_time_min} minutes" + 
f_out.write(f"[{end_time.now()} ERROR] {msg}") logger.warning(msg) - with open(os.path.join(sim_dir, 'out.osw'), 'w') as out_osw: + with open(os.path.join(sim_dir, "out.osw"), "w") as out_osw: out_msg = { - 'started_at': start_time.strftime('%Y%m%dT%H%M%SZ'), - 'completed_at': end_time.strftime('%Y%m%dT%H%M%SZ'), - 'completed_status': 'Fail', - 'timeout': msg + "started_at": start_time.strftime("%Y%m%dT%H%M%SZ"), + "completed_at": end_time.strftime("%Y%m%dT%H%M%SZ"), + "completed_status": "Fail", + "timeout": msg, } out_osw.write(json.dumps(out_msg, indent=3)) - with open(os.path.join(sim_dir, 'run', 'out.osw'), 'a') as run_log: - run_log.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") - with open(os.path.join(sim_dir, 'run', 'failed.job'), 'w') as failed_job: - failed_job.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") - time.sleep(60) # Wait for EnergyPlus to release file locks and data_point.zip to finish + with open( + os.path.join(sim_dir, "run", "out.osw"), "a" + ) as run_log: + run_log.write( + f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}" + ) + with open( + os.path.join(sim_dir, "run", "failed.job"), "w" + ) as failed_job: + failed_job.write( + f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}" + ) + time.sleep( + 60 + ) # Wait for EnergyPlus to release file locks and data_point.zip to finish except subprocess.CalledProcessError: pass finally: # Clean up the symbolic links we created in the container - for mount_dir in dirs_to_mount + [os.path.join(sim_dir, 'lib')]: + for mount_dir in dirs_to_mount + [os.path.join(sim_dir, "lib")]: try: - os.unlink(os.path.join(sim_dir, os.path.basename(mount_dir))) + os.unlink( + os.path.join(sim_dir, os.path.basename(mount_dir)) + ) except FileNotFoundError: pass @@ -442,166 +496,195 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): cls.cleanup_sim_dir( sim_dir, fs, - f'{output_dir}/results/simulation_output/timeseries', + f"{output_dir}/results/simulation_output/timeseries", upgrade_id, - i + i, ) reporting_measures = cls.get_reporting_measures(cfg) - dpout = postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, i) + dpout = postprocessing.read_simulation_outputs( + fs, reporting_measures, sim_dir, upgrade_id, i + ) return dpout def queue_jobs(self, array_ids=None, hipri=False): - eagle_cfg = self.cfg['eagle'] - with open(os.path.join(self.output_dir, 'job001.json'), 'r') as f: + eagle_cfg = self.cfg["eagle"] + with open(os.path.join(self.output_dir, "job001.json"), "r") as f: job_json = json.load(f) - n_sims_per_job = len(job_json['batch']) + n_sims_per_job = len(job_json["batch"]) del job_json if array_ids: - array_spec = ','.join(map(str, array_ids)) + array_spec = ",".join(map(str, array_ids)) else: - jobjson_re = re.compile(r'job(\d+).json') - array_max = max(map( - lambda m: int(m.group(1)), - filter(lambda m: m is not None, map(jobjson_re.match, (os.listdir(self.output_dir)))) - )) - array_spec = '1-{}'.format(array_max) - account = eagle_cfg['account'] + jobjson_re = re.compile(r"job(\d+).json") + array_max = max( + map( + lambda m: int(m.group(1)), + filter( + lambda m: m is not None, + map(jobjson_re.match, (os.listdir(self.output_dir))), + ), + ) + ) + array_spec = "1-{}".format(array_max) + account = eagle_cfg["account"] # Estimate the wall time in minutes cores_per_node = 36 - minutes_per_sim = eagle_cfg['minutes_per_sim'] - walltime = math.ceil(math.ceil(n_sims_per_job / cores_per_node) * minutes_per_sim) + minutes_per_sim = eagle_cfg["minutes_per_sim"] + 
walltime = math.ceil( + math.ceil(n_sims_per_job / cores_per_node) * minutes_per_sim + ) # Queue up simulations here = os.path.dirname(os.path.abspath(__file__)) - eagle_sh = os.path.join(here, 'eagle.sh') + eagle_sh = os.path.join(here, "eagle.sh") env = {} env.update(os.environ) - env['PROJECTFILE'] = self.project_filename - env['MY_CONDA_ENV'] = os.environ['CONDA_PREFIX'] + env["PROJECTFILE"] = self.project_filename + env["MY_CONDA_ENV"] = os.environ["CONDA_PREFIX"] args = [ - 'sbatch', - '--account={}'.format(account), - '--time={}'.format(walltime), - '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY', - '--array={}'.format(array_spec), - '--output=job.out-%a', - '--job-name=bstk', - eagle_sh + "sbatch", + "--account={}".format(account), + "--time={}".format(walltime), + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY", + "--array={}".format(array_spec), + "--output=job.out-%a", + "--job-name=bstk", + eagle_sh, ] - if os.environ.get('SLURM_JOB_QOS'): - args.insert(-1, '--qos={}'.format(os.environ.get('SLURM_JOB_QOS'))) + if os.environ.get("SLURM_JOB_QOS"): + args.insert(-1, "--qos={}".format(os.environ.get("SLURM_JOB_QOS"))) elif hipri: - args.insert(-1, '--qos=high') + args.insert(-1, "--qos=high") - logger.debug(' '.join(args)) + logger.debug(" ".join(args)) resp = subprocess.run( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, - encoding='utf-8', - cwd=self.output_dir + encoding="utf-8", + cwd=self.output_dir, ) try: resp.check_returncode() except subprocess.CalledProcessError as ex: logger.error(ex.stderr) raise - for line in resp.stdout.split('\n'): - logger.debug('sbatch:' + line) - m = re.search(r'Submitted batch job (\d+)', resp.stdout) + for line in resp.stdout.split("\n"): + logger.debug("sbatch:" + line) + m = re.search(r"Submitted batch job (\d+)", resp.stdout) if not m: - logger.error('Did not receive job id back from sbatch:') - raise RuntimeError('Didn\'t receive job id back from sbatch') + logger.error("Did not receive job id back from sbatch:") + raise RuntimeError("Didn't receive job id back from sbatch") job_id = m.group(1) return [job_id] def queue_post_processing(self, after_jobids=[], upload_only=False, hipri=False): # Configuration values - account = self.cfg['eagle']['account'] - walltime = self.cfg['eagle'].get('postprocessing', {}).get('time', '1:30:00') - memory = self.cfg['eagle'].get('postprocessing', {}).get('node_memory_mb', 85248) - n_procs = self.cfg['eagle'].get('postprocessing', {}).get('n_procs', 18) - n_workers = self.cfg['eagle'].get('postprocessing', {}).get('n_workers', 2) - print(f"Submitting job to {n_workers} {memory}MB memory nodes using {n_procs} cores in each.") + account = self.cfg["eagle"]["account"] + walltime = self.cfg["eagle"].get("postprocessing", {}).get("time", "1:30:00") + memory = ( + self.cfg["eagle"].get("postprocessing", {}).get("node_memory_mb", 85248) + ) + n_procs = self.cfg["eagle"].get("postprocessing", {}).get("n_procs", 18) + n_workers = self.cfg["eagle"].get("postprocessing", {}).get("n_workers", 2) + print( + f"Submitting job to {n_workers} {memory}MB memory nodes using {n_procs} cores in each." + ) # Throw an error if the files already exist. if not upload_only: - for subdir in ('parquet', 'results_csvs'): - subdirpath = pathlib.Path(self.output_dir, 'results', subdir) + for subdir in ("parquet", "results_csvs"): + subdirpath = pathlib.Path(self.output_dir, "results", subdir) if subdirpath.exists(): - raise FileExistsError(f'{subdirpath} already exists. 
This means you may have run postprocessing already. If you are sure you want to rerun, delete that directory and try again.') # noqa E501 + raise FileExistsError( + f"{subdirpath} already exists. This means you may have run postprocessing already. If you are sure you want to rerun, delete that directory and try again." + ) # noqa E501 # Move old output logs and config to make way for new ones - for filename in ('dask_scheduler.json', 'dask_scheduler.out', 'dask_workers.out', 'postprocessing.out'): + for filename in ( + "dask_scheduler.json", + "dask_scheduler.out", + "dask_workers.out", + "postprocessing.out", + ): filepath = pathlib.Path(self.output_dir, filename) if filepath.exists(): last_mod_date = dt.datetime.fromtimestamp(os.path.getmtime(filepath)) shutil.move( filepath, - filepath.parent / f'{filepath.stem}_{last_mod_date:%Y%m%d%H%M}{filepath.suffix}' + filepath.parent + / f"{filepath.stem}_{last_mod_date:%Y%m%d%H%M}{filepath.suffix}", ) env = {} env.update(os.environ) - env['PROJECTFILE'] = self.project_filename - env['MY_CONDA_ENV'] = os.environ['CONDA_PREFIX'] - env['OUT_DIR'] = self.output_dir - env['UPLOADONLY'] = str(upload_only) - env['MEMORY'] = str(memory) - env['NPROCS'] = str(n_procs) + env["PROJECTFILE"] = self.project_filename + env["MY_CONDA_ENV"] = os.environ["CONDA_PREFIX"] + env["OUT_DIR"] = self.output_dir + env["UPLOADONLY"] = str(upload_only) + env["MEMORY"] = str(memory) + env["NPROCS"] = str(n_procs) here = os.path.dirname(os.path.abspath(__file__)) - eagle_post_sh = os.path.join(here, 'eagle_postprocessing.sh') + eagle_post_sh = os.path.join(here, "eagle_postprocessing.sh") args = [ - 'sbatch', - '--account={}'.format(account), - '--time={}'.format(walltime), - '--export=PROJECTFILE,MY_CONDA_ENV,OUT_DIR,UPLOADONLY,MEMORY,NPROCS', - '--job-name=bstkpost', - '--output=postprocessing.out', - '--nodes=1', - ':', - '--mem={}'.format(memory), - '--output=dask_workers.out', - '--nodes={}'.format(n_workers), - eagle_post_sh + "sbatch", + "--account={}".format(account), + "--time={}".format(walltime), + "--export=PROJECTFILE,MY_CONDA_ENV,OUT_DIR,UPLOADONLY,MEMORY,NPROCS", + "--job-name=bstkpost", + "--output=postprocessing.out", + "--nodes=1", + ":", + "--mem={}".format(memory), + "--output=dask_workers.out", + "--nodes={}".format(n_workers), + eagle_post_sh, ] if after_jobids: - args.insert(4, '--dependency=afterany:{}'.format(':'.join(after_jobids))) + args.insert(4, "--dependency=afterany:{}".format(":".join(after_jobids))) - if os.environ.get('SLURM_JOB_QOS'): - args.insert(-1, '--qos={}'.format(os.environ.get('SLURM_JOB_QOS'))) + if os.environ.get("SLURM_JOB_QOS"): + args.insert(-1, "--qos={}".format(os.environ.get("SLURM_JOB_QOS"))) elif hipri: - args.insert(-1, '--qos=high') + args.insert(-1, "--qos=high") resp = subprocess.run( args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=env, - encoding='utf-8', - cwd=self.output_dir + encoding="utf-8", + cwd=self.output_dir, ) - for line in resp.stdout.split('\n'): - logger.debug('sbatch: {}'.format(line)) + for line in resp.stdout.split("\n"): + logger.debug("sbatch: {}".format(line)) def get_dask_client(self): - if get_bool_env_var('DASKLOCALCLUSTER'): - cluster = LocalCluster(local_directory='/data/dask-tmp') + if get_bool_env_var("DASKLOCALCLUSTER"): + cluster = LocalCluster(local_directory="/data/dask-tmp") return Client(cluster) else: - return Client(scheduler_file=os.path.join(self.output_dir, 'dask_scheduler.json')) + return Client( + scheduler_file=os.path.join(self.output_dir, "dask_scheduler.json") 
+ ) def process_results(self, *args, **kwargs): # Check that all the jobs succeeded before proceeding failed_job_array_ids = self.get_failed_job_array_ids() if failed_job_array_ids: - logger.error("The following simulation jobs failed: {}".format(", ".join(map(str, failed_job_array_ids)))) - logger.error("Please inspect those jobs and fix any problems before resubmitting.") + logger.error( + "The following simulation jobs failed: {}".format( + ", ".join(map(str, failed_job_array_ids)) + ) + ) + logger.error( + "Please inspect those jobs and fix any problems before resubmitting." + ) logger.critical("Postprocessing cancelled.") return False @@ -617,11 +700,11 @@ def _get_job_ids_for_file_pattern(self, pat): return job_ids def get_failed_job_array_ids(self): - job_out_files = sorted(pathlib.Path(self.output_dir).glob('job.out-*')) + job_out_files = sorted(pathlib.Path(self.output_dir).glob("job.out-*")) failed_job_ids = set() for filename in job_out_files: - with open(filename, 'r') as f: + with open(filename, "r") as f: if not re.search(r"batch complete", f.read()): job_id = int(re.match(r"job\.out-(\d+)", filename.name).group(1)) logger.debug(f"Array Job ID {job_id} had a failure.") @@ -644,28 +727,29 @@ def rerun_failed_jobs(self, hipri=False): output_path = pathlib.Path(self.output_dir) results_path = pathlib.Path(self.results_dir) - prev_failed_job_out_dir = output_path / 'prev_failed_jobs' + prev_failed_job_out_dir = output_path / "prev_failed_jobs" os.makedirs(prev_failed_job_out_dir, exist_ok=True) for job_array_id in failed_job_array_ids: # Move the failed job.out file so it doesn't get overwritten - filepath = output_path / f'job.out-{job_array_id}' + filepath = output_path / f"job.out-{job_array_id}" if filepath.exists(): last_mod_date = dt.datetime.fromtimestamp(os.path.getmtime(filepath)) shutil.move( filepath, - prev_failed_job_out_dir / f'{filepath.name}_{last_mod_date:%Y%m%d%H%M}' + prev_failed_job_out_dir + / f"{filepath.name}_{last_mod_date:%Y%m%d%H%M}", ) # Delete simulation results for jobs we're about to rerun - files_to_delete = [f'simulations_job{job_array_id}.tar.gz', f'results_job{job_array_id}.json.gz'] + files_to_delete = [ + f"simulations_job{job_array_id}.tar.gz", + f"results_job{job_array_id}.json.gz", + ] for filename in files_to_delete: - (results_path / 'simulation_output' / filename).unlink(missing_ok=True) + (results_path / "simulation_output" / filename).unlink(missing_ok=True) # Clear out postprocessed data so we can start from a clean slate - dirs_to_delete = [ - results_path / 'results_csvs', - results_path / 'parquet' - ] + dirs_to_delete = [results_path / "results_csvs", results_path / "parquet"] for x in dirs_to_delete: if x.exists(): shutil.rmtree(x) @@ -675,41 +759,37 @@ def rerun_failed_jobs(self, hipri=False): logging_config = { - 'version': 1, - 'disable_existing_loggers': True, - 'formatters': { - 'defaultfmt': { - 'format': '%(levelname)s:%(asctime)s:%(name)s:%(message)s', - 'datefmt': '%Y-%m-%d %H:%M:%S' - } + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "defaultfmt": { + "format": "%(levelname)s:%(asctime)s:%(name)s:%(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "defaultfmt", + "level": "DEBUG", + "stream": "ext://sys.stdout", + } + }, + "loggers": { + "__main__": {"level": "DEBUG", "propagate": True, "handlers": ["console"]}, + "buildstockbatch": { + "level": "DEBUG", + "propagate": True, + "handlers": ["console"], }, - 
'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'formatter': 'defaultfmt', - 'level': 'DEBUG', - 'stream': 'ext://sys.stdout', - } - }, - 'loggers': { - '__main__': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] - }, - 'buildstockbatch': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] - } - }, - } + }, +} def user_cli(argv=sys.argv[1:]): - ''' + """ This is the user entry point for running buildstockbatch on Eagle - ''' + """ # set up logging, currently based on within-this-file hard-coded config logging.config.dictConfig(logging_config) @@ -718,42 +798,39 @@ def user_cli(argv=sys.argv[1:]): # CLI arguments parser = argparse.ArgumentParser() - parser.add_argument('project_filename') + parser.add_argument("project_filename") parser.add_argument( - '--hipri', - action='store_true', - help='Submit this job to the high priority queue. Uses 2x node hours.' + "--hipri", + action="store_true", + help="Submit this job to the high priority queue. Uses 2x node hours.", ) parser.add_argument( - '-m', '--measures_only', - action='store_true', - help='Only apply the measures, but don\'t run simulations. Useful for debugging.' + "-m", + "--measures_only", + action="store_true", + help="Only apply the measures, but don't run simulations. Useful for debugging.", ) group = parser.add_mutually_exclusive_group() group.add_argument( - '--postprocessonly', - help='Only do postprocessing, useful for when the simulations are already done', - action='store_true' + "--postprocessonly", + help="Only do postprocessing, useful for when the simulations are already done", + action="store_true", ) group.add_argument( - '--uploadonly', - help='Only upload to S3, useful when postprocessing is already done. Ignores the upload flag in yaml', - action='store_true' + "--uploadonly", + help="Only upload to S3, useful when postprocessing is already done. Ignores the upload flag in yaml", + action="store_true", ) group.add_argument( - '--validateonly', - help='Only validate the project YAML file and references. Nothing is executed', - action='store_true' + "--validateonly", + help="Only validate the project YAML file and references. Nothing is executed", + action="store_true", ) group.add_argument( - '--samplingonly', - help='Run the sampling only.', - action='store_true' + "--samplingonly", help="Run the sampling only.", action="store_true" ) group.add_argument( - '--rerun_failed', - help='Rerun the failed jobs', - action='store_true' + "--rerun_failed", help="Rerun the failed jobs", action="store_true" ) # parse CLI arguments @@ -762,10 +839,10 @@ def user_cli(argv=sys.argv[1:]): # load the yaml project file if not os.path.isfile(args.project_filename): raise FileNotFoundError( - 'The project file {} doesn\'t exist'.format(args.project_filename) + "The project file {} doesn't exist".format(args.project_filename) ) project_filename = os.path.abspath(args.project_filename) - with open(project_filename, 'r') as f: + with open(project_filename, "r") as f: cfg = yaml.load(f, Loader=yaml.SafeLoader) # validate the project, and in case of the --validateonly flag return True if validation passes @@ -786,35 +863,37 @@ def user_cli(argv=sys.argv[1:]): # otherwise, queue up the whole eagle buildstockbatch process # the main work of the first Eagle job is to run the sampling script ... 
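# For illustration, the sampling submission assembled below resembles the following (account name and install path are hypothetical): # sbatch --time=60 --account=my_alloc --nodes=1 \ # --export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY \ # --output=sampling.out /path/to/buildstockbatch/eagle.sh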
- eagle_sh = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'eagle.sh') + eagle_sh = os.path.join(os.path.dirname(os.path.abspath(__file__)), "eagle.sh") assert os.path.exists(eagle_sh) - out_dir = cfg['output_directory'] + out_dir = cfg["output_directory"] if os.path.exists(out_dir): raise FileExistsError( - 'The output directory {} already exists. Please delete it or choose another.'.format(out_dir) + "The output directory {} already exists. Please delete it or choose another.".format( + out_dir + ) ) - logger.info('Creating output directory {}'.format(out_dir)) + logger.info("Creating output directory {}".format(out_dir)) os.makedirs(out_dir) env = {} env.update(os.environ) - env['PROJECTFILE'] = project_filename - env['MY_CONDA_ENV'] = os.environ['CONDA_PREFIX'] - env['MEASURESONLY'] = str(int(args.measures_only)) - env['SAMPLINGONLY'] = str(int(args.samplingonly)) + env["PROJECTFILE"] = project_filename + env["MY_CONDA_ENV"] = os.environ["CONDA_PREFIX"] + env["MEASURESONLY"] = str(int(args.measures_only)) + env["SAMPLINGONLY"] = str(int(args.samplingonly)) subargs = [ - 'sbatch', - '--time={}'.format(cfg['eagle'].get('sampling', {}).get('time', 60)), - '--account={}'.format(cfg['eagle']['account']), - '--nodes=1', - '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY', - '--output=sampling.out', - eagle_sh + "sbatch", + "--time={}".format(cfg["eagle"].get("sampling", {}).get("time", 60)), + "--account={}".format(cfg["eagle"]["account"]), + "--nodes=1", + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY", + "--output=sampling.out", + eagle_sh, ] if args.hipri: - subargs.insert(-1, '--qos=high') - logger.info('Submitting sampling job to task scheduler') + subargs.insert(-1, "--qos=high") + logger.info("Submitting sampling job to task scheduler") subprocess.run(subargs, env=env, cwd=out_dir, check=True) - logger.info('Run squeue -u $USER to monitor the progress of your jobs') + logger.info("Run squeue -u $USER to monitor the progress of your jobs") # eagle.sh calls main() @@ -838,18 +917,18 @@ def main(): # only direct script argument is the project .yml file parser = argparse.ArgumentParser() - parser.add_argument('project_filename') + parser.add_argument("project_filename") args = parser.parse_args() # initialize the EagleBatch object batch = EagleBatch(args.project_filename) # other arguments/cues about which part of the process we are in are # encoded in slurm job environment variables - job_array_number = int(os.environ.get('SLURM_ARRAY_TASK_ID', 0)) - post_process = get_bool_env_var('POSTPROCESS') - upload_only = get_bool_env_var('UPLOADONLY') - measures_only = get_bool_env_var('MEASURESONLY') - sampling_only = get_bool_env_var('SAMPLINGONLY') + job_array_number = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0)) + post_process = get_bool_env_var("POSTPROCESS") + upload_only = get_bool_env_var("UPLOADONLY") + measures_only = get_bool_env_var("MEASURESONLY") + sampling_only = get_bool_env_var("SAMPLINGONLY") if job_array_number: # if job array number is non-zero, run the batch job # Simulation should not be scheduled for sampling only @@ -873,7 +952,7 @@ def main(): batch.run_batch(sampling_only) -if __name__ == '__main__': +if __name__ == "__main__": if get_bool_env_var("BUILDSTOCKBATCH_CLI"): user_cli() else: diff --git a/buildstockbatch/exc.py b/buildstockbatch/exc.py index b8233e73..63ba78d3 100644 --- a/buildstockbatch/exc.py +++ b/buildstockbatch/exc.py @@ -1,5 +1,4 @@ class SimulationExists(Exception): - def __init__(self, msg, sim_id, sim_dir): 
super().__init__(msg) self.sim_id = sim_id diff --git a/buildstockbatch/local.py b/buildstockbatch/local.py index 0b5f5207..a51ca8c2 100644 --- a/buildstockbatch/local.py +++ b/buildstockbatch/local.py @@ -38,7 +38,6 @@ class LocalBatch(BuildStockBatchBase): - CONTAINER_RUNTIME = ContainerRuntime.LOCAL_OPENSTUDIO def __init__(self, project_filename): @@ -47,73 +46,91 @@ def __init__(self, project_filename): self._weather_dir = None # Create simulation_output dir - sim_out_ts_dir = os.path.join(self.results_dir, 'simulation_output', 'timeseries') + sim_out_ts_dir = os.path.join( + self.results_dir, "simulation_output", "timeseries" + ) os.makedirs(sim_out_ts_dir, exist_ok=True) - for i in range(0, len(self.cfg.get('upgrades', [])) + 1): - os.makedirs(os.path.join(sim_out_ts_dir, f'up{i:02d}'), exist_ok=True) + for i in range(0, len(self.cfg.get("upgrades", [])) + 1): + os.makedirs(os.path.join(sim_out_ts_dir, f"up{i:02d}"), exist_ok=True) # Install custom gems to a volume that will be used by all workers # FIXME: Get working without docker - if self.cfg.get('baseline', dict()).get('custom_gems', False): + if self.cfg.get("baseline", dict()).get("custom_gems", False): # TODO: Fix this stuff to work without docker - logger.info('Installing custom gems to docker volume: buildstockbatch_custom_gems') + logger.info( + "Installing custom gems to docker volume: buildstockbatch_custom_gems" + ) docker_client = docker.client.from_env() # Create a volume to store the custom gems - docker_client.volumes.create(name='buildstockbatch_custom_gems', driver='local') - simdata_vol = docker_client.volumes.create(name='buildstockbatch_simdata_temp', driver='local') + docker_client.volumes.create( + name="buildstockbatch_custom_gems", driver="local" + ) + simdata_vol = docker_client.volumes.create( + name="buildstockbatch_simdata_temp", driver="local" + ) # Define directories to be mounted in the container - mnt_gem_dir = '/var/oscli/gems' + mnt_gem_dir = "/var/oscli/gems" # Install custom gems to be used in the docker container - local_gemfile_path = os.path.join(self.buildstock_dir, 'resources', 'Gemfile') + local_gemfile_path = os.path.join( + self.buildstock_dir, "resources", "Gemfile" + ) mnt_gemfile_path_orig = "/var/oscli/gemfile/Gemfile" docker_volume_mounts = { - 'buildstockbatch_custom_gems': {'bind': mnt_gem_dir, 'mode': 'rw'}, - local_gemfile_path: {'bind': mnt_gemfile_path_orig, 'mode': 'ro'}, - simdata_vol.name: {'bind': '/var/simdata/openstudio', 'mode': 'rw'}, + "buildstockbatch_custom_gems": {"bind": mnt_gem_dir, "mode": "rw"}, + local_gemfile_path: {"bind": mnt_gemfile_path_orig, "mode": "ro"}, + simdata_vol.name: {"bind": "/var/simdata/openstudio", "mode": "rw"}, } # Check that the Gemfile exists if not os.path.exists(local_gemfile_path): - print(f'local_gemfile_path = {local_gemfile_path}') - raise AttributeError('baseline:custom_gems = True, but did not find Gemfile in /resources directory') + print(f"local_gemfile_path = {local_gemfile_path}") + raise AttributeError( + "baseline:custom_gems = True, but did not find Gemfile in /resources directory" + ) # Make the buildstock/resources/.custom_gems dir to store logs - local_log_dir = os.path.join(self.buildstock_dir, 'resources', '.custom_gems') + local_log_dir = os.path.join( + self.buildstock_dir, "resources", ".custom_gems" + ) if not os.path.exists(local_log_dir): os.makedirs(local_log_dir) # Run bundler to install the custom gems mnt_gemfile_path = f"{mnt_gem_dir}/Gemfile" bundle_install_cmd = f'/bin/bash -c "cp {mnt_gemfile_path_orig} 
{mnt_gemfile_path} && bundle install --path={mnt_gem_dir} --gemfile={mnt_gemfile_path}"' # noqa: E501 - logger.debug(f'Running {bundle_install_cmd}') + logger.debug(f"Running {bundle_install_cmd}") container_output = docker_client.containers.run( self.docker_image, bundle_install_cmd, remove=True, volumes=docker_volume_mounts, - name='install_custom_gems' + name="install_custom_gems", ) - with open(os.path.join(local_log_dir, 'bundle_install_output.log'), 'wb') as f_out: + with open( + os.path.join(local_log_dir, "bundle_install_output.log"), "wb" + ) as f_out: f_out.write(container_output) # Report out custom gems loaded by OpenStudio CLI - check_active_gems_cmd = f'openstudio --bundle {mnt_gemfile_path} --bundle_path {mnt_gem_dir} ' \ - '--bundle_without native_ext gem_list' + check_active_gems_cmd = ( + f"openstudio --bundle {mnt_gemfile_path} --bundle_path {mnt_gem_dir} " + "--bundle_without native_ext gem_list" + ) container_output = docker_client.containers.run( self.docker_image, check_active_gems_cmd, remove=True, volumes=docker_volume_mounts, - name='list_custom_gems' + name="list_custom_gems", ) - gem_list_log = os.path.join(local_log_dir, 'openstudio_gem_list_output.log') - with open(gem_list_log, 'wb') as f_out: + gem_list_log = os.path.join(local_log_dir, "openstudio_gem_list_output.log") + with open(gem_list_log, "wb") as f_out: f_out.write(container_output) simdata_vol.remove() - logger.debug(f'Review custom gems list at: {gem_list_log}') + logger.debug(f"Review custom gems list at: {gem_list_log}") @classmethod def validate_project(cls, project_file): @@ -124,44 +141,61 @@ def validate_project(cls, project_file): @property def weather_dir(self): if self._weather_dir is None: - self._weather_dir = os.path.join(self.buildstock_dir, 'weather') + self._weather_dir = os.path.join(self.buildstock_dir, "weather") self._get_weather_files() return self._weather_dir @classmethod - def run_building(cls, buildstock_dir, weather_dir, results_dir, measures_only, - n_datapoints, cfg, i, upgrade_idx=None): - + def run_building( + cls, + buildstock_dir, + weather_dir, + results_dir, + measures_only, + n_datapoints, + cfg, + i, + upgrade_idx=None, + ): upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 try: - sim_id, sim_dir = cls.make_sim_dir(i, upgrade_idx, os.path.join(results_dir, 'simulation_output')) + sim_id, sim_dir = cls.make_sim_dir( + i, upgrade_idx, os.path.join(results_dir, "simulation_output") + ) except SimulationExists: return sim_path = pathlib.Path(sim_dir) buildstock_path = pathlib.Path(buildstock_dir) # Make symlinks to project and buildstock stuff - (sim_path / 'measures').symlink_to(buildstock_path / 'measures', target_is_directory=True) - (sim_path / 'lib').symlink_to(buildstock_path / "lib", target_is_directory=True) - (sim_path / 'weather').symlink_to(weather_dir, target_is_directory=True) - hpxml_measures_path = buildstock_path / 'resources' / 'hpxml-measures' + (sim_path / "measures").symlink_to( + buildstock_path / "measures", target_is_directory=True + ) + (sim_path / "lib").symlink_to(buildstock_path / "lib", target_is_directory=True) + (sim_path / "weather").symlink_to(weather_dir, target_is_directory=True) + hpxml_measures_path = buildstock_path / "resources" / "hpxml-measures" if hpxml_measures_path.exists(): - resources_path = sim_path / 'resources' + resources_path = sim_path / "resources" resources_path.mkdir() - (resources_path / 'hpxml-measures').symlink_to(hpxml_measures_path, target_is_directory=True) + (resources_path / 
"hpxml-measures").symlink_to( + hpxml_measures_path, target_is_directory=True + ) else: resources_path = None - osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) + osw = cls.create_osw( + cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx + ) - with open(sim_path / 'in.osw', 'w') as f: + with open(sim_path / "in.osw", "w") as f: json.dump(osw, f, indent=4) run_cmd = [ cls.openstudio_exe(), - 'run', - '-w', 'in.osw', + "run", + "-w", + "in.osw", ] # FIXME: Custom gems @@ -179,19 +213,19 @@ def run_building(cls, buildstock_dir, weather_dir, results_dir, measures_only, # if cfg.get('baseline', dict()).get('custom_gems', False): # run_cmd.insert(8, '--measures_only') # else: - run_cmd.insert(2, '--measures_only') + run_cmd.insert(2, "--measures_only") env_vars = {} env_vars.update(os.environ) - env_vars['BUILDSTOCKBATCH_VERSION'] = bsb_version + env_vars["BUILDSTOCKBATCH_VERSION"] = bsb_version - max_time_min = cfg.get('max_minutes_per_sim') + max_time_min = cfg.get("max_minutes_per_sim") if max_time_min is not None: subprocess_kw = {"timeout": max_time_min * 60} else: subprocess_kw = {} start_time = dt.datetime.now() - with open(sim_path / 'openstudio_output.log', 'w') as f_out: + with open(sim_path / "openstudio_output.log", "w") as f_out: try: subprocess.run( run_cmd, @@ -200,25 +234,25 @@ def run_building(cls, buildstock_dir, weather_dir, results_dir, measures_only, stderr=subprocess.STDOUT, env=env_vars, cwd=sim_dir, - **subprocess_kw + **subprocess_kw, ) except subprocess.TimeoutExpired: end_time = dt.datetime.now() - msg = f'Terminated {sim_id} after reaching max time of {max_time_min} minutes' + msg = f"Terminated {sim_id} after reaching max time of {max_time_min} minutes" logger.warning(msg) f_out.write(msg) - with open(sim_path / 'out.osw', 'w') as out_osw: + with open(sim_path / "out.osw", "w") as out_osw: out_msg = { - 'started_at': start_time.strftime('%Y%m%dT%H%M%SZ'), - 'completed_at': end_time.strftime('%Y%m%dT%H%M%SZ'), - 'completed_status': 'Fail', - 'timeout': msg + "started_at": start_time.strftime("%Y%m%dT%H%M%SZ"), + "completed_at": end_time.strftime("%Y%m%dT%H%M%SZ"), + "completed_status": "Fail", + "timeout": msg, } out_osw.write(json.dumps(out_msg, indent=3)) - (sim_path / 'run').mkdir(exist_ok=True) - with open(sim_path / 'run' / 'run.log', 'a') as run_log: + (sim_path / "run").mkdir(exist_ok=True) + with open(sim_path / "run" / "run.log", "a") as run_log: run_log.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") - with open(sim_path / 'run' / 'failed.job', 'w') as failed_job: + with open(sim_path / "run" / "failed.job", "w") as failed_job: failed_job.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") time.sleep(20) # Wait for EnergyPlus to release file locks except subprocess.CalledProcessError: @@ -230,19 +264,21 @@ def run_building(cls, buildstock_dir, weather_dir, results_dir, measures_only, fs, f"{results_dir}/simulation_output/timeseries", upgrade_id, - i + i, ) # Clean up symlinks - for directory in ('measures', 'lib', 'weather'): + for directory in ("measures", "lib", "weather"): (sim_path / directory).unlink() if resources_path: - (resources_path / 'hpxml-measures').unlink() + (resources_path / "hpxml-measures").unlink() resources_path.rmdir() # Read data_point_out.json reporting_measures = cls.get_reporting_measures(cfg) - dpout = postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, i) + dpout = postprocessing.read_simulation_outputs( + fs, reporting_measures, 
sim_dir, upgrade_id, i + ) return dpout def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): @@ -255,12 +291,15 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): # FIXME: does this work for comstock? buildstock_path = pathlib.Path(self.buildstock_dir) project_path = pathlib.Path(self.project_dir) - lib_path = pathlib.Path(self.buildstock_dir, 'lib') + lib_path = pathlib.Path(self.buildstock_dir, "lib") if lib_path.exists(): shutil.rmtree(lib_path) lib_path.mkdir() shutil.copytree(buildstock_path / "resources", lib_path / "resources") - shutil.copytree(project_path / "housing_characteristics", lib_path / "housing_characteristics") + shutil.copytree( + project_path / "housing_characteristics", + lib_path / "housing_characteristics", + ) df = read_csv(buildstock_csv_filename, index_col=0, dtype=str) self.validate_buildstock_csv(self.project_filename, df) @@ -274,11 +313,13 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): self.results_dir, measures_only, n_datapoints, - self.cfg + self.cfg, ) upgrade_sims = [] - for i in range(len(self.cfg.get('upgrades', []))): - upgrade_sims.append(map(functools.partial(run_building_d, upgrade_idx=i), building_ids)) + for i in range(len(self.cfg.get("upgrades", []))): + upgrade_sims.append( + map(functools.partial(run_building_d, upgrade_idx=i), building_ids) + ) if not self.skip_baseline_sims: baseline_sims = map(run_building_d, building_ids) all_sims = itertools.chain(baseline_sims, *upgrade_sims) @@ -291,18 +332,18 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): time.sleep(10) shutil.rmtree(lib_path) - sim_out_path = pathlib.Path(self.results_dir, 'simulation_output') + sim_out_path = pathlib.Path(self.results_dir, "simulation_output") - results_job_json_filename = sim_out_path / 'results_job0.json.gz' - with gzip.open(results_job_json_filename, 'wt', encoding='utf-8') as f: + results_job_json_filename = sim_out_path / "results_job0.json.gz" + with gzip.open(results_job_json_filename, "wt", encoding="utf-8") as f: json.dump(dpouts, f) del dpouts - sim_out_tarfile_name = sim_out_path / 'simulations_job0.tar.gz' - logger.debug(f'Compressing simulation outputs to {sim_out_tarfile_name}') - with tarfile.open(sim_out_tarfile_name, 'w:gz') as tarf: + sim_out_tarfile_name = sim_out_path / "simulations_job0.tar.gz" + logger.debug(f"Compressing simulation outputs to {sim_out_tarfile_name}") + with tarfile.open(sim_out_tarfile_name, "w:gz") as tarf: for dirname in os.listdir(sim_out_path): - if re.match(r'up\d+', dirname) and (sim_out_path / dirname).is_dir(): + if re.match(r"up\d+", dirname) and (sim_out_path / dirname).is_dir(): tarf.add(sim_out_path / dirname, arcname=dirname) shutil.rmtree(sim_out_path / dirname) @@ -313,8 +354,7 @@ def output_dir(self): @property def results_dir(self): results_dir = self.cfg.get( - 'output_directory', - os.path.join(self.project_dir, 'localResults') + "output_directory", os.path.join(self.project_dir, "localResults") ) results_dir = self.path_rel_to_projectfile(results_dir) if not os.path.isdir(results_dir): @@ -322,70 +362,86 @@ def results_dir(self): return results_dir def get_dask_client(self): - cluster = LocalCluster(local_directory=os.path.join(self.results_dir, 'dask-tmp')) + cluster = LocalCluster( + local_directory=os.path.join(self.results_dir, "dask-tmp") + ) return Client(cluster) @log_error_details() def main(): - logging.config.dictConfig({ - 'version': 1, - 'disable_existing_loggers': True, - 
'formatters': { - 'defaultfmt': { - 'format': '%(levelname)s:%(asctime)s:%(name)s:%(message)s', - 'datefmt': '%Y-%m-%d %H:%M:%S' - } - }, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'formatter': 'defaultfmt', - 'level': 'DEBUG', - 'stream': 'ext://sys.stdout', - } - }, - 'loggers': { - '__main__': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] + logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "defaultfmt": { + "format": "%(levelname)s:%(asctime)s:%(name)s:%(message)s", + "datefmt": "%Y-%m-%d %H:%M:%S", + } }, - 'buildstockbatch': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'] - } - }, - }) + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "defaultfmt", + "level": "DEBUG", + "stream": "ext://sys.stdout", + } + }, + "loggers": { + "__main__": { + "level": "DEBUG", + "propagate": True, + "handlers": ["console"], + }, + "buildstockbatch": { + "level": "DEBUG", + "propagate": True, + "handlers": ["console"], + }, + }, + } + ) parser = argparse.ArgumentParser() print(BuildStockBatchBase.LOGO) - parser.add_argument('project_filename') + parser.add_argument("project_filename") parser.add_argument( - '-j', + "-j", type=int, - help='Number of parallel simulations. Default: all cores.', - default=None + help="Number of parallel simulations. Default: all cores.", + default=None, ) parser.add_argument( - '-m', '--measures_only', - action='store_true', - help='Only apply the measures, but don\'t run simulations. Useful for debugging.' + "-m", + "--measures_only", + action="store_true", + help="Only apply the measures, but don't run simulations. Useful for debugging.", ) group = parser.add_mutually_exclusive_group() - group.add_argument('--postprocessonly', - help='Only do postprocessing, useful for when the simulations are already done', - action='store_true') - group.add_argument('--uploadonly', - help='Only upload to S3, useful when postprocessing is already done. Ignores the ' - 'upload flag in yaml', action='store_true') - group.add_argument('--validateonly', help='Only validate the project YAML file and references. Nothing is executed', - action='store_true') - group.add_argument('--samplingonly', help='Run the sampling only.', - action='store_true') + group.add_argument( + "--postprocessonly", + help="Only do postprocessing, useful for when the simulations are already done", + action="store_true", + ) + group.add_argument( + "--uploadonly", + help="Only upload to S3, useful when postprocessing is already done. Ignores the " + "upload flag in yaml", + action="store_true", + ) + group.add_argument( + "--validateonly", + help="Only validate the project YAML file and references. 
Nothing is executed", + action="store_true", + ) + group.add_argument( + "--samplingonly", help="Run the sampling only.", action="store_true" + ) args = parser.parse_args() if not os.path.isfile(args.project_filename): - raise FileNotFoundError(f'The project file {args.project_filename} doesn\'t exist') + raise FileNotFoundError( + f"The project file {args.project_filename} doesn't exist" + ) # Validate the project, and in case of the --validateonly flag return True if validation passes LocalBatch.validate_project(args.project_filename) @@ -393,7 +449,11 @@ def main(): return True batch = LocalBatch(args.project_filename) if not (args.postprocessonly or args.uploadonly or args.validateonly): - batch.run_batch(n_jobs=args.j, measures_only=args.measures_only, sampling_only=args.samplingonly) + batch.run_batch( + n_jobs=args.j, + measures_only=args.measures_only, + sampling_only=args.samplingonly, + ) if args.measures_only or args.samplingonly: return if args.uploadonly: @@ -402,5 +462,5 @@ def main(): batch.process_results() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 9d5e02f0..79a604e4 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -40,88 +40,79 @@ def read_data_point_out_json(fs, reporting_measures, filename): try: - with fs.open(filename, 'r') as f: + with fs.open(filename, "r") as f: d = json.load(f) except (FileNotFoundError, json.JSONDecodeError): return None else: - sim_out_report = 'SimulationOutputReport' - if 'ReportSimulationOutput' in d: - sim_out_report = 'ReportSimulationOutput' + sim_out_report = "SimulationOutputReport" + if "ReportSimulationOutput" in d: + sim_out_report = "ReportSimulationOutput" if sim_out_report not in d: - d[sim_out_report] = {'applicable': False} + d[sim_out_report] = {"applicable": False} for reporting_measure in reporting_measures: if reporting_measure not in d: - d[reporting_measure] = {'applicable': False} + d[reporting_measure] = {"applicable": False} return d def to_camelcase(x): - s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', x) - return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", x) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() def flatten_datapoint_json(reporting_measures, d): new_d = {} - cols_to_keep = { - 'ApplyUpgrade': [ - 'upgrade_name', - 'applicable' - ] - } + cols_to_keep = {"ApplyUpgrade": ["upgrade_name", "applicable"]} for k1, k2s in cols_to_keep.items(): for k2 in k2s: - new_d[f'{k1}.{k2}'] = d.get(k1, {}).get(k2) + new_d[f"{k1}.{k2}"] = d.get(k1, {}).get(k2) # copy over all the key and values from BuildExistingModel - col1 = 'BuildExistingModel' + col1 = "BuildExistingModel" for k, v in d.get(col1, {}).items(): - new_d[f'{col1}.{k}'] = v + new_d[f"{col1}.{k}"] = v # if there is no units_represented key, default to 1 # TODO @nmerket @rajeee is there a way to not apply this to Commercial jobs? 
It doesn't hurt, but it is weird for us - units = int(new_d.get(f'{col1}.units_represented', 1)) - new_d[f'{col1}.units_represented'] = units - sim_out_report = 'SimulationOutputReport' - if 'ReportSimulationOutput' in d: - sim_out_report = 'ReportSimulationOutput' + units = int(new_d.get(f"{col1}.units_represented", 1)) + new_d[f"{col1}.units_represented"] = units + sim_out_report = "SimulationOutputReport" + if "ReportSimulationOutput" in d: + sim_out_report = "ReportSimulationOutput" col2 = sim_out_report for k, v in d.get(col2, {}).items(): - new_d[f'{col2}.{k}'] = v + new_d[f"{col2}.{k}"] = v # additional reporting measures - if sim_out_report == 'ReportSimulationOutput': - reporting_measures += ['ReportUtilityBills'] - reporting_measures += ['UpgradeCosts'] + if sim_out_report == "ReportSimulationOutput": + reporting_measures += ["ReportUtilityBills"] + reporting_measures += ["UpgradeCosts"] for col in reporting_measures: for k, v in d.get(col, {}).items(): - new_d[f'{col}.{k}'] = v + new_d[f"{col}.{k}"] = v - new_d['building_id'] = new_d['BuildExistingModel.building_id'] - del new_d['BuildExistingModel.building_id'] + new_d["building_id"] = new_d["BuildExistingModel.building_id"] + del new_d["BuildExistingModel.building_id"] return new_d def read_out_osw(fs, filename): try: - with fs.open(filename, 'r') as f: + with fs.open(filename, "r") as f: d = json.load(f) except (FileNotFoundError, json.JSONDecodeError): return None else: out_d = {} - keys_to_copy = [ - 'started_at', - 'completed_at', - 'completed_status' - ] + keys_to_copy = ["started_at", "completed_at", "completed_status"] for key in keys_to_copy: out_d[key] = d.get(key, None) - for step in d.get('steps', []): - if step['measure_dir_name'] == 'BuildExistingModel': - out_d['building_id'] = step['arguments']['building_id'] + for step in d.get("steps", []): + if step["measure_dir_name"] == "BuildExistingModel": + out_d["building_id"] = step["arguments"]["building_id"] return out_d @@ -142,71 +133,96 @@ def read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, buildin """ dpout = read_data_point_out_json( - fs, reporting_measures, f'{sim_dir}/run/data_point_out.json' + fs, reporting_measures, f"{sim_dir}/run/data_point_out.json" ) if dpout is None: dpout = {} else: dpout = flatten_datapoint_json(reporting_measures, dpout) - out_osw = read_out_osw(fs, f'{sim_dir}/out.osw') + out_osw = read_out_osw(fs, f"{sim_dir}/out.osw") if out_osw: dpout.update(out_osw) - dpout['upgrade'] = upgrade_id - dpout['building_id'] = building_id + dpout["upgrade"] = upgrade_id + dpout["building_id"] = building_id return dpout def write_dataframe_as_parquet(df, fs, filename, schema=None): tbl = pa.Table.from_pandas(df, schema=schema, preserve_index=False) - with fs.open(filename, 'wb') as f: + with fs.open(filename, "wb") as f: parquet.write_table(tbl, f) def clean_up_results_df(df, cfg, keep_upgrade_id=False): results_df = df.copy() cols_to_remove = ( - 'build_existing_model.weight', - 'simulation_output_report.weight', - 'build_existing_model.workflow_json', - 'simulation_output_report.upgrade_name' + "build_existing_model.weight", + "simulation_output_report.weight", + "build_existing_model.workflow_json", + "simulation_output_report.upgrade_name", ) for col in cols_to_remove: if col in results_df.columns: del results_df[col] - for col in ('started_at', 'completed_at'): + for col in ("started_at", "completed_at"): if col in results_df.columns: results_df[col] = results_df[col].map( - lambda x: dt.datetime.strptime(x, 
'%Y%m%dT%H%M%SZ') if isinstance(x, str) else x + lambda x: dt.datetime.strptime(x, "%Y%m%dT%H%M%SZ") + if isinstance(x, str) + else x ) - reference_scenarios = dict([(i, x.get('reference_scenario')) for i, x in enumerate(cfg.get('upgrades', []), 1)]) - results_df['apply_upgrade.reference_scenario'] = \ - results_df['upgrade'].map(reference_scenarios).fillna('').astype(str) + reference_scenarios = dict( + [ + (i, x.get("reference_scenario")) + for i, x in enumerate(cfg.get("upgrades", []), 1) + ] + ) + results_df["apply_upgrade.reference_scenario"] = ( + results_df["upgrade"].map(reference_scenarios).fillna("").astype(str) + ) # standardize the column orders first_few_cols = [ - 'building_id', - 'started_at', - 'completed_at', - 'completed_status', - 'apply_upgrade.applicable', - 'apply_upgrade.upgrade_name', - 'apply_upgrade.reference_scenario' + "building_id", + "started_at", + "completed_at", + "completed_status", + "apply_upgrade.applicable", + "apply_upgrade.upgrade_name", + "apply_upgrade.reference_scenario", ] if keep_upgrade_id: - first_few_cols.insert(1, 'upgrade') - if 'job_id' in results_df.columns: - first_few_cols.insert(2, 'job_id') - - build_existing_model_cols = sorted([col for col in results_df.columns if col.startswith('build_existing_model')]) - sim_output_report_cols = sorted([col for col in results_df.columns if col.startswith('simulation_output_report')]) - report_sim_output_cols = sorted([col for col in results_df.columns if col.startswith('report_simulation_output')]) - upgrade_costs_cols = sorted([col for col in results_df.columns if col.startswith('upgrade_costs')]) - sorted_cols = \ - first_few_cols + \ - build_existing_model_cols + \ - sim_output_report_cols + \ - report_sim_output_cols + \ - upgrade_costs_cols + first_few_cols.insert(1, "upgrade") + if "job_id" in results_df.columns: + first_few_cols.insert(2, "job_id") + + build_existing_model_cols = sorted( + [col for col in results_df.columns if col.startswith("build_existing_model")] + ) + sim_output_report_cols = sorted( + [ + col + for col in results_df.columns + if col.startswith("simulation_output_report") + ] + ) + report_sim_output_cols = sorted( + [ + col + for col in results_df.columns + if col.startswith("report_simulation_output") + ] + ) + upgrade_costs_cols = sorted( + [col for col in results_df.columns if col.startswith("upgrade_costs")] + ) + sorted_cols = ( + first_few_cols + + build_existing_model_cols + + sim_output_report_cols + + report_sim_output_cols + + upgrade_costs_cols + ) remaining_cols = sorted(set(results_df.columns.values).difference(sorted_cols)) sorted_cols += remaining_cols @@ -217,17 +233,17 @@ def clean_up_results_df(df, cfg, keep_upgrade_id=False): def get_cols(fs, filepath): - with fs.open(filepath, 'rb') as f: + with fs.open(filepath, "rb") as f: schema = parquet.read_schema(f) return set(schema.names) def read_results_json(fs, filename, all_cols=None): - with fs.open(filename, 'rb') as f1: - with gzip.open(f1, 'rt', encoding='utf-8') as f2: + with fs.open(filename, "rb") as f1: + with gzip.open(f1, "rt", encoding="utf-8") as f2: dpouts = json.load(f2) df = pd.DataFrame(dpouts) - df['job_id'] = int(re.search(r'results_job(\d+)\.json\.gz', filename).group(1)) + df["job_id"] = int(re.search(r"results_job(\d+)\.json\.gz", filename).group(1)) if all_cols is not None: for missing_col in set(all_cols).difference(df.columns.values): df[missing_col] = None @@ -238,7 +254,7 @@ def read_results_json(fs, filename, all_cols=None): def get_schema_dict(fs, filename): df = 
read_results_json(fs, filename)
-    df = df.replace('', np.nan)  # required to make pa correctly infer the dtypes
+    df = df.replace("", np.nan)  # required to make pa correctly infer the dtypes
     sch = pa.Schema.from_pandas(df)
     sch_dict = {name: type for name, type in zip(sch.names, sch.types)}
     return sch_dict
@@ -255,17 +271,19 @@ def merge_schema_dicts(dict1, dict2):
 
 def read_enduse_timeseries_parquet(fs, all_cols, src_path, bldg_id):
     src_filename = f"{src_path}/bldg{bldg_id:07}.parquet"
-    with fs.open(src_filename, 'rb') as f:
-        df = pd.read_parquet(f, engine='pyarrow')
-    df['building_id'] = bldg_id
+    with fs.open(src_filename, "rb") as f:
+        df = pd.read_parquet(f, engine="pyarrow")
+    df["building_id"] = bldg_id
     for col in set(all_cols).difference(df.columns.values):
         df[col] = np.nan
     df = df[all_cols]
-    df.set_index('building_id', inplace=True)
+    df.set_index("building_id", inplace=True)
     return df
 
 
-def concat_and_normalize(fs, all_cols, src_path, dst_path, partition_columns, indx, bldg_ids, partition_vals):
+def concat_and_normalize(
+    fs, all_cols, src_path, dst_path, partition_columns, indx, bldg_ids, partition_vals
+):
     dfs = []
     for bldg_id in sorted(bldg_ids):
         df = read_enduse_timeseries_parquet(fs, all_cols, src_path, bldg_id)
@@ -280,7 +298,7 @@ def concat_and_normalize(fs, all_cols, src_path, dst_path, partition_columns, in
     fs.makedirs(dst_filepath, exist_ok=True)
 
     dst_filename = f"{dst_filepath}/group{indx}.parquet"
-    with fs.open(dst_filename, 'wb') as f:
+    with fs.open(dst_filename, "wb") as f:
         df.to_parquet(f, index=True)
     return len(bldg_ids)
@@ -327,24 +345,34 @@ def split_into_groups(total_size, max_group_size):
 
 def get_partitioned_bldg_groups(partition_df, partition_columns, files_per_partition):
     """
-    Returns intelligent grouping of building_ids by partition columns.
-    1. Group the building_ids by partition columns. For each group, say (CO, Jefferson), we have a list of building
-        ids. The total number of such groups is ngroups
-    2. Concatenate those list to get bldg_id_list, which will have all the bldg_ids but ordered such that that
-        buildings belonging to the same group are close together.
-    3. Split the list of building in each group in 1 to multiple subgroups so that total number of buildings
-        in each subgroup is less than or equal to files_per_partition. This will give the bldg_id_groups (list of
-        list) used to read the dataframe. The buildings within the inner list will be concatenated.
-        len(bldg_id_groups) is equal to number of such concatenation, and eventually, number of output parquet files.
+    Returns an intelligent grouping of building_ids by partition columns.
+    1. Group the building_ids by partition columns. For each group, say (CO, Jefferson), we have a list of
+    building ids. The total number of such groups is ngroups.
+    2. Concatenate those lists to get bldg_id_list, which contains all the bldg_ids, ordered such that
+    buildings belonging to the same group are adjacent.
+    3. Split the list of buildings in each group into one or more subgroups so that the number of buildings
+    in each subgroup is less than or equal to files_per_partition. This gives the bldg_id_groups (list of
+    lists) used to read the dataframes. The buildings within each inner list are concatenated.
+    len(bldg_id_groups) equals the number of such concatenations and, eventually, the number of output
+    parquet files.
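+
+    For example (hypothetical values): with files_per_partition=2 and groups
+    (CO, Jefferson) -> [1, 2, 3] and (CO, Denver) -> [4, 5], this would return,
+    e.g., bldg_id_groups = [[1, 2], [3], [4, 5]], bldg_id_list = [1, 2, 3, 4, 5],
+    and ngroups = 2, i.e. three output parquet files across two partition folders.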
""" total_building = len(partition_df) if partition_columns: - bldg_id_list_df = partition_df.reset_index().groupby(partition_columns)['building_id'].apply(list) + bldg_id_list_df = ( + partition_df.reset_index() + .groupby(partition_columns)["building_id"] + .apply(list) + ) ngroups = len(bldg_id_list_df) bldg_id_list = bldg_id_list_df.sum() - nfiles_in_each_group = [nfiles for nfiles in bldg_id_list_df.map(lambda x: len(x))] - files_groups = [split_into_groups(n, files_per_partition) for n in nfiles_in_each_group] - flat_groups = [n for group in files_groups for n in group] # flatten list of list into a list (maintain order) + nfiles_in_each_group = [ + nfiles for nfiles in bldg_id_list_df.map(lambda x: len(x)) + ] + files_groups = [ + split_into_groups(n, files_per_partition) for n in nfiles_in_each_group + ] + flat_groups = [ + n for group in files_groups for n in group + ] # flatten list of list into a list (maintain order) else: # no partitioning by a column. Just put buildings into groups of files_per_partition ngroups = 1 @@ -363,8 +391,8 @@ def get_partitioned_bldg_groups(partition_df, partition_columns, files_per_parti def get_upgrade_list(cfg): - upgrade_start = 1 if cfg['baseline'].get('skip_sims', False) else 0 - upgrade_end = len(cfg.get('upgrades', [])) + 1 + upgrade_start = 1 if cfg["baseline"].get("skip_sims", False) else 0 + upgrade_end = len(cfg.get("upgrades", [])) + 1 return list(range(upgrade_start, upgrade_end)) @@ -375,7 +403,7 @@ def write_metadata_files(fs, parquet_root_dir, partition_columns): logger.info(f"Written _common_metadata to {parquet_root_dir}") if partition_columns: - partition_glob = "/".join([f'{c}*' for c in partition_columns]) + partition_glob = "/".join([f"{c}*" for c in partition_columns]) glob_str = f"{parquet_root_dir}/up*/{partition_glob}/*.parquet" else: glob_str = f"{parquet_root_dir}/up*/*.parquet" @@ -384,7 +412,9 @@ def write_metadata_files(fs, parquet_root_dir, partition_columns): concat_files = fs.glob(glob_str) logger.info(f"Gathered {len(concat_files)} files. 
Now writing _metadata") parquet_root_dir = Path(parquet_root_dir).as_posix() - create_metadata_file(concat_files, root_dir=parquet_root_dir, engine='pyarrow', fs=fs) + create_metadata_file( + concat_files, root_dir=parquet_root_dir, engine="pyarrow", fs=fs + ) logger.info(f"_metadata file written to {parquet_root_dir}") @@ -400,11 +430,11 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): :param do_timeseries: process timeseries results, defaults to True :type do_timeseries: bool, optional """ - sim_output_dir = f'{results_dir}/simulation_output' - ts_in_dir = f'{sim_output_dir}/timeseries' - results_csvs_dir = f'{results_dir}/results_csvs' - parquet_dir = f'{results_dir}/parquet' - ts_dir = f'{results_dir}/parquet/timeseries' + sim_output_dir = f"{results_dir}/simulation_output" + ts_in_dir = f"{sim_output_dir}/timeseries" + results_csvs_dir = f"{results_dir}/results_csvs" + parquet_dir = f"{results_dir}/parquet" + ts_dir = f"{results_dir}/parquet/timeseries" dirs = [results_csvs_dir, parquet_dir] if do_timeseries: dirs.append(ts_dir) @@ -414,42 +444,61 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): fs.makedirs(dr) # Results "CSV" - results_json_files = fs.glob(f'{sim_output_dir}/results_job*.json.gz') + results_json_files = fs.glob(f"{sim_output_dir}/results_job*.json.gz") if not results_json_files: raise ValueError("No simulation results found to post-process.") - logger.info("Collecting all the columns and datatypes in results_job*.json.gz parquet files.") - all_schema_dict = db.from_sequence(results_json_files).map(partial(get_schema_dict, fs)).\ - fold(lambda x, y: merge_schema_dicts(x, y)).compute() + logger.info( + "Collecting all the columns and datatypes in results_job*.json.gz parquet files." + ) + all_schema_dict = ( + db.from_sequence(results_json_files) + .map(partial(get_schema_dict, fs)) + .fold(lambda x, y: merge_schema_dicts(x, y)) + .compute() + ) logger.info(f"Got {len(all_schema_dict)} columns") all_results_cols = list(all_schema_dict.keys()) - all_schema_dict = {to_camelcase(key): value for key, value in all_schema_dict.items()} + all_schema_dict = { + to_camelcase(key): value for key, value in all_schema_dict.items() + } logger.info(f"Got this schema: {all_schema_dict}\n") - delayed_results_dfs = [dask.delayed(partial(read_results_json, fs, all_cols=all_results_cols))(x) - for x in results_json_files] - results_df = dd.from_delayed(delayed_results_dfs, verify_meta=False) + delayed_results_dfs = [ + dask.delayed(partial(read_results_json, fs, all_cols=all_results_cols))(x) + for x in results_json_files + ] + results_df = dd.from_delayed(delayed_results_dfs, verify_meta=False) if do_timeseries: # Look at all the parquet files to see what columns are in all of them. logger.info("Collecting all the columns in timeseries parquet files.") do_timeseries = False all_ts_cols = set() - for upgrade_folder in fs.glob(f'{ts_in_dir}/up*'): + for upgrade_folder in fs.glob(f"{ts_in_dir}/up*"): ts_filenames = fs.ls(upgrade_folder) if ts_filenames: do_timeseries = True - logger.info(f"Found {len(ts_filenames)} files for upgrade {Path(upgrade_folder).name}.") + logger.info( + f"Found {len(ts_filenames)} files for upgrade {Path(upgrade_folder).name}." 
+ ) files_bag = db.from_sequence(ts_filenames, partition_size=100) - all_ts_cols |= files_bag.map(partial(get_cols, fs)).\ - fold(lambda x, y: x.union(y)).compute() + all_ts_cols |= ( + files_bag.map(partial(get_cols, fs)) + .fold(lambda x, y: x.union(y)) + .compute() + ) logger.info("Collected all the columns") else: - logger.info(f"There are no timeseries files for upgrade {Path(upgrade_folder).name}.") + logger.info( + f"There are no timeseries files for upgrade {Path(upgrade_folder).name}." + ) # Sort the columns - all_ts_cols_sorted = ['building_id'] + sorted(x for x in all_ts_cols if x.startswith('time')) + all_ts_cols_sorted = ["building_id"] + sorted( + x for x in all_ts_cols if x.startswith("time") + ) all_ts_cols.difference_update(all_ts_cols_sorted) - all_ts_cols_sorted.extend(sorted(x for x in all_ts_cols if not x.endswith(']'))) + all_ts_cols_sorted.extend(sorted(x for x in all_ts_cols if not x.endswith("]"))) all_ts_cols.difference_update(all_ts_cols_sorted) all_ts_cols_sorted.extend(sorted(all_ts_cols)) logger.info(f"Got {len(all_ts_cols_sorted)} columns in total") @@ -457,14 +506,16 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): else: logger.warning("There are no timeseries files for any upgrades.") - results_df_groups = results_df.groupby('upgrade') + results_df_groups = results_df.groupby("upgrade") upgrade_list = get_upgrade_list(cfg) - partition_columns = cfg.get('postprocessing', {}).get('partition_columns', []) + partition_columns = cfg.get("postprocessing", {}).get("partition_columns", []) partition_columns = [c.lower() for c in partition_columns] - df_partition_columns = [f'build_existing_model.{c}' for c in partition_columns] + df_partition_columns = [f"build_existing_model.{c}" for c in partition_columns] missing_cols = set(df_partition_columns) - set(all_schema_dict.keys()) if missing_cols: - raise ValueError(f"The following partitioning columns are not found in results.json: {missing_cols}") + raise ValueError( + f"The following partitioning columns are not found in results.json: {missing_cols}" + ) if partition_columns: logger.info(f"The timeseries files will be partitioned by {partition_columns}.") @@ -475,17 +526,21 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): logger.info(f"Obtained results_df for {upgrade_id} with {len(df)} datapoints. ") df.rename(columns=to_camelcase, inplace=True) df = clean_up_results_df(df, cfg, keep_upgrade_id=True) - del df['upgrade'] - df.set_index('building_id', inplace=True) + del df["upgrade"] + df.set_index("building_id", inplace=True) df.sort_index(inplace=True) schema = None partition_df = df[df_partition_columns].copy() - partition_df.rename(columns={df_c: c for df_c, c in zip(df_partition_columns, partition_columns)}, - inplace=True) + partition_df.rename( + columns={ + df_c: c for df_c, c in zip(df_partition_columns, partition_columns) + }, + inplace=True, + ) if upgrade_id > 0: # Remove building characteristics for upgrade scenarios. 
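+                # (These build_existing_model.* columns repeat the baseline values
+                # for the same building_id, so they are only written out with the
+                # baseline (up00) results.)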
cols_to_keep = list( - filter(lambda x: not x.startswith('build_existing_model.'), df.columns) + filter(lambda x: not x.startswith("build_existing_model."), df.columns) ) df = df[cols_to_keep] null_cols = get_null_cols(df) @@ -495,15 +550,19 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): logger.info(f"Upgrade {upgrade_id} has null cols: {null_cols}") schema, unresolved = correct_schema(all_schema_dict, df) if unresolved: - logger.info(f"The types for {unresolved} columns couldn't be determined.") + logger.info( + f"The types for {unresolved} columns couldn't be determined." + ) else: - logger.info("All columns were successfully assigned a datatype based on other upgrades.") + logger.info( + "All columns were successfully assigned a datatype based on other upgrades." + ) # Write CSV csv_filename = f"{results_csvs_dir}/results_up{upgrade_id:02d}.csv.gz" - logger.info(f'Writing {csv_filename}') - with fs.open(csv_filename, 'wb') as f: - with gzip.open(f, 'wt', encoding='utf-8') as gf: - df.to_csv(gf, index=True, lineterminator='\n') + logger.info(f"Writing {csv_filename}") + with fs.open(csv_filename, "wb") as f: + with gzip.open(f, "wt", encoding="utf-8") as gf: + df.to_csv(gf, index=True, lineterminator="\n") # Write Parquet if upgrade_id == 0: @@ -513,43 +572,64 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): fs.makedirs(results_parquet_dir) parquet_filename = f"{results_parquet_dir}/results_up{upgrade_id:02d}.parquet" - logger.info(f'Writing {parquet_filename}') + logger.info(f"Writing {parquet_filename}") write_dataframe_as_parquet( - df.reset_index(), - fs, - parquet_filename, - schema=schema + df.reset_index(), fs, parquet_filename, schema=schema ) if do_timeseries: # Get the names of the timeseries file for each simulation in this upgrade - ts_upgrade_path = f'{ts_in_dir}/up{upgrade_id:02d}' - ts_filenames = [ts_upgrade_path + ts_filename for ts_filename in fs.ls(ts_upgrade_path)] - ts_bldg_ids = [int(re.search(r'bldg(\d+).parquet', flname).group(1)) for flname in ts_filenames] + ts_upgrade_path = f"{ts_in_dir}/up{upgrade_id:02d}" + ts_filenames = [ + ts_upgrade_path + ts_filename for ts_filename in fs.ls(ts_upgrade_path) + ] + ts_bldg_ids = [ + int(re.search(r"bldg(\d+).parquet", flname).group(1)) + for flname in ts_filenames + ] if not ts_filenames: - logger.warning(f"There are no timeseries files for upgrade{upgrade_id}.") + logger.warning( + f"There are no timeseries files for upgrade{upgrade_id}." + ) continue - logger.info(f"There are {len(ts_filenames)} timeseries files for upgrade{upgrade_id}.") + logger.info( + f"There are {len(ts_filenames)} timeseries files for upgrade{upgrade_id}." 
+ ) # Calculate the mean and estimate the total memory usage - read_ts_parquet = partial(read_enduse_timeseries_parquet, fs, all_ts_cols_sorted, ts_upgrade_path) - get_ts_mem_usage_d = dask.delayed(lambda x: read_ts_parquet(x).memory_usage(deep=True).sum()) + read_ts_parquet = partial( + read_enduse_timeseries_parquet, fs, all_ts_cols_sorted, ts_upgrade_path + ) + get_ts_mem_usage_d = dask.delayed( + lambda x: read_ts_parquet(x).memory_usage(deep=True).sum() + ) sample_size = min(len(ts_bldg_ids), 36 * 3) - mean_mem = np.mean(dask.compute(map(get_ts_mem_usage_d, random.sample(ts_bldg_ids, sample_size)))[0]) + mean_mem = np.mean( + dask.compute( + map(get_ts_mem_usage_d, random.sample(ts_bldg_ids, sample_size)) + )[0] + ) # Determine how many files should be in each partition and group the files - parquet_memory = int(cfg.get('eagle', {}).get('postprocessing', {} - ).get('parquet_memory_mb', MAX_PARQUET_MEMORY)) + parquet_memory = int( + cfg.get("eagle", {}) + .get("postprocessing", {}) + .get("parquet_memory_mb", MAX_PARQUET_MEMORY) + ) logger.info(f"Max parquet memory: {parquet_memory} MB") - max_files_per_partition = max(1, math.floor(parquet_memory / (mean_mem / 1e6))) + max_files_per_partition = max( + 1, math.floor(parquet_memory / (mean_mem / 1e6)) + ) partition_df = partition_df.loc[ts_bldg_ids].copy() logger.info(f"partition_df for the upgrade has {len(partition_df)} rows.") - bldg_id_groups, bldg_id_list, ngroup = get_partitioned_bldg_groups(partition_df, - partition_columns, - max_files_per_partition) - logger.info(f"Processing {len(bldg_id_list)} building timeseries by combining max of " - f"{max_files_per_partition} parquets together. This will create {len(bldg_id_groups)} parquet " - f"partitions which go into {ngroup} column group(s) of {partition_columns}") + bldg_id_groups, bldg_id_list, ngroup = get_partitioned_bldg_groups( + partition_df, partition_columns, max_files_per_partition + ) + logger.info( + f"Processing {len(bldg_id_list)} building timeseries by combining max of " + f"{max_files_per_partition} parquets together. This will create {len(bldg_id_groups)} parquet " + f"partitions which go into {ngroup} column group(s) of {partition_columns}" + ) if isinstance(fs, LocalFileSystem): ts_out_loc = f"{ts_dir}/upgrade={upgrade_id}/" @@ -558,22 +638,47 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): ts_out_loc = f"s3://{ts_dir}/upgrade={upgrade_id}/" fs.makedirs(ts_out_loc) - logger.info(f'Created directory {ts_out_loc} for writing. Now concatenating ...') + logger.info( + f"Created directory {ts_out_loc} for writing. Now concatenating ..." 
+ ) - src_path = f'{ts_in_dir}/up{upgrade_id:02d}/' - concat_partial = dask.delayed(partial(concat_and_normalize, - fs, all_ts_cols_sorted, src_path, ts_out_loc, partition_columns)) - partition_vals_list = [list(partition_df.loc[bldg_id_list[0]].values) if partition_columns else [] - for bldg_id_list in bldg_id_groups] + src_path = f"{ts_in_dir}/up{upgrade_id:02d}/" + concat_partial = dask.delayed( + partial( + concat_and_normalize, + fs, + all_ts_cols_sorted, + src_path, + ts_out_loc, + partition_columns, + ) + ) + partition_vals_list = [ + list(partition_df.loc[bldg_id_list[0]].values) + if partition_columns + else [] + for bldg_id_list in bldg_id_groups + ] with tempfile.TemporaryDirectory() as tmpdir: - tmpfilepath = Path(tmpdir, 'dask-report.html') + tmpfilepath = Path(tmpdir, "dask-report.html") with performance_report(filename=str(tmpfilepath)): - dask.compute(map(concat_partial, *zip(*enumerate(bldg_id_groups)), partition_vals_list)) + dask.compute( + map( + concat_partial, + *zip(*enumerate(bldg_id_groups)), + partition_vals_list, + ) + ) if tmpfilepath.exists(): - fs.put_file(str(tmpfilepath), f'{results_dir}/dask_combine_report{upgrade_id}.html') + fs.put_file( + str(tmpfilepath), + f"{results_dir}/dask_combine_report{upgrade_id}.html", + ) - logger.info(f"Finished combining and saving timeseries for upgrade{upgrade_id}.") + logger.info( + f"Finished combining and saving timeseries for upgrade{upgrade_id}." + ) logger.info("All aggregation completed. ") if do_timeseries: logger.info("Writing timeseries metadata files") @@ -582,13 +687,13 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): def remove_intermediate_files(fs, results_dir, keep_individual_timeseries=False): # Remove aggregated files to save space - sim_output_dir = f'{results_dir}/simulation_output' - results_job_json_glob = f'{sim_output_dir}/results_job*.json.gz' - logger.info('Removing results_job*.json.gz') + sim_output_dir = f"{results_dir}/simulation_output" + results_job_json_glob = f"{sim_output_dir}/results_job*.json.gz" + logger.info("Removing results_job*.json.gz") for filename in fs.glob(results_job_json_glob): fs.rm(filename) if not keep_individual_timeseries: - ts_in_dir = f'{sim_output_dir}/timeseries' + ts_in_dir = f"{sim_output_dir}/timeseries" fs.rm(ts_in_dir, recursive=True) @@ -596,35 +701,43 @@ def upload_results(aws_conf, output_dir, results_dir, buildstock_csv_filename): logger.info("Uploading the parquet files to s3") output_folder_name = Path(output_dir).name - parquet_dir = Path(results_dir).joinpath('parquet') - ts_dir = parquet_dir / 'timeseries' + parquet_dir = Path(results_dir).joinpath("parquet") + ts_dir = parquet_dir / "timeseries" if not parquet_dir.is_dir(): - logger.error(f"{parquet_dir} does not exist. Please make sure postprocessing has been done.") + logger.error( + f"{parquet_dir} does not exist. Please make sure postprocessing has been done." 
+ ) raise FileNotFoundError(parquet_dir) all_files = [] - for file in parquet_dir.rglob('*.parquet'): + for file in parquet_dir.rglob("*.parquet"): all_files.append(file.relative_to(parquet_dir)) - for file in [*ts_dir.glob('_common_metadata'), *ts_dir.glob('_metadata')]: + for file in [*ts_dir.glob("_common_metadata"), *ts_dir.glob("_metadata")]: all_files.append(file.relative_to(parquet_dir)) - s3_prefix = aws_conf.get('s3', {}).get('prefix', '').rstrip('/') - s3_bucket = aws_conf.get('s3', {}).get('bucket', None) + s3_prefix = aws_conf.get("s3", {}).get("prefix", "").rstrip("/") + s3_bucket = aws_conf.get("s3", {}).get("bucket", None) if not (s3_prefix and s3_bucket): - logger.error("YAML file missing postprocessing:aws:s3:prefix and/or bucket entry.") + logger.error( + "YAML file missing postprocessing:aws:s3:prefix and/or bucket entry." + ) return - s3_prefix_output = s3_prefix + '/' + output_folder_name + '/' + s3_prefix_output = s3_prefix + "/" + output_folder_name + "/" - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") bucket = s3.Bucket(s3_bucket) n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix_output))) if n_existing_files > 0: - logger.error(f"There are already {n_existing_files} files in the s3 folder {s3_bucket}/{s3_prefix_output}.") + logger.error( + f"There are already {n_existing_files} files in the s3 folder {s3_bucket}/{s3_prefix_output}." + ) raise FileExistsError(f"s3://{s3_bucket}/{s3_prefix_output}") def upload_file(filepath, s3key=None): - full_path = filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath) - s3 = boto3.resource('s3') + full_path = ( + filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath) + ) + s3 = boto3.resource("s3") bucket = s3.Bucket(s3_bucket) if s3key is None: s3key = Path(s3_prefix_output).joinpath(filepath).as_posix() @@ -634,95 +747,120 @@ def upload_file(filepath, s3key=None): if buildstock_csv_filename is not None: buildstock_csv_filepath = Path(buildstock_csv_filename) if buildstock_csv_filepath.exists(): - tasks.append(dask.delayed(upload_file)( - buildstock_csv_filepath, - f"{s3_prefix_output}buildstock_csv/{buildstock_csv_filepath.name}" - )) + tasks.append( + dask.delayed(upload_file)( + buildstock_csv_filepath, + f"{s3_prefix_output}buildstock_csv/{buildstock_csv_filepath.name}", + ) + ) else: logger.warning(f"{buildstock_csv_filename} doesn't exist, can't upload.") dask.compute(tasks) - logger.info(f"Upload to S3 completed. The files are uploaded to: {s3_bucket}/{s3_prefix_output}") + logger.info( + f"Upload to S3 completed. 
The files are uploaded to: {s3_bucket}/{s3_prefix_output}" + ) return s3_bucket, s3_prefix_output def create_athena_tables(aws_conf, tbl_prefix, s3_bucket, s3_prefix): logger.info("Creating Athena tables using glue crawler") - region_name = aws_conf.get('region_name', 'us-west-2') - db_name = aws_conf.get('athena', {}).get('database_name', None) - role = aws_conf.get('athena', {}).get('glue_service_role', 'service-role/AWSGlueServiceRole-default') - max_crawling_time = aws_conf.get('athena', {}).get('max_crawling_time', 600) + region_name = aws_conf.get("region_name", "us-west-2") + db_name = aws_conf.get("athena", {}).get("database_name", None) + role = aws_conf.get("athena", {}).get( + "glue_service_role", "service-role/AWSGlueServiceRole-default" + ) + max_crawling_time = aws_conf.get("athena", {}).get("max_crawling_time", 600) assert db_name, "athena:database_name not supplied" # Check that there are files in the s3 bucket before creating and running glue crawler - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") bucket = s3.Bucket(s3_bucket) - s3_path = f's3://{s3_bucket}/{s3_prefix}' + s3_path = f"s3://{s3_bucket}/{s3_prefix}" n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix))) if n_existing_files == 0: - logger.warning(f"There are no files in {s3_path}, Athena tables will not be created as intended") + logger.warning( + f"There are no files in {s3_path}, Athena tables will not be created as intended" + ) return - glueClient = boto3.client('glue', region_name=region_name) + glueClient = boto3.client("glue", region_name=region_name) crawlTarget = { - 'S3Targets': [{ - 'Path': s3_path, - 'Exclusions': ['**_metadata', '**_common_metadata'] - }] + "S3Targets": [ + {"Path": s3_path, "Exclusions": ["**_metadata", "**_common_metadata"]} + ] } - crawler_name = db_name + '_' + tbl_prefix - tbl_prefix = tbl_prefix + '_' + crawler_name = db_name + "_" + tbl_prefix + tbl_prefix = tbl_prefix + "_" def create_crawler(): - glueClient.create_crawler(Name=crawler_name, - Role=role, - Targets=crawlTarget, - DatabaseName=db_name, - TablePrefix=tbl_prefix) + glueClient.create_crawler( + Name=crawler_name, + Role=role, + Targets=crawlTarget, + DatabaseName=db_name, + TablePrefix=tbl_prefix, + ) try: create_crawler() except glueClient.exceptions.AlreadyExistsException: logger.info(f"Deleting existing crawler: {crawler_name}. And creating new one.") glueClient.delete_crawler(Name=crawler_name) - time.sleep(1) # A small delay after deleting is required to prevent AlreadyExistsException again + time.sleep( + 1 + ) # A small delay after deleting is required to prevent AlreadyExistsException again create_crawler() try: - existing_tables = [x['Name'] for x in glueClient.get_tables(DatabaseName=db_name)['TableList']] + existing_tables = [ + x["Name"] for x in glueClient.get_tables(DatabaseName=db_name)["TableList"] + ] except glueClient.exceptions.EntityNotFoundException: existing_tables = [] to_be_deleted_tables = [x for x in existing_tables if x.startswith(tbl_prefix)] if to_be_deleted_tables: - logger.info(f"Deleting existing tables in db {db_name}: {to_be_deleted_tables}. And creating new ones.") - glueClient.batch_delete_table(DatabaseName=db_name, TablesToDelete=to_be_deleted_tables) + logger.info( + f"Deleting existing tables in db {db_name}: {to_be_deleted_tables}. And creating new ones." 
+ ) + glueClient.batch_delete_table( + DatabaseName=db_name, TablesToDelete=to_be_deleted_tables + ) glueClient.start_crawler(Name=crawler_name) logger.info("Crawler started") is_crawler_running = True t = time.time() while time.time() - t < (3 * max_crawling_time): - crawler_state = glueClient.get_crawler(Name=crawler_name)['Crawler']['State'] - metrics = glueClient.get_crawler_metrics(CrawlerNameList=[crawler_name])['CrawlerMetricsList'][0] - if is_crawler_running and crawler_state != 'RUNNING': + crawler_state = glueClient.get_crawler(Name=crawler_name)["Crawler"]["State"] + metrics = glueClient.get_crawler_metrics(CrawlerNameList=[crawler_name])[ + "CrawlerMetricsList" + ][0] + if is_crawler_running and crawler_state != "RUNNING": is_crawler_running = False logger.info(f"Crawler has completed running. It is {crawler_state}.") - logger.info(f"TablesCreated: {metrics['TablesCreated']} " - f"TablesUpdated: {metrics['TablesUpdated']} " - f"TablesDeleted: {metrics['TablesDeleted']} ") - if crawler_state == 'READY': + logger.info( + f"TablesCreated: {metrics['TablesCreated']} " + f"TablesUpdated: {metrics['TablesUpdated']} " + f"TablesDeleted: {metrics['TablesDeleted']} " + ) + if crawler_state == "READY": logger.info("Crawler stopped. Deleting it now.") glueClient.delete_crawler(Name=crawler_name) break elif time.time() - t > max_crawling_time: logger.info("Crawler is taking too long. Aborting ...") - logger.info(f"TablesCreated: {metrics['TablesCreated']} " - f"TablesUpdated: {metrics['TablesUpdated']} " - f"TablesDeleted: {metrics['TablesDeleted']} ") + logger.info( + f"TablesCreated: {metrics['TablesCreated']} " + f"TablesUpdated: {metrics['TablesUpdated']} " + f"TablesDeleted: {metrics['TablesDeleted']} " + ) glueClient.stop_crawler(Name=crawler_name) elif time.time() - t > 2 * max_crawling_time: - logger.warning(f"Crawler could not be stopped and deleted. Please delete the crawler {crawler_name} " - f"manually from the AWS console") + logger.warning( + f"Crawler could not be stopped and deleted. Please delete the crawler {crawler_name} " + f"manually from the AWS console" + ) break time.sleep(30) diff --git a/buildstockbatch/sampler/__init__.py b/buildstockbatch/sampler/__init__.py index 1cb55992..f821ad37 100644 --- a/buildstockbatch/sampler/__init__.py +++ b/buildstockbatch/sampler/__init__.py @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- -from .residential_quota import ResidentialQuotaSampler, ResidentialQuotaDownselectSampler # noqa F041 +from .residential_quota import ( + ResidentialQuotaSampler, + ResidentialQuotaDownselectSampler, +) # noqa F041 from .commercial_sobol import CommercialSobolSampler # noqa F041 from .precomputed import PrecomputedSampler # noqa F041 diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index 8ae55bed..a150958d 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -19,7 +19,6 @@ class BuildStockSampler(object): - csv_path = None @staticmethod @@ -47,11 +46,20 @@ def __init__(self, parent): :param parent: The BuildStockBatchBase object that owns this sampler. """ - self.parent = weakref.ref(parent) # This removes circular references and allows garbage collection to work. - if self.container_runtime in (ContainerRuntime.DOCKER, ContainerRuntime.LOCAL_OPENSTUDIO): - self.csv_path = os.path.join(self.project_dir, 'housing_characteristics', 'buildstock.csv') + self.parent = weakref.ref( + parent + ) # This removes circular references and allows garbage collection to work. 
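+        # (A weakref is dereferenced by calling it, e.g. self.parent().output_dir;
+        # the call returns None once the parent object has been garbage collected.)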
+ if self.container_runtime in ( + ContainerRuntime.DOCKER, + ContainerRuntime.LOCAL_OPENSTUDIO, + ): + self.csv_path = os.path.join( + self.project_dir, "housing_characteristics", "buildstock.csv" + ) elif self.container_runtime == ContainerRuntime.SINGULARITY: - self.csv_path = os.path.join(self.parent().output_dir, 'housing_characteristics', 'buildstock.csv') + self.csv_path = os.path.join( + self.parent().output_dir, "housing_characteristics", "buildstock.csv" + ) else: self.csv_path = None diff --git a/buildstockbatch/sampler/commercial_sobol.py b/buildstockbatch/sampler/commercial_sobol.py index 7e1ac4b8..ae8f1bd5 100644 --- a/buildstockbatch/sampler/commercial_sobol.py +++ b/buildstockbatch/sampler/commercial_sobol.py @@ -28,7 +28,6 @@ class CommercialSobolSampler(BuildStockSampler): - def __init__(self, parent, n_datapoints): """ Initialize the sampler. @@ -41,26 +40,32 @@ def __init__(self, parent, n_datapoints): super().__init__(parent) self.validate_args(self.parent().project_filename, n_datapoints=n_datapoints) if self.container_runtime == ContainerRuntime.SINGULARITY: - self.csv_path = os.path.join(self.output_dir, 'buildstock.csv') + self.csv_path = os.path.join(self.output_dir, "buildstock.csv") else: - assert self.container_runtime in (ContainerRuntime.DOCKER, ContainerRuntime.LOCAL_OPENSTUDIO) - self.csv_path = os.path.join(self.project_dir, 'buildstock.csv') + assert self.container_runtime in ( + ContainerRuntime.DOCKER, + ContainerRuntime.LOCAL_OPENSTUDIO, + ) + self.csv_path = os.path.join(self.project_dir, "buildstock.csv") self.n_datapoints = n_datapoints @classmethod def validate_args(cls, project_filename, **kw): - expected_args = set(['n_datapoints']) + expected_args = set(["n_datapoints"]) for k, v in kw.items(): expected_args.discard(k) - if k == 'n_datapoints': + if k == "n_datapoints": if not isinstance(v, int): - raise ValidationError('n_datapoints needs to be an integer') + raise ValidationError("n_datapoints needs to be an integer") if v <= 0: - raise ValidationError('n_datapoints need to be >= 1') + raise ValidationError("n_datapoints need to be >= 1") else: - raise ValidationError(f'Unknown argument for sampler: {k}') + raise ValidationError(f"Unknown argument for sampler: {k}") if len(expected_args) > 0: - raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + raise ValidationError( + "The following sampler arguments are required: " + + ", ".join(expected_args) + ) return True def run_sampling(self): @@ -74,33 +79,44 @@ def run_sampling(self): :param n_datapoints: Number of datapoints to sample from the distributions. 
:return: Absolute path to the output buildstock.csv file """ - sample_number = self.cfg['baseline'].get('n_datapoints', 350000) + sample_number = self.cfg["baseline"].get("n_datapoints", 350000) if isinstance(self.n_datapoints, int): sample_number = self.n_datapoints - logging.debug(f'Sampling, number of data points is {sample_number}') + logging.debug(f"Sampling, number of data points is {sample_number}") tsv_hash = {} for tsv_file in os.listdir(self.buildstock_dir): - if '.tsv' in tsv_file: - tsv_df = read_csv(os.path.join(self.buildstock_dir, tsv_file), sep='\t') - dependency_columns = [item for item in list(tsv_df) if 'Dependency=' in item] - tsv_df[dependency_columns] = tsv_df[dependency_columns].astype('str') - tsv_hash[tsv_file.replace('.tsv', '')] = tsv_df + if ".tsv" in tsv_file: + tsv_df = read_csv(os.path.join(self.buildstock_dir, tsv_file), sep="\t") + dependency_columns = [ + item for item in list(tsv_df) if "Dependency=" in item + ] + tsv_df[dependency_columns] = tsv_df[dependency_columns].astype("str") + tsv_hash[tsv_file.replace(".tsv", "")] = tsv_df dependency_hash, attr_order = self._com_order_tsvs(tsv_hash) - sample_matrix = self._com_execute_sobol_sampling(attr_order.__len__(), sample_number) + sample_matrix = self._com_execute_sobol_sampling( + attr_order.__len__(), sample_number + ) csv_path = self.csv_path - header = 'Building,' + header = "Building," for item in attr_order: - header += str(item) + ',' - header = header[0:-1] + '\n' - with open(csv_path, 'w') as fd: + header += str(item) + "," + header = header[0:-1] + "\n" + with open(csv_path, "w") as fd: fd.write(header) manager = Manager() lock = manager.Lock() - logger.info('Beginning sampling process') + logger.info("Beginning sampling process") n_jobs = cpu_count() * 2 Parallel(n_jobs=n_jobs, verbose=5)( - delayed(self._com_execute_sample)(tsv_hash, dependency_hash, attr_order, sample_matrix, index, csv_path, - lock) + delayed(self._com_execute_sample)( + tsv_hash, + dependency_hash, + attr_order, + sample_matrix, + index, + csv_path, + lock, + ) for index in range(sample_number) ) return csv_path @@ -115,7 +131,9 @@ def _com_execute_sobol_sampling(n_dims, n_samples): :param n_samples: Number of samples to calculate :return: Pandas DataFrame object which contains the low discrepancy result of the sobol algorithm """ - return pd.DataFrame(i4_sobol_generate(n_dims, n_samples, 0)).replace(1.0, 0.999999) + return pd.DataFrame(i4_sobol_generate(n_dims, n_samples, 0)).replace( + 1.0, 0.999999 + ) @staticmethod def _com_order_tsvs(tsv_hash): @@ -127,8 +145,11 @@ def _com_order_tsvs(tsv_hash): """ dependency_hash = {} for attr in tsv_hash.keys(): - dependency_hash[attr] = [item.replace('Dependency=', '') for item in list(tsv_hash[attr]) if - 'Dependency=' in item] + dependency_hash[attr] = [ + item.replace("Dependency=", "") + for item in list(tsv_hash[attr]) + if "Dependency=" in item + ] attr_order = [] for attr in dependency_hash.keys(): if dependency_hash[attr]: @@ -149,11 +170,21 @@ def _com_order_tsvs(tsv_hash): elif max_iterations > 0: max_iterations -= 1 else: - raise RuntimeError('Unable to resolve the dependency tree within the set iteration limit') + raise RuntimeError( + "Unable to resolve the dependency tree within the set iteration limit" + ) return dependency_hash, attr_order @staticmethod - def _com_execute_sample(tsv_hash, dependency_hash, attr_order, sample_matrix, sample_index, csv_path, lock): + def _com_execute_sample( + tsv_hash, + dependency_hash, + attr_order, + sample_matrix, + sample_index, + 
csv_path, + lock, + ): """ This function evaluates a single point in the sample matrix with the provided TSV files & persists the result\ of the sample to the CSV file specified. The provided lock ensures the file is not corrupted by multiple\ @@ -174,27 +205,40 @@ def _com_execute_sample(tsv_hash, dependency_hash, attr_order, sample_matrix, sa tsv_lkup = tsv_hash[attr] tsv_dist_val = sample_vector[attr_index] for dependency in sample_dependency_hash[attr]: - tsv_lkup = tsv_lkup.loc[tsv_lkup.loc[:, 'Dependency=' + dependency] == - sample_dependency_hash[dependency]] - tsv_lkup = tsv_lkup.drop('Dependency=' + dependency, axis=1) + tsv_lkup = tsv_lkup.loc[ + tsv_lkup.loc[:, "Dependency=" + dependency] + == sample_dependency_hash[dependency] + ] + tsv_lkup = tsv_lkup.drop("Dependency=" + dependency, axis=1) if tsv_lkup.shape[0] == 0: - warn('TSV lookup reduced to 0 for {}, index {}, dep hash {}'.format(attr, sample_index, - sample_dependency_hash)) + warn( + "TSV lookup reduced to 0 for {}, index {}, dep hash {}".format( + attr, sample_index, sample_dependency_hash + ) + ) return if tsv_lkup.shape[0] != 1: - raise RuntimeError('Unable to reduce tsv for {} to 1 row, index {}'.format(attr, sample_index)) + raise RuntimeError( + "Unable to reduce tsv for {} to 1 row, index {}".format( + attr, sample_index + ) + ) tsv_lkup_cdf = tsv_lkup.values.cumsum() > tsv_dist_val - option_values = [item.replace('Option=', '') for item in list(tsv_lkup) if 'Option=' in item] + option_values = [ + item.replace("Option=", "") + for item in list(tsv_lkup) + if "Option=" in item + ] attr_result = list(compress(option_values, tsv_lkup_cdf))[0] sample_dependency_hash[attr] = attr_result result_vector.append(attr_result) - csv_row = str(sample_index + 1) + ',' + csv_row = str(sample_index + 1) + "," for item in result_vector: - csv_row += str(item) + ',' - csv_row = csv_row[0:-1] + '\n' + csv_row += str(item) + "," + csv_row = csv_row[0:-1] + "\n" lock.acquire() try: - with open(csv_path, 'a') as fd: + with open(csv_path, "a") as fd: fd.write(csv_row) finally: lock.release() diff --git a/buildstockbatch/sampler/downselect.py b/buildstockbatch/sampler/downselect.py index e71578c1..a7fa9e6b 100644 --- a/buildstockbatch/sampler/downselect.py +++ b/buildstockbatch/sampler/downselect.py @@ -22,7 +22,6 @@ class DownselectSamplerBase(BuildStockSampler): - SUB_SAMPLER_CLASS = None def __init__(self, parent, n_datapoints, logic, resample=True, **kw): @@ -58,39 +57,42 @@ def __init__(self, parent, n_datapoints, logic, resample=True, **kw): @classmethod def validate_args(cls, project_filename, **kw): - expected_args = set(['logic']) + expected_args = set(["logic"]) extra_kw = {} for k, v in kw.items(): expected_args.discard(k) - if k == 'logic': + if k == "logic": # TODO: do some validation of the logic here. 
pass - elif k == 'resample': + elif k == "resample": pass else: extra_kw[k] = v if len(expected_args) > 0: - raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + raise ValidationError( + "The following sampler arguments are required: " + + ", ".join(expected_args) + ) cls.SUB_SAMPLER_CLASS.validate_args(project_filename, **extra_kw) return True @classmethod def downselect_logic(cls, df, logic): if isinstance(logic, dict): - assert (len(logic) == 1) + assert len(logic) == 1 key = list(logic.keys())[0] values = logic[key] - if key == 'and': + if key == "and": retval = cls.downselect_logic(df, values[0]) for value in values[1:]: retval &= cls.downselect_logic(df, value) return retval - elif key == 'or': + elif key == "or": retval = cls.downselect_logic(df, values[0]) for value in values[1:]: retval |= cls.downselect_logic(df, value) return retval - elif key == 'not': + elif key == "not": return ~cls.downselect_logic(df, values) elif isinstance(logic, list): retval = cls.downselect_logic(df, logic[0]) @@ -98,32 +100,42 @@ def downselect_logic(cls, df, logic): retval &= cls.downselect_logic(df, value) return retval elif isinstance(logic, str): - key, value = logic.split('|') + key, value = logic.split("|") return df[key] == value def run_sampling(self): if self.resample: - logger.debug('Performing initial sampling to figure out number of samples for downselect') + logger.debug( + "Performing initial sampling to figure out number of samples for downselect" + ) n_samples_init = 350000 - init_sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples_init, **self.sub_kw) + init_sampler = self.SUB_SAMPLER_CLASS( + self.parent(), n_datapoints=n_samples_init, **self.sub_kw + ) buildstock_csv_filename = init_sampler.run_sampling() df = read_csv(buildstock_csv_filename, index_col=0, dtype=str) df_new = df[self.downselect_logic(df, self.logic)] downselected_n_samples_init = df_new.shape[0] - n_samples = math.ceil(self.n_datapoints * n_samples_init / downselected_n_samples_init) + n_samples = math.ceil( + self.n_datapoints * n_samples_init / downselected_n_samples_init + ) os.remove(buildstock_csv_filename) del init_sampler else: n_samples = self.n_datapoints - sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples, **self.sub_kw) + sampler = self.SUB_SAMPLER_CLASS( + self.parent(), n_datapoints=n_samples, **self.sub_kw + ) buildstock_csv_filename = sampler.run_sampling() - with gzip.open(os.path.splitext(buildstock_csv_filename)[0] + '_orig.csv.gz', 'wb') as f_out: - with open(buildstock_csv_filename, 'rb') as f_in: + with gzip.open( + os.path.splitext(buildstock_csv_filename)[0] + "_orig.csv.gz", "wb" + ) as f_out: + with open(buildstock_csv_filename, "rb") as f_in: shutil.copyfileobj(f_in, f_out) - df = read_csv(buildstock_csv_filename, index_col=0, dtype='str') + df = read_csv(buildstock_csv_filename, index_col=0, dtype="str") df_new = df[self.downselect_logic(df, self.logic)] if len(df_new.index) == 0: - raise RuntimeError('There are no buildings left after the down select!') + raise RuntimeError("There are no buildings left after the down select!") if self.resample: old_index_name = df_new.index.name df_new.index = np.arange(len(df_new)) + 1 diff --git a/buildstockbatch/sampler/precomputed.py b/buildstockbatch/sampler/precomputed.py index f42ca8ad..4a1ef813 100644 --- a/buildstockbatch/sampler/precomputed.py +++ b/buildstockbatch/sampler/precomputed.py @@ -22,7 +22,6 @@ class PrecomputedSampler(BuildStockSampler): - def 
__init__(self, parent, sample_file): """Precomputed Sampler @@ -38,16 +37,16 @@ def __init__(self, parent, sample_file): @classmethod def validate_args(cls, project_filename, **kw): - expected_args = set(['sample_file']) + expected_args = set(["sample_file"]) for k, v in kw.items(): expected_args.discard(k) - if k == 'sample_file': + if k == "sample_file": if not isinstance(v, str): - raise ValidationError('sample_file should be a path string') + raise ValidationError("sample_file should be a path string") if not os.path.exists(path_rel_to_file(project_filename, v)): - raise ValidationError(f'sample_file doesn\'t exist: {v}') + raise ValidationError(f"sample_file doesn't exist: {v}") else: - raise ValidationError(f'Unknown argument for sampler: {k}') + raise ValidationError(f"Unknown argument for sampler: {k}") return True def run_sampling(self): diff --git a/buildstockbatch/sampler/residential_quota.py b/buildstockbatch/sampler/residential_quota.py index 9789327a..f534b0dd 100644 --- a/buildstockbatch/sampler/residential_quota.py +++ b/buildstockbatch/sampler/residential_quota.py @@ -24,7 +24,6 @@ class ResidentialQuotaSampler(BuildStockSampler): - def __init__(self, parent, n_datapoints): """Residential Quota Sampler @@ -39,68 +38,79 @@ def __init__(self, parent, n_datapoints): @classmethod def validate_args(cls, project_filename, **kw): - expected_args = set(['n_datapoints']) + expected_args = set(["n_datapoints"]) for k, v in kw.items(): expected_args.discard(k) - if k == 'n_datapoints': + if k == "n_datapoints": if not isinstance(v, int): - raise ValidationError('n_datapoints needs to be an integer') + raise ValidationError("n_datapoints needs to be an integer") if v <= 0: - raise ValidationError('n_datapoints need to be >= 1') + raise ValidationError("n_datapoints need to be >= 1") else: - raise ValidationError(f'Unknown argument for sampler: {k}') + raise ValidationError(f"Unknown argument for sampler: {k}") if len(expected_args) > 0: - raise ValidationError('The following sampler arguments are required: ' + ', '.join(expected_args)) + raise ValidationError( + "The following sampler arguments are required: " + + ", ".join(expected_args) + ) return True def _run_sampling_docker(self): docker_client = docker.DockerClient.from_env() tick = time.time() extra_kws = {} - if sys.platform.startswith('linux'): - extra_kws['user'] = f'{os.getuid()}:{os.getgid()}' + if sys.platform.startswith("linux"): + extra_kws["user"] = f"{os.getuid()}:{os.getgid()}" container_output = docker_client.containers.run( self.parent().docker_image, [ - 'ruby', - 'resources/run_sampling.rb', - '-p', self.cfg['project_directory'], - '-n', str(self.n_datapoints), - '-o', 'buildstock.csv' + "ruby", + "resources/run_sampling.rb", + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-o", + "buildstock.csv", ], remove=True, volumes={ - self.buildstock_dir: {'bind': '/var/simdata/openstudio', 'mode': 'rw'} + self.buildstock_dir: {"bind": "/var/simdata/openstudio", "mode": "rw"} }, - name='buildstock_sampling', - **extra_kws + name="buildstock_sampling", + **extra_kws, ) tick = time.time() - tick - for line in container_output.decode('utf-8').split('\n'): + for line in container_output.decode("utf-8").split("\n"): logger.debug(line) - logger.debug('Sampling took {:.1f} seconds'.format(tick)) + logger.debug("Sampling took {:.1f} seconds".format(tick)) destination_filename = self.csv_path if os.path.exists(destination_filename): os.remove(destination_filename) shutil.move( - 
os.path.join(self.buildstock_dir, 'resources', 'buildstock.csv'), - destination_filename + os.path.join(self.buildstock_dir, "resources", "buildstock.csv"), + destination_filename, ) return destination_filename def _run_sampling_singularity(self): args = [ - 'singularity', - 'exec', - '--contain', - '--home', '{}:/buildstock'.format(self.buildstock_dir), - '--bind', '{}:/outbind'.format(os.path.dirname(self.csv_path)), + "singularity", + "exec", + "--contain", + "--home", + "{}:/buildstock".format(self.buildstock_dir), + "--bind", + "{}:/outbind".format(os.path.dirname(self.csv_path)), self.parent().singularity_image, - 'ruby', - 'resources/run_sampling.rb', - '-p', self.cfg['project_directory'], - '-n', str(self.n_datapoints), - '-o', '../../outbind/{}'.format(os.path.basename(self.csv_path)) + "ruby", + "resources/run_sampling.rb", + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-o", + "../../outbind/{}".format(os.path.basename(self.csv_path)), ] logger.debug(f"Starting singularity sampling with command: {' '.join(args)}") subprocess.run(args, check=True, env=os.environ, cwd=self.parent().output_dir) @@ -111,20 +121,23 @@ def _run_sampling_local_openstudio(self): subprocess.run( [ self.parent().openstudio_exe(), - str(pathlib.Path('resources', 'run_sampling.rb')), - '-p', self.cfg['project_directory'], - '-n', str(self.n_datapoints), - '-o', 'buildstock.csv' + str(pathlib.Path("resources", "run_sampling.rb")), + "-p", + self.cfg["project_directory"], + "-n", + str(self.n_datapoints), + "-o", + "buildstock.csv", ], cwd=self.buildstock_dir, - check=True + check=True, ) destination_filename = pathlib.Path(self.csv_path) if destination_filename.exists(): os.remove(destination_filename) shutil.move( - pathlib.Path(self.buildstock_dir, 'resources', 'buildstock.csv'), - destination_filename + pathlib.Path(self.buildstock_dir, "resources", "buildstock.csv"), + destination_filename, ) return destination_filename diff --git a/buildstockbatch/sampler/sobol_lib.py b/buildstockbatch/sampler/sobol_lib.py index 8a015c3b..40138ec9 100644 --- a/buildstockbatch/sampler/sobol_lib.py +++ b/buildstockbatch/sampler/sobol_lib.py @@ -58,11 +58,11 @@ def i4_bit_hi1(n): # i = int(n) bit = 0 - while (True): - if (i <= 0): + while True: + if i <= 0: break bit += 1 - i = (i // 2) + i = i // 2 return bit @@ -119,10 +119,10 @@ def i4_bit_lo0(n): # bit = 0 i = int(n) - while (1): + while 1: bit = bit + 1 - i2 = (i // 2) - if (i == 2 * i2): + i2 = i // 2 + if i == 2 * i2: break i = i2 @@ -242,163 +242,424 @@ def i4_sobol(dim_num, seed): global seed_save global v - if (not 'initialized' in globals().keys()): + if not "initialized" in globals().keys(): initialized = 0 dim_num_save = -1 - if (not initialized or dim_num != dim_num_save): + if not initialized or dim_num != dim_num_save: initialized = 1 dim_max = 40 dim_num_save = -1 log_max = 30 seed_save = -1 # - # Initialize (part of) V. + # Initialize (part of) V. 
# v = zeros((dim_max, log_max)) - v[0:40, 0] = transpose([ \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - - v[2:40, 1] = transpose([ \ - 1, 3, 1, 3, 1, 3, 3, 1, \ - 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, \ - 1, 3, 1, 3, 3, 1, 3, 1, 3, 1, \ - 3, 1, 1, 3, 1, 3, 1, 3, 1, 3]) - - v[3:40, 2] = transpose([ \ - 7, 5, 1, 3, 3, 7, 5, \ - 5, 7, 7, 1, 3, 3, 7, 5, 1, 1, \ - 5, 3, 3, 1, 7, 5, 1, 3, 3, 7, \ - 5, 1, 1, 5, 7, 7, 5, 1, 3, 3]) - - v[5:40, 3] = transpose([ \ - 1, 7, 9, 13, 11, \ - 1, 3, 7, 9, 5, 13, 13, 11, 3, 15, \ - 5, 3, 15, 7, 9, 13, 9, 1, 11, 7, \ - 5, 15, 1, 15, 11, 5, 3, 1, 7, 9]) - - v[7:40, 4] = transpose([ \ - 9, 3, 27, \ - 15, 29, 21, 23, 19, 11, 25, 7, 13, 17, \ - 1, 25, 29, 3, 31, 11, 5, 23, 27, 19, \ - 21, 5, 1, 17, 13, 7, 15, 9, 31, 9]) - - v[13:40, 5] = transpose([ \ - 37, 33, 7, 5, 11, 39, 63, \ - 27, 17, 15, 23, 29, 3, 21, 13, 31, 25, \ - 9, 49, 33, 19, 29, 11, 19, 27, 15, 25]) - - v[19:40, 6] = transpose([ \ - 13, \ - 33, 115, 41, 79, 17, 29, 119, 75, 73, 105, \ - 7, 59, 65, 21, 3, 113, 61, 89, 45, 107]) - - v[37:40, 7] = transpose([ \ - 7, 23, 39]) + v[0:40, 0] = transpose( + [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + ) + + v[2:40, 1] = transpose( + [ + 1, + 3, + 1, + 3, + 1, + 3, + 3, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 3, + 3, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 1, + 3, + 1, + 3, + 1, + 3, + 1, + 3, + ] + ) + + v[3:40, 2] = transpose( + [ + 7, + 5, + 1, + 3, + 3, + 7, + 5, + 5, + 7, + 7, + 1, + 3, + 3, + 7, + 5, + 1, + 1, + 5, + 3, + 3, + 1, + 7, + 5, + 1, + 3, + 3, + 7, + 5, + 1, + 1, + 5, + 7, + 7, + 5, + 1, + 3, + 3, + ] + ) + + v[5:40, 3] = transpose( + [ + 1, + 7, + 9, + 13, + 11, + 1, + 3, + 7, + 9, + 5, + 13, + 13, + 11, + 3, + 15, + 5, + 3, + 15, + 7, + 9, + 13, + 9, + 1, + 11, + 7, + 5, + 15, + 1, + 15, + 11, + 5, + 3, + 1, + 7, + 9, + ] + ) + + v[7:40, 4] = transpose( + [ + 9, + 3, + 27, + 15, + 29, + 21, + 23, + 19, + 11, + 25, + 7, + 13, + 17, + 1, + 25, + 29, + 3, + 31, + 11, + 5, + 23, + 27, + 19, + 21, + 5, + 1, + 17, + 13, + 7, + 15, + 9, + 31, + 9, + ] + ) + + v[13:40, 5] = transpose( + [ + 37, + 33, + 7, + 5, + 11, + 39, + 63, + 27, + 17, + 15, + 23, + 29, + 3, + 21, + 13, + 31, + 25, + 9, + 49, + 33, + 19, + 29, + 11, + 19, + 27, + 15, + 25, + ] + ) + + v[19:40, 6] = transpose( + [ + 13, + 33, + 115, + 41, + 79, + 17, + 29, + 119, + 75, + 73, + 105, + 7, + 59, + 65, + 21, + 3, + 113, + 61, + 89, + 45, + 107, + ] + ) + + v[37:40, 7] = transpose([7, 23, 39]) # - # Set POLY. + # Set POLY. # - poly = [ \ - 1, 3, 7, 11, 13, 19, 25, 37, 59, 47, \ - 61, 55, 41, 67, 97, 91, 109, 103, 115, 131, \ - 193, 137, 145, 143, 241, 157, 185, 167, 229, 171, \ - 213, 191, 253, 203, 211, 239, 247, 285, 369, 299] - - atmost = 2 ** log_max - 1 + poly = [ + 1, + 3, + 7, + 11, + 13, + 19, + 25, + 37, + 59, + 47, + 61, + 55, + 41, + 67, + 97, + 91, + 109, + 103, + 115, + 131, + 193, + 137, + 145, + 143, + 241, + 157, + 185, + 167, + 229, + 171, + 213, + 191, + 253, + 203, + 211, + 239, + 247, + 285, + 369, + 299, + ] + + atmost = 2**log_max - 1 # - # Find the number of bits in ATMOST. + # Find the number of bits in ATMOST. # maxcol = i4_bit_hi1(atmost) # - # Initialize row 1 of V. + # Initialize row 1 of V. # v[0, 0:maxcol] = 1 # - # Things to do only if the dimension changed. 
+ # Things to do only if the dimension changed. # - if (dim_num != dim_num_save): + if dim_num != dim_num_save: # - # Check parameters. + # Check parameters. # - if (dim_num < 1 or dim_max < dim_num): - print('I4_SOBOL - Fatal error!') - print(' The spatial dimension DIM_NUM should satisfy:') - print(' 1 <= DIM_NUM <= %d' % dim_max) - print(' But this input value is DIM_NUM = %d' % dim_num) + if dim_num < 1 or dim_max < dim_num: + print("I4_SOBOL - Fatal error!") + print(" The spatial dimension DIM_NUM should satisfy:") + print(" 1 <= DIM_NUM <= %d" % dim_max) + print(" But this input value is DIM_NUM = %d" % dim_num) return dim_num_save = dim_num # - # Initialize the remaining rows of V. + # Initialize the remaining rows of V. # for i in range(2, dim_num + 1): # - # The bits of the integer POLY(I) gives the form of polynomial I. + # The bits of the integer POLY(I) gives the form of polynomial I. # - # Find the degree of polynomial I from binary encoding. + # Find the degree of polynomial I from binary encoding. # j = poly[i - 1] m = 0 - while (1): - j = math.floor(j / 2.) - if (j <= 0): + while 1: + j = math.floor(j / 2.0) + if j <= 0: break m = m + 1 # - # Expand this bit pattern to separate components of the logical array INCLUD. + # Expand this bit pattern to separate components of the logical array INCLUD. # j = poly[i - 1] includ = zeros(m) for k in range(m, 0, -1): - j2 = math.floor(j / 2.) - includ[k - 1] = (j != 2 * j2) + j2 = math.floor(j / 2.0) + includ[k - 1] = j != 2 * j2 j = j2 # - # Calculate the remaining elements of row I as explained - # in Bratley and Fox, section 2. + # Calculate the remaining elements of row I as explained + # in Bratley and Fox, section 2. # for j in range(m + 1, maxcol + 1): newv = v[i - 1, j - m - 1] l = 1 for k in range(1, m + 1): l = 2 * l - if (includ[k - 1]): + if includ[k - 1]: newv = bitwise_xor(int(newv), int(l * v[i - 1, j - k - 1])) v[i - 1, j - 1] = newv # - # Multiply columns of V by appropriate power of 2. + # Multiply columns of V by appropriate power of 2. # l = 1 for j in range(maxcol - 1, 0, -1): l = 2 * l v[0:dim_num, j - 1] = v[0:dim_num, j - 1] * l # - # RECIPD is 1/(common denominator of the elements in V). + # RECIPD is 1/(common denominator of the elements in V). # recipd = 1.0 / (2 * l) lastq = zeros(dim_num) seed = int(math.floor(seed)) - if (seed < 0): + if seed < 0: seed = 0 - if (seed == 0): + if seed == 0: l = 1 lastq = zeros(dim_num) - elif (seed == seed_save + 1): + elif seed == seed_save + 1: # - # Find the position of the right-hand zero in SEED. + # Find the position of the right-hand zero in SEED. # l = i4_bit_lo0(seed) - elif (seed <= seed_save): - + elif seed <= seed_save: seed_save = 0 l = 1 lastq = zeros(dim_num) @@ -410,8 +671,7 @@ def i4_sobol(dim_num, seed): l = i4_bit_lo0(seed) - elif (seed_save + 1 < seed): - + elif seed_save + 1 < seed: for seed_temp in range(int(seed_save + 1), int(seed)): l = i4_bit_lo0(seed_temp) for i in range(1, dim_num + 1): @@ -419,16 +679,16 @@ def i4_sobol(dim_num, seed): l = i4_bit_lo0(seed) # - # Check that the user is not calling too many times! + # Check that the user is not calling too many times! # - if (maxcol < l): - print('I4_SOBOL - Fatal error!') - print(' Too many calls!') - print(' MAXCOL = %d\n' % maxcol) - print(' L = %d\n' % l) + if maxcol < l: + print("I4_SOBOL - Fatal error!") + print(" Too many calls!") + print(" MAXCOL = %d\n" % maxcol) + print(" L = %d\n" % l) return # - # Calculate the new components of QUASI. + # Calculate the new components of QUASI. 
# quasi = zeros(dim_num) for i in range(1, dim_num + 1): @@ -498,9 +758,9 @@ def i4_uniform(a, b, seed): # # Output, integer SEED, the updated seed. # - if (seed == 0): - print('I4_UNIFORM - Fatal error!') - print(' Input SEED = 0!') + if seed == 0: + print("I4_UNIFORM - Fatal error!") + print(" Input SEED = 0!") seed = math.floor(seed) a = round(a) @@ -508,23 +768,23 @@ def i4_uniform(a, b, seed): seed = mod(seed, 2147483647) - if (seed < 0): + if seed < 0: seed = seed + 2147483647 k = math.floor(seed / 127773) seed = 16807 * (seed - k * 127773) - k * 2836 - if (seed < 0): + if seed < 0: seed = seed + 2147483647 - r = seed * 4.656612875E-10 + r = seed * 4.656612875e-10 # - # Scale R to lie between A-0.5 and B+0.5. + # Scale R to lie between A-0.5 and B+0.5. # r = (1.0 - r) * (min(a, b) - 0.5) + r * (max(a, b) + 0.5) # - # Use rounding to convert R to an integer between A and B. + # Use rounding to convert R to an integer between A and B. # value = round(r) @@ -578,7 +838,7 @@ def prime_ge(n): # than or equal to N. # p = max(math.ceil(n), 2) - while (not isprime(p)): + while not isprime(p): p = p + 1 return p diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index c1e9e460..54a50d37 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -5,68 +5,78 @@ import yaml from pathlib import Path -OUTPUT_FOLDER_NAME = 'output' +OUTPUT_FOLDER_NAME = "output" @pytest.fixture def basic_residential_project_file(): with tempfile.TemporaryDirectory() as test_directory: + def _basic_residential_project_file(update_args={}, raw=False): output_dir = "simulations_job0" if raw else "simulation_output" - buildstock_directory = os.path.join(test_directory, 'openstudio_buildstock') + buildstock_directory = os.path.join(test_directory, "openstudio_buildstock") shutil.copytree( - os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_inputs', 'test_openstudio_buildstock'), - buildstock_directory + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "test_inputs", + "test_openstudio_buildstock", + ), + buildstock_directory, ) - project_directory = 'project_resstock_national' + project_directory = "project_resstock_national" os.makedirs(os.path.join(buildstock_directory, project_directory)) output_directory = os.path.join(test_directory, OUTPUT_FOLDER_NAME) shutil.copytree( - os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_results', output_dir), - os.path.join(output_directory, 'simulation_output') + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "test_results", + output_dir, + ), + os.path.join(output_directory, "simulation_output"), ) # move the job*.json file to appropriate location - if os.path.exists(os.path.join(output_directory, 'simulation_output', 'job0.json')): - shutil.move(os.path.join(output_directory, 'simulation_output', 'job0.json'), - os.path.join(output_directory, 'simulation_output', '..', '..', 'job0.json')) + if os.path.exists( + os.path.join(output_directory, "simulation_output", "job0.json") + ): + shutil.move( + os.path.join(output_directory, "simulation_output", "job0.json"), + os.path.join( + output_directory, "simulation_output", "..", "..", "job0.json" + ), + ) - os.mkdir(os.path.join(output_directory, 'housing_characteristics')) - os.mkdir(os.path.join(buildstock_directory, project_directory, 'housing_characteristics')) + os.mkdir(os.path.join(output_directory, "housing_characteristics")) + os.mkdir( + os.path.join( + buildstock_directory, project_directory, 
"housing_characteristics" + ) + ) cfg = { - 'buildstock_directory': buildstock_directory, - 'project_directory': project_directory, - 'output_directory': output_directory, - 'weather_files_url': 'https://s3.amazonaws.com/epwweatherfiles/project_resstock_national.zip', - 'sampler': { - 'type': 'residential_quota', - 'args': { - 'n_datapoints': 8 - } - }, - 'workflow_generator': { - 'type': 'residential_hpxml', - 'args': { + "buildstock_directory": buildstock_directory, + "project_directory": project_directory, + "output_directory": output_directory, + "weather_files_url": "https://s3.amazonaws.com/epwweatherfiles/project_resstock_national.zip", + "sampler": {"type": "residential_quota", "args": {"n_datapoints": 8}}, + "workflow_generator": { + "type": "residential_hpxml", + "args": { "build_existing_model": { "simulation_control_timestep": 60, "simulation_control_run_period_begin_month": 1, "simulation_control_run_period_begin_day_of_month": 1, "simulation_control_run_period_end_month": 12, "simulation_control_run_period_end_day_of_month": 31, - "simulation_control_run_period_calendar_year": 2007 + "simulation_control_run_period_calendar_year": 2007, }, "emissions": [ { "scenario_name": "LRMER_MidCase_15", "type": "CO2e", - "elec_folder": "data/cambium/LRMER_MidCase_15" - } - ], - "utility_bills": [ - { - "scenario_name": "Bills" + "elec_folder": "data/cambium/LRMER_MidCase_15", } ], + "utility_bills": [{"scenario_name": "Bills"}], "simulation_output_report": { "timeseries_frequency": "hourly", "include_timeseries_total_consumptions": True, @@ -81,36 +91,34 @@ def _basic_residential_project_file(update_args={}, raw=False): "include_timeseries_unmet_hours": True, "include_timeseries_zone_temperatures": True, "include_timeseries_airflows": True, - "include_timeseries_weather": True + "include_timeseries_weather": True, }, "reporting_measures": [], "server_directory_cleanup": { "retain_in_idf": False, - "retain_schedules_csv": False - } - } + "retain_schedules_csv": False, + }, + }, }, - 'baseline': { - 'n_buildings_represented': 80000000, + "baseline": { + "n_buildings_represented": 80000000, }, - 'upgrades': [{ - 'upgrade_name': 'Upgrade1', - 'options': [ - {'option': 'Infiltration|11.25 ACH50'} - ] - }], - 'eagle': { - 'sampling': { - 'time': 20 - }, - 'account': 'testaccount', - 'minutes_per_sim': 1 + "upgrades": [ + { + "upgrade_name": "Upgrade1", + "options": [{"option": "Infiltration|11.25 ACH50"}], + } + ], + "eagle": { + "sampling": {"time": 20}, + "account": "testaccount", + "minutes_per_sim": 1, }, - 'schema_version': '0.3' + "schema_version": "0.3", } cfg.update(update_args) - project_filename = os.path.join(test_directory, 'project.yml') - with open(project_filename, 'w') as f: + project_filename = os.path.join(test_directory, "project.yml") + with open(project_filename, "w") as f: yaml.dump(cfg, f) return project_filename, output_directory diff --git a/buildstockbatch/test/shared_testing_stuff.py b/buildstockbatch/test/shared_testing_stuff.py index e988f6f9..4e33ac43 100644 --- a/buildstockbatch/test/shared_testing_stuff.py +++ b/buildstockbatch/test/shared_testing_stuff.py @@ -4,9 +4,11 @@ resstock_directory = pathlib.Path( - os.environ.get("RESSTOCK_DIR", pathlib.Path(__file__).resolve().parent.parent.parent.parent / "resstock") + os.environ.get( + "RESSTOCK_DIR", + pathlib.Path(__file__).resolve().parent.parent.parent.parent / "resstock", + ) ) resstock_required = pytest.mark.skipif( - not resstock_directory.exists(), - reason="ResStock checkout is not found" + not 
resstock_directory.exists(), reason="ResStock checkout is not found"
 )
diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py
index 5c9c0dfe..536eb32b 100644
--- a/buildstockbatch/test/test_base.py
+++ b/buildstockbatch/test/test_base.py
@@ -22,10 +22,10 @@
 from buildstockbatch.postprocessing import write_dataframe_as_parquet
 from buildstockbatch.utils import read_csv, ContainerRuntime

-dask.config.set(scheduler='synchronous')
+dask.config.set(scheduler="synchronous")

 here = os.path.dirname(os.path.abspath(__file__))
-OUTPUT_FOLDER_NAME = 'output'
+OUTPUT_FOLDER_NAME = "output"

 buildstockbatch.postprocessing.performance_report = MagicMock()

@@ -34,59 +34,75 @@
 def test_reference_scenario(basic_residential_project_file):
     # verify that the reference_scenario gets added to the upgrade file
     upgrade_config = {
-        'upgrades': [
+        "upgrades": [
             {
-                'upgrade_name': 'Triple-Pane Windows',
-                'reference_scenario': 'example_reference_scenario'
+                "upgrade_name": "Triple-Pane Windows",
+                "reference_scenario": "example_reference_scenario",
             }
         ]
     }
     project_filename, results_dir = basic_residential_project_file(upgrade_config)

-    with patch.object(BuildStockBatchBase, 'weather_dir', None), \
-            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
-            patch.object(BuildStockBatchBase, 'results_dir', results_dir):
+    with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object(
+        BuildStockBatchBase, "get_dask_client"
+    ) as get_dask_client_mock, patch.object(
+        BuildStockBatchBase, "results_dir", results_dir
+    ):
         bsb = BuildStockBatchBase(project_filename)
         bsb.process_results()
         get_dask_client_mock.assert_called_once()

     # test results.csv files
-    test_path = os.path.join(results_dir, 'results_csvs')
-    test_csv = read_csv(os.path.join(test_path, 'results_up01.csv.gz')).set_index('building_id').sort_index()
-    assert len(test_csv['apply_upgrade.reference_scenario'].unique()) == 1
-    assert test_csv['apply_upgrade.reference_scenario'].iloc[0] == 'example_reference_scenario'
+    test_path = os.path.join(results_dir, "results_csvs")
+    test_csv = (
+        read_csv(os.path.join(test_path, "results_up01.csv.gz"))
+        .set_index("building_id")
+        .sort_index()
+    )
+    assert len(test_csv["apply_upgrade.reference_scenario"].unique()) == 1
+    assert (
+        test_csv["apply_upgrade.reference_scenario"].iloc[0]
+        == "example_reference_scenario"
+    )


 def test_downselect_integer_options(basic_residential_project_file, mocker):
     with tempfile.TemporaryDirectory() as buildstock_csv_dir:
-        buildstock_csv = os.path.join(buildstock_csv_dir, 'buildstock.csv')
+        buildstock_csv = os.path.join(buildstock_csv_dir, "buildstock.csv")
         valid_option_values = set()
-        with open(os.path.join(here, 'buildstock.csv'), 'r', newline='') as f_in, \
-                open(buildstock_csv, 'w', newline='') as f_out:
+        with open(os.path.join(here, "buildstock.csv"), "r", newline="") as f_in, open(
+            buildstock_csv, "w", newline=""
+        ) as f_out:
             cf_in = csv.reader(f_in)
             cf_out = csv.writer(f_out)
             for i, row in enumerate(cf_in):
                 if i == 0:
-                    col_idx = row.index('Days Shifted')
+                    col_idx = row.index("Days Shifted")
                 else:
                     # Convert values from "Day1" to "1.10" so we hit the bug
-                    row[col_idx] = '{0}.{0}0'.format(re.search(r'Day(\d+)', row[col_idx]).group(1))
+                    row[col_idx] = "{0}.{0}0".format(
+                        re.search(r"Day(\d+)", row[col_idx]).group(1)
+                    )
                 valid_option_values.add(row[col_idx])
                 cf_out.writerow(row)

-        project_filename, results_dir = basic_residential_project_file({
-            'sampler': {
-                'type': 'residential_quota_downselect',
'args': { - 'n_datapoints': 8, - 'resample': False, - 'logic': 'Geometry House Size|1500-2499' + project_filename, results_dir = basic_residential_project_file( + { + "sampler": { + "type": "residential_quota_downselect", + "args": { + "n_datapoints": 8, + "resample": False, + "logic": "Geometry House Size|1500-2499", + }, } } - }) - mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) - mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) - sampler_property_mock = mocker.patch.object(BuildStockBatchBase, 'sampler', new_callable=PropertyMock) + ) + mocker.patch.object(BuildStockBatchBase, "weather_dir", None) + mocker.patch.object(BuildStockBatchBase, "results_dir", results_dir) + sampler_property_mock = mocker.patch.object( + BuildStockBatchBase, "sampler", new_callable=PropertyMock + ) sampler_mock = mocker.MagicMock() sampler_property_mock.return_value = sampler_mock sampler_mock.run_sampling = MagicMock(return_value=buildstock_csv) @@ -94,51 +110,66 @@ def test_downselect_integer_options(basic_residential_project_file, mocker): bsb = BuildStockBatchBase(project_filename) bsb.sampler.run_sampling() sampler_mock.run_sampling.assert_called_once() - with open(buildstock_csv, 'r', newline='') as f: + with open(buildstock_csv, "r", newline="") as f: cf = csv.DictReader(f) for row in cf: - assert row['Days Shifted'] in valid_option_values + assert row["Days Shifted"] in valid_option_values -@patch('buildstockbatch.postprocessing.boto3') +@patch("buildstockbatch.postprocessing.boto3") def test_upload_files(mocked_boto3, basic_residential_project_file): - s3_bucket = 'test_bucket' - s3_prefix = 'test_prefix' - db_name = 'test_db_name' - role = 'test_role' - region = 'test_region' + s3_bucket = "test_bucket" + s3_prefix = "test_prefix" + db_name = "test_db_name" + role = "test_role" + region = "test_region" upload_config = { - 'postprocessing': { - 'aws': { - 'region_name': region, - 's3': { - 'bucket': s3_bucket, - 'prefix': s3_prefix, - }, - 'athena': { - 'glue_service_role': role, - 'database_name': db_name, - 'max_crawling_time': 250 - } - } - } - } + "postprocessing": { + "aws": { + "region_name": region, + "s3": { + "bucket": s3_bucket, + "prefix": s3_prefix, + }, + "athena": { + "glue_service_role": role, + "database_name": db_name, + "max_crawling_time": 250, + }, + } + } + } mocked_glueclient = MagicMock() - mocked_glueclient.get_crawler = MagicMock(return_value={'Crawler': {'State': 'READY'}}) + mocked_glueclient.get_crawler = MagicMock( + return_value={"Crawler": {"State": "READY"}} + ) mocked_boto3.client = MagicMock(return_value=mocked_glueclient) - mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ['a', 'b', 'c']] + mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ["a", "b", "c"]] project_filename, results_dir = basic_residential_project_file(upload_config) - buildstock_csv_path = Path(results_dir).parent / 'openstudio_buildstock' / 'project_resstock_national' / 'housing_characteristics' / 'buildstock.csv' # noqa: E501 + buildstock_csv_path = ( + Path(results_dir).parent + / "openstudio_buildstock" + / "project_resstock_national" + / "housing_characteristics" + / "buildstock.csv" + ) # noqa: E501 shutil.copy2( - Path(__file__).parent / 'test_results' / 'housing_characteristics' / 'buildstock.csv', - buildstock_csv_path + Path(__file__).parent + / "test_results" + / "housing_characteristics" + / "buildstock.csv", + buildstock_csv_path, ) - with patch.object(BuildStockBatchBase, 'weather_dir', None), \ - 
patch.object(BuildStockBatchBase, 'output_dir', results_dir), \
-            patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \
-            patch.object(BuildStockBatchBase, 'results_dir', results_dir), \
-            patch.object(BuildStockBatchBase, 'CONTAINER_RUNTIME', ContainerRuntime.LOCAL_OPENSTUDIO):
+    with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object(
+        BuildStockBatchBase, "output_dir", results_dir
+    ), patch.object(
+        BuildStockBatchBase, "get_dask_client"
+    ) as get_dask_client_mock, patch.object(
+        BuildStockBatchBase, "results_dir", results_dir
+    ), patch.object(
+        BuildStockBatchBase, "CONTAINER_RUNTIME", ContainerRuntime.LOCAL_OPENSTUDIO
+    ):
         bsb = BuildStockBatchBase(project_filename)
         bsb.process_results()
         get_dask_client_mock.assert_called_once()
@@ -147,158 +178,181 @@ def test_upload_files(mocked_boto3, basic_residential_project_file):
     crawler_created = False
     crawler_started = False
     for call in mocked_boto3.mock_calls[2:] + mocked_boto3.client().mock_calls:
-        call_function = call[0].split('.')[-1]  # 0 is for the function name
-        if call_function == 'resource':
-            assert call[1][0] in ['s3']  # call[1] is for the positional arguments
-        if call_function == 'Bucket':
+        call_function = call[0].split(".")[-1]  # 0 is for the function name
+        if call_function == "resource":
+            assert call[1][0] in ["s3"]  # call[1] is for the positional arguments
+        if call_function == "Bucket":
             assert call[1][0] == s3_bucket
-        if call_function == 'upload_file':
+        if call_function == "upload_file":
             source_file_path = call[1][0]
             destination_path = call[1][1]
             files_uploaded.append((source_file_path, destination_path))
-        if call_function == 'create_crawler':
+        if call_function == "create_crawler":
             crawler_para = call[2]  # 2 is for the keyword arguments
             crawler_created = True
-            assert crawler_para['DatabaseName'] == upload_config['postprocessing']['aws']['athena']['database_name']
-            assert crawler_para['Role'] == upload_config['postprocessing']['aws']['athena']['glue_service_role']
-            assert crawler_para['TablePrefix'] == OUTPUT_FOLDER_NAME + '_'
-            assert crawler_para['Name'] == db_name + '_' + OUTPUT_FOLDER_NAME
-            assert crawler_para['Targets']['S3Targets'][0]['Path'] == 's3://' + s3_bucket + '/' + s3_prefix + '/' + \
-                OUTPUT_FOLDER_NAME + '/'
-        if call_function == 'start_crawler':
+            assert (
+                crawler_para["DatabaseName"]
+                == upload_config["postprocessing"]["aws"]["athena"]["database_name"]
+            )
+            assert (
+                crawler_para["Role"]
+                == upload_config["postprocessing"]["aws"]["athena"]["glue_service_role"]
+            )
+            assert crawler_para["TablePrefix"] == OUTPUT_FOLDER_NAME + "_"
+            assert crawler_para["Name"] == db_name + "_" + OUTPUT_FOLDER_NAME
+            assert (
+                crawler_para["Targets"]["S3Targets"][0]["Path"]
+                == "s3://"
+                + s3_bucket
+                + "/"
+                + s3_prefix
+                + "/"
+                + OUTPUT_FOLDER_NAME
+                + "/"
+            )
+        if call_function == "start_crawler":
             assert crawler_created, "crawler attempted to start before creating"
             crawler_started = True
             crawler_para = call[2]  # 2 is for keyword arguments.
- assert crawler_para['Name'] == db_name + '_' + OUTPUT_FOLDER_NAME + assert crawler_para["Name"] == db_name + "_" + OUTPUT_FOLDER_NAME assert crawler_started, "Crawler never started" # check if all the files are properly uploaded - source_path = os.path.join(results_dir, 'parquet') - s3_path = s3_prefix + '/' + OUTPUT_FOLDER_NAME + '/' + source_path = os.path.join(results_dir, "parquet") + s3_path = s3_prefix + "/" + OUTPUT_FOLDER_NAME + "/" - s3_file_path = s3_path + 'baseline/results_up00.parquet' - source_file_path = os.path.join(source_path, 'baseline', 'results_up00.parquet') + s3_file_path = s3_path + "baseline/results_up00.parquet" + source_file_path = os.path.join(source_path, "baseline", "results_up00.parquet") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'upgrades/upgrade=1/results_up01.parquet' - source_file_path = os.path.join(source_path, 'upgrades', 'upgrade=1', 'results_up01.parquet') + s3_file_path = s3_path + "upgrades/upgrade=1/results_up01.parquet" + source_file_path = os.path.join( + source_path, "upgrades", "upgrade=1", "results_up01.parquet" + ) assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'timeseries/upgrade=0/group0.parquet' - source_file_path = os.path.join(source_path, 'timeseries', 'upgrade=0', 'group0.parquet') + s3_file_path = s3_path + "timeseries/upgrade=0/group0.parquet" + source_file_path = os.path.join( + source_path, "timeseries", "upgrade=0", "group0.parquet" + ) assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'timeseries/upgrade=1/group0.parquet' - source_file_path = os.path.join(source_path, 'timeseries', 'upgrade=1', 'group0.parquet') + s3_file_path = s3_path + "timeseries/upgrade=1/group0.parquet" + source_file_path = os.path.join( + source_path, "timeseries", "upgrade=1", "group0.parquet" + ) assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'timeseries/_common_metadata' - source_file_path = os.path.join(source_path, 'timeseries', '_common_metadata') + s3_file_path = s3_path + "timeseries/_common_metadata" + source_file_path = os.path.join(source_path, "timeseries", "_common_metadata") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'timeseries/_metadata' - source_file_path = os.path.join(source_path, 'timeseries', '_metadata') + s3_file_path = s3_path + "timeseries/_metadata" + source_file_path = os.path.join(source_path, "timeseries", "_metadata") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - s3_file_path = s3_path + 'buildstock_csv/buildstock.csv' + s3_file_path = s3_path + "buildstock_csv/buildstock.csv" source_file_path = str(buildstock_csv_path) assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - assert len(files_uploaded) == 0, f"These files shouldn't have been uploaded: {files_uploaded}" + assert ( + len(files_uploaded) == 0 + ), f"These files shouldn't have been uploaded: {files_uploaded}" def test_write_parquet_no_index(): - df = pd.DataFrame(np.random.randn(6, 4), columns=list('abcd'), index=np.arange(6)) + df = pd.DataFrame(np.random.randn(6, 
4), columns=list("abcd"), index=np.arange(6)) with tempfile.TemporaryDirectory() as tmpdir: fs = LocalFileSystem() - filename = os.path.join(tmpdir, 'df.parquet') + filename = os.path.join(tmpdir, "df.parquet") write_dataframe_as_parquet(df, fs, filename) schema = parquet.read_schema(os.path.join(tmpdir, filename)) - assert '__index_level_0__' not in schema.names + assert "__index_level_0__" not in schema.names assert df.columns.values.tolist() == schema.names def test_skipping_baseline(basic_residential_project_file): - project_filename, results_dir = basic_residential_project_file({ - 'baseline': { - 'skip_sims': True, - 'sampling_algorithm': 'quota' - } - }) + project_filename, results_dir = basic_residential_project_file( + {"baseline": {"skip_sims": True, "sampling_algorithm": "quota"}} + ) - sim_output_path = os.path.join(results_dir, 'simulation_output') - shutil.rmtree(os.path.join(sim_output_path, 'timeseries', 'up00')) # remove timeseries results for baseline + sim_output_path = os.path.join(results_dir, "simulation_output") + shutil.rmtree( + os.path.join(sim_output_path, "timeseries", "up00") + ) # remove timeseries results for baseline # remove results.csv data for baseline from results_jobx.json.gz - results_json_filename = os.path.join(sim_output_path, 'results_job0.json.gz') - with gzip.open(results_json_filename, 'rt', encoding='utf-8') as f: + results_json_filename = os.path.join(sim_output_path, "results_job0.json.gz") + with gzip.open(results_json_filename, "rt", encoding="utf-8") as f: dpouts = json.load(f) - dpouts2 = list(filter(lambda x: x['upgrade'] > 0, dpouts)) - with gzip.open(results_json_filename, 'wt', encoding='utf-8') as f: + dpouts2 = list(filter(lambda x: x["upgrade"] > 0, dpouts)) + with gzip.open(results_json_filename, "wt", encoding="utf-8") as f: json.dump(dpouts2, f) # remove jobs for baseline from jobx.json - with open(os.path.join(results_dir, '..', 'job0.json'), 'rt') as f: + with open(os.path.join(results_dir, "..", "job0.json"), "rt") as f: job_json = json.load(f) - job_json['batch'] = list(filter(lambda job: job[1] is not None, job_json['batch'])) - with open(os.path.join(results_dir, '..', 'job0.json'), 'wt') as f: + job_json["batch"] = list(filter(lambda job: job[1] is not None, job_json["batch"])) + with open(os.path.join(results_dir, "..", "job0.json"), "wt") as f: json.dump(job_json, f) # run postprocessing - with patch.object(BuildStockBatchBase, 'weather_dir', None), \ - patch.object(BuildStockBatchBase, 'get_dask_client') as get_dask_client_mock, \ - patch.object(BuildStockBatchBase, 'results_dir', results_dir): - + with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object( + BuildStockBatchBase, "get_dask_client" + ) as get_dask_client_mock, patch.object( + BuildStockBatchBase, "results_dir", results_dir + ): bsb = BuildStockBatchBase(project_filename) bsb.process_results() get_dask_client_mock.assert_called_once() - up00_parquet = os.path.join(results_dir, 'parquet', 'baseline', 'results_up00.parquet') + up00_parquet = os.path.join( + results_dir, "parquet", "baseline", "results_up00.parquet" + ) assert not os.path.exists(up00_parquet) - up01_parquet = os.path.join(results_dir, 'parquet', 'upgrades', 'upgrade=1', 'results_up01.parquet') + up01_parquet = os.path.join( + results_dir, "parquet", "upgrades", "upgrade=1", "results_up01.parquet" + ) assert os.path.exists(up01_parquet) - up00_csv_gz = os.path.join(results_dir, 'results_csvs', 'results_up00.csv.gz') + up00_csv_gz = os.path.join(results_dir, "results_csvs", 
"results_up00.csv.gz") assert not os.path.exists(up00_csv_gz) - up01_csv_gz = os.path.join(results_dir, 'results_csvs', 'results_up01.csv.gz') + up01_csv_gz = os.path.join(results_dir, "results_csvs", "results_up01.csv.gz") assert os.path.exists(up01_csv_gz) def test_provide_buildstock_csv(basic_residential_project_file, mocker): - buildstock_csv = os.path.join(here, 'buildstock.csv') + buildstock_csv = os.path.join(here, "buildstock.csv") df = read_csv(buildstock_csv, dtype=str) - project_filename, results_dir = basic_residential_project_file({ - 'sampler': { - 'type': 'precomputed', - 'args': { - 'sample_file': buildstock_csv - } - } - }) - mocker.patch.object(LocalBatch, 'weather_dir', None) - mocker.patch.object(LocalBatch, 'results_dir', results_dir) + project_filename, results_dir = basic_residential_project_file( + {"sampler": {"type": "precomputed", "args": {"sample_file": buildstock_csv}}} + ) + mocker.patch.object(LocalBatch, "weather_dir", None) + mocker.patch.object(LocalBatch, "results_dir", results_dir) bsb = LocalBatch(project_filename) sampling_output_csv = bsb.sampler.run_sampling() df2 = read_csv(sampling_output_csv, dtype=str) pd.testing.assert_frame_equal(df, df2) - assert (df['Geometry Shared Walls'] == "None").all() # Verify None is being read properly + assert ( + df["Geometry Shared Walls"] == "None" + ).all() # Verify None is being read properly # Test file missing - with open(project_filename, 'r') as f: + with open(project_filename, "r") as f: cfg = yaml.safe_load(f) - cfg['sampler']['args']['sample_file'] = os.path.join(here, 'non_existant_file.csv') - with open(project_filename, 'w') as f: + cfg["sampler"]["args"]["sample_file"] = os.path.join(here, "non_existant_file.csv") + with open(project_filename, "w") as f: yaml.dump(cfg, f) with pytest.raises(ValidationError, match=r"sample_file doesn't exist"): diff --git a/buildstockbatch/test/test_docker.py b/buildstockbatch/test/test_docker.py index 36f110eb..2a18b605 100644 --- a/buildstockbatch/test/test_docker.py +++ b/buildstockbatch/test/test_docker.py @@ -12,26 +12,27 @@ def test_custom_gem_install(basic_residential_project_file): project_filename, results_dir = basic_residential_project_file() # Add custom_gems to the project file - with open(project_filename, 'r') as f: + with open(project_filename, "r") as f: cfg = yaml.safe_load(f) - cfg['baseline']['custom_gems'] = True - with open(project_filename, 'w') as f: + cfg["baseline"]["custom_gems"] = True + with open(project_filename, "w") as f: yaml.dump(cfg, f) - buildstock_directory = cfg['buildstock_directory'] + buildstock_directory = cfg["buildstock_directory"] LocalBatch(project_filename) - bundle_install_log_path = os.path.join(buildstock_directory, - 'resources', - '.custom_gems', - 'bundle_install_output.log') + bundle_install_log_path = os.path.join( + buildstock_directory, "resources", ".custom_gems", "bundle_install_output.log" + ) assert os.path.exists(bundle_install_log_path) os.remove(bundle_install_log_path) - gem_list_log_log_path = os.path.join(buildstock_directory, - 'resources', - '.custom_gems', - 'openstudio_gem_list_output.log') + gem_list_log_log_path = os.path.join( + buildstock_directory, + "resources", + ".custom_gems", + "openstudio_gem_list_output.log", + ) assert os.path.exists(gem_list_log_log_path) os.remove(gem_list_log_log_path) diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 91a16da3..01e27997 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ 
-15,258 +15,312 @@ here = os.path.dirname(os.path.abspath(__file__)) -@patch('buildstockbatch.eagle.subprocess') +@patch("buildstockbatch.eagle.subprocess") def test_hpc_run_building(mock_subprocess, monkeypatch, basic_residential_project_file): - - tar_filename = pathlib.Path(__file__).resolve().parent / 'test_results' / 'simulation_output' / 'simulations_job0.tar.gz' # noqa E501 - with tarfile.open(tar_filename, 'r') as tarf: - osw_dict = json.loads(tarf.extractfile('up00/bldg0000001/in.osw').read().decode('utf-8')) + tar_filename = ( + pathlib.Path(__file__).resolve().parent + / "test_results" + / "simulation_output" + / "simulations_job0.tar.gz" + ) # noqa E501 + with tarfile.open(tar_filename, "r") as tarf: + osw_dict = json.loads( + tarf.extractfile("up00/bldg0000001/in.osw").read().decode("utf-8") + ) project_filename, results_dir = basic_residential_project_file() tmp_path = pathlib.Path(results_dir).parent - sim_path = tmp_path / 'output' / 'simulation_output' / 'up00' / 'bldg0000001' + sim_path = tmp_path / "output" / "simulation_output" / "up00" / "bldg0000001" os.makedirs(sim_path) cfg = get_project_configuration(project_filename) - with patch.object(EagleBatch, 'weather_dir', None), \ - patch.object(EagleBatch, 'create_osw', return_value=osw_dict), \ - patch.object(EagleBatch, 'make_sim_dir', return_value=('bldg0000001up00', sim_path)), \ - patch.object(EagleBatch, 'local_scratch', tmp_path): - + with patch.object(EagleBatch, "weather_dir", None), patch.object( + EagleBatch, "create_osw", return_value=osw_dict + ), patch.object( + EagleBatch, "make_sim_dir", return_value=("bldg0000001up00", sim_path) + ), patch.object( + EagleBatch, "local_scratch", tmp_path + ): # Normal run - run_bldg_args = [ - results_dir, - cfg, - 1, - None - ] + run_bldg_args = [results_dir, cfg, 1, None] EagleBatch.run_building(*run_bldg_args) expected_singularity_args = [ - 'singularity', - 'exec', - '--contain', - '-e', - '--pwd', - '/var/simdata/openstudio', + "singularity", + "exec", + "--contain", + "-e", + "--pwd", + "/var/simdata/openstudio", ] end_expected_singularity_args = [ - str(pathlib.Path('/tmp/scratch/openstudio.simg')), - 'bash', '-x' + str(pathlib.Path("/tmp/scratch/openstudio.simg")), + "bash", + "-x", ] mock_subprocess.run.assert_called_once() args = mock_subprocess.run.call_args[0][0] - for a, b in [args[i:i+2] for i in range(6, len(args) - 3, 2)]: - assert a == '-B' + for a, b in [args[i : i + 2] for i in range(6, len(args) - 3, 2)]: + assert a == "-B" drive, tail = os.path.splitdrive(b) - assert tail.split(':')[1] in ( - '/var/simdata/openstudio', - '/lib/resources', - '/lib/housing_characteristics', - '/measures', - '/weather', - '/tmp', + assert tail.split(":")[1] in ( + "/var/simdata/openstudio", + "/lib/resources", + "/lib/housing_characteristics", + "/measures", + "/weather", + "/tmp", ) assert mock_subprocess.run.call_args[0][0][0:6] == expected_singularity_args assert mock_subprocess.run.call_args[0][0][-3:] == end_expected_singularity_args called_kw = mock_subprocess.run.call_args[1] - assert called_kw.get('check') is True - assert 'input' in called_kw - assert 'stdout' in called_kw - assert 'stderr' in called_kw - assert str(called_kw.get('cwd')) == str(pathlib.Path('/tmp/scratch/output')) - assert called_kw['input'].decode('utf-8').find(' --measures_only') == -1 + assert called_kw.get("check") is True + assert "input" in called_kw + assert "stdout" in called_kw + assert "stderr" in called_kw + assert str(called_kw.get("cwd")) == str(pathlib.Path("/tmp/scratch/output")) + 
assert called_kw["input"].decode("utf-8").find(" --measures_only") == -1 # Measures only run mock_subprocess.reset_mock() shutil.rmtree(sim_path) os.makedirs(sim_path) - monkeypatch.setenv('MEASURESONLY', '1') + monkeypatch.setenv("MEASURESONLY", "1") EagleBatch.run_building(*run_bldg_args) mock_subprocess.run.assert_called_once() assert mock_subprocess.run.call_args[0][0][0:6] == expected_singularity_args assert mock_subprocess.run.call_args[0][0][-3:] == end_expected_singularity_args called_kw = mock_subprocess.run.call_args[1] - assert called_kw.get('check') is True - assert 'input' in called_kw - assert 'stdout' in called_kw - assert 'stderr' in called_kw - assert str(called_kw.get('cwd')) == str(pathlib.Path('/tmp/scratch/output')) - assert called_kw['input'].decode('utf-8').find(' --measures_only') > -1 - - -@patch('buildstockbatch.base.BuildStockBatchBase.validate_options_lookup') -@patch('buildstockbatch.eagle.EagleBatch.validate_output_directory_eagle') -@patch('buildstockbatch.eagle.EagleBatch.validate_singularity_image_eagle') -@patch('buildstockbatch.eagle.subprocess') -def test_user_cli(mock_subprocess, mock_validate_singularity_image_eagle, mock_validate_output_directory_eagle, - mock_validate_options, basic_residential_project_file, monkeypatch): + assert called_kw.get("check") is True + assert "input" in called_kw + assert "stdout" in called_kw + assert "stderr" in called_kw + assert str(called_kw.get("cwd")) == str(pathlib.Path("/tmp/scratch/output")) + assert called_kw["input"].decode("utf-8").find(" --measures_only") > -1 + + +@patch("buildstockbatch.base.BuildStockBatchBase.validate_options_lookup") +@patch("buildstockbatch.eagle.EagleBatch.validate_output_directory_eagle") +@patch("buildstockbatch.eagle.EagleBatch.validate_singularity_image_eagle") +@patch("buildstockbatch.eagle.subprocess") +def test_user_cli( + mock_subprocess, + mock_validate_singularity_image_eagle, + mock_validate_output_directory_eagle, + mock_validate_options, + basic_residential_project_file, + monkeypatch, +): mock_validate_options.return_value = True mock_validate_output_directory_eagle.return_value = True mock_validate_singularity_image_eagle.return_value = True project_filename, results_dir = basic_residential_project_file() shutil.rmtree(results_dir) - monkeypatch.setenv('CONDA_PREFIX', 'something') + monkeypatch.setenv("CONDA_PREFIX", "something") argv = [project_filename] user_cli(argv) mock_subprocess.run.assert_called_once() - eagle_sh = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'eagle.sh')) + eagle_sh = os.path.abspath( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "eagle.sh") + ) assert mock_subprocess.run.call_args[0][0][-1] == eagle_sh - assert '--time=20' in mock_subprocess.run.call_args[0][0] - assert '--account=testaccount' in mock_subprocess.run.call_args[0][0] - assert '--nodes=1' in mock_subprocess.run.call_args[0][0] - assert '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY' in mock_subprocess.run.call_args[0][0] - assert '--output=sampling.out' in mock_subprocess.run.call_args[0][0] - assert '--qos=high' not in mock_subprocess.run.call_args[0][0] - assert '0' == mock_subprocess.run.call_args[1]['env']['MEASURESONLY'] + assert "--time=20" in mock_subprocess.run.call_args[0][0] + assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] + assert "--nodes=1" in mock_subprocess.run.call_args[0][0] + assert ( + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" + in 
mock_subprocess.run.call_args[0][0] + ) + assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] + assert "--qos=high" not in mock_subprocess.run.call_args[0][0] + assert "0" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] mock_subprocess.reset_mock() shutil.rmtree(results_dir) - argv = ['--hipri', project_filename] + argv = ["--hipri", project_filename] user_cli(argv) mock_subprocess.run.assert_called_once() - assert '--time=20' in mock_subprocess.run.call_args[0][0] - assert '--account=testaccount' in mock_subprocess.run.call_args[0][0] - assert '--nodes=1' in mock_subprocess.run.call_args[0][0] - assert '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY' in mock_subprocess.run.call_args[0][0] - assert '--output=sampling.out' in mock_subprocess.run.call_args[0][0] - assert '--qos=high' in mock_subprocess.run.call_args[0][0] - assert '0' == mock_subprocess.run.call_args[1]['env']['MEASURESONLY'] - assert '0' == mock_subprocess.run.call_args[1]['env']['SAMPLINGONLY'] + assert "--time=20" in mock_subprocess.run.call_args[0][0] + assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] + assert "--nodes=1" in mock_subprocess.run.call_args[0][0] + assert ( + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" + in mock_subprocess.run.call_args[0][0] + ) + assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] + assert "--qos=high" in mock_subprocess.run.call_args[0][0] + assert "0" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] + assert "0" == mock_subprocess.run.call_args[1]["env"]["SAMPLINGONLY"] mock_subprocess.reset_mock() shutil.rmtree(results_dir) - argv = ['--measures_only', project_filename] + argv = ["--measures_only", project_filename] user_cli(argv) mock_subprocess.run.assert_called_once() - assert '--time=20' in mock_subprocess.run.call_args[0][0] - assert '--account=testaccount' in mock_subprocess.run.call_args[0][0] - assert '--nodes=1' in mock_subprocess.run.call_args[0][0] - assert '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY' in mock_subprocess.run.call_args[0][0] - assert '--output=sampling.out' in mock_subprocess.run.call_args[0][0] - assert '--qos=high' not in mock_subprocess.run.call_args[0][0] - assert '1' == mock_subprocess.run.call_args[1]['env']['MEASURESONLY'] - assert '0' == mock_subprocess.run.call_args[1]['env']['SAMPLINGONLY'] + assert "--time=20" in mock_subprocess.run.call_args[0][0] + assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] + assert "--nodes=1" in mock_subprocess.run.call_args[0][0] + assert ( + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" + in mock_subprocess.run.call_args[0][0] + ) + assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] + assert "--qos=high" not in mock_subprocess.run.call_args[0][0] + assert "1" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] + assert "0" == mock_subprocess.run.call_args[1]["env"]["SAMPLINGONLY"] mock_subprocess.reset_mock() shutil.rmtree(results_dir) - argv = ['--samplingonly', project_filename] + argv = ["--samplingonly", project_filename] user_cli(argv) mock_subprocess.run.assert_called_once() - assert '--time=20' in mock_subprocess.run.call_args[0][0] - assert '--account=testaccount' in mock_subprocess.run.call_args[0][0] - assert '--nodes=1' in mock_subprocess.run.call_args[0][0] - assert '--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY' in mock_subprocess.run.call_args[0][0] - assert '--output=sampling.out' in 
mock_subprocess.run.call_args[0][0] - assert '--qos=high' not in mock_subprocess.run.call_args[0][0] - assert '1' == mock_subprocess.run.call_args[1]['env']['SAMPLINGONLY'] - assert '0' == mock_subprocess.run.call_args[1]['env']['MEASURESONLY'] - - -@patch('buildstockbatch.eagle.subprocess') -def test_qos_high_job_submit(mock_subprocess, basic_residential_project_file, monkeypatch): - mock_subprocess.run.return_value.stdout = 'Submitted batch job 1\n' + assert "--time=20" in mock_subprocess.run.call_args[0][0] + assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] + assert "--nodes=1" in mock_subprocess.run.call_args[0][0] + assert ( + "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" + in mock_subprocess.run.call_args[0][0] + ) + assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] + assert "--qos=high" not in mock_subprocess.run.call_args[0][0] + assert "1" == mock_subprocess.run.call_args[1]["env"]["SAMPLINGONLY"] + assert "0" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] + + +@patch("buildstockbatch.eagle.subprocess") +def test_qos_high_job_submit( + mock_subprocess, basic_residential_project_file, monkeypatch +): + mock_subprocess.run.return_value.stdout = "Submitted batch job 1\n" mock_subprocess.PIPE = None project_filename, results_dir = basic_residential_project_file() shutil.rmtree(results_dir) - monkeypatch.setenv('CONDA_PREFIX', 'something') - monkeypatch.setenv('SLURM_JOB_QOS', 'high') + monkeypatch.setenv("CONDA_PREFIX", "something") + monkeypatch.setenv("SLURM_JOB_QOS", "high") - with patch.object(EagleBatch, 'weather_dir', None): + with patch.object(EagleBatch, "weather_dir", None): batch = EagleBatch(project_filename) for i in range(1, 11): - pathlib.Path(results_dir, 'job{:03d}.json'.format(i)).touch() - with open(os.path.join(results_dir, 'job001.json'), 'w') as f: - json.dump({'batch': list(range(100))}, f) + pathlib.Path(results_dir, "job{:03d}.json".format(i)).touch() + with open(os.path.join(results_dir, "job001.json"), "w") as f: + json.dump({"batch": list(range(100))}, f) batch.queue_jobs() mock_subprocess.run.assert_called_once() - assert '--qos=high' in mock_subprocess.run.call_args[0][0] + assert "--qos=high" in mock_subprocess.run.call_args[0][0] mock_subprocess.reset_mock() - mock_subprocess.run.return_value.stdout = 'Submitted batch job 1\n' + mock_subprocess.run.return_value.stdout = "Submitted batch job 1\n" mock_subprocess.PIPE = None - with patch.object(EagleBatch, 'weather_dir', None): + with patch.object(EagleBatch, "weather_dir", None): batch = EagleBatch(project_filename) batch.queue_post_processing() mock_subprocess.run.assert_called_once() - assert '--qos=high' in mock_subprocess.run.call_args[0][0] + assert "--qos=high" in mock_subprocess.run.call_args[0][0] -def test_queue_jobs_minutes_per_sim(mocker, basic_residential_project_file, monkeypatch): - mock_subprocess = mocker.patch('buildstockbatch.eagle.subprocess') - mocker.patch.object(EagleBatch, 'weather_dir', None) - mock_subprocess.run.return_value.stdout = 'Submitted batch job 1\n' +def test_queue_jobs_minutes_per_sim( + mocker, basic_residential_project_file, monkeypatch +): + mock_subprocess = mocker.patch("buildstockbatch.eagle.subprocess") + mocker.patch.object(EagleBatch, "weather_dir", None) + mock_subprocess.run.return_value.stdout = "Submitted batch job 1\n" mock_subprocess.PIPE = None - project_filename, results_dir = basic_residential_project_file(update_args={ - 'eagle': { - 'sampling': { - 'time': 20 - }, - 'account': 
'testaccount', - 'minutes_per_sim': 0.5 + project_filename, results_dir = basic_residential_project_file( + update_args={ + "eagle": { + "sampling": {"time": 20}, + "account": "testaccount", + "minutes_per_sim": 0.5, + } } - }) + ) shutil.rmtree(results_dir) - monkeypatch.setenv('CONDA_PREFIX', 'something') + monkeypatch.setenv("CONDA_PREFIX", "something") batch = EagleBatch(project_filename) for i in range(1, 11): - pathlib.Path(results_dir, 'job{:03d}.json'.format(i)).touch() - with open(os.path.join(results_dir, 'job001.json'), 'w') as f: - json.dump({'batch': list(range(1000))}, f) + pathlib.Path(results_dir, "job{:03d}.json".format(i)).touch() + with open(os.path.join(results_dir, "job001.json"), "w") as f: + json.dump({"batch": list(range(1000))}, f) batch.queue_jobs() mock_subprocess.run.assert_called_once() - assert '--time=14' in mock_subprocess.run.call_args[0][0] + assert "--time=14" in mock_subprocess.run.call_args[0][0] -def test_run_building_process(mocker, basic_residential_project_file): +def test_run_building_process(mocker, basic_residential_project_file): project_filename, results_dir = basic_residential_project_file(raw=True) results_dir = pathlib.Path(results_dir) job_json = { - 'job_num': 1, - 'batch': [(1, 0), (2, 0), (3, 0), (4, 0), (1, None), (2, None), (3, None), (4, None)], - 'n_datapoints': 8 + "job_num": 1, + "batch": [ + (1, 0), + (2, 0), + (3, 0), + (4, 0), + (1, None), + (2, None), + (3, None), + (4, None), + ], + "n_datapoints": 8, } - with open(results_dir / 'job001.json', 'w') as f: + with open(results_dir / "job001.json", "w") as f: json.dump(job_json, f) - sample_buildstock_csv = pd.DataFrame.from_records([{'Building': i, 'Dummy Column': i*i} for i in range(10)]) - os.makedirs(results_dir / 'housing_characteristics', exist_ok=True) - os.makedirs(results_dir / 'weather', exist_ok=True) - sample_buildstock_csv.to_csv(results_dir / 'housing_characteristics' / 'buildstock.csv', index=False) + sample_buildstock_csv = pd.DataFrame.from_records( + [{"Building": i, "Dummy Column": i * i} for i in range(10)] + ) + os.makedirs(results_dir / "housing_characteristics", exist_ok=True) + os.makedirs(results_dir / "weather", exist_ok=True) + sample_buildstock_csv.to_csv( + results_dir / "housing_characteristics" / "buildstock.csv", index=False + ) def sequential_parallel(**kwargs): kw2 = kwargs.copy() - kw2['n_jobs'] = 1 + kw2["n_jobs"] = 1 return joblib.Parallel(**kw2) - mocker.patch('buildstockbatch.eagle.shutil.copy2') - mocker.patch('buildstockbatch.eagle.Parallel', sequential_parallel) - mocker.patch('buildstockbatch.eagle.subprocess') - - mocker.patch.object(EagleBatch, 'local_buildstock_dir', results_dir / 'local_buildstock_dir') - mocker.patch.object(EagleBatch, 'local_weather_dir', results_dir / 'local_weather_dir') - mocker.patch.object(EagleBatch, 'local_output_dir', results_dir) - mocker.patch.object(EagleBatch, 'local_housing_characteristics_dir', - results_dir / 'local_housing_characteristics_dir') - mocker.patch.object(EagleBatch, 'results_dir', results_dir) - mocker.patch.object(EagleBatch, 'local_scratch', results_dir.parent) + mocker.patch("buildstockbatch.eagle.shutil.copy2") + mocker.patch("buildstockbatch.eagle.Parallel", sequential_parallel) + mocker.patch("buildstockbatch.eagle.subprocess") + + mocker.patch.object( + EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir" + ) + mocker.patch.object( + EagleBatch, "local_weather_dir", results_dir / "local_weather_dir" + ) + mocker.patch.object(EagleBatch, "local_output_dir", 
results_dir) + mocker.patch.object( + EagleBatch, + "local_housing_characteristics_dir", + results_dir / "local_housing_characteristics_dir", + ) + mocker.patch.object(EagleBatch, "results_dir", results_dir) + mocker.patch.object(EagleBatch, "local_scratch", results_dir.parent) def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=False): real_upgrade_idx = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = f'bldg{building_id:07d}up{real_upgrade_idx:02d}' - sim_dir = os.path.join(base_dir, f'up{real_upgrade_idx:02d}', f'bldg{building_id:07d}') + sim_id = f"bldg{building_id:07d}up{real_upgrade_idx:02d}" + sim_dir = os.path.join( + base_dir, f"up{real_upgrade_idx:02d}", f"bldg{building_id:07d}" + ) return sim_id, sim_dir - mocker.patch.object(EagleBatch, 'make_sim_dir', make_sim_dir_mock) - sampler_prop_mock = mocker.patch.object(EagleBatch, 'sampler', new_callable=mocker.PropertyMock) + mocker.patch.object(EagleBatch, "make_sim_dir", make_sim_dir_mock) + sampler_prop_mock = mocker.patch.object( + EagleBatch, "sampler", new_callable=mocker.PropertyMock + ) sampler_mock = mocker.MagicMock() sampler_prop_mock.return_value = sampler_mock - sampler_mock.csv_path = results_dir.parent / 'housing_characteristic2' / 'buildstock.csv' - sampler_mock.run_sampling = mocker.MagicMock(return_value='buildstock.csv') + sampler_mock.csv_path = ( + results_dir.parent / "housing_characteristic2" / "buildstock.csv" + ) + sampler_mock.run_sampling = mocker.MagicMock(return_value="buildstock.csv") b = EagleBatch(project_filename) b.run_batch(sampling_only=True) # so the directories can be created @@ -274,110 +328,145 @@ def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=Fal b.run_job_batch(1) # check results job-json - refrence_path = pathlib.Path(__file__).resolve().parent / 'test_results' / 'reference_files' + refrence_path = ( + pathlib.Path(__file__).resolve().parent / "test_results" / "reference_files" + ) - refrence_list = json.loads(gzip.open(refrence_path / 'results_job1.json.gz', 'r').read()) + refrence_list = json.loads( + gzip.open(refrence_path / "results_job1.json.gz", "r").read() + ) - output_list = json.loads(gzip.open(results_dir / 'simulation_output' / 'results_job1.json.gz', 'r').read()) + output_list = json.loads( + gzip.open( + results_dir / "simulation_output" / "results_job1.json.gz", "r" + ).read() + ) refrence_list = [json.dumps(d) for d in refrence_list] output_list = [json.dumps(d) for d in output_list] assert sorted(refrence_list) == sorted(output_list) - ts_files = list(refrence_path.glob('**/*.parquet')) + ts_files = list(refrence_path.glob("**/*.parquet")) def compare_ts_parquets(source, dst): - test_pq = pd.read_parquet(source).reset_index().drop(columns=['index']).rename(columns=str.lower) - reference_pq = pd.read_parquet(dst).reset_index().drop(columns=['index']).rename(columns=str.lower) + test_pq = ( + pd.read_parquet(source) + .reset_index() + .drop(columns=["index"]) + .rename(columns=str.lower) + ) + reference_pq = ( + pd.read_parquet(dst) + .reset_index() + .drop(columns=["index"]) + .rename(columns=str.lower) + ) pd.testing.assert_frame_equal(test_pq, reference_pq) for file in ts_files: - results_file = results_dir / 'results' / 'simulation_output' / 'timeseries' / file.parent.name / file.name + results_file = ( + results_dir + / "results" + / "simulation_output" + / "timeseries" + / file.parent.name + / file.name + ) compare_ts_parquets(file, results_file) # Check that buildstock.csv was trimmed properly - 
local_buildstock_df = read_csv(results_dir / 'local_housing_characteristics_dir' / 'buildstock.csv', dtype=str) - unique_buildings = {str(x[0]) for x in job_json['batch']} + local_buildstock_df = read_csv( + results_dir / "local_housing_characteristics_dir" / "buildstock.csv", dtype=str + ) + unique_buildings = {str(x[0]) for x in job_json["batch"]} assert len(unique_buildings) == len(local_buildstock_df) - assert unique_buildings == set(local_buildstock_df['Building']) + assert unique_buildings == set(local_buildstock_df["Building"]) def test_run_building_error_caught(mocker, basic_residential_project_file): - project_filename, results_dir = basic_residential_project_file() results_dir = pathlib.Path(results_dir) - job_json = { - 'job_num': 1, - 'batch': [(1, 0)], - 'n_datapoints': 1 - } - with open(results_dir / 'job001.json', 'w') as f: + job_json = {"job_num": 1, "batch": [(1, 0)], "n_datapoints": 1} + with open(results_dir / "job001.json", "w") as f: json.dump(job_json, f) - sample_buildstock_csv = pd.DataFrame.from_records([{'Building': i, 'Dummy Column': i * i} for i in range(10)]) - os.makedirs(results_dir / 'housing_characteristics', exist_ok=True) - os.makedirs(results_dir / 'local_housing_characteristics', exist_ok=True) - os.makedirs(results_dir / 'weather', exist_ok=True) - sample_buildstock_csv.to_csv(results_dir / 'housing_characteristics' / 'buildstock.csv', index=False) + sample_buildstock_csv = pd.DataFrame.from_records( + [{"Building": i, "Dummy Column": i * i} for i in range(10)] + ) + os.makedirs(results_dir / "housing_characteristics", exist_ok=True) + os.makedirs(results_dir / "local_housing_characteristics", exist_ok=True) + os.makedirs(results_dir / "weather", exist_ok=True) + sample_buildstock_csv.to_csv( + results_dir / "housing_characteristics" / "buildstock.csv", index=False + ) def raise_error(*args, **kwargs): - raise RuntimeError('A problem happened') + raise RuntimeError("A problem happened") def sequential_parallel(**kwargs): kw2 = kwargs.copy() - kw2['n_jobs'] = 1 + kw2["n_jobs"] = 1 return joblib.Parallel(**kw2) - mocker.patch('buildstockbatch.eagle.shutil.copy2') - mocker.patch('buildstockbatch.eagle.Parallel', sequential_parallel) - mocker.patch('buildstockbatch.eagle.subprocess') - - mocker.patch.object(EagleBatch, 'run_building', raise_error) - mocker.patch.object(EagleBatch, 'local_output_dir', results_dir) - mocker.patch.object(EagleBatch, 'results_dir', results_dir) - mocker.patch.object(EagleBatch, 'local_buildstock_dir', results_dir / 'local_buildstock_dir') - mocker.patch.object(EagleBatch, 'local_weather_dir', results_dir / 'local_weather_dir') - mocker.patch.object(EagleBatch, 'local_housing_characteristics_dir', - results_dir / 'local_housing_characteristics_dir') + mocker.patch("buildstockbatch.eagle.shutil.copy2") + mocker.patch("buildstockbatch.eagle.Parallel", sequential_parallel) + mocker.patch("buildstockbatch.eagle.subprocess") + + mocker.patch.object(EagleBatch, "run_building", raise_error) + mocker.patch.object(EagleBatch, "local_output_dir", results_dir) + mocker.patch.object(EagleBatch, "results_dir", results_dir) + mocker.patch.object( + EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir" + ) + mocker.patch.object( + EagleBatch, "local_weather_dir", results_dir / "local_weather_dir" + ) + mocker.patch.object( + EagleBatch, + "local_housing_characteristics_dir", + results_dir / "local_housing_characteristics_dir", + ) b = EagleBatch(project_filename) b.run_job_batch(1) - traceback_file = results_dir / 
'simulation_output' / 'traceback1.out' + traceback_file = results_dir / "simulation_output" / "traceback1.out" assert traceback_file.exists() - with open(traceback_file, 'r') as f: - assert f.read().find('RuntimeError') > -1 + with open(traceback_file, "r") as f: + assert f.read().find("RuntimeError") > -1 def test_rerun_failed_jobs(mocker, basic_residential_project_file): project_filename, results_dir = basic_residential_project_file() - os.makedirs(os.path.join(results_dir, 'results_csvs')) - os.makedirs(os.path.join(results_dir, 'parquet')) - mocker.patch.object(EagleBatch, 'weather_dir', None) - mocker.patch.object(EagleBatch, 'results_dir', results_dir) - process_results_mocker = mocker.patch.object(BuildStockBatchBase, 'process_results') - queue_jobs_mocker = mocker.patch.object(EagleBatch, 'queue_jobs', return_value=[42]) - queue_post_processing_mocker = mocker.patch.object(EagleBatch, 'queue_post_processing') + os.makedirs(os.path.join(results_dir, "results_csvs")) + os.makedirs(os.path.join(results_dir, "parquet")) + mocker.patch.object(EagleBatch, "weather_dir", None) + mocker.patch.object(EagleBatch, "results_dir", results_dir) + process_results_mocker = mocker.patch.object(BuildStockBatchBase, "process_results") + queue_jobs_mocker = mocker.patch.object(EagleBatch, "queue_jobs", return_value=[42]) + queue_post_processing_mocker = mocker.patch.object( + EagleBatch, "queue_post_processing" + ) b = EagleBatch(project_filename) for job_id in range(1, 6): json_filename = os.path.join(b.output_dir, f"job{job_id:03d}.json") - with open(json_filename, 'w') as f: + with open(json_filename, "w") as f: json.dump({}, f) if job_id == 5: continue out_filename = os.path.join(b.output_dir, f"job.out-{job_id}") with open(out_filename, "w") as f: - f.write('lots of output\ngoes\nhere\n') + f.write("lots of output\ngoes\nhere\n") if job_id % 2 == 0: f.write("Traceback") else: f.write("batch complete") - f.write('\n') + f.write("\n") failed_array_ids = b.get_failed_job_array_ids() assert sorted(failed_array_ids) == [2, 4, 5] @@ -391,16 +480,16 @@ def test_rerun_failed_jobs(mocker, basic_residential_project_file): queue_jobs_mocker.reset_mock() queue_post_processing_mocker.assert_called_once_with([42], hipri=False) queue_post_processing_mocker.reset_mock() - assert not os.path.exists(os.path.join(results_dir, 'results_csvs')) - assert not os.path.exists(os.path.join(results_dir, 'parquet')) + assert not os.path.exists(os.path.join(results_dir, "results_csvs")) + assert not os.path.exists(os.path.join(results_dir, "parquet")) for job_id in range(1, 6): json_filename = os.path.join(b.output_dir, f"job{job_id:03d}.json") - with open(json_filename, 'w') as f: + with open(json_filename, "w") as f: json.dump({}, f) out_filename = os.path.join(b.output_dir, f"job.out-{job_id}") with open(out_filename, "w") as f: - f.write('lots of output\ngoes\nhere\n') + f.write("lots of output\ngoes\nhere\n") f.write("batch complete\n") b.process_results() diff --git a/buildstockbatch/test/test_local.py b/buildstockbatch/test/test_local.py index 7778fe6b..14c0a682 100644 --- a/buildstockbatch/test/test_local.py +++ b/buildstockbatch/test/test_local.py @@ -9,15 +9,22 @@ from buildstockbatch.local import LocalBatch from buildstockbatch.utils import get_project_configuration -from buildstockbatch.test.shared_testing_stuff import resstock_directory, resstock_required - - -@pytest.mark.parametrize("project_filename", [ - resstock_directory / "project_national" / "national_baseline.yml", - resstock_directory / 
"project_national" / "national_upgrades.yml", - resstock_directory / "project_testing" / "testing_baseline.yml", - resstock_directory / "project_testing" / "testing_upgrades.yml", -], ids=lambda x: x.stem) +from buildstockbatch.test.shared_testing_stuff import ( + resstock_directory, + resstock_required, +) + + +@pytest.mark.parametrize( + "project_filename", + [ + resstock_directory / "project_national" / "national_baseline.yml", + resstock_directory / "project_national" / "national_upgrades.yml", + resstock_directory / "project_testing" / "testing_baseline.yml", + resstock_directory / "project_testing" / "testing_upgrades.yml", + ], + ids=lambda x: x.stem, +) @resstock_required def test_resstock_local_batch(project_filename): LocalBatch.validate_project(str(project_filename)) @@ -37,7 +44,11 @@ def test_resstock_local_batch(project_filename): n_datapoints = 2 batch.cfg["sampler"]["args"]["n_datapoints"] = n_datapoints - local_weather_file = resstock_directory.parent / "weather" / batch.cfg["weather_files_url"].split("/")[-1] + local_weather_file = ( + resstock_directory.parent + / "weather" + / batch.cfg["weather_files_url"].split("/")[-1] + ) if local_weather_file.exists(): del batch.cfg["weather_files_url"] batch.cfg["weather_files_path"] = str(local_weather_file) @@ -52,7 +63,12 @@ def test_resstock_local_batch(project_filename): for upgrade_id in range(0, n_upgrades + 1): for bldg_id in range(1, n_datapoints + 1): - assert (simout_path / "timeseries" / f"up{upgrade_id:02d}" / f"bldg{bldg_id:07d}.parquet").exists() + assert ( + simout_path + / "timeseries" + / f"up{upgrade_id:02d}" + / f"bldg{bldg_id:07d}.parquet" + ).exists() batch.process_results() @@ -67,9 +83,17 @@ def test_resstock_local_batch(project_filename): ts_pq_path = out_path / "parquet" / "timeseries" for upgrade_id in range(0, n_upgrades + 1): assert (ts_pq_path / f"upgrade={upgrade_id}" / "group0.parquet").exists() - assert (out_path / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz").exists() + assert ( + out_path / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz" + ).exists() if upgrade_id >= 1: - upg_pq = out_path / "parquet" / "upgrades" / f"upgrade={upgrade_id}" / f"results_up{upgrade_id:02d}.parquet" + upg_pq = ( + out_path + / "parquet" + / "upgrades" + / f"upgrade={upgrade_id}" + / f"results_up{upgrade_id:02d}.parquet" + ) assert upg_pq.exists() upg = pd.read_parquet(upg_pq, columns=["completed_status"]) assert (upg["completed_status"] == "Success").all() @@ -82,16 +106,17 @@ def test_resstock_local_batch(project_filename): @resstock_required def test_local_simulation_timeout(mocker): - def mocked_subprocess_run(run_cmd, **kwargs): assert "timeout" in kwargs.keys() raise subprocess.TimeoutExpired(run_cmd, kwargs["timeout"]) - mocker.patch('buildstockbatch.local.subprocess.run', mocked_subprocess_run) - sleep_mock = mocker.patch('buildstockbatch.local.time.sleep') + mocker.patch("buildstockbatch.local.subprocess.run", mocked_subprocess_run) + sleep_mock = mocker.patch("buildstockbatch.local.time.sleep") - cfg = get_project_configuration(resstock_directory / "project_national" / "national_baseline.yml") - cfg['max_minutes_per_sim'] = 5 + cfg = get_project_configuration( + resstock_directory / "project_national" / "national_baseline.yml" + ) + cfg["max_minutes_per_sim"] = 5 with tempfile.TemporaryDirectory() as tmpdir: LocalBatch.run_building( @@ -99,16 +124,16 @@ def mocked_subprocess_run(run_cmd, **kwargs): str(resstock_directory / "weather"), tmpdir, measures_only=False, - 
n_datapoints=cfg['sampler']['args']['n_datapoints'], + n_datapoints=cfg["sampler"]["args"]["n_datapoints"], cfg=cfg, - i=1 + i=1, ) - sim_path = pathlib.Path(tmpdir, 'simulation_output', 'up00', 'bldg0000001') + sim_path = pathlib.Path(tmpdir, "simulation_output", "up00", "bldg0000001") assert sim_path.is_dir() msg_re = re.compile(r"Terminated \w+ after reaching max time") - with open(sim_path / 'openstudio_output.log', 'r') as f: + with open(sim_path / "openstudio_output.log", "r") as f: os_output = f.read() assert msg_re.search(os_output) @@ -119,10 +144,12 @@ def mocked_subprocess_run(run_cmd, **kwargs): assert out_osw["completed_status"] == "Fail" assert msg_re.search(out_osw["timeout"]) - err_log_re = re.compile(r"\[\d\d:\d\d:\d\d ERROR\] Terminated \w+ after reaching max time") - with open(sim_path / 'run' / 'run.log', 'r') as run_log: + err_log_re = re.compile( + r"\[\d\d:\d\d:\d\d ERROR\] Terminated \w+ after reaching max time" + ) + with open(sim_path / "run" / "run.log", "r") as run_log: err_log_re.search(run_log.read()) - with open(sim_path / 'run' / 'failed.job', 'r') as failed_job: + with open(sim_path / "run" / "failed.job", "r") as failed_job: err_log_re.search(failed_job.read()) sleep_mock.assert_called_once_with(20) diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py index 7e6e2b6e..d11fdb86 100644 --- a/buildstockbatch/test/test_postprocessing.py +++ b/buildstockbatch/test/test_postprocessing.py @@ -18,38 +18,37 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): - reporting_measures = [ - 'ReportingMeasure1', - 'ReportingMeasure2' - ] - project_filename, results_dir = basic_residential_project_file({ - 'reporting_measures': reporting_measures - }) + reporting_measures = ["ReportingMeasure1", "ReportingMeasure2"] + project_filename, results_dir = basic_residential_project_file( + {"reporting_measures": reporting_measures} + ) fs = LocalFileSystem() results_dir = pathlib.Path(results_dir) - sim_out_dir = results_dir / 'simulation_output' - with tarfile.open(sim_out_dir / 'simulations_job0.tar.gz', 'r') as tarf: + sim_out_dir = results_dir / "simulation_output" + with tarfile.open(sim_out_dir / "simulations_job0.tar.gz", "r") as tarf: tarf.extractall(sim_out_dir) dpouts2 = [] - for filename in sim_out_dir.rglob('data_point_out.json'): - with filename.open('rt', encoding='utf-8') as f: + for filename in sim_out_dir.rglob("data_point_out.json"): + with filename.open("rt", encoding="utf-8") as f: dpout = json.load(f) - dpout['ReportingMeasure1'] = {'column_1': 1, 'column_2': 2} - dpout['ReportingMeasure2'] = {'column_3': 3, 'column_4': 4} - with filename.open('wt', encoding='utf-8') as f: + dpout["ReportingMeasure1"] = {"column_1": 1, "column_2": 2} + dpout["ReportingMeasure2"] = {"column_3": 3, "column_4": 4} + with filename.open("wt", encoding="utf-8") as f: json.dump(dpout, f) sim_dir = str(filename.parent.parent) - upgrade_id = int(re.search(r'up(\d+)', sim_dir).group(1)) - building_id = int(re.search(r'bldg(\d+)', sim_dir).group(1)) + upgrade_id = int(re.search(r"up(\d+)", sim_dir).group(1)) + building_id = int(re.search(r"bldg(\d+)", sim_dir).group(1)) dpouts2.append( - postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, building_id) + postprocessing.read_simulation_outputs( + fs, reporting_measures, sim_dir, upgrade_id, building_id + ) ) - with gzip.open(sim_out_dir / 'results_job0.json.gz', 'wt', encoding='utf-8') as f: + with gzip.open(sim_out_dir / 
"results_job0.json.gz", "wt", encoding="utf-8") as f: json.dump(dpouts2, f) cfg = get_project_configuration(project_filename) @@ -57,11 +56,13 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) for upgrade_id in (0, 1): - df = read_csv(str(results_dir / 'results_csvs' / f'results_up{upgrade_id:02d}.csv.gz')) - assert (df['reporting_measure1.column_1'] == 1).all() - assert (df['reporting_measure1.column_2'] == 2).all() - assert (df['reporting_measure2.column_3'] == 3).all() - assert (df['reporting_measure2.column_4'] == 4).all() + df = read_csv( + str(results_dir / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz") + ) + assert (df["reporting_measure1.column_1"] == 1).all() + assert (df["reporting_measure1.column_2"] == 2).all() + assert (df["reporting_measure2.column_3"] == 3).all() + assert (df["reporting_measure2.column_4"] == 4).all() def test_empty_results_assertion(basic_residential_project_file, capsys): @@ -69,11 +70,13 @@ def test_empty_results_assertion(basic_residential_project_file, capsys): fs = LocalFileSystem() results_dir = pathlib.Path(results_dir) - sim_out_dir = results_dir / 'simulation_output' + sim_out_dir = results_dir / "simulation_output" shutil.rmtree(sim_out_dir) # no results cfg = get_project_configuration(project_filename) - with pytest.raises(ValueError, match=r'No simulation results found to post-process'): + with pytest.raises( + ValueError, match=r"No simulation results found to post-process" + ): assert postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) @@ -83,51 +86,54 @@ def test_large_parquet_combine(basic_residential_project_file): project_filename, results_dir = basic_residential_project_file() - with patch.object(BuildStockBatchBase, 'weather_dir', None), \ - patch.object(BuildStockBatchBase, 'get_dask_client'), \ - patch.object(BuildStockBatchBase, 'results_dir', results_dir),\ - patch.object(postprocessing, 'MAX_PARQUET_MEMORY', 1): # set the max memory to just 1MB + with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object( + BuildStockBatchBase, "get_dask_client" + ), patch.object(BuildStockBatchBase, "results_dir", results_dir), patch.object( + postprocessing, "MAX_PARQUET_MEMORY", 1 + ): # set the max memory to just 1MB bsb = BuildStockBatchBase(project_filename) bsb.process_results() # this would raise exception if the postprocessing could not handle the situation -@pytest.mark.parametrize('keep_individual_timeseries', [True, False]) -def test_keep_individual_timeseries(keep_individual_timeseries, basic_residential_project_file, mocker): - project_filename, results_dir = basic_residential_project_file({ - 'postprocessing': { - 'keep_individual_timeseries': keep_individual_timeseries - } - }) +@pytest.mark.parametrize("keep_individual_timeseries", [True, False]) +def test_keep_individual_timeseries( + keep_individual_timeseries, basic_residential_project_file, mocker +): + project_filename, results_dir = basic_residential_project_file( + {"postprocessing": {"keep_individual_timeseries": keep_individual_timeseries}} + ) - mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) - mocker.patch.object(BuildStockBatchBase, 'get_dask_client') - mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + mocker.patch.object(BuildStockBatchBase, "weather_dir", None) + mocker.patch.object(BuildStockBatchBase, "get_dask_client") + mocker.patch.object(BuildStockBatchBase, "results_dir", 
results_dir) bsb = BuildStockBatchBase(project_filename) bsb.process_results() results_path = pathlib.Path(results_dir) - simout_path = results_path / 'simulation_output' - assert len(list(simout_path.glob('results_job*.json.gz'))) == 0 + simout_path = results_path / "simulation_output" + assert len(list(simout_path.glob("results_job*.json.gz"))) == 0 - ts_path = simout_path / 'timeseries' + ts_path = simout_path / "timeseries" assert ts_path.exists() == keep_individual_timeseries def test_upgrade_missing_ts(basic_residential_project_file, mocker, caplog): - caplog.set_level(logging.WARNING, logger='buildstockbatch.postprocessing') + caplog.set_level(logging.WARNING, logger="buildstockbatch.postprocessing") project_filename, results_dir = basic_residential_project_file() results_path = pathlib.Path(results_dir) - for filename in (results_path / 'simulation_output' / 'timeseries' / 'up01').glob('*.parquet'): + for filename in (results_path / "simulation_output" / "timeseries" / "up01").glob( + "*.parquet" + ): os.remove(filename) - mocker.patch.object(BuildStockBatchBase, 'weather_dir', None) - mocker.patch.object(BuildStockBatchBase, 'get_dask_client') - mocker.patch.object(BuildStockBatchBase, 'results_dir', results_dir) + mocker.patch.object(BuildStockBatchBase, "weather_dir", None) + mocker.patch.object(BuildStockBatchBase, "get_dask_client") + mocker.patch.object(BuildStockBatchBase, "results_dir", results_dir) bsb = BuildStockBatchBase(project_filename) bsb.process_results() assert len(caplog.records) == 1 record = caplog.records[0] - assert record.levelname == 'WARNING' - assert record.message == 'There are no timeseries files for upgrade1.' + assert record.levelname == "WARNING" + assert record.message == "There are no timeseries files for upgrade1." diff --git a/buildstockbatch/test/test_utils.py b/buildstockbatch/test/test_utils.py index 62c5d215..096d5f81 100644 --- a/buildstockbatch/test/test_utils.py +++ b/buildstockbatch/test/test_utils.py @@ -5,21 +5,32 @@ def test_str_repr(): - test_obj = [{1, 2, 3, 4, 5, 6}, {"List1": ["Item1", ('a', 'b', 'c', 'd'), "item3"], - "long_name_list": ["long_name_one_two_three", "long_name"], - "dict": {"key1": ["List_item1", "List_item2", "List_item3"], "Key2": "value2", - "key3": "value3", "key4": "val4"}}] + test_obj = [ + {1, 2, 3, 4, 5, 6}, + { + "List1": ["Item1", ("a", "b", "c", "d"), "item3"], + "long_name_list": ["long_name_one_two_three", "long_name"], + "dict": { + "key1": ["List_item1", "List_item2", "List_item3"], + "Key2": "value2", + "key3": "value3", + "key4": "val4", + }, + }, + ] gen_repr = _str_repr(test_obj, list_max=2, dict_max=3, string_max=10) - true_repr = "[{'1','2','3' ...6},{'List1': ['Item1',('a','b' ...4) ...3],'long_...14..._list': ['long_...23..."\ - "three','long_name'],'dict': {'key1': ['List_item1','List_item2' ...3],'Key2': 'value2',"\ - "'key3': 'value3' ...4}}]" + true_repr = ( + "[{'1','2','3' ...6},{'List1': ['Item1',('a','b' ...4) ...3],'long_...14..._list': ['long_...23..." 
+ "three','long_name'],'dict': {'key1': ['List_item1','List_item2' ...3],'Key2': 'value2'," + "'key3': 'value3' ...4}}]" + ) assert true_repr == gen_repr def test_get_error_details(): - tf = tempfile.NamedTemporaryFile('w+', delete=False) + tf = tempfile.NamedTemporaryFile("w+", delete=False) tf.close() @log_error_details(tf.name) @@ -40,7 +51,7 @@ def failing_function2(arg2): failing_function1("my_arg1") assert "actual dummy exception" in str(ex_info.value) - with open(tf.name, 'r') as f: + with open(tf.name, "r") as f: error_log = f.read() assert "'arg1':'my_arg1'" in error_log assert "'level_1_string':'string1_my_arg1'" in error_log diff --git a/buildstockbatch/test/test_validation.py b/buildstockbatch/test/test_validation.py index 70025ddf..dfcaa693 100644 --- a/buildstockbatch/test/test_validation.py +++ b/buildstockbatch/test/test_validation.py @@ -20,7 +20,10 @@ from buildstockbatch.eagle import EagleBatch from buildstockbatch.local import LocalBatch from buildstockbatch.base import BuildStockBatchBase, ValidationError -from buildstockbatch.test.shared_testing_stuff import resstock_directory, resstock_required +from buildstockbatch.test.shared_testing_stuff import ( + resstock_directory, + resstock_required, +) from buildstockbatch.utils import get_project_configuration from unittest.mock import patch from testfixtures import LogCapture @@ -29,12 +32,14 @@ import yaml here = os.path.dirname(os.path.abspath(__file__)) -example_yml_dir = os.path.join(here, 'test_inputs') -resources_dir = os.path.join(here, 'test_inputs', 'test_openstudio_buildstock', 'resources') +example_yml_dir = os.path.join(here, "test_inputs") +resources_dir = os.path.join( + here, "test_inputs", "test_openstudio_buildstock", "resources" +) def filter_logs(logs, level): - filtered_logs = '' + filtered_logs = "" for record in logs.records: if record.levelname == level: filtered_logs += record.msg @@ -58,31 +63,41 @@ def test_local_docker_validation_is_classmethod(): def test_complete_schema_passes_validation(): - assert BuildStockBatchBase.validate_project_schema(os.path.join(example_yml_dir, 'complete-schema.yml')) + assert BuildStockBatchBase.validate_project_schema( + os.path.join(example_yml_dir, "complete-schema.yml") + ) def test_minimal_schema_passes_validation(): - assert BuildStockBatchBase.validate_project_schema(os.path.join(example_yml_dir, 'minimal-schema.yml')) - - -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'missing-required-schema.yml'), - os.path.join(example_yml_dir, 'missing-nested-required-schema.yml') -]) + assert BuildStockBatchBase.validate_project_schema( + os.path.join(example_yml_dir, "minimal-schema.yml") + ) + + +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "missing-required-schema.yml"), + os.path.join(example_yml_dir, "missing-nested-required-schema.yml"), + ], +) def test_missing_required_key_fails(project_file): # patch the validate_options_lookup function to always return true for this case - with patch.object(BuildStockBatchBase, 'validate_options_lookup', lambda _: True): + with patch.object(BuildStockBatchBase, "validate_options_lookup", lambda _: True): with pytest.raises(ValueError): BuildStockBatchBase.validate_project_schema(project_file) -@pytest.mark.parametrize("project_file,expected", [ - (os.path.join(example_yml_dir, 'enforce-schema-xor.yml'), ValidationError), - (os.path.join(example_yml_dir, 'enforce-schema-xor-and-passes.yml'), True), -]) +@pytest.mark.parametrize( + "project_file,expected", + [ + 
(os.path.join(example_yml_dir, "enforce-schema-xor.yml"), ValidationError), + (os.path.join(example_yml_dir, "enforce-schema-xor-and-passes.yml"), True), + ], +) def test_xor_violations_fail(project_file, expected): # patch the validate_options_lookup function to always return true for this case - with patch.object(BuildStockBatchBase, 'validate_options_lookup', lambda _: True): + with patch.object(BuildStockBatchBase, "validate_options_lookup", lambda _: True): if expected is not True: with pytest.raises(expected): BuildStockBatchBase.validate_xor_nor_schema_keys(project_file) @@ -90,21 +105,45 @@ def test_xor_violations_fail(project_file, expected): assert BuildStockBatchBase.validate_xor_nor_schema_keys(project_file) -@pytest.mark.parametrize("project_file, base_expected, eagle_expected", [ - (os.path.join(example_yml_dir, 'missing-required-schema.yml'), ValueError, ValueError), - (os.path.join(example_yml_dir, 'missing-nested-required-schema.yml'), ValueError, ValueError), - (os.path.join(example_yml_dir, 'enforce-schema-xor.yml'), ValidationError, ValidationError), - (os.path.join(example_yml_dir, 'complete-schema.yml'), True, True), - (os.path.join(example_yml_dir, 'minimal-schema.yml'), True, ValidationError) -]) +@pytest.mark.parametrize( + "project_file, base_expected, eagle_expected", + [ + ( + os.path.join(example_yml_dir, "missing-required-schema.yml"), + ValueError, + ValueError, + ), + ( + os.path.join(example_yml_dir, "missing-nested-required-schema.yml"), + ValueError, + ValueError, + ), + ( + os.path.join(example_yml_dir, "enforce-schema-xor.yml"), + ValidationError, + ValidationError, + ), + (os.path.join(example_yml_dir, "complete-schema.yml"), True, True), + (os.path.join(example_yml_dir, "minimal-schema.yml"), True, ValidationError), + ], +) def test_validation_integration(project_file, base_expected, eagle_expected): # patch the validate_options_lookup function to always return true for this case - with patch.object(BuildStockBatchBase, 'validate_options_lookup', lambda _: True), \ - patch.object(BuildStockBatchBase, 'validate_measure_references', lambda _: True), \ - patch.object(BuildStockBatchBase, 'validate_workflow_generator', lambda _: True), \ - patch.object(BuildStockBatchBase, 'validate_postprocessing_spec', lambda _: True), \ - patch.object(EagleBatch, 'validate_singularity_image_eagle', lambda _: True): - for cls, expected in [(BuildStockBatchBase, base_expected), (EagleBatch, eagle_expected)]: + with patch.object( + BuildStockBatchBase, "validate_options_lookup", lambda _: True + ), patch.object( + BuildStockBatchBase, "validate_measure_references", lambda _: True + ), patch.object( + BuildStockBatchBase, "validate_workflow_generator", lambda _: True + ), patch.object( + BuildStockBatchBase, "validate_postprocessing_spec", lambda _: True + ), patch.object( + EagleBatch, "validate_singularity_image_eagle", lambda _: True + ): + for cls, expected in [ + (BuildStockBatchBase, base_expected), + (EagleBatch, eagle_expected), + ]: if expected is not True: with pytest.raises(expected): cls.validate_project(project_file) @@ -112,80 +151,100 @@ def test_validation_integration(project_file, base_expected, eagle_expected): assert cls.validate_project(project_file) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-bad-2.yml') -]) +@pytest.mark.parametrize( + "project_file", + [os.path.join(example_yml_dir, "enforce-validate-measures-bad-2.yml")], +) def test_bad_reference_scenario(project_file): - with 
LogCapture(level=logging.INFO) as logs: BuildStockBatchBase.validate_reference_scenario(project_file) - warning_logs = filter_logs(logs, 'WARNING') + warning_logs = filter_logs(logs, "WARNING") assert "non-existing upgrade' does not match " in warning_logs -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-good-2.yml') -]) +@pytest.mark.parametrize( + "project_file", + [os.path.join(example_yml_dir, "enforce-validate-measures-good-2.yml")], +) def test_good_reference_scenario(project_file): with LogCapture(level=logging.INFO) as logs: assert BuildStockBatchBase.validate_reference_scenario(project_file) - warning_logs = filter_logs(logs, 'WARNING') - error_logs = filter_logs(logs, 'ERROR') - assert warning_logs == '' - assert error_logs == '' + warning_logs = filter_logs(logs, "WARNING") + error_logs = filter_logs(logs, "ERROR") + assert warning_logs == "" + assert error_logs == "" -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-bad-2.yml') -]) +@pytest.mark.parametrize( + "project_file", + [os.path.join(example_yml_dir, "enforce-validate-measures-bad-2.yml")], +) def test_bad_measures(project_file): - with LogCapture(level=logging.INFO) as _: try: BuildStockBatchBase.validate_workflow_generator(project_file) except (ValidationError, YamaleError) as er: er = str(er) assert "'1.5' is not a int" in er - assert "'huorly' not in ('none', 'timestep', 'hourly', 'daily', 'monthly')" in er + assert ( + "'huorly' not in ('none', 'timestep', 'hourly', 'daily', 'monthly')" + in er + ) else: - raise Exception("measures_and_arguments was supposed to raise ValidationError for" - " enforce-validate-measures-bad.yml") - - -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-good-2.yml'), - os.path.join(example_yml_dir, 'enforce-validate-measures-good-2-with-anchors.yml') -]) + raise Exception( + "measures_and_arguments was supposed to raise ValidationError for" + " enforce-validate-measures-bad.yml" + ) + + +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-measures-good-2.yml"), + os.path.join( + example_yml_dir, "enforce-validate-measures-good-2-with-anchors.yml" + ), + ], +) def test_good_measures(project_file): with LogCapture(level=logging.INFO) as logs: assert BuildStockBatchBase.validate_workflow_generator(project_file) - warning_logs = filter_logs(logs, 'WARNING') - error_logs = filter_logs(logs, 'ERROR') - assert warning_logs == '' - assert error_logs == '' - - -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-wrong-path.yml'), -]) + warning_logs = filter_logs(logs, "WARNING") + error_logs = filter_logs(logs, "ERROR") + assert warning_logs == "" + assert error_logs == "" + + +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-options-wrong-path.yml"), + ], +) def test_bad_path_options_validation(project_file): with pytest.raises(FileNotFoundError): BuildStockBatchBase.validate_options_lookup(project_file) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-good.yml'), - os.path.join(example_yml_dir, 'enforce-validate-options-good-2.yml'), -]) +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-options-good.yml"), + os.path.join(example_yml_dir, "enforce-validate-options-good-2.yml"), + ], +) def 
test_good_options_validation(project_file): assert BuildStockBatchBase.validate_options_lookup(project_file) assert BuildStockBatchBase.validate_postprocessing_spec(project_file) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-bad.yml'), - os.path.join(example_yml_dir, 'enforce-validate-options-bad-2.yml'), -]) +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-options-bad.yml"), + os.path.join(example_yml_dir, "enforce-validate-options-bad-2.yml"), + ], +) def test_bad_options_validation(project_file): try: BuildStockBatchBase.validate_options_lookup(project_file) @@ -209,19 +268,27 @@ def test_bad_options_validation(project_file): assert "Floor Insulation: '*' cannot be mixed with other options" in er else: - raise Exception("validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml") + raise Exception( + "validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml" + ) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-good.yml'), -]) +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-measures-good.yml"), + ], +) def test_good_measures_validation(project_file): assert BuildStockBatchBase.validate_measure_references(project_file) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-measures-bad.yml'), -]) +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-measures-bad.yml"), + ], +) def test_bad_measures_validation(project_file): try: BuildStockBatchBase.validate_measure_references(project_file) @@ -233,13 +300,18 @@ def test_bad_measures_validation(project_file): assert "ResidentialConstructionsFinishedBasement" in er else: - raise Exception("validate_measure_references was supposed to raise ValueError for " - "enforce-validate-measures-bad.yml") - - -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-bad-2.yml'), -]) + raise Exception( + "validate_measure_references was supposed to raise ValueError for " + "enforce-validate-measures-bad.yml" + ) + + +@pytest.mark.parametrize( + "project_file", + [ + os.path.join(example_yml_dir, "enforce-validate-options-bad-2.yml"), + ], +) def test_bad_postprocessing_spec_validation(project_file): try: BuildStockBatchBase.validate_postprocessing_spec(project_file) @@ -247,12 +319,14 @@ def test_bad_postprocessing_spec_validation(project_file): er = str(er) assert "bad_partition_column" in er else: - raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-bad-2.yml") + raise Exception( + "validate_options was supposed to raise ValidationError for enforce-validate-options-bad-2.yml" + ) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-good.yml') -]) +@pytest.mark.parametrize( + "project_file", [os.path.join(example_yml_dir, "enforce-validate-options-good.yml")] +) def test_logic_validation_fail(project_file): try: BuildStockBatchBase.validate_logic(project_file) @@ -262,12 +336,15 @@ def test_logic_validation_fail(project_file): assert "'Vintage' occurs 2 times in a 'and' block" in er assert "'Vintage' occurs 2 times in a '&&' block" in er else: - raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml") + raise 
Exception( + "validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml" + ) -@pytest.mark.parametrize("project_file", [ - os.path.join(example_yml_dir, 'enforce-validate-options-all-good.yml') -]) +@pytest.mark.parametrize( + "project_file", + [os.path.join(example_yml_dir, "enforce-validate-options-all-good.yml")], +) def test_logic_validation_pass(project_file): BuildStockBatchBase.validate_logic(project_file) @@ -277,7 +354,9 @@ def test_number_of_options_apply_upgrade(): proj_filename = resstock_directory / "project_national" / "national_upgrades.yml" cfg = get_project_configuration(str(proj_filename)) cfg["upgrades"][-1]["options"] = cfg["upgrades"][-1]["options"] * 10 - cfg["upgrades"][0]["options"][0]["costs"] = cfg["upgrades"][0]["options"][0]["costs"] * 5 + cfg["upgrades"][0]["options"][0]["costs"] = ( + cfg["upgrades"][0]["options"][0]["costs"] * 5 + ) with tempfile.TemporaryDirectory() as tmpdir: tmppath = pathlib.Path(tmpdir) new_proj_filename = tmppath / "project.yml" @@ -297,68 +376,93 @@ def test_validate_resstock_or_comstock_version(mocker): def test_validate_eagle_output_directory(): - minimal_yml = pathlib.Path(example_yml_dir, 'minimal-schema.yml') + minimal_yml = pathlib.Path(example_yml_dir, "minimal-schema.yml") with pytest.raises(ValidationError, match=r"must be in /scratch or /projects"): EagleBatch.validate_output_directory_eagle(str(minimal_yml)) with tempfile.TemporaryDirectory() as tmpdir: dirs_to_try = [ - '/scratch/username/out_dir', - '/projects/projname/out_dir', - '/lustre/eaglefs/scratch/username/out_dir', - '/lustre/eaglefs/projects/projname/out_dir' + "/scratch/username/out_dir", + "/projects/projname/out_dir", + "/lustre/eaglefs/scratch/username/out_dir", + "/lustre/eaglefs/projects/projname/out_dir", ] for output_directory in dirs_to_try: - with open(minimal_yml, 'r') as f: + with open(minimal_yml, "r") as f: cfg = yaml.load(f, Loader=yaml.SafeLoader) - cfg['output_directory'] = output_directory - temp_yml = pathlib.Path(tmpdir, 'temp.yml') - with open(temp_yml, 'w') as f: + cfg["output_directory"] = output_directory + temp_yml = pathlib.Path(tmpdir, "temp.yml") + with open(temp_yml, "w") as f: yaml.dump(cfg, f, Dumper=yaml.SafeDumper) EagleBatch.validate_output_directory_eagle(str(temp_yml)) def test_validate_singularity_image_eagle(mocker, basic_residential_project_file): - minimal_yml = pathlib.Path(example_yml_dir, 'minimal-schema.yml') + minimal_yml = pathlib.Path(example_yml_dir, "minimal-schema.yml") with tempfile.TemporaryDirectory() as tmpdir: - with open(minimal_yml, 'r') as f: + with open(minimal_yml, "r") as f: cfg = yaml.load(f, Loader=yaml.SafeLoader) - cfg['sys_image_dir'] = tmpdir - temp_yml = pathlib.Path(tmpdir, 'temp.yml') - with open(temp_yml, 'w') as f: + cfg["sys_image_dir"] = tmpdir + temp_yml = pathlib.Path(tmpdir, "temp.yml") + with open(temp_yml, "w") as f: yaml.dump(cfg, f, Dumper=yaml.SafeDumper) with pytest.raises(ValidationError, match=r"image does not exist"): EagleBatch.validate_singularity_image_eagle(str(temp_yml)) def test_validate_sampler_good_buildstock(basic_residential_project_file): - project_filename, _ = basic_residential_project_file({ - 'sampler': { - 'type': 'precomputed', - 'args': { - 'sample_file': str(os.path.join(resources_dir, 'buildstock_good.csv')) + project_filename, _ = basic_residential_project_file( + { + "sampler": { + "type": "precomputed", + "args": { + "sample_file": str( + os.path.join(resources_dir, "buildstock_good.csv") + ) + }, } } - }) + ) assert 
BuildStockBatchBase.validate_sampler(project_filename) def test_validate_sampler_bad_buildstock(basic_residential_project_file): - project_filename, _ = basic_residential_project_file({ - 'sampler': { - 'type': 'precomputed', - 'args': { - 'sample_file': str(os.path.join(resources_dir, 'buildstock_bad.csv')) + project_filename, _ = basic_residential_project_file( + { + "sampler": { + "type": "precomputed", + "args": { + "sample_file": str( + os.path.join(resources_dir, "buildstock_bad.csv") + ) + }, } } - }) + ) try: BuildStockBatchBase.validate_sampler(project_filename) except ValidationError as er: er = str(er) - assert 'Option 1940-1950 in column Vintage of buildstock_csv is not available in options_lookup.tsv' in er - assert 'Option TX in column State of buildstock_csv is not available in options_lookup.tsv' in er - assert 'Option nan in column Insulation Wall of buildstock_csv is not available in options_lookup.tsv' in er - assert 'Column Insulation in buildstock_csv is not available in options_lookup.tsv' in er - assert 'Column ZipPlusCode in buildstock_csv is not available in options_lookup.tsv' in er + assert ( + "Option 1940-1950 in column Vintage of buildstock_csv is not available in options_lookup.tsv" + in er + ) + assert ( + "Option TX in column State of buildstock_csv is not available in options_lookup.tsv" + in er + ) + assert ( + "Option nan in column Insulation Wall of buildstock_csv is not available in options_lookup.tsv" + in er + ) + assert ( + "Column Insulation in buildstock_csv is not available in options_lookup.tsv" + in er + ) + assert ( + "Column ZipPlusCode in buildstock_csv is not available in options_lookup.tsv" + in er + ) else: - raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml") + raise Exception( + "validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml" + ) diff --git a/buildstockbatch/utils.py b/buildstockbatch/utils.py index ca741fa5..848ceb79 100644 --- a/buildstockbatch/utils.py +++ b/buildstockbatch/utils.py @@ -18,7 +18,12 @@ class ContainerRuntime(enum.Enum): def read_csv(csv_file_path, **kwargs) -> pd.DataFrame: default_na_values = pd._libs.parsers.STR_NA_VALUES - df = pd.read_csv(csv_file_path, na_values=list(default_na_values - {"None", "NA"}), keep_default_na=False, **kwargs) + df = pd.read_csv( + csv_file_path, + na_values=list(default_na_values - {"None", "NA"}), + keep_default_na=False, + **kwargs, + ) return df @@ -34,16 +39,20 @@ def get_project_configuration(project_file): with open(project_file) as f: cfg = yaml.load(f, Loader=yaml.SafeLoader) except FileNotFoundError as err: - logger.error('Failed to load input yaml for validation') + logger.error("Failed to load input yaml for validation") raise err # Set absolute paths - cfg['buildstock_directory'] = path_rel_to_file(project_file, cfg['buildstock_directory']) + cfg["buildstock_directory"] = path_rel_to_file( + project_file, cfg["buildstock_directory"] + ) # if 'precomputed_sample' in cfg.get('baseline', {}): # cfg['baseline']['precomputed_sample'] = \ # path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) - if 'weather_files_path' in cfg: - cfg['weather_files_path'] = path_rel_to_file(project_file, cfg['weather_files_path']) + if "weather_files_path" in cfg: + cfg["weather_files_path"] = path_rel_to_file( + project_file, cfg["weather_files_path"] + ) return cfg @@ -57,28 +66,48 @@ def _str_repr(obj, list_max=20, dict_max=20, string_max=100): elif type(obj) in [int, 
float]: return _str_repr(str(obj), list_max, dict_max, string_max) elif type(obj) is list: - txt = "[" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:list_max]]) + txt = "[" + ",".join( + [ + _str_repr(item, list_max, dict_max, string_max) + for item in obj[0:list_max] + ] + ) if len(obj) > list_max: txt += f" ...{len(obj)}" txt += "]" return txt elif type(obj) is tuple: - txt = "(" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:list_max]]) + txt = "(" + ",".join( + [ + _str_repr(item, list_max, dict_max, string_max) + for item in obj[0:list_max] + ] + ) if len(obj) > list_max: txt += f" ...{len(obj)}" txt += ")" return txt elif type(obj) is set: obj = list(obj) - txt = "{" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:dict_max]]) + txt = "{" + ",".join( + [ + _str_repr(item, list_max, dict_max, string_max) + for item in obj[0:dict_max] + ] + ) if len(obj) > dict_max: txt += f" ...{len(obj)}" txt += "}" return txt elif type(obj) is dict: keys = list(obj.keys()) - txt = "{" + ",".join([f"{_str_repr(key, list_max, dict_max, string_max)}:" - f" {_str_repr(obj[key], list_max, dict_max, string_max)}" for key in keys[0:dict_max]]) + txt = "{" + ",".join( + [ + f"{_str_repr(key, list_max, dict_max, string_max)}:" + f" {_str_repr(obj[key], list_max, dict_max, string_max)}" + for key in keys[0:dict_max] + ] + ) if len(keys) > dict_max: txt += f" ...{len(keys)}" txt += "}" @@ -92,7 +121,7 @@ def get_error_details(): text += traceback.format_exc() frames = inspect.trace() for frame in frames: - text += f'\nIn file: {frame[1]}, module {str(frame[3])} line: {frame[2]} \n' + text += f"\nIn file: {frame[1]}, module {str(frame[3])} line: {frame[2]} \n" text += "Local Variables: " for var, value in frame[0].f_locals.items(): text += _str_repr(var) + ":" + _str_repr(value) @@ -111,6 +140,7 @@ def run_with_error_capture(*args, **kwargs): text += get_error_details() f.write(text) raise + return run_with_error_capture return log_error_decorator diff --git a/buildstockbatch/workflow_generator/__init__.py b/buildstockbatch/workflow_generator/__init__.py index 7f1c991f..0e40b889 100644 --- a/buildstockbatch/workflow_generator/__init__.py +++ b/buildstockbatch/workflow_generator/__init__.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from .commercial import CommercialDefaultWorkflowGenerator # noqa F041 -from .residential_hpxml import ResidentialHpxmlWorkflowGenerator # noqa F041 \ No newline at end of file +from .residential_hpxml import ResidentialHpxmlWorkflowGenerator # noqa F041 diff --git a/buildstockbatch/workflow_generator/base.py b/buildstockbatch/workflow_generator/base.py index dc999c42..344e7e44 100644 --- a/buildstockbatch/workflow_generator/base.py +++ b/buildstockbatch/workflow_generator/base.py @@ -16,7 +16,6 @@ class WorkflowGeneratorBase(object): - def __init__(self, cfg, n_datapoints): self.cfg = cfg self.n_datapoints = n_datapoints @@ -42,17 +41,17 @@ def make_apply_logic_arg(cls, logic): :returns: str of logic """ if isinstance(logic, dict): - assert (len(logic) == 1) + assert len(logic) == 1 key = list(logic.keys())[0] val = logic[key] - if key == 'and': + if key == "and": return cls.make_apply_logic_arg(val) - elif key == 'or': - return '(' + '||'.join(map(cls.make_apply_logic_arg, val)) + ')' - elif key == 'not': - return '!' + cls.make_apply_logic_arg(val) + elif key == "or": + return "(" + "||".join(map(cls.make_apply_logic_arg, val)) + ")" + elif key == "not": + return "!" 
+ cls.make_apply_logic_arg(val) elif isinstance(logic, list): - return '(' + '&&'.join(map(cls.make_apply_logic_arg, logic)) + ')' + return "(" + "&&".join(map(cls.make_apply_logic_arg, logic)) + ")" elif isinstance(logic, str): return logic diff --git a/buildstockbatch/workflow_generator/commercial.py b/buildstockbatch/workflow_generator/commercial.py index 5f488c99..2fff78a2 100644 --- a/buildstockbatch/workflow_generator/commercial.py +++ b/buildstockbatch/workflow_generator/commercial.py @@ -30,7 +30,6 @@ def get_measure_xml(xml_path): class CommercialDefaultWorkflowGenerator(WorkflowGeneratorBase): - @classmethod def validate(cls, cfg): """Validate arguments @@ -47,28 +46,30 @@ def validate(cls, cfg): measure_dir_name: str(required=True) arguments: map(required=False) """ - workflow_generator_args = cfg['workflow_generator']['args'] - schema_yml = re.sub(r'^ {8}', '', schema_yml, flags=re.MULTILINE) - schema = yamale.make_schema(content=schema_yml, parser='ruamel') - data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') + workflow_generator_args = cfg["workflow_generator"]["args"] + schema_yml = re.sub(r"^ {8}", "", schema_yml, flags=re.MULTILINE) + schema = yamale.make_schema(content=schema_yml, parser="ruamel") + data = yamale.make_data( + content=json.dumps(workflow_generator_args), parser="ruamel" + ) return yamale.validate(schema, data, strict=True) def reporting_measures(self): """Return a list of reporting measures to include in outputs""" - workflow_args = self.cfg['workflow_generator'].get('args', {}) + workflow_args = self.cfg["workflow_generator"].get("args", {}) # reporting_measures needs to return the ClassName in measure.rb, but # measure_dir_name in ComStock doesn't always match the ClassName - buildstock_dir = self.cfg['buildstock_directory'] - measures_dir = os.path.join(buildstock_dir, 'measures') + buildstock_dir = self.cfg["buildstock_directory"] + measures_dir = os.path.join(buildstock_dir, "measures") measure_class_names = [] - for m in workflow_args.get('reporting_measures', []): - measure_dir_name = m['measure_dir_name'] + for m in workflow_args.get("reporting_measures", []): + measure_dir_name = m["measure_dir_name"] measure_path = os.path.join(measures_dir, measure_dir_name) - root = get_measure_xml(os.path.join(measure_path, 'measure.xml')) - measure_class_name = root.find('./class_name').text + root = get_measure_xml(os.path.join(measure_path, "measure.xml")) + measure_class_name = root.find("./class_name").text # Don't include OpenStudioResults, it has too many registerValues for ComStock - if measure_class_name == 'OpenStudioResults': + if measure_class_name == "OpenStudioResults": continue measure_class_names.append(measure_class_name) @@ -82,86 +83,95 @@ def create_osw(self, sim_id, building_id, upgrade_idx): :param building_id: integer building id to use from the sampled buildstock.csv :param upgrade_idx: integer index of the upgrade scenario to apply, None if baseline """ - logger.debug('Generating OSW, sim_id={}'.format(sim_id)) + logger.debug("Generating OSW, sim_id={}".format(sim_id)) - workflow_args = { - 'measures': [] - } - workflow_args.update(self.cfg['workflow_generator'].get('args', {})) + workflow_args = {"measures": []} + workflow_args.update(self.cfg["workflow_generator"].get("args", {})) osw = { - 'id': sim_id, - 'steps': [ + "id": sim_id, + "steps": [ { "measure_dir_name": "BuildExistingModel", "arguments": { "number_of_buildings_represented": 1, - "building_id": int(building_id) + "building_id": 
int(building_id), }, - "measure_type": "ModelMeasure" + "measure_type": "ModelMeasure", } ], - 'created_at': dt.datetime.now().isoformat(), - 'measure_paths': [ - 'measures' - ], - 'weather_file': 'weather/empty.epw' + "created_at": dt.datetime.now().isoformat(), + "measure_paths": ["measures"], + "weather_file": "weather/empty.epw", } # Baseline measures (not typically used in ComStock) - osw['steps'].extend(workflow_args['measures']) + osw["steps"].extend(workflow_args["measures"]) # Upgrades if upgrade_idx is not None: - measure_d = self.cfg['upgrades'][upgrade_idx] + measure_d = self.cfg["upgrades"][upgrade_idx] apply_upgrade_measure = { - 'measure_dir_name': 'ApplyUpgrade', - 'arguments': { - 'run_measure': 1 - } + "measure_dir_name": "ApplyUpgrade", + "arguments": {"run_measure": 1}, } - if 'upgrade_name' in measure_d: - apply_upgrade_measure['arguments']['upgrade_name'] = measure_d['upgrade_name'] - for opt_num, option in enumerate(measure_d['options'], 1): - apply_upgrade_measure['arguments']['option_{}'.format(opt_num)] = option['option'] - if 'lifetime' in option: - apply_upgrade_measure['arguments']['option_{}_lifetime'.format(opt_num)] = option['lifetime'] - if 'apply_logic' in option: - apply_upgrade_measure['arguments']['option_{}_apply_logic'.format(opt_num)] = \ - self.make_apply_logic_arg(option['apply_logic']) - for cost_num, cost in enumerate(option.get('costs', []), 1): - for arg in ('value', 'multiplier'): + if "upgrade_name" in measure_d: + apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d[ + "upgrade_name" + ] + for opt_num, option in enumerate(measure_d["options"], 1): + apply_upgrade_measure["arguments"][ + "option_{}".format(opt_num) + ] = option["option"] + if "lifetime" in option: + apply_upgrade_measure["arguments"][ + "option_{}_lifetime".format(opt_num) + ] = option["lifetime"] + if "apply_logic" in option: + apply_upgrade_measure["arguments"][ + "option_{}_apply_logic".format(opt_num) + ] = self.make_apply_logic_arg(option["apply_logic"]) + for cost_num, cost in enumerate(option.get("costs", []), 1): + for arg in ("value", "multiplier"): if arg not in cost: continue - apply_upgrade_measure['arguments']['option_{}_cost_{}_{}'.format(opt_num, cost_num, arg)] = \ - cost[arg] - if 'package_apply_logic' in measure_d: - apply_upgrade_measure['arguments']['package_apply_logic'] = \ - self.make_apply_logic_arg(measure_d['package_apply_logic']) - - build_existing_model_idx = \ - list(map(lambda x: x['measure_dir_name'] == 'BuildExistingModel', osw['steps'])).index(True) - osw['steps'].insert(build_existing_model_idx + 1, apply_upgrade_measure) - - if 'timeseries_csv_export' in workflow_args: + apply_upgrade_measure["arguments"][ + "option_{}_cost_{}_{}".format(opt_num, cost_num, arg) + ] = cost[arg] + if "package_apply_logic" in measure_d: + apply_upgrade_measure["arguments"][ + "package_apply_logic" + ] = self.make_apply_logic_arg(measure_d["package_apply_logic"]) + + build_existing_model_idx = list( + map( + lambda x: x["measure_dir_name"] == "BuildExistingModel", + osw["steps"], + ) + ).index(True) + osw["steps"].insert(build_existing_model_idx + 1, apply_upgrade_measure) + + if "timeseries_csv_export" in workflow_args: timeseries_csv_export_args = { - 'reporting_frequency': 'Timestep', - 'inc_output_variables': False + "reporting_frequency": "Timestep", + "inc_output_variables": False, } - timeseries_csv_export_args.update(workflow_args['timeseries_csv_export']) - timeseries_measure = [{ - 'measure_dir_name': 'TimeseriesCSVExport', - 'arguments': 
timeseries_csv_export_args, - "measure_type": "ReportingMeasure" - }] - osw['steps'].extend(timeseries_measure) + timeseries_csv_export_args.update(workflow_args["timeseries_csv_export"]) + timeseries_measure = [ + { + "measure_dir_name": "TimeseriesCSVExport", + "arguments": timeseries_csv_export_args, + "measure_type": "ReportingMeasure", + } + ] + osw["steps"].extend(timeseries_measure) # User-specified reporting measures - if 'reporting_measures' in workflow_args: - for reporting_measure in workflow_args['reporting_measures']: - if 'arguments' not in reporting_measure: - reporting_measure['arguments'] = {} - reporting_measure['measure_type'] = 'ReportingMeasure' - osw['steps'].append(reporting_measure) + if "reporting_measures" in workflow_args: + for reporting_measure in workflow_args["reporting_measures"]: + if "arguments" not in reporting_measure: + reporting_measure["arguments"] = {} + reporting_measure["measure_type"] = "ReportingMeasure" + osw["steps"].append(reporting_measure) return osw diff --git a/buildstockbatch/workflow_generator/residential_hpxml.py b/buildstockbatch/workflow_generator/residential_hpxml.py index 077ed0d1..ee71b6a1 100644 --- a/buildstockbatch/workflow_generator/residential_hpxml.py +++ b/buildstockbatch/workflow_generator/residential_hpxml.py @@ -35,14 +35,13 @@ def get_measure_arguments(xml_path): arguments = [] if os.path.isfile(xml_path): root = get_measure_xml(xml_path) - for argument in root.findall('./arguments/argument'): - name = argument.find('./name').text + for argument in root.findall("./arguments/argument"): + name = argument.find("./name").text arguments.append(name) return arguments class ResidentialHpxmlWorkflowGenerator(WorkflowGeneratorBase): - @classmethod def validate(cls, cfg): """Validate arguments @@ -143,33 +142,36 @@ def validate(cls, cfg): retain_schedules_csv: bool(required=False) debug: bool(required=False) """ # noqa E501 - workflow_generator_args = cfg['workflow_generator']['args'] - schema_yml = re.sub(r'^ {8}', '', schema_yml, flags=re.MULTILINE) - schema = yamale.make_schema(content=schema_yml, parser='ruamel') - data = yamale.make_data(content=json.dumps(workflow_generator_args), parser='ruamel') + workflow_generator_args = cfg["workflow_generator"]["args"] + schema_yml = re.sub(r"^ {8}", "", schema_yml, flags=re.MULTILINE) + schema = yamale.make_schema(content=schema_yml, parser="ruamel") + data = yamale.make_data( + content=json.dumps(workflow_generator_args), parser="ruamel" + ) yamale.validate(schema, data, strict=True) return cls.validate_measures_and_arguments(cfg) def reporting_measures(self): """Return a list of reporting measures to include in outputs""" - workflow_args = self.cfg['workflow_generator'].get('args', {}) - return [x['measure_dir_name'] for x in workflow_args.get('reporting_measures', [])] + workflow_args = self.cfg["workflow_generator"].get("args", {}) + return [ + x["measure_dir_name"] for x in workflow_args.get("reporting_measures", []) + ] @staticmethod def validate_measures_and_arguments(cfg): - - buildstock_dir = cfg['buildstock_directory'] - measures_dir = os.path.join(buildstock_dir, 'measures') + buildstock_dir = cfg["buildstock_directory"] + measures_dir = os.path.join(buildstock_dir, "measures") measure_names = { - 'BuildExistingModel': 'baseline', - 'ApplyUpgrade': 'upgrades', + "BuildExistingModel": "baseline", + "ApplyUpgrade": "upgrades", } def cfg_path_exists(cfg_path): if cfg_path is None: return False - path_items = cfg_path.split('.') + path_items = cfg_path.split(".") a = cfg for 
path_item in path_items:
                 try:
@@ -181,7 +183,7 @@ def cfg_path_exists(cfg_path):
         def get_cfg_path(cfg_path):
             if cfg_path is None:
                 return None
-            path_items = cfg_path.split('.')
+            path_items = cfg_path.split(".")
             a = cfg
             for path_item in path_items:
                 try:
@@ -190,13 +192,15 @@ def get_cfg_path(cfg_path):
                     return None
             return a
 
-        workflow_args = cfg['workflow_generator'].get('args', {})
-        if 'reporting_measures' in workflow_args.keys():
-            for reporting_measure in workflow_args['reporting_measures']:
-                measure_names[reporting_measure['measure_dir_name']] = 'workflow_generator.args.reporting_measures'
+        workflow_args = cfg["workflow_generator"].get("args", {})
+        if "reporting_measures" in workflow_args.keys():
+            for reporting_measure in workflow_args["reporting_measures"]:
+                measure_names[
+                    reporting_measure["measure_dir_name"]
+                ] = "workflow_generator.args.reporting_measures"
 
-        error_msgs = ''
-        warning_msgs = ''
+        error_msgs = ""
+        warning_msgs = ""
 
         for measure_name, cfg_key in measure_names.items():
             measure_path = os.path.join(measures_dir, measure_name)
@@ -204,29 +208,31 @@ def get_cfg_path(cfg_path):
             if not cfg_path_exists(cfg_key):
                 continue
 
-            if measure_name in ['ApplyUpgrade']:
+            if measure_name in ["ApplyUpgrade"]:
                 # For ApplyUpgrade measure, verify that all the cost_multipliers used are correct
-                root = get_measure_xml(os.path.join(measure_path, 'measure.xml'))
+                root = get_measure_xml(os.path.join(measure_path, "measure.xml"))
                 valid_multipliers = set()
-                for argument in root.findall('./arguments/argument'):
-                    name = argument.find('./name')
-                    if name.text.endswith('_multiplier'):
-                        for choice in argument.findall('./choices/choice'):
-                            value = choice.find('./value')
-                            value = value.text if value is not None else ''
+                for argument in root.findall("./arguments/argument"):
+                    name = argument.find("./name")
+                    if name.text.endswith("_multiplier"):
+                        for choice in argument.findall("./choices/choice"):
+                            value = choice.find("./value")
+                            value = value.text if value is not None else ""
                             valid_multipliers.add(value)
                 invalid_multipliers = Counter()
-                for upgrade_count, upgrade in enumerate(cfg['upgrades']):
-                    for option_count, option in enumerate(upgrade['options']):
-                        for cost_indx, cost_entry in enumerate(option.get('costs', [])):
-                            if cost_entry['multiplier'] not in valid_multipliers:
-                                invalid_multipliers[cost_entry['multiplier']] += 1
+                for upgrade_count, upgrade in enumerate(cfg["upgrades"]):
+                    for option_count, option in enumerate(upgrade["options"]):
+                        for cost_indx, cost_entry in enumerate(option.get("costs", [])):
+                            if cost_entry["multiplier"] not in valid_multipliers:
+                                invalid_multipliers[cost_entry["multiplier"]] += 1
                 if invalid_multipliers:
                     error_msgs += "* The following multipliers values are invalid: \n"
                     for multiplier, count in invalid_multipliers.items():
                         error_msgs += f"  '{multiplier}' - Used {count} times \n"
-                    error_msgs += f"  The list of valid multipliers are {valid_multipliers}.\n"
+                    error_msgs += (
+                        f"  The list of valid multipliers is {valid_multipliers}.\n"
+                    )
 
         if warning_msgs:
             logger.warning(warning_msgs)
@@ -247,304 +253,330 @@ def create_osw(self, sim_id, building_id, upgrade_idx):
         """
         # Default argument values
         workflow_args = {
-            'build_existing_model': {},
-            'measures': [],
-            'simulation_output_report': {},
-            'server_directory_cleanup': {}
+            "build_existing_model": {},
+            "measures": [],
+            "simulation_output_report": {},
+            "server_directory_cleanup": {},
         }
-        workflow_args.update(self.cfg['workflow_generator'].get('args', {}))
+        
workflow_args.update(self.cfg["workflow_generator"].get("args", {})) - logger.debug('Generating OSW, sim_id={}'.format(sim_id)) + logger.debug("Generating OSW, sim_id={}".format(sim_id)) sim_ctl_args = { - 'simulation_control_timestep': 60, - 'simulation_control_run_period_begin_month': 1, - 'simulation_control_run_period_begin_day_of_month': 1, - 'simulation_control_run_period_end_month': 12, - 'simulation_control_run_period_end_day_of_month': 31, - 'simulation_control_run_period_calendar_year': 2007, - 'add_component_loads': False + "simulation_control_timestep": 60, + "simulation_control_run_period_begin_month": 1, + "simulation_control_run_period_begin_day_of_month": 1, + "simulation_control_run_period_end_month": 12, + "simulation_control_run_period_end_day_of_month": 31, + "simulation_control_run_period_calendar_year": 2007, + "add_component_loads": False, } bld_exist_model_args = { - 'building_id': building_id, - 'sample_weight': self.cfg['baseline']['n_buildings_represented'] / self.n_datapoints + "building_id": building_id, + "sample_weight": self.cfg["baseline"]["n_buildings_represented"] + / self.n_datapoints, } bld_exist_model_args.update(sim_ctl_args) - bld_exist_model_args.update(workflow_args['build_existing_model']) + bld_exist_model_args.update(workflow_args["build_existing_model"]) add_component_loads = False - if 'add_component_loads' in bld_exist_model_args: - add_component_loads = bld_exist_model_args['add_component_loads'] - bld_exist_model_args.pop('add_component_loads') - - if 'emissions' in workflow_args: - emissions = workflow_args['emissions'] - emissions_map = [['emissions_scenario_names', 'scenario_name'], - ['emissions_types', 'type'], - ['emissions_electricity_folders', 'elec_folder'], - ['emissions_natural_gas_values', 'gas_value'], - ['emissions_propane_values', 'propane_value'], - ['emissions_fuel_oil_values', 'oil_value'], - ['emissions_wood_values', 'wood_value']] + if "add_component_loads" in bld_exist_model_args: + add_component_loads = bld_exist_model_args["add_component_loads"] + bld_exist_model_args.pop("add_component_loads") + + if "emissions" in workflow_args: + emissions = workflow_args["emissions"] + emissions_map = [ + ["emissions_scenario_names", "scenario_name"], + ["emissions_types", "type"], + ["emissions_electricity_folders", "elec_folder"], + ["emissions_natural_gas_values", "gas_value"], + ["emissions_propane_values", "propane_value"], + ["emissions_fuel_oil_values", "oil_value"], + ["emissions_wood_values", "wood_value"], + ] for arg, item in emissions_map: - bld_exist_model_args[arg] = ','.join([str(s.get(item, '')) for s in emissions]) - - buildstock_dir = self.cfg['buildstock_directory'] - measures_dir = os.path.join(buildstock_dir, 'measures') - measure_path = os.path.join(measures_dir, 'BuildExistingModel') - bld_exist_model_args_avail = get_measure_arguments(os.path.join(measure_path, 'measure.xml')) - - if 'utility_bills' in workflow_args: - utility_bills = workflow_args['utility_bills'] + bld_exist_model_args[arg] = ",".join( + [str(s.get(item, "")) for s in emissions] + ) + + buildstock_dir = self.cfg["buildstock_directory"] + measures_dir = os.path.join(buildstock_dir, "measures") + measure_path = os.path.join(measures_dir, "BuildExistingModel") + bld_exist_model_args_avail = get_measure_arguments( + os.path.join(measure_path, "measure.xml") + ) + + if "utility_bills" in workflow_args: + utility_bills = workflow_args["utility_bills"] utility_bills_map = [ - ['utility_bill_scenario_names', 'scenario_name'], - 
['utility_bill_simple_filepaths', 'simple_filepath'], - ['utility_bill_detailed_filepaths', 'detailed_filepath'], - ['utility_bill_electricity_fixed_charges', 'elec_fixed_charge'], - ['utility_bill_electricity_marginal_rates', 'elec_marginal_rate'], - ['utility_bill_natural_gas_fixed_charges', 'gas_fixed_charge'], - ['utility_bill_natural_gas_marginal_rates', 'gas_marginal_rate'], - ['utility_bill_propane_fixed_charges', 'propane_fixed_charge'], - ['utility_bill_propane_marginal_rates', 'propane_marginal_rate'], - ['utility_bill_fuel_oil_fixed_charges', 'oil_fixed_charge'], - ['utility_bill_fuel_oil_marginal_rates', 'oil_marginal_rate'], - ['utility_bill_wood_fixed_charges', 'wood_fixed_charge'], - ['utility_bill_wood_marginal_rates', 'wood_marginal_rate'], - ['utility_bill_pv_compensation_types', 'pv_compensation_type'], - ['utility_bill_pv_net_metering_annual_excess_sellback_rate_types', - 'pv_net_metering_annual_excess_sellback_rate_type'], - ['utility_bill_pv_net_metering_annual_excess_sellback_rates', - 'pv_net_metering_annual_excess_sellback_rate'], - ['utility_bill_pv_feed_in_tariff_rates', 'pv_feed_in_tariff_rate'], - ['utility_bill_pv_monthly_grid_connection_fee_units', 'pv_monthly_grid_connection_fee_units'], - ['utility_bill_pv_monthly_grid_connection_fees', 'pv_monthly_grid_connection_fee'] + ["utility_bill_scenario_names", "scenario_name"], + ["utility_bill_simple_filepaths", "simple_filepath"], + ["utility_bill_detailed_filepaths", "detailed_filepath"], + ["utility_bill_electricity_fixed_charges", "elec_fixed_charge"], + ["utility_bill_electricity_marginal_rates", "elec_marginal_rate"], + ["utility_bill_natural_gas_fixed_charges", "gas_fixed_charge"], + ["utility_bill_natural_gas_marginal_rates", "gas_marginal_rate"], + ["utility_bill_propane_fixed_charges", "propane_fixed_charge"], + ["utility_bill_propane_marginal_rates", "propane_marginal_rate"], + ["utility_bill_fuel_oil_fixed_charges", "oil_fixed_charge"], + ["utility_bill_fuel_oil_marginal_rates", "oil_marginal_rate"], + ["utility_bill_wood_fixed_charges", "wood_fixed_charge"], + ["utility_bill_wood_marginal_rates", "wood_marginal_rate"], + ["utility_bill_pv_compensation_types", "pv_compensation_type"], + [ + "utility_bill_pv_net_metering_annual_excess_sellback_rate_types", + "pv_net_metering_annual_excess_sellback_rate_type", + ], + [ + "utility_bill_pv_net_metering_annual_excess_sellback_rates", + "pv_net_metering_annual_excess_sellback_rate", + ], + ["utility_bill_pv_feed_in_tariff_rates", "pv_feed_in_tariff_rate"], + [ + "utility_bill_pv_monthly_grid_connection_fee_units", + "pv_monthly_grid_connection_fee_units", + ], + [ + "utility_bill_pv_monthly_grid_connection_fees", + "pv_monthly_grid_connection_fee", + ], ] for arg, item in utility_bills_map: if arg in bld_exist_model_args_avail: - bld_exist_model_args[arg] = ','.join([str(s.get(item, '')) for s in utility_bills]) + bld_exist_model_args[arg] = ",".join( + [str(s.get(item, "")) for s in utility_bills] + ) sim_out_rep_args = { - 'timeseries_frequency': 'none', - 'include_timeseries_total_consumptions': False, - 'include_timeseries_fuel_consumptions': False, - 'include_timeseries_end_use_consumptions': True, - 'include_timeseries_emissions': False, - 'include_timeseries_emission_fuels': False, - 'include_timeseries_emission_end_uses': False, - 'include_timeseries_hot_water_uses': False, - 'include_timeseries_total_loads': True, - 'include_timeseries_component_loads': False, - 'include_timeseries_zone_temperatures': False, - 'include_timeseries_airflows': False, 
- 'include_timeseries_weather': False, - 'timeseries_timestamp_convention': 'end', - 'add_timeseries_dst_column': True, - 'add_timeseries_utc_column': True + "timeseries_frequency": "none", + "include_timeseries_total_consumptions": False, + "include_timeseries_fuel_consumptions": False, + "include_timeseries_end_use_consumptions": True, + "include_timeseries_emissions": False, + "include_timeseries_emission_fuels": False, + "include_timeseries_emission_end_uses": False, + "include_timeseries_hot_water_uses": False, + "include_timeseries_total_loads": True, + "include_timeseries_component_loads": False, + "include_timeseries_zone_temperatures": False, + "include_timeseries_airflows": False, + "include_timeseries_weather": False, + "timeseries_timestamp_convention": "end", + "add_timeseries_dst_column": True, + "add_timeseries_utc_column": True, } - measures_dir = os.path.join(buildstock_dir, 'resources/hpxml-measures') - measure_path = os.path.join(measures_dir, 'ReportSimulationOutput') - sim_out_rep_args_avail = get_measure_arguments(os.path.join(measure_path, 'measure.xml')) + measures_dir = os.path.join(buildstock_dir, "resources/hpxml-measures") + measure_path = os.path.join(measures_dir, "ReportSimulationOutput") + sim_out_rep_args_avail = get_measure_arguments( + os.path.join(measure_path, "measure.xml") + ) - if 'include_annual_total_consumptions' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_total_consumptions'] = True + if "include_annual_total_consumptions" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_total_consumptions"] = True - if 'include_annual_fuel_consumptions' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_fuel_consumptions'] = True + if "include_annual_fuel_consumptions" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_fuel_consumptions"] = True - if 'include_annual_end_use_consumptions' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_end_use_consumptions'] = True + if "include_annual_end_use_consumptions" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_end_use_consumptions"] = True - if 'include_annual_system_use_consumptions' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_system_use_consumptions'] = False + if "include_annual_system_use_consumptions" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_system_use_consumptions"] = False - if 'include_annual_emissions' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_emissions'] = True + if "include_annual_emissions" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_emissions"] = True - if 'include_annual_emission_fuels' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_emission_fuels'] = True + if "include_annual_emission_fuels" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_emission_fuels"] = True - if 'include_annual_emission_end_uses' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_emission_end_uses'] = True + if "include_annual_emission_end_uses" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_emission_end_uses"] = True - if 'include_annual_total_loads' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_total_loads'] = True + if "include_annual_total_loads" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_total_loads"] = True - if 'include_annual_unmet_hours' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_unmet_hours'] = True + if "include_annual_unmet_hours" in sim_out_rep_args_avail: + 
sim_out_rep_args["include_annual_unmet_hours"] = True - if 'include_annual_peak_fuels' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_peak_fuels'] = True + if "include_annual_peak_fuels" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_peak_fuels"] = True - if 'include_annual_peak_loads' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_peak_loads'] = True + if "include_annual_peak_loads" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_peak_loads"] = True - if 'include_annual_component_loads' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_component_loads'] = True + if "include_annual_component_loads" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_component_loads"] = True - if 'include_annual_hot_water_uses' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_hot_water_uses'] = True + if "include_annual_hot_water_uses" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_hot_water_uses"] = True - if 'include_annual_hvac_summary' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_hvac_summary'] = True + if "include_annual_hvac_summary" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_hvac_summary"] = True - if 'include_annual_resilience' in sim_out_rep_args_avail: - sim_out_rep_args['include_annual_resilience'] = True + if "include_annual_resilience" in sim_out_rep_args_avail: + sim_out_rep_args["include_annual_resilience"] = True - if 'include_timeseries_system_use_consumptions' in sim_out_rep_args_avail: - sim_out_rep_args['include_timeseries_system_use_consumptions'] = False + if "include_timeseries_system_use_consumptions" in sim_out_rep_args_avail: + sim_out_rep_args["include_timeseries_system_use_consumptions"] = False - if 'include_timeseries_unmet_hours' in sim_out_rep_args_avail: - sim_out_rep_args['include_timeseries_unmet_hours'] = False + if "include_timeseries_unmet_hours" in sim_out_rep_args_avail: + sim_out_rep_args["include_timeseries_unmet_hours"] = False - if 'include_timeseries_resilience' in sim_out_rep_args_avail: - sim_out_rep_args['include_timeseries_resilience'] = False + if "include_timeseries_resilience" in sim_out_rep_args_avail: + sim_out_rep_args["include_timeseries_resilience"] = False - if 'timeseries_num_decimal_places' in sim_out_rep_args_avail: - sim_out_rep_args['timeseries_num_decimal_places'] = 3 + if "timeseries_num_decimal_places" in sim_out_rep_args_avail: + sim_out_rep_args["timeseries_num_decimal_places"] = 3 - sim_out_rep_args.update(workflow_args['simulation_output_report']) + sim_out_rep_args.update(workflow_args["simulation_output_report"]) - if 'output_variables' in sim_out_rep_args: - output_variables = sim_out_rep_args['output_variables'] - sim_out_rep_args['user_output_variables'] = ','.join([str(s.get('name')) for s in output_variables]) - sim_out_rep_args.pop('output_variables') + if "output_variables" in sim_out_rep_args: + output_variables = sim_out_rep_args["output_variables"] + sim_out_rep_args["user_output_variables"] = ",".join( + [str(s.get("name")) for s in output_variables] + ) + sim_out_rep_args.pop("output_variables") util_bills_rep_args = {} - measures_dir = os.path.join(buildstock_dir, 'resources/hpxml-measures') - measure_path = os.path.join(measures_dir, 'ReportUtilityBills') - util_bills_rep_args_avail = get_measure_arguments(os.path.join(measure_path, 'measure.xml')) + measures_dir = os.path.join(buildstock_dir, "resources/hpxml-measures") + measure_path = os.path.join(measures_dir, 
"ReportUtilityBills") + util_bills_rep_args_avail = get_measure_arguments( + os.path.join(measure_path, "measure.xml") + ) - if 'include_annual_bills' in util_bills_rep_args_avail: - util_bills_rep_args['include_annual_bills'] = True + if "include_annual_bills" in util_bills_rep_args_avail: + util_bills_rep_args["include_annual_bills"] = True - if 'include_monthly_bills' in util_bills_rep_args_avail: - util_bills_rep_args['include_monthly_bills'] = False + if "include_monthly_bills" in util_bills_rep_args_avail: + util_bills_rep_args["include_monthly_bills"] = False osw = { - 'id': sim_id, - 'steps': [ + "id": sim_id, + "steps": [ { - 'measure_dir_name': 'BuildExistingModel', - 'arguments': bld_exist_model_args + "measure_dir_name": "BuildExistingModel", + "arguments": bld_exist_model_args, } ], - 'created_at': dt.datetime.now().isoformat(), - 'measure_paths': [ - 'measures', - 'resources/hpxml-measures' - ], - 'run_options': { - 'skip_zip_results': True - } + "created_at": dt.datetime.now().isoformat(), + "measure_paths": ["measures", "resources/hpxml-measures"], + "run_options": {"skip_zip_results": True}, } debug = False - if 'debug' in workflow_args: - debug = workflow_args['debug'] + if "debug" in workflow_args: + debug = workflow_args["debug"] server_dir_cleanup_args = { - 'retain_in_osm': False, - 'retain_in_idf': True, - 'retain_pre_process_idf': False, - 'retain_eplusout_audit': False, - 'retain_eplusout_bnd': False, - 'retain_eplusout_eio': False, - 'retain_eplusout_end': False, - 'retain_eplusout_err': False, - 'retain_eplusout_eso': False, - 'retain_eplusout_mdd': False, - 'retain_eplusout_mtd': False, - 'retain_eplusout_rdd': False, - 'retain_eplusout_shd': False, - 'retain_eplusout_msgpack': False, - 'retain_eplustbl_htm': False, - 'retain_stdout_energyplus': False, - 'retain_stdout_expandobject': False, - 'retain_schedules_csv': True, - 'debug': debug + "retain_in_osm": False, + "retain_in_idf": True, + "retain_pre_process_idf": False, + "retain_eplusout_audit": False, + "retain_eplusout_bnd": False, + "retain_eplusout_eio": False, + "retain_eplusout_end": False, + "retain_eplusout_err": False, + "retain_eplusout_eso": False, + "retain_eplusout_mdd": False, + "retain_eplusout_mtd": False, + "retain_eplusout_rdd": False, + "retain_eplusout_shd": False, + "retain_eplusout_msgpack": False, + "retain_eplustbl_htm": False, + "retain_stdout_energyplus": False, + "retain_stdout_expandobject": False, + "retain_schedules_csv": True, + "debug": debug, } - server_dir_cleanup_args.update(workflow_args['server_directory_cleanup']) - - osw['steps'].extend([ - { - 'measure_dir_name': 'HPXMLtoOpenStudio', - 'arguments': { - 'hpxml_path': '../../run/home.xml', - 'output_dir': '../../run', - 'debug': debug, - 'add_component_loads': add_component_loads, - 'skip_validation': True - } - } - ]) - - osw['steps'].extend(workflow_args['measures']) - - osw['steps'].extend([ - { - 'measure_dir_name': 'ReportSimulationOutput', - 'arguments': sim_out_rep_args - }, - { - 'measure_dir_name': 'ReportHPXMLOutput', - 'arguments': {} - }, - { - 'measure_dir_name': 'ReportUtilityBills', - 'arguments': util_bills_rep_args - }, - { - 'measure_dir_name': 'UpgradeCosts', - 'arguments': { - 'debug': debug + server_dir_cleanup_args.update(workflow_args["server_directory_cleanup"]) + + osw["steps"].extend( + [ + { + "measure_dir_name": "HPXMLtoOpenStudio", + "arguments": { + "hpxml_path": "../../run/home.xml", + "output_dir": "../../run", + "debug": debug, + "add_component_loads": add_component_loads, + 
"skip_validation": True, + }, } - }, - { - 'measure_dir_name': 'ServerDirectoryCleanup', - 'arguments': server_dir_cleanup_args - } - ]) + ] + ) + + osw["steps"].extend(workflow_args["measures"]) + + osw["steps"].extend( + [ + { + "measure_dir_name": "ReportSimulationOutput", + "arguments": sim_out_rep_args, + }, + {"measure_dir_name": "ReportHPXMLOutput", "arguments": {}}, + { + "measure_dir_name": "ReportUtilityBills", + "arguments": util_bills_rep_args, + }, + {"measure_dir_name": "UpgradeCosts", "arguments": {"debug": debug}}, + { + "measure_dir_name": "ServerDirectoryCleanup", + "arguments": server_dir_cleanup_args, + }, + ] + ) if upgrade_idx is not None: - measure_d = self.cfg['upgrades'][upgrade_idx] + measure_d = self.cfg["upgrades"][upgrade_idx] apply_upgrade_measure = { - 'measure_dir_name': 'ApplyUpgrade', - 'arguments': { - 'run_measure': 1 - } + "measure_dir_name": "ApplyUpgrade", + "arguments": {"run_measure": 1}, } - if 'upgrade_name' in measure_d: - apply_upgrade_measure['arguments']['upgrade_name'] = measure_d['upgrade_name'] - for opt_num, option in enumerate(measure_d['options'], 1): - apply_upgrade_measure['arguments']['option_{}'.format(opt_num)] = option['option'] - if 'lifetime' in option: - apply_upgrade_measure['arguments']['option_{}_lifetime'.format(opt_num)] = option['lifetime'] - if 'apply_logic' in option: - apply_upgrade_measure['arguments']['option_{}_apply_logic'.format(opt_num)] = \ - self.make_apply_logic_arg(option['apply_logic']) - for cost_num, cost in enumerate(option.get('costs', []), 1): - for arg in ('value', 'multiplier'): + if "upgrade_name" in measure_d: + apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d[ + "upgrade_name" + ] + for opt_num, option in enumerate(measure_d["options"], 1): + apply_upgrade_measure["arguments"][ + "option_{}".format(opt_num) + ] = option["option"] + if "lifetime" in option: + apply_upgrade_measure["arguments"][ + "option_{}_lifetime".format(opt_num) + ] = option["lifetime"] + if "apply_logic" in option: + apply_upgrade_measure["arguments"][ + "option_{}_apply_logic".format(opt_num) + ] = self.make_apply_logic_arg(option["apply_logic"]) + for cost_num, cost in enumerate(option.get("costs", []), 1): + for arg in ("value", "multiplier"): if arg not in cost: continue - apply_upgrade_measure['arguments']['option_{}_cost_{}_{}'.format(opt_num, cost_num, arg)] = \ - cost[arg] - if 'package_apply_logic' in measure_d: - apply_upgrade_measure['arguments']['package_apply_logic'] = \ - self.make_apply_logic_arg(measure_d['package_apply_logic']) - - build_existing_model_idx = \ - [x['measure_dir_name'] == 'BuildExistingModel' for x in osw['steps']].index(True) - osw['steps'].insert(build_existing_model_idx + 1, apply_upgrade_measure) - - if 'reporting_measures' in workflow_args: - for reporting_measure in workflow_args['reporting_measures']: - if 'arguments' not in reporting_measure: - reporting_measure['arguments'] = {} - reporting_measure['measure_type'] = 'ReportingMeasure' - osw['steps'].insert(-1, reporting_measure) # right before ServerDirectoryCleanup + apply_upgrade_measure["arguments"][ + "option_{}_cost_{}_{}".format(opt_num, cost_num, arg) + ] = cost[arg] + if "package_apply_logic" in measure_d: + apply_upgrade_measure["arguments"][ + "package_apply_logic" + ] = self.make_apply_logic_arg(measure_d["package_apply_logic"]) + + build_existing_model_idx = [ + x["measure_dir_name"] == "BuildExistingModel" for x in osw["steps"] + ].index(True) + osw["steps"].insert(build_existing_model_idx + 1, 
apply_upgrade_measure) + + if "reporting_measures" in workflow_args: + for reporting_measure in workflow_args["reporting_measures"]: + if "arguments" not in reporting_measure: + reporting_measure["arguments"] = {} + reporting_measure["measure_type"] = "ReportingMeasure" + osw["steps"].insert( + -1, reporting_measure + ) # right before ServerDirectoryCleanup return osw diff --git a/buildstockbatch/workflow_generator/test_workflow_generator.py b/buildstockbatch/workflow_generator/test_workflow_generator.py index a6171e9b..9a49eaea 100644 --- a/buildstockbatch/workflow_generator/test_workflow_generator.py +++ b/buildstockbatch/workflow_generator/test_workflow_generator.py @@ -1,265 +1,325 @@ from buildstockbatch.workflow_generator.base import WorkflowGeneratorBase -from buildstockbatch.workflow_generator.residential_hpxml import ResidentialHpxmlWorkflowGenerator -from buildstockbatch.workflow_generator.commercial import CommercialDefaultWorkflowGenerator +from buildstockbatch.workflow_generator.residential_hpxml import ( + ResidentialHpxmlWorkflowGenerator, +) +from buildstockbatch.workflow_generator.commercial import ( + CommercialDefaultWorkflowGenerator, +) from buildstockbatch.test.shared_testing_stuff import resstock_directory def test_apply_logic_recursion(): + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg(["one", "two", "three"]) + assert apply_logic == "(one&&two&&three)" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg(['one', 'two', 'three']) - assert apply_logic == '(one&&two&&three)' + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg( + {"and": ["one", "two", "three"]} + ) + assert apply_logic == "(one&&two&&three)" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({ - 'and': ['one', 'two', 'three'] - }) - assert apply_logic == '(one&&two&&three)' + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg( + {"or": ["four", "five", "six"]} + ) + assert apply_logic == "(four||five||six)" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({ - 'or': ['four', 'five', 'six'] - }) - assert apply_logic == '(four||five||six)' + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({"not": "seven"}) + assert apply_logic == "!seven" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({ - 'not': 'seven' - }) - assert apply_logic == '!seven' - - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({ - 'and': [ - {'not': 'abc'}, - {'or': [ - 'def', - 'ghi' - ]}, - 'jkl', - 'mno' - ] - }) - assert apply_logic == '(!abc&&(def||ghi)&&jkl&&mno)' + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg( + {"and": [{"not": "abc"}, {"or": ["def", "ghi"]}, "jkl", "mno"]} + ) + assert apply_logic == "(!abc&&(def||ghi)&&jkl&&mno)" def test_residential_hpxml(mocker): - sim_id = 'bldb1up1' + sim_id = "bldb1up1" building_id = 1 upgrade_idx = 0 cfg = { - 'buildstock_directory': resstock_directory, - 'baseline': { - 'n_buildings_represented': 100 - }, - 'workflow_generator': { - 'type': 'residential_hpxml', - 'args': { - 'build_existing_model': { - 'simulation_control_run_period_begin_month': 2, - 'simulation_control_run_period_begin_day_of_month': 1, - 'simulation_control_run_period_end_month': 2, - 'simulation_control_run_period_end_day_of_month': 28, - 'simulation_control_run_period_calendar_year': 2010, + "buildstock_directory": resstock_directory, + "baseline": {"n_buildings_represented": 100}, + "workflow_generator": { + "type": "residential_hpxml", + "args": { + "build_existing_model": { + "simulation_control_run_period_begin_month": 2, + 
"simulation_control_run_period_begin_day_of_month": 1, + "simulation_control_run_period_end_month": 2, + "simulation_control_run_period_end_day_of_month": 28, + "simulation_control_run_period_calendar_year": 2010, }, - 'simulation_output_report': { - 'timeseries_frequency': 'hourly', - 'include_timeseries_total_consumptions': True, - 'include_timeseries_end_use_consumptions': True, - 'include_timeseries_total_loads': True, - 'include_timeseries_zone_temperatures': False, - } - } + "simulation_output_report": { + "timeseries_frequency": "hourly", + "include_timeseries_total_consumptions": True, + "include_timeseries_end_use_consumptions": True, + "include_timeseries_total_loads": True, + "include_timeseries_zone_temperatures": False, + }, + }, }, - 'upgrades': [ + "upgrades": [ { - 'options': [ + "options": [ { - 'option': 'Parameter|Option', + "option": "Parameter|Option", } ], } - ] + ], } n_datapoints = 10 osw_gen = ResidentialHpxmlWorkflowGenerator(cfg, n_datapoints) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) - steps = osw['steps'] + steps = osw["steps"] assert len(steps) == 8 build_existing_model_step = steps[0] - assert build_existing_model_step['measure_dir_name'] == 'BuildExistingModel' - assert build_existing_model_step['arguments']['simulation_control_run_period_begin_month'] == 2 - assert build_existing_model_step['arguments']['simulation_control_run_period_begin_day_of_month'] == 1 - assert build_existing_model_step['arguments']['simulation_control_run_period_end_month'] == 2 - assert build_existing_model_step['arguments']['simulation_control_run_period_end_day_of_month'] == 28 - assert build_existing_model_step['arguments']['simulation_control_run_period_calendar_year'] == 2010 + assert build_existing_model_step["measure_dir_name"] == "BuildExistingModel" + assert ( + build_existing_model_step["arguments"][ + "simulation_control_run_period_begin_month" + ] + == 2 + ) + assert ( + build_existing_model_step["arguments"][ + "simulation_control_run_period_begin_day_of_month" + ] + == 1 + ) + assert ( + build_existing_model_step["arguments"][ + "simulation_control_run_period_end_month" + ] + == 2 + ) + assert ( + build_existing_model_step["arguments"][ + "simulation_control_run_period_end_day_of_month" + ] + == 28 + ) + assert ( + build_existing_model_step["arguments"][ + "simulation_control_run_period_calendar_year" + ] + == 2010 + ) apply_upgrade_step = steps[1] - assert apply_upgrade_step['measure_dir_name'] == 'ApplyUpgrade' + assert apply_upgrade_step["measure_dir_name"] == "ApplyUpgrade" hpxml_to_os_step = steps[2] - assert hpxml_to_os_step['measure_dir_name'] == 'HPXMLtoOpenStudio' + assert hpxml_to_os_step["measure_dir_name"] == "HPXMLtoOpenStudio" simulation_output_step = steps[3] - assert simulation_output_step['measure_dir_name'] == 'ReportSimulationOutput' - assert simulation_output_step['arguments']['timeseries_frequency'] == 'hourly' - assert simulation_output_step['arguments']['include_annual_total_consumptions'] is True - assert simulation_output_step['arguments']['include_annual_fuel_consumptions'] is True - assert simulation_output_step['arguments']['include_annual_end_use_consumptions'] is True - assert simulation_output_step['arguments']['include_annual_system_use_consumptions'] is False - assert simulation_output_step['arguments']['include_annual_emissions'] is True - assert simulation_output_step['arguments']['include_annual_emission_fuels'] is True - assert simulation_output_step['arguments']['include_annual_emission_end_uses'] is True - assert 
simulation_output_step['arguments']['include_annual_total_loads'] is True - assert simulation_output_step['arguments']['include_annual_unmet_hours'] is True - assert simulation_output_step['arguments']['include_annual_peak_fuels'] is True - assert simulation_output_step['arguments']['include_annual_peak_loads'] is True - assert simulation_output_step['arguments']['include_annual_component_loads'] is True - assert simulation_output_step['arguments']['include_annual_hot_water_uses'] is True - assert simulation_output_step['arguments']['include_annual_hvac_summary'] is True - assert simulation_output_step['arguments']['include_annual_resilience'] is True - assert simulation_output_step['arguments']['include_timeseries_total_consumptions'] is True - assert simulation_output_step['arguments']['include_timeseries_fuel_consumptions'] is False - assert simulation_output_step['arguments']['include_timeseries_end_use_consumptions'] is True - assert simulation_output_step['arguments']['include_timeseries_system_use_consumptions'] is False - assert simulation_output_step['arguments']['include_timeseries_emissions'] is False - assert simulation_output_step['arguments']['include_timeseries_emission_fuels'] is False - assert simulation_output_step['arguments']['include_timeseries_emission_end_uses'] is False - assert simulation_output_step['arguments']['include_timeseries_hot_water_uses'] is False - assert simulation_output_step['arguments']['include_timeseries_total_loads'] is True - assert simulation_output_step['arguments']['include_timeseries_component_loads'] is False - assert simulation_output_step['arguments']['include_timeseries_unmet_hours'] is False - assert simulation_output_step['arguments']['include_timeseries_zone_temperatures'] is False - assert simulation_output_step['arguments']['include_timeseries_airflows'] is False - assert simulation_output_step['arguments']['include_timeseries_weather'] is False - assert simulation_output_step['arguments']['include_timeseries_resilience'] is False - assert simulation_output_step['arguments']['timeseries_timestamp_convention'] == 'end' - assert simulation_output_step['arguments']['timeseries_num_decimal_places'] == 3 - assert simulation_output_step['arguments']['add_timeseries_dst_column'] is True - assert simulation_output_step['arguments']['add_timeseries_utc_column'] is True + assert simulation_output_step["measure_dir_name"] == "ReportSimulationOutput" + assert simulation_output_step["arguments"]["timeseries_frequency"] == "hourly" + assert ( + simulation_output_step["arguments"]["include_annual_total_consumptions"] is True + ) + assert ( + simulation_output_step["arguments"]["include_annual_fuel_consumptions"] is True + ) + assert ( + simulation_output_step["arguments"]["include_annual_end_use_consumptions"] + is True + ) + assert ( + simulation_output_step["arguments"]["include_annual_system_use_consumptions"] + is False + ) + assert simulation_output_step["arguments"]["include_annual_emissions"] is True + assert simulation_output_step["arguments"]["include_annual_emission_fuels"] is True + assert ( + simulation_output_step["arguments"]["include_annual_emission_end_uses"] is True + ) + assert simulation_output_step["arguments"]["include_annual_total_loads"] is True + assert simulation_output_step["arguments"]["include_annual_unmet_hours"] is True + assert simulation_output_step["arguments"]["include_annual_peak_fuels"] is True + assert simulation_output_step["arguments"]["include_annual_peak_loads"] is True + assert 
simulation_output_step["arguments"]["include_annual_component_loads"] is True + assert simulation_output_step["arguments"]["include_annual_hot_water_uses"] is True + assert simulation_output_step["arguments"]["include_annual_hvac_summary"] is True + assert simulation_output_step["arguments"]["include_annual_resilience"] is True + assert ( + simulation_output_step["arguments"]["include_timeseries_total_consumptions"] + is True + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_fuel_consumptions"] + is False + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_end_use_consumptions"] + is True + ) + assert ( + simulation_output_step["arguments"][ + "include_timeseries_system_use_consumptions" + ] + is False + ) + assert simulation_output_step["arguments"]["include_timeseries_emissions"] is False + assert ( + simulation_output_step["arguments"]["include_timeseries_emission_fuels"] + is False + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_emission_end_uses"] + is False + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_hot_water_uses"] + is False + ) + assert simulation_output_step["arguments"]["include_timeseries_total_loads"] is True + assert ( + simulation_output_step["arguments"]["include_timeseries_component_loads"] + is False + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_unmet_hours"] is False + ) + assert ( + simulation_output_step["arguments"]["include_timeseries_zone_temperatures"] + is False + ) + assert simulation_output_step["arguments"]["include_timeseries_airflows"] is False + assert simulation_output_step["arguments"]["include_timeseries_weather"] is False + assert simulation_output_step["arguments"]["include_timeseries_resilience"] is False + assert ( + simulation_output_step["arguments"]["timeseries_timestamp_convention"] == "end" + ) + assert simulation_output_step["arguments"]["timeseries_num_decimal_places"] == 3 + assert simulation_output_step["arguments"]["add_timeseries_dst_column"] is True + assert simulation_output_step["arguments"]["add_timeseries_utc_column"] is True hpxml_output_step = steps[4] - assert hpxml_output_step['measure_dir_name'] == 'ReportHPXMLOutput' + assert hpxml_output_step["measure_dir_name"] == "ReportHPXMLOutput" utility_bills_step = steps[5] - assert utility_bills_step['measure_dir_name'] == 'ReportUtilityBills' - assert utility_bills_step['arguments']['include_annual_bills'] is True - assert utility_bills_step['arguments']['include_monthly_bills'] is False + assert utility_bills_step["measure_dir_name"] == "ReportUtilityBills" + assert utility_bills_step["arguments"]["include_annual_bills"] is True + assert utility_bills_step["arguments"]["include_monthly_bills"] is False upgrade_costs_step = steps[6] - assert upgrade_costs_step['measure_dir_name'] == 'UpgradeCosts' + assert upgrade_costs_step["measure_dir_name"] == "UpgradeCosts" server_dir_cleanup_step = steps[7] - assert server_dir_cleanup_step['measure_dir_name'] == 'ServerDirectoryCleanup' + assert server_dir_cleanup_step["measure_dir_name"] == "ServerDirectoryCleanup" def test_com_default_workflow_generator_basic(mocker): - sim_id = 'bldb1up1' + sim_id = "bldb1up1" building_id = 1 upgrade_idx = None cfg = { - 'baseline': { - 'n_buildings_represented': 100 - }, - 'workflow_generator': { - 'type': 'commercial_default', - 'args': { - } - } + "baseline": {"n_buildings_represented": 100}, + "workflow_generator": {"type": "commercial_default", "args": {}}, } 
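    # A minimal sketch of the yamale round trip that validate() performs here,
    # mirroring the make_schema/make_data/validate calls shown in the commercial
    # generator earlier in this diff; the one-key schema and payload below are
    # assumed examples, not part of the test:
    #
    #     import json
    #     import yamale
    #     schema = yamale.make_schema(content="type: str(required=True)", parser="ruamel")
    #     data = yamale.make_data(content=json.dumps({"type": "commercial_default"}), parser="ruamel")
    #     yamale.validate(schema, data, strict=True)  # raises YamaleError on a mismatch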
CommercialDefaultWorkflowGenerator.validate(cfg) osw_gen = CommercialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) # Should always get BuildExistingModel - reporting_measure_step = osw['steps'][0] - assert reporting_measure_step['measure_dir_name'] == 'BuildExistingModel' - assert reporting_measure_step['arguments']['number_of_buildings_represented'] == 1 - assert reporting_measure_step['measure_type'] == 'ModelMeasure' + reporting_measure_step = osw["steps"][0] + assert reporting_measure_step["measure_dir_name"] == "BuildExistingModel" + assert reporting_measure_step["arguments"]["number_of_buildings_represented"] == 1 + assert reporting_measure_step["measure_type"] == "ModelMeasure" # Should not get TimeseriesCSVExport if excluded in args - assert len(osw['steps']) == 1 + assert len(osw["steps"]) == 1 def test_com_default_workflow_generator_with_timeseries(mocker): - sim_id = 'bldb1up1' + sim_id = "bldb1up1" building_id = 1 upgrade_idx = None cfg = { - 'baseline': { - 'n_buildings_represented': 100 - }, - 'workflow_generator': { - 'type': 'commercial_default', - 'args': { - 'timeseries_csv_export': { - 'reporting_frequency': 'Hourly', - 'inc_output_variables': 'true' + "baseline": {"n_buildings_represented": 100}, + "workflow_generator": { + "type": "commercial_default", + "args": { + "timeseries_csv_export": { + "reporting_frequency": "Hourly", + "inc_output_variables": "true", } - } - } + }, + }, } CommercialDefaultWorkflowGenerator.validate(cfg) osw_gen = CommercialDefaultWorkflowGenerator(cfg, 10) osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) # Should always get BuildExistingModel - reporting_measure_step = osw['steps'][0] - assert reporting_measure_step['measure_dir_name'] == 'BuildExistingModel' - assert reporting_measure_step['arguments']['number_of_buildings_represented'] == 1 - assert reporting_measure_step['measure_type'] == 'ModelMeasure' + reporting_measure_step = osw["steps"][0] + assert reporting_measure_step["measure_dir_name"] == "BuildExistingModel" + assert reporting_measure_step["arguments"]["number_of_buildings_represented"] == 1 + assert reporting_measure_step["measure_type"] == "ModelMeasure" # Should get TimeseriesCSVExport if included in args - reporting_measure_step = osw['steps'][1] - assert reporting_measure_step['measure_dir_name'] == 'TimeseriesCSVExport' - assert reporting_measure_step['measure_type'] == 'ReportingMeasure' - assert reporting_measure_step['arguments']['reporting_frequency'] == 'Hourly' - assert reporting_measure_step['arguments']['inc_output_variables'] == 'true' + reporting_measure_step = osw["steps"][1] + assert reporting_measure_step["measure_dir_name"] == "TimeseriesCSVExport" + assert reporting_measure_step["measure_type"] == "ReportingMeasure" + assert reporting_measure_step["arguments"]["reporting_frequency"] == "Hourly" + assert reporting_measure_step["arguments"]["inc_output_variables"] == "true" def test_com_default_workflow_generator_extended(mocker): - sim_id = 'bldb1up1' + sim_id = "bldb1up1" building_id = 1 upgrade_idx = None cfg = { - 'baseline': { - 'n_buildings_represented': 100 - }, - 'workflow_generator': { - 'type': 'commercial_default', - 'args': { - 'reporting_measures': [ - {'measure_dir_name': 'f8e23017-894d-4bdf-977f-37e3961e6f42', 'arguments': { - 'building_summary_section': 'true', - 'annual_overview_section': 'true', - 'monthly_overview_section': 'true', - 'utility_bills_rates_section': 'true', - 'envelope_section_section': 'true', - 
'space_type_breakdown_section': 'true', - 'space_type_details_section': 'true', - 'interior_lighting_section': 'true', - 'plug_loads_section': 'true', - 'exterior_light_section': 'true', - 'water_use_section': 'true', - 'hvac_load_profile': 'true', - 'zone_condition_section': 'true', - 'zone_summary_section': 'true', - 'zone_equipment_detail_section': 'true', - 'air_loops_detail_section': 'true', - 'plant_loops_detail_section': 'true', - 'outdoor_air_section': 'true', - 'cost_summary_section': 'true', - 'source_energy_section': 'true', - 'schedules_overview_section': 'true' - }}, - {'measure_dir_name': 'SimulationOutputReport'}, - {'measure_dir_name': 'comstock_sensitivity_reports'}, - {'measure_dir_name': 'qoi_report'}, - {'measure_dir_name': 'la_100_qaqc', 'arguments': {'run_qaqc': 'true'}}, - {'measure_dir_name': 'simulation_settings_check', 'arguments': {'run_sim_settings_checks': 'true'}}, - {'measure_dir_name': 'run_directory_cleanup'}, + "baseline": {"n_buildings_represented": 100}, + "workflow_generator": { + "type": "commercial_default", + "args": { + "reporting_measures": [ + { + "measure_dir_name": "f8e23017-894d-4bdf-977f-37e3961e6f42", + "arguments": { + "building_summary_section": "true", + "annual_overview_section": "true", + "monthly_overview_section": "true", + "utility_bills_rates_section": "true", + "envelope_section_section": "true", + "space_type_breakdown_section": "true", + "space_type_details_section": "true", + "interior_lighting_section": "true", + "plug_loads_section": "true", + "exterior_light_section": "true", + "water_use_section": "true", + "hvac_load_profile": "true", + "zone_condition_section": "true", + "zone_summary_section": "true", + "zone_equipment_detail_section": "true", + "air_loops_detail_section": "true", + "plant_loops_detail_section": "true", + "outdoor_air_section": "true", + "cost_summary_section": "true", + "source_energy_section": "true", + "schedules_overview_section": "true", + }, + }, + {"measure_dir_name": "SimulationOutputReport"}, + {"measure_dir_name": "comstock_sensitivity_reports"}, + {"measure_dir_name": "qoi_report"}, + { + "measure_dir_name": "la_100_qaqc", + "arguments": {"run_qaqc": "true"}, + }, + { + "measure_dir_name": "simulation_settings_check", + "arguments": {"run_sim_settings_checks": "true"}, + }, + {"measure_dir_name": "run_directory_cleanup"}, ], - 'timeseries_csv_export': { - 'reporting_frequency': 'Hourly', - 'inc_output_variables': 'true' - } - } - } + "timeseries_csv_export": { + "reporting_frequency": "Hourly", + "inc_output_variables": "true", + }, + }, + }, } CommercialDefaultWorkflowGenerator.validate(cfg) @@ -268,23 +328,28 @@ def test_com_default_workflow_generator_extended(mocker): osw = osw_gen.create_osw(sim_id, building_id, upgrade_idx) # Should always get SimulationOutputReport - reporting_measure_step = osw['steps'][3] - assert reporting_measure_step['measure_dir_name'] == 'SimulationOutputReport' - assert reporting_measure_step['measure_type'] == 'ReportingMeasure' - assert reporting_measure_step['arguments'] == {} + reporting_measure_step = osw["steps"][3] + assert reporting_measure_step["measure_dir_name"] == "SimulationOutputReport" + assert reporting_measure_step["measure_type"] == "ReportingMeasure" + assert reporting_measure_step["arguments"] == {} # Should only be one instance of SimulationOutputReport - assert [d['measure_dir_name'] == 'SimulationOutputReport' for d in osw['steps']].count(True) == 1 + assert [ + d["measure_dir_name"] == "SimulationOutputReport" for d in osw["steps"] + 
].count(True) == 1 # Should get TimeseriesCSVExport if included in args - reporting_measure_step = osw['steps'][1] - assert reporting_measure_step['measure_dir_name'] == 'TimeseriesCSVExport' - assert reporting_measure_step['measure_type'] == 'ReportingMeasure' - assert reporting_measure_step['arguments']['reporting_frequency'] == 'Hourly' - assert reporting_measure_step['arguments']['inc_output_variables'] == 'true' + reporting_measure_step = osw["steps"][1] + assert reporting_measure_step["measure_dir_name"] == "TimeseriesCSVExport" + assert reporting_measure_step["measure_type"] == "ReportingMeasure" + assert reporting_measure_step["arguments"]["reporting_frequency"] == "Hourly" + assert reporting_measure_step["arguments"]["inc_output_variables"] == "true" # Should have the openstudio report - reporting_measure_step = osw['steps'][2] - assert reporting_measure_step['measure_dir_name'] == 'f8e23017-894d-4bdf-977f-37e3961e6f42' - assert reporting_measure_step['measure_type'] == 'ReportingMeasure' - assert reporting_measure_step['arguments']['building_summary_section'] == 'true' - assert reporting_measure_step['arguments']['schedules_overview_section'] == 'true' + reporting_measure_step = osw["steps"][2] + assert ( + reporting_measure_step["measure_dir_name"] + == "f8e23017-894d-4bdf-977f-37e3961e6f42" + ) + assert reporting_measure_step["measure_type"] == "ReportingMeasure" + assert reporting_measure_step["arguments"]["building_summary_section"] == "true" + assert reporting_measure_step["arguments"]["schedules_overview_section"] == "true" # Should have 1 workflow measure plus 9 reporting measures - assert len(osw['steps']) == 9 + assert len(osw["steps"]) == 9 diff --git a/docs/conf.py b/docs/conf.py index ceb6b474..45c44c52 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,19 +20,21 @@ here = os.path.abspath(os.path.dirname(__file__)) metadata = {} -with open(os.path.join(here, '..', 'buildstockbatch', '__version__.py'), 'r', encoding='utf-8') as f: +with open( + os.path.join(here, "..", "buildstockbatch", "__version__.py"), "r", encoding="utf-8" +) as f: exec(f.read(), metadata) # -- Project information ----------------------------------------------------- -project = metadata['__title__'] -copyright = metadata['__copyright__'] -author = metadata['__author__'] +project = metadata["__title__"] +copyright = metadata["__copyright__"] +author = metadata["__author__"] # The short X.Y version -version = metadata['__version__'] +version = metadata["__version__"] # The full version, including alpha/beta/rc tags -release = metadata['__version__'] +release = metadata["__version__"] # -- General configuration --------------------------------------------------- @@ -45,12 +47,12 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.todo', - 'sphinx.ext.mathjax', - 'sphinxcontrib.programoutput', - 'changelog', - 'sphinx_paramlinks' + "sphinx.ext.autodoc", + "sphinx.ext.todo", + "sphinx.ext.mathjax", + "sphinxcontrib.programoutput", + "changelog", + "sphinx_paramlinks", ] changelog_sections = [ @@ -64,16 +66,10 @@ "local", "aws", "postprocessing", - "documentation" + "documentation", ] # tags to sort on inside of sections -changelog_inner_tag_sort = [ - "feature", - "changed", - "removed", - "bug", - "moved" -] +changelog_inner_tag_sort = ["feature", "changed", "removed", "bug", "moved"] # how to render changelog links @@ -84,28 +80,28 @@ } # Add any paths that contain templates here, relative to this directory. 
-templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None @@ -116,8 +112,10 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_theme_path = ["_themes", ] +html_theme = "sphinx_rtd_theme" +html_theme_path = [ + "_themes", +] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -144,7 +142,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'BuildStockBatchdoc' +htmlhelp_basename = "BuildStockBatchdoc" # -- Options for LaTeX output ------------------------------------------------ @@ -153,15 +151,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -171,8 +166,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'BuildStockBatch.tex', 'BuildStock Batch Documentation', - 'Noel Merket (NREL)', 'manual'), + ( + master_doc, + "BuildStockBatch.tex", + "BuildStock Batch Documentation", + "Noel Merket (NREL)", + "manual", + ), ] @@ -181,8 +181,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'buildstockbatch', 'BuildStock Batch Documentation', - [author], 1) + (master_doc, "buildstockbatch", "BuildStock Batch Documentation", [author], 1) ] @@ -192,9 +191,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'BuildStockBatch', 'BuildStock Batch Documentation', - author, 'BuildStockBatch', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "BuildStockBatch", + "BuildStock Batch Documentation", + author, + "BuildStockBatch", + "One line description of project.", + "Miscellaneous", + ), ] @@ -213,7 +218,7 @@ # epub_uid = '' # A list of files that should not be packed into the epub file. 
-epub_exclude_files = ['search.html'] +epub_exclude_files = ["search.html"] # -- Extension configuration ------------------------------------------------- @@ -227,14 +232,14 @@ # Ignore reference targets not found nitpick_ignore = [ - ('py:func', 'BuildStockBatchBase.validate_precomputed_sample'), - ('py:func', 'BuildStockBatchBase.validate_xor_nor_schema_keys'), - ('py:func', 'EagleBatch.run_building'), - ('py:class', 'sampler.CommercialSobolSingularitySampler'), - ('py:class', 'sampler.CommercialSobolDockerSampler'), - ('py:class', 'workflow_generator.CommercialDefaultWorkflowGenerator'), - ('py:class', 'sampler.PrecomputedSampler'), - ('py:class', 'sampler.BuildStockSampler'), - ('py:class', 'BuildStockBatchBase'), - ('py:func', 'BuildStockBatchBase.run_sampling') + ("py:func", "BuildStockBatchBase.validate_precomputed_sample"), + ("py:func", "BuildStockBatchBase.validate_xor_nor_schema_keys"), + ("py:func", "EagleBatch.run_building"), + ("py:class", "sampler.CommercialSobolSingularitySampler"), + ("py:class", "sampler.CommercialSobolDockerSampler"), + ("py:class", "workflow_generator.CommercialDefaultWorkflowGenerator"), + ("py:class", "sampler.PrecomputedSampler"), + ("py:class", "sampler.BuildStockSampler"), + ("py:class", "BuildStockBatchBase"), + ("py:func", "BuildStockBatchBase.run_sampling"), ] diff --git a/setup.py b/setup.py index bb37c4d0..9d49ca49 100644 --- a/setup.py +++ b/setup.py @@ -8,78 +8,78 @@ here = os.path.abspath(os.path.dirname(__file__)) metadata = {} -with open(os.path.join(here, 'buildstockbatch', '__version__.py'), 'r', encoding='utf-8') as f: +with open( + os.path.join(here, "buildstockbatch", "__version__.py"), "r", encoding="utf-8" +) as f: exec(f.read(), metadata) -with open('README.md', 'r', 'utf-8') as f: +with open("README.md", "r", "utf-8") as f: readme = f.read() setuptools.setup( - name=metadata['__title__'], - version=metadata['__version__'], - author=metadata['__author__'], - author_email=metadata['__author_email__'], - description=metadata['__description__'], + name=metadata["__title__"], + version=metadata["__version__"], + author=metadata["__author__"], + author_email=metadata["__author_email__"], + description=metadata["__description__"], long_description=readme, - long_description_content_type='text/markdown', - url=metadata['__url__'], + long_description_content_type="text/markdown", + url=metadata["__url__"], packages=setuptools.find_packages(), - python_requires='>=3.8', - package_data={ - 'buildstockbatch': ['*.sh', 'schemas/*.yaml'], - '': ['LICENSE'] - }, + python_requires=">=3.8", + package_data={"buildstockbatch": ["*.sh", "schemas/*.yaml"], "": ["LICENSE"]}, install_requires=[ - 'pyyaml', - 'requests', - 'numpy', - 'pandas>=2', - 'joblib', - 'pyarrow', - 'dask[complete]>=2022.10.0', - 'docker', - 's3fs[boto3]', - 'fsspec', - 'yamale', - 'ruamel.yaml', - 'awsretry', - 'lxml', - 'semver' + "pyyaml", + "requests", + "numpy", + "pandas>=2", + "joblib", + "pyarrow", + "dask[complete]>=2022.10.0", + "docker", + "s3fs[boto3]", + "fsspec", + "yamale", + "ruamel.yaml", + "awsretry", + "lxml", + "semver", ], extras_require={ - 'dev': [ - 'pytest', - 'pytest-mock', - 'pytest-cov', - 'testfixtures', - 'Sphinx', - 'sphinx_rtd_theme>=1.1.0', - 'sphinx-autobuild', - 'sphinxcontrib-programoutput', - 'sphinx_paramlinks', - 'changelog', - 'flake8', - 'rope', - 'doc8' + "dev": [ + "pytest", + "pytest-mock", + "pytest-cov", + "testfixtures", + "Sphinx", + "sphinx_rtd_theme>=1.1.0", + "sphinx-autobuild", + "sphinxcontrib-programoutput", + 
"sphinx_paramlinks", + "changelog", + "flake8", + "rope", + "doc8", + "pre-commit", ] }, entry_points={ - 'console_scripts': [ - 'buildstock_local=buildstockbatch.local:main', - 'buildstock_eagle=buildstockbatch.eagle:user_cli', - 'buildstock_aws=buildstockbatch.aws.aws:main' + "console_scripts": [ + "buildstock_local=buildstockbatch.local:main", + "buildstock_eagle=buildstockbatch.eagle:user_cli", + "buildstock_aws=buildstockbatch.aws.aws:main", ] }, - license='BSD-3', + license="BSD-3", classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Natural Language :: English', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11' - ] + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + ], ) From 49cc10b438f082fba48281ac6755b9c69b701f3e Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Tue, 24 Oct 2023 15:45:22 -0600 Subject: [PATCH 2/7] updating installation docs --- docs/installation.rst | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index fb920e64..d35f9aad 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -75,17 +75,34 @@ Get a copy of BuildStockBatch either by downloading the zip file from GitHub or Optional, but highly recommended, is to create a new `python virtual environment`_ if you're using python from python.org, or to create a new `conda -environment`_ if you're using Anaconda. Make sure you configure your virtual environment to use Python 3.8 or greater. Then activate your environment. +environment`_ if you're using Anaconda. Make sure you configure your virtual +environment to use Python 3.8 or greater. Then activate your environment. .. _python virtual environment: https://docs.python.org/3/library/venv.html .. _conda environment: https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html -Install the library by doing the following: +Standard Install +................ + +If you are just going to be using buildstockbatch, not working on it, install like so: + +:: + + cd /path/to/buildstockbatch + python -m pip install -e . + +Developer Install +................. + +If you are going to be working on and contributing back to buildstockbatch, +install as follows after cloning the repository and creating and activating a +new python or conda environment. :: cd /path/to/buildstockbatch - python -m pip install -e . --user + python -m pip install -e ".[dev]" + pre-commit install .. 
 
 .. _aws-user-config-local:

From dce66e2feb5e5d2ec4825fe480cb786c025c2b81 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Tue, 24 Oct 2023 15:53:19 -0600
Subject: [PATCH 3/7] adding additional pre-commit hooks

---
 .pre-commit-config.yaml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 489940ae..d5bdeb08 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,12 @@
 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+        exclude_types: ["csv", "tsv"]
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 23.10.1
     hooks:
       - id: black
-      language_version: python3.11
\ No newline at end of file
+      language_version: python3.11

From 7b987b55ac60af58bb5f3df687689c9bbd1c4f5f Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 30 Oct 2023 13:40:45 -0600
Subject: [PATCH 4/7] adding precommit to ci

---
 .github/workflows/ci.yml | 4 +++-
 buildstockbatch/eagle.py | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ab56c1be..b9c4f9a9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,5 +1,5 @@
 name: BuildStockBatch Tests
-on: 
+on:
   push:
     branches:
      - develop
@@ -86,3 +86,5 @@ jobs:
         with:
           name: documentation
           path: buildstockbatch/docs/_build/html/
+      - uses: pre-commit-ci/lite-action@v1.0.1
+        if: always()
diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py
index 50d4e1e8..f5ee0b43 100644
--- a/buildstockbatch/eagle.py
+++ b/buildstockbatch/eagle.py
@@ -25,7 +25,6 @@
 import pathlib
 import random
 import re
-import requests
 import shlex
 import shutil
 import subprocess

From cb368d6cfdd0931f392c1617de9168265270c5eb Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Mon, 30 Oct 2023 13:51:04 -0600
Subject: [PATCH 5/7] updating OpenStudio to 3.7.0-rc1

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b9c4f9a9..afcffd8b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,8 +34,8 @@ jobs:
           wget --quiet https://data.nrel.gov/system/files/156/BuildStock_TMY3_FIPS.zip
       - name: Download and Install OpenStudio
         run: |
-          wget -q https://github.com/NREL/OpenStudio/releases/download/v3.6.1/OpenStudio-3.6.1+bb9481519e-Ubuntu-20.04-x86_64.deb
-          sudo apt install -y ./OpenStudio-3.6.1+bb9481519e-Ubuntu-20.04-x86_64.deb
+          wget -q https://github.com/NREL/OpenStudio/releases/download/v3.7.0-rc1/OpenStudio-3.7.0-rc1+211bb633b0-Ubuntu-22.04-x86_64.deb
+          sudo apt install -y ./OpenStudio-3.7.0-rc1+211bb633b0-Ubuntu-22.04-x86_64.deb
           openstudio openstudio_version
           which openstudio
       - name: Install buildstockbatch

From 044df4ae0c67306ee2b09a699607537f56505fe2 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Tue, 31 Oct 2023 09:18:05 -0600
Subject: [PATCH 6/7] updating line length for black

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..55ec8d78
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.black]
+line-length = 120

From ba56460b1ba56c74149690c23551b2187d452f37 Mon Sep 17 00:00:00 2001
From: Noel Merket
Date: Tue, 31 Oct 2023 09:18:52 -0600
Subject: [PATCH 7/7] adding black to dev extras_require

---
 buildstockbatch/__version__.py | 4 +-
 buildstockbatch/aws/aws.py | 337 +++++------
buildstockbatch/aws/awsbase.py | 56 +-- buildstockbatch/aws/s3_assets/bsb_post.py | 8 +- buildstockbatch/base.py | 282 ++++----------- buildstockbatch/eagle.py | 170 +++------ buildstockbatch/local.py | 75 +--- buildstockbatch/postprocessing.py | 223 +++--------- buildstockbatch/sampler/base.py | 12 +- buildstockbatch/sampler/commercial_sobol.py | 40 +-- buildstockbatch/sampler/downselect.py | 31 +- buildstockbatch/sampler/residential_quota.py | 9 +- buildstockbatch/test/conftest.py | 14 +- buildstockbatch/test/shared_testing_stuff.py | 4 +- buildstockbatch/test/test_base.py | 90 ++--- buildstockbatch/test/test_eagle.py | 134 ++----- buildstockbatch/test/test_local.py | 33 +- buildstockbatch/test/test_postprocessing.py | 26 +- buildstockbatch/test/test_validation.py | 96 ++--- buildstockbatch/utils.py | 29 +- .../workflow_generator/commercial.py | 22 +- .../workflow_generator/residential_hpxml.py | 69 +--- .../test_workflow_generator.py | 129 ++----- docs/conf.py | 12 +- setup.py | 5 +- 25 files changed, 448 insertions(+), 1462 deletions(-) diff --git a/buildstockbatch/__version__.py b/buildstockbatch/__version__.py index b5750e31..b52fb766 100644 --- a/buildstockbatch/__version__.py +++ b/buildstockbatch/__version__.py @@ -9,6 +9,4 @@ __author__ = "Noel Merket" __author_email__ = "noel.merket@nrel.gov" __license__ = "BSD-3" -__copyright__ = "Copyright {} The Alliance for Sustainable Energy".format( - dt.date.today().year -) +__copyright__ = "Copyright {} The Alliance for Sustainable Energy".format(dt.date.today().year) diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index 5b8c7b04..0d761337 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -65,9 +65,7 @@ def filename_generator(): if filename.startswith("."): continue local_filepath = pathlib.Path(dirpath, filename) - s3_key = pathlib.PurePosixPath( - prefix, local_filepath.relative_to(local_dir_abs) - ) + s3_key = pathlib.PurePosixPath(prefix, local_filepath.relative_to(local_dir_abs)) yield local_filepath, s3_key logger.debug("Uploading {} => {}/{}".format(local_dir_abs, bucket, prefix)) @@ -147,9 +145,7 @@ def create_emr_lambda_roles(self): { "Effect": "Allow", "Action": ["logs:CreateLogStream", "logs:PutLogEvents"], - "Resource": [ - f"arn:aws:logs:{self.region}:{self.account}:log-group:/aws/lambda/launchemr:*" - ], + "Resource": [f"arn:aws:logs:{self.region}:{self.account}:log-group:/aws/lambda/launchemr:*"], }, { "Effect": "Allow", @@ -294,9 +290,7 @@ def create_vpc(self): # Create the public subnet - pub_response = self.ec2.create_subnet( - CidrBlock=self.pub_subnet_cidr, VpcId=self.vpc_id - ) + pub_response = self.ec2.create_subnet(CidrBlock=self.pub_subnet_cidr, VpcId=self.vpc_id) logger.info("EIP allocated.") @@ -327,9 +321,7 @@ def create_vpc(self): # Create an internet gateway - self.ec2.attach_internet_gateway( - InternetGatewayId=self.internet_gateway_id, VpcId=self.vpc_id - ) + self.ec2.attach_internet_gateway(InternetGatewayId=self.internet_gateway_id, VpcId=self.vpc_id) logger.info("Internet Gateway attached.") @@ -364,9 +356,7 @@ def create_vpc(self): # Create a NAT Gateway - nat_response = self.ec2.create_nat_gateway( - AllocationId=self.nat_ip_allocation, SubnetId=self.pub_vpc_subnet_id - ) + nat_response = self.ec2.create_nat_gateway(AllocationId=self.nat_ip_allocation, SubnetId=self.pub_vpc_subnet_id) self.nat_gateway_id = nat_response["NatGateway"]["NatGatewayId"] @@ -387,14 +377,10 @@ def create_vpc(self): # Associate the private route to the private subnet - 
self.ec2.associate_route_table( - RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_1 - ) + self.ec2.associate_route_table(RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_1) logger.info("Route table associated with subnet.") - self.ec2.associate_route_table( - RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_2 - ) + self.ec2.associate_route_table(RouteTableId=self.priv_route_table_id, SubnetId=self.priv_vpc_subnet_id_2) logger.info("Route table associated with subnet.") # Associate the NAT gateway with the private route @@ -435,9 +421,7 @@ def create_batch_service_roles(self): self.batch_service_role_name, "batch", f"Service role for Batch environment {self.job_identifier}", - managed_policie_arns=[ - "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole" - ], + managed_policie_arns=["arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"], ) # Instance Role for Batch compute environment @@ -446,17 +430,13 @@ def create_batch_service_roles(self): self.batch_instance_role_name, "ec2", f"Instance role for Batch compute environment {self.job_identifier}", - managed_policie_arns=[ - "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role" - ], + managed_policie_arns=["arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"], ) # Instance Profile try: - response = self.iam.create_instance_profile( - InstanceProfileName=self.batch_instance_profile_name - ) + response = self.iam.create_instance_profile(InstanceProfileName=self.batch_instance_profile_name) self.instance_profile_arn = response["InstanceProfile"]["Arn"] @@ -470,9 +450,7 @@ def create_batch_service_roles(self): except Exception as e: if "EntityAlreadyExists" in str(e): logger.info("ECS Instance Profile not created - already exists") - response = self.iam.get_instance_profile( - InstanceProfileName=self.batch_instance_profile_name - ) + response = self.iam.get_instance_profile(InstanceProfileName=self.batch_instance_profile_name) self.instance_profile_arn = response["InstanceProfile"]["Arn"] # ECS Task Policy @@ -577,9 +555,7 @@ def create_batch_service_roles(self): self.batch_spot_service_role_name, "spotfleet", f"Spot Fleet role for Batch compute environment {self.job_identifier}", - managed_policie_arns=[ - "arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole" - ], + managed_policie_arns=["arn:aws:iam::aws:policy/service-role/AmazonEC2SpotFleetTaggingRole"], ) def create_compute_environment(self, maxCPUs=10000): @@ -623,15 +599,11 @@ def create_compute_environment(self, maxCPUs=10000): serviceRole=self.service_role_arn, ) - logger.info( - f"Compute environment {self.batch_compute_environment_name} created." 
- ) + logger.info(f"Compute environment {self.batch_compute_environment_name} created.") except Exception as e: if "Object already exists" in str(e): - logger.info( - f"Compute environment {self.batch_compute_environment_name} not created - already exists" - ) + logger.info(f"Compute environment {self.batch_compute_environment_name} not created - already exists") else: raise @@ -662,9 +634,7 @@ def create_job_queue(self): except Exception as e: if "Object already exists" in str(e): - logger.info( - f"Job queue {self.batch_job_queue_name} not created - already exists" - ) + logger.info(f"Job queue {self.batch_job_queue_name} not created - already exists") response = self.batch.describe_job_queues( jobQueues=[ self.batch_job_queue_name, @@ -676,8 +646,7 @@ def create_job_queue(self): elif "is not valid" in str(e): # Need to wait a second for the compute environment to complete registration logger.warning( - "5 second sleep initiated to wait for compute environment creation due to error: " - + str(e) + "5 second sleep initiated to wait for compute environment creation due to error: " + str(e) ) time.sleep(5) @@ -732,10 +701,7 @@ def submit_job(self, array_size=4): except Exception as e: if "not in VALID state" in str(e): # Need to wait a second for the compute environment to complete registration - logger.warning( - "5 second sleep initiated to wait for job queue creation due to error: " - + str(e) - ) + logger.warning("5 second sleep initiated to wait for job queue creation due to error: " + str(e)) time.sleep(5) else: raise @@ -947,20 +913,14 @@ def clean(self): except Exception as e: if "ResourceNotFoundException" in str(e): - logger.info( - f"EMR cluster {self.emr_cluster_name} already MIA - skipping..." - ) + logger.info(f"EMR cluster {self.emr_cluster_name} already MIA - skipping...") - self.iam_helper.remove_role_from_instance_profile( - self.emr_instance_profile_name - ) + self.iam_helper.remove_role_from_instance_profile(self.emr_instance_profile_name) self.iam_helper.delete_instance_profile(self.emr_instance_profile_name) self.iam_helper.delete_role(self.emr_job_flow_role_name) self.iam_helper.delete_role(self.emr_service_role_name) - logger.info( - f"EMR clean complete. Results bucket and data {self.s3_bucket} have not been deleted." - ) + logger.info(f"EMR clean complete. Results bucket and data {self.s3_bucket} have not been deleted.") logger.info(f"Deleting Security group {self.emr_cluster_security_group_name}.") default_sg_response = self.ec2.describe_security_groups( @@ -980,9 +940,7 @@ def clean(self): default_group_id = group["GroupId"] dsg = self.ec2r.SecurityGroup(default_group_id) if len(dsg.ip_permissions_egress): - response = dsg.revoke_egress( - IpPermissions=dsg.ip_permissions_egress - ) + response = dsg.revoke_egress(IpPermissions=dsg.ip_permissions_egress) sg_response = AWSRetry.backoff()(self.ec2.describe_security_groups)( Filters=[ @@ -1006,38 +964,26 @@ def clean(self): self.ec2.delete_security_group(GroupId=group_id) break except ClientError: - logger.info( - "Waiting for security group ingress rules to be removed ..." - ) + logger.info("Waiting for security group ingress rules to be removed ...") time.sleep(5) - logger.info( - f"Deleted security group {self.emr_cluster_security_group_name}." 
- ) + logger.info(f"Deleted security group {self.emr_cluster_security_group_name}.") except Exception as e: if "does not exist" in str(e) or "list index out of range" in str(e): - logger.info( - f"Security group {self.emr_cluster_security_group_name} does not exist - skipping..." - ) + logger.info(f"Security group {self.emr_cluster_security_group_name} does not exist - skipping...") else: raise try: - self.aws_lambda.delete_function( - FunctionName=self.lambda_emr_job_step_function_name - ) + self.aws_lambda.delete_function(FunctionName=self.lambda_emr_job_step_function_name) except Exception as e: if "Function not found" in str(e): - logger.info( - f"Function {self.lambda_emr_job_step_function_name} not found, skipping..." - ) + logger.info(f"Function {self.lambda_emr_job_step_function_name} not found, skipping...") else: raise try: - self.s3.delete_object( - Bucket=self.s3_bucket, Key=self.s3_lambda_code_emr_cluster_key - ) + self.s3.delete_object(Bucket=self.s3_bucket, Key=self.s3_lambda_code_emr_cluster_key) logger.info( f"S3 object {self.s3_lambda_code_emr_cluster_key} for bucket {self.s3_bucket} deleted." # noqa E501 ) @@ -1056,39 +1002,29 @@ def clean(self): for sm in state_machines["stateMachines"]: if sm["name"] == self.state_machine_name: self.state_machine_arn = sm["stateMachineArn"] - self.step_functions.delete_state_machine( - stateMachineArn=self.state_machine_arn - ) + self.step_functions.delete_state_machine(stateMachineArn=self.state_machine_arn) logger.info(f"Deleted state machine {self.state_machine_name}.") break self.iam_helper.delete_role(self.state_machine_role_name) try: - self.batch.update_job_queue( - jobQueue=self.batch_job_queue_name, state="DISABLED" - ) + self.batch.update_job_queue(jobQueue=self.batch_job_queue_name, state="DISABLED") while True: try: - response = self.batch.delete_job_queue( - jobQueue=self.batch_job_queue_name - ) + response = self.batch.delete_job_queue(jobQueue=self.batch_job_queue_name) logger.info(f"Job queue {self.batch_job_queue_name} deleted.") break except Exception as e: if "Cannot delete, resource is being modified" in str(e): - logger.info( - "Job queue being modified - sleeping until ready..." - ) + logger.info("Job queue being modified - sleeping until ready...") time.sleep(5) else: raise except Exception as e: if "does not exist" in str(e): - logger.info( - f"Job queue {self.batch_job_queue_name} missing, skipping..." - ) + logger.info(f"Job queue {self.batch_job_queue_name} missing, skipping...") # Delete compute enviornment @@ -1102,25 +1038,17 @@ def clean(self): response = self.batch.delete_compute_environment( computeEnvironment=self.batch_compute_environment_name ) - logger.info( - f"Compute environment {self.batch_compute_environment_name} deleted." - ) + logger.info(f"Compute environment {self.batch_compute_environment_name} deleted.") break except Exception as e: - if "Cannot delete, resource is being modified" in str( - e - ) or "found existing JobQueue" in str(e): - logger.info( - "Compute environment being modified - sleeping until ready..." - ) + if "Cannot delete, resource is being modified" in str(e) or "found existing JobQueue" in str(e): + logger.info("Compute environment being modified - sleeping until ready...") time.sleep(5) else: raise except Exception as e: if "does not exist" in str(e): - logger.info( - f"Compute environment {self.batch_compute_environment_name} missing, skipping..." 
- ) + logger.info(f"Compute environment {self.batch_compute_environment_name} missing, skipping...") else: raise @@ -1128,9 +1056,7 @@ def clean(self): self.iam_helper.delete_role(self.batch_spot_service_role_name) self.iam_helper.delete_role(self.batch_ecs_task_role_name) # Instance profile order of removal - self.iam_helper.remove_role_from_instance_profile( - self.batch_instance_profile_name - ) + self.iam_helper.remove_role_from_instance_profile(self.batch_instance_profile_name) self.iam_helper.delete_role(self.batch_instance_role_name) self.iam_helper.delete_instance_profile(self.batch_instance_profile_name) @@ -1174,9 +1100,7 @@ def clean(self): rt_counter = 10 while rt_counter: try: - response = self.ec2.delete_route_table( - RouteTableId=route_table_id - ) + response = self.ec2.delete_route_table(RouteTableId=route_table_id) logger.info("Route table removed.") break except Exception as e: @@ -1200,20 +1124,14 @@ def clean(self): try: try: self.ec2.detach_internet_gateway( - InternetGatewayId=internet_gateway[ - "InternetGatewayId" - ], + InternetGatewayId=internet_gateway["InternetGatewayId"], VpcId=attachment["VpcId"], ) except Exception as e: - logger.info( - f"Error on Internet Gateway disassociation - ignoring... {str(e)}" - ) + logger.info(f"Error on Internet Gateway disassociation - ignoring... {str(e)}") self.ec2.delete_internet_gateway( - InternetGatewayId=internet_gateway[ - "InternetGatewayId" - ] + InternetGatewayId=internet_gateway["InternetGatewayId"] ) logger.info("Internet Gateway deleted.") break @@ -1227,9 +1145,7 @@ def clean(self): else: raise - subn_response = self.ec2.describe_subnets( - Filters=[{"Name": "vpc-id", "Values": [this_vpc]}] - ) + subn_response = self.ec2.describe_subnets(Filters=[{"Name": "vpc-id", "Values": [this_vpc]}]) for subnet in subn_response["Subnets"]: while True: @@ -1238,9 +1154,7 @@ def clean(self): break except Exception as e: if "DependencyViolation" in str(e): - logger.info( - "Subnet cannot be deleted as dependencies are still being deleted. Sleeping..." - ) + logger.info("Subnet cannot be deleted as dependencies are still being deleted. Sleeping...") time.sleep(10) else: raise @@ -1286,9 +1200,7 @@ def create_emr_security_groups(self): ] ) - self.emr_cluster_security_group_id = response["SecurityGroups"][0][ - "GroupId" - ] + self.emr_cluster_security_group_id = response["SecurityGroups"][0]["GroupId"] else: raise @@ -1309,9 +1221,7 @@ def create_emr_security_groups(self): ) except Exception as e: if "already exists" in str(e): - logger.info( - "Security group egress rule for EMR already exists, skipping ..." 
- ) + logger.info("Security group egress rule for EMR already exists, skipping ...") else: raise @@ -1320,9 +1230,7 @@ def create_emr_iam_roles(self): self.emr_service_role_name, "elasticmapreduce", f"EMR Service Role {self.job_identifier}", - managed_policie_arns=[ - "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole" - ], + managed_policie_arns=["arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole"], ) emr_policy = """{ @@ -1361,16 +1269,12 @@ def create_emr_iam_roles(self): self.emr_job_flow_role_name, "ec2", f"EMR Job Flow Role {self.job_identifier}", - managed_policie_arns=[ - "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role" - ], + managed_policie_arns=["arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"], policies_list=[emr_policy], ) try: - response = self.iam.create_instance_profile( - InstanceProfileName=self.emr_instance_profile_name - ) + response = self.iam.create_instance_profile(InstanceProfileName=self.emr_instance_profile_name) self.emr_instance_profile_arn = response["InstanceProfile"]["Arn"] @@ -1384,18 +1288,14 @@ def create_emr_iam_roles(self): except Exception as e: if "EntityAlreadyExists" in str(e): logger.info("EMR Instance Profile not created - already exists") - response = self.iam.get_instance_profile( - InstanceProfileName=self.emr_instance_profile_name - ) + response = self.iam.get_instance_profile(InstanceProfileName=self.emr_instance_profile_name) self.emr_instance_profile_arn = response["InstanceProfile"]["Arn"] def upload_assets(self): logger.info("Uploading EMR support assets...") fs = S3FileSystem() here = os.path.dirname(os.path.abspath(__file__)) - emr_folder = ( - f"{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}" - ) + emr_folder = f"{self.s3_bucket}/{self.s3_bucket_prefix}/{self.s3_emr_folder_name}" fs.makedirs(emr_folder) # bsb_post.sh @@ -1409,9 +1309,7 @@ def upload_assets(self): f.write(bsb_post_bash) # bsb_post.py - fs.put( - os.path.join(here, "s3_assets", "bsb_post.py"), f"{emr_folder}/bsb_post.py" - ) + fs.put(os.path.join(here, "s3_assets", "bsb_post.py"), f"{emr_folder}/bsb_post.py") # bootstrap-dask-custom fs.put( @@ -1470,9 +1368,7 @@ def create_emr_cluster_function(self): "Name": "launchFromS3", "ScriptBootstrapAction": { "Path": bootstrap_action, - "Args": [ - f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/emr/postprocessing.tar.gz" - ], + "Args": [f"s3://{self.s3_bucket}/{self.s3_bucket_prefix}/emr/postprocessing.tar.gz"], }, }, ], @@ -1516,9 +1412,7 @@ def create_emr_cluster_function(self): zi.external_attr = 0o100755 << 16 zf.writestr(zi, function_script, zipfile.ZIP_DEFLATED) f.seek(0) - self.s3.upload_fileobj( - f, self.s3_bucket, self.s3_lambda_code_emr_cluster_key - ) + self.s3.upload_fileobj(f, self.s3_bucket, self.s3_lambda_code_emr_cluster_key) while True: try: @@ -1545,9 +1439,7 @@ def create_emr_cluster_function(self): Tags={"job": self.job_identifier}, ) - logger.info( - f"Lambda function {self.lambda_emr_job_step_function_name} created." - ) + logger.info(f"Lambda function {self.lambda_emr_job_step_function_name} created.") break except Exception as e: @@ -1557,14 +1449,10 @@ def create_emr_cluster_function(self): ) time.sleep(5) elif "Function already exist" in str(e): - logger.info( - f"Lambda function {self.lambda_emr_job_step_function_name} exists, skipping..." 
- ) + logger.info(f"Lambda function {self.lambda_emr_job_step_function_name} exists, skipping...") break elif "ARN does not refer to a valid principal" in str(e): - logger.info( - "Waiting for roles/permissions to propagate to allow Lambda function creation ..." - ) + logger.info("Waiting for roles/permissions to propagate to allow Lambda function creation ...") time.sleep(5) else: raise @@ -1579,9 +1467,7 @@ def __init__(self, job_name, aws_config, boto3_session): def create_topic(self): response = self.sns.create_topic(Name=self.sns_state_machine_topic) - logger.info( - f"Simple notifications topic {self.sns_state_machine_topic} created." - ) + logger.info(f"Simple notifications topic {self.sns_state_machine_topic} created.") self.sns_state_machine_topic_arn = response["TopicArn"] @@ -1597,13 +1483,9 @@ def subscribe_to_topic(self): ) def clean(self): - self.sns.delete_topic( - TopicArn=f"arn:aws:sns:{self.region}:{self.account}:{self.sns_state_machine_topic}" - ) + self.sns.delete_topic(TopicArn=f"arn:aws:sns:{self.region}:{self.account}:{self.sns_state_machine_topic}") - logger.info( - f"Simple notifications topic {self.sns_state_machine_topic} deleted." - ) + logger.info(f"Simple notifications topic {self.sns_state_machine_topic} deleted.") class DockerBatchBase(BuildStockBatchBase): @@ -1616,12 +1498,8 @@ def __init__(self, project_filename): try: self.docker_client.ping() except: # noqa: E722 (allow bare except in this case because error can be a weird non-class Windows API error) - logger.error( - "The docker server did not respond, make sure Docker Desktop is started then retry." - ) - raise RuntimeError( - "The docker server did not respond, make sure Docker Desktop is started then retry." - ) + logger.error("The docker server did not respond, make sure Docker Desktop is started then retry.") + raise RuntimeError("The docker server did not respond, make sure Docker Desktop is started then retry.") @staticmethod def validate_project(project_file): @@ -1636,9 +1514,7 @@ class AwsBatch(DockerBatchBase): def __init__(self, project_filename): super().__init__(project_filename) - self.job_identifier = re.sub( - "[^0-9a-zA-Z]+", "_", self.cfg["aws"]["job_identifier"] - )[:10] + self.job_identifier = re.sub("[^0-9a-zA-Z]+", "_", self.cfg["aws"]["job_identifier"])[:10] self.project_filename = project_filename self.region = self.cfg["aws"]["region"] @@ -1661,17 +1537,11 @@ def validate_instance_types(project_file): instance_types_requested.add(job_base.emr_manager_instance_type) instance_types_requested.add(job_base.emr_worker_instance_type) inst_type_resp = ec2.describe_instance_type_offerings( - Filters=[ - {"Name": "instance-type", "Values": list(instance_types_requested)} - ] - ) - instance_types_available = set( - [x["InstanceType"] for x in inst_type_resp["InstanceTypeOfferings"]] + Filters=[{"Name": "instance-type", "Values": list(instance_types_requested)}] ) + instance_types_available = set([x["InstanceType"] for x in inst_type_resp["InstanceTypeOfferings"]]) if not instance_types_requested == instance_types_available: - instance_types_not_available = ( - instance_types_requested - instance_types_available - ) + instance_types_not_available = instance_types_requested - instance_types_available raise ValidationError( f"The instance type(s) {', '.join(instance_types_not_available)} are not available in region {aws_config['region']}." 
# noqa E501 ) @@ -1708,13 +1578,9 @@ def build_image(self): """ root_path = pathlib.Path(os.path.abspath(__file__)).parent.parent.parent if not (root_path / "Dockerfile").exists(): - raise RuntimeError( - f"The needs to be run from the root of the repo, found {root_path}" - ) + raise RuntimeError(f"The needs to be run from the root of the repo, found {root_path}") logger.debug("Building docker image") - self.docker_client.images.build( - path=str(root_path), tag=self.docker_image, rm=True - ) + self.docker_client.images.build(path=str(root_path), tag=self.docker_image, rm=True) def push_image(self): """ @@ -1722,22 +1588,16 @@ def push_image(self): """ auth_token = self.ecr.get_authorization_token() dkr_user, dkr_pass = ( - base64.b64decode(auth_token["authorizationData"][0]["authorizationToken"]) - .decode("ascii") - .split(":") + base64.b64decode(auth_token["authorizationData"][0]["authorizationToken"]).decode("ascii").split(":") ) repo_url = self.container_repo["repositoryUri"] registry_url = "https://" + repo_url.split("/")[0] - resp = self.docker_client.login( - username=dkr_user, password=dkr_pass, registry=registry_url - ) + resp = self.docker_client.login(username=dkr_user, password=dkr_pass, registry=registry_url) logger.debug(resp) image = self.docker_client.images.get(self.docker_image) image.tag(repo_url, tag=self.job_identifier) last_status = None - for x in self.docker_client.images.push( - repo_url, tag=self.job_identifier, stream=True - ): + for x in self.docker_client.images.push(repo_url, tag=self.job_identifier, stream=True): try: y = json.loads(x) except json.JSONDecodeError: @@ -1754,9 +1614,7 @@ def clean(self): """ logger.info("Beginning cleanup of AWS resources...") - batch_env = AwsBatchEnv( - self.job_identifier, self.cfg["aws"], self.boto3_session - ) + batch_env = AwsBatchEnv(self.job_identifier, self.cfg["aws"], self.boto3_session) batch_env.clean() sns_env = AwsSNS(self.job_identifier, self.cfg["aws"], self.boto3_session) @@ -1776,9 +1634,7 @@ def run_batch(self): buildstock_csv_filename = self.sampler.run_sampling() # Compress and upload assets to S3 - with tempfile.TemporaryDirectory( - prefix="bsb_" - ) as tmpdir, tempfile.TemporaryDirectory( + with tempfile.TemporaryDirectory(prefix="bsb_") as tmpdir, tempfile.TemporaryDirectory( prefix="bsb_" ) as tmp_weather_dir: # noqa: E501 self._weather_dir = tmp_weather_dir @@ -1805,14 +1661,10 @@ def run_batch(self): os.makedirs(weather_path) # Determine the unique weather files - epw_filenames = list( - filter(lambda x: x.endswith(".epw"), os.listdir(self.weather_dir)) - ) + epw_filenames = list(filter(lambda x: x.endswith(".epw"), os.listdir(self.weather_dir))) logger.debug("Calculating hashes for weather files") epw_hashes = Parallel(n_jobs=-1, verbose=9)( - delayed(calc_hash_for_file)( - pathlib.Path(self.weather_dir) / epw_filename - ) + delayed(calc_hash_for_file)(pathlib.Path(self.weather_dir) / epw_filename) for epw_filename in epw_filenames ) unique_epws = collections.defaultdict(list) @@ -1848,14 +1700,10 @@ def run_batch(self): max_array_size = 10000 n_sims_per_job = math.ceil(n_sims / max_array_size) n_sims_per_job = max(n_sims_per_job, 2) - logger.debug( - "Number of simulations per array job = {}".format(n_sims_per_job) - ) + logger.debug("Number of simulations per array job = {}".format(n_sims_per_job)) baseline_sims = zip(building_ids, itertools.repeat(None)) - upgrade_sims = itertools.product( - building_ids, range(len(self.cfg.get("upgrades", []))) - ) + upgrade_sims = 
itertools.product(building_ids, range(len(self.cfg.get("upgrades", [])))) all_sims = list(itertools.chain(baseline_sims, upgrade_sims)) random.shuffle(all_sims) all_sims_iter = iter(all_sims) @@ -1888,9 +1736,7 @@ def run_batch(self): with tarfile.open(tmppath / "jobs.tar.gz", "w:gz") as tf: tf.add(jobs_dir, arcname="jobs") tick = time.time() - tick - logger.debug( - "Done compressing job jsons using gz {:.1f} seconds".format(tick) - ) + logger.debug("Done compressing job jsons using gz {:.1f} seconds".format(tick)) shutil.rmtree(jobs_dir) os.makedirs(tmppath / "results" / "simulation_output") @@ -1916,10 +1762,7 @@ def run_batch(self): logger.debug("Copying weather files on S3") bucket = self.cfg["aws"]["s3"]["bucket"] - Parallel(n_jobs=-1, verbose=9)( - delayed(copy_s3_file)(bucket, src, bucket, dest) - for src, dest in epws_to_copy - ) + Parallel(n_jobs=-1, verbose=9)(delayed(copy_s3_file)(bucket, src, bucket, dest) for src, dest in epws_to_copy) # Create the output directories fs = S3FileSystem() @@ -1929,9 +1772,7 @@ def run_batch(self): ) # noqa E501 # Define the batch environment - batch_env = AwsBatchEnv( - self.job_identifier, self.cfg["aws"], self.boto3_session - ) + batch_env = AwsBatchEnv(self.job_identifier, self.cfg["aws"], self.boto3_session) logger.info( "Launching Batch environment - (resource configs will not be updated on subsequent executions, but new job revisions will be created):" # noqa 501 ) @@ -1949,9 +1790,7 @@ def run_batch(self): REGION=self.region, ) - image_url = "{}:{}".format( - self.container_repo["repositoryUri"], self.job_identifier - ) + image_url = "{}:{}".format(self.container_repo["repositoryUri"], self.job_identifier) job_env_cfg = self.cfg["aws"].get("job_environment", {}) batch_env.create_job_definition( @@ -1981,9 +1820,7 @@ def run_batch(self): # start job batch_env.start_state_machine_execution(array_size) - logger.info( - "Batch job submitted. Check your email to subscribe to notifications." - ) + logger.info("Batch job submitted. 
Check your email to subscribe to notifications.") @classmethod def run_job(cls, job_id, bucket, prefix, job_name, region): @@ -2016,9 +1853,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): jobs_file_path = sim_dir.parent / "jobs.tar.gz" s3.download_file(bucket, f"{prefix}/jobs.tar.gz", str(jobs_file_path)) with tarfile.open(jobs_file_path, "r") as tar_f: - jobs_d = json.load( - tar_f.extractfile(f"jobs/job{job_id:05d}.json"), encoding="utf-8" - ) + jobs_d = json.load(tar_f.extractfile(f"jobs/job{job_id:05d}.json"), encoding="utf-8") logger.debug("Number of simulations = {}".format(len(jobs_d["batch"]))) logger.debug("Getting weather files") @@ -2026,9 +1861,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): os.makedirs(weather_dir, exist_ok=True) # Make a lookup of which parameter points to the weather file from options_lookup.tsv - with open( - sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8" - ) as f: + with open(sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8") as f: tsv_reader = csv.reader(f, delimiter="\t") next(tsv_reader) # skip headers param_name = None @@ -2079,9 +1912,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" # Create OSW - osw = cls.create_osw( - cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx - ) + osw = cls.create_osw(cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx) with open(os.path.join(sim_dir, "in.osw"), "w") as f: json.dump(osw, f, indent=4) diff --git a/buildstockbatch/aws/awsbase.py b/buildstockbatch/aws/awsbase.py index e4de45b7..fdac12e8 100644 --- a/buildstockbatch/aws/awsbase.py +++ b/buildstockbatch/aws/awsbase.py @@ -64,9 +64,7 @@ def role_stitcher( p_counter = p_counter + 1 for managed_policy_arn in managed_policie_arns: - response = self.iam.attach_role_policy( - PolicyArn=managed_policy_arn, RoleName=role_name - ) + response = self.iam.attach_role_policy(PolicyArn=managed_policy_arn, RoleName=role_name) logger.info(f"Role {role_name} created") @@ -97,9 +95,7 @@ def delete_role(self, role_name): response = self.iam.list_attached_role_policies(RoleName=role_name) for policy in response["AttachedPolicies"]: - self.iam.detach_role_policy( - RoleName=role_name, PolicyArn=policy["PolicyArn"] - ) + self.iam.detach_role_policy(RoleName=role_name, PolicyArn=policy["PolicyArn"]) logger.info(f"Policies detached from role {role_name}.") @@ -117,17 +113,13 @@ def delete_instance_profile(self, instance_profile_name): logger.info(f"Instance profile {instance_profile_name} deleted.") except Exception as e: if "NoSuchEntity" in str(e): - logger.info( - f"Instance profile {instance_profile_name} missing, skipping..." - ) + logger.info(f"Instance profile {instance_profile_name} missing, skipping...") else: raise def remove_role_from_instance_profile(self, instance_profile_name): try: - response = self.iam.get_instance_profile( - InstanceProfileName=instance_profile_name - ) + response = self.iam.get_instance_profile(InstanceProfileName=instance_profile_name) for role in response["InstanceProfile"]["Roles"]: response = self.iam.remove_role_from_instance_profile( @@ -136,9 +128,7 @@ def remove_role_from_instance_profile(self, instance_profile_name): logger.info(f"Roles removed from instance profile {instance_profile_name}") except Exception as e: if "NoSuchEntity" in str(e): - logger.info( - f"Instance profile {instance_profile_name} does not exist. Skipping..." 
- ) + logger.info(f"Instance profile {instance_profile_name} does not exist. Skipping...") else: raise @@ -161,26 +151,16 @@ def __init__(self, job_identifier, aws_config, boto3_session): self.s3_bucket = aws_config["s3"]["bucket"] self.s3_bucket_arn = f"arn:aws:s3:::{self.s3_bucket}" self.s3_bucket_prefix = aws_config["s3"]["prefix"].rstrip("/") - self.s3_lambda_code_emr_cluster_key = ( - f"{self.s3_bucket_prefix}/lambda_functions/emr_function.py.zip" - ) - self.s3_lambda_emr_config_key = ( - f"{self.s3_bucket_prefix}/lambda_functions/emr_config.json" - ) + self.s3_lambda_code_emr_cluster_key = f"{self.s3_bucket_prefix}/lambda_functions/emr_function.py.zip" + self.s3_lambda_emr_config_key = f"{self.s3_bucket_prefix}/lambda_functions/emr_config.json" self.s3_emr_folder_name = "emr" # EMR emr_config = aws_config.get("emr", {}) - self.emr_manager_instance_type = emr_config.get( - "manager_instance_type", "m5.4xlarge" - ) - self.emr_worker_instance_type = emr_config.get( - "worker_instance_type", "r5.4xlarge" - ) + self.emr_manager_instance_type = emr_config.get("manager_instance_type", "m5.4xlarge") + self.emr_worker_instance_type = emr_config.get("worker_instance_type", "r5.4xlarge") self.emr_worker_instance_count = emr_config.get("worker_instance_count", 4) - self.emr_cluster_security_group_name = ( - f"{self.job_identifier}_emr_security_group" - ) + self.emr_cluster_security_group_name = f"{self.job_identifier}_emr_security_group" self.emr_cluster_name = f"{self.job_identifier}_emr_dask_cluster" self.emr_job_flow_role_name = f"{self.job_identifier}_emr_job_flow_role" self.emr_job_flow_role_arn = "" @@ -191,12 +171,8 @@ def __init__(self, job_identifier, aws_config, boto3_session): self.emr_instance_profile_name = f"{self.job_identifier}_emr_instance_profile" # Lambda - self.lambda_emr_job_step_execution_role = ( - f"{self.job_identifier}_emr_job_step_execution_role" - ) - self.lambda_emr_job_step_function_name = ( - f"{self.job_identifier}_emr_job_step_submission" - ) + self.lambda_emr_job_step_execution_role = f"{self.job_identifier}_emr_job_step_execution_role" + self.lambda_emr_job_step_function_name = f"{self.job_identifier}_emr_job_step_submission" self.lambda_emr_job_step_execution_role_arn = "" # Batch @@ -205,9 +181,7 @@ def __init__(self, job_identifier, aws_config, boto3_session): self.batch_job_queue_name = f"job_queue_{self.job_identifier}" self.batch_service_role_name = f"batch_service_role_{self.job_identifier}" self.batch_instance_role_name = f"batch_instance_role_{self.job_identifier}" - self.batch_instance_profile_name = ( - f"batch_instance_profile_{self.job_identifier}" - ) + self.batch_instance_profile_name = f"batch_instance_profile_{self.job_identifier}" self.batch_spot_service_role_name = f"spot_fleet_role_{self.job_identifier}" self.batch_ecs_task_role_name = f"ecs_task_role_{self.job_identifier}" self.batch_task_policy_name = f"ecs_task_policy_{self.job_identifier}" @@ -219,9 +193,7 @@ def __init__(self, job_identifier, aws_config, boto3_session): self.state_machine_role_name = f"{self.job_identifier}_state_machine_role" # SNS - self.sns_state_machine_topic = ( - f"{self.job_identifier}_state_machine_notifications" - ) + self.sns_state_machine_topic = f"{self.job_identifier}_state_machine_notifications" # VPC self.vpc_name = self.job_identifier diff --git a/buildstockbatch/aws/s3_assets/bsb_post.py b/buildstockbatch/aws/s3_assets/bsb_post.py index 5449dada..c1bade48 100644 --- a/buildstockbatch/aws/s3_assets/bsb_post.py +++ 
b/buildstockbatch/aws/s3_assets/bsb_post.py @@ -53,13 +53,9 @@ def do_postprocessing(s3_bucket, s3_bucket_prefix): tbl_prefix = s3_bucket_prefix.split("/")[-1] if not tbl_prefix: tbl_prefix = cfg["aws"]["job_identifier"] - create_athena_tables( - aws_conf, tbl_prefix, s3_bucket, f"{s3_bucket_prefix}/results/parquet" - ) + create_athena_tables(aws_conf, tbl_prefix, s3_bucket, f"{s3_bucket_prefix}/results/parquet") - keep_individual_timeseries = cfg.get("postprocessing", {}).get( - "keep_individual_timeseries", False - ) + keep_individual_timeseries = cfg.get("postprocessing", {}).get("keep_individual_timeseries", False) remove_intermediate_files(fs, results_s3_loc, keep_individual_timeseries) diff --git a/buildstockbatch/base.py b/buildstockbatch/base.py index 97fe0d66..bf45f2e5 100644 --- a/buildstockbatch/base.py +++ b/buildstockbatch/base.py @@ -60,37 +60,26 @@ def __init__(self, project_filename): self.buildstock_dir = self.cfg["buildstock_directory"] if not os.path.isdir(self.buildstock_dir): - raise FileNotFoundError( - f"buildstock_directory = {self.buildstock_dir} is not a directory." - ) - self.project_dir = os.path.join( - self.buildstock_dir, self.cfg["project_directory"] - ) + raise FileNotFoundError(f"buildstock_directory = {self.buildstock_dir} is not a directory.") + self.project_dir = os.path.join(self.buildstock_dir, self.cfg["project_directory"]) if not os.path.isdir(self.project_dir): - raise FileNotFoundError( - f"project_directory = {self.project_dir} is not a directory." - ) + raise FileNotFoundError(f"project_directory = {self.project_dir} is not a directory.") # Load in OS_VERSION and OS_SHA arguments if they exist in the YAML, # otherwise use defaults specified here. self.os_version = self.cfg.get("os_version", self.DEFAULT_OS_VERSION) self.os_sha = self.cfg.get("os_sha", self.DEFAULT_OS_SHA) - logger.debug( - f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}" - ) + logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}") @staticmethod def get_sampler_class(sampler_name): - sampler_class_name = ( - "".join(x.capitalize() for x in sampler_name.strip().split("_")) + "Sampler" - ) + sampler_class_name = "".join(x.capitalize() for x in sampler_name.strip().split("_")) + "Sampler" return getattr(sampler, sampler_class_name) @staticmethod def get_workflow_generator_class(workflow_generator_name): workflow_generator_class_name = ( - "".join(x.capitalize() for x in workflow_generator_name.strip().split("_")) - + "WorkflowGenerator" + "".join(x.capitalize() for x in workflow_generator_name.strip().split("_")) + "WorkflowGenerator" ) return getattr(workflow_generator, workflow_generator_class_name) @@ -123,9 +112,7 @@ def _get_weather_files(self): f.write(chunk) f.seek(0) with zipfile.ZipFile(f, "r") as zf: - logger.debug( - "Extracting weather files to: {}".format(self.weather_dir) - ) + logger.debug("Extracting weather files to: {}".format(self.weather_dir)) zf.extractall(self.weather_dir) @property @@ -147,12 +134,8 @@ def skip_baseline_sims(self): @classmethod def get_reporting_measures(cls, cfg): - WorkflowGenerator = cls.get_workflow_generator_class( - cfg["workflow_generator"]["type"] - ) - wg = WorkflowGenerator( - cfg, 1 - ) # Number of datapoints doesn't really matter here + WorkflowGenerator = cls.get_workflow_generator_class(cfg["workflow_generator"]["type"]) + wg = WorkflowGenerator(cfg, 1) # Number of datapoints doesn't really matter here return wg.reporting_measures() def run_batch(self): @@ -160,9 +143,7 @@ def 
run_batch(self): @classmethod def create_osw(cls, cfg, n_datapoints, *args, **kwargs): - WorkflowGenerator = cls.get_workflow_generator_class( - cfg["workflow_generator"]["type"] - ) + WorkflowGenerator = cls.get_workflow_generator_class(cfg["workflow_generator"]["type"]) osw_generator = WorkflowGenerator(cfg, n_datapoints) return osw_generator.create_osw(*args, **kwargs) @@ -185,9 +166,7 @@ def make_sim_dir(building_id, upgrade_idx, base_dir, overwrite_existing=False): sim_dir, ) elif os.path.exists(os.path.join(sim_dir, "run", "failed.job")): - raise SimulationExists( - "{} exists and failed".format(sim_id), sim_id, sim_dir - ) + raise SimulationExists("{} exists and failed".format(sim_id), sim_id, sim_dir) else: shutil.rmtree(sim_dir) @@ -233,21 +212,13 @@ def cleanup_sim_dir(sim_dir, dest_fs, simout_ts_dir, upgrade_id, building_id): if os.path.isfile(timeseries_filepath): # Find the time columns present in the enduse_timeseries file possible_time_cols = ["time", "Time", "TimeDST", "TimeUTC"] - cols = read_csv( - timeseries_filepath, index_col=False, nrows=0 - ).columns.tolist() + cols = read_csv(timeseries_filepath, index_col=False, nrows=0).columns.tolist() actual_time_cols = [c for c in cols if c in possible_time_cols] if not actual_time_cols: - logger.error( - f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}." - ) - raise RuntimeError( - f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}." - ) + logger.error(f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}.") + raise RuntimeError(f"Did not find any time column ({possible_time_cols}) in {timeseries_filepath}.") - tsdf = read_csv( - timeseries_filepath, parse_dates=actual_time_cols, skiprows=skiprows - ) + tsdf = read_csv(timeseries_filepath, parse_dates=actual_time_cols, skiprows=skiprows) if os.path.isfile(schedules_filepath): schedules = read_csv(schedules_filepath, dtype=np.float64) schedules.rename(columns=lambda x: f"schedules_{x}", inplace=True) @@ -315,9 +286,7 @@ def get_buildstock_dir(project_file, cfg): if os.path.isabs(buildstock_dir): return os.path.abspath(buildstock_dir) else: - return os.path.abspath( - os.path.join(os.path.dirname(project_file), buildstock_dir) - ) + return os.path.abspath(os.path.join(os.path.dirname(project_file), buildstock_dir)) @classmethod def validate_openstudio_path(cls, project_file): @@ -333,14 +302,10 @@ def validate_openstudio_path(cls, project_file): except FileNotFoundError: raise ValidationError(f"Cannot find openstudio at `{cls.openstudio_exe()}`") if proc_out.returncode != 0: - raise ValidationError( - f"OpenStudio failed with the following error {proc_out.stderr}" - ) + raise ValidationError(f"OpenStudio failed with the following error {proc_out.stderr}") actual_os_version, actual_os_sha = proc_out.stdout.strip().split("+") if os_version != actual_os_version: - raise ValidationError( - f"OpenStudio version is {actual_os_version}, expected is {os_version}" - ) + raise ValidationError(f"OpenStudio version is {actual_os_version}, expected is {os_version}") if os_sha != actual_os_sha: raise ValidationError( f"OpenStudio version is correct at {os_version}, but the shas don't match. 
" @@ -365,9 +330,7 @@ def validate_sampler(project_file): else: sample_file = os.path.abspath(sample_file) buildstock_df = read_csv(sample_file, dtype=str) - return BuildStockBatchBase.validate_buildstock_csv( - project_file, buildstock_df - ) + return BuildStockBatchBase.validate_buildstock_csv(project_file, buildstock_df) return True @staticmethod @@ -380,9 +343,7 @@ def validate_buildstock_csv(project_file, buildstock_df): if column in {"Building"}: continue if column not in param_option_dict: - errors.append( - f"Column {column} in buildstock_csv is not available in options_lookup.tsv" - ) + errors.append(f"Column {column} in buildstock_csv is not available in options_lookup.tsv") continue if "*" in param_option_dict[column]: continue # skip validating options when wildcard is present @@ -400,22 +361,16 @@ def validate_buildstock_csv(project_file, buildstock_df): @classmethod def validate_workflow_generator(cls, project_file): cfg = get_project_configuration(project_file) - WorkflowGenerator = cls.get_workflow_generator_class( - cfg["workflow_generator"]["type"] - ) + WorkflowGenerator = cls.get_workflow_generator_class(cfg["workflow_generator"]["type"]) return WorkflowGenerator.validate(cfg) @staticmethod def validate_project_schema(project_file): cfg = get_project_configuration(project_file) schema_version = cfg.get("schema_version") - version_schema = os.path.join( - os.path.dirname(__file__), "schemas", f"v{schema_version}.yaml" - ) + version_schema = os.path.join(os.path.dirname(__file__), "schemas", f"v{schema_version}.yaml") if not os.path.isfile(version_schema): - logger.error( - f"Could not find validation schema for YAML version {schema_version}" - ) + logger.error(f"Could not find validation schema for YAML version {schema_version}") raise FileNotFoundError(version_schema) schema = yamale.make_schema(version_schema) data = yamale.make_data(project_file, parser="ruamel") @@ -435,9 +390,7 @@ def validate_postprocessing_spec(project_file): partition_cols = cfg.get("postprocessing", {}).get("partition_columns", []) invalid_cols = [c for c in partition_cols if c not in param_option_dict.keys()] if invalid_cols: - raise ValidationError( - f"The following partition columns are not valid: {invalid_cols}" - ) + raise ValidationError(f"The following partition columns are not valid: {invalid_cols}") return True @staticmethod @@ -447,12 +400,8 @@ def validate_xor_nor_schema_keys(project_file): if int(major) >= 0: if int(minor) >= 0: # xor - if ("weather_files_url" in cfg.keys()) is ( - "weather_files_path" in cfg.keys() - ): - raise ValidationError( - "Both/neither weather_files_url and weather_files_path found in yaml root" - ) + if ("weather_files_url" in cfg.keys()) is ("weather_files_path" in cfg.keys()): + raise ValidationError("Both/neither weather_files_url and weather_files_path found in yaml root") return True @@ -467,9 +416,7 @@ def get_param_option_dict(project_file): try: with open(options_lookup_path, "r") as f: options = csv.DictReader(f, delimiter="\t") - invalid_options_lookup_str = ( - "" # Holds option/parameter names with invalid characters - ) + invalid_options_lookup_str = "" # Holds option/parameter names with invalid characters for row in options: for col in ["Parameter Name", "Option Name"]: invalid_chars = set(row[col]).intersection(set("|&()")) @@ -479,16 +426,9 @@ def get_param_option_dict(project_file): param_name, opt_name = row["Parameter Name"], row["Option Name"] param_option_dict[row["Parameter Name"]].add(row["Option Name"]) if opt_name == "*" and 
row["Measure Dir"]: - invalid_options_lookup_str += ( - f"{param_name}: '*' cannot pass arguments to measure.\n" - ) - if ( - "*" in param_option_dict[param_name] - and len(param_option_dict[param_name]) > 1 - ): - invalid_options_lookup_str += ( - f"{param_name}: '*' cannot be mixed with other options\n" - ) + invalid_options_lookup_str += f"{param_name}: '*' cannot pass arguments to measure.\n" + if "*" in param_option_dict[param_name] and len(param_option_dict[param_name]) > 1: + invalid_options_lookup_str += f"{param_name}: '*' cannot be mixed with other options\n" except FileNotFoundError as err: logger.error(f"Options lookup file not found at: '{options_lookup_path}'") raise err @@ -518,9 +458,7 @@ def get_errors(source_str, option_str): if not returns error message, close matches, and specifies where the error occurred (source_str) """ if "||" in option_str and "&&" in option_str: - invalid_option_spec_counter[ - (option_str, "has both || and && (not supported)") - ] += 1 + invalid_option_spec_counter[(option_str, "has both || and && (not supported)")] += 1 return "" if "||" in option_str or "&&" in option_str: @@ -528,9 +466,7 @@ def get_errors(source_str, option_str): errors = "" broken_options = option_str.split(splitter) if broken_options[-1] == "": - invalid_option_spec_counter[ - (option_str, "has trailing 'splitter'") - ] += 1 + invalid_option_spec_counter[(option_str, "has trailing 'splitter'")] += 1 return "" for broken_option_str in broken_options: new_source_str = source_str + f" in composite option '{option_str}'" @@ -552,21 +488,15 @@ def get_errors(source_str, option_str): return "" if parameter_name not in param_option_dict: - close_match = difflib.get_close_matches( - parameter_name, param_option_dict.keys(), 1 - ) + close_match = difflib.get_close_matches(parameter_name, param_option_dict.keys(), 1) close_match = close_match[0] if close_match else "" invalid_param_counter[(parameter_name, close_match)] += 1 return "" if not option_name or option_name not in param_option_dict[parameter_name]: - close_match = difflib.get_close_matches( - option_name, list(param_option_dict[parameter_name]), 1 - ) + close_match = difflib.get_close_matches(option_name, list(param_option_dict[parameter_name]), 1) close_match = close_match[0] if close_match else "" - invalid_option_counter_dict[parameter_name][ - (option_name, close_match) - ] += 1 + invalid_option_counter_dict[parameter_name][(option_name, close_match)] += 1 return "" return "" @@ -586,62 +516,38 @@ def get_all_option_str(source_str, inp): return [(source_str, inp)] elif type(inp) == list: return sum( - [ - get_all_option_str(source_str + f", in entry {count}", entry) - for count, entry in enumerate(inp) - ], + [get_all_option_str(source_str + f", in entry {count}", entry) for count, entry in enumerate(inp)], [], ) elif type(inp) == dict: if len(inp) > 1: - raise ValidationError( - f"{source_str} the logic is malformed. Dict can't have more than one entry" - ) + raise ValidationError(f"{source_str} the logic is malformed. 
source_str += f", in {list(inp.keys())[0]}" - return sum( - [get_all_option_str(source_str, i) for i in inp.values()], [] - ) + return sum([get_all_option_str(source_str, i) for i in inp.values()], []) # store all of the option_str in the project file as a list of (source_str, option_str) tuple source_option_str_list = [] if "upgrades" in cfg: for upgrade_count, upgrade in enumerate(cfg["upgrades"]): - upgrade_name = ( - upgrade.get("upgrade_name", "") - + f" (Upgrade Number: {upgrade_count})" - ) + upgrade_name = upgrade.get("upgrade_name", "") + f" (Upgrade Number: {upgrade_count})" source_str_upgrade = f"In upgrade '{upgrade_name}'" for option_count, option in enumerate(upgrade["options"]): - option_name = ( - option.get("option", "") + f" (Option Number: {option_count})" - ) - source_str_option = ( - source_str_upgrade + f", in option '{option_name}'" - ) - source_option_str_list.append( - (source_str_option, option.get("option")) - ) + option_name = option.get("option", "") + f" (Option Number: {option_count})" + source_str_option = source_str_upgrade + f", in option '{option_name}'" + source_option_str_list.append((source_str_option, option.get("option"))) if "apply_logic" in option: source_str_logic = source_str_option + ", in apply_logic" - source_option_str_list += get_all_option_str( - source_str_logic, option["apply_logic"] - ) + source_option_str_list += get_all_option_str(source_str_logic, option["apply_logic"]) if "package_apply_logic" in upgrade: source_str_package = source_str_upgrade + ", in package_apply_logic" - source_option_str_list += get_all_option_str( - source_str_package, upgrade["package_apply_logic"] - ) + source_option_str_list += get_all_option_str(source_str_package, upgrade["package_apply_logic"]) # TODO: refactor this into Sampler.validate_args if "downselect" in cfg or "downselect" in cfg.get("sampler", {}).get("type"): source_str = "In downselect" - logic = ( - cfg["downselect"]["logic"] - if "downselect" in cfg - else cfg["sampler"]["args"]["logic"] - ) + logic = cfg["downselect"]["logic"] if "downselect" in cfg else cfg["sampler"]["args"]["logic"] source_option_str_list += get_all_option_str(source_str, logic) # Gather all the errors in the option_str, if any @@ -650,11 +556,7 @@ def get_all_option_str(source_str, inp): error_message += get_errors(source_str, option_str) if error_message: - error_message = ( - "Following option/parameter entries have problem:\n" - + error_message - + "\n" - ) + error_message = "Following option/parameter entries have problem:\n" + error_message + "\n" if invalid_option_spec_counter: error_message += "* Following option/parameter entries have problem:\n" @@ -662,9 +564,7 @@ def get_all_option_str(source_str, inp): error_message += f" '{invalid_entry}' {error} - used '{count}' times\n" if invalid_param_counter: - error_message += ( - "* Following parameters do not exist in options_lookup.tsv\n" - ) + error_message += "* Following parameters do not exist in options_lookup.tsv\n" for (param, close_match), count in invalid_param_counter.items(): error_message += f" '{param}' - used '{count}' times." if close_match: @@ -736,9 +636,7 @@ def get_logic_problems(logic, parent=None): assert len(logic) == 1 for key, val in logic.items(): if key not in ["or", "and", "not"]: - raise ValidationError( - f"Invalid key {key}. Only 'or', 'and' and 'not' is allowed." - ) + raise ValidationError(f"Invalid key {key}. Only 'or', 'and' and 'not' is allowed.")
return get_logic_problems(val, parent=key) elif isinstance(logic, str): if "&&" not in logic: @@ -746,28 +644,19 @@ def get_logic_problems(logic, parent=None): entries = logic.split("&&") return get_logic_problems(entries, parent="&&") else: - raise ValidationError( - f"Invalid logic element {logic} with type {type(logic)}" - ) + raise ValidationError(f"Invalid logic element {logic} with type {type(logic)}") all_problems = [] if "upgrades" in cfg: for upgrade_count, upgrade in enumerate(cfg["upgrades"]): upgrade_name = upgrade.get("upgrade_name", "") - source_str_upgrade = ( - f"upgrade '{upgrade_name}' (Upgrade Number:{upgrade_count})" - ) + source_str_upgrade = f"upgrade '{upgrade_name}' (Upgrade Number:{upgrade_count})" for option_count, option in enumerate(upgrade["options"]): option_name = option.get("option", "") - source_str_option = ( - source_str_upgrade - + f", option '{option_name}' (Option Number:{option_count})" - ) + source_str_option = source_str_upgrade + f", option '{option_name}' (Option Number:{option_count})" if "apply_logic" in option: if problems := get_logic_problems(option["apply_logic"]): - all_problems.append( - (source_str_option, problems, option["apply_logic"]) - ) + all_problems.append((source_str_option, problems, option["apply_logic"])) if "package_apply_logic" in upgrade: source_str_package = source_str_upgrade + ", in package_apply_logic" @@ -783,11 +672,7 @@ def get_logic_problems(logic, parent=None): # TODO: refactor this into Sampler.validate_args if "downselect" in cfg or "downselect" in cfg.get("sampler", {}).get("type"): source_str = "in downselect logic" - logic = ( - cfg["downselect"]["logic"] - if "downselect" in cfg - else cfg["sampler"]["args"]["logic"] - ) + logic = cfg["downselect"]["logic"] if "downselect" in cfg else cfg["sampler"]["args"]["logic"] if problems := get_logic_problems(logic): all_problems.append((source_str, problems, logic)) @@ -835,10 +720,7 @@ def get_errors(source_str, measure_str): """ if measure_str not in measure_dirs: closest = difflib.get_close_matches(measure_str, list(measure_dirs)) - return ( - f"Measure directory {measure_str} not found. Closest matches: {closest}" - f" {source_str}\n" - ) + return f"Measure directory {measure_str} not found. Closest matches: {closest}" f" {source_str}\n" return "" source_measures_str_list = [] @@ -855,9 +737,7 @@ def get_errors(source_str, measure_str): if not error_message: return True else: - error_message = ( - "Measure name(s)/directory(ies) is(are) invalid. \n" + error_message - ) + error_message = "Measure name(s)/directory(ies) is(are) invalid. \n" + error_message
\n" + error_message logger.error(error_message) raise ValidationError(error_message) @@ -900,9 +780,7 @@ def validate_resstock_or_comstock_version(project_file): """ cfg = get_project_configuration(project_file) - buildstock_rb = os.path.join( - cfg["buildstock_directory"], "resources/buildstock.rb" - ) + buildstock_rb = os.path.join(cfg["buildstock_directory"], "resources/buildstock.rb") if os.path.exists(buildstock_rb): with open(buildstock_rb, "r") as f: versions = dict( @@ -939,9 +817,7 @@ def validate_number_of_options(project_file): :rtype: bool """ cfg = get_project_configuration(project_file) - measure_xml_filename = os.path.join( - cfg["buildstock_directory"], "measures", "ApplyUpgrade", "measure.xml" - ) + measure_xml_filename = os.path.join(cfg["buildstock_directory"], "measures", "ApplyUpgrade", "measure.xml") if os.path.exists(measure_xml_filename): measure_xml_tree = objectify.parse(measure_xml_filename) measure_xml = measure_xml_tree.getroot() @@ -952,14 +828,10 @@ def validate_number_of_options(project_file): if m_option: option_number = int(m_option.group(1)) n_options_in_measure = max(option_number, n_options_in_measure) - m_costs = re.match( - r"^option_(\d+)_cost_(\d+)_value", str(argument.name) - ) + m_costs = re.match(r"^option_(\d+)_cost_(\d+)_value", str(argument.name)) if m_costs: cost_number = int(m_costs.group(2)) - n_costs_per_option_in_measure = max( - cost_number, n_costs_per_option_in_measure - ) + n_costs_per_option_in_measure = max(cost_number, n_costs_per_option_in_measure) n_options_in_cfg = 0 n_costs_in_cfg = 0 for upgrade in cfg.get("upgrades", []): @@ -1025,32 +897,18 @@ def process_results(self, skip_combine=False, force_upload=False): self.get_dask_client() # noqa: F841 if self.cfg["workflow_generator"]["type"] == "residential_hpxml": - if ( - "simulation_output_report" - in self.cfg["workflow_generator"]["args"].keys() - ): - if ( - "timeseries_frequency" - in self.cfg["workflow_generator"]["args"][ - "simulation_output_report" - ].keys() - ): + if "simulation_output_report" in self.cfg["workflow_generator"]["args"].keys(): + if "timeseries_frequency" in self.cfg["workflow_generator"]["args"]["simulation_output_report"].keys(): do_timeseries = ( - self.cfg["workflow_generator"]["args"][ - "simulation_output_report" - ]["timeseries_frequency"] + self.cfg["workflow_generator"]["args"]["simulation_output_report"]["timeseries_frequency"] != "none" ) else: - do_timeseries = ( - "timeseries_csv_export" in self.cfg["workflow_generator"]["args"].keys() - ) + do_timeseries = "timeseries_csv_export" in self.cfg["workflow_generator"]["args"].keys() fs = LocalFileSystem() if not skip_combine: - postprocessing.combine_results( - fs, self.results_dir, self.cfg, do_timeseries=do_timeseries - ) + postprocessing.combine_results(fs, self.results_dir, self.cfg, do_timeseries=do_timeseries) aws_conf = self.cfg.get("postprocessing", {}).get("aws", {}) if "s3" in aws_conf or force_upload: @@ -1058,13 +916,7 @@ def process_results(self, skip_combine=False, force_upload=False): aws_conf, self.output_dir, self.results_dir, self.sampler.csv_path ) if "athena" in aws_conf: - postprocessing.create_athena_tables( - aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix - ) + postprocessing.create_athena_tables(aws_conf, os.path.basename(self.output_dir), s3_bucket, s3_prefix) - keep_individual_timeseries = self.cfg.get("postprocessing", {}).get( - "keep_individual_timeseries", False - ) - postprocessing.remove_intermediate_files( - fs, self.results_dir, 
+ keep_individual_timeseries = self.cfg.get("postprocessing", {}).get("keep_individual_timeseries", False) + postprocessing.remove_intermediate_files(fs, self.results_dir, keep_individual_timeseries) diff --git a/buildstockbatch/eagle.py b/buildstockbatch/eagle.py index f5ee0b43..b6ce74eb 100644 --- a/buildstockbatch/eagle.py +++ b/buildstockbatch/eagle.py @@ -76,9 +76,7 @@ def __init__(self, project_filename): logger.debug("Output directory = {}".format(output_dir)) weather_dir = self.weather_dir # noqa E841 - self.singularity_image = self.get_singularity_image( - self.cfg, self.os_version, self.os_sha - ) + self.singularity_image = self.get_singularity_image(self.cfg, self.os_version, self.os_sha) @classmethod def validate_project(cls, project_file): @@ -95,8 +93,7 @@ def validate_output_directory_eagle(cls, project_file): output_dir = path_rel_to_file(project_file, cfg["output_directory"]) if not re.match(r"/(lustre/eaglefs/)?(scratch|projects)", output_dir): raise ValidationError( - f"`output_directory` must be in /scratch or /projects," - f" `output_directory` = {output_dir}" + f"`output_directory` must be in /scratch or /projects," f" `output_directory` = {output_dir}" ) @classmethod @@ -108,15 +105,11 @@ def validate_singularity_image_eagle(cls, project_file): cfg.get("os_sha", cls.DEFAULT_OS_SHA), ) if not os.path.exists(singularity_image): - raise ValidationError( - f"The singularity image does not exist: {singularity_image}" - ) + raise ValidationError(f"The singularity image does not exist: {singularity_image}") @property def output_dir(self): - output_dir = path_rel_to_file( - self.project_filename, self.cfg["output_directory"] - ) + output_dir = path_rel_to_file(self.project_filename, self.cfg["output_directory"]) return output_dir @property @@ -135,9 +128,7 @@ def clear_and_copy_dir(src, dst): def get_singularity_image(cls, cfg, os_version, os_sha): return os.path.join( cfg.get("sys_image_dir", cls.DEFAULT_SYS_IMAGE_DIR), - "OpenStudio-{ver}.{sha}-Singularity.simg".format( - ver=os_version, sha=os_sha - ), + "OpenStudio-{ver}.{sha}-Singularity.simg".format(ver=os_version, sha=os_sha), ) @property @@ -150,12 +141,7 @@ def weather_dir(self): def run_batch(self, sampling_only=False): # Create simulation_output dir - sim_out_ts_dir = ( - pathlib.Path(self.output_dir) - / "results" - / "simulation_output" - / "timeseries" - ) + sim_out_ts_dir = pathlib.Path(self.output_dir) / "results" / "simulation_output" / "timeseries" os.makedirs(sim_out_ts_dir, exist_ok=True) for i in range(0, len(self.cfg.get("upgrades", [])) + 1): os.makedirs(sim_out_ts_dir / f"up{i:02d}") @@ -165,9 +151,7 @@ def run_batch(self, sampling_only=False): destination_dir = os.path.dirname(self.sampler.csv_path) if os.path.exists(destination_dir): shutil.rmtree(destination_dir) - shutil.copytree( - os.path.join(self.project_dir, "housing_characteristics"), destination_dir - ) + shutil.copytree(os.path.join(self.project_dir, "housing_characteristics"), destination_dir) logger.debug("Housing characteristics copied.") # run sampling @@ -197,9 +181,7 @@ def run_batch(self, sampling_only=False): # larger than we need, now that we know n_sims n_sims_per_job = max(n_sims_per_job, self.min_sims_per_job) - upgrade_sims = itertools.product( - building_ids, range(len(self.cfg.get("upgrades", []))) - ) + upgrade_sims = itertools.product(building_ids, range(len(self.cfg.get("upgrades", [])))) if not self.skip_baseline_sims: # create batches of simulations baseline_sims = zip(building_ids, itertools.repeat(None))
@@ -214,9 +196,7 @@ def run_batch(self, sampling_only=False): if not batch: break logger.info("Queueing job {} ({} simulations)".format(i, len(batch))) - job_json_filename = os.path.join( - self.output_dir, "job{:03d}.json".format(i) - ) + job_json_filename = os.path.join(self.output_dir, "job{:03d}.json".format(i)) with open(job_json_filename, "w") as f: json.dump( { @@ -244,9 +224,7 @@ def run_job_batch(self, job_array_number): pathlib.Path(self.buildstock_dir) / "measures", self.local_buildstock_dir / "measures", ) - if os.path.exists( - pathlib.Path(self.buildstock_dir) / "resources/hpxml-measures" - ): + if os.path.exists(pathlib.Path(self.buildstock_dir) / "resources/hpxml-measures"): self.clear_and_copy_dir( pathlib.Path(self.buildstock_dir) / "resources/hpxml-measures", self.local_buildstock_dir / "resources/hpxml-measures", @@ -261,9 +239,7 @@ def run_job_batch(self, job_array_number): shutil.copy2(self.singularity_image, self.local_singularity_img) # Run the job batch as normal - job_json_filename = os.path.join( - self.output_dir, "job{:03d}.json".format(job_array_number) - ) + job_json_filename = os.path.join(self.output_dir, "job{:03d}.json".format(job_array_number)) with open(job_json_filename, "r") as f: args = json.load(f) @@ -281,18 +257,12 @@ def run_job_batch(self, job_array_number): df.to_csv(buildstock_csv_path, index=False) logger.debug(f"Buildstock.csv trimmed to {len(df)} rows.") - traceback_file_path = ( - self.local_output_dir - / "simulation_output" - / f"traceback{job_array_number}.out" - ) + traceback_file_path = self.local_output_dir / "simulation_output" / f"traceback{job_array_number}.out" @delayed def run_building_d(i, upgrade_idx): try: - return self.run_building( - self.output_dir, self.cfg, args["n_datapoints"], i, upgrade_idx - ) + return self.run_building(self.output_dir, self.cfg, args["n_datapoints"], i, upgrade_idx) except Exception: with open(traceback_file_path, "a") as f: txt = get_error_details() @@ -319,9 +289,7 @@ def run_building_d(i, upgrade_idx): # Compress simulation results if self.cfg.get("max_minutes_per_sim") is not None: time.sleep(60) # Allow results JSON to finish writing - simout_filename = ( - lustre_sim_out_dir / f"simulations_job{job_array_number}.tar.gz" - ) + simout_filename = lustre_sim_out_dir / f"simulations_job{job_array_number}.tar.gz" logger.info(f"Compressing simulation outputs to {simout_filename}") local_sim_out_dir = self.local_output_dir / "simulation_output" subprocess.run( @@ -350,16 +318,12 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 try: - sim_id, sim_dir = cls.make_sim_dir( - i, upgrade_idx, os.path.join(cls.local_output_dir, "simulation_output") - ) + sim_id, sim_dir = cls.make_sim_dir(i, upgrade_idx, os.path.join(cls.local_output_dir, "simulation_output")) except SimulationExists as ex: sim_dir = ex.sim_dir else: # Generate the osw for this simulation - osw = cls.create_osw( - cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx - ) + osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) with open(os.path.join(sim_dir, "in.osw"), "w") as f: json.dump(osw, f, indent=4) @@ -370,9 +334,7 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): ] # Create a temporary directory for the simulation to use - with tempfile.TemporaryDirectory( - dir=cls.local_scratch, prefix=f"{sim_id}_" - ) as tmpdir: + with tempfile.TemporaryDirectory(dir=cls.local_scratch, prefix=f"{sim_id}_") as tmpdir:
# Build the command to instantiate and configure the singularity container the simulation is run inside local_resources_dir = cls.local_buildstock_dir / "resources" args = [ @@ -395,24 +357,12 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): for src in dirs_to_mount: container_mount = "/" + os.path.basename(src) args.extend(["-B", "{}:{}:ro".format(src, container_mount)]) - container_symlink = os.path.join( - "/var/simdata/openstudio", os.path.basename(src) - ) - runscript.append( - "ln -s {} {}".format( - *map(shlex.quote, (container_mount, container_symlink)) - ) - ) + container_symlink = os.path.join("/var/simdata/openstudio", os.path.basename(src)) + runscript.append("ln -s {} {}".format(*map(shlex.quote, (container_mount, container_symlink)))) - if os.path.exists( - os.path.join(cls.local_buildstock_dir, "resources/hpxml-measures") - ): - runscript.append( - "ln -s /resources /var/simdata/openstudio/resources" - ) - src = os.path.join( - cls.local_buildstock_dir, "resources/hpxml-measures" - ) + if os.path.exists(os.path.join(cls.local_buildstock_dir, "resources/hpxml-measures")): + runscript.append("ln -s /resources /var/simdata/openstudio/resources") + src = os.path.join(cls.local_buildstock_dir, "resources/hpxml-measures") container_mount = "/resources/hpxml-measures" args.extend(["-B", "{}:{}:ro".format(src, container_mount)]) @@ -464,30 +414,18 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): "timeout": msg, } out_osw.write(json.dumps(out_msg, indent=3)) - with open( - os.path.join(sim_dir, "run", "out.osw"), "a" - ) as run_log: - run_log.write( - f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}" - ) - with open( - os.path.join(sim_dir, "run", "failed.job"), "w" - ) as failed_job: - failed_job.write( - f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}" - ) - time.sleep( - 60 - ) # Wait for EnergyPlus to release file locks and data_point.zip to finish + with open(os.path.join(sim_dir, "run", "out.osw"), "a") as run_log: + run_log.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") + with open(os.path.join(sim_dir, "run", "failed.job"), "w") as failed_job: + failed_job.write(f"[{end_time.strftime('%H:%M:%S')} ERROR] {msg}") + time.sleep(60) # Wait for EnergyPlus to release file locks and data_point.zip to finish except subprocess.CalledProcessError: pass finally: # Clean up the symbolic links we created in the container for mount_dir in dirs_to_mount + [os.path.join(sim_dir, "lib")]: try: - os.unlink( - os.path.join(sim_dir, os.path.basename(mount_dir)) - ) + os.unlink(os.path.join(sim_dir, os.path.basename(mount_dir))) except FileNotFoundError: pass @@ -501,9 +439,7 @@ def run_building(cls, output_dir, cfg, n_datapoints, i, upgrade_idx=None): ) reporting_measures = cls.get_reporting_measures(cfg) - dpout = postprocessing.read_simulation_outputs( - fs, reporting_measures, sim_dir, upgrade_id, i - ) + dpout = postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, i) return dpout def queue_jobs(self, array_ids=None, hipri=False): @@ -531,9 +467,7 @@ def queue_jobs(self, array_ids=None, hipri=False): # Estimate the wall time in minutes cores_per_node = 36 minutes_per_sim = eagle_cfg["minutes_per_sim"] - walltime = math.ceil( - math.ceil(n_sims_per_job / cores_per_node) * minutes_per_sim - ) + walltime = math.ceil(math.ceil(n_sims_per_job / cores_per_node) * minutes_per_sim) # Queue up simulations here = os.path.dirname(os.path.abspath(__file__))
@@ -584,14 +518,10 @@ def queue_post_processing(self, after_jobids=[], upload_only=False, hipri=False) # Configuration values account = self.cfg["eagle"]["account"] walltime = self.cfg["eagle"].get("postprocessing", {}).get("time", "1:30:00") - memory = ( - self.cfg["eagle"].get("postprocessing", {}).get("node_memory_mb", 85248) - ) + memory = self.cfg["eagle"].get("postprocessing", {}).get("node_memory_mb", 85248) n_procs = self.cfg["eagle"].get("postprocessing", {}).get("n_procs", 18) n_workers = self.cfg["eagle"].get("postprocessing", {}).get("n_workers", 2) - print( - f"Submitting job to {n_workers} {memory}MB memory nodes using {n_procs} cores in each." - ) + print(f"Submitting job to {n_workers} {memory}MB memory nodes using {n_procs} cores in each.") # Throw an error if the files already exist. if not upload_only: @@ -614,8 +544,7 @@ def queue_post_processing(self, after_jobids=[], upload_only=False, hipri=False) last_mod_date = dt.datetime.fromtimestamp(os.path.getmtime(filepath)) shutil.move( filepath, - filepath.parent - / f"{filepath.stem}_{last_mod_date:%Y%m%d%H%M}{filepath.suffix}", + filepath.parent / f"{filepath.stem}_{last_mod_date:%Y%m%d%H%M}{filepath.suffix}", ) env = {} @@ -668,22 +597,14 @@ def get_dask_client(self): cluster = LocalCluster(local_directory="/data/dask-tmp") return Client(cluster) else: - return Client( - scheduler_file=os.path.join(self.output_dir, "dask_scheduler.json") - ) + return Client(scheduler_file=os.path.join(self.output_dir, "dask_scheduler.json")) def process_results(self, *args, **kwargs): # Check that all the jobs succeeded before proceeding failed_job_array_ids = self.get_failed_job_array_ids() if failed_job_array_ids: - logger.error( - "The following simulation jobs failed: {}".format( - ", ".join(map(str, failed_job_array_ids)) - ) - ) - logger.error( - "Please inspect those jobs and fix any problems before resubmitting." - ) + logger.error("The following simulation jobs failed: {}".format(", ".join(map(str, failed_job_array_ids)))) + logger.error("Please inspect those jobs and fix any problems before resubmitting.") logger.critical("Postprocessing cancelled.") return False @@ -735,8 +656,7 @@ def rerun_failed_jobs(self, hipri=False): last_mod_date = dt.datetime.fromtimestamp(os.path.getmtime(filepath)) shutil.move( filepath, - prev_failed_job_out_dir - / f"{filepath.name}_{last_mod_date:%Y%m%d%H%M}", + prev_failed_job_out_dir / f"{filepath.name}_{last_mod_date:%Y%m%d%H%M}", ) # Delete simulation results for jobs we're about to rerun @@ -825,21 +745,15 @@ def user_cli(argv=sys.argv[1:]): help="Only validate the project YAML file and references. Nothing is executed",
Nothing is executed", action="store_true", ) - group.add_argument( - "--samplingonly", help="Run the sampling only.", action="store_true" - ) - group.add_argument( - "--rerun_failed", help="Rerun the failed jobs", action="store_true" - ) + group.add_argument("--samplingonly", help="Run the sampling only.", action="store_true") + group.add_argument("--rerun_failed", help="Rerun the failed jobs", action="store_true") # parse CLI arguments args = parser.parse_args(argv) # load the yaml project file if not os.path.isfile(args.project_filename): - raise FileNotFoundError( - "The project file {} doesn't exist".format(args.project_filename) - ) + raise FileNotFoundError("The project file {} doesn't exist".format(args.project_filename)) project_filename = os.path.abspath(args.project_filename) with open(project_filename, "r") as f: cfg = yaml.load(f, Loader=yaml.SafeLoader) @@ -867,9 +781,7 @@ def user_cli(argv=sys.argv[1:]): out_dir = cfg["output_directory"] if os.path.exists(out_dir): raise FileExistsError( - "The output directory {} already exists. Please delete it or choose another.".format( - out_dir - ) + "The output directory {} already exists. Please delete it or choose another.".format(out_dir) ) logger.info("Creating output directory {}".format(out_dir)) os.makedirs(out_dir) diff --git a/buildstockbatch/local.py b/buildstockbatch/local.py index a51ca8c2..6efcb50d 100644 --- a/buildstockbatch/local.py +++ b/buildstockbatch/local.py @@ -46,9 +46,7 @@ def __init__(self, project_filename): self._weather_dir = None # Create simulation_output dir - sim_out_ts_dir = os.path.join( - self.results_dir, "simulation_output", "timeseries" - ) + sim_out_ts_dir = os.path.join(self.results_dir, "simulation_output", "timeseries") os.makedirs(sim_out_ts_dir, exist_ok=True) for i in range(0, len(self.cfg.get("upgrades", [])) + 1): os.makedirs(os.path.join(sim_out_ts_dir, f"up{i:02d}"), exist_ok=True) @@ -57,26 +55,18 @@ def __init__(self, project_filename): # FIXME: Get working without docker if self.cfg.get("baseline", dict()).get("custom_gems", False): # TODO: Fix this stuff to work without docker - logger.info( - "Installing custom gems to docker volume: buildstockbatch_custom_gems" - ) + logger.info("Installing custom gems to docker volume: buildstockbatch_custom_gems") docker_client = docker.client.from_env() # Create a volume to store the custom gems - docker_client.volumes.create( - name="buildstockbatch_custom_gems", driver="local" - ) - simdata_vol = docker_client.volumes.create( - name="buildstockbatch_simdata_temp", driver="local" - ) + docker_client.volumes.create(name="buildstockbatch_custom_gems", driver="local") + simdata_vol = docker_client.volumes.create(name="buildstockbatch_simdata_temp", driver="local") # Define directories to be mounted in the container mnt_gem_dir = "/var/oscli/gems" # Install custom gems to be used in the docker container - local_gemfile_path = os.path.join( - self.buildstock_dir, "resources", "Gemfile" - ) + local_gemfile_path = os.path.join(self.buildstock_dir, "resources", "Gemfile") mnt_gemfile_path_orig = "/var/oscli/gemfile/Gemfile" docker_volume_mounts = { "buildstockbatch_custom_gems": {"bind": mnt_gem_dir, "mode": "rw"}, @@ -87,14 +77,10 @@ def __init__(self, project_filename): # Check that the Gemfile exists if not os.path.exists(local_gemfile_path): print(f"local_gemfile_path = {local_gemfile_path}") - raise AttributeError( - "baseline:custom_gems = True, but did not find Gemfile in /resources directory" - ) + raise AttributeError("baseline:custom_gems = 
# Make the buildstock/resources/.custom_gems dir to store logs - local_log_dir = os.path.join( - self.buildstock_dir, "resources", ".custom_gems" - ) + local_log_dir = os.path.join(self.buildstock_dir, "resources", ".custom_gems") if not os.path.exists(local_log_dir): os.makedirs(local_log_dir) @@ -109,9 +95,7 @@ def __init__(self, project_filename): volumes=docker_volume_mounts, name="install_custom_gems", ) - with open( - os.path.join(local_log_dir, "bundle_install_output.log"), "wb" - ) as f_out: + with open(os.path.join(local_log_dir, "bundle_install_output.log"), "wb") as f_out: f_out.write(container_output) # Report out custom gems loaded by OpenStudio CLI @@ -160,33 +144,25 @@ def run_building( upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 try: - sim_id, sim_dir = cls.make_sim_dir( - i, upgrade_idx, os.path.join(results_dir, "simulation_output") - ) + sim_id, sim_dir = cls.make_sim_dir(i, upgrade_idx, os.path.join(results_dir, "simulation_output")) except SimulationExists: return sim_path = pathlib.Path(sim_dir) buildstock_path = pathlib.Path(buildstock_dir) # Make symlinks to project and buildstock stuff - (sim_path / "measures").symlink_to( - buildstock_path / "measures", target_is_directory=True - ) + (sim_path / "measures").symlink_to(buildstock_path / "measures", target_is_directory=True) (sim_path / "lib").symlink_to(buildstock_path / "lib", target_is_directory=True) (sim_path / "weather").symlink_to(weather_dir, target_is_directory=True) hpxml_measures_path = buildstock_path / "resources" / "hpxml-measures" if hpxml_measures_path.exists(): resources_path = sim_path / "resources" resources_path.mkdir() - (resources_path / "hpxml-measures").symlink_to( - hpxml_measures_path, target_is_directory=True - ) + (resources_path / "hpxml-measures").symlink_to(hpxml_measures_path, target_is_directory=True) else: resources_path = None - osw = cls.create_osw( - cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx - ) + osw = cls.create_osw(cfg, n_datapoints, sim_id, building_id=i, upgrade_idx=upgrade_idx) with open(sim_path / "in.osw", "w") as f: json.dump(osw, f, indent=4) @@ -276,9 +252,7 @@ def run_building( # Read data_point_out.json reporting_measures = cls.get_reporting_measures(cfg) - dpout = postprocessing.read_simulation_outputs( - fs, reporting_measures, sim_dir, upgrade_id, i - ) + dpout = postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, i) return dpout def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): @@ -317,9 +291,7 @@ def run_batch(self, n_jobs=None, measures_only=False, sampling_only=False): ) upgrade_sims = [] for i in range(len(self.cfg.get("upgrades", []))): - upgrade_sims.append( - map(functools.partial(run_building_d, upgrade_idx=i), building_ids) - ) + upgrade_sims.append(map(functools.partial(run_building_d, upgrade_idx=i), building_ids)) if not self.skip_baseline_sims: baseline_sims = map(run_building_d, building_ids) all_sims = itertools.chain(baseline_sims, *upgrade_sims) @@ -353,18 +325,14 @@ def output_dir(self): @property def results_dir(self): - results_dir = self.cfg.get( - "output_directory", os.path.join(self.project_dir, "localResults") - ) + results_dir = self.cfg.get("output_directory", os.path.join(self.project_dir, "localResults")) results_dir = self.path_rel_to_projectfile(results_dir) if not os.path.isdir(results_dir): os.makedirs(results_dir) return results_dir def get_dask_client(self): - cluster = LocalCluster(
- local_directory=os.path.join(self.results_dir, "dask-tmp") - ) + cluster = LocalCluster(local_directory=os.path.join(self.results_dir, "dask-tmp")) return Client(cluster) @@ -425,8 +393,7 @@ def main(): ) group.add_argument( "--uploadonly", - help="Only upload to S3, useful when postprocessing is already done. Ignores the " - "upload flag in yaml", + help="Only upload to S3, useful when postprocessing is already done. Ignores the " "upload flag in yaml", action="store_true", ) group.add_argument( @@ -434,14 +401,10 @@ def main(): help="Only validate the project YAML file and references. Nothing is executed", action="store_true", ) - group.add_argument( - "--samplingonly", help="Run the sampling only.", action="store_true" - ) + group.add_argument("--samplingonly", help="Run the sampling only.", action="store_true") args = parser.parse_args() if not os.path.isfile(args.project_filename): - raise FileNotFoundError( - f"The project file {args.project_filename} doesn't exist" - ) + raise FileNotFoundError(f"The project file {args.project_filename} doesn't exist") # Validate the project, and in case of the --validateonly flag return True if validation passes LocalBatch.validate_project(args.project_filename) diff --git a/buildstockbatch/postprocessing.py b/buildstockbatch/postprocessing.py index 79a604e4..12e1dbdf 100644 --- a/buildstockbatch/postprocessing.py +++ b/buildstockbatch/postprocessing.py @@ -132,9 +132,7 @@ def read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, buildin :return: dpout [dict] """ - dpout = read_data_point_out_json( - fs, reporting_measures, f"{sim_dir}/run/data_point_out.json" - ) + dpout = read_data_point_out_json(fs, reporting_measures, f"{sim_dir}/run/data_point_out.json") if dpout is None: dpout = {} else: @@ -167,16 +165,9 @@ def clean_up_results_df(df, cfg, keep_upgrade_id=False): for col in ("started_at", "completed_at"): if col in results_df.columns: results_df[col] = results_df[col].map( - lambda x: dt.datetime.strptime(x, "%Y%m%dT%H%M%SZ") - if isinstance(x, str) - else x + lambda x: dt.datetime.strptime(x, "%Y%m%dT%H%M%SZ") if isinstance(x, str) else x ) - reference_scenarios = dict( - [ - (i, x.get("reference_scenario")) - for i, x in enumerate(cfg.get("upgrades", []), 1) - ] - ) + reference_scenarios = dict([(i, x.get("reference_scenario")) for i, x in enumerate(cfg.get("upgrades", []), 1)]) results_df["apply_upgrade.reference_scenario"] = ( results_df["upgrade"].map(reference_scenarios).fillna("").astype(str) ) @@ -196,26 +187,10 @@ def clean_up_results_df(df, cfg, keep_upgrade_id=False): if "job_id" in results_df.columns: first_few_cols.insert(2, "job_id") - build_existing_model_cols = sorted( - [col for col in results_df.columns if col.startswith("build_existing_model")] - ) - sim_output_report_cols = sorted( - [ - col - for col in results_df.columns - if col.startswith("simulation_output_report") - ] - ) - report_sim_output_cols = sorted( - [ - col - for col in results_df.columns - if col.startswith("report_simulation_output") - ] - ) - upgrade_costs_cols = sorted( - [col for col in results_df.columns if col.startswith("upgrade_costs")] - ) + build_existing_model_cols = sorted([col for col in results_df.columns if col.startswith("build_existing_model")]) + sim_output_report_cols = sorted([col for col in results_df.columns if col.startswith("simulation_output_report")]) + report_sim_output_cols = sorted([col for col in results_df.columns if col.startswith("report_simulation_output")]) + upgrade_costs_cols = sorted([col for col in results_df.columns if col.startswith("upgrade_costs")])
sorted_cols = ( first_few_cols + build_existing_model_cols @@ -281,9 +256,7 @@ def read_enduse_timeseries_parquet(fs, all_cols, src_path, bldg_id): return df -def concat_and_normalize( - fs, all_cols, src_path, dst_path, partition_columns, indx, bldg_ids, partition_vals -): +def concat_and_normalize(fs, all_cols, src_path, dst_path, partition_columns, indx, bldg_ids, partition_vals): dfs = [] for bldg_id in sorted(bldg_ids): df = read_enduse_timeseries_parquet(fs, all_cols, src_path, bldg_id) dfs.append(df) @@ -357,22 +330,12 @@ def get_partitioned_bldg_groups(partition_df, partition_columns, files_per_parti """ total_building = len(partition_df) if partition_columns: - bldg_id_list_df = ( - partition_df.reset_index() - .groupby(partition_columns)["building_id"] - .apply(list) - ) + bldg_id_list_df = partition_df.reset_index().groupby(partition_columns)["building_id"].apply(list) ngroups = len(bldg_id_list_df) bldg_id_list = bldg_id_list_df.sum() - nfiles_in_each_group = [ - nfiles for nfiles in bldg_id_list_df.map(lambda x: len(x)) - ] - files_groups = [ - split_into_groups(n, files_per_partition) for n in nfiles_in_each_group - ] - flat_groups = [ - n for group in files_groups for n in group - ] # flatten list of list into a list (maintain order) + nfiles_in_each_group = [nfiles for nfiles in bldg_id_list_df.map(lambda x: len(x))] + files_groups = [split_into_groups(n, files_per_partition) for n in nfiles_in_each_group] + flat_groups = [n for group in files_groups for n in group] # flatten list of list into a list (maintain order) else: # no partitioning by a column. Just put buildings into groups of files_per_partition ngroups = 1 @@ -412,9 +375,7 @@ def write_metadata_files(fs, parquet_root_dir, partition_columns): concat_files = fs.glob(glob_str) logger.info(f"Gathered {len(concat_files)} files. Now writing _metadata") parquet_root_dir = Path(parquet_root_dir).as_posix() - create_metadata_file( - concat_files, root_dir=parquet_root_dir, engine="pyarrow", fs=fs - ) + create_metadata_file(concat_files, root_dir=parquet_root_dir, engine="pyarrow", fs=fs) logger.info(f"_metadata file written to {parquet_root_dir}") @@ -448,9 +409,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): if not results_json_files: raise ValueError("No simulation results found to post-process.") - logger.info( - "Collecting all the columns and datatypes in results_job*.json.gz parquet files."
- ) + logger.info("Collecting all the columns and datatypes in results_job*.json.gz parquet files.") all_schema_dict = ( db.from_sequence(results_json_files) .map(partial(get_schema_dict, fs)) @@ -459,13 +418,10 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): ) logger.info(f"Got {len(all_schema_dict)} columns") all_results_cols = list(all_schema_dict.keys()) - all_schema_dict = { - to_camelcase(key): value for key, value in all_schema_dict.items() - } + all_schema_dict = {to_camelcase(key): value for key, value in all_schema_dict.items()} logger.info(f"Got this schema: {all_schema_dict}\n") delayed_results_dfs = [ - dask.delayed(partial(read_results_json, fs, all_cols=all_results_cols))(x) - for x in results_json_files + dask.delayed(partial(read_results_json, fs, all_cols=all_results_cols))(x) for x in results_json_files ] results_df = dd.from_delayed(delayed_results_dfs, verify_meta=False) @@ -478,25 +434,15 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): ts_filenames = fs.ls(upgrade_folder) if ts_filenames: do_timeseries = True - logger.info( - f"Found {len(ts_filenames)} files for upgrade {Path(upgrade_folder).name}." - ) + logger.info(f"Found {len(ts_filenames)} files for upgrade {Path(upgrade_folder).name}.") files_bag = db.from_sequence(ts_filenames, partition_size=100) - all_ts_cols |= ( - files_bag.map(partial(get_cols, fs)) - .fold(lambda x, y: x.union(y)) - .compute() - ) + all_ts_cols |= files_bag.map(partial(get_cols, fs)).fold(lambda x, y: x.union(y)).compute() logger.info("Collected all the columns") else: - logger.info( - f"There are no timeseries files for upgrade {Path(upgrade_folder).name}." - ) + logger.info(f"There are no timeseries files for upgrade {Path(upgrade_folder).name}.") # Sort the columns - all_ts_cols_sorted = ["building_id"] + sorted( - x for x in all_ts_cols if x.startswith("time") - ) + all_ts_cols_sorted = ["building_id"] + sorted(x for x in all_ts_cols if x.startswith("time")) all_ts_cols.difference_update(all_ts_cols_sorted) all_ts_cols_sorted.extend(sorted(x for x in all_ts_cols if not x.endswith("]"))) all_ts_cols.difference_update(all_ts_cols_sorted) @@ -513,9 +459,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): df_partition_columns = [f"build_existing_model.{c}" for c in partition_columns] missing_cols = set(df_partition_columns) - set(all_schema_dict.keys()) if missing_cols: - raise ValueError( - f"The following partitioning columns are not found in results.json: {missing_cols}" - ) + raise ValueError(f"The following partitioning columns are not found in results.json: {missing_cols}") if partition_columns: logger.info(f"The timeseries files will be partitioned by {partition_columns}.") @@ -532,16 +476,12 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): schema = None partition_df = df[df_partition_columns].copy() partition_df.rename( - columns={ - df_c: c for df_c, c in zip(df_partition_columns, partition_columns) - }, + columns={df_c: c for df_c, c in zip(df_partition_columns, partition_columns)}, inplace=True, ) if upgrade_id > 0: # Remove building characteristics for upgrade scenarios.
- cols_to_keep = list( - filter(lambda x: not x.startswith("build_existing_model."), df.columns) - ) + cols_to_keep = list(filter(lambda x: not x.startswith("build_existing_model."), df.columns)) df = df[cols_to_keep] null_cols = get_null_cols(df) # If certain column datatype is null (happens when it doesn't have any data), the datatype @@ -550,13 +490,9 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): logger.info(f"Upgrade {upgrade_id} has null cols: {null_cols}") schema, unresolved = correct_schema(all_schema_dict, df) if unresolved: - logger.info( - f"The types for {unresolved} columns couldn't be determined." - ) + logger.info(f"The types for {unresolved} columns couldn't be determined.") else: - logger.info( - "All columns were successfully assigned a datatype based on other upgrades." - ) + logger.info("All columns were successfully assigned a datatype based on other upgrades.") # Write CSV csv_filename = f"{results_csvs_dir}/results_up{upgrade_id:02d}.csv.gz" logger.info(f"Writing {csv_filename}") @@ -573,53 +509,30 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): fs.makedirs(results_parquet_dir) parquet_filename = f"{results_parquet_dir}/results_up{upgrade_id:02d}.parquet" logger.info(f"Writing {parquet_filename}") - write_dataframe_as_parquet( - df.reset_index(), fs, parquet_filename, schema=schema - ) + write_dataframe_as_parquet(df.reset_index(), fs, parquet_filename, schema=schema) if do_timeseries: # Get the names of the timeseries file for each simulation in this upgrade ts_upgrade_path = f"{ts_in_dir}/up{upgrade_id:02d}" - ts_filenames = [ - ts_upgrade_path + ts_filename for ts_filename in fs.ls(ts_upgrade_path) - ] - ts_bldg_ids = [ - int(re.search(r"bldg(\d+).parquet", flname).group(1)) - for flname in ts_filenames - ] + ts_filenames = [ts_upgrade_path + ts_filename for ts_filename in fs.ls(ts_upgrade_path)] + ts_bldg_ids = [int(re.search(r"bldg(\d+).parquet", flname).group(1)) for flname in ts_filenames] if not ts_filenames: - logger.warning( - f"There are no timeseries files for upgrade{upgrade_id}." - ) + logger.warning(f"There are no timeseries files for upgrade{upgrade_id}.") continue - logger.info( - f"There are {len(ts_filenames)} timeseries files for upgrade{upgrade_id}."
- ) + logger.info(f"There are {len(ts_filenames)} timeseries files for upgrade{upgrade_id}.") # Calculate the mean and estimate the total memory usage - read_ts_parquet = partial( - read_enduse_timeseries_parquet, fs, all_ts_cols_sorted, ts_upgrade_path - ) - get_ts_mem_usage_d = dask.delayed( - lambda x: read_ts_parquet(x).memory_usage(deep=True).sum() - ) + read_ts_parquet = partial(read_enduse_timeseries_parquet, fs, all_ts_cols_sorted, ts_upgrade_path) + get_ts_mem_usage_d = dask.delayed(lambda x: read_ts_parquet(x).memory_usage(deep=True).sum()) sample_size = min(len(ts_bldg_ids), 36 * 3) - mean_mem = np.mean( - dask.compute( - map(get_ts_mem_usage_d, random.sample(ts_bldg_ids, sample_size)) - )[0] - ) + mean_mem = np.mean(dask.compute(map(get_ts_mem_usage_d, random.sample(ts_bldg_ids, sample_size)))[0]) # Determine how many files should be in each partition and group the files parquet_memory = int( - cfg.get("eagle", {}) - .get("postprocessing", {}) - .get("parquet_memory_mb", MAX_PARQUET_MEMORY) + cfg.get("eagle", {}).get("postprocessing", {}).get("parquet_memory_mb", MAX_PARQUET_MEMORY) ) logger.info(f"Max parquet memory: {parquet_memory} MB") - max_files_per_partition = max( - 1, math.floor(parquet_memory / (mean_mem / 1e6)) - ) + max_files_per_partition = max(1, math.floor(parquet_memory / (mean_mem / 1e6))) partition_df = partition_df.loc[ts_bldg_ids].copy() logger.info(f"partition_df for the upgrade has {len(partition_df)} rows.") bldg_id_groups, bldg_id_list, ngroup = get_partitioned_bldg_groups( @@ -638,9 +551,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): ts_out_loc = f"s3://{ts_dir}/upgrade={upgrade_id}/" fs.makedirs(ts_out_loc) - logger.info( - f"Created directory {ts_out_loc} for writing. Now concatenating ..." - ) + logger.info(f"Created directory {ts_out_loc} for writing. Now concatenating ...") src_path = f"{ts_in_dir}/up{upgrade_id:02d}/" concat_partial = dask.delayed( @@ -654,9 +565,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): ) ) partition_vals_list = [ - list(partition_df.loc[bldg_id_list[0]].values) - if partition_columns - else [] + list(partition_df.loc[bldg_id_list[0]].values) if partition_columns else [] for bldg_id_list in bldg_id_groups ] @@ -676,9 +585,7 @@ def combine_results(fs, results_dir, cfg, do_timeseries=True): f"{results_dir}/dask_combine_report{upgrade_id}.html", ) - logger.info( - f"Finished combining and saving timeseries for upgrade{upgrade_id}." - ) + logger.info(f"Finished combining and saving timeseries for upgrade{upgrade_id}.") logger.info("All aggregation completed. ") if do_timeseries: logger.info("Writing timeseries metadata files") @@ -704,9 +611,7 @@ def upload_results(aws_conf, output_dir, results_dir, buildstock_csv_filename): parquet_dir = Path(results_dir).joinpath("parquet") ts_dir = parquet_dir / "timeseries" if not parquet_dir.is_dir(): - logger.error( - f"{parquet_dir} does not exist. Please make sure postprocessing has been done." - ) + logger.error(f"{parquet_dir} does not exist. Please make sure postprocessing has been done.") raise FileNotFoundError(parquet_dir) all_files = [] @@ -718,9 +623,7 @@ def upload_results(aws_conf, output_dir, results_dir, buildstock_csv_filename): s3_prefix = aws_conf.get("s3", {}).get("prefix", "").rstrip("/") s3_bucket = aws_conf.get("s3", {}).get("bucket", None) if not (s3_prefix and s3_bucket): - logger.error( - "YAML file missing postprocessing:aws:s3:prefix and/or bucket entry."
- ) + logger.error("YAML file missing postprocessing:aws:s3:prefix and/or bucket entry.") return s3_prefix_output = s3_prefix + "/" + output_folder_name + "/" @@ -728,15 +631,11 @@ def upload_results(aws_conf, output_dir, results_dir, buildstock_csv_filename): bucket = s3.Bucket(s3_bucket) n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix_output))) if n_existing_files > 0: - logger.error( - f"There are already {n_existing_files} files in the s3 folder {s3_bucket}/{s3_prefix_output}." - ) + logger.error(f"There are already {n_existing_files} files in the s3 folder {s3_bucket}/{s3_prefix_output}.") raise FileExistsError(f"s3://{s3_bucket}/{s3_prefix_output}") def upload_file(filepath, s3key=None): - full_path = ( - filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath) - ) + full_path = filepath if filepath.is_absolute() else parquet_dir.joinpath(filepath) s3 = boto3.resource("s3") bucket = s3.Bucket(s3_bucket) if s3key is None: @@ -756,9 +655,7 @@ def upload_file(filepath, s3key=None): else: logger.warning(f"{buildstock_csv_filename} doesn't exist, can't upload.") dask.compute(tasks) - logger.info( - f"Upload to S3 completed. The files are uploaded to: {s3_bucket}/{s3_prefix_output}" - ) + logger.info(f"Upload to S3 completed. The files are uploaded to: {s3_bucket}/{s3_prefix_output}") return s3_bucket, s3_prefix_output @@ -767,9 +664,7 @@ def create_athena_tables(aws_conf, tbl_prefix, s3_bucket, s3_prefix): region_name = aws_conf.get("region_name", "us-west-2") db_name = aws_conf.get("athena", {}).get("database_name", None) - role = aws_conf.get("athena", {}).get( - "glue_service_role", "service-role/AWSGlueServiceRole-default" - ) + role = aws_conf.get("athena", {}).get("glue_service_role", "service-role/AWSGlueServiceRole-default") max_crawling_time = aws_conf.get("athena", {}).get("max_crawling_time", 600) assert db_name, "athena:database_name not supplied" @@ -779,17 +674,11 @@ def create_athena_tables(aws_conf, tbl_prefix, s3_bucket, s3_prefix): s3_path = f"s3://{s3_bucket}/{s3_prefix}" n_existing_files = len(list(bucket.objects.filter(Prefix=s3_prefix))) if n_existing_files == 0: - logger.warning( - f"There are no files in {s3_path}, Athena tables will not be created as intended" - ) + logger.warning(f"There are no files in {s3_path}, Athena tables will not be created as intended") return glueClient = boto3.client("glue", region_name=region_name) - crawlTarget = { - "S3Targets": [ - {"Path": s3_path, "Exclusions": ["**_metadata", "**_common_metadata"]} - ] - } + crawlTarget = {"S3Targets": [{"Path": s3_path, "Exclusions": ["**_metadata", "**_common_metadata"]}]} crawler_name = db_name + "_" + tbl_prefix tbl_prefix = tbl_prefix + "_" @@ -807,26 +696,18 @@ def create_crawler(): except glueClient.exceptions.AlreadyExistsException: logger.info(f"Deleting existing crawler: {crawler_name}. And creating new one.")
And creating new one.") glueClient.delete_crawler(Name=crawler_name) - time.sleep( - 1 - ) # A small delay after deleting is required to prevent AlreadyExistsException again + time.sleep(1) # A small delay after deleting is required to prevent AlreadyExistsException again create_crawler() try: - existing_tables = [ - x["Name"] for x in glueClient.get_tables(DatabaseName=db_name)["TableList"] - ] + existing_tables = [x["Name"] for x in glueClient.get_tables(DatabaseName=db_name)["TableList"]] except glueClient.exceptions.EntityNotFoundException: existing_tables = [] to_be_deleted_tables = [x for x in existing_tables if x.startswith(tbl_prefix)] if to_be_deleted_tables: - logger.info( - f"Deleting existing tables in db {db_name}: {to_be_deleted_tables}. And creating new ones." - ) - glueClient.batch_delete_table( - DatabaseName=db_name, TablesToDelete=to_be_deleted_tables - ) + logger.info(f"Deleting existing tables in db {db_name}: {to_be_deleted_tables}. And creating new ones.") + glueClient.batch_delete_table(DatabaseName=db_name, TablesToDelete=to_be_deleted_tables) glueClient.start_crawler(Name=crawler_name) logger.info("Crawler started") @@ -834,9 +715,7 @@ def create_crawler(): t = time.time() while time.time() - t < (3 * max_crawling_time): crawler_state = glueClient.get_crawler(Name=crawler_name)["Crawler"]["State"] - metrics = glueClient.get_crawler_metrics(CrawlerNameList=[crawler_name])[ - "CrawlerMetricsList" - ][0] + metrics = glueClient.get_crawler_metrics(CrawlerNameList=[crawler_name])["CrawlerMetricsList"][0] if is_crawler_running and crawler_state != "RUNNING": is_crawler_running = False logger.info(f"Crawler has completed running. It is {crawler_state}.") diff --git a/buildstockbatch/sampler/base.py b/buildstockbatch/sampler/base.py index a150958d..554aab2a 100644 --- a/buildstockbatch/sampler/base.py +++ b/buildstockbatch/sampler/base.py @@ -46,20 +46,14 @@ def __init__(self, parent): :param parent: The BuildStockBatchBase object that owns this sampler. """ - self.parent = weakref.ref( - parent - ) # This removes circular references and allows garbage collection to work. + self.parent = weakref.ref(parent) # This removes circular references and allows garbage collection to work. 
if self.container_runtime in ( ContainerRuntime.DOCKER, ContainerRuntime.LOCAL_OPENSTUDIO, ): - self.csv_path = os.path.join( - self.project_dir, "housing_characteristics", "buildstock.csv" - ) + self.csv_path = os.path.join(self.project_dir, "housing_characteristics", "buildstock.csv") elif self.container_runtime == ContainerRuntime.SINGULARITY: - self.csv_path = os.path.join( - self.parent().output_dir, "housing_characteristics", "buildstock.csv" - ) + self.csv_path = os.path.join(self.parent().output_dir, "housing_characteristics", "buildstock.csv") else: self.csv_path = None diff --git a/buildstockbatch/sampler/commercial_sobol.py b/buildstockbatch/sampler/commercial_sobol.py index ae8f1bd5..b92f5ff3 100644 --- a/buildstockbatch/sampler/commercial_sobol.py +++ b/buildstockbatch/sampler/commercial_sobol.py @@ -62,10 +62,7 @@ def validate_args(cls, project_filename, **kw): else: raise ValidationError(f"Unknown argument for sampler: {k}") if len(expected_args) > 0: - raise ValidationError( - "The following sampler arguments are required: " - + ", ".join(expected_args) - ) + raise ValidationError("The following sampler arguments are required: " + ", ".join(expected_args)) return True def run_sampling(self): @@ -87,15 +84,11 @@ def run_sampling(self): for tsv_file in os.listdir(self.buildstock_dir): if ".tsv" in tsv_file: tsv_df = read_csv(os.path.join(self.buildstock_dir, tsv_file), sep="\t") - dependency_columns = [ - item for item in list(tsv_df) if "Dependency=" in item - ] + dependency_columns = [item for item in list(tsv_df) if "Dependency=" in item] tsv_df[dependency_columns] = tsv_df[dependency_columns].astype("str") tsv_hash[tsv_file.replace(".tsv", "")] = tsv_df dependency_hash, attr_order = self._com_order_tsvs(tsv_hash) - sample_matrix = self._com_execute_sobol_sampling( - attr_order.__len__(), sample_number - ) + sample_matrix = self._com_execute_sobol_sampling(attr_order.__len__(), sample_number) csv_path = self.csv_path header = "Building," for item in attr_order: @@ -131,9 +124,7 @@ def _com_execute_sobol_sampling(n_dims, n_samples): :param n_samples: Number of samples to calculate :return: Pandas DataFrame object which contains the low discrepancy result of the sobol algorithm """ - return pd.DataFrame(i4_sobol_generate(n_dims, n_samples, 0)).replace( - 1.0, 0.999999 - ) + return pd.DataFrame(i4_sobol_generate(n_dims, n_samples, 0)).replace(1.0, 0.999999) @staticmethod def _com_order_tsvs(tsv_hash): @@ -146,9 +137,7 @@ def _com_order_tsvs(tsv_hash): dependency_hash = {} for attr in tsv_hash.keys(): dependency_hash[attr] = [ - item.replace("Dependency=", "") - for item in list(tsv_hash[attr]) - if "Dependency=" in item + item.replace("Dependency=", "") for item in list(tsv_hash[attr]) if "Dependency=" in item ] attr_order = [] for attr in dependency_hash.keys(): @@ -170,9 +159,7 @@ def _com_order_tsvs(tsv_hash): elif max_iterations > 0: max_iterations -= 1 else: - raise RuntimeError( - "Unable to resolve the dependency tree within the set iteration limit" - ) + raise RuntimeError("Unable to resolve the dependency tree within the set iteration limit") return dependency_hash, attr_order @staticmethod @@ -206,8 +193,7 @@ def _com_execute_sample( tsv_dist_val = sample_vector[attr_index] for dependency in sample_dependency_hash[attr]: tsv_lkup = tsv_lkup.loc[ - tsv_lkup.loc[:, "Dependency=" + dependency] - == sample_dependency_hash[dependency] + tsv_lkup.loc[:, "Dependency=" + dependency] == sample_dependency_hash[dependency] ] tsv_lkup = tsv_lkup.drop("Dependency=" + dependency, axis=1)
axis=1) if tsv_lkup.shape[0] == 0: @@ -218,17 +204,9 @@ def _com_execute_sample( ) return if tsv_lkup.shape[0] != 1: - raise RuntimeError( - "Unable to reduce tsv for {} to 1 row, index {}".format( - attr, sample_index - ) - ) + raise RuntimeError("Unable to reduce tsv for {} to 1 row, index {}".format(attr, sample_index)) tsv_lkup_cdf = tsv_lkup.values.cumsum() > tsv_dist_val - option_values = [ - item.replace("Option=", "") - for item in list(tsv_lkup) - if "Option=" in item - ] + option_values = [item.replace("Option=", "") for item in list(tsv_lkup) if "Option=" in item] attr_result = list(compress(option_values, tsv_lkup_cdf))[0] sample_dependency_hash[attr] = attr_result result_vector.append(attr_result) diff --git a/buildstockbatch/sampler/downselect.py b/buildstockbatch/sampler/downselect.py index a7fa9e6b..0820e366 100644 --- a/buildstockbatch/sampler/downselect.py +++ b/buildstockbatch/sampler/downselect.py @@ -42,11 +42,7 @@ def __init__(self, parent, n_datapoints, logic, resample=True, **kw): """ super().__init__(parent) self.validate_args( - self.parent().project_filename, - n_datapoints=n_datapoints, - logic=logic, - resample=resample, - **kw + self.parent().project_filename, n_datapoints=n_datapoints, logic=logic, resample=resample, **kw ) self.logic = logic self.resample = resample @@ -69,10 +65,7 @@ def validate_args(cls, project_filename, **kw): else: extra_kw[k] = v if len(expected_args) > 0: - raise ValidationError( - "The following sampler arguments are required: " - + ", ".join(expected_args) - ) + raise ValidationError("The following sampler arguments are required: " + ", ".join(expected_args)) cls.SUB_SAMPLER_CLASS.validate_args(project_filename, **extra_kw) return True @@ -105,31 +98,21 @@ def downselect_logic(cls, df, logic): def run_sampling(self): if self.resample: - logger.debug( - "Performing initial sampling to figure out number of samples for downselect" - ) + logger.debug("Performing initial sampling to figure out number of samples for downselect") n_samples_init = 350000 - init_sampler = self.SUB_SAMPLER_CLASS( - self.parent(), n_datapoints=n_samples_init, **self.sub_kw - ) + init_sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples_init, **self.sub_kw) buildstock_csv_filename = init_sampler.run_sampling() df = read_csv(buildstock_csv_filename, index_col=0, dtype=str) df_new = df[self.downselect_logic(df, self.logic)] downselected_n_samples_init = df_new.shape[0] - n_samples = math.ceil( - self.n_datapoints * n_samples_init / downselected_n_samples_init - ) + n_samples = math.ceil(self.n_datapoints * n_samples_init / downselected_n_samples_init) os.remove(buildstock_csv_filename) del init_sampler else: n_samples = self.n_datapoints - sampler = self.SUB_SAMPLER_CLASS( - self.parent(), n_datapoints=n_samples, **self.sub_kw - ) + sampler = self.SUB_SAMPLER_CLASS(self.parent(), n_datapoints=n_samples, **self.sub_kw) buildstock_csv_filename = sampler.run_sampling() - with gzip.open( - os.path.splitext(buildstock_csv_filename)[0] + "_orig.csv.gz", "wb" - ) as f_out: + with gzip.open(os.path.splitext(buildstock_csv_filename)[0] + "_orig.csv.gz", "wb") as f_out: with open(buildstock_csv_filename, "rb") as f_in: shutil.copyfileobj(f_in, f_out) df = read_csv(buildstock_csv_filename, index_col=0, dtype="str") diff --git a/buildstockbatch/sampler/residential_quota.py b/buildstockbatch/sampler/residential_quota.py index f534b0dd..2a208170 100644 --- a/buildstockbatch/sampler/residential_quota.py +++ b/buildstockbatch/sampler/residential_quota.py @@ 
-49,10 +49,7 @@ def validate_args(cls, project_filename, **kw): else: raise ValidationError(f"Unknown argument for sampler: {k}") if len(expected_args) > 0: - raise ValidationError( - "The following sampler arguments are required: " - + ", ".join(expected_args) - ) + raise ValidationError("The following sampler arguments are required: " + ", ".join(expected_args)) return True def _run_sampling_docker(self): @@ -74,9 +71,7 @@ def _run_sampling_docker(self): "buildstock.csv", ], remove=True, - volumes={ - self.buildstock_dir: {"bind": "/var/simdata/openstudio", "mode": "rw"} - }, + volumes={self.buildstock_dir: {"bind": "/var/simdata/openstudio", "mode": "rw"}}, name="buildstock_sampling", **extra_kws, ) diff --git a/buildstockbatch/test/conftest.py b/buildstockbatch/test/conftest.py index 54a50d37..554c688a 100644 --- a/buildstockbatch/test/conftest.py +++ b/buildstockbatch/test/conftest.py @@ -36,22 +36,14 @@ def _basic_residential_project_file(update_args={}, raw=False): ) # move the job*.json file to appropriate location - if os.path.exists( - os.path.join(output_directory, "simulation_output", "job0.json") - ): + if os.path.exists(os.path.join(output_directory, "simulation_output", "job0.json")): shutil.move( os.path.join(output_directory, "simulation_output", "job0.json"), - os.path.join( - output_directory, "simulation_output", "..", "..", "job0.json" - ), + os.path.join(output_directory, "simulation_output", "..", "..", "job0.json"), ) os.mkdir(os.path.join(output_directory, "housing_characteristics")) - os.mkdir( - os.path.join( - buildstock_directory, project_directory, "housing_characteristics" - ) - ) + os.mkdir(os.path.join(buildstock_directory, project_directory, "housing_characteristics")) cfg = { "buildstock_directory": buildstock_directory, "project_directory": project_directory, diff --git a/buildstockbatch/test/shared_testing_stuff.py b/buildstockbatch/test/shared_testing_stuff.py index 4e33ac43..6edc738f 100644 --- a/buildstockbatch/test/shared_testing_stuff.py +++ b/buildstockbatch/test/shared_testing_stuff.py @@ -9,6 +9,4 @@ pathlib.Path(__file__).resolve().parent.parent.parent.parent / "resstock", ) ) -resstock_required = pytest.mark.skipif( - not resstock_directory.exists(), reason="ResStock checkout is not found" -) +resstock_required = pytest.mark.skipif(not resstock_directory.exists(), reason="ResStock checkout is not found") diff --git a/buildstockbatch/test/test_base.py b/buildstockbatch/test/test_base.py index 536eb32b..658d82a9 100644 --- a/buildstockbatch/test/test_base.py +++ b/buildstockbatch/test/test_base.py @@ -45,25 +45,16 @@ def test_reference_scenario(basic_residential_project_file): with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object( BuildStockBatchBase, "get_dask_client" - ) as get_dask_client_mock, patch.object( - BuildStockBatchBase, "results_dir", results_dir - ): + ) as get_dask_client_mock, patch.object(BuildStockBatchBase, "results_dir", results_dir): bsb = BuildStockBatchBase(project_filename) bsb.process_results() get_dask_client_mock.assert_called_once() # test results.csv files test_path = os.path.join(results_dir, "results_csvs") - test_csv = ( - read_csv(os.path.join(test_path, "results_up01.csv.gz")) - .set_index("building_id") - .sort_index() - ) + test_csv = read_csv(os.path.join(test_path, "results_up01.csv.gz")).set_index("building_id").sort_index() assert len(test_csv["apply_upgrade.reference_scenario"].unique()) == 1 - assert ( - test_csv["apply_upgrade.reference_scenario"].iloc[0] - == 
"example_reference_scenario" - ) + assert test_csv["apply_upgrade.reference_scenario"].iloc[0] == "example_reference_scenario" def test_downselect_integer_options(basic_residential_project_file, mocker): @@ -80,9 +71,7 @@ def test_downselect_integer_options(basic_residential_project_file, mocker): col_idx = row.index("Days Shifted") else: # Convert values from "Day1" to "1.10" so we hit the bug - row[col_idx] = "{0}.{0}0".format( - re.search(r"Day(\d+)", row[col_idx]).group(1) - ) + row[col_idx] = "{0}.{0}0".format(re.search(r"Day(\d+)", row[col_idx]).group(1)) valid_option_values.add(row[col_idx]) cf_out.writerow(row) @@ -100,9 +89,7 @@ def test_downselect_integer_options(basic_residential_project_file, mocker): ) mocker.patch.object(BuildStockBatchBase, "weather_dir", None) mocker.patch.object(BuildStockBatchBase, "results_dir", results_dir) - sampler_property_mock = mocker.patch.object( - BuildStockBatchBase, "sampler", new_callable=PropertyMock - ) + sampler_property_mock = mocker.patch.object(BuildStockBatchBase, "sampler", new_callable=PropertyMock) sampler_mock = mocker.MagicMock() sampler_property_mock.return_value = sampler_mock sampler_mock.run_sampling = MagicMock(return_value=buildstock_csv) @@ -141,9 +128,7 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): } } mocked_glueclient = MagicMock() - mocked_glueclient.get_crawler = MagicMock( - return_value={"Crawler": {"State": "READY"}} - ) + mocked_glueclient.get_crawler = MagicMock(return_value={"Crawler": {"State": "READY"}}) mocked_boto3.client = MagicMock(return_value=mocked_glueclient) mocked_boto3.resource().Bucket().objects.filter.side_effect = [[], ["a", "b", "c"]] project_filename, results_dir = basic_residential_project_file(upload_config) @@ -155,17 +140,12 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): / "buildstock.csv" ) # noqa: E501 shutil.copy2( - Path(__file__).parent - / "test_results" - / "housing_characteristics" - / "buildstock.csv", + Path(__file__).parent / "test_results" / "housing_characteristics" / "buildstock.csv", buildstock_csv_path, ) with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object( BuildStockBatchBase, "output_dir", results_dir - ), patch.object( - BuildStockBatchBase, "get_dask_client" - ) as get_dask_client_mock, patch.object( + ), patch.object(BuildStockBatchBase, "get_dask_client") as get_dask_client_mock, patch.object( BuildStockBatchBase, "results_dir", results_dir ), patch.object( BuildStockBatchBase, "CONTAINER_RUNTIME", ContainerRuntime.LOCAL_OPENSTUDIO @@ -190,25 +170,13 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): if call_function == "create_crawler": crawler_para = call[2] # 2 is for the keyword arguments crawler_created = True - assert ( - crawler_para["DatabaseName"] - == upload_config["postprocessing"]["aws"]["athena"]["database_name"] - ) - assert ( - crawler_para["Role"] - == upload_config["postprocessing"]["aws"]["athena"]["glue_service_role"] - ) + assert crawler_para["DatabaseName"] == upload_config["postprocessing"]["aws"]["athena"]["database_name"] + assert crawler_para["Role"] == upload_config["postprocessing"]["aws"]["athena"]["glue_service_role"] assert crawler_para["TablePrefix"] == OUTPUT_FOLDER_NAME + "_" assert crawler_para["Name"] == db_name + "_" + OUTPUT_FOLDER_NAME assert ( crawler_para["Targets"]["S3Targets"][0]["Path"] - == "s3://" - + s3_bucket - + "/" - + s3_prefix - + "/" - + OUTPUT_FOLDER_NAME - + "/" + == "s3://" + s3_bucket + "/" + s3_prefix + "/" + 
OUTPUT_FOLDER_NAME + "/" ) if call_function == "start_crawler": assert crawler_created, "crawler attempted to start before creating" @@ -228,23 +196,17 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): files_uploaded.remove((source_file_path, s3_file_path)) s3_file_path = s3_path + "upgrades/upgrade=1/results_up01.parquet" - source_file_path = os.path.join( - source_path, "upgrades", "upgrade=1", "results_up01.parquet" - ) + source_file_path = os.path.join(source_path, "upgrades", "upgrade=1", "results_up01.parquet") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) s3_file_path = s3_path + "timeseries/upgrade=0/group0.parquet" - source_file_path = os.path.join( - source_path, "timeseries", "upgrade=0", "group0.parquet" - ) + source_file_path = os.path.join(source_path, "timeseries", "upgrade=0", "group0.parquet") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) s3_file_path = s3_path + "timeseries/upgrade=1/group0.parquet" - source_file_path = os.path.join( - source_path, "timeseries", "upgrade=1", "group0.parquet" - ) + source_file_path = os.path.join(source_path, "timeseries", "upgrade=1", "group0.parquet") assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) @@ -263,9 +225,7 @@ def test_upload_files(mocked_boto3, basic_residential_project_file): assert (source_file_path, s3_file_path) in files_uploaded files_uploaded.remove((source_file_path, s3_file_path)) - assert ( - len(files_uploaded) == 0 - ), f"These files shouldn't have been uploaded: {files_uploaded}" + assert len(files_uploaded) == 0, f"These files shouldn't have been uploaded: {files_uploaded}" def test_write_parquet_no_index(): @@ -286,9 +246,7 @@ def test_skipping_baseline(basic_residential_project_file): ) sim_output_path = os.path.join(results_dir, "simulation_output") - shutil.rmtree( - os.path.join(sim_output_path, "timeseries", "up00") - ) # remove timeseries results for baseline + shutil.rmtree(os.path.join(sim_output_path, "timeseries", "up00")) # remove timeseries results for baseline # remove results.csv data for baseline from results_jobx.json.gz results_json_filename = os.path.join(sim_output_path, "results_job0.json.gz") @@ -308,21 +266,15 @@ def test_skipping_baseline(basic_residential_project_file): # run postprocessing with patch.object(BuildStockBatchBase, "weather_dir", None), patch.object( BuildStockBatchBase, "get_dask_client" - ) as get_dask_client_mock, patch.object( - BuildStockBatchBase, "results_dir", results_dir - ): + ) as get_dask_client_mock, patch.object(BuildStockBatchBase, "results_dir", results_dir): bsb = BuildStockBatchBase(project_filename) bsb.process_results() get_dask_client_mock.assert_called_once() - up00_parquet = os.path.join( - results_dir, "parquet", "baseline", "results_up00.parquet" - ) + up00_parquet = os.path.join(results_dir, "parquet", "baseline", "results_up00.parquet") assert not os.path.exists(up00_parquet) - up01_parquet = os.path.join( - results_dir, "parquet", "upgrades", "upgrade=1", "results_up01.parquet" - ) + up01_parquet = os.path.join(results_dir, "parquet", "upgrades", "upgrade=1", "results_up01.parquet") assert os.path.exists(up01_parquet) up00_csv_gz = os.path.join(results_dir, "results_csvs", "results_up00.csv.gz") @@ -345,9 +297,7 @@ def test_provide_buildstock_csv(basic_residential_project_file, mocker): sampling_output_csv = 
bsb.sampler.run_sampling() df2 = read_csv(sampling_output_csv, dtype=str) pd.testing.assert_frame_equal(df, df2) - assert ( - df["Geometry Shared Walls"] == "None" - ).all() # Verify None is being read properly + assert (df["Geometry Shared Walls"] == "None").all() # Verify None is being read properly # Test file missing with open(project_filename, "r") as f: cfg = yaml.safe_load(f) diff --git a/buildstockbatch/test/test_eagle.py b/buildstockbatch/test/test_eagle.py index 01e27997..f8354542 100644 --- a/buildstockbatch/test/test_eagle.py +++ b/buildstockbatch/test/test_eagle.py @@ -18,15 +18,10 @@ @patch("buildstockbatch.eagle.subprocess") def test_hpc_run_building(mock_subprocess, monkeypatch, basic_residential_project_file): tar_filename = ( - pathlib.Path(__file__).resolve().parent - / "test_results" - / "simulation_output" - / "simulations_job0.tar.gz" + pathlib.Path(__file__).resolve().parent / "test_results" / "simulation_output" / "simulations_job0.tar.gz" ) # noqa E501 with tarfile.open(tar_filename, "r") as tarf: - osw_dict = json.loads( - tarf.extractfile("up00/bldg0000001/in.osw").read().decode("utf-8") - ) + osw_dict = json.loads(tarf.extractfile("up00/bldg0000001/in.osw").read().decode("utf-8")) project_filename, results_dir = basic_residential_project_file() tmp_path = pathlib.Path(results_dir).parent @@ -37,9 +32,7 @@ def test_hpc_run_building(mock_subprocess, monkeypatch, basic_residential_projec with patch.object(EagleBatch, "weather_dir", None), patch.object( EagleBatch, "create_osw", return_value=osw_dict - ), patch.object( - EagleBatch, "make_sim_dir", return_value=("bldg0000001up00", sim_path) - ), patch.object( + ), patch.object(EagleBatch, "make_sim_dir", return_value=("bldg0000001up00", sim_path)), patch.object( EagleBatch, "local_scratch", tmp_path ): # Normal run @@ -121,17 +114,12 @@ def test_user_cli( argv = [project_filename] user_cli(argv) mock_subprocess.run.assert_called_once() - eagle_sh = os.path.abspath( - os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "eagle.sh") - ) + eagle_sh = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "eagle.sh")) assert mock_subprocess.run.call_args[0][0][-1] == eagle_sh assert "--time=20" in mock_subprocess.run.call_args[0][0] assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] assert "--nodes=1" in mock_subprocess.run.call_args[0][0] - assert ( - "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" - in mock_subprocess.run.call_args[0][0] - ) + assert "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" in mock_subprocess.run.call_args[0][0] assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] assert "--qos=high" not in mock_subprocess.run.call_args[0][0] assert "0" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] @@ -144,10 +132,7 @@ def test_user_cli( assert "--time=20" in mock_subprocess.run.call_args[0][0] assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] assert "--nodes=1" in mock_subprocess.run.call_args[0][0] - assert ( - "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" - in mock_subprocess.run.call_args[0][0] - ) + assert "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" in mock_subprocess.run.call_args[0][0] assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] assert "--qos=high" in mock_subprocess.run.call_args[0][0] assert "0" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] @@ -161,10 +146,7 @@ def test_user_cli( assert "--time=20" in 
mock_subprocess.run.call_args[0][0] assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] assert "--nodes=1" in mock_subprocess.run.call_args[0][0] - assert ( - "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" - in mock_subprocess.run.call_args[0][0] - ) + assert "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" in mock_subprocess.run.call_args[0][0] assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] assert "--qos=high" not in mock_subprocess.run.call_args[0][0] assert "1" == mock_subprocess.run.call_args[1]["env"]["MEASURESONLY"] @@ -178,10 +160,7 @@ def test_user_cli( assert "--time=20" in mock_subprocess.run.call_args[0][0] assert "--account=testaccount" in mock_subprocess.run.call_args[0][0] assert "--nodes=1" in mock_subprocess.run.call_args[0][0] - assert ( - "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" - in mock_subprocess.run.call_args[0][0] - ) + assert "--export=PROJECTFILE,MY_CONDA_ENV,MEASURESONLY,SAMPLINGONLY" in mock_subprocess.run.call_args[0][0] assert "--output=sampling.out" in mock_subprocess.run.call_args[0][0] assert "--qos=high" not in mock_subprocess.run.call_args[0][0] assert "1" == mock_subprocess.run.call_args[1]["env"]["SAMPLINGONLY"] @@ -189,9 +168,7 @@ def test_user_cli( @patch("buildstockbatch.eagle.subprocess") -def test_qos_high_job_submit( - mock_subprocess, basic_residential_project_file, monkeypatch -): +def test_qos_high_job_submit(mock_subprocess, basic_residential_project_file, monkeypatch): mock_subprocess.run.return_value.stdout = "Submitted batch job 1\n" mock_subprocess.PIPE = None project_filename, results_dir = basic_residential_project_file() @@ -220,9 +197,7 @@ def test_qos_high_job_submit( assert "--qos=high" in mock_subprocess.run.call_args[0][0] -def test_queue_jobs_minutes_per_sim( - mocker, basic_residential_project_file, monkeypatch -): +def test_queue_jobs_minutes_per_sim(mocker, basic_residential_project_file, monkeypatch): mock_subprocess = mocker.patch("buildstockbatch.eagle.subprocess") mocker.patch.object(EagleBatch, "weather_dir", None) mock_subprocess.run.return_value.stdout = "Submitted batch job 1\n" @@ -270,14 +245,10 @@ def test_run_building_process(mocker, basic_residential_project_file): with open(results_dir / "job001.json", "w") as f: json.dump(job_json, f) - sample_buildstock_csv = pd.DataFrame.from_records( - [{"Building": i, "Dummy Column": i * i} for i in range(10)] - ) + sample_buildstock_csv = pd.DataFrame.from_records([{"Building": i, "Dummy Column": i * i} for i in range(10)]) os.makedirs(results_dir / "housing_characteristics", exist_ok=True) os.makedirs(results_dir / "weather", exist_ok=True) - sample_buildstock_csv.to_csv( - results_dir / "housing_characteristics" / "buildstock.csv", index=False - ) + sample_buildstock_csv.to_csv(results_dir / "housing_characteristics" / "buildstock.csv", index=False) def sequential_parallel(**kwargs): kw2 = kwargs.copy() @@ -288,12 +259,8 @@ def sequential_parallel(**kwargs): mocker.patch("buildstockbatch.eagle.Parallel", sequential_parallel) mocker.patch("buildstockbatch.eagle.subprocess") - mocker.patch.object( - EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir" - ) - mocker.patch.object( - EagleBatch, "local_weather_dir", results_dir / "local_weather_dir" - ) + mocker.patch.object(EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir") + mocker.patch.object(EagleBatch, "local_weather_dir", results_dir / "local_weather_dir") mocker.patch.object(EagleBatch, 
"local_output_dir", results_dir) mocker.patch.object( EagleBatch, @@ -306,20 +273,14 @@ def sequential_parallel(**kwargs): def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=False): real_upgrade_idx = 0 if upgrade_idx is None else upgrade_idx + 1 sim_id = f"bldg{building_id:07d}up{real_upgrade_idx:02d}" - sim_dir = os.path.join( - base_dir, f"up{real_upgrade_idx:02d}", f"bldg{building_id:07d}" - ) + sim_dir = os.path.join(base_dir, f"up{real_upgrade_idx:02d}", f"bldg{building_id:07d}") return sim_id, sim_dir mocker.patch.object(EagleBatch, "make_sim_dir", make_sim_dir_mock) - sampler_prop_mock = mocker.patch.object( - EagleBatch, "sampler", new_callable=mocker.PropertyMock - ) + sampler_prop_mock = mocker.patch.object(EagleBatch, "sampler", new_callable=mocker.PropertyMock) sampler_mock = mocker.MagicMock() sampler_prop_mock.return_value = sampler_mock - sampler_mock.csv_path = ( - results_dir.parent / "housing_characteristic2" / "buildstock.csv" - ) + sampler_mock.csv_path = results_dir.parent / "housing_characteristic2" / "buildstock.csv" sampler_mock.run_sampling = mocker.MagicMock(return_value="buildstock.csv") b = EagleBatch(project_filename) @@ -328,19 +289,11 @@ def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=Fal b.run_job_batch(1) # check results job-json - refrence_path = ( - pathlib.Path(__file__).resolve().parent / "test_results" / "reference_files" - ) + refrence_path = pathlib.Path(__file__).resolve().parent / "test_results" / "reference_files" - refrence_list = json.loads( - gzip.open(refrence_path / "results_job1.json.gz", "r").read() - ) + refrence_list = json.loads(gzip.open(refrence_path / "results_job1.json.gz", "r").read()) - output_list = json.loads( - gzip.open( - results_dir / "simulation_output" / "results_job1.json.gz", "r" - ).read() - ) + output_list = json.loads(gzip.open(results_dir / "simulation_output" / "results_job1.json.gz", "r").read()) refrence_list = [json.dumps(d) for d in refrence_list] output_list = [json.dumps(d) for d in output_list] @@ -350,35 +303,16 @@ def make_sim_dir_mock(building_id, upgrade_idx, base_dir, overwrite_existing=Fal ts_files = list(refrence_path.glob("**/*.parquet")) def compare_ts_parquets(source, dst): - test_pq = ( - pd.read_parquet(source) - .reset_index() - .drop(columns=["index"]) - .rename(columns=str.lower) - ) - reference_pq = ( - pd.read_parquet(dst) - .reset_index() - .drop(columns=["index"]) - .rename(columns=str.lower) - ) + test_pq = pd.read_parquet(source).reset_index().drop(columns=["index"]).rename(columns=str.lower) + reference_pq = pd.read_parquet(dst).reset_index().drop(columns=["index"]).rename(columns=str.lower) pd.testing.assert_frame_equal(test_pq, reference_pq) for file in ts_files: - results_file = ( - results_dir - / "results" - / "simulation_output" - / "timeseries" - / file.parent.name - / file.name - ) + results_file = results_dir / "results" / "simulation_output" / "timeseries" / file.parent.name / file.name compare_ts_parquets(file, results_file) # Check that buildstock.csv was trimmed properly - local_buildstock_df = read_csv( - results_dir / "local_housing_characteristics_dir" / "buildstock.csv", dtype=str - ) + local_buildstock_df = read_csv(results_dir / "local_housing_characteristics_dir" / "buildstock.csv", dtype=str) unique_buildings = {str(x[0]) for x in job_json["batch"]} assert len(unique_buildings) == len(local_buildstock_df) assert unique_buildings == set(local_buildstock_df["Building"]) @@ -392,15 +326,11 @@ def 
test_run_building_error_caught(mocker, basic_residential_project_file): with open(results_dir / "job001.json", "w") as f: json.dump(job_json, f) - sample_buildstock_csv = pd.DataFrame.from_records( - [{"Building": i, "Dummy Column": i * i} for i in range(10)] - ) + sample_buildstock_csv = pd.DataFrame.from_records([{"Building": i, "Dummy Column": i * i} for i in range(10)]) os.makedirs(results_dir / "housing_characteristics", exist_ok=True) os.makedirs(results_dir / "local_housing_characteristics", exist_ok=True) os.makedirs(results_dir / "weather", exist_ok=True) - sample_buildstock_csv.to_csv( - results_dir / "housing_characteristics" / "buildstock.csv", index=False - ) + sample_buildstock_csv.to_csv(results_dir / "housing_characteristics" / "buildstock.csv", index=False) def raise_error(*args, **kwargs): raise RuntimeError("A problem happened") @@ -417,12 +347,8 @@ def sequential_parallel(**kwargs): mocker.patch.object(EagleBatch, "run_building", raise_error) mocker.patch.object(EagleBatch, "local_output_dir", results_dir) mocker.patch.object(EagleBatch, "results_dir", results_dir) - mocker.patch.object( - EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir" - ) - mocker.patch.object( - EagleBatch, "local_weather_dir", results_dir / "local_weather_dir" - ) + mocker.patch.object(EagleBatch, "local_buildstock_dir", results_dir / "local_buildstock_dir") + mocker.patch.object(EagleBatch, "local_weather_dir", results_dir / "local_weather_dir") mocker.patch.object( EagleBatch, "local_housing_characteristics_dir", @@ -447,9 +373,7 @@ def test_rerun_failed_jobs(mocker, basic_residential_project_file): mocker.patch.object(EagleBatch, "results_dir", results_dir) process_results_mocker = mocker.patch.object(BuildStockBatchBase, "process_results") queue_jobs_mocker = mocker.patch.object(EagleBatch, "queue_jobs", return_value=[42]) - queue_post_processing_mocker = mocker.patch.object( - EagleBatch, "queue_post_processing" - ) + queue_post_processing_mocker = mocker.patch.object(EagleBatch, "queue_post_processing") b = EagleBatch(project_filename) diff --git a/buildstockbatch/test/test_local.py b/buildstockbatch/test/test_local.py index 14c0a682..8078944f 100644 --- a/buildstockbatch/test/test_local.py +++ b/buildstockbatch/test/test_local.py @@ -44,11 +44,7 @@ def test_resstock_local_batch(project_filename): n_datapoints = 2 batch.cfg["sampler"]["args"]["n_datapoints"] = n_datapoints - local_weather_file = ( - resstock_directory.parent - / "weather" - / batch.cfg["weather_files_url"].split("/")[-1] - ) + local_weather_file = resstock_directory.parent / "weather" / batch.cfg["weather_files_url"].split("/")[-1] if local_weather_file.exists(): del batch.cfg["weather_files_url"] batch.cfg["weather_files_path"] = str(local_weather_file) @@ -63,12 +59,7 @@ def test_resstock_local_batch(project_filename): for upgrade_id in range(0, n_upgrades + 1): for bldg_id in range(1, n_datapoints + 1): - assert ( - simout_path - / "timeseries" - / f"up{upgrade_id:02d}" - / f"bldg{bldg_id:07d}.parquet" - ).exists() + assert (simout_path / "timeseries" / f"up{upgrade_id:02d}" / f"bldg{bldg_id:07d}.parquet").exists() batch.process_results() @@ -83,17 +74,9 @@ def test_resstock_local_batch(project_filename): ts_pq_path = out_path / "parquet" / "timeseries" for upgrade_id in range(0, n_upgrades + 1): assert (ts_pq_path / f"upgrade={upgrade_id}" / "group0.parquet").exists() - assert ( - out_path / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz" - ).exists() + assert (out_path / "results_csvs" / 
f"results_up{upgrade_id:02d}.csv.gz").exists() if upgrade_id >= 1: - upg_pq = ( - out_path - / "parquet" - / "upgrades" - / f"upgrade={upgrade_id}" - / f"results_up{upgrade_id:02d}.parquet" - ) + upg_pq = out_path / "parquet" / "upgrades" / f"upgrade={upgrade_id}" / f"results_up{upgrade_id:02d}.parquet" assert upg_pq.exists() upg = pd.read_parquet(upg_pq, columns=["completed_status"]) assert (upg["completed_status"] == "Success").all() @@ -113,9 +96,7 @@ def mocked_subprocess_run(run_cmd, **kwargs): mocker.patch("buildstockbatch.local.subprocess.run", mocked_subprocess_run) sleep_mock = mocker.patch("buildstockbatch.local.time.sleep") - cfg = get_project_configuration( - resstock_directory / "project_national" / "national_baseline.yml" - ) + cfg = get_project_configuration(resstock_directory / "project_national" / "national_baseline.yml") cfg["max_minutes_per_sim"] = 5 with tempfile.TemporaryDirectory() as tmpdir: @@ -144,9 +125,7 @@ def mocked_subprocess_run(run_cmd, **kwargs): assert out_osw["completed_status"] == "Fail" assert msg_re.search(out_osw["timeout"]) - err_log_re = re.compile( - r"\[\d\d:\d\d:\d\d ERROR\] Terminated \w+ after reaching max time" - ) + err_log_re = re.compile(r"\[\d\d:\d\d:\d\d ERROR\] Terminated \w+ after reaching max time") with open(sim_path / "run" / "run.log", "r") as run_log: err_log_re.search(run_log.read()) with open(sim_path / "run" / "failed.job", "r") as failed_job: diff --git a/buildstockbatch/test/test_postprocessing.py b/buildstockbatch/test/test_postprocessing.py index d11fdb86..667faa7f 100644 --- a/buildstockbatch/test/test_postprocessing.py +++ b/buildstockbatch/test/test_postprocessing.py @@ -19,9 +19,7 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): reporting_measures = ["ReportingMeasure1", "ReportingMeasure2"] - project_filename, results_dir = basic_residential_project_file( - {"reporting_measures": reporting_measures} - ) + project_filename, results_dir = basic_residential_project_file({"reporting_measures": reporting_measures}) fs = LocalFileSystem() @@ -42,11 +40,7 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): sim_dir = str(filename.parent.parent) upgrade_id = int(re.search(r"up(\d+)", sim_dir).group(1)) building_id = int(re.search(r"bldg(\d+)", sim_dir).group(1)) - dpouts2.append( - postprocessing.read_simulation_outputs( - fs, reporting_measures, sim_dir, upgrade_id, building_id - ) - ) + dpouts2.append(postprocessing.read_simulation_outputs(fs, reporting_measures, sim_dir, upgrade_id, building_id)) with gzip.open(sim_out_dir / "results_job0.json.gz", "wt", encoding="utf-8") as f: json.dump(dpouts2, f) @@ -56,9 +50,7 @@ def test_report_additional_results_csv_columns(basic_residential_project_file): postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) for upgrade_id in (0, 1): - df = read_csv( - str(results_dir / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz") - ) + df = read_csv(str(results_dir / "results_csvs" / f"results_up{upgrade_id:02d}.csv.gz")) assert (df["reporting_measure1.column_1"] == 1).all() assert (df["reporting_measure1.column_2"] == 2).all() assert (df["reporting_measure2.column_3"] == 3).all() @@ -74,9 +66,7 @@ def test_empty_results_assertion(basic_residential_project_file, capsys): shutil.rmtree(sim_out_dir) # no results cfg = get_project_configuration(project_filename) - with pytest.raises( - ValueError, match=r"No simulation results found to post-process" - ): + with pytest.raises(ValueError, match=r"No simulation 
results found to post-process"): assert postprocessing.combine_results(fs, results_dir, cfg, do_timeseries=False) @@ -96,9 +86,7 @@ def test_large_parquet_combine(basic_residential_project_file): @pytest.mark.parametrize("keep_individual_timeseries", [True, False]) -def test_keep_individual_timeseries( - keep_individual_timeseries, basic_residential_project_file, mocker -): +def test_keep_individual_timeseries(keep_individual_timeseries, basic_residential_project_file, mocker): project_filename, results_dir = basic_residential_project_file( {"postprocessing": {"keep_individual_timeseries": keep_individual_timeseries}} ) @@ -122,9 +110,7 @@ def test_upgrade_missing_ts(basic_residential_project_file, mocker, caplog): project_filename, results_dir = basic_residential_project_file() results_path = pathlib.Path(results_dir) - for filename in (results_path / "simulation_output" / "timeseries" / "up01").glob( - "*.parquet" - ): + for filename in (results_path / "simulation_output" / "timeseries" / "up01").glob("*.parquet"): os.remove(filename) mocker.patch.object(BuildStockBatchBase, "weather_dir", None) diff --git a/buildstockbatch/test/test_validation.py b/buildstockbatch/test/test_validation.py index dfcaa693..4dc2696b 100644 --- a/buildstockbatch/test/test_validation.py +++ b/buildstockbatch/test/test_validation.py @@ -33,9 +33,7 @@ here = os.path.dirname(os.path.abspath(__file__)) example_yml_dir = os.path.join(here, "test_inputs") -resources_dir = os.path.join( - here, "test_inputs", "test_openstudio_buildstock", "resources" -) +resources_dir = os.path.join(here, "test_inputs", "test_openstudio_buildstock", "resources") def filter_logs(logs, level): @@ -63,15 +61,11 @@ def test_local_docker_validation_is_classmethod(): def test_complete_schema_passes_validation(): - assert BuildStockBatchBase.validate_project_schema( - os.path.join(example_yml_dir, "complete-schema.yml") - ) + assert BuildStockBatchBase.validate_project_schema(os.path.join(example_yml_dir, "complete-schema.yml")) def test_minimal_schema_passes_validation(): - assert BuildStockBatchBase.validate_project_schema( - os.path.join(example_yml_dir, "minimal-schema.yml") - ) + assert BuildStockBatchBase.validate_project_schema(os.path.join(example_yml_dir, "minimal-schema.yml")) @pytest.mark.parametrize( @@ -129,13 +123,9 @@ def test_xor_violations_fail(project_file, expected): ) def test_validation_integration(project_file, base_expected, eagle_expected): # patch the validate_options_lookup function to always return true for this case - with patch.object( - BuildStockBatchBase, "validate_options_lookup", lambda _: True - ), patch.object( + with patch.object(BuildStockBatchBase, "validate_options_lookup", lambda _: True), patch.object( BuildStockBatchBase, "validate_measure_references", lambda _: True - ), patch.object( - BuildStockBatchBase, "validate_workflow_generator", lambda _: True - ), patch.object( + ), patch.object(BuildStockBatchBase, "validate_workflow_generator", lambda _: True), patch.object( BuildStockBatchBase, "validate_postprocessing_spec", lambda _: True ), patch.object( EagleBatch, "validate_singularity_image_eagle", lambda _: True @@ -186,14 +176,10 @@ def test_bad_measures(project_file): except (ValidationError, YamaleError) as er: er = str(er) assert "'1.5' is not a int" in er - assert ( - "'huorly' not in ('none', 'timestep', 'hourly', 'daily', 'monthly')" - in er - ) + assert "'huorly' not in ('none', 'timestep', 'hourly', 'daily', 'monthly')" in er else: raise Exception( - "measures_and_arguments was 
supposed to raise ValidationError for" - " enforce-validate-measures-bad.yml" + "measures_and_arguments was supposed to raise ValidationError for" " enforce-validate-measures-bad.yml" ) @@ -201,9 +187,7 @@ def test_bad_measures(project_file): "project_file", [ os.path.join(example_yml_dir, "enforce-validate-measures-good-2.yml"), - os.path.join( - example_yml_dir, "enforce-validate-measures-good-2-with-anchors.yml" - ), + os.path.join(example_yml_dir, "enforce-validate-measures-good-2-with-anchors.yml"), ], ) def test_good_measures(project_file): @@ -268,9 +252,7 @@ def test_bad_options_validation(project_file): assert "Floor Insulation: '*' cannot be mixed with other options" in er else: - raise Exception( - "validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml" - ) + raise Exception("validate_options was supposed to raise ValueError for enforce-validate-options-bad.yml") @pytest.mark.parametrize( @@ -301,8 +283,7 @@ def test_bad_measures_validation(project_file): else: raise Exception( - "validate_measure_references was supposed to raise ValueError for " - "enforce-validate-measures-bad.yml" + "validate_measure_references was supposed to raise ValueError for " "enforce-validate-measures-bad.yml" ) @@ -319,14 +300,10 @@ def test_bad_postprocessing_spec_validation(project_file): er = str(er) assert "bad_partition_column" in er else: - raise Exception( - "validate_options was supposed to raise ValidationError for enforce-validate-options-bad-2.yml" - ) + raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-bad-2.yml") -@pytest.mark.parametrize( - "project_file", [os.path.join(example_yml_dir, "enforce-validate-options-good.yml")] -) +@pytest.mark.parametrize("project_file", [os.path.join(example_yml_dir, "enforce-validate-options-good.yml")]) def test_logic_validation_fail(project_file): try: BuildStockBatchBase.validate_logic(project_file) @@ -336,9 +313,7 @@ def test_logic_validation_fail(project_file): assert "'Vintage' occurs 2 times in a 'and' block" in er assert "'Vintage' occurs 2 times in a '&&' block" in er else: - raise Exception( - "validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml" - ) + raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml") @pytest.mark.parametrize( @@ -354,9 +329,7 @@ def test_number_of_options_apply_upgrade(): proj_filename = resstock_directory / "project_national" / "national_upgrades.yml" cfg = get_project_configuration(str(proj_filename)) cfg["upgrades"][-1]["options"] = cfg["upgrades"][-1]["options"] * 10 - cfg["upgrades"][0]["options"][0]["costs"] = ( - cfg["upgrades"][0]["options"][0]["costs"] * 5 - ) + cfg["upgrades"][0]["options"][0]["costs"] = cfg["upgrades"][0]["options"][0]["costs"] * 5 with tempfile.TemporaryDirectory() as tmpdir: tmppath = pathlib.Path(tmpdir) new_proj_filename = tmppath / "project.yml" @@ -414,11 +387,7 @@ def test_validate_sampler_good_buildstock(basic_residential_project_file): { "sampler": { "type": "precomputed", - "args": { - "sample_file": str( - os.path.join(resources_dir, "buildstock_good.csv") - ) - }, + "args": {"sample_file": str(os.path.join(resources_dir, "buildstock_good.csv"))}, } } ) @@ -430,11 +399,7 @@ def test_validate_sampler_bad_buildstock(basic_residential_project_file): { "sampler": { "type": "precomputed", - "args": { - "sample_file": str( - os.path.join(resources_dir, "buildstock_bad.csv") - ) - }, + "args": {"sample_file": 
str(os.path.join(resources_dir, "buildstock_bad.csv"))}, } } ) @@ -442,27 +407,10 @@ def test_validate_sampler_bad_buildstock(basic_residential_project_file): BuildStockBatchBase.validate_sampler(project_filename) except ValidationError as er: er = str(er) - assert ( - "Option 1940-1950 in column Vintage of buildstock_csv is not available in options_lookup.tsv" - in er - ) - assert ( - "Option TX in column State of buildstock_csv is not available in options_lookup.tsv" - in er - ) - assert ( - "Option nan in column Insulation Wall of buildstock_csv is not available in options_lookup.tsv" - in er - ) - assert ( - "Column Insulation in buildstock_csv is not available in options_lookup.tsv" - in er - ) - assert ( - "Column ZipPlusCode in buildstock_csv is not available in options_lookup.tsv" - in er - ) + assert "Option 1940-1950 in column Vintage of buildstock_csv is not available in options_lookup.tsv" in er + assert "Option TX in column State of buildstock_csv is not available in options_lookup.tsv" in er + assert "Option nan in column Insulation Wall of buildstock_csv is not available in options_lookup.tsv" in er + assert "Column Insulation in buildstock_csv is not available in options_lookup.tsv" in er + assert "Column ZipPlusCode in buildstock_csv is not available in options_lookup.tsv" in er else: - raise Exception( - "validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml" - ) + raise Exception("validate_options was supposed to raise ValidationError for enforce-validate-options-good.yml") diff --git a/buildstockbatch/utils.py b/buildstockbatch/utils.py index 848ceb79..c74f6521 100644 --- a/buildstockbatch/utils.py +++ b/buildstockbatch/utils.py @@ -43,16 +43,12 @@ def get_project_configuration(project_file): raise err # Set absolute paths - cfg["buildstock_directory"] = path_rel_to_file( - project_file, cfg["buildstock_directory"] - ) + cfg["buildstock_directory"] = path_rel_to_file(project_file, cfg["buildstock_directory"]) # if 'precomputed_sample' in cfg.get('baseline', {}): # cfg['baseline']['precomputed_sample'] = \ # path_rel_to_file(project_file, cfg['baseline']['precomputed_sample']) if "weather_files_path" in cfg: - cfg["weather_files_path"] = path_rel_to_file( - project_file, cfg["weather_files_path"] - ) + cfg["weather_files_path"] = path_rel_to_file(project_file, cfg["weather_files_path"]) return cfg @@ -66,35 +62,20 @@ def _str_repr(obj, list_max=20, dict_max=20, string_max=100): elif type(obj) in [int, float]: return _str_repr(str(obj), list_max, dict_max, string_max) elif type(obj) is list: - txt = "[" + ",".join( - [ - _str_repr(item, list_max, dict_max, string_max) - for item in obj[0:list_max] - ] - ) + txt = "[" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:list_max]]) if len(obj) > list_max: txt += f" ...{len(obj)}" txt += "]" return txt elif type(obj) is tuple: - txt = "(" + ",".join( - [ - _str_repr(item, list_max, dict_max, string_max) - for item in obj[0:list_max] - ] - ) + txt = "(" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:list_max]]) if len(obj) > list_max: txt += f" ...{len(obj)}" txt += ")" return txt elif type(obj) is set: obj = list(obj) - txt = "{" + ",".join( - [ - _str_repr(item, list_max, dict_max, string_max) - for item in obj[0:dict_max] - ] - ) + txt = "{" + ",".join([_str_repr(item, list_max, dict_max, string_max) for item in obj[0:dict_max]]) if len(obj) > dict_max: txt += f" ...{len(obj)}" txt += "}" diff --git 
a/buildstockbatch/workflow_generator/commercial.py b/buildstockbatch/workflow_generator/commercial.py index 2fff78a2..6495acfe 100644 --- a/buildstockbatch/workflow_generator/commercial.py +++ b/buildstockbatch/workflow_generator/commercial.py @@ -49,9 +49,7 @@ def validate(cls, cfg): workflow_generator_args = cfg["workflow_generator"]["args"] schema_yml = re.sub(r"^ {8}", "", schema_yml, flags=re.MULTILINE) schema = yamale.make_schema(content=schema_yml, parser="ruamel") - data = yamale.make_data( - content=json.dumps(workflow_generator_args), parser="ruamel" - ) + data = yamale.make_data(content=json.dumps(workflow_generator_args), parser="ruamel") return yamale.validate(schema, data, strict=True) def reporting_measures(self): @@ -116,17 +114,11 @@ def create_osw(self, sim_id, building_id, upgrade_idx): "arguments": {"run_measure": 1}, } if "upgrade_name" in measure_d: - apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d[ - "upgrade_name" - ] + apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d["upgrade_name"] for opt_num, option in enumerate(measure_d["options"], 1): - apply_upgrade_measure["arguments"][ - "option_{}".format(opt_num) - ] = option["option"] + apply_upgrade_measure["arguments"]["option_{}".format(opt_num)] = option["option"] if "lifetime" in option: - apply_upgrade_measure["arguments"][ - "option_{}_lifetime".format(opt_num) - ] = option["lifetime"] + apply_upgrade_measure["arguments"]["option_{}_lifetime".format(opt_num)] = option["lifetime"] if "apply_logic" in option: apply_upgrade_measure["arguments"][ "option_{}_apply_logic".format(opt_num) @@ -139,9 +131,9 @@ def create_osw(self, sim_id, building_id, upgrade_idx): "option_{}_cost_{}_{}".format(opt_num, cost_num, arg) ] = cost[arg] if "package_apply_logic" in measure_d: - apply_upgrade_measure["arguments"][ - "package_apply_logic" - ] = self.make_apply_logic_arg(measure_d["package_apply_logic"]) + apply_upgrade_measure["arguments"]["package_apply_logic"] = self.make_apply_logic_arg( + measure_d["package_apply_logic"] + ) build_existing_model_idx = list( map( diff --git a/buildstockbatch/workflow_generator/residential_hpxml.py b/buildstockbatch/workflow_generator/residential_hpxml.py index ee71b6a1..71cab179 100644 --- a/buildstockbatch/workflow_generator/residential_hpxml.py +++ b/buildstockbatch/workflow_generator/residential_hpxml.py @@ -145,18 +145,14 @@ def validate(cls, cfg): workflow_generator_args = cfg["workflow_generator"]["args"] schema_yml = re.sub(r"^ {8}", "", schema_yml, flags=re.MULTILINE) schema = yamale.make_schema(content=schema_yml, parser="ruamel") - data = yamale.make_data( - content=json.dumps(workflow_generator_args), parser="ruamel" - ) + data = yamale.make_data(content=json.dumps(workflow_generator_args), parser="ruamel") yamale.validate(schema, data, strict=True) return cls.validate_measures_and_arguments(cfg) def reporting_measures(self): """Return a list of reporting measures to include in outputs""" workflow_args = self.cfg["workflow_generator"].get("args", {}) - return [ - x["measure_dir_name"] for x in workflow_args.get("reporting_measures", []) - ] + return [x["measure_dir_name"] for x in workflow_args.get("reporting_measures", [])] @staticmethod def validate_measures_and_arguments(cfg): @@ -195,9 +191,7 @@ def get_cfg_path(cfg_path): workflow_args = cfg["workflow_generator"].get("args", {}) if "reporting_measures" in workflow_args.keys(): for reporting_measure in workflow_args["reporting_measures"]: - measure_names[ - reporting_measure["measure_dir_name"] - ] = 
"workflow_generator.args.reporting_measures" + measure_names[reporting_measure["measure_dir_name"]] = "workflow_generator.args.reporting_measures" error_msgs = "" warning_msgs = "" @@ -230,9 +224,7 @@ def get_cfg_path(cfg_path): error_msgs += "* The following multipliers values are invalid: \n" for multiplier, count in invalid_multipliers.items(): error_msgs += f" '{multiplier}' - Used {count} times \n" - error_msgs += ( - f" The list of valid multipliers are {valid_multipliers}.\n" - ) + error_msgs += f" The list of valid multipliers are {valid_multipliers}.\n" if warning_msgs: logger.warning(warning_msgs) @@ -274,8 +266,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): bld_exist_model_args = { "building_id": building_id, - "sample_weight": self.cfg["baseline"]["n_buildings_represented"] - / self.n_datapoints, + "sample_weight": self.cfg["baseline"]["n_buildings_represented"] / self.n_datapoints, } bld_exist_model_args.update(sim_ctl_args) @@ -298,16 +289,12 @@ def create_osw(self, sim_id, building_id, upgrade_idx): ["emissions_wood_values", "wood_value"], ] for arg, item in emissions_map: - bld_exist_model_args[arg] = ",".join( - [str(s.get(item, "")) for s in emissions] - ) + bld_exist_model_args[arg] = ",".join([str(s.get(item, "")) for s in emissions]) buildstock_dir = self.cfg["buildstock_directory"] measures_dir = os.path.join(buildstock_dir, "measures") measure_path = os.path.join(measures_dir, "BuildExistingModel") - bld_exist_model_args_avail = get_measure_arguments( - os.path.join(measure_path, "measure.xml") - ) + bld_exist_model_args_avail = get_measure_arguments(os.path.join(measure_path, "measure.xml")) if "utility_bills" in workflow_args: utility_bills = workflow_args["utility_bills"] @@ -346,9 +333,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): ] for arg, item in utility_bills_map: if arg in bld_exist_model_args_avail: - bld_exist_model_args[arg] = ",".join( - [str(s.get(item, "")) for s in utility_bills] - ) + bld_exist_model_args[arg] = ",".join([str(s.get(item, "")) for s in utility_bills]) sim_out_rep_args = { "timeseries_frequency": "none", @@ -371,9 +356,7 @@ def create_osw(self, sim_id, building_id, upgrade_idx): measures_dir = os.path.join(buildstock_dir, "resources/hpxml-measures") measure_path = os.path.join(measures_dir, "ReportSimulationOutput") - sim_out_rep_args_avail = get_measure_arguments( - os.path.join(measure_path, "measure.xml") - ) + sim_out_rep_args_avail = get_measure_arguments(os.path.join(measure_path, "measure.xml")) if "include_annual_total_consumptions" in sim_out_rep_args_avail: sim_out_rep_args["include_annual_total_consumptions"] = True @@ -436,18 +419,14 @@ def create_osw(self, sim_id, building_id, upgrade_idx): if "output_variables" in sim_out_rep_args: output_variables = sim_out_rep_args["output_variables"] - sim_out_rep_args["user_output_variables"] = ",".join( - [str(s.get("name")) for s in output_variables] - ) + sim_out_rep_args["user_output_variables"] = ",".join([str(s.get("name")) for s in output_variables]) sim_out_rep_args.pop("output_variables") util_bills_rep_args = {} measures_dir = os.path.join(buildstock_dir, "resources/hpxml-measures") measure_path = os.path.join(measures_dir, "ReportUtilityBills") - util_bills_rep_args_avail = get_measure_arguments( - os.path.join(measure_path, "measure.xml") - ) + util_bills_rep_args_avail = get_measure_arguments(os.path.join(measure_path, "measure.xml")) if "include_annual_bills" in util_bills_rep_args_avail: util_bills_rep_args["include_annual_bills"] = True @@ 
-538,17 +517,11 @@ def create_osw(self, sim_id, building_id, upgrade_idx): "arguments": {"run_measure": 1}, } if "upgrade_name" in measure_d: - apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d[ - "upgrade_name" - ] + apply_upgrade_measure["arguments"]["upgrade_name"] = measure_d["upgrade_name"] for opt_num, option in enumerate(measure_d["options"], 1): - apply_upgrade_measure["arguments"][ - "option_{}".format(opt_num) - ] = option["option"] + apply_upgrade_measure["arguments"]["option_{}".format(opt_num)] = option["option"] if "lifetime" in option: - apply_upgrade_measure["arguments"][ - "option_{}_lifetime".format(opt_num) - ] = option["lifetime"] + apply_upgrade_measure["arguments"]["option_{}_lifetime".format(opt_num)] = option["lifetime"] if "apply_logic" in option: apply_upgrade_measure["arguments"][ "option_{}_apply_logic".format(opt_num) @@ -561,13 +534,11 @@ def create_osw(self, sim_id, building_id, upgrade_idx): "option_{}_cost_{}_{}".format(opt_num, cost_num, arg) ] = cost[arg] if "package_apply_logic" in measure_d: - apply_upgrade_measure["arguments"][ - "package_apply_logic" - ] = self.make_apply_logic_arg(measure_d["package_apply_logic"]) + apply_upgrade_measure["arguments"]["package_apply_logic"] = self.make_apply_logic_arg( + measure_d["package_apply_logic"] + ) - build_existing_model_idx = [ - x["measure_dir_name"] == "BuildExistingModel" for x in osw["steps"] - ].index(True) + build_existing_model_idx = [x["measure_dir_name"] == "BuildExistingModel" for x in osw["steps"]].index(True) osw["steps"].insert(build_existing_model_idx + 1, apply_upgrade_measure) if "reporting_measures" in workflow_args: @@ -575,8 +546,6 @@ def create_osw(self, sim_id, building_id, upgrade_idx): if "arguments" not in reporting_measure: reporting_measure["arguments"] = {} reporting_measure["measure_type"] = "ReportingMeasure" - osw["steps"].insert( - -1, reporting_measure - ) # right before ServerDirectoryCleanup + osw["steps"].insert(-1, reporting_measure) # right before ServerDirectoryCleanup return osw diff --git a/buildstockbatch/workflow_generator/test_workflow_generator.py b/buildstockbatch/workflow_generator/test_workflow_generator.py index 9a49eaea..bd61c46a 100644 --- a/buildstockbatch/workflow_generator/test_workflow_generator.py +++ b/buildstockbatch/workflow_generator/test_workflow_generator.py @@ -12,14 +12,10 @@ def test_apply_logic_recursion(): apply_logic = WorkflowGeneratorBase.make_apply_logic_arg(["one", "two", "three"]) assert apply_logic == "(one&&two&&three)" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg( - {"and": ["one", "two", "three"]} - ) + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({"and": ["one", "two", "three"]}) assert apply_logic == "(one&&two&&three)" - apply_logic = WorkflowGeneratorBase.make_apply_logic_arg( - {"or": ["four", "five", "six"]} - ) + apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({"or": ["four", "five", "six"]}) assert apply_logic == "(four||five||six)" apply_logic = WorkflowGeneratorBase.make_apply_logic_arg({"not": "seven"}) @@ -76,36 +72,11 @@ def test_residential_hpxml(mocker): build_existing_model_step = steps[0] assert build_existing_model_step["measure_dir_name"] == "BuildExistingModel" - assert ( - build_existing_model_step["arguments"][ - "simulation_control_run_period_begin_month" - ] - == 2 - ) - assert ( - build_existing_model_step["arguments"][ - "simulation_control_run_period_begin_day_of_month" - ] - == 1 - ) - assert ( - build_existing_model_step["arguments"][ - 
"simulation_control_run_period_end_month" - ] - == 2 - ) - assert ( - build_existing_model_step["arguments"][ - "simulation_control_run_period_end_day_of_month" - ] - == 28 - ) - assert ( - build_existing_model_step["arguments"][ - "simulation_control_run_period_calendar_year" - ] - == 2010 - ) + assert build_existing_model_step["arguments"]["simulation_control_run_period_begin_month"] == 2 + assert build_existing_model_step["arguments"]["simulation_control_run_period_begin_day_of_month"] == 1 + assert build_existing_model_step["arguments"]["simulation_control_run_period_end_month"] == 2 + assert build_existing_model_step["arguments"]["simulation_control_run_period_end_day_of_month"] == 28 + assert build_existing_model_step["arguments"]["simulation_control_run_period_calendar_year"] == 2010 apply_upgrade_step = steps[1] assert apply_upgrade_step["measure_dir_name"] == "ApplyUpgrade" @@ -116,25 +87,13 @@ def test_residential_hpxml(mocker): simulation_output_step = steps[3] assert simulation_output_step["measure_dir_name"] == "ReportSimulationOutput" assert simulation_output_step["arguments"]["timeseries_frequency"] == "hourly" - assert ( - simulation_output_step["arguments"]["include_annual_total_consumptions"] is True - ) - assert ( - simulation_output_step["arguments"]["include_annual_fuel_consumptions"] is True - ) - assert ( - simulation_output_step["arguments"]["include_annual_end_use_consumptions"] - is True - ) - assert ( - simulation_output_step["arguments"]["include_annual_system_use_consumptions"] - is False - ) + assert simulation_output_step["arguments"]["include_annual_total_consumptions"] is True + assert simulation_output_step["arguments"]["include_annual_fuel_consumptions"] is True + assert simulation_output_step["arguments"]["include_annual_end_use_consumptions"] is True + assert simulation_output_step["arguments"]["include_annual_system_use_consumptions"] is False assert simulation_output_step["arguments"]["include_annual_emissions"] is True assert simulation_output_step["arguments"]["include_annual_emission_fuels"] is True - assert ( - simulation_output_step["arguments"]["include_annual_emission_end_uses"] is True - ) + assert simulation_output_step["arguments"]["include_annual_emission_end_uses"] is True assert simulation_output_step["arguments"]["include_annual_total_loads"] is True assert simulation_output_step["arguments"]["include_annual_unmet_hours"] is True assert simulation_output_step["arguments"]["include_annual_peak_fuels"] is True @@ -143,55 +102,22 @@ def test_residential_hpxml(mocker): assert simulation_output_step["arguments"]["include_annual_hot_water_uses"] is True assert simulation_output_step["arguments"]["include_annual_hvac_summary"] is True assert simulation_output_step["arguments"]["include_annual_resilience"] is True - assert ( - simulation_output_step["arguments"]["include_timeseries_total_consumptions"] - is True - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_fuel_consumptions"] - is False - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_end_use_consumptions"] - is True - ) - assert ( - simulation_output_step["arguments"][ - "include_timeseries_system_use_consumptions" - ] - is False - ) + assert simulation_output_step["arguments"]["include_timeseries_total_consumptions"] is True + assert simulation_output_step["arguments"]["include_timeseries_fuel_consumptions"] is False + assert simulation_output_step["arguments"]["include_timeseries_end_use_consumptions"] is True + assert 
simulation_output_step["arguments"]["include_timeseries_system_use_consumptions"] is False assert simulation_output_step["arguments"]["include_timeseries_emissions"] is False - assert ( - simulation_output_step["arguments"]["include_timeseries_emission_fuels"] - is False - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_emission_end_uses"] - is False - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_hot_water_uses"] - is False - ) + assert simulation_output_step["arguments"]["include_timeseries_emission_fuels"] is False + assert simulation_output_step["arguments"]["include_timeseries_emission_end_uses"] is False + assert simulation_output_step["arguments"]["include_timeseries_hot_water_uses"] is False assert simulation_output_step["arguments"]["include_timeseries_total_loads"] is True - assert ( - simulation_output_step["arguments"]["include_timeseries_component_loads"] - is False - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_unmet_hours"] is False - ) - assert ( - simulation_output_step["arguments"]["include_timeseries_zone_temperatures"] - is False - ) + assert simulation_output_step["arguments"]["include_timeseries_component_loads"] is False + assert simulation_output_step["arguments"]["include_timeseries_unmet_hours"] is False + assert simulation_output_step["arguments"]["include_timeseries_zone_temperatures"] is False assert simulation_output_step["arguments"]["include_timeseries_airflows"] is False assert simulation_output_step["arguments"]["include_timeseries_weather"] is False assert simulation_output_step["arguments"]["include_timeseries_resilience"] is False - assert ( - simulation_output_step["arguments"]["timeseries_timestamp_convention"] == "end" - ) + assert simulation_output_step["arguments"]["timeseries_timestamp_convention"] == "end" assert simulation_output_step["arguments"]["timeseries_num_decimal_places"] == 3 assert simulation_output_step["arguments"]["add_timeseries_dst_column"] is True assert simulation_output_step["arguments"]["add_timeseries_utc_column"] is True @@ -333,9 +259,7 @@ def test_com_default_workflow_generator_extended(mocker): assert reporting_measure_step["measure_type"] == "ReportingMeasure" assert reporting_measure_step["arguments"] == {} # Should only be one instance of SimulationOutputReport - assert [ - d["measure_dir_name"] == "SimulationOutputReport" for d in osw["steps"] - ].count(True) == 1 + assert [d["measure_dir_name"] == "SimulationOutputReport" for d in osw["steps"]].count(True) == 1 # Should get TimeseriesCSVExport if included in args reporting_measure_step = osw["steps"][1] assert reporting_measure_step["measure_dir_name"] == "TimeseriesCSVExport" @@ -344,10 +268,7 @@ def test_com_default_workflow_generator_extended(mocker): assert reporting_measure_step["arguments"]["inc_output_variables"] == "true" # Should have the openstudio report reporting_measure_step = osw["steps"][2] - assert ( - reporting_measure_step["measure_dir_name"] - == "f8e23017-894d-4bdf-977f-37e3961e6f42" - ) + assert reporting_measure_step["measure_dir_name"] == "f8e23017-894d-4bdf-977f-37e3961e6f42" assert reporting_measure_step["measure_type"] == "ReportingMeasure" assert reporting_measure_step["arguments"]["building_summary_section"] == "true" assert reporting_measure_step["arguments"]["schedules_overview_section"] == "true" diff --git a/docs/conf.py b/docs/conf.py index 45c44c52..94ca7931 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,9 +20,7 @@ here = 
os.path.abspath(os.path.dirname(__file__)) metadata = {} -with open( - os.path.join(here, "..", "buildstockbatch", "__version__.py"), "r", encoding="utf-8" -) as f: +with open(os.path.join(here, "..", "buildstockbatch", "__version__.py"), "r", encoding="utf-8") as f: exec(f.read(), metadata) # -- Project information ----------------------------------------------------- @@ -75,9 +73,7 @@ # how to render changelog links changelog_render_ticket = "http://www.github.com/nrel/buildstockbatch/issues/%s" -changelog_render_pullreq = { - "default": "https://www.github.com/nrel/buildstockbatch/pull/%s" -} +changelog_render_pullreq = {"default": "https://www.github.com/nrel/buildstockbatch/pull/%s"} # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -180,9 +176,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, "buildstockbatch", "BuildStock Batch Documentation", [author], 1) -] +man_pages = [(master_doc, "buildstockbatch", "BuildStock Batch Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- diff --git a/setup.py b/setup.py index 9d49ca49..ff621424 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,7 @@ here = os.path.abspath(os.path.dirname(__file__)) metadata = {} -with open( - os.path.join(here, "buildstockbatch", "__version__.py"), "r", encoding="utf-8" -) as f: +with open(os.path.join(here, "buildstockbatch", "__version__.py"), "r", encoding="utf-8") as f: exec(f.read(), metadata) with open("README.md", "r", "utf-8") as f: @@ -58,6 +56,7 @@ "sphinx_paramlinks", "changelog", "flake8", + "black", "rope", "doc8", "pre-commit",
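
With black added to the dev requirements and wired into pre-commit, the typical local workflow is a one-time hook
install followed by an initial whole-repository pass. A minimal sketch, assuming pre-commit is installed in the
active environment (pre-commit builds and pins its own black environment from the hook's rev, so black does not
need to be installed separately for the hook to run):

    pip install pre-commit         # hook runner; the black hook environment is managed by pre-commit itself
    pre-commit install             # register the git pre-commit hook in this clone
    pre-commit run --all-files     # reformat the entire tree once

After this, black runs automatically on staged files at every commit.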